From 5b236608f84ceec714469b33f17d487a6de173d5 Mon Sep 17 00:00:00 2001
From: fullsizemalt <106900403+fullsizemalt@users.noreply.github.com>
Date: Sun, 28 Dec 2025 16:28:58 -0800
Subject: [PATCH] feat: Add SongCanon API for cross-band song linking

- Add routers/canon.py with endpoints:
  - GET /canon - list all canonical songs with versions
  - GET /canon/{slug} - get canon with all band versions
  - GET /canon/song/{id}/related - get related versions
- Add link_canon_songs.py auto-linker script
  - Finds songs with same title across bands
  - Creates SongCanon entries automatically
  - Run with --apply to execute
---
Review revision: file contents below were amended after the original commit,
so the blob ids on the "index" lines predate these changes.

 backend/link_canon_songs.py | 174 ++++++++++++++++++++++++++++++++++++
 backend/main.py             |   3 +-
 backend/routers/canon.py    | 158 +++++++++++++++++++++++++++++++++
 3 files changed, 333 insertions(+), 1 deletion(-)
 create mode 100644 backend/link_canon_songs.py
 create mode 100644 backend/routers/canon.py

diff --git a/backend/link_canon_songs.py b/backend/link_canon_songs.py
new file mode 100644
index 0000000..283905d
--- /dev/null
+++ b/backend/link_canon_songs.py
@@ -0,0 +1,174 @@
+"""
+Auto-linker script to find and link shared songs across bands.
+
+This script identifies songs with matching titles across different verticals
+and creates SongCanon entries to link them together.
+
+Common shared songs in the jam scene:
+- Grateful Dead covers (Friend of the Devil, Dark Star, Scarlet Begonias)
+- Traditional songs (Amazing Grace, etc.)
+- Songs that multiple bands cover
+"""
+
+import re
+import sys
+from collections import Counter, defaultdict
+
+from sqlmodel import Session, select
+from database import engine
+from models import Song, SongCanon, Vertical
+
+
+def normalize_title(title: str) -> str:
+    """Normalize a song title so variants of the same song compare equal.
+
+    Lowercases the title, drops one trailing parenthetical note, drops
+    everything from the first segue marker ("->" or ">") onward, removes
+    punctuation, and collapses runs of whitespace.
+    """
+    t = title.lower()
+    # Strip only the last "(...)" group. A greedy "(.*)" would start at the
+    # first "(" and erase titles like "(Don't Fear) The Reaper (Live)".
+    t = re.sub(r'\s*\([^()]*\)$', '', t)
+    # Remove segue indicators ("->" or ">") and whatever follows them
+    t = re.sub(r'\s*-?>.*$', '', t)
+    # Remove special characters
+    t = re.sub(r'[^\w\s]', '', t)
+    # Normalize whitespace
+    return ' '.join(t.split())
+
+
+def generate_slug(title: str) -> str:
+    """Generate a URL-safe slug from a title.
+
+    Lowercases, drops characters other than word characters, whitespace and
+    hyphens, then joins the remaining words with single hyphens.
+    """
+    slug = title.lower()
+    slug = re.sub(r'[^\w\s-]', '', slug)
+    slug = re.sub(r'[\s_]+', '-', slug)
+    slug = re.sub(r'-+', '-', slug)
+    return slug.strip('-')
+
+
+def _group_shared_songs(session: Session) -> dict:
+    """Group songs by normalized title, keeping multi-vertical groups only."""
+    title_groups = defaultdict(list)
+    for song in session.exec(select(Song)).all():
+        norm = normalize_title(song.title)
+        # A title that normalizes to "" carries no identity; grouping on it
+        # would lump unrelated songs together.
+        if norm:
+            title_groups[norm].append(song)
+
+    return {
+        norm: songs
+        for norm, songs in title_groups.items()
+        if len({s.vertical_id for s in songs}) > 1
+    }
+
+
+def find_shared_songs(session: Session | None = None):
+    """Find songs that appear in multiple verticals and print a report.
+
+    Args:
+        session: Session to query with. When omitted, a short-lived session
+            is opened; pass the caller's session when the returned Song
+            objects will be modified, so they stay attached to it.
+
+    Returns:
+        Dict mapping normalized title to the list of Song rows sharing it.
+    """
+    if session is None:
+        with Session(engine) as own_session:
+            return find_shared_songs(own_session)
+
+    print("Finding shared songs across bands...\n")
+    shared = _group_shared_songs(session)
+    print(f"Found {len(shared)} songs shared across bands:\n")
+
+    for norm_title, songs in sorted(shared.items()):
+        print(f" {norm_title}")
+        for song in songs:
+            vertical = session.get(Vertical, song.vertical_id)
+            if vertical:
+                print(f" - {vertical.name} ({song.title})")
+        print()
+
+    return shared
+
+
+def create_canon_links(dry_run: bool = True):
+    """Create SongCanon entries and link the shared songs to them.
+
+    Args:
+        dry_run: When True (the default) nothing is written; the function
+            only reports what it would create and link.
+    """
+    print(f"{'[DRY RUN] ' if dry_run else ''}Creating SongCanon links...\n")
+
+    with Session(engine) as session:
+        shared = find_shared_songs(session)
+
+        created = 0
+        linked = 0
+
+        for songs in shared.values():
+            # Use the most common title as the canonical title
+            title_counts = Counter(song.title for song in songs)
+            canonical_title = title_counts.most_common(1)[0][0]
+            slug = generate_slug(canonical_title)
+
+            # Check if canon already exists
+            canon = session.exec(
+                select(SongCanon).where(SongCanon.slug == slug)
+            ).first()
+
+            if canon:
+                print(f" Found existing: {canonical_title}")
+            else:
+                # Take the original artist from the first song that has one
+                original_artist = next(
+                    (s.original_artist for s in songs if s.original_artist),
+                    None,
+                )
+                canon = SongCanon(
+                    title=canonical_title,
+                    slug=slug,
+                    original_artist=original_artist,
+                )
+
+                if not dry_run:
+                    session.add(canon)
+                    # flush() assigns canon.id without ending the
+                    # transaction, so the run commits as a single unit.
+                    session.flush()
+
+                created += 1
+                print(f" Created canon: {canonical_title}")
+
+            # Link songs to canon
+            for song in songs:
+                # In a dry run a new canon has no id yet; every song in its
+                # group would be linked on a real run, so count them all.
+                if canon.id is None or song.canon_id != canon.id:
+                    if not dry_run:
+                        song.canon_id = canon.id
+                        session.add(song)
+                    linked += 1
+
+        if not dry_run:
+            session.commit()
+
+    print(f"\n{'Would create' if dry_run else 'Created'}: {created} canonical songs")
+    print(f"{'Would link' if dry_run else 'Linked'}: {linked} songs")
+
+    if dry_run:
+        print("\nRun with dry_run=False to apply changes.")
+
+
+if __name__ == "__main__":
+    apply_changes = "--apply" in sys.argv[1:]
+    create_canon_links(dry_run=not apply_changes)
+    if not apply_changes:
+        print("\nTo apply changes, run: python link_canon_songs.py --apply")
diff --git a/backend/main.py b/backend/main.py
index e5dff19..5b363c9 100644
--- a/backend/main.py
+++ b/backend/main.py
@@ -1,6 +1,6 @@
 from fastapi import FastAPI
 import os
-from routers import auth, shows, venues, songs, social, tours, artists, preferences, reviews, badges, nicknames, moderation, attendance, groups, users, search, performances, notifications, feed, leaderboards, stats, admin, chase, gamification, videos, musicians, sequences, verticals
+from routers import auth, shows, venues, songs, social, tours, artists, preferences, reviews, badges, nicknames, moderation, attendance, groups, users, search, performances, notifications, feed, leaderboards, stats, admin, chase, gamification, videos, musicians, sequences, verticals, canon
 from fastapi.middleware.cors import CORSMiddleware
 
 
@@ -45,6 +45,7 @@ app.include_router(videos.router)
 app.include_router(musicians.router)
 app.include_router(sequences.router)
 app.include_router(verticals.router)
+app.include_router(canon.router)
 
 
 # Optional features - can be disabled via env vars
diff --git a/backend/routers/canon.py b/backend/routers/canon.py
new file mode 100644
index 0000000..28b997d
--- /dev/null
+++ b/backend/routers/canon.py
@@ -0,0 +1,158 @@
+from collections import defaultdict
+from typing import List
+
+from fastapi import APIRouter, Depends, HTTPException
+from pydantic import BaseModel
+from sqlmodel import Session, col, select
+
+from database import get_session
+from models import SongCanon, Song, Vertical
+
+router = APIRouter(prefix="/canon", tags=["canon"])
+
+# Upper bound for the "limit" query parameter of the list endpoint.
+MAX_PAGE_SIZE = 200
+
+
+class SongVersionRead(BaseModel):
+    """One band-specific (per-vertical) version of a canonical song."""
+
+    id: int
+    title: str
+    slug: str | None
+    vertical_id: int
+    vertical_name: str
+    vertical_slug: str
+
+
+class SongCanonRead(BaseModel):
+    """A canonical song together with all of its linked versions."""
+
+    id: int
+    title: str
+    slug: str
+    original_artist: str | None
+    notes: str | None
+    versions: List[SongVersionRead]
+
+
+class SongCanonCreate(BaseModel):
+    """Fields for creating a canonical song; no route here uses it yet."""
+
+    title: str
+    slug: str
+    original_artist: str | None = None
+    notes: str | None = None
+
+
+def _version_dict(session: Session, song: Song) -> dict:
+    """Build the SongVersionRead payload for one song.
+
+    Uses "Unknown"/"unknown" placeholders when the song's vertical row
+    cannot be found.
+    """
+    vertical = session.get(Vertical, song.vertical_id)
+    return {
+        "id": song.id,
+        "title": song.title,
+        "slug": song.slug,
+        "vertical_id": song.vertical_id,
+        "vertical_name": vertical.name if vertical else "Unknown",
+        "vertical_slug": vertical.slug if vertical else "unknown",
+    }
+
+
+def _canon_dict(session: Session, canon: SongCanon, songs) -> dict:
+    """Build the SongCanonRead payload for a canon and its linked songs."""
+    return {
+        "id": canon.id,
+        "title": canon.title,
+        "slug": canon.slug,
+        "original_artist": canon.original_artist,
+        "notes": canon.notes,
+        "versions": [_version_dict(session, song) for song in songs],
+    }
+
+
+@router.get("/", response_model=List[SongCanonRead])
+def list_canon_songs(
+    limit: int = 50,
+    offset: int = 0,
+    session: Session = Depends(get_session)
+):
+    """List canonical songs with their cross-band versions.
+
+    Results are ordered by title (then id) so that offset/limit paging is
+    stable. "limit" is clamped to 1..MAX_PAGE_SIZE and a negative "offset"
+    is treated as 0.
+    """
+    limit = max(1, min(limit, MAX_PAGE_SIZE))
+    offset = max(0, offset)
+
+    canons = session.exec(
+        select(SongCanon)
+        .order_by(SongCanon.title, SongCanon.id)
+        .offset(offset)
+        .limit(limit)
+    ).all()
+    if not canons:
+        return []
+
+    # Fetch the songs of every canon on the page with one query instead of
+    # issuing one query per canon.
+    songs_by_canon = defaultdict(list)
+    page_songs = session.exec(
+        select(Song).where(col(Song.canon_id).in_([c.id for c in canons]))
+    ).all()
+    for song in page_songs:
+        songs_by_canon[song.canon_id].append(song)
+
+    return [
+        _canon_dict(session, canon, songs_by_canon.get(canon.id, []))
+        for canon in canons
+    ]
+
+
+@router.get("/{slug}", response_model=SongCanonRead)
+def get_canon_song(slug: str, session: Session = Depends(get_session)):
+    """Get a canonical song with all its band-specific versions.
+
+    Raises:
+        HTTPException: 404 when no canonical song has the given slug.
+    """
+    canon = session.exec(
+        select(SongCanon).where(SongCanon.slug == slug)
+    ).first()
+
+    if not canon:
+        raise HTTPException(status_code=404, detail="Canonical song not found")
+
+    songs = session.exec(
+        select(Song).where(Song.canon_id == canon.id)
+    ).all()
+    return _canon_dict(session, canon, songs)
+
+
+@router.get("/song/{song_id}/related", response_model=List[SongVersionRead])
+def get_related_versions(song_id: int, session: Session = Depends(get_session)):
+    """Get all other versions of the same song across bands.
+
+    Returns an empty list when the song is not linked to a canonical song.
+
+    Raises:
+        HTTPException: 404 when the song does not exist.
+    """
+    song = session.get(Song, song_id)
+    if not song:
+        raise HTTPException(status_code=404, detail="Song not found")
+
+    if not song.canon_id:
+        return []
+
+    # Get all songs with same canon_id (excluding this one)
+    related = session.exec(
+        select(Song)
+        .where(Song.canon_id == song.canon_id)
+        .where(Song.id != song_id)
+    ).all()
+    return [_version_dict(session, s) for s in related]