feat: Add SongCanon API for cross-band song linking

- Add routers/canon.py with endpoints:
  - GET /canon - list all canonical songs with versions
  - GET /canon/{slug} - get canon with all band versions
  - GET /canon/song/{song_id}/related - get the other bands' versions of a song
- Add link_canon_songs.py auto-linker script
  - Finds songs with same title across bands
  - Creates SongCanon entries automatically
  - Dry run by default; run with --apply to write changes
This commit is contained in:
fullsizemalt 2025-12-28 16:28:58 -08:00
parent 19c5e97e7f
commit 5b236608f8
3 changed files with 303 additions and 1 deletions

159
backend/link_canon_songs.py Normal file
View file

@ -0,0 +1,159 @@
"""
Auto-linker script to find and link shared songs across bands.
This script identifies songs with matching titles across different verticals
and creates SongCanon entries to link them together.
Common shared songs in the jam scene:
- Grateful Dead covers (Friend of the Devil, Dark Star, Scarlet Begonias)
- Traditional songs (Amazing Grace, etc.)
- Songs that multiple bands cover
"""
from sqlmodel import Session, select
from database import engine
from models import Song, SongCanon, Vertical
import re
def normalize_title(title: str) -> str:
    """Reduce a song title to a comparison key for cross-band matching.

    The title is lowercased, then a trailing parenthetical note and anything
    from a segue marker ("->" or ">") onward are dropped, remaining
    punctuation is removed, and runs of whitespace collapse to single spaces.
    """
    # Order matters: notes and segues must go before punctuation is stripped,
    # otherwise the "(", ")" and ">" markers would already be gone.
    removal_patterns = (
        r'\s*\(.*\)$',  # trailing parenthetical note
        r'\s*->.*$',    # "->" segue and everything after it
        r'\s*>.*$',     # ">" segue and everything after it
        r'[^\w\s]',     # any remaining special characters
    )
    cleaned = title.lower()
    for pattern in removal_patterns:
        cleaned = re.sub(pattern, '', cleaned)
    return ' '.join(cleaned.split())
def generate_slug(title: str) -> str:
    """Build a lowercase, hyphen-separated slug from a title.

    Characters other than word characters, whitespace and hyphens are
    dropped; whitespace and underscores become hyphens; repeated hyphens
    collapse to one and leading/trailing hyphens are trimmed.
    """
    substitutions = (
        (r'[^\w\s-]', ''),
        (r'[\s_]+', '-'),
        (r'-+', '-'),
    )
    text = title.lower()
    for pattern, replacement in substitutions:
        text = re.sub(pattern, replacement, text)
    return text.strip('-')
def find_shared_songs():
    """Find songs whose normalized title appears in more than one vertical.

    Loads every Song, groups them by normalize_title(song.title), keeps the
    groups that span at least two distinct vertical_id values, and prints a
    report of each group.

    Returns:
        dict mapping normalized title -> list of Song objects in that group.
        The Song objects were loaded through a session that is closed by the
        time the caller receives them.
    """
    print("Finding shared songs across bands...\n")

    with Session(engine) as session:
        all_songs = session.exec(select(Song)).all()

        # Group by normalized title
        title_groups = {}
        for song in all_songs:
            norm = normalize_title(song.title)
            if not norm:
                # Titles made only of a parenthetical note, a segue marker or
                # punctuation normalize to "". Grouping on that empty key
                # would report unrelated songs as one shared song.
                continue
            title_groups.setdefault(norm, []).append(song)

        # Keep only the titles that appear in more than one vertical
        shared = {
            norm_title: songs
            for norm_title, songs in title_groups.items()
            if len({s.vertical_id for s in songs}) > 1
        }

        print(f"Found {len(shared)} songs shared across bands:\n")
        for norm_title, songs in sorted(shared.items()):
            # Get band names
            bands = []
            for song in songs:
                vertical = session.get(Vertical, song.vertical_id)
                if vertical:
                    bands.append(f"{vertical.name} ({song.title})")

            print(f" {norm_title}")
            for band in bands:
                print(f" - {band}")
            print()

        return shared
def create_canon_links(dry_run: bool = True):
    """Create SongCanon entries for shared songs and link each Song to its canon.

    For every group returned by find_shared_songs(), the most frequent
    original title becomes the canonical title and its slug identifies the
    SongCanon row. A row that already has that slug is reused; otherwise a
    new one is created, taking original_artist from the first song in the
    group that has one.

    Args:
        dry_run: When True (the default) nothing is written to the database;
            the function only reports what it would create and link.
    """
    print(f"{'[DRY RUN] ' if dry_run else ''}Creating SongCanon links...\n")

    with Session(engine) as session:
        shared = find_shared_songs()
        created = 0
        linked = 0

        for songs in shared.values():
            # Use the most common title as the canonical title
            title_counts = {}
            for song in songs:
                title_counts[song.title] = title_counts.get(song.title, 0) + 1
            canonical_title = max(title_counts, key=title_counts.get)
            slug = generate_slug(canonical_title)

            if not slug:
                # A title made only of punctuation yields an empty slug, and
                # a canon stored under "" could never be looked up by slug.
                print(f" Skipping (empty slug): {canonical_title}")
                continue

            # Check if canon already exists
            existing = session.exec(
                select(SongCanon).where(SongCanon.slug == slug)
            ).first()

            if existing:
                canon = existing
                is_new_canon = False
                print(f" Found existing: {canonical_title}")
            else:
                # Determine original artist
                original_artist = next(
                    (s.original_artist for s in songs if s.original_artist),
                    None,
                )
                canon = SongCanon(
                    title=canonical_title,
                    slug=slug,
                    original_artist=original_artist
                )
                is_new_canon = True
                if not dry_run:
                    session.add(canon)
                    session.commit()
                    session.refresh(canon)
                created += 1
                print(f" Created canon: {canonical_title}")

            # Link songs to canon
            for song in songs:
                # In a dry run a new canon is never persisted, so its id is
                # None. Comparing ids alone would then treat every unlinked
                # song (canon_id None) as already linked and under-report the
                # "Would link" total. Every song of a new canon needs linking.
                if not is_new_canon and song.canon_id == canon.id:
                    continue
                if not dry_run:
                    song.canon_id = canon.id
                    session.add(song)
                linked += 1

        if not dry_run:
            session.commit()

    print(f"\n{'Would create' if dry_run else 'Created'}: {created} canonical songs")
    print(f"{'Would link' if dry_run else 'Linked'}: {linked} songs")
    if dry_run:
        print("\nRun with dry_run=False to apply changes.")
if __name__ == "__main__":
    import sys

    # Only the first command-line argument is inspected; anything other than
    # exactly "--apply" in that position results in a dry run.
    apply_changes = sys.argv[1:2] == ["--apply"]
    create_canon_links(dry_run=not apply_changes)
    if not apply_changes:
        print("\nTo apply changes, run: python link_canon_songs.py --apply")

View file

@ -1,6 +1,6 @@
from fastapi import FastAPI
import os
from routers import auth, shows, venues, songs, social, tours, artists, preferences, reviews, badges, nicknames, moderation, attendance, groups, users, search, performances, notifications, feed, leaderboards, stats, admin, chase, gamification, videos, musicians, sequences, verticals
from routers import auth, shows, venues, songs, social, tours, artists, preferences, reviews, badges, nicknames, moderation, attendance, groups, users, search, performances, notifications, feed, leaderboards, stats, admin, chase, gamification, videos, musicians, sequences, verticals, canon
from fastapi.middleware.cors import CORSMiddleware
@ -45,6 +45,7 @@ app.include_router(videos.router)
app.include_router(musicians.router)
app.include_router(sequences.router)
app.include_router(verticals.router)
app.include_router(canon.router)
# Optional features - can be disabled via env vars

142
backend/routers/canon.py Normal file
View file

@ -0,0 +1,142 @@
from fastapi import APIRouter, Depends, HTTPException
from sqlmodel import Session, select
from typing import List
from database import get_session
from models import SongCanon, Song, Vertical
from pydantic import BaseModel
# Router for the canonical-song endpoints. Routes registered on it are served
# under the "/canon" prefix and tagged "canon".
router = APIRouter(prefix="/canon", tags=["canon"])
# Response schema for one band-specific version of a song. The endpoints in
# this module fill it from a Song row plus the Vertical it belongs to.
class SongVersionRead(BaseModel):
    id: int  # Song.id
    title: str  # Song.title
    slug: str | None  # Song.slug
    vertical_id: int  # Song.vertical_id
    vertical_name: str  # Vertical.name, or "Unknown" when the vertical is not found
    vertical_slug: str  # Vertical.slug, or "unknown" when the vertical is not found
# Response schema for a canonical song together with every linked version.
# The scalar fields are copied from the SongCanon row by the endpoints.
class SongCanonRead(BaseModel):
    id: int
    title: str
    slug: str  # value matched by GET /canon/{slug}
    original_artist: str | None
    notes: str | None
    versions: List[SongVersionRead]  # one entry per Song whose canon_id equals this canon's id
# Input schema for creating a canonical song: title and slug are required,
# original_artist and notes default to None.
# NOTE(review): no endpoint in the visible part of this module uses it;
# confirm whether a create route is planned or lives elsewhere.
class SongCanonCreate(BaseModel):
    title: str
    slug: str
    original_artist: str | None = None
    notes: str | None = None
@router.get("/", response_model=List[SongCanonRead])
def list_canon_songs(
    limit: int = 50,
    offset: int = 0,
    session: Session = Depends(get_session)
):
    """List all canonical songs with their cross-band versions"""
    # The docstring above is left as-is so the published endpoint description
    # does not change.
    #
    # limit/offset page through the SongCanon rows. The rows are ordered by id
    # because OFFSET/LIMIT without ORDER BY does not guarantee a stable order:
    # consecutive pages could repeat or skip canons.
    canons = session.exec(
        select(SongCanon).order_by(SongCanon.id).offset(offset).limit(limit)
    ).all()

    # Load the versions for the whole page in a single query rather than one
    # query per canon, then bucket them by canon_id.
    songs_by_canon = {}
    canon_ids = [canon.id for canon in canons]
    if canon_ids:
        page_songs = session.exec(
            select(Song).where(Song.canon_id.in_(canon_ids)).order_by(Song.id)
        ).all()
        for song in page_songs:
            songs_by_canon.setdefault(song.canon_id, []).append(song)

    result = []
    for canon in canons:
        versions = []
        for song in songs_by_canon.get(canon.id, []):
            vertical = session.get(Vertical, song.vertical_id)
            versions.append({
                "id": song.id,
                "title": song.title,
                "slug": song.slug,
                "vertical_id": song.vertical_id,
                # Fall back to placeholders when the vertical row is missing
                "vertical_name": vertical.name if vertical else "Unknown",
                "vertical_slug": vertical.slug if vertical else "unknown"
            })

        result.append({
            "id": canon.id,
            "title": canon.title,
            "slug": canon.slug,
            "original_artist": canon.original_artist,
            "notes": canon.notes,
            "versions": versions
        })

    return result
@router.get("/{slug}", response_model=SongCanonRead)
def get_canon_song(slug: str, session: Session = Depends(get_session)):
    """Get a canonical song with all its band-specific versions"""
    # Responds 404 when no SongCanon row has the requested slug.
    canon_query = select(SongCanon).where(SongCanon.slug == slug)
    canon = session.exec(canon_query).first()
    if not canon:
        raise HTTPException(status_code=404, detail="Canonical song not found")

    def as_version(song):
        # Pair the song with its vertical, using placeholders if it is missing
        band = session.get(Vertical, song.vertical_id)
        if band:
            band_name, band_slug = band.name, band.slug
        else:
            band_name, band_slug = "Unknown", "unknown"
        return {
            "id": song.id,
            "title": song.title,
            "slug": song.slug,
            "vertical_id": song.vertical_id,
            "vertical_name": band_name,
            "vertical_slug": band_slug
        }

    linked_songs = session.exec(
        select(Song).where(Song.canon_id == canon.id)
    ).all()

    return {
        "id": canon.id,
        "title": canon.title,
        "slug": canon.slug,
        "original_artist": canon.original_artist,
        "notes": canon.notes,
        "versions": [as_version(song) for song in linked_songs]
    }
@router.get("/song/{song_id}/related", response_model=List[SongVersionRead])
def get_related_versions(song_id: int, session: Session = Depends(get_session)):
    """Get all versions of the same song across bands"""
    # Responds 404 for an unknown song and with an empty list for a song that
    # has no canon_id.
    anchor = session.get(Song, song_id)
    if not anchor:
        raise HTTPException(status_code=404, detail="Song not found")
    if not anchor.canon_id:
        return []

    # Every other song sharing the same canon_id (the requested one is excluded)
    siblings_query = select(Song).where(
        Song.canon_id == anchor.canon_id,
        Song.id != song_id,
    )

    payload = []
    for sibling in session.exec(siblings_query).all():
        band = session.get(Vertical, sibling.vertical_id)
        if band:
            band_name, band_slug = band.name, band.slug
        else:
            band_name, band_slug = "Unknown", "unknown"
        payload.append({
            "id": sibling.id,
            "title": sibling.title,
            "slug": sibling.slug,
            "vertical_id": sibling.vertical_id,
            "vertical_name": band_name,
            "vertical_slug": band_slug
        })
    return payload