feat: Add SongCanon API for cross-band song linking
- Add routers/canon.py with endpoints:
- GET /canon - list all canonical songs with versions
- GET /canon/{slug} - get canon with all band versions
- GET /canon/song/{id}/related - get related versions
- Add link_canon_songs.py auto-linker script
- Finds songs with same title across bands
- Creates SongCanon entries automatically
- Run with --apply to execute
This commit is contained in:
parent
19c5e97e7f
commit
5b236608f8
3 changed files with 303 additions and 1 deletions
159
backend/link_canon_songs.py
Normal file
159
backend/link_canon_songs.py
Normal file
|
|
@ -0,0 +1,159 @@
|
||||||
|
"""
|
||||||
|
Auto-linker script to find and link shared songs across bands.
|
||||||
|
|
||||||
|
This script identifies songs with matching titles across different verticals
|
||||||
|
and creates SongCanon entries to link them together.
|
||||||
|
|
||||||
|
Common shared songs in the jam scene:
|
||||||
|
- Grateful Dead covers (Friend of the Devil, Dark Star, Scarlet Begonias)
|
||||||
|
- Traditional songs (Amazing Grace, etc.)
|
||||||
|
- Songs that multiple bands cover
|
||||||
|
"""
|
||||||
|
|
||||||
|
from sqlmodel import Session, select
|
||||||
|
from database import engine
|
||||||
|
from models import Song, SongCanon, Vertical
|
||||||
|
import re
|
||||||
|
|
||||||
|
|
||||||
|
def normalize_title(title: str) -> str:
    """Reduce a song title to the key used to match it across bands.

    The title is lowercased. If it ends with ``)``, everything from the first
    ``(`` onward is dropped; anything from a segue marker (``->`` or ``>``)
    onward is dropped as well. Remaining characters that are neither word
    characters nor whitespace are removed, and runs of whitespace collapse to
    single spaces.
    """
    cleaned = title.lower()
    # Applied in this order: trailing parenthetical first, then segue tails.
    for tail_pattern in (r'\s*\(.*\)$', r'\s*->.*$', r'\s*>.*$'):
        cleaned = re.sub(tail_pattern, '', cleaned)
    # Strip punctuation and other special characters.
    cleaned = re.sub(r'[^\w\s]', '', cleaned)
    # split()/join trims the ends and squeezes inner whitespace.
    return ' '.join(cleaned.split())
|
||||||
|
|
||||||
|
|
||||||
|
def generate_slug(title: str) -> str:
    """Build a lowercase, hyphen-separated slug from a title.

    Characters other than word characters, whitespace and hyphens are
    discarded; every run of whitespace, underscores and hyphens becomes a
    single hyphen; leading and trailing hyphens are trimmed.
    """
    kept = re.sub(r'[^\w\s-]', '', title.lower())
    # One pass turns any mix of spaces, underscores and hyphens into one "-".
    hyphenated = re.sub(r'[\s_-]+', '-', kept)
    return hyphenated.strip('-')
|
||||||
|
|
||||||
|
|
||||||
|
def find_shared_songs():
    """Report and return songs whose normalized title spans several verticals.

    Loads every Song, buckets them by ``normalize_title(song.title)``, keeps
    the buckets that contain more than one distinct ``vertical_id``, prints a
    summary of each, and returns a dict mapping normalized title to the list
    of Song rows in that bucket.
    """
    print("Finding shared songs across bands...\n")

    with Session(engine) as session:
        # Bucket every song under its normalized title.
        by_title = {}
        for entry in session.exec(select(Song)).all():
            by_title.setdefault(normalize_title(entry.title), []).append(entry)

        # Keep only the buckets that cover at least two different verticals.
        shared = {
            key: bucket
            for key, bucket in by_title.items()
            if len({member.vertical_id for member in bucket}) > 1
        }

        print(f"Found {len(shared)} songs shared across bands:\n")

        for key, bucket in sorted(shared.items()):
            # Resolve the band labels first; songs whose vertical row is
            # missing are left out of the listing.
            lookups = (
                (session.get(Vertical, member.vertical_id), member)
                for member in bucket
            )
            labels = [
                f"{vertical.name} ({member.title})"
                for vertical, member in lookups
                if vertical
            ]

            print(f" {key}")
            for label in labels:
                print(f" - {label}")
            print()

        return shared
|
||||||
|
|
||||||
|
|
||||||
|
def create_canon_links(dry_run: bool = True):
    """Create SongCanon entries for shared songs and link each Song to its canon.

    For every group returned by ``find_shared_songs()`` the most frequent
    original title becomes the canonical title and its slug identifies the
    canon. An existing SongCanon with that slug is reused; otherwise a new one
    is built, taking ``original_artist`` from the first song in the group that
    has one.

    Groups whose normalized title or slug is empty are skipped: such titles
    carry no usable text, so grouping them would link unrelated songs and
    could produce a canon with an empty slug.

    Args:
        dry_run: When True (the default) nothing is written to the database;
            the function only reports what it would create and link.
    """
    print(f"{'[DRY RUN] ' if dry_run else ''}Creating SongCanon links...\n")

    with Session(engine) as session:
        shared = find_shared_songs()

        created = 0
        linked = 0

        for norm_title, songs in shared.items():
            # Use the most common title as the canonical title; on a tie the
            # spelling seen first wins because max() keeps the first maximum.
            title_counts = {}
            for song in songs:
                title_counts[song.title] = title_counts.get(song.title, 0) + 1

            canonical_title = max(title_counts, key=title_counts.get)
            slug = generate_slug(canonical_title)

            if not norm_title or not slug:
                print(f" Skipping (no usable title): {canonical_title}")
                continue

            # Check if canon already exists
            canon = session.exec(
                select(SongCanon).where(SongCanon.slug == slug)
            ).first()

            if canon:
                print(f" Found existing: {canonical_title}")
            else:
                # Determine original artist: first non-empty value in the group.
                original_artist = next(
                    (s.original_artist for s in songs if s.original_artist),
                    None,
                )

                canon = SongCanon(
                    title=canonical_title,
                    slug=slug,
                    original_artist=original_artist
                )

                if not dry_run:
                    # Commit and refresh so canon.id is populated before
                    # songs are linked to it below.
                    session.add(canon)
                    session.commit()
                    session.refresh(canon)

                created += 1
                print(f" Created canon: {canonical_title}")

            # Link songs to canon. A canon without an id only occurs for a
            # new canon during a dry run; every song in its group would need
            # linking, so count them all instead of comparing against None.
            for song in songs:
                if canon.id is None or song.canon_id != canon.id:
                    if not dry_run:
                        song.canon_id = canon.id
                        session.add(song)
                    linked += 1

        if not dry_run:
            session.commit()

    print(f"\n{'Would create' if dry_run else 'Created'}: {created} canonical songs")
    print(f"{'Would link' if dry_run else 'Linked'}: {linked} songs")

    if dry_run:
        print("\nRun with dry_run=False to apply changes.")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    import sys

    # Only "--apply" given as the first argument switches off the dry run.
    apply_changes = sys.argv[1:2] == ["--apply"]
    create_canon_links(dry_run=not apply_changes)

    if not apply_changes:
        print("\nTo apply changes, run: python link_canon_songs.py --apply")
|
||||||
|
|
@ -1,6 +1,6 @@
|
||||||
from fastapi import FastAPI
|
from fastapi import FastAPI
|
||||||
import os
|
import os
|
||||||
from routers import auth, shows, venues, songs, social, tours, artists, preferences, reviews, badges, nicknames, moderation, attendance, groups, users, search, performances, notifications, feed, leaderboards, stats, admin, chase, gamification, videos, musicians, sequences, verticals
|
from routers import auth, shows, venues, songs, social, tours, artists, preferences, reviews, badges, nicknames, moderation, attendance, groups, users, search, performances, notifications, feed, leaderboards, stats, admin, chase, gamification, videos, musicians, sequences, verticals, canon
|
||||||
|
|
||||||
from fastapi.middleware.cors import CORSMiddleware
|
from fastapi.middleware.cors import CORSMiddleware
|
||||||
|
|
||||||
|
|
@ -45,6 +45,7 @@ app.include_router(videos.router)
|
||||||
app.include_router(musicians.router)
|
app.include_router(musicians.router)
|
||||||
app.include_router(sequences.router)
|
app.include_router(sequences.router)
|
||||||
app.include_router(verticals.router)
|
app.include_router(verticals.router)
|
||||||
|
app.include_router(canon.router)
|
||||||
|
|
||||||
|
|
||||||
# Optional features - can be disabled via env vars
|
# Optional features - can be disabled via env vars
|
||||||
|
|
|
||||||
142
backend/routers/canon.py
Normal file
142
backend/routers/canon.py
Normal file
|
|
@ -0,0 +1,142 @@
|
||||||
|
from fastapi import APIRouter, Depends, HTTPException
|
||||||
|
from sqlmodel import Session, select
|
||||||
|
from typing import List
|
||||||
|
from database import get_session
|
||||||
|
from models import SongCanon, Song, Vertical
|
||||||
|
from pydantic import BaseModel
|
||||||
|
|
||||||
|
# Router for the canonical-song endpoints; every route declared on it is
# mounted under the "/canon" prefix and tagged "canon".
router = APIRouter(prefix="/canon", tags=["canon"])
|
||||||
|
|
||||||
|
|
||||||
|
class SongVersionRead(BaseModel):
    """Response shape for one band-specific version of a song.

    Combines fields read from a Song row with the name and slug of the
    Vertical the song belongs to.
    """
    # Song fields.
    id: int
    title: str
    slug: str | None
    vertical_id: int
    # Vertical fields; the endpoints in this module fill them with
    # "Unknown" / "unknown" when the Vertical row cannot be found.
    vertical_name: str
    vertical_slug: str
|
||||||
|
|
||||||
|
|
||||||
|
class SongCanonRead(BaseModel):
    """Response shape for a canonical song together with its versions."""
    id: int
    title: str
    slug: str
    original_artist: str | None
    notes: str | None
    # One entry per Song row whose canon_id points at this canonical song.
    versions: List[SongVersionRead]
|
||||||
|
|
||||||
|
|
||||||
|
class SongCanonCreate(BaseModel):
    """Fields for creating a canonical song; only title and slug are required.

    NOTE(review): no endpoint visible in this module accepts this model --
    confirm where it is used.
    """
    title: str
    slug: str
    original_artist: str | None = None
    notes: str | None = None
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/", response_model=List[SongCanonRead])
def list_canon_songs(
    limit: int = 50,
    offset: int = 0,
    session: Session = Depends(get_session)
):
    """List canonical songs with their cross-band versions.

    Args:
        limit: Maximum number of canonical songs to return.
        offset: Number of canonical songs to skip before the first result.
        session: Database session supplied by the ``get_session`` dependency.

    Returns:
        A list of dicts shaped like ``SongCanonRead``, ordered by canon id.

    Raises:
        HTTPException: 422 when ``limit`` or ``offset`` is negative.
    """
    # Reject negative paging values here instead of passing them to the
    # database, where the outcome depends on the backend.
    if limit < 0 or offset < 0:
        raise HTTPException(
            status_code=422,
            detail="limit and offset must be non-negative"
        )

    # OFFSET/LIMIT only yields stable pages when the rows have a defined order.
    canons = session.exec(
        select(SongCanon).order_by(SongCanon.id).offset(offset).limit(limit)
    ).all()
    if not canons:
        return []

    # Fetch the songs for the whole page in one query rather than one per canon.
    canon_ids = [canon.id for canon in canons]
    songs = session.exec(
        select(Song).where(Song.canon_id.in_(canon_ids))
    ).all()

    verticals = {}  # vertical_id -> Vertical (or None), looked up once each
    versions_by_canon = {canon_id: [] for canon_id in canon_ids}
    for song in songs:
        if song.vertical_id not in verticals:
            verticals[song.vertical_id] = session.get(Vertical, song.vertical_id)
        vertical = verticals[song.vertical_id]
        versions_by_canon[song.canon_id].append({
            "id": song.id,
            "title": song.title,
            "slug": song.slug,
            "vertical_id": song.vertical_id,
            # Placeholders keep the response valid if the vertical row is missing.
            "vertical_name": vertical.name if vertical else "Unknown",
            "vertical_slug": vertical.slug if vertical else "unknown"
        })

    return [
        {
            "id": canon.id,
            "title": canon.title,
            "slug": canon.slug,
            "original_artist": canon.original_artist,
            "notes": canon.notes,
            "versions": versions_by_canon[canon.id]
        }
        for canon in canons
    ]
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/{slug}", response_model=SongCanonRead)
def get_canon_song(slug: str, session: Session = Depends(get_session)):
    """Return one canonical song, looked up by slug, with its band versions.

    Raises:
        HTTPException: 404 when no SongCanon has the given slug.
    """
    canon_query = select(SongCanon).where(SongCanon.slug == slug)
    canon = session.exec(canon_query).first()
    if not canon:
        raise HTTPException(status_code=404, detail="Canonical song not found")

    linked_songs = session.exec(
        select(Song).where(Song.canon_id == canon.id)
    ).all()

    versions = []
    for entry in linked_songs:
        owner = session.get(Vertical, entry.vertical_id)
        # Fall back to placeholders when the vertical row is missing.
        if owner:
            owner_name, owner_slug = owner.name, owner.slug
        else:
            owner_name, owner_slug = "Unknown", "unknown"
        versions.append(dict(
            id=entry.id,
            title=entry.title,
            slug=entry.slug,
            vertical_id=entry.vertical_id,
            vertical_name=owner_name,
            vertical_slug=owner_slug,
        ))

    return dict(
        id=canon.id,
        title=canon.title,
        slug=canon.slug,
        original_artist=canon.original_artist,
        notes=canon.notes,
        versions=versions,
    )
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/song/{song_id}/related", response_model=List[SongVersionRead])
def get_related_versions(song_id: int, session: Session = Depends(get_session)):
    """Return the other songs that share a canon with the given song.

    The result is empty when the song has no ``canon_id``.

    Raises:
        HTTPException: 404 when no Song has the given id.
    """
    song = session.get(Song, song_id)
    if not song:
        raise HTTPException(status_code=404, detail="Song not found")
    if not song.canon_id:
        return []

    def describe(other):
        """Build the response dict for one related song."""
        owner = session.get(Vertical, other.vertical_id)
        return {
            "id": other.id,
            "title": other.title,
            "slug": other.slug,
            "vertical_id": other.vertical_id,
            "vertical_name": owner.name if owner else "Unknown",
            "vertical_slug": owner.slug if owner else "unknown"
        }

    # Same canon, excluding the song that was asked about.
    siblings_query = (
        select(Song)
        .where(Song.canon_id == song.canon_id)
        .where(Song.id != song_id)
    )
    return [describe(other) for other in session.exec(siblings_query).all()]
|
||||||
Loading…
Add table
Reference in a new issue