fediversion/backend/link_canon_songs.py
fullsizemalt 5b236608f8 feat: Add SongCanon API for cross-band song linking
- Add routers/canon.py with endpoints:
  - GET /canon - list all canonical songs with versions
  - GET /canon/{slug} - get canon with all band versions
  - GET /canon/song/{id}/related - get related versions
- Add link_canon_songs.py auto-linker script
  - Finds songs with same title across bands
  - Creates SongCanon entries automatically
  - Run with --apply to execute
2025-12-28 16:28:58 -08:00

159 lines
5.2 KiB
Python

"""
Auto-linker script to find and link shared songs across bands.
This script identifies songs with matching titles across different verticals
and creates SongCanon entries to link them together.
Common shared songs in the jam scene:
- Grateful Dead covers (Friend of the Devil, Dark Star, Scarlet Begonias)
- Traditional songs (Amazing Grace, etc.)
- Songs that multiple bands cover
"""
from sqlmodel import Session, select
from database import engine
from models import Song, SongCanon, Vertical
import re
def normalize_title(title: str) -> str:
    """Reduce a song title to a comparison key for cross-band matching.

    The title is lowercased, then trailing annotations are dropped in order:
    a parenthetical that runs to the end of the title, everything from a
    "->" segue marker onward, and everything from a ">" segue marker onward.
    Remaining characters that are neither word characters nor whitespace are
    removed, and runs of whitespace collapse to single spaces.

    Args:
        title: The song title as stored.

    Returns:
        The normalized title; may be an empty string when nothing but
        annotations/punctuation was present.
    """
    # Order matters: "->" must be stripped before the bare ">" rule runs.
    trailing_noise = (
        r'\s*\(.*\)$',  # parenthetical notes at the end of the title
        r'\s*->.*$',    # "->" segue indicator and whatever follows
        r'\s*>.*$',     # ">" segue indicator and whatever follows
    )

    cleaned = title.lower()
    for pattern in trailing_noise:
        cleaned = re.sub(pattern, '', cleaned)

    # Drop punctuation and other special characters.
    cleaned = re.sub(r'[^\w\s]', '', cleaned)

    # split()/join trims the ends and collapses internal whitespace runs.
    return ' '.join(cleaned.split())
def generate_slug(title: str) -> str:
    """Build a hyphen-separated slug from a song title.

    Args:
        title: The title to convert.

    Returns:
        The lowercased title with everything except word characters,
        whitespace and hyphens removed, whitespace/underscore runs turned
        into hyphens, repeated hyphens collapsed, and leading/trailing
        hyphens stripped. May be empty if the title had no usable characters.
    """
    # Keep only word characters, whitespace and hyphens.
    kept = re.sub(r'[^\w\s-]', '', title.lower())
    # Whitespace and underscores both act as word separators.
    hyphenated = re.sub(r'[\s_]+', '-', kept)
    # Separators next to existing hyphens can produce runs; squeeze them.
    squeezed = re.sub(r'-+', '-', hyphenated)
    return squeezed.strip('-')
def find_shared_songs():
    """Find songs whose normalized title appears in more than one vertical.

    Loads every Song, groups them by ``normalize_title(song.title)``, and
    keeps only the groups whose songs span at least two distinct
    ``vertical_id`` values. A human-readable report of each shared title and
    the bands that play it is printed to stdout.

    Songs with no title, or whose title normalizes to an empty string
    (for example a title that is entirely a parenthetical), are skipped:
    an empty key carries no identity, so grouping on it would lump
    unrelated songs together as one "shared" song.

    Returns:
        dict mapping normalized title -> list of Song objects in that group.
    """
    print("Finding shared songs across bands...\n")

    with Session(engine) as session:
        all_songs = session.exec(select(Song)).all()

        # Group songs by normalized title.
        title_groups = {}
        for song in all_songs:
            if not song.title:
                # Nothing to normalize; cannot be matched by title.
                continue
            norm = normalize_title(song.title)
            if not norm:
                # Title was only annotations/punctuation; not a usable key.
                continue
            title_groups.setdefault(norm, []).append(song)

        # Keep only the groups that span more than one vertical.
        shared = {
            norm_title: songs
            for norm_title, songs in title_groups.items()
            if len({s.vertical_id for s in songs}) > 1
        }

        print(f"Found {len(shared)} songs shared across bands:\n")

        for norm_title, songs in sorted(shared.items()):
            print(f" {norm_title}")
            for song in songs:
                vertical = session.get(Vertical, song.vertical_id)
                # Songs whose vertical cannot be loaded are left out of the
                # printed report, but they stay in the returned group.
                if vertical:
                    print(f" - {vertical.name} ({song.title})")
            print()

        return shared
def create_canon_links(dry_run: bool = True):
    """Create SongCanon entries for shared songs and link each Song to one.

    For every group returned by ``find_shared_songs()``:

    1. The most frequent raw title in the group becomes the canonical title
       (ties go to the title seen first), and its slug is generated.
    2. An existing SongCanon with that slug is reused; otherwise a new one
       is built, taking ``original_artist`` from the first song in the group
       that has one.
    3. Every song not already pointing at that canon is linked to it.

    Args:
        dry_run: When True (the default) nothing is written to the database;
            the function only reports what it would create and link. When
            False, new canons and song links are committed.
    """
    from collections import Counter

    print(f"{'[DRY RUN] ' if dry_run else ''}Creating SongCanon links...\n")

    with Session(engine) as session:
        shared = find_shared_songs()

        created = 0
        linked = 0

        for songs in shared.values():
            # Most common raw title wins; max() returns the first key on a
            # tie, and Counter preserves first-seen order.
            title_counts = Counter(song.title for song in songs)
            canonical_title = max(title_counts, key=title_counts.get)
            slug = generate_slug(canonical_title)

            canon = session.exec(
                select(SongCanon).where(SongCanon.slug == slug)
            ).first()

            if canon is not None:
                print(f" Found existing: {canonical_title}")
            else:
                # First non-empty original_artist in the group, if any.
                original_artist = next(
                    (s.original_artist for s in songs if s.original_artist),
                    None,
                )
                canon = SongCanon(
                    title=canonical_title,
                    slug=slug,
                    original_artist=original_artist,
                )
                if not dry_run:
                    # Commit + refresh so canon.id is populated before the
                    # songs below are pointed at it.
                    session.add(canon)
                    session.commit()
                    session.refresh(canon)
                created += 1
                print(f" Created canon: {canonical_title}")

            for song in songs:
                # A song counts as already linked only when the canon has a
                # real id and the song points at it. In a dry run a new
                # canon has no id yet, so every song in the group would
                # need linking; comparing against None there would skip
                # exactly the unlinked songs and under-report the work.
                if canon.id is not None and song.canon_id == canon.id:
                    continue
                if not dry_run:
                    song.canon_id = canon.id
                    session.add(song)
                linked += 1

        if not dry_run:
            session.commit()

    print(f"\n{'Would create' if dry_run else 'Created'}: {created} canonical songs")
    print(f"{'Would link' if dry_run else 'Linked'}: {linked} songs")

    if dry_run:
        print("\nRun with dry_run=False to apply changes.")
if __name__ == "__main__":
    import sys

    # Only the first command-line argument is inspected; anything other
    # than an exact "--apply" in that position results in a dry run.
    apply_changes = sys.argv[1:2] == ["--apply"]

    create_canon_links(dry_run=not apply_changes)
    if not apply_changes:
        print("\nTo apply changes, run: python link_canon_songs.py --apply")