- Add routers/canon.py with endpoints:
- GET /canon - list all canonical songs with versions
- GET /canon/{slug} - get canon with all band versions
- GET /canon/song/{id}/related - get related versions
- Add link_canon_songs.py auto-linker script
- Finds songs with same title across bands
- Creates SongCanon entries automatically
- Run with --apply to execute
159 lines
5.2 KiB
Python
159 lines
5.2 KiB
Python
"""
|
|
Auto-linker script to find and link songs that are shared across bands.

This script identifies songs with matching titles across different verticals
and creates SongCanon entries to link them together.

Common shared songs in the jam scene:
- Grateful Dead covers (Friend of the Devil, Dark Star, Scarlet Begonias)
- Traditional songs (Amazing Grace, etc.)
- Songs that multiple bands cover
|
|
"""
|
|
|
|
from sqlmodel import Session, select
|
|
from database import engine
|
|
from models import Song, SongCanon, Vertical
|
|
import re
|
|
|
|
|
|
def normalize_title(title: str) -> str:
    """Normalize a song title so variants of the same song compare equal.

    Steps, in order: lowercase and trim surrounding whitespace; drop trailing
    parenthetical notes; drop everything from a segue marker (``->`` or
    ``>``) onward; remove punctuation; collapse runs of whitespace.

    Args:
        title: Raw song title.

    Returns:
        The normalized title. This can be an empty string when the title
        consisted only of notes, segue text or punctuation.
    """
    # Trim first so the "$"-anchored patterns below still match when the raw
    # title carries trailing whitespace.
    t = title.lower().strip()
    # Strip only *trailing* parenthetical groups. Each group is matched
    # without nested parentheses, so a title that also opens with a
    # parenthetical, e.g. "(I Can't Get No) Satisfaction (Live)", keeps its
    # name instead of being swallowed from the first "(" to the last ")".
    t = re.sub(r'(?:\s*\([^()]*\))+$', '', t)
    # Remove segue indicators and whatever follows them.
    t = re.sub(r'\s*->.*$', '', t)
    t = re.sub(r'\s*>.*$', '', t)
    # Remove special characters.
    t = re.sub(r'[^\w\s]', '', t)
    # Normalize whitespace.
    return ' '.join(t.split())
|
|
|
|
|
|
def generate_slug(title: str) -> str:
    """Turn a title into a lowercase, hyphen-separated slug.

    The substitutions are applied in order to the lowercased title, and any
    leading or trailing hyphens are trimmed from the result.
    """
    substitutions = (
        (r'[^\w\s-]', ''),  # keep only word characters, whitespace, hyphens
        (r'[\s_]+', '-'),   # whitespace/underscore runs become one hyphen
        (r'-+', '-'),       # collapse repeated hyphens
    )
    result = title.lower()
    for pattern, replacement in substitutions:
        result = re.sub(pattern, replacement, result)
    return result.strip('-')
|
|
|
|
|
|
def find_shared_songs():
    """Find songs that appear in multiple verticals.

    Loads every Song, groups them by ``normalize_title(song.title)``, keeps
    the groups whose songs span more than one ``vertical_id``, and prints a
    report listing each shared title with the bands that play it.

    Songs whose title is missing or normalizes to an empty string are
    skipped; otherwise unrelated songs (for example titles that are only a
    parenthetical note) would all fall into a single group keyed by "".

    Returns:
        A dict mapping normalized title to the list of Song objects that
        share it across more than one vertical.
    """
    print("Finding shared songs across bands...\n")

    with Session(engine) as session:
        all_songs = session.exec(select(Song)).all()

        # Group by normalized title
        title_groups = {}
        for song in all_songs:
            norm = normalize_title(song.title) if song.title else ""
            if not norm:
                # Nothing left to match on; do not pool these together.
                continue
            title_groups.setdefault(norm, []).append(song)

        # Keep only titles that appear in more than one vertical
        shared = {
            norm_title: songs
            for norm_title, songs in title_groups.items()
            if len({s.vertical_id for s in songs}) > 1
        }

        print(f"Found {len(shared)} songs shared across bands:\n")

        for norm_title, songs in sorted(shared.items()):
            print(f" {norm_title}")
            for song in songs:
                vertical = session.get(Vertical, song.vertical_id)
                # Songs whose vertical cannot be loaded are left out of the
                # printed report but stay in the returned group.
                if vertical:
                    print(f" - {vertical.name} ({song.title})")
            print()

    return shared
|
|
|
|
|
|
def create_canon_links(dry_run: bool = True):
    """Create SongCanon entries and link songs to them.

    For every group returned by ``find_shared_songs()`` the most frequent
    original title becomes the canonical title, a slug is generated from it,
    and an existing SongCanon with that slug is reused or a new one is built.
    Each song in the group is then pointed at the canon via ``canon_id``.

    Args:
        dry_run: When True (the default) nothing is added or committed; the
            function only prints what it would create and link.
    """
    print(f"{'[DRY RUN] ' if dry_run else ''}Creating SongCanon links...\n")

    with Session(engine) as session:
        shared = find_shared_songs()

        created = 0
        linked = 0

        for norm_title, songs in shared.items():
            # Use the most common title as the canonical title; on a tie the
            # title seen first wins.
            title_counts = {}
            for song in songs:
                title_counts[song.title] = title_counts.get(song.title, 0) + 1
            canonical_title = max(title_counts, key=title_counts.get)
            slug = generate_slug(canonical_title)

            if not slug:
                # An empty slug would pool unrelated groups into one canon
                # that cannot be looked up by slug.
                print(f" Skipping (no usable slug): {canonical_title}")
                continue

            # Check if canon already exists
            existing = session.exec(
                select(SongCanon).where(SongCanon.slug == slug)
            ).first()

            if existing:
                canon = existing
                print(f" Found existing: {canonical_title}")
            else:
                # Use the first non-empty original_artist found in the group.
                original_artist = next(
                    (s.original_artist for s in songs if s.original_artist),
                    None,
                )

                canon = SongCanon(
                    title=canonical_title,
                    slug=slug,
                    original_artist=original_artist
                )

                if not dry_run:
                    session.add(canon)
                    session.commit()
                    # Reload so canon.id is populated before linking.
                    session.refresh(canon)

                created += 1
                print(f" Created canon: {canonical_title}")

            # Link songs to canon
            for song in songs:
                # A canon that was not persisted (dry run) has id None. Every
                # song in the group would still be linked to it, so count
                # them all rather than comparing canon_id against None, which
                # hid unlinked songs from the dry-run total.
                if canon.id is None or song.canon_id != canon.id:
                    if not dry_run:
                        song.canon_id = canon.id
                        session.add(song)
                    linked += 1

        if not dry_run:
            session.commit()

        print(f"\n{'Would create' if dry_run else 'Created'}: {created} canonical songs")
        print(f"{'Would link' if dry_run else 'Linked'}: {linked} songs")

        if dry_run:
            print("\nRun with dry_run=False to apply changes.")
|
|
|
|
|
|
if __name__ == "__main__":
    import sys

    # Changes are written only when the first CLI argument is exactly --apply;
    # anything else (or no argument) is a dry run.
    apply_changes = sys.argv[1:2] == ["--apply"]
    create_canon_links(dry_run=not apply_changes)

    if not apply_changes:
        print("\nTo apply changes, run: python link_canon_songs.py --apply")
|