"""
Auto-linker script to find and link shared songs across bands.

This script identifies songs with matching titles across different verticals
and creates SongCanon entries to link them together.

Common shared songs in the jam scene:
- Grateful Dead covers (Friend of the Devil, Dark Star, Scarlet Begonias)
- Traditional songs (Amazing Grace, etc.)
- Songs that multiple bands cover

Usage:
    python link_canon_songs.py            # dry run, nothing is written
    python link_canon_songs.py --apply    # create canons and link songs
"""
import re
import sys
from collections import Counter, defaultdict
from typing import Dict, List

from sqlmodel import Session, select

from database import engine
from models import Song, SongCanon, Vertical

# Everything from the first segue marker ("->" or ">") to the end of the title.
_SEGUE_RE = re.compile(r'\s*-?>.*$')
# One or more non-nested parenthetical notes at the very end of the title.
_TRAILING_PARENS_RE = re.compile(r'(?:\s*\([^()]*\))+\s*$')
# Anything that is neither a word character nor whitespace.
_NON_WORD_RE = re.compile(r'[^\w\s]')


def _strip_punctuation(text: str) -> str:
    """Drop non-word characters and collapse runs of whitespace."""
    return ' '.join(_NON_WORD_RE.sub('', text).split())


def normalize_title(title: str) -> str:
    """Normalize song title for matching.

    The title is lowercased, a trailing segue ("Song -> Next", "Song > Next")
    is removed, trailing parenthetical notes ("Song (Live)") are removed,
    punctuation is dropped and whitespace is collapsed.

    The segue is removed *before* the parenthetical so that
    "Dark Star (Live) -> Drums" normalizes to "dark star".  Only trailing
    parentheticals are removed, so a leading one such as
    "(I Can't Get No) Satisfaction" is kept as part of the title.

    If removing the decorations leaves nothing (for example "(Untitled)" or
    "> Drums"), the punctuation-stripped full title is used instead.

    Returns:
        The normalized title; an empty string when the title contains no
        word characters at all (or is empty/None).
    """
    lowered = (title or '').lower()
    base = _SEGUE_RE.sub('', lowered)
    base = _TRAILING_PARENS_RE.sub('', base)
    return _strip_punctuation(base) or _strip_punctuation(lowered)


def generate_slug(title: str) -> str:
    """Generate URL-safe slug from title.

    Lowercases the title, removes characters other than word characters,
    whitespace and hyphens, turns runs of whitespace/underscores into a
    single hyphen and trims hyphens from both ends.
    """
    slug = re.sub(r'[^\w\s-]', '', title.lower())
    slug = re.sub(r'[\s_]+', '-', slug)
    slug = re.sub(r'-+', '-', slug)
    return slug.strip('-')


def find_shared_songs() -> Dict[str, List[Song]]:
    """Find songs that appear in multiple verticals.

    Loads every Song, groups them by normalized title and keeps the groups
    whose songs span more than one ``vertical_id``.  A summary is printed.

    Returns:
        Mapping of normalized title to the list of Song rows in that group.
    """
    print("Finding shared songs across bands...\n")

    with Session(engine) as session:
        all_songs = session.exec(select(Song)).all()

        # Group by normalized title
        title_groups = defaultdict(list)
        for song in all_songs:
            norm = normalize_title(song.title)
            if not norm:
                # Titles without any word characters would otherwise all
                # collapse into one bogus "" group.
                continue
            title_groups[norm].append(song)

        # Keep only groups that span more than one vertical
        shared = {
            norm: songs
            for norm, songs in title_groups.items()
            if len({s.vertical_id for s in songs}) > 1
        }

        print(f"Found {len(shared)} songs shared across bands:\n")

        verticals = {}  # vertical_id -> Vertical (or None), looked up once
        for norm_title in sorted(shared):
            bands = []
            for song in shared[norm_title]:
                vertical_id = song.vertical_id
                if vertical_id not in verticals:
                    verticals[vertical_id] = session.get(Vertical, vertical_id)
                vertical = verticals[vertical_id]
                if vertical:
                    bands.append(f"{vertical.name} ({song.title})")

            print(f" {norm_title}")
            for band in bands:
                print(f" - {band}")
            print()

    return shared


def create_canon_links(dry_run: bool = True):
    """Create SongCanon entries and link songs to them.

    For every group returned by ``find_shared_songs`` the most common raw
    title becomes the canonical title.  A SongCanon with the matching slug is
    reused if one exists, otherwise a new one is created, and every song of
    the group whose ``canon_id`` differs is pointed at it.

    Args:
        dry_run: When True (the default) nothing is written to the database;
            only the counts of what would be created/linked are printed.
    """
    print(f"{'[DRY RUN] ' if dry_run else ''}Creating SongCanon links...\n")

    # Collect the groups first: find_shared_songs manages its own session, so
    # there is no need to hold the write session open around it.
    shared = find_shared_songs()

    created = 0
    linked = 0
    # Canons already resolved in this run, keyed by slug.  Different title
    # groups can produce the same slug; without this a dry run (which never
    # inserts) would count the same canon as created more than once.
    canons_by_slug = {}

    with Session(engine) as session:
        for songs in shared.values():
            # Use the most common title as the canonical title
            # (ties go to the title seen first).
            title_counts = Counter(song.title for song in songs)
            canonical_title = max(title_counts, key=title_counts.get)
            slug = generate_slug(canonical_title)

            # Check if canon already exists
            canon = canons_by_slug.get(slug)
            if canon is None:
                canon = session.exec(
                    select(SongCanon).where(SongCanon.slug == slug)
                ).first()

            if canon is not None:
                print(f" Found existing: {canonical_title}")
            else:
                # Take the original artist from the first song that has one
                original_artist = next(
                    (s.original_artist for s in songs if s.original_artist),
                    None,
                )
                canon = SongCanon(
                    title=canonical_title,
                    slug=slug,
                    original_artist=original_artist,
                )
                if not dry_run:
                    session.add(canon)
                    session.commit()
                    session.refresh(canon)
                created += 1
                print(f" Created canon: {canonical_title}")
            canons_by_slug[slug] = canon

            # Link songs to canon
            for song in songs:
                # A canon without an id has not been inserted (dry run), so
                # no song can already point at it: every song counts.
                if canon.id is not None and song.canon_id == canon.id:
                    continue
                if not dry_run:
                    song.canon_id = canon.id
                    session.add(song)
                linked += 1

        if not dry_run:
            session.commit()

    print(f"\n{'Would create' if dry_run else 'Created'}: {created} canonical songs")
    print(f"{'Would link' if dry_run else 'Linked'}: {linked} songs")

    if dry_run:
        print("\nRun with dry_run=False to apply changes.")


if __name__ == "__main__":
    if len(sys.argv) > 1 and sys.argv[1] == "--apply":
        create_canon_links(dry_run=False)
    else:
        create_canon_links(dry_run=True)
        print("\nTo apply changes, run: python link_canon_songs.py --apply")