"""
Smart Setlist Importer

Uses a 2-step mapping strategy to bypass missing dates in the setlist endpoint:
1. Fetch ALL shows from the API -> map ElGoose show ID to date.
2. Fetch ALL DB shows -> map date to DB show ID.
3. Combine: ElGoose ID -> DB ID.
4. Import setlists, resolving each entry through its ElGoose show ID.
"""
import requests
import time
from datetime import datetime
from sqlmodel import Session, select, func
from database import engine
from models import Show, Song, Performance
from slugify import generate_slug

BASE_URL = "https://elgoose.net/api/v2"

# Retry / politeness knobs for the El Goose API.
MAX_RETRIES = 3        # attempts per request before giving up
RETRY_DELAY_S = 2      # back-off between failed attempts
PAGE_DELAY_S = 0.1     # courtesy delay between paginated requests
MAX_SETLIST_PAGES = 2000  # hard safety break for the setlist crawl


def fetch_json(endpoint, params=None):
    """Fetch one JSON payload from the El Goose API with retries.

    Returns the payload's ``data`` list, or ``None`` when the API reports
    an error (``error == 1``) or every retry attempt fails.
    """
    url = f"{BASE_URL}/{endpoint}.json"
    for attempt in range(MAX_RETRIES):
        try:
            response = requests.get(url, params=params, timeout=30)
            response.raise_for_status()
            data = response.json()
            if data.get('error') == 1:
                # API-level error payload: treat as "no data", don't retry.
                return None
            return data.get('data', [])
        except Exception as e:
            print(f"  Error fetching {endpoint} (attempt {attempt+1}): {e}")
            time.sleep(RETRY_DELAY_S)
    return None


def fetch_all_pages(endpoint, params=None):
    """Fetch every page of *endpoint* and return the combined item list.

    Stops at the first empty/failed page (the API signals the end of the
    collection by returning no data).
    """
    if params is None:
        params = {}

    results = []
    page = 1
    while True:
        print(f"  Fetching {endpoint} page {page}...", end="\r", flush=True)
        page_params = params.copy()
        page_params['page'] = page
        data = fetch_json(endpoint, page_params)
        if not data:
            break
        results.extend(data)
        page += 1
        time.sleep(PAGE_DELAY_S)  # Be nice
    print(f"\n  Fetched {len(results)} items from {endpoint}")
    return results


def _set_name(set_val):
    """Translate an El Goose ``setnumber`` code into a display name.

    Digits -> "Set N"; 'e' -> "Encore"; 'e2' -> "Encore 2";
    's' -> "Soundcheck"; anything else falls back to "Set <value>".
    (The original if/elif chain had a duplicated digit/else branch; this
    helper produces identical output for every input.)
    """
    s = str(set_val)
    code = s.lower()
    if code == 'e':
        return "Encore"
    if code == 'e2':
        return "Encore 2"
    if code == 's':
        return "Soundcheck"
    return f"Set {s}"


def main():
    """Run the 2-step mapped setlist import (see module docstring)."""
    print("=" * 60)
    print("SMART SETLIST IMPORTER")
    print("=" * 60)

    with Session(engine) as session:
        # 1. Build DB map: date string -> DB show ID.
        print("\n1. Building DB Map (Date -> Show ID)...")
        shows = session.exec(select(Show)).all()
        date_to_db_id = {s.date.strftime('%Y-%m-%d'): s.id for s in shows}
        print(f"  Mapped {len(date_to_db_id)} existing shows in DB")

        if not date_to_db_id:
            print("  CRITICAL: No shows in database! Run import_shows first.")
            return

        # 2. Build API map: ElGoose show ID -> date, then chain through the
        # DB map to get ElGoose ID -> DB ID.
        print("\n2. Fetching API Shows to build ElGoose ID -> Date map...")
        # Only fetch shows for our artist (Goose = 3).
        api_shows = fetch_all_pages("shows", {"artist": 3})
        if not api_shows:
            # Fallback if the artist filter fails or returns empty.
            print("  Artist filter returned empty, fetching all shows...")
            api_shows = fetch_all_pages("shows")

        elgoose_id_to_db_id = {}
        for api_show in api_shows:
            show_date = api_show.get('showdate')
            elgoose_id = api_show.get('show_id')
            if show_date and elgoose_id:
                db_id = date_to_db_id.get(show_date)
                if db_id:
                    elgoose_id_to_db_id[elgoose_id] = db_id

        print(f"  Mapped {len(elgoose_id_to_db_id)} ElGoose IDs to DB IDs")

        # 3. Cache songs for O(1) title -> id lookup.
        print("\n3. Caching Songs...")
        songs = session.exec(select(Song)).all()
        song_map = {s.title.lower().strip(): s.id for s in songs}
        print(f"  Cached {len(song_map)} songs")

        # 4. Fetch setlists page by page and import rows belonging to the
        # shows we mapped above. The setlists endpoint cannot be filtered by
        # artist reliably, so we fetch all and filter by known show IDs.
        print("\n4. Fetching Setlists and Importing...")

        page = 1
        total_added = 0
        total_processed = 0

        while True:
            start_time = time.time()
            data = fetch_json("setlists", {"page": page})
            if not data:
                print("  No more data.")
                break

            batch_added = 0
            for perf in data:
                total_processed += 1

                # Skip performances from shows we don't track.
                db_show_id = elgoose_id_to_db_id.get(perf.get('show_id'))
                if not db_show_id:
                    continue

                # BUG FIX: the API may return an explicit null songname;
                # .get('songname', '') would then yield None and .strip()
                # would raise AttributeError. `or ''` covers both cases.
                song_name = (perf.get('songname') or '').strip()
                song_id = song_map.get(song_name.lower())
                if not song_id:
                    # Song not in DB (should have been imported already);
                    # skip rather than create on the fly.
                    continue

                # BUG FIX: same null-guard for position.
                position = perf.get('position') or 0

                # De-duplicate: one (show, song, position) per performance.
                # A per-row SQL check is slower than caching but safest.
                existing = session.exec(
                    select(Performance).where(
                        Performance.show_id == db_show_id,
                        Performance.song_id == song_id,
                        Performance.position == position
                    )
                ).first()
                if existing:
                    continue

                new_perf = Performance(
                    show_id=db_show_id,
                    song_id=song_id,
                    position=position,
                    set_name=_set_name(perf.get('setnumber', '1')),
                    segue=bool(perf.get('segue', 0)),
                    notes=perf.get('footnote'),
                    # Temp slug strategy: unique (id + position) but not
                    # pretty; a later pass can rewrite with the show date.
                    slug=f"{generate_slug(song_name)}-{db_show_id}-{position}"
                )
                session.add(new_perf)
                batch_added += 1
                total_added += 1

            session.commit()
            elapsed = time.time() - start_time
            print(f"  Page {page}: Processed {len(data)}, Added {batch_added} ({elapsed:.2f}s)")

            # The setlists endpoint appears to be ordered by date desc, but
            # we must walk the whole history, so no early-exit heuristic.
            page += 1
            if page > MAX_SETLIST_PAGES:  # safety break
                break

        # 5. Slug cleanup is deferred: the slugs generated above are unique
        # (they embed the DB id + position) but not pretty. Run the separate
        # slug fixer afterwards to rewrite them from the show date.
        print("\n5. Fixing Slugs...")

        print("\n" + "=" * 60)
        print("IMPORT COMPLETE")
        print(f"Total Added: {total_added}")
        print("=" * 60)


if __name__ == "__main__":
    main()