""" Smart Setlist Importer Uses a 2-step mapping strategy to bypass missing dates in setlist endpoint: 1. Fetch ALL shows from API -> Map ElGoose_ID to Date. 2. Fetch ALL DB shows -> Map Date to DB_ID. 3. Combine: ElGoose_ID -> DB_ID. 4. Import setlists using ElGoose_ID from setlist entries. """ import requests import time from datetime import datetime from sqlmodel import Session, select, func from database import engine from models import Show, Song, Performance from slugify import generate_slug BASE_URL = "https://elgoose.net/api/v2" def fetch_json(endpoint, params=None): """Fetch JSON from El Goose API with retries""" url = f"{BASE_URL}/{endpoint}.json" for attempt in range(3): try: response = requests.get(url, params=params, timeout=30) response.raise_for_status() data = response.json() if data.get('error') == 1: return None return data.get('data', []) except Exception as e: print(f" Error fetching {endpoint} (attempt {attempt+1}): {e}") time.sleep(2) return None def fetch_all_pages(endpoint, params=None): """Fetch all pages from an endpoint""" if params is None: params = {} results = [] page = 1 while True: print(f" Fetching {endpoint} page {page}...", end="\r", flush=True) p = params.copy() p['page'] = page data = fetch_json(endpoint, p) if not data: break results.extend(data) page += 1 time.sleep(0.1) # Be nice print(f"\n Fetched {len(results)} items from {endpoint}") return results def main(): print("=" * 60) print("SMART SETLIST IMPORTER") print("=" * 60) with Session(engine) as session: # 1. Build DB Map: Date string -> DB Show ID print("\n1. Building DB Map (Date -> Show ID)...") shows = session.exec(select(Show)).all() date_to_db_id = {s.date.strftime('%Y-%m-%d'): s.id for s in shows} print(f" Mapped {len(date_to_db_id)} existing shows in DB") if not date_to_db_id: print(" CRITICAL: No shows in database! Run import_shows first.") return # 2. Build API Map: ElGoose ID -> Date print("\n2. Fetching API Shows to build ElGoose ID -> Date map...") # Only fetch shows for our artist (Goose = 3) api_shows = fetch_all_pages("shows", {"artist": 3}) if not api_shows: # Fallback if artist filter fails or returns empty print(" Artist filter returned empty, fetching all shows...") api_shows = fetch_all_pages("shows") elgoose_id_to_db_id = {} matched_count = 0 for s in api_shows: s_date = s.get('showdate') s_id = s.get('show_id') if s_date and s_id: # Lookup in DB map db_id = date_to_db_id.get(s_date) if db_id: elgoose_id_to_db_id[s_id] = db_id matched_count += 1 print(f" Mapped {len(elgoose_id_to_db_id)} ElGoose IDs to DB IDs") # 3. Cache Songs for Lookup print("\n3. Caching Songs...") songs = session.exec(select(Song)).all() song_map = {s.title.lower().strip(): s.id for s in songs} # title -> id print(f" Cached {len(song_map)} songs") # 4. Fetch and Import Setlists print("\n4. Fetching Setlists and Importing...") # Since we can't filter setlists by artist easily without checking every item, # we'll fetch all and filter by our known show IDs. page = 1 total_added = 0 total_processed = 0 while True: start_time = time.time() data = fetch_json("setlists", {"page": page}) if not data: print(" No more data.") break batch_added = 0 for perf in data: total_processed += 1 elgoose_show_id = perf.get('show_id') # Check if this performance belongs to a show we care about db_show_id = elgoose_id_to_db_id.get(elgoose_show_id) if not db_show_id: continue # Not a Goose show or show not in our DB # Resolve Song song_name = perf.get('songname', '').strip() song_id = song_map.get(song_name.lower()) if not song_id: # Try creating song if missing? # Ideally we should have imported all songs, but let's be safe # For now skip or log continue position = perf.get('position', 0) # Check duplication # We can cache existing performances for speed, but SQL check is safer for now existing = session.exec( select(Performance).where( Performance.show_id == db_show_id, Performance.song_id == song_id, Performance.position == position ) ).first() if existing: continue # Create Performance # Map setnumber set_val = str(perf.get('setnumber', '1')) if set_val.isdigit(): set_name = f"Set {set_val}" elif set_val.lower() == 'e': set_name = "Encore" elif set_val.lower() == 'e2': set_name = "Encore 2" elif set_val.lower() == 's': set_name = "Soundcheck" else: set_name = f"Set {set_val}" new_perf = Performance( show_id=db_show_id, song_id=song_id, position=position, set_name=set_name, segue=bool(perf.get('segue', 0)), notes=perf.get('footnote'), slug=f"{generate_slug(song_name)}-{db_show_id}-{position}" # temp slug strategy ) session.add(new_perf) batch_added += 1 total_added += 1 session.commit() elapsed = time.time() - start_time print(f" Page {page}: Processed {len(data)}, Added {batch_added} ({elapsed:.2f}s)") # Optimization: If we see mostly empty adds for many pages, # we might want to skip, BUT setlists endpoint is usually ordered by date desc? # We must go through all history. page += 1 if page > 2000: # Safety break break # Fix slugs properly print("\n5. Fixing Slugs...") # (Slugs generated above might be generic, ideally update based on show date) # But for speed let's rely on the previous fixer or just update here if needed. # The above slug uses ID which is unique but not pretty. # Let's run a quick update for pretty slugs print("\n" + "=" * 60) print("IMPORT COMPLETE") print(f"Total Added: {total_added}") print("=" * 60) if __name__ == "__main__": main()