"""
Smart Setlist Importer (Streaming Version)

Reduces memory usage by processing data in streams instead of bulk loading.
"""
import requests
import time
import gc
from datetime import datetime
from sqlmodel import Session, select
from database import engine
from models import Show, Song, Performance
from slugify import generate_slug

BASE_URL = "https://elgoose.net/api/v2"

# Retry policy used by fetch_json.
_MAX_ATTEMPTS = 3
_RETRY_DELAY_SECONDS = 2

# Non-numeric ``setnumber`` codes that get a dedicated label; any other
# value (numeric or not) is rendered as "Set <value>".
_SPECIAL_SET_NAMES = {
    'e': "Encore",
    'e2': "Encore 2",
    's': "Soundcheck",
}


def fetch_json(endpoint, params=None):
    """Fetch the ``data`` payload of an El Goose API endpoint, with retries.

    Args:
        endpoint: Endpoint name without the ``.json`` suffix, e.g. ``"shows"``.
        params: Optional query parameters forwarded to ``requests.get``.

    Returns:
        The value of the response's ``data`` key (``[]`` when the key is
        absent), or ``None`` when the API reports ``error == 1`` or when all
        attempts failed.

    NOTE: callers treat any falsy result as "no more pages", so a persistent
    network failure ends pagination the same way an empty page does.
    """
    url = f"{BASE_URL}/{endpoint}.json"
    for attempt in range(1, _MAX_ATTEMPTS + 1):
        try:
            response = requests.get(url, params=params, timeout=30)
            response.raise_for_status()
            payload = response.json()
            if payload.get('error') == 1:
                return None
            return payload.get('data', [])
        except Exception as e:
            # Deliberately broad: the importer is best-effort, so any failure
            # (network, HTTP status, malformed JSON, unexpected payload shape)
            # is reported and retried rather than aborting the whole run.
            print(f" Error fetching {endpoint} (attempt {attempt}): {e}")
            # Only wait when another attempt will actually follow.
            if attempt < _MAX_ATTEMPTS:
                time.sleep(_RETRY_DELAY_SECONDS)
    return None


def _set_name(raw_set_number):
    """Translate an API ``setnumber`` value into a human-readable set label."""
    set_val = str(raw_set_number)
    return _SPECIAL_SET_NAMES.get(set_val.lower(), f"Set {set_val}")


def _map_elgoose_show_ids(date_to_db_id):
    """Page through the API's shows and map ElGoose show IDs to DB show IDs.

    Args:
        date_to_db_id: Mapping of ``'YYYY-MM-DD'`` date strings to DB show IDs.

    Returns:
        Dict mapping each ElGoose ``show_id`` whose ``showdate`` matches a
        known date to the corresponding DB show ID.
    """
    elgoose_id_to_db_id = {}
    seen_first_ids = set()
    page = 1

    while True:
        print(f" Fetching shows page {page}...", end="\r", flush=True)
        data = fetch_json("shows", {"page": page})
        if not data:
            break

        # Loop detection: if the first show of a page was already the first
        # show of an earlier page, the API is repeating itself.
        first_id = data[0].get('show_id')
        if first_id:
            if first_id in seen_first_ids:
                print(f"\n Loop detected in Shows at page {page} (ID {first_id}). Breaking.")
                break
            seen_first_ids.add(first_id)

        for show in data:
            show_date = show.get('showdate')
            show_id = show.get('show_id')
            if show_date and show_id:
                db_id = date_to_db_id.get(show_date)
                if db_id:
                    elgoose_id_to_db_id[show_id] = db_id

        page += 1
        if page % 10 == 0:
            gc.collect()

    return elgoose_id_to_db_id


def _import_setlists(session, elgoose_id_to_db_id, song_map, existing_keys):
    """Page through API setlists and insert the performances not yet stored.

    Args:
        session: Open database session used to add and commit new rows.
        elgoose_id_to_db_id: Mapping of ElGoose show IDs to DB show IDs.
        song_map: Mapping of lower-cased, stripped song titles to DB song IDs.
        existing_keys: Set of ``(show_id, song_id, position)`` tuples already
            present; updated in place as new performances are queued.

    Returns:
        Total number of performances added.
    """
    total_added = 0
    seen_batch_signatures = set()
    page = 1

    while True:
        data = fetch_json("setlists", {"page": page})
        if not data:
            break

        # Loop detection: fingerprint each page by its first item.
        first = data[0]
        signature = (
            f"{first.get('uniqueid')}-{first.get('show_id')}"
            f"-{first.get('position')}"
        )
        if signature in seen_batch_signatures:
            print(f"\n Loop detected in Setlists at page {page} (Sig {signature}). Breaking.")
            break
        seen_batch_signatures.add(signature)

        new_objects = []
        for perf in data:
            db_show_id = elgoose_id_to_db_id.get(perf.get('show_id'))
            if not db_show_id:
                continue

            # ``or ''`` also covers an explicit null songname, which a plain
            # ``.get('songname', '')`` default would let through as None.
            song_name = (perf.get('songname') or '').strip()
            song_id = song_map.get(song_name.lower())
            if not song_id:
                continue

            position = perf.get('position', 0)
            key = (db_show_id, song_id, position)
            if key in existing_keys:
                continue

            new_objects.append(
                Performance(
                    show_id=db_show_id,
                    song_id=song_id,
                    position=position,
                    set_name=_set_name(perf.get('setnumber', '1')),
                    segue=bool(perf.get('segue', 0)),
                    notes=perf.get('footnote'),
                    slug=f"{generate_slug(song_name)}-{db_show_id}-{position}",
                )
            )
            # Record the key immediately so duplicates later in the same or
            # a following page are skipped as well.
            existing_keys.add(key)

        batch_added = len(new_objects)
        total_added += batch_added

        # One commit per page keeps the pending-object list small.
        if new_objects:
            session.add_all(new_objects)
            session.commit()

        print(f" Page {page}: Added {batch_added} (Total {total_added})", end="\r", flush=True)

        page += 1
        if page % 20 == 0:
            gc.collect()

    return total_added


def main():
    """Import setlist performances from the El Goose API into the database.

    Steps: map existing DB shows by date, map ElGoose show IDs onto those DB
    shows, cache songs by title, then stream setlist pages and insert every
    performance whose (show, song, position) is not stored yet. Large
    intermediate structures are dropped as soon as they are no longer needed.
    """
    print("=" * 60)
    print("SMART SETLIST IMPORTER (STREAMING)")
    print("=" * 60)

    with Session(engine) as session:
        # 1. Build DB Map: Date string -> DB Show ID
        print("\n1. Building DB Map (Date -> Show ID)...")
        shows = session.exec(select(Show.id, Show.date)).all()
        date_to_db_id = {s.date.strftime('%Y-%m-%d'): s.id for s in shows}
        print(f" Mapped {len(date_to_db_id)} existing shows in DB")

        if not date_to_db_id:
            print(" CRITICAL: No shows in database!")
            return

        del shows
        gc.collect()

        # 2. Build API Map: ElGoose ID -> DB ID
        print("\n2. Building ElGoose ID -> DB ID map (Streaming)...")
        elgoose_id_to_db_id = _map_elgoose_show_ids(date_to_db_id)
        print(f"\n Mapped {len(elgoose_id_to_db_id)} ElGoose IDs to DB IDs")

        del date_to_db_id
        gc.collect()

        # 3. Caching Songs
        print("\n3. Caching Songs...")
        songs = session.exec(select(Song.id, Song.title)).all()
        song_map = {s.title.lower().strip(): s.id for s in songs}
        del songs
        gc.collect()
        print(f" Cached {len(song_map)} songs")

        # 4. Importing Setlists
        print("\n4. Importing Setlists...")

        # Cache existing performance keys (show_id, song_id, position)
        print(" Caching existing performance keys...")
        perfs = session.exec(
            select(Performance.show_id, Performance.song_id, Performance.position)
        ).all()
        existing_keys = {(p.show_id, p.song_id, p.position) for p in perfs}
        print(f" Cached {len(existing_keys)} existing performances")
        del perfs
        gc.collect()

        total_added = _import_setlists(
            session, elgoose_id_to_db_id, song_map, existing_keys
        )

        print(f"\nImport Complete! Total Added: {total_added}")


if __name__ == "__main__":
    main()