Add smart setlist import script

This commit is contained in:
fullsizemalt 2025-12-25 11:04:20 -08:00
parent ddcc49d41e
commit e2c77d7593

View file

@ -0,0 +1,207 @@
"""
Smart Setlist Importer
Uses a 2-step mapping strategy to bypass missing dates in setlist endpoint:
1. Fetch ALL shows from API -> Map ElGoose_ID to Date.
2. Fetch ALL DB shows -> Map Date to DB_ID.
3. Combine: ElGoose_ID -> DB_ID.
4. Import setlists using ElGoose_ID from setlist entries.
"""
import requests
import time
from datetime import datetime
from sqlmodel import Session, select, func
from database import engine
from models import Show, Song, Performance
from slugify import generate_slug
# Root of the El Goose public API (v2); endpoint URLs append "/<name>.json".
BASE_URL = "https://elgoose.net/api/v2"
def fetch_json(endpoint, params=None):
    """Fetch a JSON payload from the El Goose API, retrying up to 3 times.

    Args:
        endpoint: API endpoint name (without the ``.json`` suffix).
        params: Optional dict of query-string parameters.

    Returns:
        The ``data`` list from the response body, or ``None`` when the API
        reports an in-band error (``error == 1``) or every attempt fails.
    """
    url = f"{BASE_URL}/{endpoint}.json"
    for attempt in range(3):
        try:
            response = requests.get(url, params=params, timeout=30)
            response.raise_for_status()
            data = response.json()
            # The API signals failure in-band rather than via HTTP status.
            if data.get('error') == 1:
                return None
            return data.get('data', [])
        except Exception as e:
            print(f" Error fetching {endpoint} (attempt {attempt+1}): {e}")
            # Back off before retrying — but not after the final attempt,
            # where the original version wasted 2s before giving up.
            if attempt < 2:
                time.sleep(2)
    return None
def fetch_all_pages(endpoint, params=None):
    """Collect every page of results from an El Goose endpoint.

    Requests consecutive pages (starting at 1) until a page comes back
    empty or fails, concatenating all items into one list.

    Args:
        endpoint: API endpoint name passed through to ``fetch_json``.
        params: Optional base query parameters applied to every page.

    Returns:
        A list of all items across every fetched page.
    """
    base_params = dict(params) if params else {}
    results = []
    page = 1
    while True:
        print(f" Fetching {endpoint} page {page}...", end="\r", flush=True)
        batch = fetch_json(endpoint, dict(base_params, page=page))
        if not batch:
            break
        results.extend(batch)
        page += 1
        time.sleep(0.1)  # Be nice
    print(f"\n Fetched {len(results)} items from {endpoint}")
    return results
def _set_label(raw_set):
    """Map the API's raw set marker to a human-readable set name.

    Digits become "Set N"; 'e'/'e2' are encores; 's' is soundcheck;
    any other value falls through to "Set <value>".
    """
    set_val = str(raw_set)
    if set_val.isdigit():
        return f"Set {set_val}"
    lowered = set_val.lower()
    if lowered == 'e':
        return "Encore"
    if lowered == 'e2':
        return "Encore 2"
    if lowered == 's':
        return "Soundcheck"
    return f"Set {set_val}"


def main():
    """Import Goose setlists by joining API show IDs to local DB rows.

    The setlist endpoint omits show dates, so we build two maps —
    ElGoose show_id -> date (from the API) and date -> DB id (from our
    rows) — and compose them to resolve each setlist entry to a Show.
    """
    print("=" * 60)
    print("SMART SETLIST IMPORTER")
    print("=" * 60)
    with Session(engine) as session:
        # 1. Build DB Map: Date string -> DB Show ID
        print("\n1. Building DB Map (Date -> Show ID)...")
        shows = session.exec(select(Show)).all()
        date_to_db_id = {s.date.strftime('%Y-%m-%d'): s.id for s in shows}
        print(f" Mapped {len(date_to_db_id)} existing shows in DB")
        if not date_to_db_id:
            print(" CRITICAL: No shows in database! Run import_shows first.")
            return
        # 2. Build API Map: ElGoose ID -> Date
        print("\n2. Fetching API Shows to build ElGoose ID -> Date map...")
        # Only fetch shows for our artist (Goose = 3)
        api_shows = fetch_all_pages("shows", {"artist": 3})
        if not api_shows:
            # Fallback if artist filter fails or returns empty
            print(" Artist filter returned empty, fetching all shows...")
            api_shows = fetch_all_pages("shows")
        # Compose the two maps: ElGoose show_id -> local DB show id.
        elgoose_id_to_db_id = {}
        for s in api_shows:
            s_date = s.get('showdate')
            s_id = s.get('show_id')
            if s_date and s_id:
                db_id = date_to_db_id.get(s_date)
                if db_id:
                    elgoose_id_to_db_id[s_id] = db_id
        print(f" Mapped {len(elgoose_id_to_db_id)} ElGoose IDs to DB IDs")
        # 3. Cache Songs for Lookup (normalized title -> id)
        print("\n3. Caching Songs...")
        songs = session.exec(select(Song)).all()
        song_map = {s.title.lower().strip(): s.id for s in songs}
        print(f" Cached {len(song_map)} songs")
        # 4. Fetch and Import Setlists
        print("\n4. Fetching Setlists and Importing...")
        # The setlists endpoint can't be filtered by artist, so fetch
        # everything and keep only entries whose show_id we mapped above.
        page = 1
        total_added = 0
        total_processed = 0
        while True:
            start_time = time.time()
            data = fetch_json("setlists", {"page": page})
            if not data:
                print(" No more data.")
                break
            batch_added = 0
            for perf in data:
                total_processed += 1
                # Check if this performance belongs to a show we care about.
                db_show_id = elgoose_id_to_db_id.get(perf.get('show_id'))
                if not db_show_id:
                    continue  # Not a Goose show or show not in our DB
                # Resolve song; `or ''` guards an explicit null songname,
                # which dict.get's default would NOT catch (key present,
                # value None -> AttributeError on .strip()).
                song_name = (perf.get('songname') or '').strip()
                song_id = song_map.get(song_name.lower())
                if not song_id:
                    # Song missing from DB; skip rather than create ad hoc.
                    continue
                # Same null guard; `None or 0` and `0 or 0` both yield 0.
                position = perf.get('position') or 0
                # Skip duplicates already stored for this show/song/slot.
                existing = session.exec(
                    select(Performance).where(
                        Performance.show_id == db_show_id,
                        Performance.song_id == song_id,
                        Performance.position == position
                    )
                ).first()
                if existing:
                    continue
                new_perf = Performance(
                    show_id=db_show_id,
                    song_id=song_id,
                    position=position,
                    set_name=_set_label(perf.get('setnumber', '1')),
                    segue=bool(perf.get('segue', 0)),
                    notes=perf.get('footnote'),
                    slug=f"{generate_slug(song_name)}-{db_show_id}-{position}"  # temp slug strategy
                )
                session.add(new_perf)
                batch_added += 1
                total_added += 1
            session.commit()
            elapsed = time.time() - start_time
            print(f" Page {page}: Processed {len(data)}, Added {batch_added} ({elapsed:.2f}s)")
            # Setlist history must be walked in full; no early exit.
            page += 1
            if page > 2000:  # Safety break
                break
        # 5. Placeholder: slugs above embed DB id + position, which is
        # unique but not pretty; a follow-up fixer can prettify them.
        print("\n5. Fixing Slugs...")
        print("\n" + "=" * 60)
        print("IMPORT COMPLETE")
        print(f"Total Added: {total_added}")
        print("=" * 60)
# Allow running this importer directly as a script.
if __name__ == "__main__":
    main()