Add smart setlist import script

This commit is contained in:
fullsizemalt 2025-12-25 11:04:20 -08:00
parent ddcc49d41e
commit e2c77d7593

View file

@ -0,0 +1,207 @@
"""
Smart Setlist Importer
Uses a 2-step mapping strategy to bypass missing dates in setlist endpoint:
1. Fetch ALL shows from API -> Map ElGoose_ID to Date.
2. Fetch ALL DB shows -> Map Date to DB_ID.
3. Combine: ElGoose_ID -> DB_ID.
4. Import setlists using ElGoose_ID from setlist entries.
"""
import requests
import time
from datetime import datetime
from sqlmodel import Session, select, func
from database import engine
from models import Show, Song, Performance
from slugify import generate_slug
# Root of the El Goose public API (v2); endpoint URLs append "/<name>.json".
BASE_URL = "https://elgoose.net/api/v2"
def fetch_json(endpoint, params=None):
    """Fetch a JSON payload from the El Goose API, retrying up to 3 times.

    Args:
        endpoint: API endpoint name (without the ``.json`` suffix).
        params: Optional dict of query-string parameters.

    Returns:
        The ``data`` list from the response body, or ``None`` when the API
        reports an in-band error (``error == 1``) or every attempt fails.
    """
    url = f"{BASE_URL}/{endpoint}.json"
    for attempt in range(3):
        try:
            response = requests.get(url, params=params, timeout=30)
            response.raise_for_status()
            data = response.json()
            # The API signals failure in-band rather than via HTTP status.
            if data.get('error') == 1:
                return None
            return data.get('data', [])
        except Exception as e:
            print(f" Error fetching {endpoint} (attempt {attempt+1}): {e}")
            # Back off before retrying — but not after the final attempt,
            # where the original version wasted 2s before giving up.
            if attempt < 2:
                time.sleep(2)
    return None
def fetch_all_pages(endpoint, params=None):
    """Collect every page of results from an El Goose endpoint.

    Requests consecutive pages (starting at 1) until a page comes back
    empty or fails, concatenating all items into one list.

    Args:
        endpoint: API endpoint name passed through to ``fetch_json``.
        params: Optional base query parameters applied to every page.

    Returns:
        A list of all items across every fetched page.
    """
    base_params = dict(params) if params else {}
    results = []
    page = 1
    while True:
        print(f" Fetching {endpoint} page {page}...", end="\r", flush=True)
        batch = fetch_json(endpoint, dict(base_params, page=page))
        if not batch:
            break
        results.extend(batch)
        page += 1
        time.sleep(0.1)  # Be nice
    print(f"\n Fetched {len(results)} items from {endpoint}")
    return results
def _set_label(raw_set):
    """Map the API's raw set marker to a human-readable set name.

    Digits become "Set N"; 'e'/'e2' are encores; 's' is soundcheck;
    any other value falls through to "Set <value>".
    """
    set_val = str(raw_set)
    if set_val.isdigit():
        return f"Set {set_val}"
    lowered = set_val.lower()
    if lowered == 'e':
        return "Encore"
    if lowered == 'e2':
        return "Encore 2"
    if lowered == 's':
        return "Soundcheck"
    return f"Set {set_val}"


def main():
    """Import Goose setlists by joining API show IDs to local DB rows.

    The setlist endpoint omits show dates, so we build two maps —
    ElGoose show_id -> date (from the API) and date -> DB id (from our
    rows) — and compose them to resolve each setlist entry to a Show.
    """
    print("=" * 60)
    print("SMART SETLIST IMPORTER")
    print("=" * 60)
    with Session(engine) as session:
        # 1. Build DB Map: Date string -> DB Show ID
        print("\n1. Building DB Map (Date -> Show ID)...")
        shows = session.exec(select(Show)).all()
        date_to_db_id = {s.date.strftime('%Y-%m-%d'): s.id for s in shows}
        print(f" Mapped {len(date_to_db_id)} existing shows in DB")
        if not date_to_db_id:
            print(" CRITICAL: No shows in database! Run import_shows first.")
            return
        # 2. Build API Map: ElGoose ID -> Date
        print("\n2. Fetching API Shows to build ElGoose ID -> Date map...")
        # Only fetch shows for our artist (Goose = 3)
        api_shows = fetch_all_pages("shows", {"artist": 3})
        if not api_shows:
            # Fallback if artist filter fails or returns empty
            print(" Artist filter returned empty, fetching all shows...")
            api_shows = fetch_all_pages("shows")
        # Compose the two maps: ElGoose show_id -> local DB show id.
        elgoose_id_to_db_id = {}
        for s in api_shows:
            s_date = s.get('showdate')
            s_id = s.get('show_id')
            if s_date and s_id:
                db_id = date_to_db_id.get(s_date)
                if db_id:
                    elgoose_id_to_db_id[s_id] = db_id
        print(f" Mapped {len(elgoose_id_to_db_id)} ElGoose IDs to DB IDs")
        # 3. Cache Songs for Lookup (normalized title -> id)
        print("\n3. Caching Songs...")
        songs = session.exec(select(Song)).all()
        song_map = {s.title.lower().strip(): s.id for s in songs}
        print(f" Cached {len(song_map)} songs")
        # 4. Fetch and Import Setlists
        print("\n4. Fetching Setlists and Importing...")
        # The setlists endpoint can't be filtered by artist, so fetch
        # everything and keep only entries whose show_id we mapped above.
        page = 1
        total_added = 0
        total_processed = 0
        while True:
            start_time = time.time()
            data = fetch_json("setlists", {"page": page})
            if not data:
                print(" No more data.")
                break
            batch_added = 0
            for perf in data:
                total_processed += 1
                # Check if this performance belongs to a show we care about.
                db_show_id = elgoose_id_to_db_id.get(perf.get('show_id'))
                if not db_show_id:
                    continue  # Not a Goose show or show not in our DB
                # Resolve song; `or ''` guards an explicit null songname,
                # which dict.get's default would NOT catch (key present,
                # value None -> AttributeError on .strip()).
                song_name = (perf.get('songname') or '').strip()
                song_id = song_map.get(song_name.lower())
                if not song_id:
                    # Song missing from DB; skip rather than create ad hoc.
                    continue
                # Same null guard; `None or 0` and `0 or 0` both yield 0.
                position = perf.get('position') or 0
                # Skip duplicates already stored for this show/song/slot.
                existing = session.exec(
                    select(Performance).where(
                        Performance.show_id == db_show_id,
                        Performance.song_id == song_id,
                        Performance.position == position
                    )
                ).first()
                if existing:
                    continue
                new_perf = Performance(
                    show_id=db_show_id,
                    song_id=song_id,
                    position=position,
                    set_name=_set_label(perf.get('setnumber', '1')),
                    segue=bool(perf.get('segue', 0)),
                    notes=perf.get('footnote'),
                    slug=f"{generate_slug(song_name)}-{db_show_id}-{position}"  # temp slug strategy
                )
                session.add(new_perf)
                batch_added += 1
                total_added += 1
            session.commit()
            elapsed = time.time() - start_time
            print(f" Page {page}: Processed {len(data)}, Added {batch_added} ({elapsed:.2f}s)")
            # Setlist history must be walked in full; no early exit.
            page += 1
            if page > 2000:  # Safety break
                break
        # 5. Placeholder: slugs above embed DB id + position, which is
        # unique but not pretty; a follow-up fixer can prettify them.
        print("\n5. Fixing Slugs...")
        print("\n" + "=" * 60)
        print("IMPORT COMPLETE")
        print(f"Total Added: {total_added}")
        print("=" * 60)
# Allow running this importer directly as a script.
if __name__ == "__main__":
    main()