191 lines
6.8 KiB
Python
191 lines
6.8 KiB
Python
|
|
"""
|
|
Smart Setlist Importer (Streaming Version)
|
|
Reducing memory usage by processing data in streams instead of bulk loading.
|
|
"""
|
|
import requests
|
|
import time
|
|
import gc
|
|
from datetime import datetime
|
|
from sqlmodel import Session, select
|
|
from database import engine
|
|
from models import Show, Song, Performance
|
|
from slugify import generate_slug
|
|
|
|
BASE_URL = "https://elgoose.net/api/v2"
|
|
|
|
def fetch_json(endpoint, params=None):
|
|
"""Fetch JSON from El Goose API with retries"""
|
|
url = f"{BASE_URL}/{endpoint}.json"
|
|
for attempt in range(3):
|
|
try:
|
|
response = requests.get(url, params=params, timeout=30)
|
|
response.raise_for_status()
|
|
data = response.json()
|
|
if data.get('error') == 1:
|
|
return None
|
|
return data.get('data', [])
|
|
except Exception as e:
|
|
print(f" Error fetching {endpoint} (attempt {attempt+1}): {e}")
|
|
time.sleep(2)
|
|
return None
|
|
|
|
def main():
|
|
print("=" * 60)
|
|
print("SMART SETLIST IMPORTER (STREAMING)")
|
|
print("=" * 60)
|
|
|
|
with Session(engine) as session:
|
|
# 1. Build DB Map: Date string -> DB Show ID
|
|
print("\n1. Building DB Map (Date -> Show ID)...")
|
|
shows = session.exec(select(Show.id, Show.date)).all() # Only fetch needed fields
|
|
date_to_db_id = {s.date.strftime('%Y-%m-%d'): s.id for s in shows}
|
|
print(f" Mapped {len(date_to_db_id)} existing shows in DB")
|
|
|
|
if not date_to_db_id:
|
|
print(" CRITICAL: No shows in database!")
|
|
return
|
|
|
|
del shows
|
|
gc.collect()
|
|
|
|
# 2. Build API Map: ElGoose ID -> DB ID
|
|
# Process iteratively to save memory
|
|
print("\n2. Building ElGoose ID -> DB ID map (Streaming)...")
|
|
elgoose_id_to_db_id = {}
|
|
matched_count = 0
|
|
|
|
page = 1
|
|
seen_ids_in_run = set()
|
|
|
|
while True:
|
|
# Fetch batch of shows
|
|
print(f" Fetching shows page {page}...", end="\r", flush=True)
|
|
data = fetch_json("shows", {"page": page}) # Fetch all shows (artist filter can be flaky)
|
|
if not data:
|
|
break
|
|
|
|
# Check for API loop (if Page X returns same content as Page 1)
|
|
first_id_in_batch = data[0].get('show_id') if data else None
|
|
if first_id_in_batch and first_id_in_batch in seen_ids_in_run:
|
|
print(f"\n Loop detected at page {page} (ID {first_id_in_batch} seen before). Breaking.")
|
|
break
|
|
|
|
for s in data:
|
|
# We only need Goose shows (artist_id=3 usually, but we check date match)
|
|
s_date = s.get('showdate')
|
|
s_id = s.get('show_id')
|
|
|
|
if s_id:
|
|
seen_ids_in_run.add(s_id)
|
|
|
|
if s_date and s_id:
|
|
db_id = date_to_db_id.get(s_date)
|
|
if db_id:
|
|
elgoose_id_to_db_id[s_id] = db_id
|
|
matched_count += 1
|
|
|
|
page += 1
|
|
if page % 10 == 0:
|
|
gc.collect()
|
|
|
|
print(f"\n Mapped {len(elgoose_id_to_db_id)} ElGoose IDs to DB IDs")
|
|
del date_to_db_id
|
|
gc.collect()
|
|
|
|
# 3. Cache Songs
|
|
print("\n3. Caching Songs...")
|
|
songs = session.exec(select(Song.id, Song.title)).all()
|
|
song_map = {s.title.lower().strip(): s.id for s in songs}
|
|
del songs
|
|
gc.collect()
|
|
print(f" Cached {len(song_map)} songs")
|
|
|
|
# 4. Process Setlists
|
|
print("\n4. Importing Setlists...")
|
|
page = 1
|
|
total_added = 0
|
|
|
|
while True:
|
|
data = fetch_json("setlists", {"page": page})
|
|
if not data:
|
|
break
|
|
|
|
# Prefetch checks for this batch to avoid N+1 SELECTs?
|
|
# Actually with 3600 perfs, one-by-one check is slow.
|
|
# But "existing check" is needed.
|
|
# We can cache *existing performances* for the CURRENT batch's shows?
|
|
# Or just cache ALL existing performance keys (show_id, song_id, position)?
|
|
# Performance table might be large (40k rows?).
|
|
# (show_id, song_id, position) tuples set is ~2MB RAM. Safe.
|
|
|
|
if page == 1:
|
|
print(" Caching existing performance keys...")
|
|
perfs = session.exec(select(Performance.show_id, Performance.song_id, Performance.position)).all()
|
|
existing_keys = set((p.show_id, p.song_id, p.position) for p in perfs)
|
|
print(f" Cached {len(existing_keys)} performance keys")
|
|
del perfs
|
|
gc.collect()
|
|
|
|
batch_added = 0
|
|
new_objects = []
|
|
|
|
for perf in data:
|
|
elgoose_show_id = perf.get('show_id')
|
|
db_show_id = elgoose_id_to_db_id.get(elgoose_show_id)
|
|
if not db_show_id:
|
|
continue
|
|
|
|
song_name = perf.get('songname', '').strip()
|
|
song_id = song_map.get(song_name.lower())
|
|
if not song_id:
|
|
continue
|
|
|
|
position = perf.get('position', 0)
|
|
|
|
# Check uniqueness
|
|
if (db_show_id, song_id, position) in existing_keys:
|
|
continue
|
|
|
|
# Create
|
|
set_val = str(perf.get('setnumber', '1'))
|
|
if set_val.isdigit():
|
|
set_name = f"Set {set_val}"
|
|
elif set_val.lower() == 'e':
|
|
set_name = "Encore"
|
|
elif set_val.lower() == 'e2':
|
|
set_name = "Encore 2"
|
|
elif set_val.lower() == 's':
|
|
set_name = "Soundcheck"
|
|
else:
|
|
set_name = f"Set {set_val}"
|
|
|
|
|
|
new_perf = Performance(
|
|
show_id=db_show_id,
|
|
song_id=song_id,
|
|
position=position,
|
|
set_name=set_name,
|
|
segue=bool(perf.get('segue', 0)),
|
|
notes=perf.get('footnote'),
|
|
slug=f"{generate_slug(song_name)}-{db_show_id}-{position}"
|
|
)
|
|
new_objects.append(new_perf)
|
|
existing_keys.add((db_show_id, song_id, position)) # Add to cache
|
|
batch_added += 1
|
|
total_added += 1
|
|
|
|
if new_objects:
|
|
session.add_all(new_objects)
|
|
session.commit()
|
|
|
|
print(f" Page {page}: Added {batch_added} (Total {total_added})", end="\r", flush=True)
|
|
page += 1
|
|
|
|
if page % 20 == 0:
|
|
gc.collect()
|
|
|
|
print(f"\nImport Complete! Total Added: {total_added}")
|
|
|
|
if __name__ == "__main__":
|
|
main()
|