fediversion/backend/import_setlists_smart.py
fullsizemalt b4cddf41ea feat: Initialize Fediversion multi-band platform
- Fork elmeg-demo codebase for multi-band support
- Add data importer infrastructure with base class
- Create band-specific importers:
  - phish.py: Phish.net API v5
  - grateful_dead.py: Grateful Stats API
  - setlistfm.py: Dead & Company, Billy Strings (Setlist.fm)
- Add spec-kit configuration for Gemini
- Update README with supported bands and architecture
2025-12-28 12:39:28 -08:00

186 lines
6.4 KiB
Python

"""
Smart Setlist Importer (Streaming Version)
Reducing memory usage by processing data in streams instead of bulk loading.
"""
import requests
import time
import gc
from datetime import datetime
from sqlmodel import Session, select
from database import engine
from models import Show, Song, Performance
from slugify import generate_slug
BASE_URL = "https://elgoose.net/api/v2"
def fetch_json(endpoint, params=None):
    """Fetch a JSON payload from the El Goose API, retrying on failure.

    Args:
        endpoint: API endpoint name without the ``.json`` suffix
            (e.g. ``"shows"``, ``"setlists"``).
        params: Optional dict of query parameters (e.g. ``{"page": 3}``).

    Returns:
        The ``data`` list from the response on success, or ``None`` when
        the API reports an in-band error or all 3 attempts fail.
    """
    url = f"{BASE_URL}/{endpoint}.json"
    attempts = 3
    for attempt in range(attempts):
        try:
            response = requests.get(url, params=params, timeout=30)
            response.raise_for_status()
            data = response.json()
            # The API signals failure in-band with {"error": 1, ...}.
            if data.get('error') == 1:
                return None
            return data.get('data', [])
        # RequestException covers HTTP errors and timeouts; ValueError
        # covers JSON decode failures on older requests versions.
        except (requests.RequestException, ValueError) as e:
            print(f" Error fetching {endpoint} (attempt {attempt+1}): {e}")
            # Back off briefly — but not after the final attempt, which
            # previously added a pointless 2 s delay before giving up.
            if attempt < attempts - 1:
                time.sleep(2)
    return None
def main():
    """Import setlists from the El Goose API into the local database.

    Streaming design: API pages are fetched and processed one at a time,
    and only small lookup maps (date -> show id, elgoose id -> db id,
    song title -> song id, existing performance keys) are held in memory.
    Large intermediate lists are deleted and ``gc.collect()`` is called
    periodically to keep peak memory bounded.
    """
    print("=" * 60)
    print("SMART SETLIST IMPORTER (STREAMING)")
    print("=" * 60)
    with Session(engine) as session:
        # 1. Build DB Map: Date string -> DB Show ID
        print("\n1. Building DB Map (Date -> Show ID)...")
        shows = session.exec(select(Show.id, Show.date)).all()
        date_to_db_id = {s.date.strftime('%Y-%m-%d'): s.id for s in shows}
        print(f" Mapped {len(date_to_db_id)} existing shows in DB")
        if not date_to_db_id:
            print(" CRITICAL: No shows in database!")
            return
        del shows
        gc.collect()

        # 2. Build API Map: ElGoose ID -> DB ID, streaming one page at a time.
        print("\n2. Building ElGoose ID -> DB ID map (Streaming)...")
        elgoose_id_to_db_id = {}
        page = 1
        # The API has been observed to wrap around past the last page, so
        # track first-item IDs per page and stop on a repeat.
        seen_show_ids = set()
        while True:
            print(f" Fetching shows page {page}...", end="\r", flush=True)
            data = fetch_json("shows", {"page": page})
            if not data:
                break
            # Loop Detection (Shows)
            first_id = data[0].get('show_id') if data else None
            if first_id and first_id in seen_show_ids:
                print(f"\n Loop detected in Shows at page {page} (ID {first_id}). Breaking.")
                break
            if first_id:
                seen_show_ids.add(first_id)
            for s in data:
                s_date = s.get('showdate')
                s_id = s.get('show_id')
                if s_date and s_id:
                    db_id = date_to_db_id.get(s_date)
                    if db_id:
                        elgoose_id_to_db_id[s_id] = db_id
            page += 1
            if page % 10 == 0:
                gc.collect()
        print(f"\n Mapped {len(elgoose_id_to_db_id)} ElGoose IDs to DB IDs")
        del date_to_db_id
        gc.collect()

        # 3. Cache song titles (lowercased/stripped) -> Song ID for matching.
        print("\n3. Caching Songs...")
        songs = session.exec(select(Song.id, Song.title)).all()
        song_map = {s.title.lower().strip(): s.id for s in songs}
        del songs
        gc.collect()
        print(f" Cached {len(song_map)} songs")

        # 4. Importing Setlists
        print("\n4. Importing Setlists...")
        page = 1
        total_added = 0
        seen_batch_signatures = set()
        # Cache existing performance keys (show_id, song_id, position) so
        # re-runs are idempotent: already-imported rows are skipped.
        print(" Caching existing performance keys...")
        perfs = session.exec(select(Performance.show_id, Performance.song_id, Performance.position)).all()
        existing_keys = set((p.show_id, p.song_id, p.position) for p in perfs)
        print(f" Cached {len(existing_keys)} existing performances")
        del perfs
        gc.collect()
        while True:
            data = fetch_json("setlists", {"page": page})
            if not data:
                break
            # Loop Detection (Setlists)
            # Use signature of first item: (uniqueid or show_id+position)
            if data:
                first = data[0]
                signature = f"{first.get('uniqueid')}-{first.get('show_id')}-{first.get('position')}"
                if signature in seen_batch_signatures:
                    print(f"\n Loop detected in Setlists at page {page} (Sig {signature}). Breaking.")
                    break
                seen_batch_signatures.add(signature)
            batch_added = 0
            new_objects = []
            for perf in data:
                elgoose_show_id = perf.get('show_id')
                db_show_id = elgoose_id_to_db_id.get(elgoose_show_id)
                if not db_show_id:
                    continue
                # Guard with `or ''`: an explicit "songname": null in the
                # payload would make .get(..., '') return None and crash
                # on .strip() (the default only applies to missing keys).
                song_name = (perf.get('songname') or '').strip()
                song_id = song_map.get(song_name.lower())
                if not song_id:
                    continue
                position = perf.get('position', 0)
                if (db_show_id, song_id, position) in existing_keys:
                    continue
                # Translate the API's setnumber code into a display name:
                # digits -> "Set N", 'e'/'e2' -> encores, 's' -> soundcheck.
                set_val = str(perf.get('setnumber', '1'))
                if set_val.isdigit():
                    set_name = f"Set {set_val}"
                elif set_val.lower() == 'e':
                    set_name = "Encore"
                elif set_val.lower() == 'e2':
                    set_name = "Encore 2"
                elif set_val.lower() == 's':
                    set_name = "Soundcheck"
                else:
                    set_name = f"Set {set_val}"
                new_perf = Performance(
                    show_id=db_show_id,
                    song_id=song_id,
                    position=position,
                    set_name=set_name,
                    segue=bool(perf.get('segue', 0)),
                    notes=perf.get('footnote'),
                    slug=f"{generate_slug(song_name)}-{db_show_id}-{position}"
                )
                new_objects.append(new_perf)
                # Record the key immediately so duplicates within the same
                # API page are also skipped.
                existing_keys.add((db_show_id, song_id, position))
                batch_added += 1
                total_added += 1
            if new_objects:
                session.add_all(new_objects)
                session.commit()
            print(f" Page {page}: Added {batch_added} (Total {total_added})", end="\r", flush=True)
            page += 1
            if page % 20 == 0:
                gc.collect()
    print(f"\nImport Complete! Total Added: {total_added}")
# Script entry point: run the streaming import when executed directly.
if __name__ == "__main__":
    main()