186 lines
6.4 KiB
Python
186 lines
6.4 KiB
Python
|
|
"""
|
|
Smart Setlist Importer (Streaming Version)
|
|
Reduces memory usage by processing API data page by page instead of bulk loading.
|
|
"""
|
|
import requests
|
|
import time
|
|
import gc
|
|
from datetime import datetime
|
|
from sqlmodel import Session, select
|
|
from database import engine
|
|
from models import Show, Song, Performance
|
|
from slugify import generate_slug
|
|
|
|
BASE_URL = "https://elgoose.net/api/v2"
|
|
|
|
def fetch_json(endpoint, params=None, retries=3, retry_delay=2):
    """Fetch the ``data`` payload of an El Goose API endpoint, with retries.

    Args:
        endpoint: Path below ``BASE_URL``, without the ``.json`` suffix.
        params: Optional query parameters forwarded to ``requests.get``.
        retries: Maximum number of attempts before giving up.
        retry_delay: Seconds to wait between two attempts.

    Returns:
        The value stored under the response's ``data`` key (``[]`` when the
        key is absent), or ``None`` when the API reports ``error == 1`` or
        when every attempt failed.
    """
    url = f"{BASE_URL}/{endpoint}.json"
    for attempt in range(1, retries + 1):
        try:
            response = requests.get(url, params=params, timeout=30)
            response.raise_for_status()
            payload = response.json()
            if not isinstance(payload, dict):
                # The envelope is expected to be an object carrying
                # 'error' and 'data'; anything else is treated as a
                # failed attempt and retried.
                raise ValueError(
                    f"unexpected payload type {type(payload).__name__}"
                )
        except (requests.RequestException, ValueError) as e:
            # RequestException covers transport and HTTP status errors;
            # ValueError covers undecodable JSON and the check above.
            print(f" Error fetching {endpoint} (attempt {attempt}): {e}")
            if attempt < retries:
                # No point in sleeping once the last attempt has failed.
                time.sleep(retry_delay)
            continue

        if payload.get('error') == 1:
            return None
        return payload.get('data', [])
    return None
|
|
|
|
def _format_set_name(set_number):
    """Turn an API ``setnumber`` value into the set name stored on a Performance."""
    set_val = str(set_number)
    special_names = {"e": "Encore", "e2": "Encore 2", "s": "Soundcheck"}
    # Numeric and unrecognised values both fall back to "Set <value>".
    return special_names.get(set_val.lower(), f"Set {set_val}")


def _map_elgoose_show_ids(date_to_db_id):
    """Page through the ``shows`` endpoint and map ElGoose show IDs to DB show IDs.

    Shows are matched to database rows through their ``showdate`` string,
    which is looked up in ``date_to_db_id`` (keys formatted as YYYY-MM-DD).
    """
    elgoose_id_to_db_id = {}
    seen_first_ids = set()
    page = 1

    while True:
        print(f" Fetching shows page {page}...", end="\r", flush=True)
        data = fetch_json("shows", {"page": page})
        # NOTE(review): a failed fetch (None) and an empty page both end the
        # loop here; the two cases are not distinguished.
        if not data:
            break

        # Loop detection: stop when a page starts with a show ID that
        # already started an earlier page.
        first_id = data[0].get('show_id')
        if first_id:
            if first_id in seen_first_ids:
                print(f"\n Loop detected in Shows at page {page} (ID {first_id}). Breaking.")
                break
            seen_first_ids.add(first_id)

        for show in data:
            show_date = show.get('showdate')
            elgoose_id = show.get('show_id')
            if show_date and elgoose_id:
                db_id = date_to_db_id.get(show_date)
                if db_id:
                    elgoose_id_to_db_id[elgoose_id] = db_id

        page += 1
        if page % 10 == 0:
            gc.collect()

    return elgoose_id_to_db_id


def _import_setlists(session, elgoose_id_to_db_id, song_map, existing_keys):
    """Page through the ``setlists`` endpoint and insert missing performances.

    Rows are skipped when their show or song cannot be resolved, or when
    their (show_id, song_id, position) key is already in ``existing_keys``.
    ``existing_keys`` is updated in place. Each page is committed on its own.

    Returns:
        The number of Performance rows added.
    """
    seen_batch_signatures = set()
    total_added = 0
    page = 1

    while True:
        data = fetch_json("setlists", {"page": page})
        if not data:
            break

        # Loop detection: a page whose first entry was already seen as the
        # first entry of an earlier page means the API is repeating itself.
        first = data[0]
        signature = f"{first.get('uniqueid')}-{first.get('show_id')}-{first.get('position')}"
        if signature in seen_batch_signatures:
            print(f"\n Loop detected in Setlists at page {page} (Sig {signature}). Breaking.")
            break
        seen_batch_signatures.add(signature)

        new_objects = []
        for perf in data:
            db_show_id = elgoose_id_to_db_id.get(perf.get('show_id'))
            if not db_show_id:
                continue

            # 'songname' may be present but null; treat that like a missing
            # name instead of crashing on None.strip().
            song_name = (perf.get('songname') or '').strip()
            song_id = song_map.get(song_name.lower())
            if not song_id:
                continue

            position = perf.get('position', 0)
            key = (db_show_id, song_id, position)
            if key in existing_keys:
                continue

            new_objects.append(
                Performance(
                    show_id=db_show_id,
                    song_id=song_id,
                    position=position,
                    set_name=_format_set_name(perf.get('setnumber', '1')),
                    segue=bool(perf.get('segue', 0)),
                    notes=perf.get('footnote'),
                    slug=f"{generate_slug(song_name)}-{db_show_id}-{position}",
                )
            )
            # Remember the key so duplicates later in the feed are skipped.
            existing_keys.add(key)

        if new_objects:
            session.add_all(new_objects)
            session.commit()

        batch_added = len(new_objects)
        total_added += batch_added
        print(f" Page {page}: Added {batch_added} (Total {total_added})", end="\r", flush=True)

        page += 1
        if page % 20 == 0:
            gc.collect()

    return total_added


def main():
    """Import setlist performances from the El Goose API into the database.

    Steps:
      1. Map each show date already in the database to its show ID.
      2. Page through the API's shows to map ElGoose show IDs to DB show IDs.
      3. Cache song IDs by lower-cased, stripped title.
      4. Page through the API's setlists and insert every performance whose
         (show_id, song_id, position) key is not stored yet.

    Returns early, without importing anything, when the database has no shows.
    """
    print("=" * 60)
    print("SMART SETLIST IMPORTER (STREAMING)")
    print("=" * 60)

    with Session(engine) as session:
        # 1. Build DB Map: Date string -> DB Show ID
        print("\n1. Building DB Map (Date -> Show ID)...")
        shows = session.exec(select(Show.id, Show.date)).all()
        date_to_db_id = {s.date.strftime('%Y-%m-%d'): s.id for s in shows}
        print(f" Mapped {len(date_to_db_id)} existing shows in DB")

        if not date_to_db_id:
            print(" CRITICAL: No shows in database!")
            return

        # Drop the raw rows as soon as the lookup table exists.
        del shows
        gc.collect()

        # 2. Build API Map: ElGoose ID -> DB ID
        print("\n2. Building ElGoose ID -> DB ID map (Streaming)...")
        elgoose_id_to_db_id = _map_elgoose_show_ids(date_to_db_id)
        print(f"\n Mapped {len(elgoose_id_to_db_id)} ElGoose IDs to DB IDs")
        del date_to_db_id
        gc.collect()

        # 3. Caching Songs
        print("\n3. Caching Songs...")
        songs = session.exec(select(Song.id, Song.title)).all()
        song_map = {s.title.lower().strip(): s.id for s in songs}
        del songs
        gc.collect()
        print(f" Cached {len(song_map)} songs")

        # 4. Importing Setlists
        print("\n4. Importing Setlists...")

        # Cache existing performance keys (show_id, song_id, position)
        print(" Caching existing performance keys...")
        perfs = session.exec(
            select(Performance.show_id, Performance.song_id, Performance.position)
        ).all()
        existing_keys = {(p.show_id, p.song_id, p.position) for p in perfs}
        print(f" Cached {len(existing_keys)} existing performances")
        del perfs
        gc.collect()

        total_added = _import_setlists(
            session, elgoose_id_to_db_id, song_map, existing_keys
        )

    print(f"\nImport Complete! Total Added: {total_added}")
|
|
|
|
# Script entry point: run the import only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
|