- Fork elmeg-demo codebase for multi-band support
- Add data importer infrastructure with base class (sketched below)
- Create band-specific importers:
  - phish.py: Phish.net API v5
  - grateful_dead.py: Grateful Stats API
  - setlistfm.py: Dead & Company, Billy Strings (Setlist.fm)
- Add spec-kit configuration for Gemini
- Update README with supported bands and architecture
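
The base importer class itself is not shown on this page. As a rough, hypothetical sketch of the layout the commit describes (the names BaseImporter, PhishImporter, fetch_shows, and normalize are illustrative, not taken from the repository), the shared interface might look like this:

from abc import ABC, abstractmethod


class BaseImporter(ABC):
    """Shared contract that each band-specific importer implements."""

    band_name: str = ""

    @abstractmethod
    def fetch_shows(self) -> list[dict]:
        """Return raw show records from the band's upstream API."""

    @abstractmethod
    def normalize(self, raw: dict) -> dict:
        """Map an upstream record onto the shared Show/Venue/Song schema."""

    def run(self) -> list[dict]:
        # Template method: fetch, normalize, and return rows ready to persist.
        return [self.normalize(item) for item in self.fetch_shows()]


class PhishImporter(BaseImporter):
    # Hypothetical subclass; the real phish.py would call the Phish.net API v5 here.
    band_name = "Phish"

    def fetch_shows(self) -> list[dict]:
        return []  # placeholder for the upstream API call

    def normalize(self, raw: dict) -> dict:
        return {"band": self.band_name, **raw}

Under this layout, grateful_dead.py and setlistfm.py would subclass the same base and override only the API-specific fetching and field mapping.
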
288 lines · 11 KiB · Python
import sys
import os

# Make the project root importable before pulling in local modules
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

import time
from datetime import datetime

import requests
from sqlmodel import Session, select

from database import engine
from models import Venue, Song, Show, Tour, Performance
from slugify import generate_slug, generate_show_slug, generate_performance_slug

BASE_URL = "https://elgoose.net/api/v2"


def fetch_all_json(endpoint, params=None):
    all_data = []
    page = 1
    params = params.copy() if params else {}
    print(f"Fetching {endpoint}...")

    seen_ids = set()

    while True:
        params['page'] = page
        url = f"{BASE_URL}/{endpoint}.json"
        try:
            resp = requests.get(url, params=params)
            if resp.status_code != 200:
                print(f" Failed with status {resp.status_code}")
                break

            # The API sometimes returns a dict with a 'data' key and sometimes a bare list; handle both.
            json_resp = resp.json()
            if isinstance(json_resp, dict):
                items = json_resp.get('data', [])
            elif isinstance(json_resp, list):
                items = json_resp
            else:
                items = []

            if not items:
                print(" No more items found.")
                break

            # Guard against infinite pagination by tracking IDs we have already seen.
            # Items are expected to carry 'id' or 'show_id'; otherwise fall back to
            # the string representation.
            new_items_count = 0
            for item in items:
                uid = item.get('id') or item.get('show_id') or str(item)
                if uid not in seen_ids:
                    seen_ids.add(uid)
                    all_data.append(item)
                    new_items_count += 1

            if new_items_count == 0:
                print(f" Page {page} returned {len(items)} items but all were duplicates. Stopping.")
                break

            print(f" Page {page} done ({new_items_count} new items)")
            page += 1
            time.sleep(0.5)

            # Safety break to cap pagination
            if page > 200:
                print(" Safety limit reached.")
                break

        except Exception as e:
            print(f"Error fetching {endpoint}: {e}")
            break

    return all_data


def fix_data():
    with Session(engine) as session:
        # 1. Fix Venue Slugs
        print("Fixing Venue Slugs...")
        venues = session.exec(select(Venue)).all()
        existing_venue_slugs = {v.slug for v in venues if v.slug}
        for v in venues:
            if not v.slug:
                new_slug = generate_slug(v.name)
                # Ensure uniqueness by appending a counter
                original_slug = new_slug
                counter = 1
                while new_slug in existing_venue_slugs:
                    counter += 1
                    new_slug = f"{original_slug}-{counter}"
                v.slug = new_slug
                existing_venue_slugs.add(new_slug)
                session.add(v)
        session.commit()

        # 2. Fix Song Slugs
        print("Fixing Song Slugs...")
        songs = session.exec(select(Song)).all()
        existing_song_slugs = {s.slug for s in songs if s.slug}
        for s in songs:
            if not s.slug:
                new_slug = generate_slug(s.title)
                original_slug = new_slug
                counter = 1
                while new_slug in existing_song_slugs:
                    counter += 1
                    new_slug = f"{original_slug}-{counter}"
                s.slug = new_slug
                existing_song_slugs.add(new_slug)
                session.add(s)
        session.commit()

        # 3. Fix Tour Slugs
        print("Fixing Tour Slugs...")
        tours = session.exec(select(Tour)).all()
        existing_tour_slugs = {t.slug for t in tours if t.slug}
        for t in tours:
            if not t.slug:
                new_slug = generate_slug(t.name)
                original_slug = new_slug
                counter = 1
                while new_slug in existing_tour_slugs:
                    counter += 1
                    new_slug = f"{original_slug}-{counter}"
                t.slug = new_slug
                existing_tour_slugs.add(new_slug)
                session.add(t)
        session.commit()

        # 4. Fix Show Slugs
        print("Fixing Show Slugs...")
        shows = session.exec(select(Show)).all()
        existing_show_slugs = {s.slug for s in shows if s.slug}
        venue_map = {v.id: v for v in venues}  # Cache venues for slug naming

        for show in shows:
            if not show.slug:
                date_str = show.date.strftime("%Y-%m-%d") if show.date else "unknown"
                venue_name = "unknown"
                if show.venue_id and show.venue_id in venue_map:
                    venue_name = venue_map[show.venue_id].name

                new_slug = generate_show_slug(date_str, venue_name)
                # Ensure unique
                original_slug = new_slug
                counter = 1
                while new_slug in existing_show_slugs:
                    counter += 1
                    new_slug = f"{original_slug}-{counter}"

                show.slug = new_slug
                existing_show_slugs.add(new_slug)
                session.add(show)
        session.commit()

        # 4b. Fix Performance Slugs
        print("Fixing Performance Slugs...")
        perfs = session.exec(select(Performance)).all()
        existing_perf_slugs = {p.slug for p in perfs if p.slug}

        # Build lookup maps for song titles and show dates
        song_map = {s.id: s.title for s in songs}
        show_map = {s.id: s.date.strftime("%Y-%m-%d") if s.date else "unknown" for s in shows}

        for p in perfs:
            if not p.slug:
                song_title = song_map.get(p.song_id, "unknown")
                show_date = show_map.get(p.show_id, "unknown")

                new_slug = generate_performance_slug(song_title, show_date)

                # Ensure unique (reprises of the same song in one show)
                original_slug = new_slug
                counter = 1
                while new_slug in existing_perf_slugs:
                    counter += 1
                    new_slug = f"{original_slug}-{counter}"

                p.slug = new_slug
                existing_perf_slugs.add(new_slug)
                session.add(p)
        session.commit()

        # 5. Fix Set Names by re-fetching setlists from the API.
        # The models do not store El Goose IDs, so records are matched
        # indirectly: shows by date, songs by title, and performances by
        # (show_id, song_id, position).
        print("Fixing Set Names (fetching setlists)...")

        # The perf objects already in memory reflect the slug fixes above.
        perf_map = {}  # (show_id, song_id, position) -> Performance
        for p in perfs:
            perf_map[(p.show_id, p.song_id, p.position)] = p

        # Rebuild maps from El Goose IDs to our IDs
        print(" Re-building ID maps...")

        # Map El Goose show IDs to our show IDs (match by show date)
        el_shows = fetch_all_json("shows", {"artist": 1})
        if not el_shows:
            el_shows = fetch_all_json("shows")  # fallback without the artist filter

        el_show_map = {}  # el_goose show_id -> our show id
        for s in el_shows:
            s_date = datetime.strptime(s['showdate'], "%Y-%m-%d")
            # A per-row query is fine for a one-off script
            found = session.exec(select(Show).where(Show.date == s_date)).first()
            if found:
                el_show_map[s['show_id']] = found.id

        # Map El Goose song IDs to our song IDs (match by title)
        el_songs = fetch_all_json("songs")
        el_song_map = {}  # el_goose song id -> our song id
        for s in el_songs:
            found = session.exec(select(Song).where(Song.title == s['name'])).first()
            if found:
                el_song_map[s['id']] = found.id

        # Now fetch setlists and update set names
        el_setlists = fetch_all_json("setlists")

        count = 0
        for item in el_setlists:
            our_show_id = el_show_map.get(item['show_id'])
            our_song_id = el_song_map.get(item['song_id'])
            position = item.get('position', 0)

            if our_show_id and our_song_id:
                # Find the existing performance
                perf = perf_map.get((our_show_id, our_song_id, position))
                if perf:
                    # Translate the El Goose set code into a display name
                    set_val = str(item.get('setnumber', '1'))
                    if set_val.isdigit():
                        set_name = f"Set {set_val}"
                    elif set_val.lower() == 'e':
                        set_name = "Encore"
                    elif set_val.lower() == 'e2':
                        set_name = "Encore 2"
                    elif set_val.lower() == 's':
                        set_name = "Soundcheck"
                    else:
                        set_name = f"Set {set_val}"

                    if perf.set_name != set_name:
                        perf.set_name = set_name
                        session.add(perf)
                        count += 1
            else:
                # Log only the first few failures to avoid spamming the output
                if count < 5:
                    print(f"Match failed for el_show_id={item.get('show_id')} el_song_id={item.get('song_id')}")
                    if not our_show_id:
                        print(f" -> Show ID not found in map (Map size: {len(el_show_map)})")
                    if not our_song_id:
                        print(f" -> Song ID not found in map (Map size: {len(el_song_map)})")

        session.commit()
        print(f"Fixed {count} performance set names.")


if __name__ == "__main__":
    fix_data()