"""One-off repair script.

Backfills missing slugs on Venue / Song / Tour / Show / Performance rows,
then re-fetches El Goose setlists to correct Performance.set_name values.
Safe to re-run: only rows with missing slugs are touched, and set names are
only written when they differ.
"""

import os
import sys
import time
from datetime import datetime

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

import requests
from sqlmodel import Session, select

from database import engine
from models import Venue, Song, Show, Tour, Performance
from slugify import generate_slug, generate_show_slug, generate_performance_slug

BASE_URL = "https://elgoose.net/api/v2"

# Hard cap on pages fetched per endpoint; guards against a paginator that
# never returns an empty page. (The original code had two contradictory
# caps, 1000 and 200 — the 1000 check was unreachable dead code.)
MAX_PAGES = 1000


def fetch_all_json(endpoint, params=None):
    """Fetch every page of ``endpoint`` from the El Goose API.

    Paginates via the ``page`` query parameter. Items are de-duplicated by
    their ``id`` / ``show_id`` (falling back to the item's string repr),
    because a broken paginator may serve the same page forever. Stops when
    a page is empty, a whole page is duplicates, the request fails, or
    MAX_PAGES is reached.

    Args:
        endpoint: API endpoint name without the ``.json`` suffix.
        params: optional extra query parameters (not mutated).

    Returns:
        A list of item dicts; possibly empty (or partial) on error.
    """
    all_data = []
    seen_ids = set()
    params = dict(params) if params else {}
    url = f"{BASE_URL}/{endpoint}.json"
    print(f"Fetching {endpoint}...")

    for page in range(1, MAX_PAGES + 1):
        params['page'] = page
        try:
            resp = requests.get(url, params=params)
            if resp.status_code != 200:
                print(f" Failed with status {resp.status_code}")
                break

            # API can return a dict with 'data' or just a bare list; accept both.
            json_resp = resp.json()
            if isinstance(json_resp, dict):
                items = json_resp.get('data', [])
            elif isinstance(json_resp, list):
                items = json_resp
            else:
                items = []

            if not items:
                print(" No more items found.")
                break

            new_items_count = 0
            for item in items:
                # Explicit None checks so a legitimate id of 0 is not
                # mistaken for "missing" (a falsy `or` chain would be).
                uid = item.get('id')
                if uid is None:
                    uid = item.get('show_id')
                if uid is None:
                    uid = str(item)
                if uid not in seen_ids:
                    seen_ids.add(uid)
                    all_data.append(item)
                    new_items_count += 1

            if new_items_count == 0:
                print(f" Page {page} returned {len(items)} items but all were duplicates. Stopping.")
                break

            print(f" Page {page} done ({new_items_count} new items)")
            time.sleep(0.5)  # be polite to the API
        except Exception as e:
            # Best-effort one-off script: report and return what we have.
            print(f"Error fetching {endpoint}: {e}")
            break
    else:
        print(f" Hit {MAX_PAGES} pages safety limit.")

    return all_data


def _unique_slug(base_slug, existing):
    """Return ``base_slug``, suffixed ``-2``, ``-3``, ... until it is not in ``existing``."""
    slug = base_slug
    counter = 1
    while slug in existing:
        counter += 1
        slug = f"{base_slug}-{counter}"
    return slug


def _set_name_from_code(set_val):
    """Map the El Goose 'setnumber' code to a display set name.

    'e' -> Encore, 'e2' -> Encore 2, 's' -> Soundcheck; anything else
    (typically a digit) becomes "Set <code>".
    """
    code = set_val.lower()
    if code == 'e':
        return "Encore"
    if code == 'e2':
        return "Encore 2"
    if code == 's':
        return "Soundcheck"
    return f"Set {set_val}"


def fix_data():
    """Backfill missing slugs, then repair Performance set names from the API."""
    with Session(engine) as session:
        # 1. Fix Venue slugs.
        print("Fixing Venue Slugs...")
        venues = session.exec(select(Venue)).all()
        existing_venue_slugs = {v.slug for v in venues if v.slug}
        for venue in venues:
            if not venue.slug:
                venue.slug = _unique_slug(generate_slug(venue.name), existing_venue_slugs)
                existing_venue_slugs.add(venue.slug)
                session.add(venue)
        session.commit()

        # 2. Fix Song slugs.
        print("Fixing Song Slugs...")
        songs = session.exec(select(Song)).all()
        existing_song_slugs = {s.slug for s in songs if s.slug}
        for song in songs:
            if not song.slug:
                song.slug = _unique_slug(generate_slug(song.title), existing_song_slugs)
                existing_song_slugs.add(song.slug)
                session.add(song)
        session.commit()

        # 3. Fix Tour slugs.
        print("Fixing Tour Slugs...")
        tours = session.exec(select(Tour)).all()
        existing_tour_slugs = {t.slug for t in tours if t.slug}
        for tour in tours:
            if not tour.slug:
                tour.slug = _unique_slug(generate_slug(tour.name), existing_tour_slugs)
                existing_tour_slugs.add(tour.slug)
                session.add(tour)
        session.commit()

        # 4. Fix Show slugs (built from date + venue name; both may be missing).
        print("Fixing Show Slugs...")
        shows = session.exec(select(Show)).all()
        existing_show_slugs = {s.slug for s in shows if s.slug}
        venue_map = {v.id: v for v in venues}  # cache venues for naming
        for show in shows:
            if not show.slug:
                date_str = show.date.strftime("%Y-%m-%d") if show.date else "unknown"
                venue = venue_map.get(show.venue_id) if show.venue_id else None
                venue_name = venue.name if venue else "unknown"
                show.slug = _unique_slug(generate_show_slug(date_str, venue_name), existing_show_slugs)
                existing_show_slugs.add(show.slug)
                session.add(show)
        session.commit()

        # 4b. Fix Performance slugs (song title + show date; the uniqueness
        # suffix covers reprises of the same song within a show).
        print("Fixing Performance Slugs...")
        perfs = session.exec(select(Performance)).all()
        existing_perf_slugs = {p.slug for p in perfs if p.slug}
        song_title_map = {s.id: s.title for s in songs}
        # BUGFIX: the original called strftime unconditionally and crashed on
        # shows with a NULL date; guard like section 4 does.
        show_date_map = {
            s.id: (s.date.strftime("%Y-%m-%d") if s.date else "unknown") for s in shows
        }
        for perf in perfs:
            if not perf.slug:
                song_title = song_title_map.get(perf.song_id, "unknown")
                show_date = show_date_map.get(perf.show_id, "unknown")
                perf.slug = _unique_slug(
                    generate_performance_slug(song_title, show_date), existing_perf_slugs
                )
                existing_perf_slugs.add(perf.slug)
                session.add(perf)
        session.commit()

        # 5. Fix set names by re-fetching setlists from the API.
        #
        # Our models do not store El Goose IDs, so we rebuild the mapping by
        # matching shows on date and songs on title. This is imperfect (e.g.
        # two shows on the same date collide) but is the best available
        # signal for a one-off repair; unmatched rows are logged below.
        print("Fixing Set Names (fetching setlists)...")

        # (show_id, song_id, position) -> Performance. The objects are
        # session-attached, so mutating them marks them dirty for commit.
        perf_map = {(p.show_id, p.song_id, p.position): p for p in perfs}

        print(" Re-building ID maps...")
        el_shows = fetch_all_json("shows", {"artist": 1})
        if not el_shows:
            el_shows = fetch_all_json("shows")  # fallback: unfiltered list
        el_show_map = {}  # El Goose show_id -> our Show.id
        for el_show in el_shows:
            # 'showdate' is YYYY-MM-DD; match purely on the parsed date.
            show_date = datetime.strptime(el_show['showdate'], "%Y-%m-%d")
            found = session.exec(select(Show).where(Show.date == show_date)).first()
            if found:
                el_show_map[el_show['show_id']] = found.id

        el_songs = fetch_all_json("songs")
        el_song_map = {}  # El Goose song id -> our Song.id
        for el_song in el_songs:
            found = session.exec(select(Song).where(Song.title == el_song['name'])).first()
            if found:
                el_song_map[el_song['id']] = found.id

        el_setlists = fetch_all_json("setlists")
        count = 0
        failures_logged = 0
        for item in el_setlists:
            our_show_id = el_show_map.get(item['show_id'])
            our_song_id = el_song_map.get(item['song_id'])
            position = item.get('position', 0)
            if our_show_id and our_song_id:
                perf = perf_map.get((our_show_id, our_song_id, position))
                if perf:
                    set_name = _set_name_from_code(str(item.get('setnumber', '1')))
                    if perf.set_name != set_name:
                        perf.set_name = set_name
                        session.add(perf)
                        count += 1
            elif failures_logged < 5:
                # BUGFIX: the original gated this on the *success* count, so
                # diagnostics stopped after 5 fixes rather than after 5
                # failures. Log only the first few failures to avoid spam.
                failures_logged += 1
                print(f"Match failed for el_show_id={item.get('show_id')} el_song_id={item.get('song_id')}")
                if not our_show_id:
                    print(f" -> Show ID not found in map (Map size: {len(el_show_map)})")
                if not our_song_id:
                    print(f" -> Song ID not found in map (Map size: {len(el_song_map)})")
        session.commit()
        print(f"Fixed {count} performance set names.")


if __name__ == "__main__":
    fix_data()