diff --git a/backend/import_elgoose.py b/backend/import_elgoose.py index 3024021..cbdb27c 100644 --- a/backend/import_elgoose.py +++ b/backend/import_elgoose.py @@ -343,53 +343,67 @@ def import_setlists(session, show_map, song_map): print(f"āœ“ Imported {performance_count} new performances") +def run_import(session: Session, with_users: bool = False): + """Run the import process programmatically""" + # 1. Get or create vertical + print("\nšŸ¦† Creating Goose vertical...") + vertical = session.exec( + select(Vertical).where(Vertical.slug == "goose") + ).first() + + if not vertical: + vertical = Vertical( + name="Goose", + slug="goose", + description="Goose is a jam band from Connecticut" + ) + session.add(vertical) + session.commit() + session.refresh(vertical) + print(f"āœ“ Created vertical (ID: {vertical.id})") + else: + print(f"āœ“ Using existing vertical (ID: {vertical.id})") + + users = [] + if with_users: + # 2. Create users + users = create_users(session) + + # 3. Import base data + venue_map = import_venues(session) + song_map = import_songs(session, vertical.id) + + # 4. Import shows + show_map, tour_map = import_shows(session, vertical.id, venue_map) + + # 5. Import setlists + import_setlists(session, show_map, song_map) + + return { + "venues": len(venue_map), + "tours": len(tour_map), + "songs": len(song_map), + "shows": len(show_map), + "users": len(users) + } + def main(): print("="*60) print("EL GOOSE DATA IMPORTER") print("="*60) with Session(engine) as session: - # 1. 
Get or create vertical - print("\nšŸ¦† Creating Goose vertical...") - vertical = session.exec( - select(Vertical).where(Vertical.slug == "goose") - ).first() - - if not vertical: - vertical = Vertical( - name="Goose", - slug="goose", - description="Goose is a jam band from Connecticut" - ) - session.add(vertical) - session.commit() - session.refresh(vertical) - print(f"āœ“ Created vertical (ID: {vertical.id})") - else: - print(f"āœ“ Using existing vertical (ID: {vertical.id})") - - # 2. Create users - users = create_users(session) - - # 3. Import base data - venue_map = import_venues(session) - song_map = import_songs(session, vertical.id) - - # 4. Import shows - show_map, tour_map = import_shows(session, vertical.id, venue_map) - - # 5. Import setlists - import_setlists(session, show_map, song_map) + stats = run_import(session, with_users=True) print("\n" + "="*60) print("āœ“ IMPORT COMPLETE!") print("="*60) print(f"\nImported:") - print(f" • {len(venue_map)} venues") - print(f" • {len(tour_map)} tours") - print(f" • {len(song_map)} songs") - print(f" • {len(show_map)} shows") - print(f" • {len(users)} demo users") + print(f" • {stats['venues']} venues") + print(f" • {stats['tours']} tours") + print(f" • {stats['songs']} songs") + print(f" • {stats['shows']} shows") + print(f" • {stats['users']} demo users") print(f"\nAll passwords: demo123") print(f"\nStart demo servers:") print(f" Backend: DATABASE_URL='sqlite:///./elmeg-demo.db' uvicorn main:app --reload --port 8001") diff --git a/backend/main.py b/backend/main.py index ea7c334..0454312 100644 --- a/backend/main.py +++ b/backend/main.py @@ -7,8 +7,14 @@ from fastapi.middleware.cors import CORSMiddleware # Feature flags - set to False to disable features ENABLE_BUG_TRACKER = os.getenv("ENABLE_BUG_TRACKER", "true").lower() == "true" +from services.scheduler import start_scheduler + app = FastAPI() +@app.on_event("startup") +def on_startup(): + start_scheduler() + app.add_middleware( CORSMiddleware, 
allow_origins=["*"], # In production, set this to the frontend domain diff --git a/backend/migrations/99_fix_db_data.py b/backend/migrations/99_fix_db_data.py deleted file mode 100644 index 434daa7..0000000 --- a/backend/migrations/99_fix_db_data.py +++ /dev/null @@ -1,288 +0,0 @@ -import sys -import os -sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) - -from sqlmodel import Session, select -from database import engine -from models import Venue, Song, Show, Tour, Performance -from slugify import generate_slug, generate_show_slug -import requests -import time - -BASE_URL = "https://elgoose.net/api/v2" - -def fetch_all_json(endpoint, params=None): - all_data = [] - page = 1 - params = params.copy() if params else {} - print(f"Fetching {endpoint}...") - - seen_ids = set() - - while True: - params['page'] = page - url = f"{BASE_URL}/{endpoint}.json" - try: - resp = requests.get(url, params=params) - if resp.status_code != 200: - print(f" Failed with status {resp.status_code}") - break - - # API can return a dict with 'data' or just a list sometimes, handling both - json_resp = resp.json() - if isinstance(json_resp, dict): - items = json_resp.get('data', []) - elif isinstance(json_resp, list): - items = json_resp - else: - items = [] - - if not items: - print(" No more items found.") - break - - # Check for cycles / infinite loop by checking if we've seen these IDs before - # Assuming items have 'id' or 'show_id' etc. - # If not, we hash the string representation. - new_items_count = 0 - for item in items: - # Try to find a unique identifier - uid = item.get('id') or item.get('show_id') or str(item) - if uid not in seen_ids: - seen_ids.add(uid) - all_data.append(item) - new_items_count += 1 - - if new_items_count == 0: - print(f" Page {page} returned {len(items)} items but all were duplicates. 
Stopping.") - break - - print(f" Page {page} done ({new_items_count} new items)") - page += 1 - time.sleep(0.5) - - # Safety break - if page > 1000: - print(" Hit 1000 pages safety limit.") - break - if page > 200: # Safety break - print(" Safety limit reached.") - break - - except Exception as e: - print(f"Error fetching {endpoint}: {e}") - break - - return all_data - -def fix_data(): - with Session(engine) as session: - # 1. Fix Venues Slugs - print("Fixing Venue Slugs...") - venues = session.exec(select(Venue)).all() - existing_venue_slugs = {v.slug for v in venues if v.slug} - for v in venues: - if not v.slug: - new_slug = generate_slug(v.name) - # Ensure unique - original_slug = new_slug - counter = 1 - while new_slug in existing_venue_slugs: - counter += 1 - new_slug = f"{original_slug}-{counter}" - v.slug = new_slug - existing_venue_slugs.add(new_slug) - session.add(v) - session.commit() - - # 2. Fix Songs Slugs - print("Fixing Song Slugs...") - songs = session.exec(select(Song)).all() - existing_song_slugs = {s.slug for s in songs if s.slug} - for s in songs: - if not s.slug: - new_slug = generate_slug(s.title) - original_slug = new_slug - counter = 1 - while new_slug in existing_song_slugs: - counter += 1 - new_slug = f"{original_slug}-{counter}" - s.slug = new_slug - existing_song_slugs.add(new_slug) - session.add(s) - session.commit() - - # 3. Fix Tours Slugs - print("Fixing Tour Slugs...") - tours = session.exec(select(Tour)).all() - existing_tour_slugs = {t.slug for t in tours if t.slug} - for t in tours: - if not t.slug: - new_slug = generate_slug(t.name) - original_slug = new_slug - counter = 1 - while new_slug in existing_tour_slugs: - counter += 1 - new_slug = f"{original_slug}-{counter}" - t.slug = new_slug - existing_tour_slugs.add(new_slug) - session.add(t) - session.commit() - - # 4. 
Fix Shows Slugs - print("Fixing Show Slugs...") - shows = session.exec(select(Show)).all() - existing_show_slugs = {s.slug for s in shows if s.slug} - venue_map = {v.id: v for v in venues} # Cache venues for naming - - for show in shows: - if not show.slug: - date_str = show.date.strftime("%Y-%m-%d") if show.date else "unknown" - venue_name = "unknown" - if show.venue_id and show.venue_id in venue_map: - venue_name = venue_map[show.venue_id].name - - new_slug = generate_show_slug(date_str, venue_name) - # Ensure unique - original_slug = new_slug - counter = 1 - while new_slug in existing_show_slugs: - counter += 1 - new_slug = f"{original_slug}-{counter}" - - show.slug = new_slug - existing_show_slugs.add(new_slug) - session.add(show) - session.commit() - - # 4b. Fix Performance Slugs - print("Fixing Performance Slugs...") - from slugify import generate_performance_slug - perfs = session.exec(select(Performance)).all() - existing_perf_slugs = {p.slug for p in perfs if p.slug} - - # We need song titles and show dates - # Efficient way: build maps - song_map = {s.id: s.title for s in songs} - show_map = {s.id: s.date.strftime("%Y-%m-%d") for s in shows} - - for p in perfs: - if not p.slug: - song_title = song_map.get(p.song_id, "unknown") - show_date = show_map.get(p.show_id, "unknown") - - new_slug = generate_performance_slug(song_title, show_date) - - # Ensure unique (for reprises etc) - original_slug = new_slug - counter = 1 - while new_slug in existing_perf_slugs: - counter += 1 - new_slug = f"{original_slug}-{counter}" - - p.slug = new_slug - existing_perf_slugs.add(new_slug) - session.add(p) - session.commit() - - # 5. Fix Set Names (Fetch API) - print("Fixing Set Names (fetching setlists)...") - # We need to map El Goose show_id/song_id to our IDs to find the record. - # But we don't store El Goose IDs in our models? - # Checked models.py: we don't store ex_id. - # We match by show date/venue and song title. 
- - # This is hard to do reliably without external IDs. - # Alternatively, we can infer set name from 'position'? - # No, position 1 could be Set 1 or Encore if short show? No. - - # Wait, import_elgoose mappings are local var. - # If we re-run import logic but UPDATE instead of SKIP, we can fix it. - # But matching is tricky. - - # Let's try to match by Show Date and Song Title. - # Build map: (show_id, song_id, position) -> Performance - - # Refresh perfs from DB since we might have added slugs - # perfs = session.exec(select(Performance)).all() # Already have them, but maybe stale? - # Re-querying is safer but PERFS list object is updated by session.add? Yes. - - perf_map = {} # (show_id, song_id, position) -> perf object - for p in perfs: - perf_map[(p.show_id, p.song_id, p.position)] = p - - # We need show map: el_goose_show_id -> our_show_id - # We need song map: el_goose_song_id -> our_song_id - - # We have to re-fetch shows and songs to rebuild this map. - print(" Re-building ID maps...") - - # Map Shows - el_shows = fetch_all_json("shows", {"artist": 1}) - if not el_shows: el_shows = fetch_all_json("shows") # fallback - - el_show_map = {} # el_id -> our_id - for s in el_shows: - # Find our show - dt = s['showdate'] # YYYY-MM-DD - # We need to match precise Show. - # Simplified: match by date. 
- # Convert string to datetime - from datetime import datetime - s_date = datetime.strptime(dt, "%Y-%m-%d") - - # Find show in our DB - # We can optimise this but for now linear search or query is fine for one-off script - found = session.exec(select(Show).where(Show.date == s_date)).first() - if found: - el_show_map[s['show_id']] = found.id - - # Map Songs - el_songs = fetch_all_json("songs") - el_song_map = {} # el_id -> our_id - for s in el_songs: - found = session.exec(select(Song).where(Song.title == s['name'])).first() - if found: - el_song_map[s['id']] = found.id - - # Now fetch setlists - el_setlists = fetch_all_json("setlists") - - count = 0 - for item in el_setlists: - our_show_id = el_show_map.get(item['show_id']) - our_song_id = el_song_map.get(item['song_id']) - position = item.get('position', 0) - - if our_show_id and our_song_id: - # Find existing perf - perf = perf_map.get((our_show_id, our_song_id, position)) - if perf: - # Logic to fix set_name - set_val = str(item.get('setnumber', '1')) - set_name = f"Set {set_val}" - if set_val.isdigit(): - set_name = f"Set {set_val}" - elif set_val.lower() == 'e': - set_name = "Encore" - elif set_val.lower() == 'e2': - set_name = "Encore 2" - elif set_val.lower() == 's': - set_name = "Soundcheck" - - if perf.set_name != set_name: - perf.set_name = set_name - session.add(perf) - count += 1 - else: - # Debug only first few failures to avoid spam - if count < 5: - print(f"Match failed for el_show_id={item.get('show_id')} el_song_id={item.get('song_id')}") - if not our_show_id: print(f" -> Show ID not found in map (Map size: {len(el_show_map)})") - if not our_song_id: print(f" -> Song ID not found in map (Map size: {len(el_song_map)})") - - session.commit() - print(f"Fixed {count} performance set names.") - -if __name__ == "__main__": - fix_data() diff --git a/backend/requirements.txt b/backend/requirements.txt index 589409e..b5f3cc8 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -13,3 
+13,5 @@ requests beautifulsoup4 boto3 email-validator +apscheduler +python-slugify diff --git a/backend/routers/shows.py b/backend/routers/shows.py index d15183a..a010c13 100644 --- a/backend/routers/shows.py +++ b/backend/routers/shows.py @@ -1,9 +1,10 @@ from typing import List from fastapi import APIRouter, Depends, HTTPException, Query from sqlmodel import Session, select +from sqlalchemy import func from database import get_session from models import Show, Tag, EntityTag, Vertical, UserVerticalPreference -from schemas import ShowCreate, ShowRead, ShowUpdate, TagRead +from schemas import ShowCreate, ShowRead, ShowUpdate, TagRead, PaginatedResponse, PaginationMeta from auth import get_current_user, get_current_user_optional router = APIRouter(prefix="/shows", tags=["shows"]) @@ -33,7 +34,7 @@ def create_show( return db_show -@router.get("/", response_model=List[ShowRead]) +@router.get("/", response_model=PaginatedResponse[ShowRead]) def read_shows( offset: int = 0, limit: int = Query(default=2000, le=5000), @@ -49,6 +50,8 @@ def read_shows( session: Session = Depends(get_session) ): from sqlalchemy.orm import joinedload + from datetime import datetime + query = select(Show).options( joinedload(Show.vertical), joinedload(Show.venue), @@ -64,11 +67,12 @@ def read_shows( allowed_ids = [p.vertical_id for p in prefs] # If user selected tiers but has no bands in them, return empty if not allowed_ids: - return [] + return PaginatedResponse(data=[], meta=PaginationMeta(total=0, limit=limit, offset=offset)) query = query.where(Show.vertical_id.in_(allowed_ids)) elif tiers and not current_user: # Anonymous users can't filter by personal tiers - return [] + return PaginatedResponse(data=[], meta=PaginationMeta(total=0, limit=limit, offset=offset)) + if venue_id: query = query.where(Show.venue_id == venue_id) if tour_id: @@ -87,20 +91,28 @@ def read_shows( query = query.where(Show.vertical_id == vertical_id) if status: - from datetime import datetime today = datetime.now() 
if status == "past": query = query.where(Show.date <= today) - query = query.order_by(Show.date.desc()) elif status == "upcoming": query = query.where(Show.date > today) - query = query.order_by(Show.date.asc()) + + # Calculate total count before pagination + total = session.exec(select(func.count()).select_from(query.subquery())).one() + + # Apply sorting and pagination + if status == "upcoming": + query = query.order_by(Show.date.asc()) else: # Default sort by date descending so we get recent shows first query = query.order_by(Show.date.desc()) shows = session.exec(query.offset(offset).limit(limit)).all() - return shows + + return PaginatedResponse( + data=shows, + meta=PaginationMeta(total=total, limit=limit, offset=offset) + ) @router.get("/recent", response_model=List[ShowRead]) def read_recent_shows( diff --git a/backend/schemas.py b/backend/schemas.py index fbcac53..e389912 100644 --- a/backend/schemas.py +++ b/backend/schemas.py @@ -1,4 +1,4 @@ -from typing import Optional, List, Dict +from typing import Optional, List, Dict, Generic, TypeVar from sqlmodel import SQLModel from datetime import datetime @@ -452,3 +452,17 @@ class PublicProfileRead(SQLModel): stats: Dict[str, int] joined_at: datetime + + +# --- Pagination --- + +T = TypeVar('T') + +class PaginationMeta(SQLModel): + total: int + limit: int + offset: int + +class PaginatedResponse(SQLModel, Generic[T]): + data: List[T] + meta: PaginationMeta diff --git a/backend/services/scheduler.py b/backend/services/scheduler.py new file mode 100644 index 0000000..9c14c27 --- /dev/null +++ b/backend/services/scheduler.py @@ -0,0 +1,23 @@ +from apscheduler.schedulers.background import BackgroundScheduler +import import_elgoose +from sqlmodel import Session +from database import engine +import logging + +logger = logging.getLogger(__name__) + +scheduler = BackgroundScheduler() + +def daily_import_job(): + logger.info("Starting daily Goose data import...") + try: + with Session(engine) as session: + stats = 
import_elgoose.run_import(session, with_users=False) + logger.info(f"Daily import complete. Stats: {stats}") + except Exception as e: + logger.error(f"Daily import failed: {e}") + +def start_scheduler(): + scheduler.add_job(daily_import_job, 'interval', hours=24, id='goose_import') + scheduler.start() + logger.info("Scheduler started with daily import job.")