feat(backend): Implement automation scheduler and pagination envelope

fullsizemalt 2025-12-30 22:29:04 -08:00
parent 3aaf35d43b
commit 2941fa482e
7 changed files with 116 additions and 333 deletions

View file

@@ -343,53 +343,67 @@ def import_setlists(session, show_map, song_map):
     print(f"✓ Imported {performance_count} new performances")
 
+def run_import(session: Session, with_users: bool = False):
+    """Run the import process programmatically"""
+    # 1. Get or create vertical
+    print("\n🦆 Creating Goose vertical...")
+    vertical = session.exec(
+        select(Vertical).where(Vertical.slug == "goose")
+    ).first()
+    if not vertical:
+        vertical = Vertical(
+            name="Goose",
+            slug="goose",
+            description="Goose is a jam band from Connecticut"
+        )
+        session.add(vertical)
+        session.commit()
+        session.refresh(vertical)
+        print(f"✓ Created vertical (ID: {vertical.id})")
+    else:
+        print(f"✓ Using existing vertical (ID: {vertical.id})")
+
+    users = []
+    if with_users:
+        # 2. Create users
+        users = create_users(session)
+
+    # 3. Import base data
+    venue_map = import_venues(session)
+    song_map = import_songs(session, vertical.id)
+
+    # 4. Import shows
+    show_map, tour_map = import_shows(session, vertical.id, venue_map)
+
+    # 5. Import setlists
+    import_setlists(session, show_map, song_map)
+
+    return {
+        "venues": len(venue_map),
+        "tours": len(tour_map),
+        "songs": len(song_map),
+        "shows": len(show_map),
+        "users": len(users)
+    }
+
 def main():
     print("="*60)
     print("EL GOOSE DATA IMPORTER")
     print("="*60)
     with Session(engine) as session:
-        # 1. Get or create vertical
-        print("\n🦆 Creating Goose vertical...")
-        vertical = session.exec(
-            select(Vertical).where(Vertical.slug == "goose")
-        ).first()
-        if not vertical:
-            vertical = Vertical(
-                name="Goose",
-                slug="goose",
-                description="Goose is a jam band from Connecticut"
-            )
-            session.add(vertical)
-            session.commit()
-            session.refresh(vertical)
-            print(f"✓ Created vertical (ID: {vertical.id})")
-        else:
-            print(f"✓ Using existing vertical (ID: {vertical.id})")
-        # 2. Create users
-        users = create_users(session)
-        # 3. Import base data
-        venue_map = import_venues(session)
-        song_map = import_songs(session, vertical.id)
-        # 4. Import shows
-        show_map, tour_map = import_shows(session, vertical.id, venue_map)
-        # 5. Import setlists
-        import_setlists(session, show_map, song_map)
+        stats = run_import(session, with_users=True)
     print("\n" + "="*60)
     print("✓ IMPORT COMPLETE!")
     print("="*60)
     print(f"\nImported:")
-    print(f"{len(venue_map)} venues")
-    print(f"{len(tour_map)} tours")
-    print(f"{len(song_map)} songs")
-    print(f"{len(show_map)} shows")
-    print(f"{len(users)} demo users")
+    print(f"{stats['venues']} venues")
+    print(f"{stats['tours']} tours")
+    print(f"{stats['songs']} songs")
+    print(f"{stats['shows']} shows")
+    print(f"{stats['users']} demo users")
     print(f"\nAll passwords: demo123")
     print(f"\nStart demo servers:")
     print(f" Backend: DATABASE_URL='sqlite:///./elmeg-demo.db' uvicorn main:app --reload --port 8001")

View file

@@ -7,8 +7,14 @@ from fastapi.middleware.cors import CORSMiddleware
 # Feature flags - set to False to disable features
 ENABLE_BUG_TRACKER = os.getenv("ENABLE_BUG_TRACKER", "true").lower() == "true"
 
+from services.scheduler import start_scheduler
+
 app = FastAPI()
 
+@app.on_event("startup")
+def on_startup():
+    start_scheduler()
+
 app.add_middleware(
     CORSMiddleware,
     allow_origins=["*"],  # In production, set this to the frontend domain
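
Worth flagging: @app.on_event("startup") works but is deprecated in recent FastAPI releases in favor of lifespan handlers, which also give a clean shutdown hook for the scheduler. A sketch of that alternative, assuming FastAPI >= 0.93 (not part of this commit):

from contextlib import asynccontextmanager
from fastapi import FastAPI

from services.scheduler import scheduler, start_scheduler

@asynccontextmanager
async def lifespan(app: FastAPI):
    start_scheduler()     # start background jobs once at boot
    yield                 # application runs here
    scheduler.shutdown()  # stop the APScheduler thread on exit

app = FastAPI(lifespan=lifespan)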

View file

@@ -1,288 +0,0 @@
import sys
import os
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from sqlmodel import Session, select
from database import engine
from models import Venue, Song, Show, Tour, Performance
from slugify import generate_slug, generate_show_slug
import requests
import time

BASE_URL = "https://elgoose.net/api/v2"

def fetch_all_json(endpoint, params=None):
    all_data = []
    page = 1
    params = params.copy() if params else {}
    print(f"Fetching {endpoint}...")
    seen_ids = set()
    while True:
        params['page'] = page
        url = f"{BASE_URL}/{endpoint}.json"
        try:
            resp = requests.get(url, params=params)
            if resp.status_code != 200:
                print(f" Failed with status {resp.status_code}")
                break
            # The API can return a dict with a 'data' key or a bare list; handle both.
            json_resp = resp.json()
            if isinstance(json_resp, dict):
                items = json_resp.get('data', [])
            elif isinstance(json_resp, list):
                items = json_resp
            else:
                items = []
            if not items:
                print(" No more items found.")
                break
            # Guard against infinite pagination loops by tracking IDs we've seen.
            # Items usually carry 'id' or 'show_id'; otherwise fall back to the
            # item's string representation.
            new_items_count = 0
            for item in items:
                uid = item.get('id') or item.get('show_id') or str(item)
                if uid not in seen_ids:
                    seen_ids.add(uid)
                    all_data.append(item)
                    new_items_count += 1
            if new_items_count == 0:
                print(f" Page {page} returned {len(items)} items but all were duplicates. Stopping.")
                break
            print(f" Page {page} done ({new_items_count} new items)")
            page += 1
            time.sleep(0.5)
            # Safety break to cap runaway pagination
            if page > 200:
                print(" Safety limit reached.")
                break
        except Exception as e:
            print(f"Error fetching {endpoint}: {e}")
            break
    return all_data
def fix_data():
    with Session(engine) as session:
        # 1. Fix venue slugs
        print("Fixing Venue Slugs...")
        venues = session.exec(select(Venue)).all()
        existing_venue_slugs = {v.slug for v in venues if v.slug}
        for v in venues:
            if not v.slug:
                new_slug = generate_slug(v.name)
                # Ensure uniqueness with a numeric suffix
                original_slug = new_slug
                counter = 1
                while new_slug in existing_venue_slugs:
                    counter += 1
                    new_slug = f"{original_slug}-{counter}"
                v.slug = new_slug
                existing_venue_slugs.add(new_slug)
                session.add(v)
        session.commit()

        # 2. Fix song slugs
        print("Fixing Song Slugs...")
        songs = session.exec(select(Song)).all()
        existing_song_slugs = {s.slug for s in songs if s.slug}
        for s in songs:
            if not s.slug:
                new_slug = generate_slug(s.title)
                original_slug = new_slug
                counter = 1
                while new_slug in existing_song_slugs:
                    counter += 1
                    new_slug = f"{original_slug}-{counter}"
                s.slug = new_slug
                existing_song_slugs.add(new_slug)
                session.add(s)
        session.commit()

        # 3. Fix tour slugs
        print("Fixing Tour Slugs...")
        tours = session.exec(select(Tour)).all()
        existing_tour_slugs = {t.slug for t in tours if t.slug}
        for t in tours:
            if not t.slug:
                new_slug = generate_slug(t.name)
                original_slug = new_slug
                counter = 1
                while new_slug in existing_tour_slugs:
                    counter += 1
                    new_slug = f"{original_slug}-{counter}"
                t.slug = new_slug
                existing_tour_slugs.add(new_slug)
                session.add(t)
        session.commit()

        # 4. Fix show slugs
        print("Fixing Show Slugs...")
        shows = session.exec(select(Show)).all()
        existing_show_slugs = {s.slug for s in shows if s.slug}
        venue_map = {v.id: v for v in venues}  # Cache venues for naming
        for show in shows:
            if not show.slug:
                date_str = show.date.strftime("%Y-%m-%d") if show.date else "unknown"
                venue_name = "unknown"
                if show.venue_id and show.venue_id in venue_map:
                    venue_name = venue_map[show.venue_id].name
                new_slug = generate_show_slug(date_str, venue_name)
                original_slug = new_slug
                counter = 1
                while new_slug in existing_show_slugs:
                    counter += 1
                    new_slug = f"{original_slug}-{counter}"
                show.slug = new_slug
                existing_show_slugs.add(new_slug)
                session.add(show)
        session.commit()

        # 4b. Fix performance slugs
        print("Fixing Performance Slugs...")
        from slugify import generate_performance_slug
        perfs = session.exec(select(Performance)).all()
        existing_perf_slugs = {p.slug for p in perfs if p.slug}
        # Build lookup maps for song titles and show dates
        song_map = {s.id: s.title for s in songs}
        show_map = {s.id: s.date.strftime("%Y-%m-%d") for s in shows}
        for p in perfs:
            if not p.slug:
                song_title = song_map.get(p.song_id, "unknown")
                show_date = show_map.get(p.show_id, "unknown")
                new_slug = generate_performance_slug(song_title, show_date)
                # Ensure uniqueness (reprises repeat a song within a show)
                original_slug = new_slug
                counter = 1
                while new_slug in existing_perf_slugs:
                    counter += 1
                    new_slug = f"{original_slug}-{counter}"
                p.slug = new_slug
                existing_perf_slugs.add(new_slug)
                session.add(p)
        session.commit()

        # 5. Fix set names by re-fetching setlists from the API.
        # We never stored El Goose IDs in our models, so records can't be
        # matched directly. Instead, rebuild ID maps by matching shows on
        # date and songs on title, then look each performance up by
        # (show_id, song_id, position). The perfs list loaded above is still
        # valid here because session.add() mutates the same objects in place.
        print("Fixing Set Names (fetching setlists)...")
        perf_map = {}  # (show_id, song_id, position) -> Performance
        for p in perfs:
            perf_map[(p.show_id, p.song_id, p.position)] = p

        print(" Re-building ID maps...")
        from datetime import datetime

        # Map El Goose show IDs to ours by date
        el_shows = fetch_all_json("shows", {"artist": 1})
        if not el_shows:
            el_shows = fetch_all_json("shows")  # fallback
        el_show_map = {}  # el_id -> our_id
        for s in el_shows:
            # 'showdate' is YYYY-MM-DD; a query per row is fine for a one-off script
            s_date = datetime.strptime(s['showdate'], "%Y-%m-%d")
            found = session.exec(select(Show).where(Show.date == s_date)).first()
            if found:
                el_show_map[s['show_id']] = found.id

        # Map El Goose song IDs to ours by title
        el_songs = fetch_all_json("songs")
        el_song_map = {}  # el_id -> our_id
        for s in el_songs:
            found = session.exec(select(Song).where(Song.title == s['name'])).first()
            if found:
                el_song_map[s['id']] = found.id

        # Now fetch setlists and patch set names
        el_setlists = fetch_all_json("setlists")
        count = 0
        for item in el_setlists:
            our_show_id = el_show_map.get(item['show_id'])
            our_song_id = el_song_map.get(item['song_id'])
            position = item.get('position', 0)
            if our_show_id and our_song_id:
                perf = perf_map.get((our_show_id, our_song_id, position))
                if perf:
                    # Translate El Goose set codes into display names
                    set_val = str(item.get('setnumber', '1'))
                    if set_val.lower() == 'e':
                        set_name = "Encore"
                    elif set_val.lower() == 'e2':
                        set_name = "Encore 2"
                    elif set_val.lower() == 's':
                        set_name = "Soundcheck"
                    else:
                        set_name = f"Set {set_val}"
                    if perf.set_name != set_name:
                        perf.set_name = set_name
                        session.add(perf)
                        count += 1
            else:
                # Log only the first few failures to avoid spam
                if count < 5:
                    print(f"Match failed for el_show_id={item.get('show_id')} el_song_id={item.get('song_id')}")
                    if not our_show_id:
                        print(f" -> Show ID not found in map (Map size: {len(el_show_map)})")
                    if not our_song_id:
                        print(f" -> Song ID not found in map (Map size: {len(el_song_map)})")
        session.commit()
        print(f"Fixed {count} performance set names.")

if __name__ == "__main__":
    fix_data()
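
For reference, the paginated fetcher above is exercised by the script itself; as standalone usage (calls taken from fix_data, illustrative only):

# Pull every Goose show, then every song, deduplicated across pages.
shows = fetch_all_json("shows", {"artist": 1})
songs = fetch_all_json("songs")
print(f"{len(shows)} shows, {len(songs)} songs fetched")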

View file

@@ -13,3 +13,5 @@ requests
 beautifulsoup4
 boto3
 email-validator
+apscheduler
+python-slugify

View file

@@ -1,9 +1,10 @@
 from typing import List
 from fastapi import APIRouter, Depends, HTTPException, Query
 from sqlmodel import Session, select
+from sqlalchemy import func
 from database import get_session
 from models import Show, Tag, EntityTag, Vertical, UserVerticalPreference
-from schemas import ShowCreate, ShowRead, ShowUpdate, TagRead
+from schemas import ShowCreate, ShowRead, ShowUpdate, TagRead, PaginatedResponse, PaginationMeta
 from auth import get_current_user, get_current_user_optional
 
 router = APIRouter(prefix="/shows", tags=["shows"])
@@ -33,7 +34,7 @@ def create_show(
     return db_show
 
-@router.get("/", response_model=List[ShowRead])
+@router.get("/", response_model=PaginatedResponse[ShowRead])
 def read_shows(
     offset: int = 0,
     limit: int = Query(default=2000, le=5000),
@@ -49,6 +50,8 @@ def read_shows(
     session: Session = Depends(get_session)
 ):
     from sqlalchemy.orm import joinedload
+    from datetime import datetime
+
     query = select(Show).options(
         joinedload(Show.vertical),
         joinedload(Show.venue),
@@ -64,11 +67,12 @@ def read_shows(
         allowed_ids = [p.vertical_id for p in prefs]
         # If user selected tiers but has no bands in them, return empty
         if not allowed_ids:
-            return []
+            return PaginatedResponse(data=[], meta=PaginationMeta(total=0, limit=limit, offset=offset))
         query = query.where(Show.vertical_id.in_(allowed_ids))
     elif tiers and not current_user:
         # Anonymous users can't filter by personal tiers
-        return []
+        return PaginatedResponse(data=[], meta=PaginationMeta(total=0, limit=limit, offset=offset))
+
     if venue_id:
         query = query.where(Show.venue_id == venue_id)
     if tour_id:
@@ -87,20 +91,28 @@ def read_shows(
         query = query.where(Show.vertical_id == vertical_id)
 
     if status:
-        from datetime import datetime
         today = datetime.now()
         if status == "past":
             query = query.where(Show.date <= today)
-            query = query.order_by(Show.date.desc())
         elif status == "upcoming":
             query = query.where(Show.date > today)
-            query = query.order_by(Show.date.asc())
+
+    # Calculate total count before pagination
+    total = session.exec(select(func.count()).select_from(query.subquery())).one()
+
+    # Apply sorting and pagination
+    if status == "upcoming":
+        query = query.order_by(Show.date.asc())
     else:
         # Default sort by date descending so we get recent shows first
        query = query.order_by(Show.date.desc())
+
     shows = session.exec(query.offset(offset).limit(limit)).all()
-    return shows
+
+    return PaginatedResponse(
+        data=shows,
+        meta=PaginationMeta(total=total, limit=limit, offset=offset)
+    )
 
 @router.get("/recent", response_model=List[ShowRead])
 def read_recent_shows(
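
From the client side, the envelope makes it possible to walk the full collection instead of guessing when to stop. A hypothetical consumer, assuming the local dev server from the importer notes (port 8001) and the requests library:

import requests

BASE_URL = "http://localhost:8001"  # assumed local dev server

def fetch_all_shows(page_size=500):
    """Page through /shows/ until meta.total is exhausted."""
    offset, shows = 0, []
    while True:
        resp = requests.get(f"{BASE_URL}/shows/", params={"offset": offset, "limit": page_size})
        resp.raise_for_status()
        body = resp.json()
        shows.extend(body["data"])
        offset += page_size
        if offset >= body["meta"]["total"]:
            return shows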

View file

@@ -1,4 +1,4 @@
-from typing import Optional, List, Dict
+from typing import Optional, List, Dict, Generic, TypeVar
 from sqlmodel import SQLModel
 from datetime import datetime
@@ -452,3 +452,17 @@ class PublicProfileRead(SQLModel):
     stats: Dict[str, int]
     joined_at: datetime
 
+# --- Pagination ---
+T = TypeVar('T')
+
+class PaginationMeta(SQLModel):
+    total: int
+    limit: int
+    offset: int
+
+class PaginatedResponse(SQLModel, Generic[T]):
+    data: List[T]
+    meta: PaginationMeta
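
The Generic[T] envelope can be reused by any route; FastAPI resolves the concrete type from the response_model annotation, as the shows route above does with PaginatedResponse[ShowRead]. A quick illustrative check, assuming a pydantic-v2-based SQLModel (on pydantic v1 it would be .dict()):

# Illustrative only - parameterize the envelope and inspect its serialized form.
meta = PaginationMeta(total=2, limit=50, offset=0)
page = PaginatedResponse[int](data=[1, 2], meta=meta)
print(page.model_dump())
# {'data': [1, 2], 'meta': {'total': 2, 'limit': 50, 'offset': 0}}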

View file

@@ -0,0 +1,23 @@
from apscheduler.schedulers.background import BackgroundScheduler
import import_elgoose
from sqlmodel import Session
from database import engine
import logging

logger = logging.getLogger(__name__)

scheduler = BackgroundScheduler()

def daily_import_job():
    logger.info("Starting daily Goose data import...")
    try:
        with Session(engine) as session:
            stats = import_elgoose.run_import(session, with_users=False)
        logger.info(f"Daily import complete. Stats: {stats}")
    except Exception as e:
        logger.error(f"Daily import failed: {e}")

def start_scheduler():
    scheduler.add_job(daily_import_job, 'interval', hours=12, id='goose_import')
    scheduler.start()
    logger.info("Scheduler started with daily import job.")