feat(backend): Implement automation scheduler and pagination envelope

This commit is contained in:
fullsizemalt 2025-12-30 22:29:04 -08:00
parent 3aaf35d43b
commit 2941fa482e
7 changed files with 116 additions and 333 deletions

View file

@ -343,53 +343,67 @@ def import_setlists(session, show_map, song_map):
print(f"✓ Imported {performance_count} new performances")
def run_import(session: Session, with_users: bool = False):
    """Run the import process programmatically"""
    # Step 1: make sure the Goose vertical exists before importing anything.
    print("\n🦆 Creating Goose vertical...")
    existing = session.exec(
        select(Vertical).where(Vertical.slug == "goose")
    ).first()
    if existing:
        vertical = existing
        print(f"✓ Using existing vertical (ID: {vertical.id})")
    else:
        vertical = Vertical(
            name="Goose",
            slug="goose",
            description="Goose is a jam band from Connecticut"
        )
        session.add(vertical)
        session.commit()
        session.refresh(vertical)
        print(f"✓ Created vertical (ID: {vertical.id})")

    # Step 2: demo users are only created when explicitly requested.
    demo_users = create_users(session) if with_users else []

    # Steps 3-5: base data first, then shows, then the setlists that link them.
    venues = import_venues(session)
    songs = import_songs(session, vertical.id)
    shows, tours = import_shows(session, vertical.id, venues)
    import_setlists(session, shows, songs)

    # Summary counts for the caller (CLI entry point or scheduler job).
    return {
        "venues": len(venues),
        "tours": len(tours),
        "songs": len(songs),
        "shows": len(shows),
        "users": len(demo_users)
    }
def main():
print("="*60)
print("EL GOOSE DATA IMPORTER")
print("="*60)
with Session(engine) as session:
# 1. Get or create vertical
print("\n🦆 Creating Goose vertical...")
vertical = session.exec(
select(Vertical).where(Vertical.slug == "goose")
).first()
if not vertical:
vertical = Vertical(
name="Goose",
slug="goose",
description="Goose is a jam band from Connecticut"
)
session.add(vertical)
session.commit()
session.refresh(vertical)
print(f"✓ Created vertical (ID: {vertical.id})")
else:
print(f"✓ Using existing vertical (ID: {vertical.id})")
# 2. Create users
users = create_users(session)
# 3. Import base data
venue_map = import_venues(session)
song_map = import_songs(session, vertical.id)
# 4. Import shows
show_map, tour_map = import_shows(session, vertical.id, venue_map)
# 5. Import setlists
import_setlists(session, show_map, song_map)
stats = run_import(session, with_users=True)
print("\n" + "="*60)
print("✓ IMPORT COMPLETE!")
print("="*60)
print(f"\nImported:")
print(f"{len(venue_map)} venues")
print(f"{len(tour_map)} tours")
print(f"{len(song_map)} songs")
print(f"{len(show_map)} shows")
print(f"{len(users)} demo users")
print(f"{stats['venues']} venues")
print(f"{stats['tours']} tours")
print(f"{stats['songs']} songs")
print(f"{stats['shows']} shows")
print(f"{stats['users']} demo users")
print(f"\nAll passwords: demo123")
print(f"\nStart demo servers:")
print(f" Backend: DATABASE_URL='sqlite:///./elmeg-demo.db' uvicorn main:app --reload --port 8001")

View file

@ -7,8 +7,14 @@ from fastapi.middleware.cors import CORSMiddleware
# Feature flags - set to False to disable features
ENABLE_BUG_TRACKER = os.getenv("ENABLE_BUG_TRACKER", "true").lower() == "true"
from services.scheduler import start_scheduler
app = FastAPI()
@app.on_event("startup")
def on_startup():
    """Launch the APScheduler background import job when the app boots."""
    # NOTE(review): @app.on_event is deprecated in newer FastAPI versions —
    # consider migrating to the lifespan= context-manager API.
    start_scheduler()
app.add_middleware(
CORSMiddleware,
allow_origins=["*"], # In production, set this to the frontend domain

View file

@ -1,288 +0,0 @@
import sys
import os
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from sqlmodel import Session, select
from database import engine
from models import Venue, Song, Show, Tour, Performance
from slugify import generate_slug, generate_show_slug
import requests
import time
BASE_URL = "https://elgoose.net/api/v2"
def fetch_all_json(endpoint, params=None):
    """Fetch every page of an elgoose.net API endpoint, de-duplicated.

    Args:
        endpoint: endpoint name without the ".json" suffix (e.g. "shows").
        params: optional base query params; copied so the caller's dict is
            never mutated.

    Returns:
        A list of item dicts accumulated across all pages.
    """
    all_data = []
    page = 1
    params = params.copy() if params else {}
    print(f"Fetching {endpoint}...")
    seen_ids = set()
    while True:
        params['page'] = page
        url = f"{BASE_URL}/{endpoint}.json"
        try:
            # Timeout guards against a hung connection stalling the import forever.
            resp = requests.get(url, params=params, timeout=30)
            if resp.status_code != 200:
                print(f" Failed with status {resp.status_code}")
                break
            # API can return a dict with 'data' or just a list sometimes, handling both
            json_resp = resp.json()
            if isinstance(json_resp, dict):
                items = json_resp.get('data', [])
            elif isinstance(json_resp, list):
                items = json_resp
            else:
                items = []
            if not items:
                print(" No more items found.")
                break
            # De-duplicate across pages: some endpoints repeat the final page,
            # which would otherwise loop until the safety cap.
            new_items_count = 0
            for item in items:
                # NOTE(review): a falsy id (e.g. 0) falls through to str(item) —
                # confirm the API never uses 0 as a real identifier.
                uid = item.get('id') or item.get('show_id') or str(item)
                if uid not in seen_ids:
                    seen_ids.add(uid)
                    all_data.append(item)
                    new_items_count += 1
            if new_items_count == 0:
                print(f" Page {page} returned {len(items)} items but all were duplicates. Stopping.")
                break
            print(f" Page {page} done ({new_items_count} new items)")
            page += 1
            time.sleep(0.5)
            # Single safety cap. (The original also checked page > 1000, but
            # that branch was unreachable because this one always fired first.)
            if page > 200:
                print(" Safety limit reached.")
                break
        except Exception as e:
            print(f"Error fetching {endpoint}: {e}")
            break
    return all_data
def _unique_slug(base_slug, existing_slugs):
    """Return base_slug, or base_slug-2, -3, ... until it is absent from existing_slugs."""
    slug = base_slug
    counter = 1
    while slug in existing_slugs:
        counter += 1
        slug = f"{base_slug}-{counter}"
    return slug


def fix_data():
    """Backfill missing slugs on all entities and repair Performance set names.

    One-off maintenance script. Slugs are generated locally from names/titles;
    set names are re-derived by re-fetching setlists from the elgoose.net API
    and matching rows by (show date, song title, position), since our models
    do not store elgoose IDs.
    """
    from datetime import datetime  # hoisted out of the per-show loop below

    with Session(engine) as session:
        # 1. Fix Venue slugs
        print("Fixing Venue Slugs...")
        venues = session.exec(select(Venue)).all()
        existing_venue_slugs = {v.slug for v in venues if v.slug}
        for v in venues:
            if not v.slug:
                v.slug = _unique_slug(generate_slug(v.name), existing_venue_slugs)
                existing_venue_slugs.add(v.slug)
                session.add(v)
        session.commit()

        # 2. Fix Song slugs
        print("Fixing Song Slugs...")
        songs = session.exec(select(Song)).all()
        existing_song_slugs = {s.slug for s in songs if s.slug}
        for s in songs:
            if not s.slug:
                s.slug = _unique_slug(generate_slug(s.title), existing_song_slugs)
                existing_song_slugs.add(s.slug)
                session.add(s)
        session.commit()

        # 3. Fix Tour slugs
        print("Fixing Tour Slugs...")
        tours = session.exec(select(Tour)).all()
        existing_tour_slugs = {t.slug for t in tours if t.slug}
        for t in tours:
            if not t.slug:
                t.slug = _unique_slug(generate_slug(t.name), existing_tour_slugs)
                existing_tour_slugs.add(t.slug)
                session.add(t)
        session.commit()

        # 4. Fix Show slugs (built from date + venue name)
        print("Fixing Show Slugs...")
        shows = session.exec(select(Show)).all()
        existing_show_slugs = {s.slug for s in shows if s.slug}
        venue_map = {v.id: v for v in venues}  # cache venues for naming
        for show in shows:
            if not show.slug:
                date_str = show.date.strftime("%Y-%m-%d") if show.date else "unknown"
                venue_name = "unknown"
                if show.venue_id and show.venue_id in venue_map:
                    venue_name = venue_map[show.venue_id].name
                show.slug = _unique_slug(
                    generate_show_slug(date_str, venue_name), existing_show_slugs
                )
                existing_show_slugs.add(show.slug)
                session.add(show)
        session.commit()

        # 4b. Fix Performance slugs (song title + show date)
        print("Fixing Performance Slugs...")
        from slugify import generate_performance_slug
        perfs = session.exec(select(Performance)).all()
        existing_perf_slugs = {p.slug for p in perfs if p.slug}
        song_map = {s.id: s.title for s in songs}
        # Guard None dates here too: the original crashed on a show without a
        # date even though the slug pass above handled that case.
        show_map = {
            s.id: (s.date.strftime("%Y-%m-%d") if s.date else "unknown")
            for s in shows
        }
        for p in perfs:
            if not p.slug:
                song_title = song_map.get(p.song_id, "unknown")
                show_date = show_map.get(p.show_id, "unknown")
                # Uniquifying handles reprises of the same song on the same date.
                p.slug = _unique_slug(
                    generate_performance_slug(song_title, show_date),
                    existing_perf_slugs,
                )
                existing_perf_slugs.add(p.slug)
                session.add(p)
        session.commit()

        # 5. Fix set names by re-fetching setlists from the API. We rebuild
        # elgoose-id -> our-id maps by matching shows on date and songs on
        # title, then match performances on (show_id, song_id, position).
        print("Fixing Set Names (fetching setlists)...")
        perf_map = {(p.show_id, p.song_id, p.position): p for p in perfs}

        print(" Re-building ID maps...")
        el_shows = fetch_all_json("shows", {"artist": 1})
        if not el_shows:
            el_shows = fetch_all_json("shows")  # fallback
        el_show_map = {}  # el_id -> our_id
        for s in el_shows:
            # Simplified matching by date only — may be ambiguous if two shows
            # share a date, but acceptable for this one-off script.
            s_date = datetime.strptime(s['showdate'], "%Y-%m-%d")
            found = session.exec(select(Show).where(Show.date == s_date)).first()
            if found:
                el_show_map[s['show_id']] = found.id

        el_songs = fetch_all_json("songs")
        el_song_map = {}  # el_id -> our_id
        for s in el_songs:
            found = session.exec(select(Song).where(Song.title == s['name'])).first()
            if found:
                el_song_map[s['id']] = found.id

        el_setlists = fetch_all_json("setlists")
        count = 0
        for item in el_setlists:
            our_show_id = el_show_map.get(item['show_id'])
            our_song_id = el_song_map.get(item['song_id'])
            position = item.get('position', 0)
            if our_show_id and our_song_id:
                perf = perf_map.get((our_show_id, our_song_id, position))
                if perf:
                    # Translate elgoose 'setnumber' codes into display names.
                    set_val = str(item.get('setnumber', '1'))
                    if set_val.lower() == 'e':
                        set_name = "Encore"
                    elif set_val.lower() == 'e2':
                        set_name = "Encore 2"
                    elif set_val.lower() == 's':
                        set_name = "Soundcheck"
                    else:
                        set_name = f"Set {set_val}"
                    if perf.set_name != set_name:
                        perf.set_name = set_name
                        session.add(perf)
                        count += 1
            else:
                # Debug only first few failures to avoid spam
                if count < 5:
                    print(f"Match failed for el_show_id={item.get('show_id')} el_song_id={item.get('song_id')}")
                    if not our_show_id:
                        print(f" -> Show ID not found in map (Map size: {len(el_show_map)})")
                    if not our_song_id:
                        print(f" -> Song ID not found in map (Map size: {len(el_song_map)})")
        session.commit()
        print(f"Fixed {count} performance set names.")


if __name__ == "__main__":
    fix_data()

View file

@ -13,3 +13,5 @@ requests
beautifulsoup4
boto3
email-validator
apscheduler
python-slugify

View file

@ -1,9 +1,10 @@
from typing import List
from fastapi import APIRouter, Depends, HTTPException, Query
from sqlmodel import Session, select
from sqlalchemy import func
from database import get_session
from models import Show, Tag, EntityTag, Vertical, UserVerticalPreference
from schemas import ShowCreate, ShowRead, ShowUpdate, TagRead
from schemas import ShowCreate, ShowRead, ShowUpdate, TagRead, PaginatedResponse, PaginationMeta
from auth import get_current_user, get_current_user_optional
router = APIRouter(prefix="/shows", tags=["shows"])
@ -33,7 +34,7 @@ def create_show(
return db_show
@router.get("/", response_model=List[ShowRead])
@router.get("/", response_model=PaginatedResponse[ShowRead])
def read_shows(
offset: int = 0,
limit: int = Query(default=2000, le=5000),
@ -49,6 +50,8 @@ def read_shows(
session: Session = Depends(get_session)
):
from sqlalchemy.orm import joinedload
from datetime import datetime
query = select(Show).options(
joinedload(Show.vertical),
joinedload(Show.venue),
@ -64,11 +67,12 @@ def read_shows(
allowed_ids = [p.vertical_id for p in prefs]
# If user selected tiers but has no bands in them, return empty
if not allowed_ids:
return []
return PaginatedResponse(data=[], meta=PaginationMeta(total=0, limit=limit, offset=offset))
query = query.where(Show.vertical_id.in_(allowed_ids))
elif tiers and not current_user:
# Anonymous users can't filter by personal tiers
return []
return PaginatedResponse(data=[], meta=PaginationMeta(total=0, limit=limit, offset=offset))
if venue_id:
query = query.where(Show.venue_id == venue_id)
if tour_id:
@ -87,20 +91,28 @@ def read_shows(
query = query.where(Show.vertical_id == vertical_id)
if status:
from datetime import datetime
today = datetime.now()
if status == "past":
query = query.where(Show.date <= today)
query = query.order_by(Show.date.desc())
elif status == "upcoming":
query = query.where(Show.date > today)
query = query.order_by(Show.date.asc())
# Calculate total count before pagination
total = session.exec(select(func.count()).select_from(query.subquery())).one()
# Apply sorting and pagination
if status == "upcoming":
query = query.order_by(Show.date.asc())
else:
# Default sort by date descending so we get recent shows first
query = query.order_by(Show.date.desc())
shows = session.exec(query.offset(offset).limit(limit)).all()
return shows
return PaginatedResponse(
data=shows,
meta=PaginationMeta(total=total, limit=limit, offset=offset)
)
@router.get("/recent", response_model=List[ShowRead])
def read_recent_shows(

View file

@ -1,4 +1,4 @@
from typing import Optional, List, Dict
from typing import Optional, List, Dict, Generic, TypeVar
from sqlmodel import SQLModel
from datetime import datetime
@ -452,3 +452,17 @@ class PublicProfileRead(SQLModel):
stats: Dict[str, int]
joined_at: datetime
# --- Pagination ---
T = TypeVar('T')
class PaginationMeta(SQLModel):
    """Metadata describing one page of a paginated listing."""
    total: int   # total number of matching rows (before offset/limit applied)
    limit: int   # maximum page size that was requested
    offset: int  # number of rows skipped before this page
class PaginatedResponse(SQLModel, Generic[T]):
    """Generic envelope pairing one page of results with its pagination metadata."""
    data: List[T]         # the page of results, of the endpoint's item type T
    meta: PaginationMeta  # total/limit/offset bookkeeping for the page

View file

@ -0,0 +1,23 @@
from apscheduler.schedulers.background import BackgroundScheduler
import import_elgoose
from sqlmodel import Session
from database import engine
import logging
logger = logging.getLogger(__name__)
scheduler = BackgroundScheduler()
def daily_import_job():
    """Scheduled job: re-run the Goose data import without creating demo users.

    Opens its own DB session. Any failure is logged with a full traceback
    instead of propagating and killing the scheduler thread.
    """
    logger.info("Starting daily Goose data import...")
    try:
        with Session(engine) as session:
            stats = import_elgoose.run_import(session, with_users=False)
            # Lazy %s args: the message is only formatted if INFO is enabled.
            logger.info("Daily import complete. Stats: %s", stats)
    except Exception as e:
        # logger.exception records the traceback; the previous f-string
        # logger.error call discarded it.
        logger.exception("Daily import failed: %s", e)
def start_scheduler():
    """Register the recurring import job and start the background scheduler.

    NOTE(review): the job fires every 12 hours although the log message calls
    it "daily" — confirm which cadence is intended.
    """
    # replace_existing makes registration idempotent if the startup hook fires
    # more than once (e.g. dev-server reloads); otherwise add_job raises a
    # ConflictingIdError on the duplicate job id.
    scheduler.add_job(daily_import_job, 'interval', hours=12,
                      id='goose_import', replace_existing=True)
    scheduler.start()
    logger.info("Scheduler started with daily import job.")