feat(backend): Implement automation scheduler and pagination envelope
parent 3aaf35d43b
commit 2941fa482e
7 changed files with 116 additions and 333 deletions
@@ -343,12 +343,8 @@ def import_setlists(session, show_map, song_map):
     print(f"✓ Imported {performance_count} new performances")
 
-def main():
-    print("="*60)
-    print("EL GOOSE DATA IMPORTER")
-    print("="*60)
-
-    with Session(engine) as session:
+def run_import(session: Session, with_users: bool = False):
+    """Run the import process programmatically"""
     # 1. Get or create vertical
     print("\n🦆 Creating Goose vertical...")
     vertical = session.exec(
@@ -368,6 +364,8 @@ def main():
     else:
         print(f"✓ Using existing vertical (ID: {vertical.id})")
 
+    users = []
+    if with_users:
         # 2. Create users
         users = create_users(session)
@@ -381,15 +379,31 @@ def main():
     # 5. Import setlists
     import_setlists(session, show_map, song_map)
 
+    return {
+        "venues": len(venue_map),
+        "tours": len(tour_map),
+        "songs": len(song_map),
+        "shows": len(show_map),
+        "users": len(users)
+    }
+
+def main():
+    print("="*60)
+    print("EL GOOSE DATA IMPORTER")
+    print("="*60)
+
+    with Session(engine) as session:
+        stats = run_import(session, with_users=True)
+
         print("\n" + "="*60)
         print("✓ IMPORT COMPLETE!")
         print("="*60)
         print(f"\nImported:")
-        print(f" • {len(venue_map)} venues")
-        print(f" • {len(tour_map)} tours")
-        print(f" • {len(song_map)} songs")
-        print(f" • {len(show_map)} shows")
-        print(f" • {len(users)} demo users")
+        print(f" • {stats['venues']} venues")
+        print(f" • {stats['tours']} tours")
+        print(f" • {stats['songs']} songs")
+        print(f" • {stats['shows']} shows")
+        print(f" • {stats['users']} demo users")
         print(f"\nAll passwords: demo123")
         print(f"\nStart demo servers:")
         print(f" Backend: DATABASE_URL='sqlite:///./elmeg-demo.db' uvicorn main:app --reload --port 8001")
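The refactor above makes main() a thin CLI wrapper around run_import(), so other callers (such as the scheduler added below) can reuse the import without the banner output. A minimal sketch of a one-off driver, assuming the backend modules are importable from the working directory:

# Hypothetical driver script, not part of this commit.
from sqlmodel import Session

from database import engine
import import_elgoose

with Session(engine) as session:
    # with_users=False skips demo-user creation, matching the scheduler's call.
    stats = import_elgoose.run_import(session, with_users=False)
    print(f"Imported {stats['shows']} shows and {stats['songs']} songs")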
@@ -7,8 +7,14 @@ from fastapi.middleware.cors import CORSMiddleware
 # Feature flags - set to False to disable features
 ENABLE_BUG_TRACKER = os.getenv("ENABLE_BUG_TRACKER", "true").lower() == "true"
 
+from services.scheduler import start_scheduler
+
 app = FastAPI()
 
+@app.on_event("startup")
+def on_startup():
+    start_scheduler()
+
 app.add_middleware(
     CORSMiddleware,
     allow_origins=["*"], # In production, set this to the frontend domain
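A note on the startup hook: @app.on_event("startup") works but is deprecated in recent FastAPI releases in favor of lifespan handlers, which also give a natural place to stop the scheduler on shutdown. A sketch of equivalent wiring (the shutdown call is an assumption, not part of this commit):

from contextlib import asynccontextmanager
from fastapi import FastAPI

from services.scheduler import scheduler, start_scheduler

@asynccontextmanager
async def lifespan(app: FastAPI):
    start_scheduler()  # start the background import job on boot
    yield
    scheduler.shutdown(wait=False)  # assumed cleanup: stop APScheduler threads on exit

app = FastAPI(lifespan=lifespan)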
@@ -1,288 +0,0 @@
import sys
import os
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from sqlmodel import Session, select
from database import engine
from models import Venue, Song, Show, Tour, Performance
from slugify import generate_slug, generate_show_slug
import requests
import time

BASE_URL = "https://elgoose.net/api/v2"

def fetch_all_json(endpoint, params=None):
    all_data = []
    page = 1
    params = params.copy() if params else {}
    print(f"Fetching {endpoint}...")

    seen_ids = set()

    while True:
        params['page'] = page
        url = f"{BASE_URL}/{endpoint}.json"
        try:
            resp = requests.get(url, params=params)
            if resp.status_code != 200:
                print(f" Failed with status {resp.status_code}")
                break

            # API can return a dict with 'data' or just a list sometimes, handling both
            json_resp = resp.json()
            if isinstance(json_resp, dict):
                items = json_resp.get('data', [])
            elif isinstance(json_resp, list):
                items = json_resp
            else:
                items = []

            if not items:
                print(" No more items found.")
                break

            # Check for cycles / infinite loop by checking if we've seen these IDs before
            # Assuming items have 'id' or 'show_id' etc.
            # If not, we hash the string representation.
            new_items_count = 0
            for item in items:
                # Try to find a unique identifier
                uid = item.get('id') or item.get('show_id') or str(item)
                if uid not in seen_ids:
                    seen_ids.add(uid)
                    all_data.append(item)
                    new_items_count += 1

            if new_items_count == 0:
                print(f" Page {page} returned {len(items)} items but all were duplicates. Stopping.")
                break

            print(f" Page {page} done ({new_items_count} new items)")
            page += 1
            time.sleep(0.5)

            # Safety break
            if page > 1000:
                print(" Hit 1000 pages safety limit.")
                break
            if page > 200: # Safety break
                print(" Safety limit reached.")
                break

        except Exception as e:
            print(f"Error fetching {endpoint}: {e}")
            break

    return all_data

def fix_data():
    with Session(engine) as session:
        # 1. Fix Venues Slugs
        print("Fixing Venue Slugs...")
        venues = session.exec(select(Venue)).all()
        existing_venue_slugs = {v.slug for v in venues if v.slug}
        for v in venues:
            if not v.slug:
                new_slug = generate_slug(v.name)
                # Ensure unique
                original_slug = new_slug
                counter = 1
                while new_slug in existing_venue_slugs:
                    counter += 1
                    new_slug = f"{original_slug}-{counter}"
                v.slug = new_slug
                existing_venue_slugs.add(new_slug)
                session.add(v)
        session.commit()

        # 2. Fix Songs Slugs
        print("Fixing Song Slugs...")
        songs = session.exec(select(Song)).all()
        existing_song_slugs = {s.slug for s in songs if s.slug}
        for s in songs:
            if not s.slug:
                new_slug = generate_slug(s.title)
                original_slug = new_slug
                counter = 1
                while new_slug in existing_song_slugs:
                    counter += 1
                    new_slug = f"{original_slug}-{counter}"
                s.slug = new_slug
                existing_song_slugs.add(new_slug)
                session.add(s)
        session.commit()

        # 3. Fix Tours Slugs
        print("Fixing Tour Slugs...")
        tours = session.exec(select(Tour)).all()
        existing_tour_slugs = {t.slug for t in tours if t.slug}
        for t in tours:
            if not t.slug:
                new_slug = generate_slug(t.name)
                original_slug = new_slug
                counter = 1
                while new_slug in existing_tour_slugs:
                    counter += 1
                    new_slug = f"{original_slug}-{counter}"
                t.slug = new_slug
                existing_tour_slugs.add(new_slug)
                session.add(t)
        session.commit()

        # 4. Fix Shows Slugs
        print("Fixing Show Slugs...")
        shows = session.exec(select(Show)).all()
        existing_show_slugs = {s.slug for s in shows if s.slug}
        venue_map = {v.id: v for v in venues} # Cache venues for naming

        for show in shows:
            if not show.slug:
                date_str = show.date.strftime("%Y-%m-%d") if show.date else "unknown"
                venue_name = "unknown"
                if show.venue_id and show.venue_id in venue_map:
                    venue_name = venue_map[show.venue_id].name

                new_slug = generate_show_slug(date_str, venue_name)
                # Ensure unique
                original_slug = new_slug
                counter = 1
                while new_slug in existing_show_slugs:
                    counter += 1
                    new_slug = f"{original_slug}-{counter}"

                show.slug = new_slug
                existing_show_slugs.add(new_slug)
                session.add(show)
        session.commit()

        # 4b. Fix Performance Slugs
        print("Fixing Performance Slugs...")
        from slugify import generate_performance_slug
        perfs = session.exec(select(Performance)).all()
        existing_perf_slugs = {p.slug for p in perfs if p.slug}

        # We need song titles and show dates
        # Efficient way: build maps
        song_map = {s.id: s.title for s in songs}
        show_map = {s.id: s.date.strftime("%Y-%m-%d") for s in shows}

        for p in perfs:
            if not p.slug:
                song_title = song_map.get(p.song_id, "unknown")
                show_date = show_map.get(p.show_id, "unknown")

                new_slug = generate_performance_slug(song_title, show_date)

                # Ensure unique (for reprises etc)
                original_slug = new_slug
                counter = 1
                while new_slug in existing_perf_slugs:
                    counter += 1
                    new_slug = f"{original_slug}-{counter}"

                p.slug = new_slug
                existing_perf_slugs.add(new_slug)
                session.add(p)
        session.commit()

        # 5. Fix Set Names (Fetch API)
        print("Fixing Set Names (fetching setlists)...")
        # We need to map El Goose show_id/song_id to our IDs to find the record.
        # But we don't store El Goose IDs in our models?
        # Checked models.py: we don't store ex_id.
        # We match by show date/venue and song title.

        # This is hard to do reliably without external IDs.
        # Alternatively, we can infer set name from 'position'?
        # No, position 1 could be Set 1 or Encore if short show? No.

        # Wait, import_elgoose mappings are local var.
        # If we re-run import logic but UPDATE instead of SKIP, we can fix it.
        # But matching is tricky.

        # Let's try to match by Show Date and Song Title.
        # Build map: (show_id, song_id, position) -> Performance

        # Refresh perfs from DB since we might have added slugs
        # perfs = session.exec(select(Performance)).all() # Already have them, but maybe stale?
        # Re-querying is safer but PERFS list object is updated by session.add? Yes.

        perf_map = {} # (show_id, song_id, position) -> perf object
        for p in perfs:
            perf_map[(p.show_id, p.song_id, p.position)] = p

        # We need show map: el_goose_show_id -> our_show_id
        # We need song map: el_goose_song_id -> our_song_id

        # We have to re-fetch shows and songs to rebuild this map.
        print(" Re-building ID maps...")

        # Map Shows
        el_shows = fetch_all_json("shows", {"artist": 1})
        if not el_shows: el_shows = fetch_all_json("shows") # fallback

        el_show_map = {} # el_id -> our_id
        for s in el_shows:
            # Find our show
            dt = s['showdate'] # YYYY-MM-DD
            # We need to match precise Show.
            # Simplified: match by date.
            # Convert string to datetime
            from datetime import datetime
            s_date = datetime.strptime(dt, "%Y-%m-%d")

            # Find show in our DB
            # We can optimise this but for now linear search or query is fine for one-off script
            found = session.exec(select(Show).where(Show.date == s_date)).first()
            if found:
                el_show_map[s['show_id']] = found.id

        # Map Songs
        el_songs = fetch_all_json("songs")
        el_song_map = {} # el_id -> our_id
        for s in el_songs:
            found = session.exec(select(Song).where(Song.title == s['name'])).first()
            if found:
                el_song_map[s['id']] = found.id

        # Now fetch setlists
        el_setlists = fetch_all_json("setlists")

        count = 0
        for item in el_setlists:
            our_show_id = el_show_map.get(item['show_id'])
            our_song_id = el_song_map.get(item['song_id'])
            position = item.get('position', 0)

            if our_show_id and our_song_id:
                # Find existing perf
                perf = perf_map.get((our_show_id, our_song_id, position))
                if perf:
                    # Logic to fix set_name
                    set_val = str(item.get('setnumber', '1'))
                    set_name = f"Set {set_val}"
                    if set_val.isdigit():
                        set_name = f"Set {set_val}"
                    elif set_val.lower() == 'e':
                        set_name = "Encore"
                    elif set_val.lower() == 'e2':
                        set_name = "Encore 2"
                    elif set_val.lower() == 's':
                        set_name = "Soundcheck"

                    if perf.set_name != set_name:
                        perf.set_name = set_name
                        session.add(perf)
                        count += 1
            else:
                # Debug only first few failures to avoid spam
                if count < 5:
                    print(f"Match failed for el_show_id={item.get('show_id')} el_song_id={item.get('song_id')}")
                    if not our_show_id: print(f" -> Show ID not found in map (Map size: {len(el_show_map)})")
                    if not our_song_id: print(f" -> Song ID not found in map (Map size: {len(el_song_map)})")

        session.commit()
        print(f"Fixed {count} performance set names.")

if __name__ == "__main__":
    fix_data()
@@ -13,3 +13,5 @@ requests
 beautifulsoup4
 boto3
 email-validator
+apscheduler
+python-slugify
@@ -1,9 +1,10 @@
 from typing import List
 from fastapi import APIRouter, Depends, HTTPException, Query
 from sqlmodel import Session, select
+from sqlalchemy import func
 from database import get_session
 from models import Show, Tag, EntityTag, Vertical, UserVerticalPreference
-from schemas import ShowCreate, ShowRead, ShowUpdate, TagRead
+from schemas import ShowCreate, ShowRead, ShowUpdate, TagRead, PaginatedResponse, PaginationMeta
 from auth import get_current_user, get_current_user_optional
 
 router = APIRouter(prefix="/shows", tags=["shows"])
@@ -33,7 +34,7 @@ def create_show(
 
     return db_show
 
-@router.get("/", response_model=List[ShowRead])
+@router.get("/", response_model=PaginatedResponse[ShowRead])
 def read_shows(
     offset: int = 0,
     limit: int = Query(default=2000, le=5000),
@@ -49,6 +50,8 @@ def read_shows(
     session: Session = Depends(get_session)
 ):
+    from sqlalchemy.orm import joinedload
+    from datetime import datetime
 
     query = select(Show).options(
         joinedload(Show.vertical),
         joinedload(Show.venue),
@@ -64,11 +67,12 @@ def read_shows(
         allowed_ids = [p.vertical_id for p in prefs]
         # If user selected tiers but has no bands in them, return empty
         if not allowed_ids:
-            return []
+            return PaginatedResponse(data=[], meta=PaginationMeta(total=0, limit=limit, offset=offset))
         query = query.where(Show.vertical_id.in_(allowed_ids))
     elif tiers and not current_user:
         # Anonymous users can't filter by personal tiers
-        return []
+        return PaginatedResponse(data=[], meta=PaginationMeta(total=0, limit=limit, offset=offset))
 
     if venue_id:
         query = query.where(Show.venue_id == venue_id)
     if tour_id:
@@ -87,20 +91,28 @@ def read_shows(
         query = query.where(Show.vertical_id == vertical_id)
 
     if status:
-        from datetime import datetime
         today = datetime.now()
         if status == "past":
             query = query.where(Show.date <= today)
-            query = query.order_by(Show.date.desc())
         elif status == "upcoming":
             query = query.where(Show.date > today)
 
+    # Calculate total count before pagination
+    total = session.exec(select(func.count()).select_from(query.subquery())).one()
+
+    # Apply sorting and pagination
+    if status == "upcoming":
+        query = query.order_by(Show.date.asc())
+    else:
+        # Default sort by date descending so we get recent shows first
+        query = query.order_by(Show.date.desc())
+
     shows = session.exec(query.offset(offset).limit(limit)).all()
-    return shows
 
+    return PaginatedResponse(
+        data=shows,
+        meta=PaginationMeta(total=total, limit=limit, offset=offset)
+    )
 
 @router.get("/recent", response_model=List[ShowRead])
 def read_recent_shows(
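With the envelope in place, a client can page until offset reaches meta.total instead of guessing from short reads. A minimal consumer sketch (the base URL is a placeholder; the endpoint shape is as above):

import requests

BASE = "http://localhost:8001"  # placeholder, e.g. the demo port used earlier
shows, offset, limit = [], 0, 500

while True:
    payload = requests.get(f"{BASE}/shows/",
                           params={"offset": offset, "limit": limit}).json()
    shows.extend(payload["data"])  # rows for this page
    offset += limit
    if offset >= payload["meta"]["total"]:  # the envelope tells us when to stop
        break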
@@ -1,4 +1,4 @@
-from typing import Optional, List, Dict
+from typing import Optional, List, Dict, Generic, TypeVar
 from sqlmodel import SQLModel
 from datetime import datetime
 
@@ -452,3 +452,17 @@ class PublicProfileRead(SQLModel):
     stats: Dict[str, int]
 
     joined_at: datetime
+
+
+# --- Pagination ---
+
+T = TypeVar('T')
+
+class PaginationMeta(SQLModel):
+    total: int
+    limit: int
+    offset: int
+
+class PaginatedResponse(SQLModel, Generic[T]):
+    data: List[T]
+    meta: PaginationMeta
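Because PaginatedResponse is generic over the row schema, other routers can adopt the same envelope without defining new models. A hypothetical endpoint as illustration (Song, SongRead, and this router are assumptions, not part of this commit):

from fastapi import Depends
from sqlalchemy import func
from sqlmodel import Session, select

@router.get("/songs", response_model=PaginatedResponse[SongRead])
def read_songs(offset: int = 0, limit: int = 100,
               session: Session = Depends(get_session)):
    # Same count-then-slice pattern as the shows router above.
    total = session.exec(select(func.count()).select_from(Song)).one()
    songs = session.exec(select(Song).offset(offset).limit(limit)).all()
    return PaginatedResponse(
        data=songs,
        meta=PaginationMeta(total=total, limit=limit, offset=offset),
    )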
backend/services/scheduler.py (new file, 23 lines)

@@ -0,0 +1,23 @@
+from apscheduler.schedulers.background import BackgroundScheduler
+import import_elgoose
+from sqlmodel import Session
+from database import engine
+import logging
+
+logger = logging.getLogger(__name__)
+
+scheduler = BackgroundScheduler()
+
+def daily_import_job():
+    logger.info("Starting daily Goose data import...")
+    try:
+        with Session(engine) as session:
+            stats = import_elgoose.run_import(session, with_users=False)
+            logger.info(f"Daily import complete. Stats: {stats}")
+    except Exception as e:
+        logger.error(f"Daily import failed: {e}")
+
+def start_scheduler():
+    scheduler.add_job(daily_import_job, 'interval', hours=12, id='goose_import')
+    scheduler.start()
+    logger.info("Scheduler started with daily import job.")
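One caveat: despite the daily_import_job name, the trigger fires every 12 hours, and APScheduler's interval trigger first fires only after a full interval has elapsed. If an import at boot is also wanted, add_job accepts next_run_time; a sketch (an assumption, not in this commit):

from datetime import datetime

def start_scheduler():
    scheduler.add_job(
        daily_import_job, 'interval', hours=12, id='goose_import',
        next_run_time=datetime.now(),  # also fire once immediately at startup
        max_instances=1,               # avoid overlapping import runs
    )
    scheduler.start()
    logger.info("Scheduler started with import job (every 12 hours).")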