Fix import scripts: proper Goose filtering, loop detection, set name updates
parent 29e3e07141
commit 8a46000b9d

4 changed files with 176 additions and 59 deletions
@@ -10,6 +10,15 @@ When deploying changes to elmeg, **ONLY rebuild the backend and frontend contain
 ## Safe deployment command
 
+### Production (`elmeg.xyz`) - tangible-aacorn
+
 ```bash
+# turbo
 ssh tangible-aacorn "cd /srv/containers/elmeg-demo && git pull && docker compose up -d --build --no-deps backend frontend"
 ```
+
+### Staging (`elmeg.runfoo.run`) - nexus-vector
+
+```bash
+# turbo
+ssh nexus-vector "cd /srv/containers/elmeg-demo && git pull && docker compose up -d --build --no-deps backend frontend"
@@ -36,3 +45,29 @@ ssh nexus-vector "docker exec elmeg-demo-db-1 pg_dump -U elmeg elmeg > /srv/cont
 ```bash
 ssh nexus-vector "cat /srv/containers/elmeg-demo/backup-YYYYMMDD-HHMMSS.sql | docker exec -i elmeg-demo-db-1 psql -U elmeg elmeg"
 ```
+
+## Data Import (Recovery)
+
+If the database is wiped or fresh, use the Smart Import script to populate shows and setlists. The script is memory-optimized and guards against infinite pagination loops in the upstream API.
+
+### Production (tangible-aacorn)
+
+```bash
+ssh tangible-aacorn "docker exec elmeg-backend-1 python import_setlists_smart.py"
+```
+
+### Staging (nexus-vector)
+
+```bash
+ssh nexus-vector "docker exec elmeg-demo-backend-1 python import_setlists_smart.py"
+```
+
+## Git Configuration (Production)
+
+To ensure `git pull` works correctly on production:
+
+```bash
+# On nexus-vector
+cd /srv/containers/elmeg-demo
+git branch --set-upstream-to=origin/main main
+```

@@ -37,13 +37,12 @@ def main():
     print(f"Mapped {len(song_map)} songs")
 
     # Get existing performances
-    existing = set()
-    perfs = session.exec(
-        select(Performance.show_id, Performance.song_id, Performance.position)
-    ).all()
+    print("Loading existing performances...")
+    existing_map = {}  # (show_id, song_id, position) -> Performance object
+    perfs = session.exec(select(Performance)).all()
     for p in perfs:
-        existing.add((p[0], p[1], p[2]))
-    print(f"Found {len(existing)} existing performances")
+        existing_map[(p.show_id, p.song_id, p.position)] = p
+    print(f"Found {len(existing_map)} existing performances")
 
     # We need API show IDs. The ElGoose API shows endpoint returns show_id.
     # Let's fetch and correlate by date
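The switch above from a tuple set to a dict keyed by the same tuple is what lets the importer update matched rows in place instead of merely skipping them. A minimal sketch of that pattern, with illustrative names (not the script's actual API):

```python
# Sketch: index ORM rows by their natural key so callers can both
# test membership and mutate the matched object. Assumes rows with
# show_id, song_id and position attributes.
from typing import Dict, Tuple

Key = Tuple[int, int, int]  # (show_id, song_id, position)

def build_existing_map(rows) -> Dict[Key, object]:
    return {(r.show_id, r.song_id, r.position): r for r in rows}
```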
@@ -51,26 +50,39 @@ def main():
     api_shows = {}  # date_str -> api_show_id
 
     page = 1
+    seen_ids = set()
     while True:
         url = f"{BASE_URL}/shows.json"
         try:
-            resp = requests.get(url, params={"artist": 1, "page": page}, timeout=30)
+            resp = requests.get(url, params={"page": page}, timeout=30)
             data = resp.json().get('data', [])
             if not data:
                 break
 
+            # Loop detection
+            first_id = data[0].get('show_id') if data else None
+            if first_id in seen_ids:
+                print(f" Loop detected at page {page}")
+                break
+            if first_id:
+                seen_ids.add(first_id)
+
             for s in data:
+                # CRITICAL: Only include Goose shows
+                if s.get('artist') != 'Goose':
+                    continue
                 date_str = s['showdate']
                 api_shows[date_str] = s['show_id']
             page += 1
             if page > 50:
                 break
-        except:
+        except Exception as e:
+            print(f" Error on page {page}: {e}")
             break
 
     print(f"Got {len(api_shows)} API show IDs")
 
     # Now import setlists for each show
     total_added = 0
+    total_updated = 0
     processed = 0
 
     for show in shows:
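The guard works because a misbehaving paginator tends to re-serve an earlier page verbatim, so remembering the first ID of each batch is enough to catch the cycle. A self-contained sketch of the same idea, assuming the ElGoose-style `{"data": [...]}` response shape (endpoint and field names mirror the script's assumptions, not a documented API contract):

```python
# Sketch: defensive pagination with first-ID loop detection and a
# hard page cap.
import requests

def fetch_all_shows(base_url, max_pages=50, timeout=30):
    seen_first_ids = set()
    page = 1
    while page <= max_pages:
        resp = requests.get(f"{base_url}/shows.json",
                            params={"page": page}, timeout=timeout)
        data = resp.json().get('data', [])
        if not data:
            break
        first_id = data[0].get('show_id')
        if first_id in seen_first_ids:
            break  # the API served this page before: stop instead of spinning
        seen_first_ids.add(first_id)
        yield from data
        page += 1
```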
@@ -80,13 +92,8 @@ def main():
         if not api_show_id:
             continue
 
-        # Check if we already have performances for this show
-        existing_for_show = session.exec(
-            select(Performance).where(Performance.show_id == show.id)
-        ).first()
-
-        if existing_for_show:
-            continue  # Skip shows that already have performances
+        # REMOVED: Skipping logic. We verify everything.
+        # existing_for_show = ...
 
         # Fetch setlist
         setlist = fetch_show_setlist(api_show_id)
@@ -94,6 +101,8 @@ def main():
             continue
 
         added = 0
+        updated = 0
 
         for item in setlist:
             song_title = item.get('songname', '').lower()
             song_id = song_map.get(song_title)
@@ -104,28 +113,49 @@ def main():
             position = item.get('position', 0)
             key = (show.id, song_id, position)
 
-            if key in existing:
+            # Resolve set name
+            set_val = str(item.get('setnumber', '1'))
+            if set_val.isdigit():
+                set_name = f"Set {set_val}"
+            elif set_val.lower() == 'e':
+                set_name = "Encore"
+            elif set_val.lower() == 'e2':
+                set_name = "Encore 2"
+            elif set_val.lower() == 's':
+                set_name = "Soundcheck"
+            else:
+                set_name = f"Set {set_val}"
+
+            if key in existing_map:
+                # Update check
+                perf = existing_map[key]
+                if not perf.set_name or perf.set_name != set_name:
+                    perf.set_name = set_name
+                    session.add(perf)
+                    updated += 1
+                    total_updated += 1
                 continue
 
+            # Create new
             perf = Performance(
                 show_id=show.id,
                 song_id=song_id,
                 position=position,
-                set_name=item.get('set'),
+                set_name=set_name,
                 segue=bool(item.get('segue', 0)),
                 notes=item.get('footnote')
             )
             session.add(perf)
-            existing.add(key)
+            existing_map[key] = perf  # Add to map to prevent dupes in same run
            added += 1
             total_added += 1
 
-        if added > 0:
+        if added > 0 or updated > 0:
             session.commit()
             processed += 1
-            print(f"Show {date_str}: +{added} songs ({total_added} total)")
+            print(f"Show {date_str}: +{added} new, ~{updated} updated")
 
-    print(f"\\n✓ Added {total_added} performances from {processed} shows")
+    print(f"\nImport Complete! Added: {total_added}, Updated: {total_updated}")
 
 if __name__ == "__main__":
     main()
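The branch ladder added above maps ElGoose `setnumber` codes to display names; extracted as a pure function it becomes trivially unit-testable. A sketch (the helper name is hypothetical, the mapping mirrors the diff):

```python
def resolve_set_name(raw) -> str:
    # Mirrors the commit's mapping: digits -> "Set N", e/e2 -> encores,
    # s -> soundcheck, anything else falls through to "Set <value>".
    set_val = str(raw if raw is not None else '1')
    if set_val.isdigit():
        return f"Set {set_val}"
    if set_val.lower() == 'e':
        return "Encore"
    if set_val.lower() == 'e2':
        return "Encore 2"
    if set_val.lower() == 's':
        return "Soundcheck"
    return f"Set {set_val}"

assert resolve_set_name('2') == "Set 2"
assert resolve_set_name('E') == "Encore"
assert resolve_set_name('s') == "Soundcheck"
```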
@@ -38,7 +38,7 @@ def main():
     with Session(engine) as session:
         # 1. Build DB Map: Date string -> DB Show ID
         print("\n1. Building DB Map (Date -> Show ID)...")
-        shows = session.exec(select(Show.id, Show.date)).all()  # Only fetch needed fields
+        shows = session.exec(select(Show.id, Show.date)).all()
         date_to_db_id = {s.date.strftime('%Y-%m-%d'): s.id for s in shows}
         print(f" Mapped {len(date_to_db_id)} existing shows in DB")
 
@@ -50,40 +50,34 @@ def main():
         gc.collect()
 
         # 2. Build API Map: ElGoose ID -> DB ID
         # Process iteratively to save memory
         print("\n2. Building ElGoose ID -> DB ID map (Streaming)...")
         elgoose_id_to_db_id = {}
         matched_count = 0
 
         page = 1
-        seen_ids_in_run = set()
+        seen_show_ids = set()
 
         while True:
             # Fetch batch of shows
             print(f" Fetching shows page {page}...", end="\r", flush=True)
-            data = fetch_json("shows", {"page": page})  # Fetch all shows (artist filter can be flaky)
+            data = fetch_json("shows", {"page": page})
             if not data:
                 break
 
-            # Check for API loop (if Page X returns same content as Page 1)
-            first_id_in_batch = data[0].get('show_id') if data else None
-            if first_id_in_batch and first_id_in_batch in seen_ids_in_run:
-                print(f"\n Loop detected at page {page} (ID {first_id_in_batch} seen before). Breaking.")
+            # Loop Detection (Shows)
+            first_id = data[0].get('show_id') if data else None
+            if first_id and first_id in seen_show_ids:
+                print(f"\n Loop detected in Shows at page {page} (ID {first_id}). Breaking.")
                 break
+            if first_id:
+                seen_show_ids.add(first_id)
 
             for s in data:
                 # We only need Goose shows (artist_id=3 usually, but we check date match)
                 s_date = s.get('showdate')
                 s_id = s.get('show_id')
 
-                if s_id:
-                    seen_ids_in_run.add(s_id)
-
                 if s_date and s_id:
                     db_id = date_to_db_id.get(s_date)
                     if db_id:
                         elgoose_id_to_db_id[s_id] = db_id
                         matched_count += 1
 
             page += 1
             if page % 10 == 0:
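The hunk is cut off at the `page % 10` check, but the surrounding code leans on `del` plus periodic `gc.collect()` to keep the resident set small during long imports. A hedged sketch of that pattern, under the assumption that the truncated branch triggers a collection:

```python
# Sketch: memory-conscious page streaming; fetch_page stands in for
# the script's fetch_json("shows", {"page": page}) call.
import gc

def stream_pages(fetch_page, gc_every=10):
    page = 1
    while True:
        batch = fetch_page(page)
        if not batch:
            break
        yield from batch
        del batch          # drop the reference before fetching more
        page += 1
        if page % gc_every == 0:
            gc.collect()   # bound peak memory on long imports
```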
@@ -93,7 +87,7 @@ def main():
         del date_to_db_id
         gc.collect()
 
-        # 3. Cache Songs
+        # 3. Caching Songs
         print("\n3. Caching Songs...")
         songs = session.exec(select(Song.id, Song.title)).all()
         song_map = {s.title.lower().strip(): s.id for s in songs}
@@ -101,32 +95,35 @@ def main():
         gc.collect()
         print(f" Cached {len(song_map)} songs")
 
-        # 4. Process Setlists
+        # 4. Importing Setlists
         print("\n4. Importing Setlists...")
         page = 1
         total_added = 0
+        seen_batch_signatures = set()
 
-        # Cache existing performance keys (show_id, song_id, position)
-        print(" Caching existing performance keys...")
-        perfs = session.exec(select(Performance.show_id, Performance.song_id, Performance.position)).all()
-        existing_keys = set((p.show_id, p.song_id, p.position) for p in perfs)
-        print(f" Cached {len(existing_keys)} existing performances")
-        del perfs
-        gc.collect()
-
         while True:
             data = fetch_json("setlists", {"page": page})
             if not data:
                 break
 
-            # Prefetch checks for this batch to avoid N+1 SELECTs?
-            # Actually with 3600 perfs, one-by-one check is slow.
-            # But "existing check" is needed.
-            # We can cache *existing performances* for the CURRENT batch's shows?
-            # Or just cache ALL existing performance keys (show_id, song_id, position)?
-            # Performance table might be large (40k rows?).
-            # (show_id, song_id, position) tuples set is ~2MB RAM. Safe.
+            # Loop Detection (Setlists)
+            # Use signature of first item: (uniqueid or show_id+position)
+            if data:
+                first = data[0]
+                signature = f"{first.get('uniqueid')}-{first.get('show_id')}-{first.get('position')}"
+                if signature in seen_batch_signatures:
+                    print(f"\n Loop detected in Setlists at page {page} (Sig {signature}). Breaking.")
+                    break
+                seen_batch_signatures.add(signature)
+
+            if page == 1:
+                print(" Caching existing performance keys...")
+                perfs = session.exec(select(Performance.show_id, Performance.song_id, Performance.position)).all()
+                existing_keys = set((p.show_id, p.song_id, p.position) for p in perfs)
+                print(f" Cached {len(existing_keys)} performance keys")
+                del perfs
+                gc.collect()
 
             batch_added = 0
             new_objects = []
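Unlike the shows endpoint, setlist rows may lack one reliable ID, so this hunk fingerprints each batch by its first row instead. A compact sketch of that guard as a closure (an illustrative helper, not part of the script):

```python
# Sketch: composite-signature loop detection for paginated rows.
def make_loop_guard():
    seen = set()
    def is_repeat(batch) -> bool:
        first = batch[0]
        sig = f"{first.get('uniqueid')}-{first.get('show_id')}-{first.get('position')}"
        if sig in seen:
            return True
        seen.add(sig)
        return False
    return is_repeat

# Usage inside the pagination loop:
#   is_repeat = make_loop_guard()       # once, before `while True:`
#   if data and is_repeat(data): break  # per page
```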
@@ -143,11 +140,9 @@ def main():
 
                 position = perf.get('position', 0)
 
-                # Check uniqueness
                 if (db_show_id, song_id, position) in existing_keys:
                     continue
 
-                # Create
                 set_val = str(perf.get('setnumber', '1'))
                 if set_val.isdigit():
                     set_name = f"Set {set_val}"
@@ -171,7 +166,7 @@ def main():
                     slug=f"{generate_slug(song_name)}-{db_show_id}-{position}"
                 )
                 new_objects.append(new_perf)
-                existing_keys.add((db_show_id, song_id, position))  # Add to cache
+                existing_keys.add((db_show_id, song_id, position))
                 batch_added += 1
                 total_added += 1
 
backend/repro_review_crash.py (new file, +57 lines)
@@ -0,0 +1,57 @@
+
+import pytest
+from sqlmodel import Session, SQLModel, create_engine
+from models import User, Review, Show, Rating
+from schemas import ReviewCreate
+from services.gamification import award_xp
+from routers.reviews import create_review
+from fastapi import HTTPException
+
+# Mock auth
+def mock_get_current_user():
+    return User(id=1, email="test@test.com", hashed_password="pw", is_active=True)
+
+# Setup throwaway SQLite DB (file-based, not in-memory)
+sqlite_file_name = "test_review_debug.db"
+sqlite_url = f"sqlite:///{sqlite_file_name}"
+engine = create_engine(sqlite_url)
+
+def test_repro_review_crash():
+    SQLModel.metadata.create_all(engine)
+
+    with Session(engine) as session:
+        # Create dummy user and show
+        user = User(email="test@test.com", hashed_password="pw")
+        session.add(user)
+
+        show = Show(date="2025-01-01", slug="test-show")
+        session.add(show)
+        session.commit()
+        session.refresh(user)
+        session.refresh(show)
+
+        print(f"User ID: {user.id}, Show ID: {show.id}")
+
+        # Payload
+        review_payload = ReviewCreate(
+            show_id=show.id,
+            content="Test Review Content",
+            blurb="Test Blurb",
+            score=5.0
+        )
+
+        try:
+            print("Attempting to create review...")
+            result = create_review(
+                review=review_payload,
+                session=session,
+                current_user=user
+            )
+            print("Review created successfully:", result)
+        except Exception as e:
+            print(f"\nCRASH DETECTED: {e}")
+            import traceback
+            traceback.print_exc()
+
+if __name__ == "__main__":
+    test_repro_review_crash()
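Because the function carries a `test_` prefix, the repro runs under pytest as well as directly: either `pytest backend/repro_review_crash.py -s` or `python backend/repro_review_crash.py` should surface the traceback (the `-s` flag keeps the printed output visible).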