Fix import scripts: proper Goose filtering, loop detection, set name updates

parent 29e3e07141
commit 8a46000b9d

4 changed files with 176 additions and 59 deletions

@@ -10,6 +10,15 @@ When deploying changes to elmeg, **ONLY rebuild the backend and frontend contain
 ## Safe deployment command
 
+### Production (`elmeg.xyz`) - tangible-aacorn
+
+```bash
+# turbo
+ssh tangible-aacorn "cd /srv/containers/elmeg-demo && git pull && docker compose up -d --build --no-deps backend frontend"
+```
+
+### Staging (`elmeg.runfoo.run`) - nexus-vector
+
 ```bash
 # turbo
 ssh nexus-vector "cd /srv/containers/elmeg-demo && git pull && docker compose up -d --build --no-deps backend frontend"
 ```

@@ -36,3 +45,29 @@ ssh nexus-vector "docker exec elmeg-demo-db-1 pg_dump -U elmeg elmeg > /srv/cont
 ```bash
 ssh nexus-vector "cat /srv/containers/elmeg-demo/backup-YYYYMMDD-HHMMSS.sql | docker exec -i elmeg-demo-db-1 psql -U elmeg elmeg"
 ```
+
+## Data Import (Recovery)
+
+If the database is wiped or fresh, use the Smart Import script to populate shows and setlists. This script is memory-optimized and checks for infinite loops.
+
+### Production (tangible-aacorn)
+
+```bash
+ssh tangible-aacorn "docker exec elmeg-backend-1 python import_setlists_smart.py"
+```
+
+### Staging (nexus-vector)
+
+```bash
+ssh nexus-vector "docker exec elmeg-demo-backend-1 python import_setlists_smart.py"
+```
+
+## Git Configuration (Production)
+
+To ensure `git pull` works correctly on production:
+
+```bash
+# On nexus-vector
+cd /srv/containers/elmeg-demo
+git branch --set-upstream-to=origin/main main
+```
@@ -37,13 +37,12 @@ def main():
         print(f"Mapped {len(song_map)} songs")
 
         # Get existing performances
-        existing = set()
-        perfs = session.exec(
-            select(Performance.show_id, Performance.song_id, Performance.position)
-        ).all()
+        print("Loading existing performances...")
+        existing_map = {}  # (show_id, song_id, position) -> Performance Object
+        perfs = session.exec(select(Performance)).all()
         for p in perfs:
-            existing.add((p[0], p[1], p[2]))
-        print(f"Found {len(existing)} existing performances")
+            existing_map[(p.show_id, p.song_id, p.position)] = p
+        print(f"Found {len(existing_map)} existing performances")
 
         # We need API show IDs. The ElGoose API shows endpoint returns show_id.
         # Let's fetch and correlate by date

@@ -51,26 +50,39 @@ def main():
         api_shows = {}  # date_str -> api_show_id
 
         page = 1
+        seen_ids = set()
         while True:
             url = f"{BASE_URL}/shows.json"
             try:
-                resp = requests.get(url, params={"artist": 1, "page": page}, timeout=30)
+                resp = requests.get(url, params={"page": page}, timeout=30)
                 data = resp.json().get('data', [])
                 if not data:
                     break
 
+                # Loop detection
+                first_id = data[0].get('show_id') if data else None
+                if first_id in seen_ids:
+                    print(f"  Loop detected at page {page}")
+                    break
+                if first_id:
+                    seen_ids.add(first_id)
+
                 for s in data:
+                    # CRITICAL: Only include Goose shows
+                    if s.get('artist') != 'Goose':
+                        continue
                     date_str = s['showdate']
                     api_shows[date_str] = s['show_id']
                 page += 1
-                if page > 50:
-                    break
-            except:
+            except Exception as e:
+                print(f"  Error on page {page}: {e}")
                 break
 
         print(f"Got {len(api_shows)} API show IDs")
 
         # Now import setlists for each show
         total_added = 0
+        total_updated = 0
         processed = 0
 
         for show in shows:

@@ -80,13 +92,8 @@ def main():
             if not api_show_id:
                 continue
 
-            # Check if we already have performances for this show
-            existing_for_show = session.exec(
-                select(Performance).where(Performance.show_id == show.id)
-            ).first()
-
-            if existing_for_show:
-                continue  # Skip shows that already have performances
+            # REMOVED: Skipping logic. We verify everything.
+            # existing_for_show = ...
 
             # Fetch setlist
             setlist = fetch_show_setlist(api_show_id)

@@ -94,6 +101,8 @@ def main():
                 continue
 
             added = 0
+            updated = 0
+
             for item in setlist:
                 song_title = item.get('songname', '').lower()
                 song_id = song_map.get(song_title)

@@ -104,28 +113,49 @@ def main():
                 position = item.get('position', 0)
                 key = (show.id, song_id, position)
 
-                if key in existing:
+                # Resolve set name
+                set_val = str(item.get('setnumber', '1'))
+                if set_val.isdigit():
+                    set_name = f"Set {set_val}"
+                elif set_val.lower() == 'e':
+                    set_name = "Encore"
+                elif set_val.lower() == 'e2':
+                    set_name = "Encore 2"
+                elif set_val.lower() == 's':
+                    set_name = "Soundcheck"
+                else:
+                    set_name = f"Set {set_val}"
+
+                if key in existing_map:
+                    # Update Check
+                    perf = existing_map[key]
+                    if not perf.set_name or perf.set_name != set_name:
+                        perf.set_name = set_name
+                        session.add(perf)
+                        updated += 1
+                        total_updated += 1
                     continue
 
+                # Create New
                 perf = Performance(
                     show_id=show.id,
                     song_id=song_id,
                     position=position,
-                    set_name=item.get('set'),
+                    set_name=set_name,
                     segue=bool(item.get('segue', 0)),
                     notes=item.get('footnote')
                 )
                 session.add(perf)
-                existing.add(key)
+                existing_map[key] = perf  # Add to map to prevent dupes in same run
                 added += 1
                 total_added += 1
 
-            if added > 0:
+            if added > 0 or updated > 0:
                 session.commit()
                 processed += 1
-                print(f"Show {date_str}: +{added} songs ({total_added} total)")
+                print(f"Show {date_str}: +{added} new, ~{updated} updated")
 
-    print(f"\n✓ Added {total_added} performances from {processed} shows")
+    print(f"\nImport Complete! Added: {total_added}, Updated: {total_updated}")
 
 if __name__ == "__main__":
     main()
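Taken together, the `shows.json` changes in this script replace the old hard page cap (`if page > 50`) with real loop detection and an explicit artist filter. As a standalone illustration, the pattern looks roughly like this — a minimal sketch, where `fetch_goose_shows` and the `BASE_URL` value are illustrative (the script defines its own `BASE_URL` and inlines this logic):

```python
# Sketch of the pagination guard above: remember the first show_id of each page,
# stop when a page repeats (the API wraps around instead of returning empty),
# and keep only Goose rows. BASE_URL is a placeholder, not the script's value.
import requests

BASE_URL = "https://example.invalid/api"  # hypothetical; the real script sets its own

def fetch_goose_shows():
    api_shows = {}    # date_str -> api_show_id
    seen_ids = set()  # first show_id of every page already processed
    page = 1
    while True:
        try:
            resp = requests.get(f"{BASE_URL}/shows.json",
                                params={"page": page}, timeout=30)
            data = resp.json().get("data", [])
        except Exception as e:
            print(f"  Error on page {page}: {e}")
            break
        if not data:
            break
        first_id = data[0].get("show_id")
        if first_id in seen_ids:
            # A page we have already seen came back: the API looped.
            print(f"  Loop detected at page {page}")
            break
        if first_id:
            seen_ids.add(first_id)
        for s in data:
            if s.get("artist") != "Goose":  # only Goose shows
                continue
            api_shows[s["showdate"]] = s["show_id"]
        page += 1
    return api_shows
```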
@@ -38,7 +38,7 @@ def main():
     with Session(engine) as session:
         # 1. Build DB Map: Date string -> DB Show ID
         print("\n1. Building DB Map (Date -> Show ID)...")
-        shows = session.exec(select(Show.id, Show.date)).all()  # Only fetch needed fields
+        shows = session.exec(select(Show.id, Show.date)).all()
         date_to_db_id = {s.date.strftime('%Y-%m-%d'): s.id for s in shows}
         print(f"  Mapped {len(date_to_db_id)} existing shows in DB")

@@ -50,40 +50,34 @@ def main():
         gc.collect()
 
         # 2. Build API Map: ElGoose ID -> DB ID
-        # Process iteratively to save memory
         print("\n2. Building ElGoose ID -> DB ID map (Streaming)...")
         elgoose_id_to_db_id = {}
-        matched_count = 0
 
         page = 1
-        seen_ids_in_run = set()
+        seen_show_ids = set()
 
         while True:
-            # Fetch batch of shows
             print(f"  Fetching shows page {page}...", end="\r", flush=True)
-            data = fetch_json("shows", {"page": page})  # Fetch all shows (artist filter can be flaky)
+            data = fetch_json("shows", {"page": page})
             if not data:
                 break
 
-            # Check for API loop (if Page X returns same content as Page 1)
-            first_id_in_batch = data[0].get('show_id') if data else None
-            if first_id_in_batch and first_id_in_batch in seen_ids_in_run:
-                print(f"\n  Loop detected at page {page} (ID {first_id_in_batch} seen before). Breaking.")
+            # Loop Detection (Shows)
+            first_id = data[0].get('show_id') if data else None
+            if first_id and first_id in seen_show_ids:
+                print(f"\n  Loop detected in Shows at page {page} (ID {first_id}). Breaking.")
                 break
+            if first_id:
+                seen_show_ids.add(first_id)
 
             for s in data:
-                # We only need Goose shows (artist_id=3 usually, but we check date match)
                 s_date = s.get('showdate')
                 s_id = s.get('show_id')
 
-                if s_id:
-                    seen_ids_in_run.add(s_id)
-
                 if s_date and s_id:
                     db_id = date_to_db_id.get(s_date)
                     if db_id:
                         elgoose_id_to_db_id[s_id] = db_id
-                        matched_count += 1
 
             page += 1
             if page % 10 == 0:

@@ -93,7 +87,7 @@ def main():
         del date_to_db_id
         gc.collect()
 
-        # 3. Cache Songs
+        # 3. Caching Songs
        print("\n3. Caching Songs...")
         songs = session.exec(select(Song.id, Song.title)).all()
         song_map = {s.title.lower().strip(): s.id for s in songs}

@@ -101,31 +95,34 @@ def main():
         gc.collect()
         print(f"  Cached {len(song_map)} songs")
 
-        # 4. Process Setlists
+        # 4. Importing Setlists
         print("\n4. Importing Setlists...")
         page = 1
         total_added = 0
+        seen_batch_signatures = set()
+
+        # Cache existing performance keys (show_id, song_id, position)
+        print("  Caching existing performance keys...")
+        perfs = session.exec(select(Performance.show_id, Performance.song_id, Performance.position)).all()
+        existing_keys = set((p.show_id, p.song_id, p.position) for p in perfs)
+        print(f"  Cached {len(existing_keys)} existing performances")
+        del perfs
+        gc.collect()
 
         while True:
             data = fetch_json("setlists", {"page": page})
             if not data:
                 break
 
-            # Prefetch checks for this batch to avoid N+1 SELECTs?
-            # Actually with 3600 perfs, one-by-one check is slow.
-            # But "existing check" is needed.
-            # We can cache *existing performances* for the CURRENT batch's shows?
-            # Or just cache ALL existing performance keys (show_id, song_id, position)?
-            # Performance table might be large (40k rows?).
-            # (show_id, song_id, position) tuples set is ~2MB RAM. Safe.
-
-            if page == 1:
-                print("  Caching existing performance keys...")
-                perfs = session.exec(select(Performance.show_id, Performance.song_id, Performance.position)).all()
-                existing_keys = set((p.show_id, p.song_id, p.position) for p in perfs)
-                print(f"  Cached {len(existing_keys)} performance keys")
-                del perfs
-                gc.collect()
+            # Loop Detection (Setlists)
+            # Use signature of first item: (uniqueid or show_id+position)
+            if data:
+                first = data[0]
+                signature = f"{first.get('uniqueid')}-{first.get('show_id')}-{first.get('position')}"
+                if signature in seen_batch_signatures:
+                    print(f"\n  Loop detected in Setlists at page {page} (Sig {signature}). Breaking.")
+                    break
+                seen_batch_signatures.add(signature)
 
             batch_added = 0
             new_objects = []

@@ -143,11 +140,9 @@ def main():
 
                 position = perf.get('position', 0)
 
-                # Check uniqueness
                 if (db_show_id, song_id, position) in existing_keys:
                     continue
 
-                # Create
                 set_val = str(perf.get('setnumber', '1'))
                 if set_val.isdigit():
                     set_name = f"Set {set_val}"

@@ -171,7 +166,7 @@ def main():
                     slug=f"{generate_slug(song_name)}-{db_show_id}-{position}"
                 )
                 new_objects.append(new_perf)
-                existing_keys.add((db_show_id, song_id, position))  # Add to cache
+                existing_keys.add((db_show_id, song_id, position))
                 batch_added += 1
                 total_added += 1
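Both importers now derive `set_name` from the API's `setnumber` field rather than the raw `set` value. Pulled out as a helper, the shared mapping looks like this — the function name is illustrative, since both scripts inline the logic:

```python
# The setnumber -> set_name mapping used by both import scripts.
def resolve_set_name(item: dict) -> str:
    set_val = str(item.get('setnumber', '1'))
    if set_val.isdigit():
        return f"Set {set_val}"   # "1" -> "Set 1", "2" -> "Set 2", ...
    if set_val.lower() == 'e':
        return "Encore"
    if set_val.lower() == 'e2':
        return "Encore 2"
    if set_val.lower() == 's':
        return "Soundcheck"
    return f"Set {set_val}"       # fallback for unexpected values

assert resolve_set_name({'setnumber': 2}) == "Set 2"
assert resolve_set_name({'setnumber': 'e'}) == "Encore"
```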

backend/repro_review_crash.py (new file, 57 lines)

@@ -0,0 +1,57 @@
+import pytest
+from sqlmodel import Session, SQLModel, create_engine
+from models import User, Review, Show, Rating
+from schemas import ReviewCreate
+from services.gamification import award_xp
+from routers.reviews import create_review
+from fastapi import HTTPException
+
+# Mock auth
+def mock_get_current_user():
+    return User(id=1, email="test@test.com", hashed_password="pw", is_active=True)
+
+# Setup in-memory DB
+sqlite_file_name = "test_review_debug.db"
+sqlite_url = f"sqlite:///{sqlite_file_name}"
+engine = create_engine(sqlite_url)
+
+def test_repro_review_crash():
+    SQLModel.metadata.create_all(engine)
+
+    with Session(engine) as session:
+        # Create dummy user and show
+        user = User(email="test@test.com", hashed_password="pw")
+        session.add(user)
+
+        show = Show(date="2025-01-01", slug="test-show")
+        session.add(show)
+        session.commit()
+        session.refresh(user)
+        session.refresh(show)
+
+        print(f"User ID: {user.id}, Show ID: {show.id}")
+
+        # Payload
+        review_payload = ReviewCreate(
+            show_id=show.id,
+            content="Test Review Content",
+            blurb="Test Blurb",
+            score=5.0
+        )
+
+        try:
+            print("Attempting to create review...")
+            result = create_review(
+                review=review_payload,
+                session=session,
+                current_user=user
+            )
+            print("Review created successfully:", result)
+        except Exception as e:
+            print(f"\nCRASH DETECTED: {e}")
+            import traceback
+            traceback.print_exc()
+
+if __name__ == "__main__":
+    test_repro_review_crash()
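The repro script has a `__main__` guard, so it can run either under pytest or directly; following the container naming in the docs above, an invocation along the lines of `docker exec elmeg-backend-1 python repro_review_crash.py` (path inside the container assumed) should print either the created review or the captured traceback.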