# elmeg-demo/backend/populate_links.py


import requests
from bs4 import BeautifulSoup
from sqlmodel import Session, select
from database import engine
from models import Show
import time
import re

# El Goose endpoints.
# API_BASE is the root of the JSON API (used for the shows.json listing);
# SITE_BASE is the root of the public website whose show pages are scraped.
API_BASE = "https://elgoose.net/api/v2"
SITE_BASE = "https://elgoose.net"

def get_shows_from_api():
    """Fetch all shows with their permalinks from the El Goose API.

    Issues a single GET to ``{API_BASE}/shows.json`` and prints progress to
    stdout. This is best-effort: network errors, HTTP error statuses and
    undecodable JSON are reported on stdout and turned into an empty result
    instead of being raised.

    Returns:
        The value of the ``data`` field of the JSON response (presumably a
        list of show dicts -- confirm against the API), or an empty list
        when the request fails or the response carries no show data.
    """
    print("Fetching all shows from API...")
    url = f"{API_BASE}/shows.json"
    params = {"artist": 1, "order_by": "showdate", "direction": "DESC"}

    # The API appears to return every show in one response when no limit is
    # set, so a single request is made and no pagination is attempted.
    print("  Fetching shows...", end="", flush=True)
    try:
        # requests never times out by default; bound the wait so a stalled
        # server cannot hang the script indefinitely.
        resp = requests.get(url, params=params, timeout=30)
        resp.raise_for_status()
        data = resp.json()
    except (requests.RequestException, ValueError) as e:
        # RequestException covers connection/timeout/HTTP-status failures;
        # ValueError covers a body that is not valid JSON.
        print(f" Error: {e}")
        return []

    # Guard against a JSON payload that is not an object before reading
    # its "data" field.
    shows = data.get("data") if isinstance(data, dict) else None
    if not shows:
        # Terminate the progress line started above (it was printed with end="").
        print(" No shows in response.")
        return []

    print(f" Got {len(shows)} shows.")
    return shows

def scrape_links(session_http, permalink):
    """Look up the Bandcamp and Nugs links on an El Goose show page.

    The page is requested at ``{SITE_BASE}/setlists/{permalink}``; if that
    answers 404, one retry is made at ``{SITE_BASE}/{permalink}``. Every
    ``<a>`` tag carrying an ``href`` is inspected, and when several anchors
    match the same service the last one in document order is kept.

    Args:
        session_http: Object exposing ``get(url, timeout=...)`` whose result
            provides ``status_code`` and ``text``.
        permalink: Show permalink appended to the site URL. A falsy value
            short-circuits without any request.

    Returns:
        A ``(bandcamp, nugs)`` tuple of href strings. Either element is
        ``None`` when no matching link was found; both are ``None`` when the
        permalink is empty, the final response is not a 200, or any
        exception occurs while fetching or parsing (the error is printed).
    """
    if not permalink:
        return None, None

    page_url = f"{SITE_BASE}/setlists/{permalink}"

    try:
        page = session_http.get(page_url, timeout=10)
        if page.status_code == 404:
            # Fall back to the same permalink directly under the site root.
            page = session_http.get(f"{SITE_BASE}/{permalink}", timeout=10)

        if page.status_code != 200:
            return None, None

        # Substrings that qualify an href for each service. Both Bandcamp
        # markers already contain "bandcamp.com", so no separate host check
        # is needed for them.
        bandcamp_markers = ('goosetheband.bandcamp.com', 'bandcamp.com/album')
        nugs_markers = ('/goose-', 'recording')

        bandcamp_url = None
        nugs_url = None

        anchors = BeautifulSoup(page.text, 'html.parser').find_all('a', href=True)
        for anchor in anchors:
            target = anchor['href']

            if any(marker in target for marker in bandcamp_markers):
                bandcamp_url = target

            if 'nugs.net' in target and any(marker in target for marker in nugs_markers):
                nugs_url = target

        return bandcamp_url, nugs_url

    except Exception as e:
        print(f"  ⚠️ Scraping error: {e}")
        return None, None

def main():
    """Populate missing Bandcamp/Nugs links on the shows stored in the DB.

    Steps:
      1. Fetch the show list from the API and index permalinks by show date.
      2. Load every ``Show`` row, newest first.
      3. For each row that has a matching permalink and is missing at least
         one link, scrape the show page and store any newly found link.

    Each updated show is committed on its own, so a failure on one row does
    not discard the others. Progress is printed to stdout. Nothing is
    returned.
    """
    print("🔗 Starting Link Population Script...")

    # 1. Fetch API data
    api_shows = get_shows_from_api()
    print(f"✓ Found {len(api_shows)} shows in API.")

    if not api_shows:
        print("❌ No shows found in API. Exiting.")
        return

    # Keyed by the API's "showdate" string; DB dates are formatted as
    # YYYY-MM-DD below to look entries up in this mapping.
    date_to_permalink = {s['showdate']: s['permalink'] for s in api_shows}

    updates = 0

    # Both the HTTP session and the DB session are context-managed so their
    # connections are released even if the loop raises.
    with requests.Session() as http, Session(engine) as session:
        http.headers.update({
            "User-Agent": "ElmegDemoBot/1.0 (+http://elmeg.xyz)"
        })

        # 2. Get our DB shows, newest first so recent shows are updated first
        db_shows = sorted(
            session.exec(select(Show)).all(),
            key=lambda x: x.date,
            reverse=True,
        )
        total = len(db_shows)

        print(f"✓ Found {total} shows in DB to check.")

        for checked, show in enumerate(db_shows, start=1):
            s_date = show.date.strftime("%Y-%m-%d")
            permalink = date_to_permalink.get(s_date)

            if not permalink:
                continue

            # Skip if we already have both
            if show.bandcamp_link and show.nugs_link:
                continue

            print(f"[{checked}/{total}] {s_date}...", end="", flush=True)

            bc_link, nugs_link = scrape_links(http, permalink)

            # Only overwrite a stored link when the scrape found a different,
            # non-empty value.
            updated = False
            if bc_link and bc_link != show.bandcamp_link:
                show.bandcamp_link = bc_link
                updated = True
                print(" [BC]", end="")

            if nugs_link and nugs_link != show.nugs_link:
                show.nugs_link = nugs_link
                updated = True
                print(" [Nugs]", end="")

            if updated:
                session.add(show)
                try:
                    session.commit()
                    # Count the show only once the commit has succeeded.
                    updates += 1
                    session.refresh(show)
                    print(" ✓")
                except Exception as e:
                    # A failed commit leaves the session unusable until it is
                    # rolled back; without this every later commit would fail.
                    session.rollback()
                    print(f" ❌ Save error: {e}")
            else:
                print(" -")

            # Small delay between page requests
            time.sleep(0.1)

    print(f"\n🎉 Done! Updated {updates} shows.")

# Run the link population only when executed as a script, not on import.
if __name__ == "__main__":
    main()