"""
Bandcamp Catalog Import Script

Scrapes the Goose Bandcamp discography page to get all album URLs,
then matches them to shows by date and updates the database.

Run: python import_bandcamp_catalog.py [--save]
"""
import re
import requests
from datetime import datetime
from sqlmodel import Session, select
from database import engine
from models import Show

# Browser-style User-Agent string, kept in one place so the header mapping
# below stays short.
_USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36'

# HTTP headers passed along with the request for the Bandcamp catalog page.
HEADERS = {"User-Agent": _USER_AGENT}

def scrape_bandcamp_catalog(html: "str | None" = None) -> dict:
    """
    Scrape the Goose Bandcamp music page for all dated album URLs.

    Args:
        html: Optional page markup to parse instead of downloading the live
            page (e.g. a saved copy). When omitted, the page is fetched.

    Returns:
        dict mapping date (YYYY-MM-DD) -> album URL. Empty when the page
        could not be fetched or contains no dated album links. When the
        same date appears more than once, the first URL on the page wins.
    """
    music_url = "https://goosetheband.bandcamp.com/music"

    if html is None:
        # Only the network call is guarded, so genuine programming errors
        # are not silently reported as a scrape failure.
        try:
            resp = requests.get(music_url, headers=HEADERS, timeout=30)
        except requests.RequestException as e:
            print(f"Error scraping Bandcamp: {e}")
            return {}

        if resp.status_code != 200:
            print(f"Failed to fetch Bandcamp catalog: {resp.status_code}")
            return {}

        html = resp.text

    # Pattern: https://goosetheband.bandcamp.com/album/YYYY-MM-DD-<slug>
    # The slug stops at quotes, angle brackets and whitespace, and also at
    # '&', '?', '#' and '\' so that query strings, fragments and
    # HTML-escaped quotes (&quot;) following a link are not swallowed
    # into the stored URL.
    url_pattern = r'https://goosetheband\.bandcamp\.com/album/(\d{4})-(\d{2})-(\d{2})-[^"\'<>\s&?#\\]+'

    albums = {}
    for match in re.finditer(url_pattern, html):
        year, month, day = match.groups()
        # setdefault keeps the first URL seen for each date.
        albums.setdefault(f"{year}-{month}-{day}", match.group(0))

    print(f"Scraped {len(albums)} Bandcamp album URLs")
    return albums

def main(dry_run: bool = True):
    """
    Match scraped Bandcamp album URLs to shows by date and report the result.

    Args:
        dry_run: When true (the default) matches are only printed. When
            false, each matched show's ``bandcamp_link`` is set and the
            session is committed.
    """
    banner = "=" * 60
    print(banner)
    print("Bandcamp Catalog Import")
    print(banner)

    # Nothing to match against if the scrape came back empty.
    catalog = scrape_bandcamp_catalog()
    if not catalog:
        print("No albums found, exiting.")
        return

    persist = not dry_run
    linked = 0
    skipped = 0
    missing = 0

    with Session(engine) as session:
        all_shows = session.exec(select(Show)).all()
        print(f"Found {len(all_shows)} shows in database")

        for show in all_shows:
            show_date = show.date.strftime("%Y-%m-%d")

            # A link that already points at an album page is left untouched.
            existing = show.bandcamp_link
            if existing and "/album/" in existing:
                skipped += 1
                continue

            # No album in the catalog carries this show's date.
            if show_date not in catalog:
                missing += 1
                continue

            album_url = catalog[show_date]
            print(f"✓ {show_date} -> {album_url}")
            linked += 1

            if persist:
                show.bandcamp_link = album_url
                session.add(show)

        if persist:
            session.commit()

        print("\n" + banner)
        print("Summary")
        print(banner)
        print(f"Shows with new Bandcamp link: {linked}")
        print(f"Shows already had album link: {skipped}")
        print(f"Shows not found in Bandcamp: {missing}")

        if dry_run:
            print("\n[DRY RUN - Run with --save to commit changes]")

if __name__ == "__main__":
    import sys

    # Changes are written only when --save is given on the command line;
    # otherwise the import runs as a dry run.
    save_requested = "--save" in sys.argv
    main(dry_run=not save_requested)