fix: update Nugs discovery to scrape catalog page
Some checks are pending
Deploy Elmeg / deploy (push) Waiting to run
Some checks are pending
Deploy Elmeg / deploy (push) Waiting to run
This commit is contained in:
parent
58edc0e070
commit
d54b217264
1 changed files with 36 additions and 42 deletions
|
|
@ -73,62 +73,56 @@ def scrape_nugs_catalog() -> dict:
|
|||
Scrape the Nugs Goose catalog page to build a mapping of date -> URL.
|
||||
Returns dict like {"2024-12-31": "https://www.nugs.net/..."}
|
||||
"""
|
||||
catalog_url = "https://play.nugs.net/artist/461/latest"
|
||||
catalog_url = "https://www.nugs.net/goose-concerts-live-downloads-in-mp3-flac-or-online-music-streaming/"
|
||||
nugs_shows = {}
|
||||
|
||||
try:
|
||||
# The Nugs catalog is a SPA, so we need to use their API
|
||||
# API endpoint for artist shows
|
||||
api_url = "https://streamapi.nugs.net/api.aspx"
|
||||
params = {
|
||||
"method": "catalog.container",
|
||||
"containerType": "artist",
|
||||
"containerId": "461", # Goose's artist ID
|
||||
}
|
||||
|
||||
resp = requests.get(api_url, params=params, headers=HEADERS, timeout=10)
|
||||
resp = requests.get(catalog_url, headers=HEADERS, timeout=30)
|
||||
if resp.status_code != 200:
|
||||
print(f"Failed to fetch Nugs catalog: {resp.status_code}")
|
||||
return {}
|
||||
|
||||
# Parse response to extract shows
|
||||
data = resp.json()
|
||||
if "Response" in data and "Shows" in data["Response"]:
|
||||
for show in data["Response"]["Shows"]:
|
||||
perfDate = show.get("perfDate", "")[:10] # YYYY-MM-DD
|
||||
showId = show.get("showID")
|
||||
if perfDate and showId:
|
||||
nugs_shows[perfDate] = f"https://www.nugs.net/live-download/goose-{perfDate.replace('-', '')}-{showId}.html"
|
||||
html = resp.text
|
||||
|
||||
# Extract URLs and dates using regex
|
||||
# URL pattern: https://www.nugs.net/live-download-of-goose-...-MM-DD-YYYY-mp3-flac-or-online-music-streaming/ID.html
|
||||
url_pattern = r'(https://www\.nugs\.net/live-download-of-goose-[^"]+\.html)'
|
||||
urls = re.findall(url_pattern, html)
|
||||
|
||||
# Date pattern in URL: MM-DD-YYYY (before mp3-flac...)
|
||||
for url in set(urls): # deduplicate
|
||||
# Try to extract date from URL
|
||||
date_match = re.search(r'-(\d{1,2})-(\d{1,2})-(\d{4})-mp3-flac', url)
|
||||
if date_match:
|
||||
month, day, year = date_match.groups()
|
||||
date_str = f"{year}-{int(month):02d}-{int(day):02d}"
|
||||
nugs_shows[date_str] = url
|
||||
|
||||
print(f" Scraped {len(nugs_shows)} Nugs show URLs from catalog")
|
||||
|
||||
except Exception as e:
|
||||
print(f"Error scraping Nugs: {e}")
|
||||
|
||||
return nugs_shows
|
||||
|
||||
def discover_nugs_url_fallback(show: Show, venue: Venue) -> str | None:
|
||||
# Global cache for Nugs catalog
|
||||
_nugs_catalog = None
|
||||
|
||||
def get_nugs_catalog():
    """Return the module-level Nugs catalog, scraping it on first use.

    The catalog is stored in the ``_nugs_catalog`` global. Only a value of
    ``None`` triggers a scrape, so whatever ``scrape_nugs_catalog()`` returns
    (including an empty dict) is kept and reused by later calls.
    """
    global _nugs_catalog

    # Fast path: a previous call already populated the cache.
    if _nugs_catalog is not None:
        return _nugs_catalog

    print("Building Nugs catalog from website...")
    _nugs_catalog = scrape_nugs_catalog()
    return _nugs_catalog
|
||||
|
||||
def discover_nugs_url(show: Show) -> str | None:
|
||||
"""
|
||||
Fallback: Try constructing Nugs URL patterns if API doesn't work.
|
||||
Look up Nugs URL from catalog by date.
|
||||
"""
|
||||
if not venue:
|
||||
return None
|
||||
|
||||
date_str = show.date.strftime("%Y-%m-%d")
|
||||
date_compact = show.date.strftime("%Y%m%d")
|
||||
|
||||
# Try common Nugs URL patterns
|
||||
city_slug = slugify_for_bandcamp(venue.city) if venue.city else ""
|
||||
state = venue.state.lower() if venue.state else ""
|
||||
|
||||
patterns = [
|
||||
f"https://www.nugs.net/live-download/goose-{date_compact}-{city_slug}-{state}.html",
|
||||
f"https://www.nugs.net/live-download/goose-{date_compact}.html",
|
||||
]
|
||||
|
||||
for url in patterns:
|
||||
if check_url_exists(url):
|
||||
return url
|
||||
|
||||
return None
|
||||
catalog = get_nugs_catalog()
|
||||
return catalog.get(date_str)
|
||||
|
||||
def main(dry_run: bool = True, limit: int = None):
|
||||
"""
|
||||
|
|
@ -177,9 +171,9 @@ def main(dry_run: bool = True, limit: int = None):
|
|||
else:
|
||||
already_have_bandcamp += 1
|
||||
|
||||
# Nugs discovery (using fallback for now)
|
||||
# Nugs discovery
|
||||
if not show.nugs_link or generic_nugs:
|
||||
nugs_url = discover_nugs_url_fallback(show, venue)
|
||||
nugs_url = discover_nugs_url(show)
|
||||
if nugs_url:
|
||||
print(f"✓ Nugs: {date_str} -> {nugs_url}")
|
||||
nugs_found += 1
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue