""" Fetch all videos from Goose YouTube channel using YouTube Data API v3 """ import requests import json import re from datetime import datetime API_KEY = "AIzaSyCxDpv6HM-sPD8vPJIBffwa2-skOpEJkOU" CHANNEL_HANDLE = "@GooseTheBand" def get_channel_id(handle: str) -> str: """Get channel ID from handle.""" url = "https://www.googleapis.com/youtube/v3/search" params = { "key": API_KEY, "q": handle, "type": "channel", "part": "snippet", "maxResults": 1 } resp = requests.get(url, params=params) data = resp.json() if "items" in data and len(data["items"]) > 0: return data["items"][0]["snippet"]["channelId"] return None def get_uploads_playlist_id(channel_id: str) -> str: """Get the uploads playlist ID for a channel.""" url = "https://www.googleapis.com/youtube/v3/channels" params = { "key": API_KEY, "id": channel_id, "part": "contentDetails" } resp = requests.get(url, params=params) data = resp.json() if "items" in data and len(data["items"]) > 0: return data["items"][0]["contentDetails"]["relatedPlaylists"]["uploads"] return None def get_all_videos(playlist_id: str) -> list: """Fetch all videos from a playlist (handles pagination).""" videos = [] url = "https://www.googleapis.com/youtube/v3/playlistItems" next_page_token = None while True: params = { "key": API_KEY, "playlistId": playlist_id, "part": "snippet,contentDetails", "maxResults": 50 } if next_page_token: params["pageToken"] = next_page_token resp = requests.get(url, params=params) data = resp.json() if "error" in data: print(f"API Error: {data['error']}") break for item in data.get("items", []): snippet = item["snippet"] video = { "videoId": snippet["resourceId"]["videoId"], "title": snippet["title"], "description": snippet.get("description", ""), "publishedAt": snippet["publishedAt"], "thumbnails": snippet.get("thumbnails", {}) } videos.append(video) next_page_token = data.get("nextPageToken") print(f"Fetched {len(videos)} videos so far...") if not next_page_token: break return videos def parse_video_metadata(videos: list) -> list: """Parse video titles to extract show date and type.""" parsed = [] # Date patterns to look for in titles/descriptions date_patterns = [ r'(\d{1,2})[./](\d{1,2})[./](\d{2,4})', # M/D/YY or M.D.YYYY r'(\d{4})-(\d{2})-(\d{2})', # YYYY-MM-DD ] for video in videos: title = video["title"] desc = video.get("description", "") # Determine video type video_type = "song" # default title_lower = title.lower() if "full show" in title_lower or "live at" in title_lower or "night 1" in title_lower or "night 2" in title_lower or "night 3" in title_lower: video_type = "full_show" elif "→" in title or "->" in title: video_type = "sequence" elif "documentary" in title_lower or "behind" in title_lower: video_type = "documentary" elif "visualizer" in title_lower: video_type = "visualizer" elif "session" in title_lower or "studio" in title_lower: video_type = "session" # Try to extract date show_date = None # Check description first (often has date info) combined_text = f"{title} {desc}" for pattern in date_patterns: match = re.search(pattern, combined_text) if match: groups = match.groups() try: if len(groups[0]) == 4: # YYYY-MM-DD show_date = f"{groups[0]}-{groups[1]}-{groups[2]}" else: # M/D/YY year = groups[2] if len(year) == 2: year = "20" + year if int(year) < 50 else "19" + year month = groups[0].zfill(2) day = groups[1].zfill(2) show_date = f"{year}-{month}-{day}" break except: pass # Extract venue from title if possible venue = None venue_patterns = [ r'@ (.+)$', r'at (.+?) -', r'Live at (.+)', r'- (.+?, [A-Z]{2})$', ] for pattern in venue_patterns: match = re.search(pattern, title, re.IGNORECASE) if match: venue = match.group(1).strip() break parsed.append({ "videoId": video["videoId"], "title": title, "date": show_date, "venue": venue, "type": video_type, "publishedAt": video["publishedAt"] }) return parsed def main(): print("Fetching Goose YouTube channel videos...") # Get channel ID print(f"Looking up channel: {CHANNEL_HANDLE}") channel_id = get_channel_id(CHANNEL_HANDLE) if not channel_id: print("Could not find channel!") return print(f"Channel ID: {channel_id}") # Get uploads playlist uploads_playlist = get_uploads_playlist_id(channel_id) if not uploads_playlist: print("Could not find uploads playlist!") return print(f"Uploads playlist: {uploads_playlist}") # Fetch all videos videos = get_all_videos(uploads_playlist) print(f"\nTotal videos found: {len(videos)}") # Parse metadata parsed = parse_video_metadata(videos) # Save to JSON output_file = "youtube_videos.json" with open(output_file, 'w') as f: json.dump(parsed, f, indent=2) print(f"\nSaved to {output_file}") # Show stats types = {} dated = 0 for v in parsed: types[v["type"]] = types.get(v["type"], 0) + 1 if v["date"]: dated += 1 print("\n=== Stats ===") print(f"Total: {len(parsed)}") print(f"With dates: {dated}") for vtype, count in sorted(types.items()): print(f" {vtype}: {count}") if __name__ == "__main__": main()