"""
|
|
Fetch all videos from Goose YouTube channel using YouTube Data API v3
|
|
"""
|
|
import requests
|
|
import json
|
|
import re
|
|
from datetime import datetime
|
|
|
|
API_KEY = "AIzaSyCxDpv6HM-sPD8vPJIBffwa2-skOpEJkOU"
|
|
CHANNEL_HANDLE = "@GooseTheBand"
|
|
|
|


def get_channel_id(handle: str) -> str | None:
    """Look up a channel ID from a handle via the search endpoint."""
    url = "https://www.googleapis.com/youtube/v3/search"
    params = {
        "key": API_KEY,
        "q": handle,
        "type": "channel",
        "part": "snippet",
        "maxResults": 1,
    }
    resp = requests.get(url, params=params, timeout=30)
    data = resp.json()
    if data.get("items"):
        return data["items"][0]["snippet"]["channelId"]
    return None
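

# Possible alternative (a sketch, not used by main() below): channels.list
# accepts a `forHandle` parameter that resolves a handle directly, avoiding
# the search endpoint and its much higher quota cost.
def get_channel_id_by_handle(handle: str) -> str | None:
    """Resolve a handle straight to a channel ID via channels.list."""
    resp = requests.get(
        "https://www.googleapis.com/youtube/v3/channels",
        params={"key": API_KEY, "forHandle": handle, "part": "id"},
        timeout=30,
    )
    items = resp.json().get("items", [])
    return items[0]["id"] if items else None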


def get_uploads_playlist_id(channel_id: str) -> str | None:
    """Get the uploads playlist ID for a channel."""
    url = "https://www.googleapis.com/youtube/v3/channels"
    params = {
        "key": API_KEY,
        "id": channel_id,
        "part": "contentDetails",
    }
    resp = requests.get(url, params=params, timeout=30)
    data = resp.json()
    if data.get("items"):
        return data["items"][0]["contentDetails"]["relatedPlaylists"]["uploads"]
    return None
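

# Shortcut (widely observed convention, not a documented guarantee): the
# uploads playlist ID is normally the channel ID with its "UC" prefix swapped
# for "UU", so the network call above can often be skipped.
def uploads_playlist_from_channel_id(channel_id: str) -> str:
    """Derive the uploads playlist ID locally, assuming the UC -> UU convention."""
    return "UU" + channel_id[len("UC"):]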


def get_all_videos(playlist_id: str) -> list:
    """Fetch all videos from a playlist, following nextPageToken pagination."""
    videos = []
    url = "https://www.googleapis.com/youtube/v3/playlistItems"
    next_page_token = None

    while True:
        params = {
            "key": API_KEY,
            "playlistId": playlist_id,
            "part": "snippet,contentDetails",
            "maxResults": 50,  # the maximum page size the API allows
        }
        if next_page_token:
            params["pageToken"] = next_page_token

        resp = requests.get(url, params=params, timeout=30)
        data = resp.json()

        if "error" in data:
            print(f"API Error: {data['error']}")
            break

        for item in data.get("items", []):
            snippet = item["snippet"]
            video = {
                "videoId": snippet["resourceId"]["videoId"],
                "title": snippet["title"],
                "description": snippet.get("description", ""),
                "publishedAt": snippet["publishedAt"],
                "thumbnails": snippet.get("thumbnails", {}),
            }
            videos.append(video)

        next_page_token = data.get("nextPageToken")
        print(f"Fetched {len(videos)} videos so far...")

        if not next_page_token:
            break

    return videos
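

# For reference, an abridged playlistItems.list response looks roughly like
# this (only the fields read above are shown; values are illustrative):
#
#   {
#     "nextPageToken": "CDIQAA",
#     "items": [
#       {
#         "snippet": {
#           "title": "...",
#           "description": "...",
#           "publishedAt": "2023-06-10T00:00:00Z",
#           "thumbnails": {"default": {"url": "..."}},
#           "resourceId": {"videoId": "abc123XYZ_w"}
#         }
#       }
#     ]
#   }
#
# The loop stops when "nextPageToken" is absent from the response.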


def parse_video_metadata(videos: list) -> list:
    """Parse video titles/descriptions to extract show date, venue, and type."""
    parsed = []

    # Date patterns to look for in titles/descriptions
    date_patterns = [
        r'(\d{1,2})[./](\d{1,2})[./](\d{2,4})',  # M/D/YY or M.D.YYYY
        r'(\d{4})-(\d{2})-(\d{2})',              # YYYY-MM-DD
    ]

    for video in videos:
        title = video["title"]
        desc = video.get("description", "")

        # Determine video type from keywords in the title
        video_type = "song"  # default
        title_lower = title.lower()

        if any(kw in title_lower for kw in ("full show", "live at", "night 1", "night 2", "night 3")):
            video_type = "full_show"
        elif "→" in title or "->" in title:
            video_type = "sequence"
        elif "documentary" in title_lower or "behind" in title_lower:
            video_type = "documentary"
        elif "visualizer" in title_lower:
            video_type = "visualizer"
        elif "session" in title_lower or "studio" in title_lower:
            video_type = "session"

        # Try to extract a show date; search the title and description
        # together, since the description often carries the date.
        show_date = None
        combined_text = f"{title} {desc}"
        for pattern in date_patterns:
            match = re.search(pattern, combined_text)
            if match:
                groups = match.groups()
                try:
                    if len(groups[0]) == 4:  # YYYY-MM-DD
                        show_date = f"{groups[0]}-{groups[1]}-{groups[2]}"
                    else:  # M/D/YY or M/D/YYYY
                        year = groups[2]
                        if len(year) == 2:
                            year = "20" + year if int(year) < 50 else "19" + year
                        month = groups[0].zfill(2)
                        day = groups[1].zfill(2)
                        show_date = f"{year}-{month}-{day}"
                    break
                except (ValueError, IndexError):
                    pass

        # Extract venue from the title if possible
        venue = None
        venue_patterns = [
            r'@ (.+)$',
            r'at (.+?) -',
            r'Live at (.+)',
            r'- (.+?, [A-Z]{2})$',
        ]
        for pattern in venue_patterns:
            match = re.search(pattern, title, re.IGNORECASE)
            if match:
                venue = match.group(1).strip()
                break

        parsed.append({
            "videoId": video["videoId"],
            "title": title,
            "date": show_date,
            "venue": venue,
            "type": video_type,
            "publishedAt": video["publishedAt"],
        })

    return parsed
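

# Illustrative check (hypothetical input, not real channel data): a record like
#   {"videoId": "x", "publishedAt": "2023-06-11T00:00:00Z",
#    "title": "Hungersite - 6/10/23 - Bonnaroo"}
# parses to type "song", date "2023-06-10", and venue None (no venue pattern
# matches a title of this shape; the venue heuristics are best-effort).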


def main():
    print("Fetching Goose YouTube channel videos...")

    # Get channel ID
    print(f"Looking up channel: {CHANNEL_HANDLE}")
    channel_id = get_channel_id(CHANNEL_HANDLE)
    if not channel_id:
        print("Could not find channel!")
        return
    print(f"Channel ID: {channel_id}")

    # Get uploads playlist
    uploads_playlist = get_uploads_playlist_id(channel_id)
    if not uploads_playlist:
        print("Could not find uploads playlist!")
        return
    print(f"Uploads playlist: {uploads_playlist}")

    # Fetch all videos
    videos = get_all_videos(uploads_playlist)
    print(f"\nTotal videos found: {len(videos)}")

    # Parse metadata
    parsed = parse_video_metadata(videos)

    # Save to JSON; keep non-ASCII characters like "→" readable in the output
    output_file = "youtube_videos.json"
    with open(output_file, "w", encoding="utf-8") as f:
        json.dump(parsed, f, indent=2, ensure_ascii=False)
    print(f"\nSaved to {output_file}")

    # Show stats
    types = {}
    dated = 0
    for v in parsed:
        types[v["type"]] = types.get(v["type"], 0) + 1
        if v["date"]:
            dated += 1

    print("\n=== Stats ===")
    print(f"Total: {len(parsed)}")
    print(f"With dates: {dated}")
    for vtype, count in sorted(types.items()):
        print(f"  {vtype}: {count}")


if __name__ == "__main__":
    main()
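

# Downstream consumers can reload the dump like so (path matches main() above):
#   with open("youtube_videos.json", encoding="utf-8") as f:
#       full_shows = [v for v in json.load(f) if v["type"] == "full_show"]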