""" YouTube Video Import Script v2 Imports videos from youtube_videos.json into the database. """ import json import re from datetime import datetime from sqlmodel import Session, select from database import engine from models import Performance, Show, Song def make_youtube_url(video_id: str) -> str: return f"https://www.youtube.com/watch?v={video_id}" def extract_song_title(title: str) -> str: """Extract the actual song title from YouTube video title.""" # Remove common prefixes title = re.sub(r'^Goose\s*[-–—]\s*', '', title, flags=re.IGNORECASE) # Remove date patterns at end (e.g., "- 12/13/25 Providence, RI") title = re.sub(r'\s*[-–—]\s*\d{1,2}/\d{1,2}/\d{2,4}.*$', '', title) # Remove "Live at..." suffix title = re.sub(r'\s*[-–—]\s*Live at.*$', '', title, flags=re.IGNORECASE) # Remove "(Official Audio)" etc title = re.sub(r'\s*\(Official\s*(Audio|Video|Visualizer)\)', '', title, flags=re.IGNORECASE) # Remove "(4K HDR)" etc title = re.sub(r'\s*\(4K\s*HDR\)', '', title, flags=re.IGNORECASE) # Remove "Set I Opener" etc title = re.sub(r'\s*Set\s*(I|II|1|2)?\s*Opener.*$', '', title, flags=re.IGNORECASE) return title.strip() def import_videos(): """Import video links into the database.""" with open("youtube_videos.json", 'r') as f: videos = json.load(f) stats = { 'songs_matched': 0, 'songs_not_found': 0, 'sequences_processed': 0, 'full_shows_matched': 0, 'full_shows_not_found': 0, 'no_date': 0, 'skipped': 0, 'show_not_found': 0 } with Session(engine) as session: for video in videos: video_id = video.get('videoId') raw_title = video.get('title', '') video_type = video.get('type', 'song') date_str = video.get('date') youtube_url = make_youtube_url(video_id) # Skip non-performance content if video_type in ('documentary', 'visualizer', 'session'): stats['skipped'] += 1 continue # Skip videos without dates (can't match to show) if not date_str: stats['no_date'] += 1 continue # Parse date try: show_date = datetime.strptime(date_str, '%Y-%m-%d') except ValueError: stats['no_date'] += 1 continue # Find show by date show = session.exec( select(Show).where(Show.date == show_date) ).first() if not show: stats['show_not_found'] += 1 continue # Handle full shows - link to Show entity if video_type == 'full_show': show.youtube_link = youtube_url session.add(show) stats['full_shows_matched'] += 1 print(f"[FULL SHOW] {date_str}: {raw_title[:50]}") continue # Extract song title song_title = extract_song_title(raw_title) # Handle sequences (multiple songs with →) if video_type == 'sequence' or '→' in song_title: song_titles = [s.strip() for s in re.split(r'[→>]', song_title)] matched_any = False for title in song_titles: if not title: continue # Find song by title (case insensitive partial match) songs = session.exec( select(Song).where(Song.title.ilike(f"%{title}%")) ).all() for song in songs: perf = session.exec( select(Performance).where( Performance.show_id == show.id, Performance.song_id == song.id ) ).first() if perf: perf.youtube_link = youtube_url session.add(perf) matched_any = True print(f"[SEQ] {date_str}: {title} -> Perf {perf.id}") if matched_any: stats['sequences_processed'] += 1 else: stats['songs_not_found'] += 1 continue # Single song - find and link songs = session.exec( select(Song).where(Song.title.ilike(f"%{song_title}%")) ).all() matched = False for song in songs: perf = session.exec( select(Performance).where( Performance.show_id == show.id, Performance.song_id == song.id ) ).first() if perf: perf.youtube_link = youtube_url session.add(perf) matched = True stats['songs_matched'] += 1 print(f"[SONG] {date_str}: {song_title} -> Perf {perf.id}") break if not matched: stats['songs_not_found'] += 1 session.commit() print("\n" + "="*50) print("IMPORT SUMMARY") print("="*50) for key, value in stats.items(): print(f" {key}: {value}") total_linked = stats['songs_matched'] + stats['sequences_processed'] + stats['full_shows_matched'] print(f"\n TOTAL LINKED: {total_linked}") if __name__ == "__main__": import_videos()