diff --git a/backend/import_youtube.py b/backend/import_youtube.py index 9a15e37..645c92a 100644 --- a/backend/import_youtube.py +++ b/backend/import_youtube.py @@ -18,21 +18,33 @@ def parse_youtube_md(filepath: str) -> list: with open(filepath, 'r') as f: content = f.read() + # Clean up escaped characters from markdown first + content = content.replace('\\`', '`').replace('\\-', '-').replace('\\_', '_') + # Find JSON block (between ```json and ```) - match = re.search(r'```json\s*\n?\s*(\[.*?\])', content, re.DOTALL) + match = re.search(r'```json\s*\n?\s*(\[.*)', content, re.DOTALL) if not match: print("No JSON block found in file.") return [] json_str = match.group(1) - # Clean up escaped characters from markdown - json_str = json_str.replace('\\-', '-').replace('\\_', '_') - + # Find the array - it may not be closed properly, so we find opening [ and match to end + # Try to parse as much valid JSON as possible try: - return json.loads(json_str) - except json.JSONDecodeError as e: - print(f"JSON parse error: {e}") - return [] + # Try to find a complete JSON array + return json.loads(json_str.split('```')[0].strip()) + except json.JSONDecodeError: + # If that fails, try line-by-line parsing + lines = json_str.split('\n') + videos = [] + for line in lines: + line = line.strip().rstrip(',') + if line.startswith('{') and line.endswith('}'): + try: + videos.append(json.loads(line)) + except json.JSONDecodeError: + continue + return videos def normalize_title(title: str) -> str: """Normalize song title for matching."""