From 98a7965c52a653f32908cd6601fb9119ef712f8e Mon Sep 17 00:00:00 2001 From: fullsizemalt <106900403+fullsizemalt@users.noreply.github.com> Date: Sun, 21 Dec 2025 22:33:51 -0800 Subject: [PATCH] fix: Handle escaped markdown in youtube import parser --- backend/import_youtube.py | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/backend/import_youtube.py b/backend/import_youtube.py index 9a15e37..645c92a 100644 --- a/backend/import_youtube.py +++ b/backend/import_youtube.py @@ -18,21 +18,33 @@ def parse_youtube_md(filepath: str) -> list: with open(filepath, 'r') as f: content = f.read() + # Clean up escaped characters from markdown first + content = content.replace('\\`', '`').replace('\\-', '-').replace('\\_', '_') + # Find JSON block (between ```json and ```) - match = re.search(r'```json\s*\n?\s*(\[.*?\])', content, re.DOTALL) + match = re.search(r'```json\s*\n?\s*(\[.*)', content, re.DOTALL) if not match: print("No JSON block found in file.") return [] json_str = match.group(1) - # Clean up escaped characters from markdown - json_str = json_str.replace('\\-', '-').replace('\\_', '_') - + # Find the array - it may not be closed properly, so we find opening [ and match to end + # Try to parse as much valid JSON as possible try: - return json.loads(json_str) - except json.JSONDecodeError as e: - print(f"JSON parse error: {e}") - return [] + # Try to find a complete JSON array + return json.loads(json_str.split('```')[0].strip()) + except json.JSONDecodeError: + # If that fails, try line-by-line parsing + lines = json_str.split('\n') + videos = [] + for line in lines: + line = line.strip().rstrip(',') + if line.startswith('{') and line.endswith('}'): + try: + videos.append(json.loads(line)) + except json.JSONDecodeError: + continue + return videos def normalize_title(title: str) -> str: """Normalize song title for matching."""