fix: Handle escaped markdown in youtube import parser

2025-12-21 22:33:51 -08:00 · 2025-12-21 22:33:51 -08:00 · 98a7965c52
commit 98a7965c52
parent 8620841932
1 changed file with 20 additions and 8 deletions
--- a/backend/import_youtube.py
+++ b/backend/import_youtube.py
@@ -18,21 +18,33 @@ def parse_youtube_md(filepath: str) -> list:
    with open(filepath, 'r') as f:
        content = f.read()
    # Clean up escaped characters from markdown first
    content = content.replace('\\`', '`').replace('\\-', '-').replace('\\_', '_')
    # Find JSON block (between ```json and ```)
-    match = re.search(r'```json\s*\n?\s*(\[.*?\])', content, re.DOTALL)
+    match = re.search(r'```json\s*\n?\s*(\[.*)', content, re.DOTALL)
    if not match:
        print("No JSON block found in file.")
        return []
    json_str = match.group(1)
-    # Clean up escaped characters from markdown
+    # Find the array - it may not be closed properly, so we find opening [ and match to end
-    json_str = json_str.replace('\\-', '-').replace('\\_', '_')
+    # Try to parse as much valid JSON as possible
    try:
-        return json.loads(json_str)
+        # Try to find a complete JSON array
-    except json.JSONDecodeError as e:
+        return json.loads(json_str.split('```')[0].strip())
-        print(f"JSON parse error: {e}")
+    except json.JSONDecodeError:
-        return []
+        # If that fails, try line-by-line parsing
        lines = json_str.split('\n')
        videos = []
        for line in lines:
            line = line.strip().rstrip(',')
            if line.startswith('{') and line.endswith('}'):
                try:
                    videos.append(json.loads(line))
                except json.JSONDecodeError:
                    continue
        return videos
 def normalize_title(title: str) -> str:
    """Normalize song title for matching."""