fix: Handle escaped markdown in youtube import parser
This commit is contained in:
parent
8620841932
commit
98a7965c52
1 changed file with 20 additions and 8 deletions
|
|
@@ -18,21 +18,33 @@ def parse_youtube_md(filepath: str) -> list:
|
|||
with open(filepath, 'r') as f:
|
||||
content = f.read()
|
||||
|
||||
# Clean up escaped characters from markdown first
|
||||
content = content.replace('\\`', '`').replace('\\-', '-').replace('\\_', '_')
|
||||
|
||||
# Find JSON block (between ```json and ```)
|
||||
match = re.search(r'```json\s*\n?\s*(\[.*?\])', content, re.DOTALL)
|
||||
match = re.search(r'```json\s*\n?\s*(\[.*)', content, re.DOTALL)
|
||||
if not match:
|
||||
print("No JSON block found in file.")
|
||||
return []
|
||||
|
||||
json_str = match.group(1)
|
||||
# Clean up escaped characters from markdown
|
||||
json_str = json_str.replace('\\-', '-').replace('\\_', '_')
|
||||
|
||||
# Find the array - it may not be closed properly, so we find opening [ and match to end
|
||||
# Try to parse as much valid JSON as possible
|
||||
try:
|
||||
return json.loads(json_str)
|
||||
except json.JSONDecodeError as e:
|
||||
print(f"JSON parse error: {e}")
|
||||
return []
|
||||
# Try to find a complete JSON array
|
||||
return json.loads(json_str.split('```')[0].strip())
|
||||
except json.JSONDecodeError:
|
||||
# If that fails, try line-by-line parsing
|
||||
lines = json_str.split('\n')
|
||||
videos = []
|
||||
for line in lines:
|
||||
line = line.strip().rstrip(',')
|
||||
if line.startswith('{') and line.endswith('}'):
|
||||
try:
|
||||
videos.append(json.loads(line))
|
||||
except json.JSONDecodeError:
|
||||
continue
|
||||
return videos
|
||||
|
||||
def normalize_title(title: str) -> str:
|
||||
"""Normalize song title for matching."""
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue