#!/usr/bin/env python3
|
||
|
|
# -*- coding: utf-8 -*-
|
||
|
|
"""
|
||
|
|
Script for manually testing the blog-parsing functions.
|
||
|
|
"""
|
||
|
|
|
||
|
|
from parse_blog import parse_zhihu_blog, save_markdown
|
||
|
|
|
||
|
|
|
||
|
|
def test_parse():
    """Run the blog parser on a fixed HTML file and save the Markdown result.

    Prints the length and the first 500 characters of the parsed content,
    then passes the content to ``save_markdown``. When ``parse_zhihu_blog``
    returns a falsy value, a failure message is printed instead and nothing
    is saved.
    """
    # Input and output paths are hard-coded for this manual check.
    source_path = "SIGGRAPH 2025 Papers.html"
    target_path = "test_output.md"

    print(f"测试解析: {source_path}")

    # Parse the HTML file.
    parsed = parse_zhihu_blog(source_path)

    # Guard clause: nothing to preview or save when parsing yields no content.
    if not parsed:
        print("解析失败")
        return

    print(f"解析成功,内容长度: {len(parsed)}")
    print("\n前500个字符预览:")
    preview = parsed[:500]
    print(preview + "...")

    # Save the result to the output file.
    save_markdown(parsed, target_path)
|
||
|
|
|
||
|
|
|
||
|
|
# Script entry point: run the manual parse check only when this file is
# executed directly, not when it is imported as a module.
if __name__ == "__main__":
    test_parse()
|