Files
XCDesktop/tools/tongyi/MD.py
2026-03-08 01:34:54 +08:00

218 lines
6.8 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python
#coding=utf-8
import os
import json
import datetime
from PptExtraction import main as ppt_main
from Transcription import main as transcription_main
def get_ppt_result(save_dir=None):
    """Download the PPT key-frame images and index them by frame position.

    The extraction URL comes from ``PptExtraction.read_a_txt``, the key-frame
    list from ``download_ppt_extraction``, and every frame that carries a
    ``FileUrl`` is fetched with ``download_image``.

    Args:
        save_dir: Directory the images are downloaded into. When omitted, the
            project's original ``ppt_output`` folder is used. The directory
            is created if it does not exist yet.

    Returns:
        A dict keyed by the 1-based position of the frame in the key-frame
        list. Each value holds the frame's ``Start`` and ``End`` values,
        ``Type`` set to ``"image"`` and a Markdown image link in ``Content``.
        Frames without a ``FileUrl`` or whose download yields no file name
        are skipped, so keys may have gaps. An empty dict is returned when
        the URL or the frame list is unavailable, or when any step raises.
    """
    if save_dir is None:
        save_dir = r'd:\Xuanchi\高斯泼溅\XCNote\tools\tongyi\ppt_output'
    try:
        # Imported lazily so that a missing/broken helper module is reported
        # through the same error path as every other failure.
        from PptExtraction import read_a_txt, download_ppt_extraction, download_image

        # 1. Read and parse a.txt to obtain the extraction URL.
        ppt_extraction_url = read_a_txt()
        if not ppt_extraction_url:
            print("无法提取 PptExtraction 链接")
            return {}

        # 2. Download and parse the key-frame list.
        key_frame_list = download_ppt_extraction(ppt_extraction_url)
        if not key_frame_list:
            print("无法获取 PPT 内容")
            return {}

        # 3. One timestamp shared by every image downloaded in this run.
        timestamp = datetime.datetime.now().strftime('%Y%m%d_%H%M%S')
        print(f"生成时间戳: {timestamp}")

        # 4. Make sure the target directory exists. exist_ok avoids failing
        #    if the directory appears between the check and the creation.
        if not os.path.isdir(save_dir):
            os.makedirs(save_dir, exist_ok=True)
            print(f"创建保存目录: {save_dir}")

        # 5. Download each image and collect the result entries.
        result_dict = {}
        for index, frame in enumerate(key_frame_list, start=1):
            image_url = frame.get('FileUrl')
            if not image_url:
                continue
            image_filename = download_image(image_url, save_dir, timestamp, index)
            if not image_filename:
                continue
            # NOTE(review): the link uses whatever download_image returns;
            # confirm it resolves relative to where the Markdown file lives.
            result_dict[index] = {
                "Start": frame.get('Start'),
                "End": frame.get('End'),
                "Type": "image",
                "Content": f"![]({image_filename})",
            }

        print(f"成功获取 PPT 结果,共 {len(result_dict)} 张图片")
        return result_dict
    except Exception as e:
        print(f"获取 PPT 结果失败: {e}")
        import traceback
        traceback.print_exc()
        return {}
def get_transcription_result():
    """Fetch the transcription and normalise it to the merged-item schema.

    Uses the helpers of the ``Transcription`` module to read the URL from
    a.txt, download the paragraphs and process them into a dict. Every entry
    of that dict is then tagged with ``Type`` = ``"text"`` and its ``Text``
    field is renamed to ``Content`` (the entries are modified in place).

    Returns:
        The processed dict, or an empty dict when the URL or the paragraphs
        are unavailable or any step raises.
    """
    try:
        # Pull in the helper functions of the Transcription module.
        from Transcription import (
            read_a_txt,
            download_transcription,
            process_transcription,
        )

        # Step 1: read and parse a.txt.
        source_url = read_a_txt()
        if not source_url:
            print("无法提取 Transcription 链接")
            return {}

        # Step 2: download and parse the transcription content.
        paragraph_list = download_transcription(source_url)
        if not paragraph_list:
            print("无法获取 Transcription 内容")
            return {}

        # Step 3: process the paragraphs into the result dict.
        result_dict = process_transcription(paragraph_list)

        # Reshape every entry: add the Type field, rename Text to Content.
        for entry in result_dict.values():
            entry["Type"] = "text"
            text = entry.pop("Text")
            entry["Content"] = text

        print(f"成功获取 Transcription 结果,共 {len(result_dict)} 个句子")
        return result_dict
    except Exception as e:
        print(f"获取 Transcription 结果失败: {e}")
        import traceback
        traceback.print_exc()
        return {}
def merge_results(ppt_result, transcription_result):
    """Interleave the PPT and transcription entries in start order.

    Args:
        ppt_result: Dict of entries, each with ``Start``, ``End``, ``Type``
            and ``Content`` keys.
        transcription_result: Dict of entries with the same four keys.

    Returns:
        A list of dicts with the keys ``id``, ``start``, ``end``, ``type``
        and ``content``. ``id`` is the source key prefixed with ``ppt_`` or
        ``trans_``. The list is ordered by ``start``; among entries with the
        same ``start``, those of type ``"image"`` come first. An empty list
        is returned if anything raises.
    """
    try:
        def sort_rank(entry):
            # Images win ties on the start value.
            return entry["start"], int(entry["type"] != "image")

        # Flatten both dicts (PPT entries first, then transcription entries)
        # into one list of uniformly shaped records.
        labelled_sources = (("ppt", ppt_result), ("trans", transcription_result))
        merged = [
            {
                "id": f"{prefix}_{source_key}",
                "start": record["Start"],
                "end": record["End"],
                "type": record["Type"],
                "content": record["Content"],
            }
            for prefix, source in labelled_sources
            for source_key, record in source.items()
        ]

        merged.sort(key=sort_rank)
        print(f"成功拼合结果,共 {len(merged)} 个项目")
        return merged
    except Exception as e:
        print(f"拼合结果失败: {e}")
        import traceback
        traceback.print_exc()
        return []
def generate_md(items, output_dir=None):
    """Write the merged items to a timestamped Markdown file.

    The document starts with a fixed title and a "generated at" line,
    followed by the ``content`` value of every item, each as its own
    paragraph. Image and text items are written the same way.

    Args:
        items: Iterable of dicts that each provide a ``content`` key.
        output_dir: Directory the file is written to. When omitted, the
            project's original ``tongyi`` tool folder is used. The directory
            is not created here; a missing directory is reported as a failure.

    Returns:
        The file name (not the full path) of the written document, in the
        form ``<YYYYmmdd_HHMMSS>_merged.md``, or ``None`` if anything raises.
    """
    if output_dir is None:
        output_dir = r'd:\Xuanchi\高斯泼溅\XCNote\tools\tongyi'
    try:
        # Read the clock once so the file name and the header always agree.
        now = datetime.datetime.now()
        file_stamp = now.strftime('%Y%m%d_%H%M%S')
        header_stamp = now.strftime('%Y-%m-%d %H:%M:%S')

        md_filename = f"{file_stamp}_merged.md"
        md_path = os.path.join(output_dir, md_filename)

        # Collect the pieces and join once instead of growing a string.
        parts = [
            "# 拼合内容\n\n",
            f"生成时间: {header_stamp}\n\n",
        ]
        parts.extend(f"{item['content']}\n\n" for item in items)

        with open(md_path, 'w', encoding='utf-8') as f:
            f.write("".join(parts))

        print(f"成功生成 md 文档: {md_filename}")
        return md_filename
    except Exception as e:
        print(f"生成 md 文档失败: {e}")
        import traceback
        traceback.print_exc()
        return None
def main():
    """Run the pipeline: fetch PPT and transcription data, merge, write Markdown.

    Progress and errors are reported on stdout. The function returns early
    (without writing a file) when the merge yields no items.
    """
    print("===== 开始生成拼合 MD 文档 =====")

    # 1. Fetch the PPT key-frame images.
    print("\n===== 获取 PPT 结果 =====")
    ppt_result = get_ppt_result()

    # 2. Fetch the transcription sentences.
    print("\n===== 获取 Transcription 结果 =====")
    transcription_result = get_transcription_result()

    # 3. Merge both sources into one ordered list.
    print("\n===== 拼合结果 =====")
    merged_items = merge_results(ppt_result, transcription_result)
    if not merged_items:
        print("无法生成拼合结果,程序退出")
        return

    # 4. Write the Markdown document.
    print("\n===== 生成 MD 文档 =====")
    md_filename = generate_md(merged_items)
    if md_filename:
        print("\n===== 处理完成 =====")
        print(f"生成的 MD 文档: {md_filename}")
        # The directory must be a raw string: in a normal (f-)string the
        # "\t" in "\tools" and "\tongyi" is a TAB and the other backslash
        # sequences are invalid escapes. This is the directory generate_md
        # writes to by default.
        output_dir = r'd:\Xuanchi\高斯泼溅\XCNote\tools\tongyi'
        print(f"保存位置: {os.path.join(output_dir, md_filename)}")
    else:
        print("生成 MD 文档失败,程序退出")


if __name__ == "__main__":
    main()