#!/usr/bin/env python
#coding=utf-8
"""Merge PPT key frames and transcription sentences into one Markdown file.

The script pulls the PPT key-frame images (via ``PptExtraction``) and the
transcription sentences (via ``Transcription``), interleaves them by their
start value and writes the result to a timestamped ``*_merged.md`` file.
"""

import datetime
import json
import os
import traceback

from PptExtraction import main as ppt_main
from Transcription import main as transcription_main

# Directory that receives the generated Markdown document.
# Raw string: the path contains "\t" sequences that must not become TABs.
OUTPUT_DIR = r'd:\Xuanchi\高斯泼溅\XCNote\tools\tongyi'
# Directory that receives the downloaded PPT key-frame images.
PPT_OUTPUT_DIR = r'd:\Xuanchi\高斯泼溅\XCNote\tools\tongyi\ppt_output'


def get_ppt_result():
    """Download the PPT key-frame images and describe them as a dict.

    Returns:
        A dict keyed by the 1-based frame index. Each value has the keys
        ``"Start"``, ``"End"``, ``"Type"`` (always ``"image"``) and
        ``"Content"`` (a Markdown image link). Frames without a ``FileUrl``
        or whose download yields a falsy filename are left out. An empty
        dict is returned when any step fails.
    """
    try:
        # Imported locally: PptExtraction and Transcription both export a
        # function called read_a_txt, so the names must not share a scope.
        from PptExtraction import read_a_txt, download_ppt_extraction, download_image

        # 1. Read and parse a.txt to obtain the PptExtraction link.
        ppt_extraction_url = read_a_txt()
        if not ppt_extraction_url:
            print("无法提取 PptExtraction 链接")
            return {}

        # 2. Download and parse the PptExtraction payload.
        key_frame_list = download_ppt_extraction(ppt_extraction_url)
        if not key_frame_list:
            print("无法获取 PPT 内容")
            return {}

        # 3. Timestamp (YYYYmmdd_HHMMSS) handed to download_image.
        timestamp = datetime.datetime.now().strftime('%Y%m%d_%H%M%S')
        print(f"生成时间戳: {timestamp}")

        # 4. Make sure the image directory exists.
        save_dir = PPT_OUTPUT_DIR
        if not os.path.exists(save_dir):
            os.makedirs(save_dir, exist_ok=True)
            print(f"创建保存目录: {save_dir}")

        # 5. Download every frame image and collect one entry per success.
        result_dict = {}
        for index, frame in enumerate(key_frame_list, start=1):
            image_url = frame.get('FileUrl')
            if not image_url:
                continue

            image_filename = download_image(image_url, save_dir, timestamp, index)
            if not image_filename:
                continue

            # NOTE(review): the Markdown file is written to OUTPUT_DIR while
            # images land in PPT_OUTPUT_DIR; whether this link resolves
            # depends on what download_image returns - confirm.
            result_dict[index] = {
                "Start": frame.get('Start'),
                "End": frame.get('End'),
                "Type": "image",
                "Content": f"![]({image_filename})"
            }

        print(f"成功获取 PPT 结果,共 {len(result_dict)} 张图片")
        return result_dict

    except Exception as e:
        # Script-level boundary: report the failure and let the caller
        # continue with an empty result.
        print(f"获取 PPT 结果失败: {e}")
        traceback.print_exc()
        return {}


def get_transcription_result():
    """Download the transcription and describe its sentences as a dict.

    Returns:
        The dict produced by ``process_transcription`` with every value
        changed in place: ``"Type"`` is set to ``"text"`` and the ``"Text"``
        entry is renamed to ``"Content"``. An empty dict is returned when
        any step fails (including a value that has no ``"Text"`` key).
    """
    try:
        # Imported locally for the same reason as in get_ppt_result: this
        # read_a_txt is the Transcription one, not the PptExtraction one.
        from Transcription import read_a_txt, download_transcription, process_transcription

        # 1. Read and parse a.txt to obtain the Transcription link.
        transcription_url = read_a_txt()
        if not transcription_url:
            print("无法提取 Transcription 链接")
            return {}

        # 2. Download and parse the Transcription payload.
        paragraphs = download_transcription(transcription_url)
        if not paragraphs:
            print("无法获取 Transcription 内容")
            return {}

        # 3. Turn the paragraphs into the sentence dict.
        result_dict = process_transcription(paragraphs)

        # Align the entries with the PPT ones: add "Type", rename "Text".
        for value in result_dict.values():
            value["Type"] = "text"
            value["Content"] = value.pop("Text")

        print(f"成功获取 Transcription 结果,共 {len(result_dict)} 个句子")
        return result_dict

    except Exception as e:
        print(f"获取 Transcription 结果失败: {e}")
        traceback.print_exc()
        return {}


def _to_merge_item(prefix, key, value):
    """Convert one result-dict entry into the flat item used for merging."""
    return {
        "id": f"{prefix}_{key}",
        "start": value["Start"],
        "end": value["End"],
        "type": value["Type"],
        "content": value["Content"]
    }


def _merge_sort_key(item):
    """Order by start value; images win ties; items without a start go last."""
    start = item["start"]
    image_first = 0 if item["type"] == "image" else 1
    # A None start (possible because the PPT side reads it with .get) cannot
    # be compared with a number, so it is replaced by a flag plus a dummy 0.
    return (start is None, 0 if start is None else start, image_first)


def merge_results(ppt_result, transcription_result):
    """Interleave the PPT and transcription entries by start value.

    Args:
        ppt_result: dict as returned by ``get_ppt_result``.
        transcription_result: dict as returned by ``get_transcription_result``.

    Returns:
        A list of dicts with the keys ``"id"``, ``"start"``, ``"end"``,
        ``"type"`` and ``"content"``, sorted by ``"start"``. On equal start
        values images come before text. Items whose start is ``None`` are
        placed at the end instead of aborting the merge. An empty list is
        returned when the merge fails (for example on a missing key).
    """
    try:
        sources = (("ppt", ppt_result), ("trans", transcription_result))
        items = [
            _to_merge_item(prefix, key, value)
            for prefix, source in sources
            for key, value in source.items()
        ]

        # list.sort is stable, so entries with identical keys keep the
        # "PPT first, then transcription" insertion order.
        items.sort(key=_merge_sort_key)

        print(f"成功拼合结果,共 {len(items)} 个项目")
        return items

    except Exception as e:
        print(f"拼合结果失败: {e}")
        traceback.print_exc()
        return []


def generate_md(items):
    """Write the merged items to a timestamped Markdown file in OUTPUT_DIR.

    Args:
        items: list as returned by ``merge_results``; only ``"content"`` of
            each item is written, one paragraph per item.

    Returns:
        The file name (not the full path) of the generated document, or
        ``None`` when writing fails.
    """
    try:
        # One clock reading so the file name and the header always agree.
        now = datetime.datetime.now()
        md_filename = f"{now.strftime('%Y%m%d_%H%M%S')}_merged.md"
        md_path = os.path.join(OUTPUT_DIR, md_filename)

        # The directory may not exist yet when no image was downloaded.
        os.makedirs(OUTPUT_DIR, exist_ok=True)

        parts = [
            "# 拼合内容\n\n",
            f"生成时间: {now.strftime('%Y-%m-%d %H:%M:%S')}\n\n",
        ]
        # Images and text are emitted the same way: content + blank line.
        parts.extend(f"{item['content']}\n\n" for item in items)

        with open(md_path, 'w', encoding='utf-8') as f:
            f.write("".join(parts))

        print(f"成功生成 md 文档: {md_filename}")
        return md_filename

    except Exception as e:
        print(f"生成 md 文档失败: {e}")
        traceback.print_exc()
        return None


def main():
    """Run the pipeline: fetch both results, merge them, write the file."""
    print("===== 开始生成拼合 MD 文档 =====")

    # 1. PPT key frames.
    print("\n===== 获取 PPT 结果 =====")
    ppt_result = get_ppt_result()

    # 2. Transcription sentences.
    print("\n===== 获取 Transcription 结果 =====")
    transcription_result = get_transcription_result()

    # 3. Merge both by start value.
    print("\n===== 拼合结果 =====")
    merged_items = merge_results(ppt_result, transcription_result)
    if not merged_items:
        print("无法生成拼合结果,程序退出")
        return

    # 4. Write the Markdown document.
    print("\n===== 生成 MD 文档 =====")
    md_filename = generate_md(merged_items)
    if md_filename:
        print("\n===== 处理完成 =====")
        print(f"生成的 MD 文档: {md_filename}")
        # Built from the raw-string constant: spelling the path inside a
        # normal f-string turned "\t" into TAB characters.
        print(f"保存位置: {os.path.join(OUTPUT_DIR, md_filename)}")
    else:
        print("生成 MD 文档失败,程序退出")


if __name__ == "__main__":
    main()