#!/usr/bin/env python
# coding=utf-8
"""Extract a Transcription link from a saved API response and flatten it.

Pipeline implemented by ``main``:

1. read the saved response text file and pull out the ``Transcription`` URL,
2. download the JSON document behind that URL,
3. regroup its words into sentences (by ``SentenceId``) and print the result.
"""
# NOTE: os and datetime are not used by the code below; they are kept so the
# module keeps exposing the same names it did before.
import datetime
import json
import os
import traceback
from typing import Any, Dict, List, Optional, Tuple

try:
    import requests
except ImportError:
    # Keep the offline helpers (file parsing, sentence grouping) importable
    # even when the HTTP client is not installed; download_transcription
    # reports the problem when it is actually called.
    requests = None

# Default location of the saved API response.
DEFAULT_A_TXT_PATH = r'd:\Xuanchi\高斯泼溅\XCNote\tools\tongyi\a.txt'

# Log prefix written in front of the JSON payload in the saved response.
RESPONSE_PREFIX = '响应数据: '


def read_a_txt(file_path: str = DEFAULT_A_TXT_PATH) -> Optional[str]:
    """Read the saved response file and return its Transcription URL.

    The file is expected to contain a JSON object, optionally preceded by
    the ``RESPONSE_PREFIX`` log prefix. The URL is looked up at
    ``Data -> Result -> Transcription``.

    Args:
        file_path: Path of the UTF-8 text file to read. Defaults to the
            original hard-coded location.

    Returns:
        The URL, or ``None`` when the file cannot be read or parsed, or when
        the link is absent or empty. Failures are printed, never raised.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            # Drop the log prefix once only, so the same text occurring later
            # inside the JSON payload is left untouched.
            content = f.read().replace(RESPONSE_PREFIX, '', 1)
        data = json.loads(content)
        # "or {}" also covers keys that are present but explicitly null.
        result = (data.get('Data') or {}).get('Result') or {}
        transcription_url = result.get('Transcription')
    except Exception as e:
        # Script-level boundary: report the problem and let main() stop.
        print(f"读取 a.txt 文件失败: {e}")
        traceback.print_exc()
        return None

    if transcription_url:
        print(f"成功提取 Transcription 链接: {transcription_url}")
        return transcription_url

    print("无法找到 Transcription 链接")
    return None


def download_transcription(url: str, timeout: float = 30) -> List[Dict[str, Any]]:
    """Download the Transcription document and return its paragraphs.

    Args:
        url: Address of the Transcription JSON document.
        timeout: Timeout in seconds passed to ``requests.get``.

    Returns:
        The list stored at ``Transcription -> Paragraphs``, or ``[]`` when
        the download or parsing fails or the list is missing. Failures are
        printed, never raised.
    """
    if requests is None:
        print("下载 Transcription 内容失败: 未安装 requests 库")
        return []

    try:
        print(f"开始下载 Transcription 内容: {url}")
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        data = response.json()
        # "or" fallbacks also cover keys that are present but null.
        transcription_data = data.get('Transcription') or {}
        paragraphs = transcription_data.get('Paragraphs') or []
    except Exception as e:
        print(f"下载 Transcription 内容失败: {e}")
        traceback.print_exc()
        return []

    print(f"成功下载并解析 Transcription 内容,共 {len(paragraphs)} 个段落")
    return paragraphs


def _sentence_sort_key(item: Tuple[Any, Dict[str, Any]]) -> Tuple[bool, Any]:
    """Order grouped sentences by SentenceId, placing a missing id last."""
    sentence_id = item[0]
    # The leading flag decides the comparison whenever one id is None, so
    # None is never compared against an int.
    return (sentence_id is None, sentence_id)


def process_transcription(paragraphs: List[Dict[str, Any]]) -> Dict[int, Dict[str, Any]]:
    """Join the words of each paragraph into sentences.

    Within a paragraph, words are grouped by ``SentenceId``. A sentence takes
    ``Start`` from the first word seen for that id and ``End`` from the last
    one. Sentences are emitted in ascending ``SentenceId`` order per
    paragraph (words without an id form one group placed last), and
    sentences whose joined text is empty are skipped.

    Args:
        paragraphs: Paragraph dicts, each with a ``Words`` list of dicts
            carrying ``SentenceId``, ``Text``, ``Start`` and ``End``.

    Returns:
        ``{counter: {"Start": ..., "End": ..., "Text": ...}}`` with a counter
        starting at 1 and running across all paragraphs, or ``{}`` when
        processing fails. Failures are printed, never raised.
    """
    try:
        result_dict = {}
        sentence_counter = 1

        for paragraph in paragraphs or []:
            # A paragraph may carry "Words": null; treat it as empty.
            words = paragraph.get('Words') or []

            # Group the words by SentenceId.
            sentences = {}
            for word in words:
                sentence_id = word.get('SentenceId')
                text = word.get('Text')
                if text is None:
                    # A missing text must not break the join for the whole
                    # transcript; it simply contributes nothing.
                    text = ''

                entry = sentences.get(sentence_id)
                if entry is None:
                    sentences[sentence_id] = {
                        'words': [text],
                        'start': word.get('Start'),
                        'end': word.get('End'),
                    }
                else:
                    entry['words'].append(text)
                    # The latest word defines the end time of the sentence.
                    entry['end'] = word.get('End')

            # Emit the sentences of this paragraph in id order.
            for _, sentence_data in sorted(sentences.items(), key=_sentence_sort_key):
                sentence_text = ''.join(sentence_data['words'])
                if sentence_text:
                    result_dict[sentence_counter] = {
                        "Start": sentence_data['start'],
                        "End": sentence_data['end'],
                        "Text": sentence_text,
                    }
                    sentence_counter += 1

        print(f"成功处理 Transcription 内容,共 {len(result_dict)} 个句子")
        return result_dict
    except Exception as e:
        print(f"处理 Transcription 内容失败: {e}")
        traceback.print_exc()
        return {}


def main() -> None:
    """Run the read -> download -> process pipeline and print the result."""
    print("===== 开始处理 Transcription 结果 =====")

    # 1. Read and parse the saved response file.
    transcription_url = read_a_txt()
    if not transcription_url:
        print("无法提取 Transcription 链接,程序退出")
        return

    # 2. Download and parse the Transcription document.
    paragraphs = download_transcription(transcription_url)
    if not paragraphs:
        print("无法获取 Transcription 内容,程序退出")
        return

    # 3. Regroup the words into sentences.
    result_dict = process_transcription(paragraphs)
    if not result_dict:
        print("处理 Transcription 内容失败,程序退出")
        return

    # 4. Print the collected sentences.
    print("\n===== 处理完成 =====")
    print("整理结果: ")
    print(json.dumps(result_dict, indent=4, ensure_ascii=False))


if __name__ == "__main__":
    main()