Files
XCDesktop/tools/tongyi/main.py

141 lines
4.5 KiB
Python
Raw Normal View History

2026-03-08 01:34:54 +08:00
#!/usr/bin/env python
# coding=utf-8
import sys
import os
from bilibili_spider import crawl_bilibili_video
from tingwu_api import submit_tingwu_task, get_task_result, check_task_status
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from shared.oss_upload import upload_file_to_oss
def _require_env(name):
    """Return the non-empty value of environment variable ``name``.

    Prints an error and exits with status 1 when the variable is unset or
    empty, so the pipeline never starts without a required credential.
    """
    value = os.environ.get(name)
    if not value:
        print(f"错误:缺少环境变量 {name}")
        sys.exit(1)
    return value


def _wait_for_task(task_id, access_key_id, access_key_secret,
                   max_retries=60, retry_interval=10):
    """Poll the Tingwu task until it reaches a terminal state or times out.

    Queries ``check_task_status`` up to ``max_retries`` times, sleeping
    ``retry_interval`` seconds after every non-terminal answer. Progress and
    the final outcome are reported on stdout; nothing is returned.
    """
    import time

    for attempt in range(1, max_retries + 1):
        print(f"\n{attempt} 次查询任务状态...")
        task_status = check_task_status(task_id, access_key_id, access_key_secret)

        if task_status == "COMPLETED":
            print("\n===== 任务处理完成 =====")
            print("分析完成,结果已打印如上")
            return
        if task_status in ("FAILED", "INVALID"):
            print(f"\n===== 任务处理{task_status} =====")
            print("分析完成,结果已打印如上")
            return

        if task_status == "ONGOING":
            print(f"任务正在处理中,{retry_interval}秒后再次查询...")
        else:
            # Unknown states are treated like ONGOING: wait and ask again.
            print(f"未知任务状态: {task_status},{retry_interval}秒后再次查询...")
        time.sleep(retry_interval)

    # Every attempt was used up without seeing a terminal state.
    print("\n===== 任务处理超时 =====")
    print("已达到最大查询次数,任务可能仍在处理中")


def main():
    """Crawl a Bilibili video, upload it to OSS and submit it to Tingwu.

    Expects exactly one command-line argument: the video URL.

    Credentials are read from the environment instead of being embedded in
    the source:

    * ``ALIBABA_CLOUD_ACCESS_KEY_ID`` / ``ALIBABA_CLOUD_ACCESS_KEY_SECRET``
      - AccessKey pair passed to the OSS upload and to the Tingwu calls.
    * ``TINGWU_APP_KEY`` - AppKey passed to ``submit_tingwu_task``.
    * ``OSS_BUCKET_NAME`` - optional, defaults to ``bucket-xcdesktop``.

    Exits with status 1 when the argument or a credential is missing, when
    the crawl fails, when the merged video file is absent, or when the OSS
    upload fails. A failed task submission or a polling timeout is only
    reported on stdout.
    """
    if len(sys.argv) != 2:
        print("使用方法: python bilibili_to_tingwu.py <视频链接>")
        print(
            "示例: python bilibili_to_tingwu.py https://www.bilibili.com/video/BV1uHFjzVEju/"
        )
        sys.exit(1)
    video_url = sys.argv[1]

    # SECURITY: secrets must never be committed. Resolve them up front so a
    # misconfigured environment fails before any download work is done.
    # NOTE(review): the keys that used to be hard-coded here are exposed in
    # version-control history and should be rotated.
    access_key_id = _require_env("ALIBABA_CLOUD_ACCESS_KEY_ID")
    access_key_secret = _require_env("ALIBABA_CLOUD_ACCESS_KEY_SECRET")
    app_key = _require_env("TINGWU_APP_KEY")
    bucket_name = os.environ.get("OSS_BUCKET_NAME", "bucket-xcdesktop")

    print("===== 开始爬取B站视频 =====")
    # Crawl the page to get the title and play URL, downloading the video.
    video_title, video_play_url = crawl_bilibili_video(video_url, download=True)
    if not video_play_url:
        print("视频爬取失败无法提交给通义听悟API")
        sys.exit(1)

    # Project-local imports stay inside the function, as in the original.
    from config import DOWNLOAD_DIR
    from utils import sanitize_filename

    # Sanitize once and reuse for the local path and the OSS object name.
    # Presumably sanitize_filename is a pure helper - verify in utils.
    safe_title = sanitize_filename(video_title)

    # Only the merged audio+video file is accepted for analysis.
    merged_video_path = os.path.join(DOWNLOAD_DIR, f"{safe_title}.mp4")
    if not os.path.exists(merged_video_path):
        print("错误:无法找到合并后的视频文件")
        print("必须成功合并音视频才能继续")
        sys.exit(1)
    upload_path = merged_video_path
    print("使用合并后的视频文件进行分析")
    print(f"上传文件路径: {upload_path}")

    print("\n===== 上传视频到OSS =====")
    # Prefix the object name with a timestamp to keep uploads distinct.
    import datetime

    timestamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
    object_name = f"bilibili/{timestamp}_{safe_title}.mp4"
    oss_url = upload_file_to_oss(
        upload_path, bucket_name, object_name, access_key_id, access_key_secret
    )
    if not oss_url:
        print("文件上传失败无法提交给通义听悟API")
        sys.exit(1)

    print("\n===== 开始提交给通义听悟API分析 =====")
    print(f"视频标题: {video_title}")
    print(f"使用的OSS文件URL: {oss_url}")

    # The OSS URL is handed to Tingwu as the file URL of the task.
    task_id = submit_tingwu_task(oss_url, app_key, access_key_id, access_key_secret)
    if not task_id:
        print("\n===== 提交任务失败 =====")
        return

    print("\n===== 等待任务处理完成 =====")
    _wait_for_task(task_id, access_key_id, access_key_secret)
# Script entry point: run the pipeline only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()