Files
XCDesktop/tools/tongyi/main.py

141 lines
4.5 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python
# coding=utf-8
import sys
import os
from bilibili_spider import crawl_bilibili_video
from tingwu_api import submit_tingwu_task, get_task_result, check_task_status
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from shared.oss_upload import upload_file_to_oss
# Environment variables that must be set before the pipeline runs. Secrets are
# deliberately NOT embedded in source; any key that was ever committed to the
# repository should be rotated.
_REQUIRED_ENV_VARS = (
    "ALIBABA_CLOUD_ACCESS_KEY_ID",
    "ALIBABA_CLOUD_ACCESS_KEY_SECRET",
    "TINGWU_APP_KEY",
)


def _load_credentials():
    """Read the required credentials from the environment.

    Returns:
        A ``(access_key_id, access_key_secret, app_key)`` tuple of non-empty
        strings.

    Exits:
        Terminates with status 1 (``SystemExit``) and names every missing
        variable when one or more of them is unset or blank.
    """
    values = {name: os.environ.get(name, "").strip() for name in _REQUIRED_ENV_VARS}
    missing = [name for name, value in values.items() if not value]
    if missing:
        print(f"错误:缺少环境变量: {', '.join(missing)}")
        sys.exit(1)
    return tuple(values[name] for name in _REQUIRED_ENV_VARS)


def _wait_for_task(
    task_id, access_key_id, access_key_secret, max_retries=60, retry_interval=10
):
    """Poll the task status until it reaches a terminal state or times out.

    Args:
        task_id: Identifier returned by ``submit_tingwu_task``.
        access_key_id: Access key ID forwarded to ``check_task_status``.
        access_key_secret: Access key secret forwarded to ``check_task_status``.
        max_retries: Maximum number of status queries.
        retry_interval: Seconds to sleep between two queries.

    Returns:
        True when the task reported ``COMPLETED``; False when it reported
        ``FAILED``/``INVALID`` or when ``max_retries`` queries were exhausted.
    """
    import time

    for attempt in range(1, max_retries + 1):
        print(f"\n{attempt} 次查询任务状态...")
        task_status = check_task_status(task_id, access_key_id, access_key_secret)

        if task_status == "COMPLETED":
            print("\n===== 任务处理完成 =====")
            print("分析完成,结果已打印如上")
            return True

        if task_status in ("FAILED", "INVALID"):
            print(f"\n===== 任务处理{task_status} =====")
            print("分析完成,结果已打印如上")
            return False

        if task_status == "ONGOING":
            print(f"任务正在处理中,{retry_interval}秒后再次查询...")
        else:
            # Unknown statuses are treated like ONGOING: keep polling. The
            # separator keeps the status and the interval from running together.
            print(f"未知任务状态: {task_status},{retry_interval}秒后再次查询...")
        time.sleep(retry_interval)

    print("\n===== 任务处理超时 =====")
    print("已达到最大查询次数,任务可能仍在处理中")
    return False


def main():
    """Crawl a Bilibili video, upload it to OSS and submit it to Tingwu.

    Expects exactly one command-line argument: the video URL. Credentials are
    read from the environment variables ``ALIBABA_CLOUD_ACCESS_KEY_ID``,
    ``ALIBABA_CLOUD_ACCESS_KEY_SECRET`` and ``TINGWU_APP_KEY``; the OSS bucket
    name comes from ``OSS_BUCKET_NAME`` and defaults to ``bucket-xcdesktop``.

    Exits:
        Terminates with status 1 on wrong usage, missing credentials, crawl
        failure, a missing merged video file, upload failure, task submission
        failure, a FAILED/INVALID task, or a polling timeout. Returns normally
        only when the task reports COMPLETED.
    """
    if len(sys.argv) != 2:
        print("使用方法: python bilibili_to_tingwu.py <视频链接>")
        print(
            "示例: python bilibili_to_tingwu.py https://www.bilibili.com/video/BV1uHFjzVEju/"
        )
        sys.exit(1)
    video_url = sys.argv[1]

    # Fail fast: validate configuration before spending time on the download.
    access_key_id, access_key_secret, app_key = _load_credentials()
    bucket_name = os.environ.get("OSS_BUCKET_NAME", "bucket-xcdesktop")

    print("===== 开始爬取B站视频 =====")
    # Crawl the video; yields its title and play URL and downloads the media.
    video_title, video_play_url = crawl_bilibili_video(video_url, download=True)
    if not video_play_url:
        print("视频爬取失败无法提交给通义听悟API")
        sys.exit(1)

    # Imported here, as in the original script, so they resolve only after the
    # crawl step has succeeded.
    from config import DOWNLOAD_DIR
    from utils import sanitize_filename

    # Sanitize once and reuse (assumes sanitize_filename is a pure function of
    # its argument — TODO confirm against utils).
    safe_title = sanitize_filename(video_title)

    # Only the merged audio+video file is accepted for analysis.
    merged_video_path = os.path.join(DOWNLOAD_DIR, f"{safe_title}.mp4")
    if not os.path.exists(merged_video_path):
        print("错误:无法找到合并后的视频文件")
        print("必须成功合并音视频才能继续")
        sys.exit(1)
    upload_path = merged_video_path
    print("使用合并后的视频文件进行分析")
    print(f"上传文件路径: {upload_path}")

    print("\n===== 上传视频到OSS =====")
    import datetime

    # A timestamp prefix keeps repeated uploads of the same title distinct.
    timestamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
    object_name = f"bilibili/{timestamp}_{safe_title}.mp4"
    oss_url = upload_file_to_oss(
        upload_path, bucket_name, object_name, access_key_id, access_key_secret
    )
    if not oss_url:
        print("文件上传失败无法提交给通义听悟API")
        sys.exit(1)

    print("\n===== 开始提交给通义听悟API分析 =====")
    print(f"视频标题: {video_title}")
    print(f"使用的OSS文件URL: {oss_url}")

    # The OSS URL of the uploaded file is what the task is submitted with.
    task_id = submit_tingwu_task(oss_url, app_key, access_key_id, access_key_secret)
    if not task_id:
        print("\n===== 提交任务失败 =====")
        sys.exit(1)

    print("\n===== 等待任务处理完成 =====")
    if not _wait_for_task(task_id, access_key_id, access_key_secret):
        # Report failure/timeout through the exit status, like every other
        # failure path in this script.
        sys.exit(1)
# Script entry point: run the pipeline only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()