2026-03-08 01:34:54 +08:00
|
|
|
|
#!/usr/bin/env python
|
|
|
|
|
|
# coding=utf-8
|
|
|
|
|
|
|
|
|
|
|
|
import sys
|
|
|
|
|
|
import os
|
|
|
|
|
|
from bilibili_spider import crawl_bilibili_video
|
|
|
|
|
|
from tingwu_api import submit_tingwu_task, get_task_result, check_task_status
|
|
|
|
|
|
|
|
|
|
|
|
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
|
|
|
|
|
from shared.oss_upload import upload_file_to_oss
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def main():
    """Crawl a Bilibili video, upload it to OSS and submit it to Tongyi Tingwu.

    Command line:
        python bilibili_to_tingwu.py <video url>

    Environment variables:
        ALIBABA_CLOUD_ACCESS_KEY_ID      (required) AccessKey ID passed to the
                                         OSS upload and the Tingwu calls.
        ALIBABA_CLOUD_ACCESS_KEY_SECRET  (required) matching AccessKey secret.
        TINGWU_APP_KEY                   (required) Tingwu AppKey.
        OSS_BUCKET_NAME                  (optional) target bucket; defaults to
                                         "bucket-xcdesktop".

    Returns:
        None. The process exits with status 1 on a usage error, missing
        credentials, crawl failure, missing merged file, upload failure,
        submission failure, a FAILED/INVALID task, or a polling timeout.
    """
    import datetime

    if len(sys.argv) != 2:
        print("使用方法: python bilibili_to_tingwu.py <视频链接>")
        print(
            "示例: python bilibili_to_tingwu.py https://www.bilibili.com/video/BV1uHFjzVEju/"
        )
        sys.exit(1)

    video_url = sys.argv[1]

    # Fail fast: resolve credentials before spending time on the download.
    access_key_id, access_key_secret, app_key, bucket_name = _load_credentials()

    print("===== 开始爬取B站视频 =====")
    # Crawl the page and download the streams; returns (title, play URL).
    video_title, video_play_url = crawl_bilibili_video(video_url, download=True)

    if not video_play_url:
        print("视频爬取失败,无法提交给通义听悟API")
        sys.exit(1)

    # Project-local imports stay function-scoped, as in the original, so they
    # are resolved only after the module-level sys.path adjustment has run.
    from config import DOWNLOAD_DIR
    from utils import sanitize_filename

    # NOTE(review): assumes sanitize_filename is deterministic, so computing it
    # once is equivalent to the repeated calls it replaces.
    safe_title = sanitize_filename(video_title)

    # Only the merged (audio + video) file is accepted for analysis.
    merged_video_path = os.path.join(DOWNLOAD_DIR, f"{safe_title}.mp4")
    if not os.path.exists(merged_video_path):
        print("错误:无法找到合并后的视频文件")
        print("必须成功合并音视频才能继续")
        sys.exit(1)

    print("使用合并后的视频文件进行分析")
    print(f"上传文件路径: {merged_video_path}")

    print("\n===== 上传视频到OSS =====")
    # A timestamp prefix keeps repeated uploads of the same title distinct.
    timestamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
    object_name = f"bilibili/{timestamp}_{safe_title}.mp4"

    oss_url = upload_file_to_oss(
        merged_video_path, bucket_name, object_name, access_key_id, access_key_secret
    )
    if not oss_url:
        print("文件上传失败,无法提交给通义听悟API")
        sys.exit(1)

    print("\n===== 开始提交给通义听悟API分析 =====")
    print(f"视频标题: {video_title}")
    print(f"使用的OSS文件URL: {oss_url}")

    # The OSS URL is what Tingwu receives as the file to analyse.
    task_id = submit_tingwu_task(oss_url, app_key, access_key_id, access_key_secret)
    if not task_id:
        print("\n===== 提交任务失败 =====")
        sys.exit(1)

    print("\n===== 等待任务处理完成 =====")
    if not _wait_for_task(task_id, access_key_id, access_key_secret):
        sys.exit(1)


def _load_credentials():
    """Return (access_key_id, access_key_secret, app_key, bucket_name) from env.

    Secrets are never embedded in source. If any required variable is unset or
    blank, the missing names are printed and the process exits with status 1.
    """
    required = (
        "ALIBABA_CLOUD_ACCESS_KEY_ID",
        "ALIBABA_CLOUD_ACCESS_KEY_SECRET",
        "TINGWU_APP_KEY",
    )
    values = {name: os.environ.get(name, "").strip() for name in required}
    missing = [name for name in required if not values[name]]
    if missing:
        print(f"错误:缺少必需的环境变量: {', '.join(missing)}")
        sys.exit(1)

    # The bucket name is not a secret, so the previous value remains the default.
    bucket_name = os.environ.get("OSS_BUCKET_NAME", "").strip() or "bucket-xcdesktop"
    return (
        values["ALIBABA_CLOUD_ACCESS_KEY_ID"],
        values["ALIBABA_CLOUD_ACCESS_KEY_SECRET"],
        values["TINGWU_APP_KEY"],
        bucket_name,
    )


def _wait_for_task(
    task_id, access_key_id, access_key_secret, max_retries=60, retry_interval=10
):
    """Poll the task status until it finishes; return True only on COMPLETED.

    Queries at most ``max_retries`` times, sleeping ``retry_interval`` seconds
    after every non-terminal answer. Returns False on FAILED/INVALID or when
    the query budget is exhausted.
    """
    import time

    for attempt in range(1, max_retries + 1):
        print(f"\n第 {attempt} 次查询任务状态...")
        task_status = check_task_status(task_id, access_key_id, access_key_secret)

        if task_status == "COMPLETED":
            print("\n===== 任务处理完成 =====")
            print("分析完成,结果已打印如上")
            return True

        if task_status in ("FAILED", "INVALID"):
            print(f"\n===== 任务处理{task_status} =====")
            print("分析完成,结果已打印如上")
            return False

        # ONGOING and unrecognised states are both retried after a pause.
        if task_status == "ONGOING":
            print(f"任务正在处理中,{retry_interval}秒后再次查询...")
        else:
            print(f"未知任务状态: {task_status},{retry_interval}秒后再次查询...")
        time.sleep(retry_interval)

    print("\n===== 任务处理超时 =====")
    print("已达到最大查询次数,任务可能仍在处理中")
    return False
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Script entry point: run the pipeline only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
|