Files
XCDesktop/tools/tongyi/bilibili_spider.py
2026-03-08 01:34:54 +08:00

93 lines
3.1 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import os
import sys
from utils import (
extract_bv_from_url,
get_video_info,
get_play_urls,
download_audio_video,
merge_audio_video,
cleanup_files,
sanitize_filename
)
from config import DOWNLOAD_DIR
def _pick_highest_quality(streams):
    """Return the entry of *streams* with the largest ``"quality"`` value.

    When several entries share the top quality the earliest one is returned,
    which is the same element a stable descending sort would place first.
    """
    return max(streams, key=lambda stream: stream["quality"])


def crawl_bilibili_video(video_url, download=False):
    """Crawl a Bilibili video and optionally download it.

    The function extracts the BV id from the link, fetches the video info
    (title and cid), fetches the play URLs, and selects the video and audio
    entries with the highest ``"quality"`` value. With ``download=True`` it
    additionally downloads both streams, merges them into
    ``<DOWNLOAD_DIR>/<sanitized title>.mp4`` and removes the temporary
    stream files.

    :param video_url: share link of the video
    :param download: whether to download and merge the streams; defaults to
        False
    :return: ``(video_title, video_play_url)`` on success, or
        ``(None, None)`` if any step fails (the error and its traceback are
        printed instead of being raised)
    """
    # Kept as a local import so this function does not depend on a
    # module-level ``import traceback``.
    import traceback

    print(f"开始爬取视频: {video_url}")
    try:
        # 1. Extract the BV id from the link.
        bv_id = extract_bv_from_url(video_url)
        print(f"提取到BV号: {bv_id}")

        # 2. Fetch the video info; the BV id doubles as a fallback title.
        video_info = get_video_info(bv_id)
        video_title = video_info.get("title", f"{bv_id}")
        cid = video_info.get("cid")
        print(f"视频标题: {video_title}")
        print(f"视频CID: {cid}")
        if cid is None:
            # Without a cid the play-URL lookup has nothing to query; fail
            # here with a clear message instead of passing None along.
            raise RuntimeError("无法获取视频CID")

        # 3. Fetch the play URLs; both a video and an audio list are required.
        play_urls = get_play_urls(bv_id, cid)
        if not play_urls.get("video") or not play_urls.get("audio"):
            raise RuntimeError("无法获取音视频流链接")

        # Pick the highest-quality video entry.
        video_item = _pick_highest_quality(play_urls["video"])
        video_play_url = video_item["url"]
        print(f"选择视频质量: {video_item['quality']}")
        print(f"视频播放URL: {video_play_url}")

        # Pick the highest-quality audio entry.
        audio_item = _pick_highest_quality(play_urls["audio"])
        audio_url = audio_item["url"]
        print(f"选择音频质量: {audio_item['quality']}")

        # 4. Download the streams (optional).
        if download:
            print("开始下载音视频...")
            safe_title = sanitize_filename(video_title)
            save_dir = os.path.join(DOWNLOAD_DIR, safe_title)
            video_path, audio_path = download_audio_video(
                video_play_url, audio_url, save_dir, video_title
            )

            # 5. Merge audio and video into a single mp4.
            output_path = os.path.join(DOWNLOAD_DIR, f"{safe_title}.mp4")
            try:
                merge_audio_video(video_path, audio_path, output_path)
            finally:
                # 6. Remove the temporary stream files even when the merge
                # fails, so a failed run does not leave them behind.
                cleanup_files(video_path, audio_path)
            print(f"视频爬取完成,保存路径: {output_path}")
        else:
            print("跳过下载直接使用播放URL")

        # Hand back the title and the selected video play URL.
        return video_title, video_play_url
    except Exception as exc:
        # Top-level boundary: report the failure and signal it to the caller
        # through the (None, None) return value rather than raising.
        print(f"爬取失败: {exc}")
        traceback.print_exc()
        return None, None
def main():
    """Command-line entry point: crawl and download one video.

    Expects exactly one command-line argument, the video link. With any
    other argument count it prints a usage message and exits with status 1.
    It also exits with status 1 when the crawl reports failure, so that
    shell callers can tell a failed run from a successful one.
    """
    if len(sys.argv) != 2:
        print("使用方法: python bilibili_spider.py <视频链接>")
        print("示例: python bilibili_spider.py https://www.bilibili.com/video/BV1uHFjzVEju/")
        sys.exit(1)

    video_url = sys.argv[1]
    # Running the script directly downloads the video by default.
    title, play_url = crawl_bilibili_video(video_url, download=True)
    if title is None and play_url is None:
        # crawl_bilibili_video signals failure by returning (None, None);
        # it has already printed the error, so only the exit status is set.
        sys.exit(1)
# Run the command-line entry point only when this file is executed as a
# script; importing it as a module does not trigger a crawl.
if __name__ == "__main__":
    main()