Files
XCDesktop/tools/tongyi/bilibili_spider.py

93 lines
3.1 KiB
Python
Raw Normal View History

2026-03-08 01:34:54 +08:00
import os
import sys
from utils import (
extract_bv_from_url,
get_video_info,
get_play_urls,
download_audio_video,
merge_audio_video,
cleanup_files,
sanitize_filename
)
from config import DOWNLOAD_DIR
def crawl_bilibili_video(video_url, download=False):
    """
    Resolve a Bilibili video's title and best-quality play URL.

    The share link is converted to a BV id, the video metadata and the
    stream lists are fetched, and the entry with the highest ``quality``
    value is picked from both the ``video`` and the ``audio`` list. When
    ``download`` is true the two streams are downloaded into
    ``<DOWNLOAD_DIR>/<sanitized title>/`` and merged into
    ``<DOWNLOAD_DIR>/<sanitized title>.mp4``; the two downloaded files are
    then handed to ``cleanup_files``.

    :param video_url: Share link of the video.
    :param download: Whether to download and merge the streams
        (default ``False``).
    :return: ``(video_title, video_play_url)`` on success, or
        ``(None, None)`` when any step fails. Failures are not raised:
        the error and its traceback are printed instead.
    """
    print(f"开始爬取视频: {video_url}")
    try:
        # 1. Extract the BV id from the share link.
        bv_id = extract_bv_from_url(video_url)
        print(f"提取到BV号: {bv_id}")

        # 2. Fetch the video metadata; the BV id doubles as the title
        #    when the metadata carries no "title" key.
        video_info = get_video_info(bv_id)
        video_title = video_info.get("title", f"{bv_id}")
        # NOTE(review): cid may be None here if the metadata lacks it;
        # it is passed on unchanged - confirm get_play_urls copes with that.
        cid = video_info.get("cid")
        print(f"视频标题: {video_title}")
        print(f"视频CID: {cid}")

        # 3. Fetch the stream lists; both a video and an audio stream
        #    are required.
        play_urls = get_play_urls(bv_id, cid)
        if not play_urls.get("video") or not play_urls.get("audio"):
            raise RuntimeError("无法获取音视频流链接")

        # max() returns the first entry with the highest quality, the same
        # element a stable descending sort would put at index 0.
        video_item = max(play_urls["video"], key=lambda item: item["quality"])
        video_play_url = video_item["url"]
        print(f"选择视频质量: {video_item['quality']}")
        print(f"视频播放URL: {video_play_url}")

        audio_item = max(play_urls["audio"], key=lambda item: item["quality"])
        audio_url = audio_item["url"]
        print(f"选择音频质量: {audio_item['quality']}")

        # 4. Download the streams (optional).
        if download:
            print("开始下载音视频...")
            safe_title = sanitize_filename(video_title)
            save_dir = os.path.join(DOWNLOAD_DIR, safe_title)
            video_path, audio_path = download_audio_video(
                video_play_url, audio_url, save_dir, video_title
            )

            # 5. Merge audio and video into a single mp4.
            output_path = os.path.join(DOWNLOAD_DIR, f"{safe_title}.mp4")
            try:
                merge_audio_video(video_path, audio_path, output_path)
            finally:
                # 6. Clean up the intermediate files even when the merge
                #    fails, so a failed run does not leave them behind.
                cleanup_files(video_path, audio_path)
            print(f"视频爬取完成,保存路径: {output_path}")
        else:
            print("跳过下载直接使用播放URL")

        return video_title, video_play_url
    except Exception as e:
        # Top-level boundary of the crawl: report the failure and signal it
        # through the return value instead of propagating.
        print(f"爬取失败: {e}")
        import traceback
        traceback.print_exc()
        return None, None
def main():
    """
    Command-line entry point: crawl and download one video.

    Expects exactly one command-line argument, the video link. With any
    other argument count the usage text is printed and the process exits
    with status 1. The process also exits with status 1 when
    ``crawl_bilibili_video`` reports failure by returning ``(None, None)``,
    so a calling process can detect a failed crawl.
    """
    if len(sys.argv) != 2:
        print("使用方法: python bilibili_spider.py <视频链接>")
        print("示例: python bilibili_spider.py https://www.bilibili.com/video/BV1uHFjzVEju/")
        sys.exit(1)

    video_url = sys.argv[1]
    # Running the script directly downloads the video by default.
    result = crawl_bilibili_video(video_url, download=True)
    if result == (None, None):
        # The crawl already printed the error; reflect it in the exit status.
        sys.exit(1)
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()