93 lines
3.1 KiB
Python
93 lines
3.1 KiB
Python
|
|
import os
|
|||
|
|
import sys
|
|||
|
|
from utils import (
|
|||
|
|
extract_bv_from_url,
|
|||
|
|
get_video_info,
|
|||
|
|
get_play_urls,
|
|||
|
|
download_audio_video,
|
|||
|
|
merge_audio_video,
|
|||
|
|
cleanup_files,
|
|||
|
|
sanitize_filename
|
|||
|
|
)
|
|||
|
|
from config import DOWNLOAD_DIR
|
|||
|
|
|
|||
|
|
def crawl_bilibili_video(video_url, download=False):
    """Crawl a Bilibili video and optionally download it.

    Extracts the BV id from the share link, looks up the video's title and
    cid, fetches the available stream URLs and picks the video entry and the
    audio entry with the largest ``quality`` value. When ``download`` is
    true, both streams are downloaded, merged into ``<title>.mp4`` under
    ``DOWNLOAD_DIR`` and the intermediate stream files are handed to
    ``cleanup_files``.

    :param video_url: share link of the video
    :param download: whether to download and merge the streams (default False)
    :return: ``(video_title, video_play_url)`` on success; ``(None, None)``
        if any step raised (the error message and traceback are printed)
    """
    print(f"开始爬取视频: {video_url}")

    try:
        # 1. Extract the BV id from the link.
        bv_id = extract_bv_from_url(video_url)
        print(f"提取到BV号: {bv_id}")

        # 2. Fetch the video metadata; the BV id doubles as the title when
        #    the metadata carries no "title" key.
        video_info = get_video_info(bv_id)
        video_title = video_info.get("title", f"{bv_id}")
        cid = video_info.get("cid")
        print(f"视频标题: {video_title}")
        print(f"视频CID: {cid}")

        # 3. Fetch the stream URLs; both a video and an audio stream are needed.
        play_urls = get_play_urls(bv_id, cid)
        if not play_urls.get("video") or not play_urls.get("audio"):
            raise Exception("无法获取音视频流链接")

        # Pick the highest-quality video stream. max() returns the first
        # maximal entry, the same one a stable descending sort would put
        # first, without sorting the whole list.
        video_item = max(play_urls["video"], key=lambda item: item["quality"])
        video_play_url = video_item["url"]
        print(f"选择视频质量: {video_item['quality']}")
        print(f"视频播放URL: {video_play_url}")

        # Pick the highest-quality audio stream.
        audio_item = max(play_urls["audio"], key=lambda item: item["quality"])
        audio_url = audio_item["url"]
        print(f"选择音频质量: {audio_item['quality']}")

        # 4. Download the streams (optional).
        if download:
            print("开始下载音视频...")
            # Sanitize once and reuse for both the working directory and the
            # output file name (assumes sanitize_filename is deterministic).
            safe_title = sanitize_filename(video_title)
            save_dir = os.path.join(DOWNLOAD_DIR, safe_title)
            video_path, audio_path = download_audio_video(
                video_play_url, audio_url, save_dir, video_title
            )

            # 5. Merge audio and video into a single file.
            output_path = os.path.join(DOWNLOAD_DIR, f"{safe_title}.mp4")
            try:
                merge_audio_video(video_path, audio_path, output_path)
            finally:
                # 6. Clean up the intermediate stream files even when the
                #    merge fails, so a failed run does not leave them behind.
                cleanup_files(video_path, audio_path)

            print(f"视频爬取完成,保存路径: {output_path}")
        else:
            print("跳过下载,直接使用播放URL")

        # Return the title and the selected video stream URL.
        return video_title, video_play_url

    except Exception as e:
        # Top-level boundary of the crawl: report the failure and signal it
        # to the caller through the (None, None) return value.
        print(f"爬取失败: {e}")
        import traceback
        traceback.print_exc()
        return None, None
|
|||
|
|
|
|||
|
|
def main():
    """Command-line entry point.

    Expects exactly one argument, the video link. Prints usage and exits
    with status 1 when the argument count is wrong. Otherwise crawls the
    video with downloading enabled and exits with status 1 if the crawl
    reported failure, so callers such as shell scripts can detect it.
    """
    if len(sys.argv) != 2:
        print("使用方法: python bilibili_spider.py <视频链接>")
        print("示例: python bilibili_spider.py https://www.bilibili.com/video/BV1uHFjzVEju/")
        sys.exit(1)

    video_url = sys.argv[1]
    # Running the script directly downloads the video by default.
    video_title, video_play_url = crawl_bilibili_video(video_url, download=True)

    # crawl_bilibili_video signals failure by returning (None, None); turn
    # that into a non-zero exit status instead of silently exiting with 0.
    if video_title is None and video_play_url is None:
        sys.exit(1)
|
|||
|
|
|
|||
|
|
# Run the crawler only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|