每日將b站top100視頻下載到本地

定時將 B 站 Top 100 視頻下載到本地

說明

目標是每日爬取 B 站當日 Top 100 的視頻並下載到本地。目標網址:傳送門(原文鏈接;下方代碼實際請求的接口爲 http://api.vc.bilibili.com/board/v1/ranking/top)。

實現

核心代碼如下:

#!/usr/bin/env python
# -*-coding:utf-8-*-

import requests
import random
import time
import os
import sys

from ..utils import Mp4info

class DownloadVideo:
	"""Download the videos listed by bilibili's "top" ranking API to local disk.

	``run()`` requests ten ranking pages of ten items each, skips every item
	whose duration cannot be probed or is longer than one hour, and stores the
	remaining videos as ``<sys.path[0]>/bilibili_video/<title>.mp4``.
	"""

	# Browser-like User-Agent sent with every HTTP request.
	_USER_AGENT = (
		'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) '
		'Chrome/69.0.3497.100 Safari/537.36'
	)

	_PAGE_COUNT = 10  # number of ranking pages requested per run
	_PAGE_SIZE = 10  # items requested per page ('page_size' API parameter)
	_MAX_DURATION = 60 * 60  # videos reported longer than this are skipped
	_MAX_TITLE_LENGTH = 80  # keeps '<title>.mp4' short enough for common filesystems

	# Translation table: control characters and characters that are path
	# separators or invalid in file names on common platforms become '_'.
	_UNSAFE_FILENAME_CHARS = dict.fromkeys(
		list(range(32)) + [ord(char) for char in '\\/:*?"<>|'], '_'
	)

	def __init__(self):
		self.api_url = 'http://api.vc.bilibili.com/board/v1/ranking/top?'

	def _getJson(self, url, num):
		"""Fetch one ranking page and return its decoded JSON body.

		Args:
			url: API endpoint to query.
			num: value sent as the ``next_offset`` query parameter.

		Returns:
			The decoded JSON document, or ``None`` when the request fails or
			the body is not valid JSON ('request error' is printed then).
		"""
		headers = {'User-Agent': self._USER_AGENT}
		params = {
			'page_size': self._PAGE_SIZE,
			'next_offset': str(num),
			# NOTE(review): the tag may have been converted from Simplified
			# Chinese together with the surrounding article - confirm the
			# exact value the API expects.
			'tag': '今日熱門',
			'platform': 'pc',
		}

		try:
			# NOTE(review): verify=False disables TLS certificate checks.
			response = requests.get(url, params=params, headers=headers, verify=False, timeout=2)
			return response.json()
		except (requests.RequestException, ValueError):
			# ValueError covers JSON decoding failures on every requests
			# version; KeyboardInterrupt/SystemExit are no longer swallowed.
			print('request error')
			return None

	@staticmethod
	def _safe_filename(title):
		"""Turn a video title into a string that is safe as a file name.

		Path separators, reserved characters and control characters are
		replaced with '_', surrounding spaces/dots are stripped and the result
		is truncated. Empty or ``None`` titles become ``'untitled'``.
		"""
		text = '' if title is None else str(title)
		cleaned = text.translate(DownloadVideo._UNSAFE_FILENAME_CHARS).strip(' .')
		cleaned = cleaned[:DownloadVideo._MAX_TITLE_LENGTH].rstrip(' .')
		return cleaned or 'untitled'

	@staticmethod
	def _extract_items(payload):
		"""Return ``payload['data']['items']`` or ``[]`` if the shape is unexpected."""
		if not isinstance(payload, dict):
			return []
		data = payload.get('data')
		if not isinstance(data, dict):
			return []
		items = data.get('items')
		return items if isinstance(items, list) else []

	def _download(self, url, path):
		"""Stream ``url`` into the file at ``path``.

		The body is written to ``path + '.part'`` first and renamed once the
		transfer has finished, so an interrupted download never leaves a
		truncated file under the final name.

		Raises:
			OSError: the server answered with a status other than 200
				(``requests`` exceptions are ``OSError`` subclasses as well).
		"""
		headers = {'User-Agent': self._USER_AGENT}
		chunk_size = 1024  # bytes read per iteration
		tmp_path = path + '.part'
		completed = False

		try:
			# stream=True is required so the body is not loaded into memory;
			# the context manager releases the connection in every case.
			# NOTE(review): verify=False disables TLS certificate checks.
			with requests.get(url, headers=headers, stream=True, verify=False, timeout=2) as response:
				if response.status_code != 200:
					raise OSError('unexpected HTTP status {}'.format(response.status_code))

				# The header is optional (e.g. chunked transfer encoding).
				content_length = response.headers.get('content-length')
				if content_length and content_length.isdigit():
					print('[文件大小]:%0.2f MB' % (int(content_length) / chunk_size / 1024))

				with open(tmp_path, 'wb') as file:
					for data in response.iter_content(chunk_size=chunk_size):
						file.write(data)

			os.replace(tmp_path, path)
			completed = True
		finally:
			if not completed and os.path.exists(tmp_path):
				os.remove(tmp_path)

	def _process_item(self, info):
		"""Probe and download the video described by one ranking entry.

		Entries without a download link, entries whose duration cannot be
		determined and entries longer than ``_MAX_DURATION`` are skipped.
		"""
		item = info.get('item') if isinstance(info, dict) else None
		if not isinstance(item, dict):
			return

		title = item.get('description')  # title of the short video
		print(title)
		video_url = item.get('video_playurl')  # download link of the short video
		if not video_url:
			# Some entries do not provide a download link.
			return

		try:
			# Mp4info is a project helper; which exceptions it raises is not
			# visible here, so any ordinary failure means "skip this video".
			duration = Mp4info(video_url).get_duration()
			print('duration', duration)
			if duration > self._MAX_DURATION:
				return
		except Exception:
			return

		try:
			# The download folder lives next to the running script.
			video_dir = os.path.join(sys.path[0], 'bilibili_video')
			os.makedirs(video_dir, exist_ok=True)

			file_name = self._safe_filename(title) + '.mp4'
			self._download(video_url, path=os.path.join(video_dir, file_name))
			print('成功下載一個!')
		except Exception:
			print('涼涼,下載失敗')

	def _dispatcher(self):
		"""Walk all ranking pages and download every eligible video."""
		for page in range(self._PAGE_COUNT):
			offset = page * self._PAGE_SIZE + 1
			payload = self._getJson(self.api_url, offset)
			# A failed or malformed page yields no items instead of crashing.
			for info in self._extract_items(payload):
				self._process_item(info)

			time.sleep(random.randint(2, 8))  # random pause between pages

	def run(self):
		"""Entry point: run one full crawl."""
		self._dispatcher()

運行:

curl -d "task_id=12345" http://127.0.0.1:5000/spider/bilibili/addjob

結果:
(原文此處爲運行結果截圖,圖片未能保留。)

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章