1.下載視頻的源碼如下:
import os
import requests
from bs4 import BeautifulSoup
import threading
from bj.models import Video
# globals(repo_dir = './../tmp')
# Base directory for downloaded videos; download() joins the page number
# onto this path to get the per-page folder.
repo_dir = './../tmp/video'
def get_response(url, timeout=30):
    """Fetch *url* with HTTP GET and return the raw response body.

    A desktop-browser User-Agent header is sent so the site is less likely
    to reject the request as coming from a script.

    Args:
        url: Address to request.
        timeout: Seconds to wait for the server. ``requests`` applies no
            timeout by default, so without one a stalled connection would
            block the caller forever.

    Returns:
        The response body as ``bytes``.

    Raises:
        requests.exceptions.RequestException: On connection failure or
            when the timeout expires.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36'
    }
    response = requests.get(url=url, headers=headers, timeout=timeout)
    return response.content
def get_content_video(html):
    """Extract (description, mp4 URL, video id) tuples from a listing page.

    Args:
        html: Page markup, in any form ``BeautifulSoup`` accepts.

    Returns:
        A list of ``(name, url, video_id)`` tuples, one for each
        ``.j-r-list-c`` entry that contains a ``.j-video`` element.
        ``name`` is the text of the entry's first ``<a>`` tag (empty string
        if there is none). ``url`` and ``video_id`` are read from the
        ``data-mp4`` and ``data-id`` attributes and are ``None`` when the
        attribute is absent.
    """
    # Parse with the parser bundled with the standard library.
    soup = BeautifulSoup(html, 'html.parser')
    urlList = []
    for item in soup.select('.j-r-list-c'):
        videos = item.select('.j-video')
        if not videos:
            # Entry without a video element: indexing [0] would raise
            # IndexError, and there is nothing to download anyway.
            continue
        video = videos[0]
        # The first link's text is kept as the video description.
        link = item.find('a')
        name = link.text if link is not None else ''
        urlList.append((name, video.get('data-mp4'), video.get('data-id')))
    return urlList
def download(urlList, page):
    """Download every video in *urlList* into the folder for *page*.

    Each download runs in its own thread, and a ``Video`` row is created for
    every entry that has a URL.

    Args:
        urlList: Sequence of ``(description, url, video_id)`` tuples.
            Entries whose URL is ``None`` are skipped.
        page: Page number as a string; used as the sub-folder name under
            ``repo_dir``.
    """
    f_path = os.path.join(repo_dir, page)
    # os._exists() is a private helper that looks a *name* up in the os
    # module's globals, not a path on disk, so it cannot be used for this
    # check: it made every re-run fail with FileExistsError in makedirs().
    if not os.path.isdir(f_path):
        print('路徑不存在,馬上創建!')
        # exist_ok guards against the folder appearing between the check
        # and the call.
        os.makedirs(f_path, exist_ok=True)
    for item in urlList:
        video_url = item[1]
        if video_url is None:
            continue
        video_id = item[2]
        # The file is named after the video id; the last three characters
        # of the URL are taken as the extension.
        f_path_video = os.path.join(f_path, '%s.%s' % (video_id, video_url[-3:]))
        # One thread per file so the downloads overlap.
        thread = threading.Thread(target=save_video, args=(f_path_video, video_url))
        thread.start()
        # NOTE: the row is created right after the thread starts; it does
        # not wait for, or depend on, the download succeeding.
        Video.objects.create(
            video_id=video_id,
            video_url=video_url,
            video_desc=item[0],
        )
def save_video(f_path_video, video_url):
    """Fetch *video_url* and write the returned bytes to *f_path_video*."""
    # get_response() hands back the whole file as one binary payload.
    payload = get_response(video_url)
    with open(f_path_video, mode='wb') as out_file:
        out_file.write(payload)
def main():
    """Crawl listing pages 1 through 49 and download the videos on each."""
    for i in range(1, 50):
        # Format the number explicitly: "第" + i raised TypeError because
        # str and int cannot be concatenated.
        print("第%d頁" % i)
        url = 'http://www.budejie.com/video/%s' % i
        html = get_response(url)
        urlList = get_content_video(html)
        download(urlList, str(i))
#
# if __name__=="__main__":
# main()
'''
** 由於我們這裏僅用於測試,所以我們只抓取一頁
** 鏈接最後的數字表示抓取的數據頁碼,由於首頁的1可以不寫,也可以寫上
** 爲了大家更好的理解多頁的表示,這裏我們僅抓取一頁,並且鏈接後面寫有頁碼1
'''
def test():
    """Run the fetch / parse / download pipeline on listing page 1 only."""
    first_page = 'http://www.budejie.com/video/1'
    page_html = get_response(first_page)
    found = get_content_video(page_html)
    download(found, str(1))
2.視頻尾部多餘部分的切割(需要先安裝ffmpeg,安裝很簡單,可自行搜索教程)
import os
import subprocess
import datetime
def substring(date):
    """Convert a bytes timestamp ``b"HH:MM:SS.ss"`` to seconds.

    Args:
        date: Bytes holding colon-separated hours, minutes and seconds;
            surrounding whitespace is ignored.

    Returns:
        The total number of seconds as a float.
    """
    fields = date.decode().strip().split(":")
    hours = int(fields[0])
    minutes = int(fields[1])
    seconds = float(fields[2])
    return hours * 60 * 60 + minutes * 60 + seconds
def sub_video(url="/media/facelive/Elements/videos/",
              url2="/media/facelive/Elements/sub_videos/",
              tail_seconds=4):
    """Cut the last *tail_seconds* seconds off every file in *url*.

    For each file, ffmpeg is run once to read its duration and once more to
    copy everything except the tail into *url2* under the same file name.

    Args:
        url: Directory holding the original videos.
        url2: Directory that receives the trimmed copies.
        tail_seconds: Number of seconds to drop from the end of each video.
    """
    # Local import: only needed to quote the path for the shell pipeline.
    import shlex

    for file_name in os.listdir(url):
        src = os.path.join(url, file_name)
        dst = os.path.join(url2, file_name)
        # The pipeline needs a shell; "2>&1" folds ffmpeg's stderr into the
        # pipe so grep can pick out the Duration line. The path is quoted so
        # names with spaces or shell metacharacters cannot break (or inject
        # into) the command.
        probe = ("ffmpeg -i " + shlex.quote(src)
                 + " 2>&1 | grep 'Duration' | cut -d ' ' -f 4 | sed s/,//")
        result = subprocess.run(args=probe, stdout=subprocess.PIPE, shell=True)
        date = result.stdout
        print(type(date))
        print(date)
        time = substring(date)
        end = time - tail_seconds
        # No shell needed here: passing an argument list hands the paths to
        # ffmpeg verbatim.
        subprocess.run(args=[
            "ffmpeg", "-ss", "0", "-t", str(end), "-accurate_seek",
            "-i", src, "-codec", "copy", "-avoid_negative_ts", "1", dst,
        ])
        print(time)
    print("視頻截取完成!!")
def test():
    """Print the name of every entry in the local videos directory."""
    video_dir = "/home/facelive/Downloads/videos/"
    for entry in os.listdir(video_dir):
        print(entry)
3.加入水印
import os
import subprocess
import datetime
def logo_video(url="/media/facelive/Elements/videos/",
               url3="/media/facelive/Elements/logo_videos/",
               logo="/home/facelive/Downloads/image/11.png"):
    """Overlay a logo image on every video in *url*.

    Args:
        url: Directory holding the source videos.
        url3: Directory that receives the watermarked copies, under the
            same file names.
        logo: Path of the image to overlay.
    """
    for file_name in os.listdir(url):
        src = os.path.join(url, file_name)
        dst = os.path.join(url3, file_name)
        # An argument list (no shell) passes the paths to ffmpeg verbatim,
        # so file names with spaces or shell metacharacters are safe.
        # overlay=W-w sets the overlay's x offset to main width minus logo
        # width, i.e. flush with the right edge.
        subprocess.run(args=[
            "ffmpeg", "-i", src, "-i", logo,
            "-filter_complex", "overlay=W-w", dst,
        ])
    print("視頻logo完成!!")