Project :視頻轉文本
Introduction:通過使用FFmpeg-Python庫,將視頻的音頻抽取出來存放至本地文件夾,再調用
百度語音識別REST Api,將音頻轉化文字,該Api可識別英語和普通話
Attention :上傳的視頻不能超過60秒。同時主機需要聯網才可調用雲端Api
Quickstart :
1. Download Anaconda and install it:
https://www.anaconda.com/distribution/
2. Open a terminal and run this command to create the environment:
conda env create -f bat_video.yaml
3. Activate the newly created conda environment, then run: python main.py
import os
import ffmpeg
from aip import AipSpeech
# Baidu speech-recognition REST API credentials.
# Each value may be supplied through an environment variable so that real
# keys do not have to be committed to the source file; when the variable is
# unset the placeholder below is used, exactly as before.
APP_ID = os.environ.get('BAIDU_APP_ID', '176xxxx')
API_KEY = os.environ.get('BAIDU_API_KEY', 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx')
SECRET_KEY = os.environ.get('BAIDU_SECRET_KEY', 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx')
client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)

# Working directories, resolved against the current working directory:
# input videos, output transcripts, and the intermediate extracted audio.
video_path = os.path.abspath('./Raw_Video')
text_path = os.path.abspath('./Output_Text')
radio_path = os.path.abspath('./Raw_Radio')

# os.listdir() returns entries in arbitrary order; sort them so every run
# processes the videos in the same order.
files = sorted(os.listdir(video_path))
# Main routine
def main():
    """Convert every video in ``video_path`` into a text transcript.

    For each entry of the module-level ``files`` list this function:

    1. extracts the audio track with ffmpeg as mono, 16 kHz, signed 16-bit
       little-endian raw PCM and stores it as ``<name>.pcm`` in
       ``radio_path``;
    2. sends the PCM bytes to the speech-recognition ``client``;
    3. writes ``str()`` of the response's ``'result'`` value to
       ``<name>.txt`` in ``text_path`` (UTF-8 encoded).

    Entries of ``video_path`` that are not regular files are skipped. When a
    response carries no ``'result'`` key, its ``err_no`` / ``err_msg`` values
    are reported on stderr and processing continues with the next file
    instead of aborting the whole batch with a ``KeyError``.

    Errors raised by ffmpeg itself are not caught here.
    """
    import sys  # local import: only needed for error reporting

    # Neither ffmpeg nor open() creates missing parent directories.
    os.makedirs(radio_path, exist_ok=True)
    os.makedirs(text_path, exist_ok=True)

    for file in files:
        entry = str(file)
        source = os.path.join(video_path, entry)
        if not os.path.isfile(source):
            # Sub-directories cannot be fed to ffmpeg.
            continue

        filename = os.path.splitext(entry)[0]
        pcm_path = os.path.join(radio_path, filename + '.pcm')

        # Extract the audio track with ffmpeg.
        (
            ffmpeg
            .input(source)
            .output(pcm_path, format='s16le', acodec='pcm_s16le', ac=1, ar='16k')
            # Overwrite stale output so a re-run does not stall on ffmpeg's
            # interactive "file already exists" prompt.
            .run(overwrite_output=True)
        )

        # Recognise the extracted local audio file.
        response = client.asr(get_file_content(pcm_path), 'pcm', 16000, {
            'dev_pid': 1537,
        })

        # NOTE(review): assumes the response is a dict that contains
        # 'result' only on success -- confirm against the Baidu ASR docs.
        if 'result' not in response:
            print(
                '%s: speech recognition failed (err_no=%s, err_msg=%s)'
                % (entry, response.get('err_no'), response.get('err_msg')),
                file=sys.stderr,
            )
            continue

        # Write the recognised content to the transcript file. The value is
        # stringified as-is to keep the original output format.
        txt_name = os.path.join(text_path, filename + '.txt')
        with open(txt_name, 'w', encoding='utf-8') as f:
            f.write(str(response['result']))
# Read a file
def get_file_content(filePath):
    """Return the complete contents of the file at ``filePath`` as bytes."""
    with open(filePath, mode='rb') as stream:
        payload = stream.read()
    return payload
# Run the batch conversion only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()