Google的語音識別API至今仍未正式發佈,沒有詳細的開發文檔,只提供一個可供調用的Web服務。該API要求上傳flac格式的音頻;如果原始音頻是其他格式,可以先用ffmpeg庫進行格式轉換。代碼依賴的庫有:json(json-simple)、httpclient、httpcore和commons-logging。具體代碼如下:
import java.io.*;
import java.net.MalformedURLException;
import org.apache.http.HttpEntity;
import org.apache.http.HttpHost;
import org.apache.http.HttpResponse;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.conn.params.ConnRoutePNames;
import org.apache.http.entity.InputStreamEntity;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.params.HttpProtocolParams;
import org.apache.http.util.EntityUtils;
import org.json.simple.JSONArray;
import org.json.simple.JSONObject;
import org.json.simple.parser.JSONParser;
import org.json.simple.parser.ParseException;
/**
 * Small client that posts an audio file to the speech recognition endpoint
 * configured in {@link #SERVER} and extracts the recognised sentence from the
 * JSON reply.
 */
public class voiceRecognition {
    // Recognition endpoint; the trailing "en-US" note is presumably an
    // alternative value for the lang query parameter.
    private static final String SERVER="http://www.google.com/speech-api/v1/recognize?lang=zh-CN&client=chromium&maxresults=1";//en-US
    private static final String HeaderType = "Content-Type";
    private static final String HeaderContent = "audio/x-flac; rate=8000";
    private static final String User_Agent = "Mozilla/5.0";

    /**
     * Extracts the "utterance" of the first entry of the "hypotheses" array.
     *
     * @param obj parsed JSON reply, may be null
     * @return the utterance text, or null when the reply has no usable
     *         hypothesis (missing/empty array or unexpected value types)
     */
    private static String parse(JSONObject obj) {
        if (obj == null) {
            return null;
        }
        Object hypotheses = obj.get("hypotheses");
        if (!(hypotheses instanceof JSONArray)) {
            return null;
        }
        JSONArray array = (JSONArray) hypotheses;
        // An empty array would otherwise make get(0) throw.
        if (array.isEmpty()) {
            return null;
        }
        Object first = array.get(0);
        if (!(first instanceof JSONObject)) {
            return null;
        }
        Object utterance = ((JSONObject) first).get("utterance");
        return (utterance instanceof String) ? (String) utterance : null;
    }

    /**
     * Sends the given audio file to {@link #SERVER} and returns the recognised
     * sentence.
     *
     * @param voiceFile path of the audio file to upload; the request is sent
     *                  with the header "audio/x-flac; rate=8000"
     * @return the recognised sentence, or null when the request fails, the
     *         reply cannot be parsed, or the reply contains no hypothesis
     */
    public static String recognise(String voiceFile) {
        HttpClient httpclient = new DefaultHttpClient();
        HttpPost httppost = new HttpPost(SERVER);
        HttpProtocolParams.setUserAgent(httpclient.getParams(), User_Agent);
        httppost.setHeader(HeaderType, HeaderContent);

        InputStream audio = null;
        BufferedReader br = null;
        try {
            audio = new FileInputStream(new File(voiceFile));
            // Length -1: the entity is created without a known content length.
            InputStreamEntity reqEntity = new InputStreamEntity(audio, -1);
            // NOTE(review): this differs from the Content-Type header set on
            // the request above; kept as in the original.
            reqEntity.setContentType("binary/octet-stream");
            httppost.setEntity(reqEntity);

            HttpResponse response = httpclient.execute(httppost);
            HttpEntity resEntity = response.getEntity();
            if (resEntity == null) {
                return null;
            }

            // Decode explicitly as UTF-8 instead of the platform default so
            // non-ASCII results are not garbled.
            br = new BufferedReader(new InputStreamReader(resEntity.getContent(), "UTF-8"));
            StringBuilder jsonResponse = new StringBuilder();
            String line;
            while ((line = br.readLine()) != null) {
                jsonResponse.append(line);
            }
            EntityUtils.consume(resEntity);

            Object parsed = new JSONParser().parse(jsonResponse.toString());
            if (!(parsed instanceof JSONObject)) {
                return null;
            }
            return parse((JSONObject) parsed);
        } catch (IOException e) {
            // Also covers MalformedURLException and UnsupportedEncodingException.
            e.printStackTrace();
        } catch (ParseException e) {
            e.printStackTrace();
        } finally {
            closeQuietly(br);
            // Closed here as well so the file handle is released even when the
            // request fails before the body is sent.
            closeQuietly(audio);
            httpclient.getConnectionManager().shutdown();
        }
        return null;
    }

    /** Closes the resource if it is non-null, ignoring any failure to close. */
    private static void closeQuietly(Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException ignored) {
            // Nothing useful can be done if closing fails.
        }
    }

    /**
     * Recognises the file "1.flac" and prints the result to standard output.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String voiceFile = "1.flac";
        String result = recognise(voiceFile);
        System.out.println(result);
    }
}
API其他相關信息:
Chrome 11 API: http://mikepultz.com/2011/03/accessing-google-speech-api-chrome-11/
其他開發介紹:http://blog.csdn.net/dlangu0393/article/details/7214728
ffmpeg庫: http://ffmpeg.org/