rospeex_core: client_google.py Source File

Go to the documentation of this file.
00001 #!/usr/bin/env python
00002 # -*- coding: utf-8 -*-
00003 
00004 import urllib
00005 import urllib2
00006 import json
00007 import socket
00008 import logging
00009 import traceback
00010 import ssl
00011 import rospy
00012 
00013 # import library
00014 from rospeex_core.validators import accepts, check_wave_data, check_language
00015 from rospeex_core.exceptions import InvalidRequestException
00016 from rospeex_core.exceptions import InvalidResponseException
00017 from rospeex_core.exceptions import SpeechRecognitionException
00018 from rospeex_core.exceptions import RequestTimeoutException
00019 from rospeex_core.exceptions import ParameterException
00020 from rospeex_core.sr.client_base import SpeechRecognitionClient
00021 from rospeex_core.sr.client_nict import SpeechRecognitionClient_NICT
00022 
# Module-level logger for this client.
# NOTE: logging.basicConfig() is called at import time, so merely importing
# this module configures the root logger at INFO level (it does nothing when
# the root logger already has handlers).
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
00026 
00027 
class SpeechRecognitionClient_Google(SpeechRecognitionClient):
    """ Speech recognition client for the Google Speech API v2.

    Every request is also sent to the NICT engine; the NICT text is returned
    when the Google server answers with an invalid response.
    """
    # Audio format constraints handed to check_wave_data(). FRAMERATE is also
    # announced to Google in the Content-Type header of each request.
    # NOTE(review): the unit of AUDIO_LENGTH is defined by check_wave_data(),
    # which is not visible from this file - confirm before changing it.
    AUDIO_LENGTH = 16000
    FRAMERATE = 16000
    CHANNELS = 1
    SAMPWIDTH = 2
    # Language codes accepted by request().
    LANGUAGES = ['ja', 'en']
    # Endpoint; the url-encoded query string is appended directly to it.
    URL = 'https://www.google.com/speech-api/v2/recognize?'

    def __init__(self):
        """ initialize function (reads the api key from the ROS parameter server) """
        self._key = None
        self._load_parameter()

    # NOTE(review): 'languate' looks like a typo for 'language'. It is left
    # untouched because the semantics of @accepts are not visible here and
    # correcting it could start rejecting values that are accepted today.
    @accepts(data=str, languate=str, timeout=int)
    def request(self, data, language='ja', timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
        """
        send speech recognition request to server
        @param data: speech binary data
        @type  data: str
        @param language: speech data language (one of LANGUAGES)
        @type  language: str
        @param timeout: time out time [s] for the google request
                        (passed as-is to urllib2.urlopen)
        @type  timeout: int
        @return: recognized text from google, or the nict result
                 ('' when the nict request failed too) when the google
                 response is invalid
        @rtype: str
        @raise ParameterException: the google api key is not set
        @raise RequestTimeoutException: the google request timed out
        @raise InvalidRequestException: the google server could not be reached
        @raise SpeechRecognitionException: any other failure of the google request
        """
        check_wave_data(data, self.FRAMERATE, self.CHANNELS, self.SAMPWIDTH, self.AUDIO_LENGTH)
        check_language(language, self.LANGUAGES)
        self._check_api_key()

        # speech recognition by nict engine.
        # Best effort only: its result is used solely as the fallback below,
        # so a failure here must not abort the google request.
        nict_result = ''
        try:
            nict_result = SpeechRecognitionClient_NICT().request(data, language, 10)
        except Exception as err:
            logger.warning('nict speech api request failed: %s', err)

        # speech recognition by google engine
        try:
            return self._request_google_server(self._key, language, data, timeout)
        except InvalidResponseException:
            logger.info('google speech api connection failed. thus use nict api.')
            return nict_result

    def _load_parameter(self):
        """ load the google api key from the private ROS parameter '~google_api_key' """
        try:
            self._key = rospy.get_param('~google_api_key', None)
        except Exception as err:
            # Keep the key unset; _check_api_key() reports it at request time.
            rospy.loginfo(err)

    def _check_api_key(self):
        """ check api key
        @raise ParameterException: no api key has been loaded
        """
        if self._key is None:
            msg = 'argment failed. if you want to use google engine,'\
                  'you MUST set api key for google speech api v2.'
            raise ParameterException(msg)

    def _request_google_server(self, access_key, language, data, timeout):
        """
        speech recognition request to google server (use speech api v2)
        @param access_key: google speech api key
        @type  access_key: str
        @param language: speech data language
        @type  language: str
        @param data: speech binary data
        @type  data: str
        @param timeout: timeout time [s]
        @type  timeout: int
        @return: recognized text ('' when the server returned no hypothesis)
        @rtype: str
        @raise InvalidResponseException: http error status, or the response
                                         body does not have the expected layout
        @raise RequestTimeoutException: the request timed out
        @raise InvalidRequestException: the url could not be opened
        @raise SpeechRecognitionException: any other error
        """
        try:
            # build the request and send the data to google
            req = self._create_request(access_key, language, data)
            res = urllib2.urlopen(req, timeout=timeout)
            try:
                res_read = res.read()
            finally:
                # always release the connection, even when read() fails
                res.close()
            google_result_text = self._process_data(res_read)

        except InvalidResponseException:
            # Raised by _process_data(); it must reach request() unchanged so
            # that the nict fallback is used instead of being rewrapped by the
            # generic handler at the bottom.
            raise

        except urllib2.HTTPError as err:
            # HTTPError is a subclass of URLError, so it has to be handled
            # before URLError or this branch is never reached.
            raise InvalidResponseException('http error. %s Exception:%s' % (err.code, err.msg))

        except urllib2.URLError as err:
            if isinstance(err.reason, socket.timeout):
                raise RequestTimeoutException('request time out. Exception: %s' % str(err))
            raise InvalidRequestException('request url error. Exception:%s' % str(err))

        except (ssl.SSLError, socket.timeout) as err:
            raise RequestTimeoutException(str(err))

        except Exception:
            raise SpeechRecognitionException('unknown exception. Traceback: %s' % traceback.format_exc())

        return google_result_text

    def _create_request(self, access_key, language, data):
        """
        create http request data for google speech api v2
        @param access_key: google speech api key
        @type  access_key: str
        @param language: speech data language
        @type  language: str
        @param data: speech binary data
        @type  data: str
        @return: POST request carrying the audio data as its body
        @rtype: urllib2.Request
        """
        # The announced sample rate follows FRAMERATE, the same value the
        # audio is validated against in request().
        header = {'Content-Type': 'audio/l16; rate=%d;' % self.FRAMERATE}
        values = {
            'output': 'json',
            'lang': language,
            'key': access_key,
        }
        url_req = self.URL + urllib.urlencode(values)
        return urllib2.Request(url_req, data, header)

    def _process_data(self, input_str):
        """ extract the recognized text from the raw server response
        @param input_str: response body; one JSON document per line
        @type  input_str: str
        @return: first transcript found, or '' when every 'result' is empty
        @rtype: str
        @raise InvalidResponseException: an expected key is missing
        """
        json_result_list = []
        for line in input_str.split('\n'):
            try:
                json_result_list.append(json.loads(line))
            except ValueError:
                # blank or non-JSON line: ignore it
                continue

        # extract the data
        result_data = self._extract_result_key_data(json_result_list)
        if result_data == '':
            # the server answered, but without any hypothesis
            return ''

        result_data = self._extract_alternative_final_key_data(result_data)
        result_data = self._extract_final_data(result_data)
        result_data = self._extract_transcript_data(result_data)

        # get data
        return result_data[0] if result_data else ''

    @classmethod
    def _extract_result_key_data(cls, input_data):
        """ extract result data from server response
        @param input_data: decoded JSON documents of the response
        @type  input_data: list
        @return: all entries of the non-empty 'result' values (each value is
                 expected to be a list) joined into one list, or '' when
                 every 'result' value is empty
        @rtype: list or str
        @raise InvalidResponseException: no document has a 'result' key
        """
        # pick up the data we need; decoded values that are not JSON objects
        # cannot carry a 'result' key and are skipped
        result_data = [result['result'] for result in input_data
                       if isinstance(result, dict) and 'result' in result]
        if not result_data:
            raise InvalidResponseException('result key is not found. Input: %s' % input_data)

        # drop the empty 'result' values and concatenate the remaining ones
        merged = [entry for chunk in result_data if chunk for entry in chunk]
        if not merged:
            return ''
        return merged

    @classmethod
    def _extract_alternative_final_key_data(cls, input_data):
        """ extract alternative key data
        @param input_data: entries taken from the 'result' values
        @type  input_data: list of dict
        @return: the entries that have both an 'alternative' and a 'final' key
        @rtype: list of dict
        @raise InvalidResponseException: no entry has both keys
        """
        # keep the results that have key=>alternative and key=>final
        result_data = [entry for entry in input_data
                       if 'alternative' in entry and 'final' in entry]
        if not result_data:
            raise InvalidResponseException('alternative key is not found. Input: %s' % input_data)
        return result_data

    @classmethod
    def _extract_final_data(cls, intput_data):
        """ extract final data from server response
        @param intput_data: entries that have 'alternative' and 'final' keys
                            (parameter spelling kept for compatibility)
        @type  intput_data: list of dict
        @return: the non-empty 'alternative' values of the entries whose
                 'final' value is True
        @rtype: list
        @raise InvalidResponseException: no such entry exists
        """
        # keep the data where result['final'] is True
        result_data = [result['alternative'] for result in intput_data
                       if result['alternative'] and result['final'] is True]
        if not result_data:
            raise InvalidResponseException('final key is not found. Input: %s' % intput_data)
        return result_data

    @classmethod
    def _extract_transcript_data(cls, input_data):
        """ extract transcript data from server response
        @param input_data: 'alternative' lists, one per final result
        @type  input_data: list of list of dict
        @return: the 'transcript' values in response order
        @rtype: list
        @raise InvalidResponseException: no alternative has a 'transcript' key
        """
        # keep the data that has result['transcript']
        alternatives = [alt for group in input_data for alt in group]
        result_data = [alt['transcript'] for alt in alternatives if 'transcript' in alt]
        if not result_data:
            raise InvalidResponseException('transcript key is not found. Input: %s' % input_data)
        return result_data
00229