00001
00002
00003
00004 import urllib
00005 import urllib2
00006 import json
00007 import socket
00008 import logging
00009 import traceback
00010 import ssl
00011 import rospy
00012
00013
00014 from rospeex_core.validators import accepts, check_wave_data, check_language
00015 from rospeex_core.exceptions import InvalidRequestException
00016 from rospeex_core.exceptions import InvalidResponseException
00017 from rospeex_core.exceptions import SpeechRecognitionException
00018 from rospeex_core.exceptions import RequestTimeoutException
00019 from rospeex_core.exceptions import ParameterException
00020 from rospeex_core.sr.client_base import SpeechRecognitionClient
00021 from rospeex_core.sr.client_nict import SpeechRecognitionClient_NICT
00022
00023
# Install the default root handler at INFO level when this module is
# imported (logging.basicConfig does nothing if the root logger already
# has handlers configured).
logging.basicConfig(level=logging.INFO)

# Module-scoped logger used by the speech recognition client below.
logger = logging.getLogger(__name__)
00026
00027
class SpeechRecognitionClient_Google(SpeechRecognitionClient):
    """ SpeechRecognitionClient_Google class

    Speech recognition client that posts audio data to the Google speech
    api v2 endpoint (see URL) and extracts the first final transcript from
    the response.  When the google response is invalid, the result of the
    NICT client is returned instead.
    """
    # Wave format parameters handed to check_wave_data() in request().
    AUDIO_LENGTH = 16000
    FRAMERATE = 16000
    CHANNELS = 1
    SAMPWIDTH = 2
    # Language codes accepted by request().
    LANGUAGES = ['ja', 'en']
    # Base URL; the url-encoded query string is appended in _create_request().
    URL = 'https://www.google.com/speech-api/v2/recognize?'

    def __init__(self):
        """ initialize function """
        self._key = None
        self._load_parameter()

    # NOTE: the keyword was misspelled as "languate", so the language
    # argument was never type checked by the decorator.
    @accepts(data=str, language=str, timeout=int)
    def request(self, data, language='ja', timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
        """
        send speech recognition request to server
        @param data: speech binary data
        @type  data: str
        @param language: speech data language
        @type  language: str
        @param timeout: time out time [s] (passed to urllib2.urlopen)
        @type  timeout: int
        @return: recognized text. the NICT result (possibly '') is returned
                 when the google response is invalid.
        @rtype: str
        @raise ParameterException: the google api key is not set
        """
        check_wave_data(data, self.FRAMERATE, self.CHANNELS, self.SAMPWIDTH, self.AUDIO_LENGTH)
        check_language(language, self.LANGUAGES)
        self._check_api_key()

        # The NICT request is always issued first so that its result is
        # available as a fallback.  It is best-effort: a failure must not
        # prevent the google request, so it is only logged.
        nict_result = ''
        try:
            nict_result = SpeechRecognitionClient_NICT().request(data, language, 10)
        except Exception as err:
            logger.info('nict speech api request failed. Exception: %s', err)

        try:
            return self._request_google_server(self._key, language, data, timeout)
        except InvalidResponseException:
            logger.info('google speech api connection failed. thus use nict api.')
            return nict_result

    def _load_parameter(self):
        """ load parameter from rospy

        Reads the private ROS parameter '~google_api_key' into self._key.
        Any error raised by rospy is logged and the key is left unchanged.
        """
        try:
            self._key = rospy.get_param('~google_api_key', None)
        except Exception as err:
            rospy.loginfo(err)

    def _check_api_key(self):
        """ check api key
        @raise ParameterException: the api key is not set
        """
        if self._key is None:
            msg = 'argment failed. if you want to use google engine,'\
                  'you MUST set api key for google speech api v2.'
            raise ParameterException(msg)

    def _request_google_server(self, access_key, language, data, timeout):
        """
        speech recognition request to google server (use speech api v2)
        @param access_key: google speech api key
        @type  access_key: str
        @param language: speech data language
        @type  language: str
        @param data: speech binary data
        @type  data: str
        @param timeout: timeout time [s]
        @type  timeout: int
        @return: recognized text ('' when the server returned no result)
        @rtype: str
        @raise InvalidResponseException: http error or unexpected response body
        @raise InvalidRequestException: url error other than a timeout
        @raise RequestTimeoutException: the request timed out
        @raise SpeechRecognitionException: any other failure
        """
        try:
            req = self._create_request(access_key, language, data)
            res = urllib2.urlopen(req, timeout=timeout)
            try:
                res_read = res.read()
            finally:
                # always release the connection, even when read() fails
                res.close()
            google_result_text = self._process_data(res_read)

        except InvalidResponseException:
            # raised by _process_data; must reach request() unchanged so
            # that the NICT fallback can be used.
            raise

        except urllib2.HTTPError as err:
            # HTTPError is a subclass of URLError, so it has to be handled
            # before the URLError clause to be reachable.
            raise InvalidResponseException('http error. %s Exception:%s' % (err.code, err.msg))

        except urllib2.URLError as err:
            if isinstance(err.reason, socket.timeout):
                raise RequestTimeoutException('request time out. Exception: %s' % str(err))
            raise InvalidRequestException('request url error. Exception:%s' % str(err))

        except (ssl.SSLError, socket.timeout) as err:
            raise RequestTimeoutException(str(err))

        except Exception:
            raise SpeechRecognitionException('unknown exception. Traceback: %s' % traceback.format_exc())

        return google_result_text

    def _create_request(self, access_key, language, data):
        """
        create http request data for google speech api v2
        @param access_key: google speech api key
        @type  access_key: str
        @param language: speech data language
        @type  language: str
        @param data: speech binary data
        @type  data: str
        @return: request object (data is sent as the request body)
        @rtype: urllib2.Request
        """
        header = {'Content-Type' : 'audio/l16; rate=16000;'}
        values = {
            'output': 'json',
            'lang' : language,
            'key': access_key
        }
        url_req = self.URL + urllib.urlencode(values)
        return urllib2.Request(url_req, data, header)

    def _process_data(self, input_str):
        """ extract the recognized text from the server response
        @param input_str: response body; every line is parsed as a
                          separate json document
        @type  input_str: str
        @return: first transcript, or '' when every result list is empty
        @rtype: str
        @raise InvalidResponseException: an expected key is missing
        """
        json_result_list = []
        for line in input_str.split('\n'):
            try:
                json_result_list.append(json.loads(line))
            except ValueError:
                # blank or non-json line: skip it
                continue

        result_data = self._extract_result_key_data(json_result_list)
        if result_data == '':
            return ''

        result_data = self._extract_alternative_final_key_data(result_data)
        result_data = self._extract_final_data(result_data)
        result_data = self._extract_transcript_data(result_data)
        return result_data[0] if result_data else ''

    @classmethod
    def _extract_result_key_data(cls, input_data):
        """ extract result data from server response
        @param input_data: parsed json documents of the response
        @type  input_data: list
        @return: concatenation of all non-empty 'result' values, or ''
                 when every 'result' value is empty
        @raise InvalidResponseException: no document has a 'result' key
        """
        result_data = [
            result['result'] for result in input_data
            if isinstance(result, dict) and 'result' in result
        ]
        if not result_data:
            raise InvalidResponseException('result key is not found. Input: %s' % input_data)

        # drop empty entries and concatenate the remaining lists
        merged = [entry for chunk in result_data if chunk for entry in chunk]
        if not merged:
            return ''
        return merged

    @classmethod
    def _extract_alternative_final_key_data(cls, input_data):
        """ extract alternative key data
        @param input_data: entries taken from the 'result' values
        @type  input_data: list
        @return: entries that have both 'alternative' and 'final' keys
        @raise InvalidResponseException: no entry has both keys
        """
        result_data = [
            entry for entry in input_data
            if 'alternative' in entry and 'final' in entry
        ]
        if not result_data:
            raise InvalidResponseException('alternative key is not found. Input: %s' % input_data)
        return result_data

    @classmethod
    def _extract_final_data(cls, intput_data):
        """ extract final data from server response
        @param intput_data: entries that have 'alternative' and 'final' keys
                            (the misspelled parameter name is kept for
                            compatibility with keyword callers)
        @type  intput_data: list
        @return: non-empty 'alternative' values of entries whose 'final'
                 value is True
        @raise InvalidResponseException: no entry is final
        """
        result_data = [
            result['alternative'] for result in intput_data
            if result['alternative'] and result['final'] is True
        ]
        if not result_data:
            raise InvalidResponseException('final key is not found. Input: %s' % intput_data)
        return result_data

    @classmethod
    def _extract_transcript_data(cls, input_data):
        """ extract transcript data from server response
        @param input_data: list of 'alternative' lists
        @type  input_data: list
        @return: 'transcript' values in response order
        @raise InvalidResponseException: no alternative has a 'transcript' key
        """
        # flatten the alternative lists before picking the transcripts
        alternatives = [alt for group in input_data for alt in group]
        result_data = [alt['transcript'] for alt in alternatives if 'transcript' in alt]
        if not result_data:
            raise InvalidResponseException('transcript key is not found. Input: %s' % input_data)
        return result_data