00001
00002
00003
00004
00005 import urllib
00006 import urllib2
00007 import json
00008 import socket
00009 import traceback
00010 import ssl
00011
00012 import uuid
00013 import requests
00014 import platform
00015
00016 import rospy
00017
00018
00019 import rospeex_core.exceptions as ext
00020 from rospeex_core import logging_util
00021 from rospeex_core.validators import accepts, check_wave_data, check_language
00022 from rospeex_core.sr.base.client import IClient
00023 from rospeex_core.sr.nict import Client
00024
00025
00026 logger = logging_util.get_logger(__name__)
00027
00028
class SyncClient(IClient):
    """ SpeechRecognitionClient_Microsoft class

    Synchronous speech recognition client for the Microsoft speech API.
    request() first queries the NICT engine, then the Microsoft engine, and
    returns the NICT result when the Microsoft request fails.
    Streaming is not supported; the streaming methods are no-ops.
    """
    # wave format values handed to check_wave_data() in request()
    AUDIO_LENGTH = 16000
    FRAMERATE = 16000
    CHANNELS = 1
    SAMPWIDTH = 2
    # language codes accepted by request() and their Microsoft locales
    LANGUAGES = ['en', 'ko', 'ja', 'zh']
    MICROSOFT_LANGUAGES = {
        'en': 'en-US',
        'ko': 'ko-KR',
        'ja': 'ja-JP',
        'zh': 'zh-CN'
    }
    DEFAULT_LANGUAGE = 'ja-JP'
    URL = 'https://speech.platform.bing.com'
    USER_AGENT = 'rospeex.MicrosoftSynClient'
    UNIQUE_ID = uuid.uuid4().hex

    def __init__(self, microsoft_api_key=None, *args, **kwargs):
        """ initialize function
        @param microsoft_api_key: subscription key for microsoft speech api
        @type  microsoft_api_key: str
        @param args: accepted but ignored
        @param kwargs: accepted but ignored
        """
        self._api_key = microsoft_api_key
        self._instance_id = self._generate_id()
        # bearer token, filled in by _authorize()
        self._token = ''

    def _authorize(self, timeout=None):
        """ send web authorization request to server

        On success the issued token is stored in self._token.
        @param timeout: timeout time [s] (None waits without limit)
        @type  timeout: int or float or None
        @raise ParameterException: api key is not set
        @raise requests.exceptions.RequestException: the request failed or
               the server answered with an error status
        """
        self._check_api_key()

        url = 'https://api.cognitive.microsoft.com/sts/v1.0/issueToken'
        headers = {
            'Content-type': 'application/x-www-form-urlencoded',
            'Content-Length': '0',
            'Ocp-Apim-Subscription-Key': self._api_key
        }

        response = requests.post(url, headers=headers, timeout=timeout)
        # raises for 4xx / 5xx answers, so the token is only stored on success
        response.raise_for_status()
        self._token = response.text

    # NOTE(review): 'languate' looks like a typo for 'language'.  It is left
    # untouched because correcting it would presumably enable type checking
    # of that argument -- confirm against the accepts() implementation.
    @accepts(data=str, languate=str, timeout=int)
    def request(
        self,
        data,
        language='ja',
        timeout=socket._GLOBAL_DEFAULT_TIMEOUT
    ):
        """ send speech recognition request to server
        @param data: speech binary data
        @type  data: str
        @param language: speech data language
        @type  language: str
        @param timeout: time out time[s]
        @type  timeout: int
        @returns: recognized text from the microsoft engine, or the nict
                  result when the microsoft request failed
        @raise ParameterException: api key is not set
        @raise requests.exceptions.RequestException: authorization failed
        """
        self._check_api_key()

        check_wave_data(
            data,
            self.FRAMERATE,
            self.CHANNELS,
            self.SAMPWIDTH,
            self.AUDIO_LENGTH
        )
        check_language(language, self.LANGUAGES)

        if language in self.MICROSOFT_LANGUAGES:
            lang = self.MICROSOFT_LANGUAGES[language]
        else:
            rospy.logwarn(
                '%s is not supported. set locale default to [%s].',
                language, self.DEFAULT_LANGUAGE
            )
            lang = self.DEFAULT_LANGUAGE

        # The nict engine is always queried first; its answer is the
        # fallback result.  A failure here is best-effort and must not
        # abort the microsoft request, so it is only logged.
        nict_result = ''
        try:
            client = Client()
            # third argument is presumably a timeout -- TODO confirm
            nict_result = client.request(data, language, 10)

        except Exception:
            logger.info(
                'nict speech api request failed. Traceback: %s' %
                traceback.format_exc()
            )

        # requests does not understand the socket module sentinel, so map it
        # to the process wide default timeout.
        if timeout is socket._GLOBAL_DEFAULT_TIMEOUT:
            auth_timeout = socket.getdefaulttimeout()
        else:
            auth_timeout = timeout
        self._authorize(auth_timeout)

        try:
            result_text = self._request_microsoft_server(
                self._api_key,
                lang,
                data,
                timeout
            )

        except Exception:
            logger.info(
                'microsoft speech api connection failed. thus use nict api.'
            )
            result_text = nict_result

        return result_text

    def _check_api_key(self):
        """ check api key
        @raise ParameterException: api key is empty or None
        """
        if not self._api_key:
            msg = 'argment failed. if you want to use microsoft engine,'\
                  'you MUST set client api key for microsoft speech api.'
            raise ext.ParameterException(msg)

    def _request_microsoft_server(self, api_key, language, data, timeout):
        """ speech recognition request to microsoft server (use speech api)
        @param api_key: microsoft api key (not read here, the token stored
                        by _authorize() is used instead)
        @type  api_key: str
        @param language: speech data language
        @type  language: str
        @param data: speech binary data
        @type  data: str
        @param timeout: timeout time [s]
        @type  timeout: int
        @returns: recognized text
        @raise RequestTimeoutException: the request timed out
        @raise InvalidResponseException: server answered with a http error
        @raise InvalidRequestException: the url could not be opened
        @raise SpeechRecognitionException: any other failure
        """
        try:
            req = self._create_request(language, data)
            res = urllib2.urlopen(req, timeout=timeout)
            try:
                res_read = res.read()
            finally:
                res.close()
            microsoft_result_text = self._process_data(res_read)

        # HTTPError is a subclass of URLError, so it has to be handled first
        except urllib2.HTTPError as err:
            raise ext.InvalidResponseException(
                'http error. %s Exception:%s' % (err.code, err.msg)
            )

        except urllib2.URLError as err:
            if isinstance(err.reason, socket.timeout):
                raise ext.RequestTimeoutException(
                    'request time out. Exception: %s' % str(err)
                )
            raise ext.InvalidRequestException(
                'request url error. Exception:%s' % str(err)
            )

        except (ssl.SSLError, socket.timeout) as err:
            raise ext.RequestTimeoutException(str(err))

        except Exception:
            msg = 'unknown exception. Traceback: {}'.format(
                traceback.format_exc()
            )
            raise ext.SpeechRecognitionException(msg)

        return microsoft_result_text

    def _create_request(self, language, data):
        """ create http request data for microsoft speech api
        @param language: speech data language
        @type  language: str
        @param data: speech binary data
        @type  data: str
        @returns: request object for urllib2.urlopen
        @rtype: urllib2.Request
        """
        # NOTE(review): request() validates the input against FRAMERATE
        # (16000) while 8000 is announced here -- confirm which value the
        # service expects.
        samplerate = 8000
        scenarios = 'ulm'

        params = {
            'version': '3.0',
            'appid': 'D4D52672-91D7-4C74-8AD8-42B1D98141A5',
            'instanceid': self._instance_id,
            'requestid': self._generate_id(),
            'format': 'json',
            'locale': language,
            'device.os': platform.system() + ' ' + platform.release(),
            'scenarios': scenarios,
        }

        headers = {
            'Content-type': 'audio/wav; samplerate={0}'.format(samplerate),
            'Authorization': 'Bearer ' + self._token,
            'codec': 'audio/pcm',
            'samplerate': str(samplerate),
        }

        url_req = self.URL + '/recognize/query?' + urllib.urlencode(params)
        return urllib2.Request(url_req, data, headers)

    def _process_data(self, input_str):
        """ extract the recognized text from the server response
        @param input_str: response body, one json document per line
        @type  input_str: str
        @returns: first lexical entry of the response, '' if the response
                  holds no result
        @rtype: str or unicode
        @raise InvalidResponseException: expected keys are missing
        """
        json_result_list = []
        for line in input_str.split('\n'):
            try:
                json_result_list.append(json.loads(line))
            except ValueError:
                # line is not a json document (e.g. empty line), skip it
                continue

        result_data = self._extract_result_key_data(json_result_list)
        if result_data == '':
            return ''

        lexical_data = self._extract_lexical_data(result_data)
        return lexical_data[0] if lexical_data else ''

    @classmethod
    def _extract_result_key_data(cls, input_data):
        """ extract result data from server response
        @param input_data: decoded json documents of the response
        @type  input_data: list of dict
        @returns: concatenation of all non-empty 'results' values,
                  '' if every 'results' value is empty
        @raise InvalidResponseException: no document has a 'results' key
        """
        result_data = [
            result['results'] for result in input_data if 'results' in result
        ]
        if not result_data:
            raise ext.InvalidResponseException(
                'result key is not found. Input: %s' % (input_data,)
            )

        non_empty = [chunk for chunk in result_data if len(chunk)]
        if not non_empty:
            return ''

        merged = non_empty[0]
        for chunk in non_empty[1:]:
            merged = merged + chunk
        return merged

    @classmethod
    def _extract_lexical_data(cls, input_data):
        """ extract lexical data from server response
        @param input_data: entries taken from the 'results' values
        @type  input_data: list of dict
        @returns: 'lexical' value of every entry that has one
        @rtype: list
        @raise InvalidResponseException: no entry has a 'lexical' key
        """
        result_data = [
            result['lexical'] for result in input_data if 'lexical' in result
        ]
        if not result_data:
            raise ext.InvalidResponseException(
                'lexical key is not found. Input: %s' % (input_data,)
            )
        return result_data

    def support_streaming(self):
        """
        check support streaming
        @returns: True for support streaming / False for NOT support streaming
        """
        return False

    def add_streaming_packet(self, packet_type, packet_data):
        """
        add streaming packet (no-op, streaming is not supported)
        @param packet_type:
        @type  packet_type: int
        @param packet_data:
        @type  packet_data: str
        """
        pass

    def register_streaming_cb(self, cb):
        """
        register streaming result callback (no-op)
        @param cb:
        @type  cb:
        """
        pass

    def unregister_streaming_cb(self, cb):
        """
        unregister streaming result callback (no-op)
        @param cb:
        @type  cb:
        """
        pass

    def set_streaming_config(self, language):
        """
        set streaming config (no-op)
        @param language:
        @type  language: str
        """
        pass

    def join(self, timeout=None):
        """
        join streaming client (no-op)
        @param timeout:
        @type  timeout:
        """
        pass

    @classmethod
    def _generate_id(cls):
        """ generate a random id
        @returns: uuid4 as 32 hex characters without hyphens
        @rtype: str
        """
        return uuid.uuid4().hex
00337
00338