rospeex_core: sync_client.py Source File

Go to the documentation of this file.
00001 #!/usr/bin/env python
00002 # -*- coding: utf-8 -*-
00003 
00004 # import python libraries
00005 import urllib
00006 import urllib2
00007 import json
00008 import socket
00009 import traceback
00010 import ssl
00011 
00012 import uuid
00013 import requests
00014 import platform
00015 
00016 import rospy
00017 
00018 # import local libraries
00019 import rospeex_core.exceptions as ext
00020 from rospeex_core import logging_util
00021 from rospeex_core.validators import accepts, check_wave_data, check_language
00022 from rospeex_core.sr.base.client import IClient
00023 from rospeex_core.sr.nict import Client
00024 
00025 # create logger
00026 logger = logging_util.get_logger(__name__)
00027 
00028 
class SyncClient(IClient):
    """ Synchronous speech recognition client for the microsoft speech api.

    request() sends the audio to the NICT engine first (best effort) and then
    to the microsoft engine. The NICT text is returned only when the
    microsoft request raises.
    """
    # wave format constraints handed to check_wave_data() in request()
    AUDIO_LENGTH = 16000
    FRAMERATE = 16000
    CHANNELS = 1
    SAMPWIDTH = 2
    # language codes accepted by request()
    LANGUAGES = ['en', 'ko', 'ja', 'zh']
    # rospeex language code -> locale string sent to the microsoft server
    MICROSOFT_LANGUAGES = {
        'en': 'en-US',
        'ko': 'ko-KR',
        'ja': 'ja-JP',
        'zh': 'zh-CN'
    }
    DEFAULT_LANGUAGE = 'ja-JP'
    URL = 'https://speech.platform.bing.com'
    USER_AGENT = 'rospeex.MicrosoftSynClient'
    UNIQUE_ID = str(uuid.uuid4()).replace('-', '')

    def __init__(self, microsoft_api_key=None, *args, **kwargs):
        """ initialize function
        @param microsoft_api_key: subscription key for the microsoft speech api
        @type  microsoft_api_key: str
        @param args: ignored by this client
        @param kwargs: ignored by this client
        """
        self._api_key = microsoft_api_key
        self._instance_id = self._generate_id()
        self._token = ''

    def _authorize(self):
        """ send web authorization request to server and store the token
        @raise ParameterException: api key is not set
        @raise requests.exceptions.HTTPError: server answered with an error
                                              status
        """
        # validate the key before it is placed into a request header
        self._check_api_key()

        url = 'https://api.cognitive.microsoft.com/sts/v1.0/issueToken'
        headers = {
            'Content-type': 'application/x-www-form-urlencoded',
            'Content-Length': '0',
            'Ocp-Apim-Subscription-Key': self._api_key
        }

        response = requests.post(url, headers=headers)
        # raises on an error status, so the token is stored only on success
        response.raise_for_status()
        self._token = response.text

    # NOTE(review): the keyword below is spelled 'languate', so the decorator
    # presumably never type-checks 'language' -- confirm against
    # rospeex_core.validators.accepts before renaming it.
    @accepts(data=str, languate=str, timeout=int)
    def request(
        self,
        data,
        language='ja',
        timeout=socket._GLOBAL_DEFAULT_TIMEOUT
    ):
        """ send speech recognition request to server
        @param data: speech binary data
        @type  data: str
        @param language: speech data language (one of LANGUAGES)
        @type  language: str
        @param timeout: time out time[s] for the microsoft request
        @type  timeout: int
        @returns: recognized text from the microsoft engine, or the NICT
                  result when the microsoft request fails
        @raise ParameterException: api key is not set
        """
        self._check_api_key()

        check_wave_data(
            data,
            self.FRAMERATE,
            self.CHANNELS,
            self.SAMPWIDTH,
            self.AUDIO_LENGTH
        )
        check_language(language, self.LANGUAGES)

        # translate the rospeex language code into a microsoft locale
        if language in self.MICROSOFT_LANGUAGES:
            lang = self.MICROSOFT_LANGUAGES[language]
        else:
            rospy.logwarn(
               '%s is not supported. set locale default to [%s].',
               language, self.DEFAULT_LANGUAGE
            )
            lang = self.DEFAULT_LANGUAGE

        # speech recognition by nict engine (best effort: the result is only
        # used as a fallback, so a failure here must not abort the request)
        nict_result = ''
        try:
            client = Client()
            nict_result = client.request(data, language, 10)

        except Exception:
            logger.info('nict speech api request failed.')

        self._authorize()

        # speech recognition by microsoft engine
        result_text = None
        try:
            result_text = self._request_microsoft_server(
                self._api_key,
                lang,
                data,
                timeout
            )

        except Exception:
            logger.info(
                'microsoft speech api connection failed. thus use nict api.'
            )
            result_text = nict_result

        return result_text

    def _check_api_key(self):
        """ check api key
        @raise ParameterException: api key is empty or None
        """
        if not self._api_key:
            msg = 'argment failed. if you want to use microsoft engine, '\
                  'you MUST set client api key for microsoft speech api.'
            raise ext.ParameterException(msg)

    def _request_microsoft_server(self, api_key, language, data, timeout):
        """ speech recognition request to microsoft server (use speech api)
        @param api_key: microsoft api key (not used here; the request is
                        authorized with the token stored by _authorize())
        @type  api_key: str
        @param language: speech data language
        @type  language: str
        @param data: speech binary data
        @type  data: str
        @param timeout: timeout time [s]
        @type  timeout: int
        @returns: recognized text
        @raise InvalidResponseException: server answered with a http error
        @raise RequestTimeoutException: request timed out
        @raise InvalidRequestException: any other url error
        @raise SpeechRecognitionException: any other failure
        """
        try:
            # create request and send microsoft server
            req = self._create_request(language, data)
            res = urllib2.urlopen(req, timeout=timeout)
            res_read = res.read()
            microsoft_result_text = self._process_data(res_read)

        # HTTPError is a subclass of URLError, so it has to be handled first
        except urllib2.HTTPError as err:
            raise ext.InvalidResponseException(
                'http error. %s Exception:%s' % (err.code, err.msg)
            )

        except urllib2.URLError as err:
            if isinstance(err.reason, socket.timeout):
                raise ext.RequestTimeoutException(
                    'request time out. Exception: %s' % str(err)
                )
            raise ext.InvalidRequestException(
                'request url error. Exception:%s' % str(err)
            )

        except (ssl.SSLError, socket.timeout) as err:
            raise ext.RequestTimeoutException(str(err))

        except Exception:
            msg = 'unknown exception. Traceback: {}'.format(
                traceback.format_exc()
            )
            raise ext.SpeechRecognitionException(msg)

        return microsoft_result_text

    def _create_request(self, language, data):
        """ create http request data for microsoft speech api
        @param language: speech data language
        @type  language: str
        @param data: speech binary data
        @type  data: str
        @returns: request object ready to be passed to urllib2.urlopen
        @rtype: urllib2.Request
        """
        # NOTE(review): 8000 differs from FRAMERATE (16000) that request()
        # validates the audio against -- confirm which value the server needs.
        samplerate = 8000
        scenarios = 'ulm'

        params = {
            'version': '3.0',
            'appid': 'D4D52672-91D7-4C74-8AD8-42B1D98141A5',
            'instanceid': self._instance_id,
            'requestid': self._generate_id(),
            'format': 'json',
            'locale': language,
            'device.os': platform.system() + ' ' + platform.release(),
            'scenarios': scenarios,
        }

        headers = {
            'Content-type': 'audio/wav; samplerate={0}'.format(samplerate),
            'Authorization': 'Bearer ' + self._token,
            'codec': 'audio/pcm',
            'samplerate': samplerate,
        }

        url_req = self.URL + '/recognize/query?' + urllib.urlencode(params)
        request = urllib2.Request(url_req, data, headers)
        return request

    def _process_data(self, input_str):
        """ parse the server response and pick the first recognized text
        @param input_str: response body; every line is tried as a json
                          document and lines that do not parse are skipped
        @type  input_str: str
        @returns: first 'lexical' value of the response, '' when the
                  response carries only empty results
        @rtype: str or unicode
        @raise InvalidResponseException: 'results' or 'lexical' key is missing
        """
        json_result_list = []
        for line in input_str.split('\n'):
            try:
                json_result_list.append(json.loads(line))
            except ValueError:
                # not a json document (e.g. an empty line): ignore it
                continue

        # get data
        result_data = self._extract_result_key_data(json_result_list)
        if result_data != '':
            result_data = self._extract_lexical_data(result_data)

        return result_data[0] if result_data else ''

    @classmethod
    def _extract_result_key_data(cls, input_data):
        """ extract result data from server response
        @param input_data: parsed json documents of the server response
        @type  input_data: list
        @returns: all non-empty 'results' values concatenated with '+',
                  or '' when every 'results' value is empty
        @raise InvalidResponseException: no document has a 'results' key
        """
        # get recognize result from input_data
        result_data = [
            result['results'] for result in input_data if 'results' in result
        ]
        if not result_data:
            raise ext.InvalidResponseException(
                'result key is not found. Input: %s' % (input_data,)
            )

        # drop empty 'results' values
        non_empty = [entry for entry in result_data if len(entry)]
        if not non_empty:
            return ''

        # concatenate the remaining values in response order
        merged = non_empty[0]
        for entry in non_empty[1:]:
            merged = merged + entry
        return merged

    @classmethod
    def _extract_lexical_data(cls, input_data):
        """ extract lexical data from server response
        @param input_data: result entries of the server response
        @type  input_data: list
        @returns: 'lexical' value of every entry that has one
        @rtype: list
        @raise InvalidResponseException: no entry has a 'lexical' key
        """
        # get recognize result-text from result['lexical']
        result_data = [
            result['lexical'] for result in input_data if 'lexical' in result
        ]
        if not result_data:
            raise ext.InvalidResponseException(
                'lexical key is not found. Input: %s' % (input_data,)
            )
        return result_data

    def support_streaming(self):
        """
        check support streaming
        @returns: True for support streaming / False for NOT support streaming
        """
        return False

    def add_streaming_packet(self, packet_type, packet_data):
        """
        add streaming packet (no-op: this client does not support streaming)
        @param packet_type:
        @type  packet_type: int
        @param packet_data:
        @type  packet_data: str
        """
        pass

    def register_streaming_cb(self, cb):
        """
        register streaming result callback (no-op)
        @param cb:
        @type cb:
        """
        pass

    def unregister_streaming_cb(self, cb):
        """
        unregister streaming result callback (no-op)
        @param cb:
        @type cb:
        """
        pass

    def set_streaming_config(self, language):
        """
        set streaming config (no-op)
        @param language:
        @type  language: str
        """
        pass

    def join(self, timeout=None):
        """
        join streaming client (no-op)
        @param timeout:
        @type  timeout:
        """
        pass

    @classmethod
    def _generate_id(cls):
        """ create a random id
        @returns: uuid4 as hex string without '-'
        @rtype: str
        """
        return str(uuid.uuid4()).replace('-', '')
00337 
00338