00001
00002
00003
00004
00005 import os
00006 import rospy
00007 import rospkg
00008 import subprocess
00009
00010
00011 from rospeex_msgs.msg import SpeechRecognitionRequest
00012 from rospeex_msgs.msg import SpeechSynthesisRequest
00013 from rospeex_msgs.msg import SpeechRecognitionResponse
00014 from rospeex_msgs.msg import SignalProcessingResponse
00015 from rospeex_msgs.msg import SpeechSynthesisResponse
00016 from rospeex_msgs.msg import SpeechSynthesisHeader
00017 from rospeex_msgs.msg import SpeechSynthesisState
00018 from rospeex_msgs.srv import SpeechRecognitionConfig
00019
00020
class ROSpeexInterface(object):
    """
    class:
        ROSpeexInterface class
    brief:
        Provides rospeex interface for python.

        Typical use: create an instance, call init() to create the
        publishers/subscribers, register callbacks with
        register_sr_response() / register_ss_response(), then call
        say() / tts() / recognize().
    """
    # Topic and service names used to talk to the rospeex nodes.
    _SR_REQUEST_TOPIC_NAME = 'sr_req'
    _SR_RESPONSE_TOPIC_NAME = 'sr_res'
    _SS_REQUEST_TOPIC_NAME = 'ss_req'
    _SS_RESPONSE_TOPIC_NAME = 'ss_res'
    _SPI_RESPONSE_TOPIC_NAME = 'spi_res'
    _SPI_STATE_TOPIC_NAME = 'ss_state'
    _SPI_CONFIG_SERVICE_NAME = 'spi_config'

    # Sound files played as audible feedback (looked up under <package>/sound).
    _NO_MESSAGE_WAV_FILENAME = 'nomessage.wav'
    _ACCEPT_MESSAGE_WAV_FILENAME = 'accept.wav'

    # Inclusive [min, max] number of characters accepted by say(limit=True).
    TEXT_RANGE = [0, 100]

    def __init__(self):
        """
        brief:
            Create an interface with every feature disabled.
            No ROS resources are created here; call init() for that.
        """
        # user callbacks registered through register_*_response()
        self._sr_response = None
        self._ss_response = None

        # publishers, created by init()
        self._pub_sr = None
        self._pub_ss = None
        self._pub_ss_state = None

        # service proxy for the signal processing interface, created by init()
        self._spi_config_srv = None

        # request counters, sent as the request_id of outgoing requests
        self._ss_req_id = 0
        self._sr_req_id = 0

        # last values passed to set_spi_config()
        self._spi_language = 'ja'
        self._spi_engine = 'nict'

        # Feature flags. Defined here (not only in init()) so that calling
        # say()/tts()/recognize() before init() reports "disabled" instead of
        # raising AttributeError.
        self._ss_enable = False
        self._sr_enable = False
        self._spi_enable = False

    def init(self, ss=True, sr=True, spi=True):
        """
        brief:
            Initializing rospeex.
        param[in]:
            ss: set true to enable speech synthesis
            sr: set true to enable speech recognition
            spi: set true to enable signal processing interface
                 (waveform monitor)
        """
        self._ss_enable = ss
        self._sr_enable = sr
        self._spi_enable = spi

        if self._sr_enable:
            rospy.loginfo('enable speech recognition.')
            self._pub_sr = rospy.Publisher(
                self._SR_REQUEST_TOPIC_NAME,
                SpeechRecognitionRequest,
                queue_size=5
            )
            rospy.Subscriber(
                self._SR_RESPONSE_TOPIC_NAME,
                SpeechRecognitionResponse,
                self._sr_response_callback
            )

        if self._ss_enable:
            rospy.loginfo('enable speech synthesis.')
            self._pub_ss = rospy.Publisher(
                self._SS_REQUEST_TOPIC_NAME,
                SpeechSynthesisRequest,
                queue_size=5
            )
            rospy.Subscriber(
                self._SS_RESPONSE_TOPIC_NAME,
                SpeechSynthesisResponse,
                self._ss_response_callback
            )

        if self._spi_enable:
            rospy.loginfo('enable signal processing interface.')
            self._pub_ss_state = rospy.Publisher(
                self._SPI_STATE_TOPIC_NAME,
                SpeechSynthesisState,
                queue_size=5
            )
            rospy.Subscriber(
                self._SPI_RESPONSE_TOPIC_NAME,
                SignalProcessingResponse,
                self._spi_response_callback
            )

            # No timeout is given, so this blocks until the config service
            # is available.
            rospy.wait_for_service(self._SPI_CONFIG_SERVICE_NAME)
            self._spi_config_srv = rospy.ServiceProxy(
                self._SPI_CONFIG_SERVICE_NAME,
                SpeechRecognitionConfig
            )

    def play_sound(self, sound_path):
        """
        brief:
            playing audio file.
            Runs "aplay -q <sound_path>" and waits for it to finish.
            The ss state is published as True before playback and as
            False afterwards, even when playback fails.
        param[in]:
            sound_path: sound file path
        """
        self._publish_ss_state(True)

        cmd = ['aplay', '-q', sound_path]
        try:
            subprocess.check_call(cmd)

        except subprocess.CalledProcessError as err:
            # A non-zero exit status of aplay is only reported, not raised.
            rospy.logwarn(str(err))

        finally:
            self._publish_ss_state(False)

    def _publish_ss_state(self, state):
        """
        brief:
            send ss state to spi node.
            Does nothing when the state publisher has not been created.
        param[in]:
            state: ss state flag.
        """
        if self._pub_ss_state:
            msg = SpeechSynthesisState()
            msg.header.request_type = SpeechSynthesisHeader.REQUEST_TYPE_SAY
            msg.header.engine = ''
            msg.header.voice_font = ''
            msg.header.language = ''
            msg.header.user = rospy.get_name()
            msg.header.request_id = ''
            msg.play_state = state
            self._pub_ss_state.publish(msg)

    def _play_package_sound(self, file_name):
        """
        brief:
            playing audio file shipped with the rospeex_if package.
        param[in]:
            file_name: sound file name
                       (file location: <rospeex_if package>/sound)
        """
        rp = rospkg.RosPack()
        rospeex_dir = rp.get_path('rospeex_if')
        sound_path = os.path.join(rospeex_dir, 'sound', file_name)
        self.play_sound(sound_path)

    def _spi_response_callback(self, response):
        """
        brief:
            Response from signal processing interface (wave monitor).
            Advances the recognition request counter and plays the
            "accept" sound. The response content itself is not used.
        param[in]:
            response: response from ros node
        """
        if self._spi_enable:
            self._sr_req_id += 1
            self._play_package_sound(self._ACCEPT_MESSAGE_WAV_FILENAME)

    def _ss_response_callback(self, response):
        """
        brief:
            Response from speech synthesis node.
            Forwards the data to the registered callback when the
            response is addressed to this node.
        param[in]:
            response: speech synthesis result audio file (wave)
        """
        if self._ss_response and response.header.user == rospy.get_name():
            self._ss_response(response.data)

    def register_ss_response(self, func):
        """
        brief:
            Registering a speech synthesis callback function.
        param[in]:
            func: form: func(data)
                  data: speech synthesis result audio file (wave)
        """
        self._ss_response = func

    def _sr_response_callback(self, response):
        """
        brief:
            Response from speech recognition node.
            Responses whose user is this node or 'spi' are forwarded to
            the registered callback; an empty result additionally plays
            the "no message" sound.
        param[in]:
            response: speech recognition result surface string
        """
        if self._sr_response:
            if response.header.user in (rospy.get_name(), 'spi'):
                self._sr_response(response.message)

                # NOTE(review): nesting of this check is assumed to be
                # inside the user filter (feedback only for our own
                # requests) - confirm against the upstream source.
                if not response.message:
                    self._play_package_sound(self._NO_MESSAGE_WAV_FILENAME)

    def register_sr_response(self, func):
        """
        brief:
            Registering a speech recognition callback function.
        param[in]:
            func: form: func(message)
                  message: speech recognition result surface string
        """
        self._sr_response = func

    def set_spi_config(self, language='ja', engine='nict'):
        """
        brief:
            Setting signal processing interface (wave monitor) parameters.
            The values are always stored; the config service is only
            called when its proxy has been created by init().
        param[in]:
            language: language setting (ja, en, zh, ko)
            engine: speech recognition engine setting (nict or google)
                    nict: supports ja, en, zh, ko
                    google: supports ja, en
        """
        self._spi_language = language
        self._spi_engine = engine
        if self._spi_config_srv:
            # the service is called with (engine, language), in that order
            self._spi_config_srv(self._spi_engine, self._spi_language)

    def _publish_ss_request(self, request_type, message, memo,
                            language, engine, voice_font):
        """
        brief:
            Build and publish one speech synthesis request, then advance
            the synthesis request counter.
        param[in]:
            request_type: SpeechSynthesisHeader request type constant
            message: text for performing speech synthesis
            memo: value of the request's memo field
            language: language setting
            engine: speech synthesis engine setting
            voice_font: voice font setting
        """
        msg = SpeechSynthesisRequest()
        msg.header.request_type = request_type
        msg.header.engine = engine
        msg.header.voice_font = voice_font
        msg.header.language = language
        msg.header.user = rospy.get_name()
        msg.header.request_id = str(self._ss_req_id)
        msg.message = message
        msg.memo = memo
        self._pub_ss.publish(msg)
        self._ss_req_id += 1

    def say(self, message, language='ja', engine='nict', voice_font='*',
            limit=True):
        """
        brief:
            Performs speech synthesis, and outputs to the speaker.
        param[in]:
            message: text for performing speech synthesis
            language: language setting (ja, en, zh, ko)
            engine: speech synthesis engine setting (nict or google)
                    nict: supports ja, en, zh, ko
                    google: supports ja, en
            voice_font: voice font setting
                        nict(ja): F128 or F117
                        nict(en): EF007
                        nict(zh): CJF101
                        nict(ko): KF001
                        google(ja/en): None
            limit: limit for message string length
                   (True: limit to TEXT_RANGE, i.e. 100 characters,
                    False: no limit)
        """
        if not self._ss_enable:
            rospy.loginfo('ss interface is disabled.')
            return

        # _check_text() logs the reason itself when it rejects the text, so
        # a rejected message is not reported as "interface disabled".
        if limit and not self._check_text(message):
            return

        self._publish_ss_request(
            SpeechSynthesisHeader.REQUEST_TYPE_SAY,
            message, "", language, engine, voice_font
        )

    def tts(self, message, file_name, language='ja', engine='nict',
            voice_font='*'):
        """
        brief:
            Performs speech synthesis, and outputs to the wave file.
        param[in]:
            message: text to perform speech synthesis
            file_name: filepath to output (sent as the request's memo)
            language: language setting (ja, en, zh, ko)
            engine: speech synthesis engine setting (nict or google)
                    nict: supports ja, en, zh, ko
                    google: supports ja, en
            voice_font: voice font setting
                        nict(ja): F128 or F117
                        nict(en): EF007
                        nict(zh): CJF101
                        nict(ko): KF001
                        google(ja/en): None
        """
        if not self._ss_enable:
            rospy.loginfo('ss interface is disabled.')
            return

        self._publish_ss_request(
            SpeechSynthesisHeader.REQUEST_TYPE_TTS,
            message, file_name, language, engine, voice_font
        )

    def recognize(self, data, language='ja', engine='nict'):
        """
        brief:
            Recognizing voice data.
        param[in]:
            data: voice file (wave format: 16kHz, 16bit, mono, LE)
            language: language setting (ja, en, zh, ko)
            engine: speech recognition engine setting (nict or google)
                    nict: supports ja, en, zh, ko
                    google: supports ja, en
        """
        if not self._sr_enable:
            rospy.loginfo('sr interface is disabled.')
            return

        msg = SpeechRecognitionRequest()
        msg.header.user = rospy.get_name()
        msg.header.request_id = str(self._sr_req_id)
        msg.header.language = language
        msg.header.engine = engine
        msg.data = data
        self._pub_sr.publish(msg)
        self._sr_req_id += 1

    def _check_text(self, text):
        """
        brief:
            check tts text
            The length is counted in characters; byte strings are decoded
            as UTF-8 first. Both ends of TEXT_RANGE are inclusive.
        param[in]:
            text: input message (byte string or unicode string)
        return:
            True when the text length is within TEXT_RANGE, otherwise
            False (an error is logged).
        """
        input_text = text
        # bytes is an alias of str on Python 2, so this covers Python 2
        # byte strings as well as Python 3 bytes; unicode text is used as is.
        if isinstance(text, bytes):
            input_text = text.decode('utf-8')

        lower, upper = self.TEXT_RANGE
        text_length = len(input_text)
        if lower <= text_length <= upper:
            return True

        msg = (
            'parameter failed. text length is not in range. '
            'Expected: {range[0]} <= text_length:{value} <= {range[1]}'
        ).format(range=self.TEXT_RANGE, value=text_length)
        rospy.logerr(msg)
        return False