00001
00002
00003
00004
00005 try:
00006 import roslib; roslib.load_manifest('rospeex_if')
00007 except:
00008 pass
00009
00010 import os
00011 import rospy
00012 import rospkg
00013 import subprocess
00014
00015
00016 from rospeex_msgs.msg import SpeechRecognitionRequest
00017 from rospeex_msgs.msg import SpeechSynthesisRequest
00018 from rospeex_msgs.msg import SpeechRecognitionResponse
00019 from rospeex_msgs.msg import SpeechSynthesisResponse
00020 from rospeex_msgs.msg import SignalProcessingResponse
00021 from rospeex_msgs.msg import SpeechSynthesisHeader
00022 from rospeex_msgs.msg import SpeechSynthesisState
00023
00024 class ROSpeexInterface(object):
00025 """
00026 class:
00027 ROSpeexInterface class
00028 brief:
00029 Provides rospeex interface for python.
00030 """
00031 _SR_REQUEST_TOPIC_NAME = 'sr_req'
00032 _SR_RESPONSE_TOPIC_NAME = 'sr_res'
00033 _SS_REQUEST_TOPIC_NAME = 'ss_req'
00034 _SS_RESPONSE_TOPIC_NAME = 'ss_res'
00035 _SPI_RESPONSE_TOPIC_NAME = 'spi_res'
00036 _SPI_STATE_TOPIC_NAME = 'ss_state'
00037 _NO_MESSAGE_WAV_FILENAME = 'nomessage.wav'
00038 _ACCEPT_MESSAGE_WAV_FILENAME = 'accept.wav'
00039
00040 def __init__(self):
00041
00042
00043 self._sr_response = None
00044 self._ss_response = None
00045
00046
00047 self._pub_sr = None
00048 self._pub_ss = None
00049 self._pub_ss_state = None
00050
00051
00052 self._ss_req_id = 0
00053 self._sr_req_id = 0
00054
00055 self._sr_queue_num = 0
00056 self._sr_queue_disp = False
00057
00058
00059 self._spi_language = 'ja'
00060 self._spi_engine = 'nict'
00061
00062 def init(self, ss=True, sr=True, spi=True):
00063 """
00064 brief:
00065 Initializing rospeex.
00066 param[in]:
00067 ss: set true to enable speech synthesis
00068
00069 sr: set true to enable speech recognition
00070
00071 spi: set true to enable signal processing interface (waveform monitor)
00072 """
00073
00074 self._ss_enable = ss
00075 self._sr_enable = sr
00076 self._spi_enable = spi
00077
00078
00079 if self._sr_enable == True:
00080 rospy.loginfo('enable speech recognition.')
00081 self._pub_sr = rospy.Publisher(self._SR_REQUEST_TOPIC_NAME, SpeechRecognitionRequest)
00082 rospy.Subscriber(self._SR_RESPONSE_TOPIC_NAME, SpeechRecognitionResponse, self._sr_response_callback)
00083
00084 if self._ss_enable == True:
00085 rospy.loginfo('enable speech synthesis.')
00086 self._pub_ss = rospy.Publisher(self._SS_REQUEST_TOPIC_NAME, SpeechSynthesisRequest)
00087 rospy.Subscriber(self._SS_RESPONSE_TOPIC_NAME, SpeechSynthesisResponse, self._ss_response_callback)
00088
00089 if self._spi_enable == True:
00090 rospy.loginfo('enable signal processing interface.')
00091 self._pub_ss_state = rospy.Publisher(self._SPI_STATE_TOPIC_NAME, SpeechSynthesisState)
00092 rospy.Subscriber(self._SPI_RESPONSE_TOPIC_NAME, SignalProcessingResponse, self._spi_response_callback)
00093
00094
00095 def play_sound(self, sound_path):
00096 """
00097 brief:
00098 playing audio file.
00099 param[in]:
00100 sound_path: sound file path
00101 """
00102
00103 self._publish_ss_state(True)
00104
00105 cmd = ['aplay', '-q', sound_path]
00106 try:
00107 subprocess.check_call(cmd)
00108 except subprocess.CalledProcessError as err:
00109 rospy.logwarn(str(err))
00110 finally:
00111 self._publish_ss_state(False)
00112
00113
00114 def _publish_ss_state(self, state):
00115 """
00116 brief:
00117 send ss state to spi node.
00118 param[in]:
00119 state: ss state flag.
00120 """
00121 if self._pub_ss_state:
00122 msg = SpeechSynthesisState()
00123 msg.header.request_type = SpeechSynthesisHeader.REQUEST_TYPE_SAY
00124 msg.header.engine = ''
00125 msg.header.voice_font = ''
00126 msg.header.language = ''
00127 msg.header.user = rospy.get_name()
00128 msg.header.request_id = ''
00129 msg.play_state = state
00130 self._pub_ss_state.publish(msg)
00131
00132
00133 def _play_package_sound(self, file_name):
00134 """
00135 brief:
00136 playing audio file.
00137 param[in]:
00138 file_name: sound file name (file location: rospeex/sound)
00139 """
00140 rp = rospkg.RosPack()
00141 rospeex_dir = rp.get_path('rospeex_if')
00142 sound_path = os.path.join(rospeex_dir, 'sound', file_name)
00143 self.play_sound(sound_path)
00144
00145
00146 def _spi_response_callback(self, response):
00147 """
00148 brief:
00149 Response from signal processing interface (wave monitor).
00150 param[in]:
00151 response: response from ros node
00152 """
00153
00154 if self._spi_enable == True and self._pub_sr != None:
00155
00156 msg = SpeechRecognitionRequest()
00157 msg.header.user = rospy.get_name()
00158 msg.header.request_id = str(self._sr_req_id)
00159 msg.header.language = self._spi_language
00160 msg.header.engine = self._spi_engine
00161 msg.data = response.data
00162 self._pub_sr.publish(msg)
00163 self._sr_req_id += 1
00164 self._sr_queue_num += 1
00165 if self._sr_queue_num > 1:
00166 self._sr_queue_disp = True
00167 if self._sr_queue_disp == True:
00168 rospy.loginfo('speech recognition in progress: %d' %self._sr_queue_num)
00169
00170
00171 self._play_package_sound("accept.wav")
00172
00173 def _ss_response_callback(self, response):
00174 """
00175 brief:
00176 Response from speech synthesis node.
00177 param[in]:
00178 response: speech synthesis result audio file (wave)
00179 """
00180 if self._ss_response and response.header.user == rospy.get_name():
00181 self._ss_response(response.data)
00182
00183
00184 def register_ss_response(self, func):
00185 """
00186 brief:
00187 Registering a speech synthesis callback function.
00188 param[in]:
00189 func: form: func(data)
00190
00191 data: speech synthesis result audio file (wave)
00192 """
00193 self._ss_response = func
00194
00195
00196 def _sr_response_callback(self, response):
00197 """
00198 brief:
00199 Response from speech recognition node.
00200 param[in]:
00201 response: speech recognition result surface string
00202 """
00203
00204 self._sr_queue_num -= 1
00205 if self._sr_queue_disp == True:
00206 rospy.loginfo('speech recognition in progress: %d' %self._sr_queue_num)
00207 if self._sr_queue_num == 0:
00208 self._sr_queue_disp = False
00209
00210 if self._sr_response and response.header.user == rospy.get_name():
00211 self._sr_response(response.message)
00212
00213 if not response.message:
00214 self._play_package_sound(self._NO_MESSAGE_WAV_FILENAME)
00215 elif self._spi_enable != True:
00216 self._play_package_sound(self._ACCEPT_MESSAGE_WAV_FILENAME)
00217
00218
00219 def register_sr_response(self, func):
00220 """
00221 brief:
00222 Registering a speech recognition callback function.
00223 param[in]:
00224 func: form: func(message)
00225
00226 message: speech recognition result surface string
00227 """
00228 self._sr_response = func
00229
00230
00231 def set_spi_config(self, language='ja', engine='nict'):
00232 """
00233 brief:
00234 Setting signal processing interface (wave monitor) parameters.
00235 param[in]:
00236 language: language setting (ja, en, zh, ko)
00237
00238 engine: speech recognition engine setting (nict or google)
00239
00240 nict: supports ja, en, zh, ko
00241
00242 google: supports ja, en
00243 """
00244 self._spi_language = language
00245 self._spi_engine = engine
00246
00247
00248 def say(self, message, language='ja', engine='nict', voice_font='*'):
00249 """
00250 brief:
00251 Performs speech synthesis, and outputs to the speaker.
00252 param[in]:
00253 message: text for performing speech synthesis
00254
00255 language: language setting (ja, en, zh, ko)
00256
00257 engine: speech synthesis engine setting (nict or google or voicetext)
00258
00259 nict: supports ja, en, zh, ko
00260
00261 google: supports ja, en
00262
00263 voicetext: supports ja
00264
00265 voice_font: voice font setting
00266
00267 nict(ja): F128 or F117
00268
00269 nict(en): EF007
00270
00271 nict(zh): CJF101
00272
00273 nict(ko): KF001
00274
00275 google(ja/en): None
00276
00277 voicetext(ja): show, haruka, hikari, takeru, santa, bear
00278 """
00279 if self._ss_enable == True:
00280 msg = SpeechSynthesisRequest()
00281 msg.header.request_type = SpeechSynthesisHeader.REQUEST_TYPE_SAY
00282 msg.header.engine = engine
00283 msg.header.voice_font = voice_font
00284 msg.header.language = language
00285 msg.header.user = rospy.get_name()
00286 msg.header.request_id = str(self._ss_req_id)
00287 msg.message = message
00288 msg.memo = ""
00289 self._pub_ss.publish(msg)
00290 self._ss_req_id += 1
00291 else:
00292 rospy.loginfo('ss interface is disabled.')
00293
00294
00295 def tts(self, message, file_name, language='ja', engine='nict', voice_font='*'):
00296 """
00297 brief:
00298 Performs speech synthesis, and outputs to the wave file.
00299 param[in]:
00300 message: text to perform speech synthesis
00301
00302 file_name: filepath to output
00303
00304 language: language setting (ja, en, zh, ko)
00305
00306 engine: speech synthesis engine setting (nict or google)
00307
00308 nict: supports ja, en, zh, ko
00309
00310 google: supports ja, en
00311
00312 voice_font: voice font setting
00313
00314 nict(ja): F128 or F117
00315
00316 nict(en): EF007
00317
00318 nict(zh): CJF101
00319
00320 nict(ko): KF001
00321
00322 google(ja/en): None
00323
00324 """
00325 if self._ss_enable == True:
00326 msg = SpeechSynthesisRequest()
00327 msg.header.request_type = SpeechSynthesisHeader.REQUEST_TYPE_TTS
00328 msg.header.engine = engine
00329 msg.header.voice_font = voice_font
00330 msg.header.language = language
00331 msg.header.user = rospy.get_name()
00332 msg.header.request_id = str(self._ss_req_id)
00333 msg.message = message
00334 msg.memo = file_name
00335 self._pub_ss.publish(msg)
00336 self._ss_req_id += 1
00337 else:
00338 rospy.loginfo('ss interface is disabled.')
00339
00340
00341 def recognize(self, data, language='ja', engine='nict'):
00342 """
00343 brief:
00344 Recognizing voice data.
00345 param[in]:
00346 data: voice file (wave format: 16kHz, 16bit, mono, LE)
00347
00348 language: language setting (ja, en, zh, ko)
00349
00350 engine: speech recognition engine setting (nict or google)
00351
00352 nict: supports ja, en, zh, ko
00353
00354 google: supports ja, en
00355 """
00356 if self._sr_enable == True:
00357 msg = SpeechRecognitionRequest()
00358 msg.header.user = rospy.get_name()
00359 msg.header.request_id = str(self._sr_req_id)
00360 msg.header.language = language
00361 msg.header.engine = engine
00362 msg.data = data
00363 self._pub_sr.publish(msg)
00364 self._sr_req_id += 1
00365 self._sr_queue_num += 1
00366 if self._sr_queue_num > 1:
00367 self._sr_queue_disp = True
00368 if self._sr_queue_disp == True:
00369 rospy.loginfo('speech recognition in progress: %d' %self._sr_queue_num)
00370 else:
00371 rospy.loginfo('sr interface is disabled.')
00372