00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035 import rospy
00036 import actionlib
00037
00038 from dynamic_reconfigure.server import Server as ReConfServer
00039 import dynamic_reconfigure.client
00040 from nao_driver.cfg import nao_speechConfig as NodeConfig
00041 from nao_driver import NaoNode
00042 from naoqi import (ALBroker, ALProxy, ALModule)
00043
00044 from std_msgs.msg import( String )
00045 from std_srvs.srv import( Empty, EmptyResponse )
00046 from nao_msgs.msg import(
00047 WordRecognized,
00048 SetSpeechVocabularyGoal,
00049 SetSpeechVocabularyResult,
00050 SetSpeechVocabularyAction,
00051 SpeechWithFeedbackGoal,
00052 SpeechWithFeedbackResult,
00053 SpeechWithFeedbackFeedback,
00054 SpeechWithFeedbackAction )
00055
00056
00057 class Constants:
00058 NODE_NAME = "nao_speech"
00059 EVENT = "WordRecognized"
00060 TEXT_STARTED_EVENT = "ALTextToSpeech/TextStarted"
00061 TEXT_DONE_EVENT = "ALTextToSpeech/TextDone"
00062
00063
00064 class Util:
00065 @staticmethod
00066 def parse_vocabulary( vocabulary ):
00067
00068 vocabulary_list = vocabulary.split("/")
00069
00070 vocabulary_list = [ entry.strip() for entry in vocabulary_list]
00071
00072 return filter(None, vocabulary_list)
00073
00074
00075 @staticmethod
00076 def to_naoqi_name(name):
00077 return "ros{}_{}".format(
00078 name.replace("/", "_"),
00079 rospy.Time.now().to_sec() )
00080
00081 class DummyAudioDevice:
00082 def getOutputVolume(self):
00083 return 0
00084
00085 def setOutputVolume(self, vol):
00086 pass
00087
00088 class NaoSpeech(ALModule, NaoNode):
00089
00090 def __init__( self, moduleName ):
00091
00092 NaoNode.__init__(self, Constants.NODE_NAME )
00093
00094
00095 self.moduleName = moduleName
00096
00097 self.ip = ""
00098 self.port = 0
00099 self.init_almodule()
00100
00101
00102 self.speech_with_feedback_flag = False
00103
00104
00105 self.conf = None
00106
00107
00108
00109 self.srw = None
00110
00111
00112 self.subscribe()
00113
00114
00115 self.reconf_server = ReConfServer(NodeConfig, self.reconfigure)
00116
00117 self.reconf_client = dynamic_reconfigure.client.Client(Constants.NODE_NAME)
00118
00119
00120 self.sub = rospy.Subscriber("speech", String, self.say )
00121
00122
00123 self.pub = rospy.Publisher("word_recognized", WordRecognized, queue_size=10)
00124
00125
00126 self.start_srv = rospy.Service(
00127 "start_recognition",
00128 Empty,
00129 self.start )
00130
00131 self.stop_srv = rospy.Service(
00132 "stop_recognition",
00133 Empty,
00134 self.stop )
00135
00136
00137 self.setSpeechVocabularyServer = actionlib.SimpleActionServer("speech_vocabulary_action", SetSpeechVocabularyAction,
00138 execute_cb=self.executeSpeechVocabularyAction,
00139 auto_start=False)
00140
00141
00142 self.speechWithFeedbackServer = actionlib.SimpleActionServer("speech_action", SpeechWithFeedbackAction,
00143 execute_cb=self.executeSpeechWithFeedbackAction,
00144 auto_start=False)
00145
00146 self.setSpeechVocabularyServer.start()
00147 self.speechWithFeedbackServer.start()
00148
00149 def init_almodule(self):
00150
00151 rospy.loginfo("Connecting to NaoQi at %s:%d", self.pip, self.pport)
00152 try:
00153 self.broker = ALBroker("%sBroker" % self.moduleName, self.ip, self.port, self.pip, self.pport)
00154 except RuntimeError,e:
00155 print("Could not connect to NaoQi's main broker")
00156 exit(1)
00157 ALModule.__init__(self, self.moduleName)
00158
00159 self.memProxy = ALProxy("ALMemory",self.pip,self.pport)
00160
00161 if self.memProxy is None:
00162 rospy.logerr("Could not get a proxy to ALMemory on %s:%d", self.pip, self.pport)
00163 exit(1)
00164
00165 self.tts = self.get_proxy("ALTextToSpeech")
00166
00167 if self.tts is None:
00168 rospy.logerr("Could not get a proxy to ALTextToSpeech on %s:%d", self.pip, self.pport)
00169 exit(1)
00170
00171 self.audio = self.get_proxy("ALAudioDevice")
00172 if self.audio is None:
00173
00174
00175 rospy.logwarn("Proxy to ALAudioDevice not available, using dummy device (normal in simulation; volume controls disabled)")
00176 self.audio = DummyAudioDevice()
00177
00178 def subscribe(self):
00179
00180 self.memProxy.subscribeToEvent(Constants.TEXT_DONE_EVENT, self.moduleName, "onTextDone")
00181 self.memProxy.subscribeToEvent(Constants.TEXT_STARTED_EVENT, self.moduleName, "onTextStarted")
00182
00183 def unsubscribe(self):
00184 self.memProxy.unsubscribeToEvent(Constants.TEXT_DONE_EVENT, self.moduleName)
00185 self.memProxy.unsubscribeToEvent(Constants.TEXT_STARTED_EVENT, self.moduleName)
00186
00187 def onTextStarted(self, strVarName, value, strMessage):
00188
00189
00190 if value == 0 or self.speech_with_feedback_flag == False:
00191 return
00192
00193
00194 fb = SpeechWithFeedbackFeedback()
00195 self.speechWithFeedbackServer.publish_feedback(fb)
00196
00197 def onTextDone(self, strVarName, value, strMessage):
00198
00199
00200 if value == 0 or self.speech_with_feedback_flag == False:
00201 return
00202
00203
00204
00205 self.speech_with_feedback_flag = False
00206
00207
00208 def executeSpeechWithFeedbackAction(self, goal):
00209
00210 self.speech_with_feedback_flag = True
00211 saystr = goal.say
00212 self.internalSay(saystr)
00213
00214
00215 counter = 0
00216 while self.speech_with_feedback_flag == True and counter < 1200:
00217 rospy.sleep(0.1)
00218 counter += 1
00219
00220
00221 self.speechWithFeedbackServer.set_succeeded()
00222
00223 def executeSpeechVocabularyAction(self, goal):
00224
00225 rospy.loginfo("SetSpeechVocabulary action executing");
00226
00227 words = goal.words
00228 words_str = ""
00229
00230
00231 if len(words) == 0:
00232 setVocabularyResult = SetSpeechVocabularyResult()
00233 setVocabularyResult.success = False
00234 self.setSpeechVocabularyServer.set_succeeded(setVocabularyResult)
00235 return
00236
00237
00238 for i in range(0, len(words) - 1):
00239 words_str += str(words[i]) + "/"
00240
00241 words_str += words[len(words) - 1]
00242
00243
00244 params = { 'vocabulary' : words_str }
00245 self.reconf_client.update_configuration(params)
00246
00247
00248 setVocabularyResult = SetSpeechVocabularyResult()
00249 setVocabularyResult.success = True
00250 self.setSpeechVocabularyServer.set_succeeded(setVocabularyResult)
00251
00252
00253 def reconfigure( self, request, level ):
00254 newConf = {}
00255
00256
00257 newConf["voice"] = request["voice"]
00258 newConf["language"] = request["language"]
00259 newConf["volume"] = request["volume"]
00260 newConf["vocabulary"] = request["vocabulary"]
00261 newConf["audio_expression"] = request["audio_expression"]
00262 newConf["visual_expression"] = request["visual_expression"]
00263 newConf["word_spotting"] = request["word_spotting"]
00264
00265
00266 if not newConf["voice"]:
00267 newConf["voice"] = self.tts.getVoice()
00268 elif newConf["voice"] not in self.tts.getAvailableVoices():
00269 rospy.logwarn(
00270 "Unknown voice '{}'. Using current voice instead".format(
00271 newConf["voice"] ) )
00272 rospy.loginfo("Voices available: {}".format(
00273 self.tts.getAvailableVoices()))
00274 newConf["voice"] = self.tts.getVoice()
00275
00276 if not newConf["language"]:
00277 newConf["language"] = self.tts.getLanguage()
00278 elif newConf["language"] not in self.tts.getAvailableLanguages():
00279 newConf["language"] = self.tts.getLanguage()
00280 rospy.logwarn(
00281 "Unknown language '{}'. Using current language instead".format(
00282 newConf["language"] ) )
00283 rospy.loginfo("Languages available: {}".format(
00284 self.tts.getAvailableLanguages()))
00285
00286
00287 if not self.conf and not rospy.has_param("~volume"):
00288 newConf["volume"] = self.audio.getOutputVolume()
00289
00290
00291 if self.srw and not Util.parse_vocabulary(newConf["vocabulary"]):
00292 rospy.logwarn("Empty vocabulary. Using current vocabulary instead")
00293 newConf["vocabulary"] = self.conf["vocabulary"]
00294
00295
00296 if self.srw and self.conf and (
00297 newConf["language"] != self.conf["language"] or
00298 newConf["vocabulary"] != self.conf["language"] or
00299 newConf["audio_expression"] != self.conf["audio_expression"] or
00300 newConf["visual_expression"] != self.conf["visual_expression"] or
00301 newConf["word_spotting"] != self.conf["word_spotting"] ):
00302 need_to_restart_speech = True
00303 else:
00304 need_to_restart_speech = False
00305
00306 self.conf = newConf
00307
00308
00309 if need_to_restart_speech:
00310 self.stop()
00311 self.start()
00312
00313 return self.conf
00314
00315
00316
00317 def say( self, request ):
00318 self.internalSay(request.data)
00319
00320
00321
00322 def internalSay( self, sentence ):
00323
00324 current_voice = self.tts.getVoice()
00325 current_language = self.tts.getLanguage()
00326 current_volume = self.audio.getOutputVolume()
00327 current_gain = self.tts.getVolume()
00328 target_gain = 1.0
00329
00330
00331 if self.conf["voice"] != current_voice:
00332 self.tts.setVoice( self.conf["voice"] )
00333
00334 if self.conf["language"] != current_language:
00335 self.tts.setLanguage( self.conf["language"] )
00336
00337 if self.conf["volume"] != current_volume:
00338 self.audio.setOutputVolume( self.conf["volume"] )
00339
00340 if target_gain != current_gain:
00341 self.tts.setVolume(target_gain)
00342
00343
00344 self.tts.say( sentence )
00345
00346
00347 if self.conf["voice"] != current_voice:
00348 self.tts.setVoice( current_voice )
00349
00350 if self.conf["language"] != current_language:
00351 self.tts.setLanguage( current_language )
00352
00353 if self.conf["volume"] != current_volume:
00354 self.audio.setOutputVolume( current_volume )
00355
00356 if target_gain != current_gain:
00357 self.tts.setVolume(current_gain)
00358
00359
00360 def start( self, request = None ):
00361 if self.srw:
00362 rospy.logwarn("Speech recognition already started. Restarting.")
00363 self.srw.close()
00364
00365 if Util.parse_vocabulary( self.conf["vocabulary"] ):
00366 self.srw = SpeechRecognitionWrapper(
00367 self.pip,
00368 self.pport,
00369 self.pub,
00370 self.conf )
00371 else:
00372 rospy.logwarn("Empty vocabulary. Ignoring request.")
00373
00374 return EmptyResponse()
00375
00376 def stop( self, request = None ):
00377 if not self.srw:
00378 rospy.logerr("Speech recognition was not started")
00379 else:
00380 self.srw.stop()
00381 self.srw = None
00382
00383 return EmptyResponse()
00384
00385 def shutdown(self):
00386 self.unsubscribe()
00387
00388
00389
00390
00391
00392
00393
00394
00395
00396 class SpeechRecognitionWrapper(ALModule):
00397
00398 """ROS wrapper for Naoqi speech recognition"""
00399 def __init__(self, ip, port, publisher, config):
00400
00401
00402
00403 self.naoqi_name = Util.to_naoqi_name( rospy.get_name() )
00404
00405
00406 self.broker = ALBroker(self.naoqi_name + "_broker",
00407 "0.0.0.0",
00408 0,
00409 ip,
00410 port )
00411
00412
00413
00414 ALModule.__init__( self, self.naoqi_name )
00415
00416
00417 self.memory = ALProxy("ALMemory")
00418 self.proxy = ALProxy("ALSpeechRecognition")
00419
00420
00421 self.pub = publisher
00422
00423
00424 self.install_naoqi_globals()
00425
00426
00427 subscribers = self.memory.getSubscribers(Constants.EVENT)
00428 if subscribers:
00429 rospy.logwarn("Speech recognition already in use by another node")
00430 for module in subscribers:
00431 self.stop(module)
00432
00433
00434 self.reconfigure(config)
00435
00436
00437 rospy.loginfo("Subscribing '{}' to NAO speech recognition".format(
00438 self.naoqi_name) )
00439 self.memory.subscribeToEvent(
00440 Constants.EVENT,
00441 self.naoqi_name,
00442 self.on_word_recognised.func_name )
00443
00444
00445
00446 def install_naoqi_globals(self):
00447 globals()[self.naoqi_name] = self
00448 globals()["memory"] = self.memory
00449
00450
00451 def reconfigure(self, config):
00452 self.proxy.setLanguage( config["language"] )
00453 self.proxy.setAudioExpression( config["audio_expression"] )
00454 self.proxy.setVisualExpression( config["visual_expression"] )
00455 self.proxy.setVocabulary(
00456 Util.parse_vocabulary( config["vocabulary"] ),
00457 config["word_spotting"] )
00458
00459
00460 def stop(self, module = None):
00461 if module is None:
00462 module = self.naoqi_name
00463
00464 rospy.loginfo("Unsubscribing '{}' from NAO speech recognition".format(
00465 module))
00466 try:
00467 self.memory.unsubscribeToEvent( Constants.EVENT, module )
00468 except RuntimeError:
00469 rospy.logwarn("Could not unsubscribe from NAO speech recognition")
00470
00471
00472 def on_word_recognised(self, key, value, subscriber_id ):
00473 """Publish the words recognized by NAO via ROS """
00474
00475
00476 temp_dict = dict( value[i:i+2] for i in range(0, len(value), 2) )
00477
00478
00479 if '' in temp_dict:
00480 del(temp_dict[''])
00481
00482 self.pub.publish(WordRecognized( temp_dict.keys(), temp_dict.values() ))
00483
00484
00485 if __name__ == '__main__':
00486
00487 ROSNaoSpeechModule = NaoSpeech("ROSNaoSpeechModule")
00488 rospy.loginfo( "ROSNaoSpeechModule running..." )
00489
00490 rospy.spin()
00491
00492 rospy.loginfo("Stopping ROSNaoSpeechModule ...")
00493
00494 if ROSNaoSpeechModule.srw:
00495 ROSNaoSpeechModule.srw.stop()
00496
00497 ROSNaoSpeechModule.shutdown();
00498 rospy.loginfo("ROSNaoSpeechModule stopped.")
00499
00500 exit(0)