00001 
00002 
00003 
00004 
00005 
00006 
00007 
00008 
00009 
00010 
00011 
00012 
00013 
00014 
00015 
00016 
00017 
00018 
00019 
00020 
00021 
00022 
00023 
00024 
00025 
00026 
00027 
00028 
00029 
00030 
00031 
00032 
00033 
00034 
00035 import rospy
00036 import actionlib
00037 
00038 from dynamic_reconfigure.server import Server as ReConfServer
00039 import dynamic_reconfigure.client
00040 from naoqi_driver.cfg import NaoqiSpeechConfig as NodeConfig
00041 from naoqi_driver.naoqi_node import NaoqiNode
00042 from naoqi import (ALBroker, ALProxy, ALModule)
00043 
00044 from std_msgs.msg import( String )
00045 from std_srvs.srv import( Empty, EmptyResponse )
00046 from naoqi_bridge_msgs.msg import(
00047     WordRecognized,
00048     SetSpeechVocabularyGoal,
00049     SetSpeechVocabularyResult,
00050     SetSpeechVocabularyAction,
00051     SpeechWithFeedbackGoal,
00052     SpeechWithFeedbackResult,
00053     SpeechWithFeedbackFeedback,
00054     SpeechWithFeedbackAction )
00055 
00056 
00057 class Constants:
00058     NODE_NAME = "nao_speech"
00059     EVENT = "WordRecognized"
00060     TEXT_STARTED_EVENT = "ALTextToSpeech/TextStarted"
00061     TEXT_DONE_EVENT = "ALTextToSpeech/TextDone"
00062 
00063 
00064 class Util:
00065     @staticmethod
00066     def parse_vocabulary( vocabulary ):
00067         
00068         vocabulary_list = vocabulary.split("/")
00069         
00070         vocabulary_list = [ entry.strip() for entry in vocabulary_list]
00071         
00072         return filter(None, vocabulary_list)
00073 
00074     
00075     @staticmethod
00076     def to_naoqi_name(name):
00077         return "ros{}_{}".format(
00078             name.replace("/", "_"),
00079             rospy.Time.now().to_sec() )
00080 
00081 class DummyAudioDevice:
00082     def getOutputVolume(self):
00083         return 0
00084 
00085     def setOutputVolume(self, vol):
00086         pass
00087 
00088 class NaoSpeech(ALModule, NaoqiNode):
00089 
00090     def __init__( self, moduleName ):
00091         
00092         NaoqiNode.__init__(self, Constants.NODE_NAME )
00093 
00094         
00095         self.moduleName = moduleName
00096         
00097         self.ip = ""
00098         self.port = 0
00099         self.init_almodule()
00100 
00101         
00102         self.speech_with_feedback_flag = False
00103 
00104         
00105         self.conf = None
00106 
00107         
00108         
00109         self.srw = None
00110 
00111         
00112         self.subscribe()
00113 
00114         
00115         self.reconf_server = ReConfServer(NodeConfig, self.reconfigure)
00116         
00117         self.reconf_client = dynamic_reconfigure.client.Client(Constants.NODE_NAME)
00118 
00119         
00120         self.sub = rospy.Subscriber("speech", String, self.say )
00121 
00122         
00123         self.pub = rospy.Publisher("word_recognized", WordRecognized )
00124 
00125         
00126         self.start_srv = rospy.Service(
00127             "start_recognition",
00128             Empty,
00129             self.start )
00130 
00131         self.stop_srv = rospy.Service(
00132             "stop_recognition",
00133             Empty,
00134             self.stop )
00135 
00136         
00137         self.setSpeechVocabularyServer = actionlib.SimpleActionServer("speech_vocabulary_action", SetSpeechVocabularyAction,
00138                                                                   execute_cb=self.executeSpeechVocabularyAction,
00139                                                                   auto_start=False)
00140 
00141         
00142         self.speechWithFeedbackServer = actionlib.SimpleActionServer("speech_action", SpeechWithFeedbackAction,
00143                                                                   execute_cb=self.executeSpeechWithFeedbackAction,
00144                                                                   auto_start=False)
00145         
00146         self.setSpeechVocabularyServer.start()
00147         self.speechWithFeedbackServer.start()
00148 
00149     def init_almodule(self):
00150         
00151         rospy.loginfo("Connecting to NaoQi at %s:%d", self.pip, self.pport)
00152         try:
00153             self.broker = ALBroker("%sBroker" % self.moduleName, self.ip, self.port, self.pip, self.pport)
00154         except RuntimeError,e:
00155             print("Could not connect to NaoQi's main broker")
00156             exit(1)
00157         ALModule.__init__(self, self.moduleName)
00158 
00159         self.memProxy = ALProxy("ALMemory",self.pip,self.pport)
00160         
00161         if self.memProxy is None:
00162             rospy.logerr("Could not get a proxy to ALMemory on %s:%d", self.pip, self.pport)
00163             exit(1)
00164 
00165         self.tts = self.get_proxy("ALTextToSpeech")
00166         
00167         if self.tts is None:
00168             rospy.logerr("Could not get a proxy to ALTextToSpeech on %s:%d", self.pip, self.pport)
00169             exit(1)
00170 
00171         self.audio = self.get_proxy("ALAudioDevice")
00172         if self.audio is None:
00173             
00174             
00175             rospy.logwarn("Proxy to ALAudioDevice not available, using dummy device (normal in simulation; volume controls disabled)")
00176             self.audio = DummyAudioDevice()
00177 
00178     def subscribe(self):
00179         
00180         self.memProxy.subscribeToEvent(Constants.TEXT_DONE_EVENT, self.moduleName, "onTextDone")
00181         self.memProxy.subscribeToEvent(Constants.TEXT_STARTED_EVENT, self.moduleName, "onTextStarted")
00182 
00183     def unsubscribe(self):
00184         self.memProxy.unsubscribeToEvent(Constants.TEXT_DONE_EVENT, self.moduleName)
00185         self.memProxy.unsubscribeToEvent(Constants.TEXT_STARTED_EVENT, self.moduleName)
00186 
00187     def onTextStarted(self, strVarName, value, strMessage):
00188         
00189         
00190         if value == 0 or self.speech_with_feedback_flag == False:
00191             return
00192 
00193         
00194         fb = SpeechWithFeedbackFeedback()
00195         self.speechWithFeedbackServer.publish_feedback(fb)
00196 
00197     def onTextDone(self, strVarName, value, strMessage):
00198         
00199         
00200         if value == 0 or self.speech_with_feedback_flag == False:
00201             return
00202 
00203         
00204         
00205         self.speech_with_feedback_flag = False
00206 
00207 
00208     def executeSpeechWithFeedbackAction(self, goal):
00209         
00210         self.speech_with_feedback_flag = True
00211         saystr = goal.say
00212         self.internalSay(saystr)
00213 
00214         
00215         counter = 0
00216         while self.speech_with_feedback_flag == True and counter < 1200:
00217             rospy.sleep(0.1)
00218             counter += 1
00219 
00220         
00221         self.speechWithFeedbackServer.set_succeeded()
00222 
00223     def executeSpeechVocabularyAction(self, goal):
00224         
00225         rospy.loginfo("SetSpeechVocabulary action executing");
00226 
00227         words = goal.words
00228         words_str = ""
00229 
00230         
00231         if len(words) == 0:
00232             setVocabularyResult = SetSpeechVocabularyResult()
00233             setVocabularyResult.success = False
00234             self.setSpeechVocabularyServer.set_succeeded(setVocabularyResult)
00235             return
00236 
00237         
00238         for i in range(0, len(words) - 1):
00239             words_str += str(words[i]) + "/"
00240 
00241         words_str += words[len(words) - 1]
00242 
00243         
00244         params = { 'vocabulary' : words_str }
00245         self.reconf_client.update_configuration(params)
00246 
00247         
00248         setVocabularyResult = SetSpeechVocabularyResult()
00249         setVocabularyResult.success = True
00250         self.setSpeechVocabularyServer.set_succeeded(setVocabularyResult)
00251 
00252     
00253     def reconfigure( self, request, level ):
00254         newConf = {}
00255 
00256         
00257         newConf["voice"] = request["voice"]
00258         newConf["language"] = request["language"]
00259         newConf["volume"] = request["volume"]
00260         newConf["vocabulary"] = request["vocabulary"]
00261         newConf["audio_expression"] = request["audio_expression"]
00262         newConf["visual_expression"] = request["visual_expression"]
00263         newConf["word_spotting"] = request["word_spotting"]
00264 
00265         
00266         if not newConf["voice"]:
00267             newConf["voice"] = self.tts.getVoice()
00268         elif newConf["voice"] not in self.tts.getAvailableVoices():
00269             rospy.logwarn(
00270                 "Unknown voice '{}'. Using current voice instead".format(
00271                     newConf["voice"] ) )
00272             rospy.loginfo("Voices available: {}".format(
00273                 self.tts.getAvailableVoices()))
00274             newConf["voice"] = self.tts.getVoice()
00275 
00276         if not newConf["language"]:
00277             newConf["language"] = self.tts.getLanguage()
00278         elif newConf["language"] not in self.tts.getAvailableLanguages():
00279             newConf["language"] = self.tts.getLanguage()
00280             rospy.logwarn(
00281                 "Unknown language '{}'. Using current language instead".format(
00282                     newConf["language"] ) )
00283             rospy.loginfo("Languages available: {}".format(
00284                 self.tts.getAvailableLanguages()))
00285 
00286         
00287         if not self.conf and not rospy.has_param("~volume"):
00288             newConf["volume"] = self.audio.getOutputVolume()
00289 
00290         
00291         if self.srw and not Util.parse_vocabulary(newConf["vocabulary"]):
00292             rospy.logwarn("Empty vocabulary. Using current vocabulary instead")
00293             newConf["vocabulary"] = self.conf["vocabulary"]
00294 
00295         
00296         if self.srw and self.conf and (
00297             newConf["language"] != self.conf["language"] or
00298             newConf["vocabulary"] != self.conf["language"] or
00299             newConf["audio_expression"] != self.conf["audio_expression"] or
00300             newConf["visual_expression"] != self.conf["visual_expression"] or
00301             newConf["word_spotting"] != self.conf["word_spotting"] ):
00302             need_to_restart_speech = True
00303         else:
00304             need_to_restart_speech = False
00305 
00306         self.conf = newConf
00307 
00308         
00309         if need_to_restart_speech:
00310             self.stop()
00311             self.start()
00312 
00313         return self.conf
00314 
00315 
00316     
00317     def say( self, request ):
00318         self.internalSay(request.data)
00319 
00320     
00321     
00322     def internalSay( self, sentence ):
00323         
00324         current_voice = self.tts.getVoice()
00325         current_language = self.tts.getLanguage()
00326         current_volume = self.audio.getOutputVolume()
00327         current_gain = self.tts.getVolume()
00328         target_gain = 1.0
00329 
00330         
00331         if self.conf["voice"] != current_voice:
00332             self.tts.setVoice( self.conf["voice"] )
00333 
00334         if self.conf["language"] != current_language:
00335             self.tts.setLanguage( self.conf["language"] )
00336 
00337         if self.conf["volume"] != current_volume:
00338             self.audio.setOutputVolume( self.conf["volume"] )
00339 
00340         if target_gain != current_gain:
00341             self.tts.setVolume(target_gain)
00342 
00343         
00344         self.tts.say( sentence )
00345 
00346         
00347         if self.conf["voice"] != current_voice:
00348             self.tts.setVoice( current_voice )
00349 
00350         if self.conf["language"] != current_language:
00351             self.tts.setLanguage( current_language )
00352 
00353         if self.conf["volume"] != current_volume:
00354             self.audio.setOutputVolume( current_volume )
00355 
00356         if target_gain != current_gain:
00357             self.tts.setVolume(current_gain)
00358 
00359     
00360     def start( self, request = None ):
00361         if self.srw:
00362             rospy.logwarn("Speech recognition already started. Restarting.")
00363             self.srw.close()
00364         
00365         if Util.parse_vocabulary( self.conf["vocabulary"] ):
00366             self.srw = SpeechRecognitionWrapper(
00367                 self.pip,
00368                 self.pport,
00369                 self.pub,
00370                 self.conf )
00371         else:
00372             rospy.logwarn("Empty vocabulary. Ignoring request.")
00373 
00374         return EmptyResponse()
00375 
00376     def stop( self, request = None ):
00377         if not self.srw:
00378             rospy.logerr("Speech recognition was not started")
00379         else:
00380             self.srw.stop()
00381             self.srw = None
00382 
00383         return EmptyResponse()
00384 
00385     def shutdown(self):
00386         self.unsubscribe()
00387         
00388         
00389         
00390         
00391         
00392 
00393 
00394 
00395 
00396 class SpeechRecognitionWrapper(ALModule):
00397 
00398     """ROS wrapper for Naoqi speech recognition"""
00399     def __init__(self, ip, port, publisher, config):
00400 
00401         
00402         
00403         self.naoqi_name = Util.to_naoqi_name( rospy.get_name() )
00404 
00405         
00406         self.broker = ALBroker(self.naoqi_name + "_broker",
00407             "0.0.0.0",   
00408             0,           
00409             ip,          
00410             port )       
00411 
00412 
00413         
00414         ALModule.__init__( self, self.naoqi_name )
00415 
00416         
00417         self.memory = ALProxy("ALMemory")
00418         self.proxy = ALProxy("ALSpeechRecognition")
00419 
00420         
00421         self.pub = publisher
00422 
00423         
00424         self.install_naoqi_globals()
00425 
00426         
00427         subscribers = self.memory.getSubscribers(Constants.EVENT)
00428         if subscribers:
00429             rospy.logwarn("Speech recognition already in use by another node")
00430             for module in subscribers:
00431                 self.stop(module)
00432 
00433         
00434         self.reconfigure(config)
00435 
00436         
00437         rospy.loginfo("Subscribing '{}' to NAO speech recognition".format(
00438             self.naoqi_name) )
00439         self.memory.subscribeToEvent(
00440             Constants.EVENT,
00441             self.naoqi_name,
00442             self.on_word_recognised.func_name )
00443 
00444 
00445     
00446     def install_naoqi_globals(self):
00447         globals()[self.naoqi_name] = self
00448         globals()["memory"] = self.memory
00449 
00450 
00451     def reconfigure(self, config):
00452         self.proxy.setLanguage( config["language"] )
00453         self.proxy.setAudioExpression( config["audio_expression"] )
00454         self.proxy.setVisualExpression( config["visual_expression"] )
00455         self.proxy.setVocabulary(
00456             Util.parse_vocabulary( config["vocabulary"].encode('utf-8') ),
00457             config["word_spotting"] )
00458 
00459 
00460     def stop(self, module = None):
00461         if module is None:
00462             module = self.naoqi_name
00463 
00464         rospy.loginfo("Unsubscribing '{}' from NAO speech recognition".format(
00465             module))
00466         try:
00467             self.memory.unsubscribeToEvent( Constants.EVENT, module )
00468         except RuntimeError:
00469             rospy.logwarn("Could not unsubscribe from NAO speech recognition")
00470 
00471 
00472     def on_word_recognised(self, key, value, subscriber_id ):
00473         """Publish the words recognized by NAO via ROS """
00474 
00475         
00476         temp_dict = dict( value[i:i+2] for i in range(0, len(value), 2) )
00477 
00478         
00479         if '' in temp_dict:
00480             del(temp_dict[''])
00481 
00482         self.pub.publish(WordRecognized( temp_dict.keys(), temp_dict.values() ))
00483 
00484 
00485 if __name__ == '__main__':
00486 
00487     ROSNaoSpeechModule = NaoSpeech("ROSNaoSpeechModule")
00488     rospy.loginfo( "ROSNaoSpeechModule running..." )
00489 
00490     rospy.spin()
00491 
00492     rospy.loginfo("Stopping ROSNaoSpeechModule ...")
00493     
00494     if ROSNaoSpeechModule.srw:
00495         ROSNaoSpeechModule.srw.stop()
00496     
00497     ROSNaoSpeechModule.shutdown();
00498     rospy.loginfo("ROSNaoSpeechModule stopped.")
00499 
00500     exit(0)