nao_speech.py
#!/usr/bin/env python

#
# ROS node to interface with Naoqi speech recognition and text-to-speech modules
# Tested with NaoQI: 1.12
#
# Copyright (c) 2012, 2013, Miguel Sarabia
# Imperial College London
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
#    # Redistributions of source code must retain the above copyright
#       notice, this list of conditions and the following disclaimer.
#    # Redistributions in binary form must reproduce the above copyright
#       notice, this list of conditions and the following disclaimer in the
#       documentation and/or other materials provided with the distribution.
#    # Neither the name of the Imperial College London nor the names of its
#       contributors may be used to endorse or promote products derived from
#       this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#

import rospy
import actionlib

from dynamic_reconfigure.server import Server as ReConfServer
import dynamic_reconfigure.client
from nao_driver.cfg import nao_speechConfig as NodeConfig
from nao_driver import NaoNode
from naoqi import (ALBroker, ALProxy, ALModule)

from std_msgs.msg import String
from std_srvs.srv import Empty, EmptyResponse
from nao_msgs.msg import (
    WordRecognized,
    SetSpeechVocabularyGoal,
    SetSpeechVocabularyResult,
    SetSpeechVocabularyAction,
    SpeechWithFeedbackGoal,
    SpeechWithFeedbackResult,
    SpeechWithFeedbackFeedback,
    SpeechWithFeedbackAction )


class Constants:
    NODE_NAME = "nao_speech"
    EVENT = "WordRecognized"
    TEXT_STARTED_EVENT = "ALTextToSpeech/TextStarted"
    TEXT_DONE_EVENT = "ALTextToSpeech/TextDone"


class Util:
    @staticmethod
    def parse_vocabulary( vocabulary ):
        # Split the "/"-separated string
        vocabulary_list = vocabulary.split("/")
        # Remove surrounding whitespace
        vocabulary_list = [ entry.strip() for entry in vocabulary_list]
        # Remove empty strings
        return filter(None, vocabulary_list)

    # Methods for name conversion
    @staticmethod
    def to_naoqi_name(name):
        return "ros{}_{}".format(
            name.replace("/", "_"),
            rospy.Time.now().to_sec() )

class DummyAudioDevice:
    def getOutputVolume(self):
        return 0

    def setOutputVolume(self, vol):
        pass

class NaoSpeech(ALModule, NaoNode):

    def __init__( self, moduleName ):
        # ROS Initialisation
        NaoNode.__init__(self, Constants.NODE_NAME )

        # NAOQi module initialization
        self.moduleName = moduleName
        # Causes ALBroker to fill in the IP and find an unused port
        self.ip = ""
        self.port = 0
        self.init_almodule()

        # Used for speech-with-feedback mode only
        self.speech_with_feedback_flag = False

        # State variables
        self.conf = None

        # Get Audio proxies
        # Speech-recognition wrapper will be lazily initialized
        self.srw = None

        # Subscription to the Proxy events
        self.subscribe()

        # Start reconfigure server
        self.reconf_server = ReConfServer(NodeConfig, self.reconfigure)
        # Client for receiving the new information
        self.reconf_client = dynamic_reconfigure.client.Client(Constants.NODE_NAME)

        # Subscribe to the speech topic
        self.sub = rospy.Subscriber("speech", String, self.say )

        # Advertise the word_recognized topic
        self.pub = rospy.Publisher("word_recognized", WordRecognized, queue_size=10)

        # Register ROS services
        self.start_srv = rospy.Service(
            "start_recognition",
            Empty,
            self.start )

        self.stop_srv = rospy.Service(
            "stop_recognition",
            Empty,
            self.stop )

        # Actionlib server for altering the speech recognition vocabulary
        self.setSpeechVocabularyServer = actionlib.SimpleActionServer(
            "speech_vocabulary_action",
            SetSpeechVocabularyAction,
            execute_cb=self.executeSpeechVocabularyAction,
            auto_start=False)

        # Actionlib server for speech with feedback
        self.speechWithFeedbackServer = actionlib.SimpleActionServer(
            "speech_action",
            SpeechWithFeedbackAction,
            execute_cb=self.executeSpeechWithFeedbackAction,
            auto_start=False)

        # Start both actionlib servers
        self.setSpeechVocabularyServer.start()
        self.speechWithFeedbackServer.start()

    def init_almodule(self):
        # Before we can instantiate an ALModule, an ALBroker has to be created
        rospy.loginfo("Connecting to NaoQi at %s:%d", self.pip, self.pport)
        try:
            self.broker = ALBroker("%sBroker" % self.moduleName, self.ip, self.port, self.pip, self.pport)
        except RuntimeError as e:
            rospy.logerr("Could not connect to NaoQi's main broker: %s", e)
            exit(1)
        ALModule.__init__(self, self.moduleName)

        self.memProxy = ALProxy("ALMemory", self.pip, self.pport)
        # TODO: check self.memProxy.version() for > 1.6
        if self.memProxy is None:
            rospy.logerr("Could not get a proxy to ALMemory on %s:%d", self.pip, self.pport)
            exit(1)

        self.tts = self.get_proxy("ALTextToSpeech")
        # TODO: check self.memProxy.version() for > 1.6
        if self.tts is None:
            rospy.logerr("Could not get a proxy to ALTextToSpeech on %s:%d", self.pip, self.pport)
            exit(1)

        self.audio = self.get_proxy("ALAudioDevice")
        if self.audio is None:
            # When using simulated naoqi, the audio device is not available,
            # so use a dummy instead
            rospy.logwarn("Proxy to ALAudioDevice not available, using dummy device (normal in simulation; volume controls disabled)")
            self.audio = DummyAudioDevice()

    def subscribe(self):
        # Subscribe to the ALProxy events
        self.memProxy.subscribeToEvent(Constants.TEXT_DONE_EVENT, self.moduleName, "onTextDone")
        self.memProxy.subscribeToEvent(Constants.TEXT_STARTED_EVENT, self.moduleName, "onTextStarted")

    def unsubscribe(self):
        self.memProxy.unsubscribeToEvent(Constants.TEXT_DONE_EVENT, self.moduleName)
        self.memProxy.unsubscribeToEvent(Constants.TEXT_STARTED_EVENT, self.moduleName)

    def onTextStarted(self, strVarName, value, strMessage):
        # Called by NaoQi when NAO starts or finishes speaking (value == 1 on start).
        # Only acts in speech-with-feedback mode.
        if value == 0 or not self.speech_with_feedback_flag:
            return

        # Send feedback via the speech actionlib server
        fb = SpeechWithFeedbackFeedback()
        self.speechWithFeedbackServer.publish_feedback(fb)

    def onTextDone(self, strVarName, value, strMessage):
        # Called by NaoQi when NAO starts or finishes speaking (value == 1 on completion).
        # Only acts in speech-with-feedback mode.
        if value == 0 or not self.speech_with_feedback_flag:
            return

        # Clear the flag to inform executeSpeechWithFeedbackAction that
        # the speaking process is over
        self.speech_with_feedback_flag = False

    def executeSpeechWithFeedbackAction(self, goal):
        # Get the goal text and begin the speech
        self.speech_with_feedback_flag = True
        saystr = goal.say
        self.internalSay(saystr)

        # Wait until the onTextDone event fires or 2 minutes have passed
        counter = 0
        while self.speech_with_feedback_flag and counter < 1200:
            rospy.sleep(0.1)
            counter += 1

        # Report success
        self.speechWithFeedbackServer.set_succeeded()

    def executeSpeechVocabularyAction(self, goal):
        # Called by the action client
        rospy.loginfo("SetSpeechVocabulary action executing")

        words = goal.words

        # Empty word list: report failure in the result
        if len(words) == 0:
            setVocabularyResult = SetSpeechVocabularyResult()
            setVocabularyResult.success = False
            self.setSpeechVocabularyServer.set_succeeded(setVocabularyResult)
            return

        # Create the "/"-separated vocabulary string
        words_str = "/".join(str(word) for word in words)

        # Update the dynamic reconfigure vocabulary parameter
        params = { 'vocabulary' : words_str }
        self.reconf_client.update_configuration(params)

        # Send success
        setVocabularyResult = SetSpeechVocabularyResult()
        setVocabularyResult.success = True
        self.setSpeechVocabularyServer.set_succeeded(setVocabularyResult)

    # RECONFIGURE THIS PROGRAM
    def reconfigure( self, request, level ):
        newConf = {}

        # Copy values
        newConf["voice"] = request["voice"]
        newConf["language"] = request["language"]
        newConf["volume"] = request["volume"]
        newConf["vocabulary"] = request["vocabulary"]
        newConf["audio_expression"] = request["audio_expression"]
        newConf["visual_expression"] = request["visual_expression"]
        newConf["word_spotting"] = request["word_spotting"]

        # Check and update values
        if not newConf["voice"]:
            newConf["voice"] = self.tts.getVoice()
        elif newConf["voice"] not in self.tts.getAvailableVoices():
            rospy.logwarn(
                "Unknown voice '{}'. Using current voice instead".format(
                    newConf["voice"] ) )
            rospy.loginfo("Voices available: {}".format(
                self.tts.getAvailableVoices()))
            newConf["voice"] = self.tts.getVoice()

        if not newConf["language"]:
            newConf["language"] = self.tts.getLanguage()
        elif newConf["language"] not in self.tts.getAvailableLanguages():
            rospy.logwarn(
                "Unknown language '{}'. Using current language instead".format(
                    newConf["language"] ) )
            rospy.loginfo("Languages available: {}".format(
                self.tts.getAvailableLanguages()))
            newConf["language"] = self.tts.getLanguage()

        # If this is the first configuration and the volume parameter was not
        # explicitly set, keep the robot's current output volume
        if not self.conf and not rospy.has_param("~volume"):
            newConf["volume"] = self.audio.getOutputVolume()

        # If srw is running and the vocabulary request is invalid, ignore it
        if self.srw and not Util.parse_vocabulary(newConf["vocabulary"]):
            rospy.logwarn("Empty vocabulary. Using current vocabulary instead")
            newConf["vocabulary"] = self.conf["vocabulary"]

        # Check if we need to restart srw
        if self.srw and self.conf and (
            newConf["language"] != self.conf["language"] or
            newConf["vocabulary"] != self.conf["vocabulary"] or
            newConf["audio_expression"] != self.conf["audio_expression"] or
            newConf["visual_expression"] != self.conf["visual_expression"] or
            newConf["word_spotting"] != self.conf["word_spotting"] ):
            need_to_restart_speech = True
        else:
            need_to_restart_speech = False

        self.conf = newConf

        # If the speech recognition wrapper is running, restart it with the new configuration
        if need_to_restart_speech:
            self.stop()
            self.start()

        return self.conf


    # CALLBACK FOR SPEECH METHOD
    def say( self, request ):
        self.internalSay(request.data)

    # Internal helper. Called to say one sentence, either from the speech
    # action goal callback or from the topic callback
    def internalSay( self, sentence ):
        # Get current voice parameters
        current_voice = self.tts.getVoice()
        current_language = self.tts.getLanguage()
        current_volume = self.audio.getOutputVolume()
        current_gain = self.tts.getVolume()
        target_gain = 1.0

        # Modify them if needed
        if self.conf["voice"] != current_voice:
            self.tts.setVoice( self.conf["voice"] )

        if self.conf["language"] != current_language:
            self.tts.setLanguage( self.conf["language"] )

        if self.conf["volume"] != current_volume:
            self.audio.setOutputVolume( self.conf["volume"] )

        if target_gain != current_gain:
            self.tts.setVolume(target_gain)

        # Say whatever it is Nao needs to say
        self.tts.say( sentence )

        # And restore the previous parameters
        if self.conf["voice"] != current_voice:
            self.tts.setVoice( current_voice )

        if self.conf["language"] != current_language:
            self.tts.setLanguage( current_language )

        if self.conf["volume"] != current_volume:
            self.audio.setOutputVolume( current_volume )

        if target_gain != current_gain:
            self.tts.setVolume(current_gain)

    # SPEECH RECOGNITION SERVICES
    def start( self, request = None ):
        if self.srw:
            rospy.logwarn("Speech recognition already started. Restarting.")
            self.srw.stop()
        # Start only if vocabulary is valid
        if Util.parse_vocabulary( self.conf["vocabulary"] ):
            self.srw = SpeechRecognitionWrapper(
                self.pip,
                self.pport,
                self.pub,
                self.conf )
        else:
            rospy.logwarn("Empty vocabulary. Ignoring request.")

        return EmptyResponse()

    def stop( self, request = None ):
        if not self.srw:
            rospy.logerr("Speech recognition was not started")
        else:
            self.srw.stop()
            self.srw = None

        return EmptyResponse()

    def shutdown(self):
        self.unsubscribe()
        # Shutting down the broker no longer seems to be necessary
        # try:
        #     self.broker.shutdown()
        # except RuntimeError as e:
        #     rospy.logwarn("Could not shut down Python Broker: %s", e)


# This class is meant to be used only by NaoSpeech
# The speech recognition wrapper is lazily initialised
class SpeechRecognitionWrapper(ALModule):
    """ROS wrapper for Naoqi speech recognition"""

    def __init__(self, ip, port, publisher, config):
        # Get a (unique) name for the naoqi module which is based on the node
        # name and is a valid Python identifier (will be useful later)
        self.naoqi_name = Util.to_naoqi_name( rospy.get_name() )

        # Start ALBroker (needed by ALModule)
        self.broker = ALBroker(self.naoqi_name + "_broker",
            "0.0.0.0",   # listen to anyone
            0,           # find a free port and use it
            ip,          # parent broker IP
            port )       # parent broker port

        # Init superclass ALModule
        ALModule.__init__( self, self.naoqi_name )

        # Start naoqi proxies
        self.memory = ALProxy("ALMemory")
        self.proxy = ALProxy("ALSpeechRecognition")

        # Keep the publisher to send recognized words
        self.pub = publisher

        # Install global variables needed by Naoqi
        self.install_naoqi_globals()

        # Check no one else is subscribed to this event
        subscribers = self.memory.getSubscribers(Constants.EVENT)
        if subscribers:
            rospy.logwarn("Speech recognition already in use by another node")
            for module in subscribers:
                self.stop(module)

        # Configure this instance
        self.reconfigure(config)

        # And subscribe to the event raised by speech recognition
        rospy.loginfo("Subscribing '{}' to NAO speech recognition".format(
            self.naoqi_name) )
        self.memory.subscribeToEvent(
            Constants.EVENT,
            self.naoqi_name,
            self.on_word_recognised.func_name )


    # Install global variables needed for Naoqi callbacks to work
    def install_naoqi_globals(self):
        globals()[self.naoqi_name] = self
        globals()["memory"] = self.memory

    def reconfigure(self, config):
        self.proxy.setLanguage( config["language"] )
        self.proxy.setAudioExpression( config["audio_expression"] )
        self.proxy.setVisualExpression( config["visual_expression"] )
        self.proxy.setVocabulary(
            Util.parse_vocabulary( config["vocabulary"] ),
            config["word_spotting"] )

    def stop(self, module = None):
        if module is None:
            module = self.naoqi_name

        rospy.loginfo("Unsubscribing '{}' from NAO speech recognition".format(
            module))
        try:
            self.memory.unsubscribeToEvent( Constants.EVENT, module )
        except RuntimeError:
            rospy.logwarn("Could not unsubscribe from NAO speech recognition")

    def on_word_recognised(self, key, value, subscriber_id ):
        """Publish the words recognized by NAO via ROS """

        # Create a dictionary by grouping the flat value list into (word, confidence) pairs
        temp_dict = dict( value[i:i+2] for i in range(0, len(value), 2) )

        # Delete the empty string from the dictionary
        if '' in temp_dict:
            del(temp_dict[''])

        self.pub.publish(WordRecognized( temp_dict.keys(), temp_dict.values() ))


if __name__ == '__main__':

    ROSNaoSpeechModule = NaoSpeech("ROSNaoSpeechModule")
    rospy.loginfo("ROSNaoSpeechModule running...")

    rospy.spin()

    rospy.loginfo("Stopping ROSNaoSpeechModule ...")
    # If speech recognition was started, make sure we stop it
    if ROSNaoSpeechModule.srw:
        ROSNaoSpeechModule.srw.stop()
    # Shutdown the module
    ROSNaoSpeechModule.shutdown()
    rospy.loginfo("ROSNaoSpeechModule stopped.")

    exit(0)
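
The node above exposes its functionality through the "speech" and "word_recognized" topics, the "start_recognition"/"stop_recognition" services, and the "speech_action" and "speech_vocabulary_action" actionlib servers. The snippet below is a minimal client sketch, not part of the driver, showing one way another ROS node might exercise that interface. The topic, service, and action names and message fields (say, words) are taken from the code above; the client node name and the assumption of an unremapped shared namespace are illustrative.

#!/usr/bin/env python
# Minimal, hypothetical client sketch for the nao_speech node above.

import rospy
import actionlib
from std_msgs.msg import String
from std_srvs.srv import Empty
from nao_msgs.msg import (
    WordRecognized,
    SpeechWithFeedbackAction, SpeechWithFeedbackGoal,
    SetSpeechVocabularyAction, SetSpeechVocabularyGoal )

if __name__ == '__main__':
    rospy.init_node("nao_speech_client_example")  # hypothetical client node name

    # Fire-and-forget speech: publish a string to the "speech" topic
    pub = rospy.Publisher("speech", String, queue_size=1)
    rospy.sleep(0.5)  # give the connection time to establish
    pub.publish(String(data="Hello world"))

    # Blocking speech with feedback via the "speech_action" action server
    speech_client = actionlib.SimpleActionClient("speech_action", SpeechWithFeedbackAction)
    speech_client.wait_for_server()
    speech_client.send_goal(SpeechWithFeedbackGoal(say="Hello again"))
    speech_client.wait_for_result()

    # Set the recognition vocabulary, then start recognition
    vocab_client = actionlib.SimpleActionClient("speech_vocabulary_action", SetSpeechVocabularyAction)
    vocab_client.wait_for_server()
    vocab_client.send_goal(SetSpeechVocabularyGoal(words=["yes", "no", "hello"]))
    vocab_client.wait_for_result()

    rospy.wait_for_service("start_recognition")
    rospy.ServiceProxy("start_recognition", Empty)()

    # Recognized words arrive on the "word_recognized" topic
    rospy.Subscriber("word_recognized", WordRecognized, lambda msg: rospy.loginfo("%s", msg))
    rospy.spin()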

