nao_driver: nao_driver: nao_speech.py Source File

00001 #!/usr/bin/env python
00002 
00003 #
00004 # ROS node to interface with Naoqi speech recognition and text-to-speech modules
00005 # Tested with NaoQI: 1.12
00006 #
00007 # Copyright (c) 2012, Miguel Sarabia
00008 # Imperial College London
00009 #
00010 # Redistribution and use in source and binary forms, with or without
00011 # modification, are permitted provided that the following conditions are met:
00012 #
00013 #    # Redistributions of source code must retain the above copyright
00014 #       notice, this list of conditions and the following disclaimer.
00015 #    # Redistributions in binary form must reproduce the above copyright
00016 #       notice, this list of conditions and the following disclaimer in the
00017 #       documentation and/or other materials provided with the distribution.
00018 #    # Neither the name of the Imperial College London nor the names of its
00019 #       contributors may be used to endorse or promote products derived from
00020 #       this software without specific prior written permission.
00021 #
00022 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
00023 # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00024 # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
00025 # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
00026 # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
00027 # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
00028 # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
00029 # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
00030 # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
00031 # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
00032 # POSSIBILITY OF SUCH DAMAGE.
00033 #
00034 
00035 
00036 import roslib
00037 
00038 roslib.load_manifest('nao_driver')
00039 
00040 import rospy
00041 
00042 from nao_driver import NaoNode
00043 from naoqi import (ALBroker, ALProxy, ALModule)
00044 
00045 from std_msgs.msg import( String )
00046 from std_srvs.srv import( Empty, EmptyResponse )
00047 from nao_msgs.msg import( WordRecognized )
00048 
00049 class NaoSpeech(NaoNode):
00050     #This should be treated as a constant
00051     NODE_NAME = "nao_speech"   
00052     
00053     def __init__( self ):
00054         #Initialisation
00055         NaoNode.__init__( self )
00056         rospy.init_node( self.NODE_NAME )
00057         
00058         #Variable to keep track of subscription to Naoqi's speech recognition
00059         self.subscribed = False
00060         
00061         # Get text-to-speech proxy
00062         # Speech-recognition wrapper will be lazily initialized
00063         self.tts = self.getProxy("ALTextToSpeech")
00064         self.srw = None
00065         
00066         #Update parameters from ROS Parameter server
00067         self.reconfigure( None )
00068         
00069         #Subscribe to speech topic
00070         self.sub = rospy.Subscriber("speech", String, self.say )
00071         
00072         # Advertise word recognise topic
00073         self.pub = rospy.Publisher("word_recognized", WordRecognized )
00074         
00075         # Register ROS services
00076         self.reconfigure_srv = rospy.Service(
00077             self.NODE_NAME + "/reconfigure",
00078             Empty,
00079             self.reconfigure
00080             )
00081             
00082         self.start_srv = rospy.Service(
00083             self.NODE_NAME + "/start_recognition",
00084             Empty,
00085             self.start )
00086         
00087         self.stop_srv = rospy.Service(
00088             self.NODE_NAME + "/stop_recognition",
00089             Empty,
00090             self.stop )
00091 
00092     # DESTRUCTOR
00093     def __del__( self ):
00094         if self.subscribed:
00095             self.stop( None )
00096     
00097     # RETRIEVE PARAMETERS FROM PARAMETER SERVER AND UPDATE PROGRAM
00098     def reconfigure( self, request ):
00099         self.voice = (
00100             str( rospy.get_param("~voice") ) 
00101             if rospy.has_param("~voice") 
00102             else self.tts.getVoice() )
00103 
00104         self.language = (
00105             str( rospy.get_param("~language") )
00106             if rospy.has_param("~language")
00107             else self.tts.getLanguage() )
00108       
00109         self.volume = (
00110             float( rospy.get_param("~volume") )
00111             if rospy.has_param("~volume")
00112             else self.tts.getVolume() )
00113         
00114         self.vocabulary = (
00115             list(rospy.get_param("~vocabulary") )
00116             if rospy.has_param("~vocabulary")
00117             else [] )
00118             
00119         self.audio_expression = (
00120             bool(rospy.get_param("~enable_audio_expression"))
00121             if rospy.has_param("~enable_audio_expression")
00122             else None )
00123         
00124         self.visual_expression = ( 
00125             bool(rospy.get_param("~enable_visual_expression"))
00126             if rospy.has_param("~enable_visual_expression")
00127             else None )
00128         
00129         self.word_spotting = (
00130             bool(rospy.get_param("~enable_word_spotting"))
00131             if rospy.has_param("~enable_word_spotting")
00132             else False  )
00133         
00134         #if subscribed reconfigure speech recognition wrapper by restarting it
00135         if self.subscribed:
00136             stop( None )
00137             start( None )
00138         
00139         return EmptyResponse()
00140         
00141     # CALLBACK FOR SPEECH METHOD
00142     def say( self, request ):
00143         #Get current voice parameters
00144         current_voice = self.tts.getVoice()
00145         current_language = self.tts.getLanguage()
00146         current_volume = self.tts.getVolume()
00147         
00148         #Modify them if needed
00149         if self.voice != current_voice:
00150             self.tts.setVoice( self.voice )
00151         
00152         if self.language != current_language:
00153             self.tts.setLanguage( self.language )
00154         
00155         if self.volume != current_volume:
00156             self.tts.setVolume( self.volume )
00157         
00158         #Say whatever we need to say        
00159         self.tts.say( request.data )
00160         
00161         #Restore them
00162         if self.voice != current_voice:
00163             self.tts.setVoice( current_voice )
00164         
00165         if self.language != current_language:
00166             self.tts.setLanguage( current_language )
00167         
00168         if self.volume != current_volume:
00169             self.tts.setVolume( current_volume )
00170     
00171     # SPEECH RECOGNITION CALLBACK
00172     def start( self, request ):
00173         #Before starting make sure we have the wrapper ready
00174         if not self.srw:
00175             self.srw = SpeechRecognitionWrapper(self.pip, self.pport, self.pub)
00176         
00177         #Check we're not already subscribed
00178         if self.subscribed:
00179             rospy.logerr("Speech recognition already started")
00180             return EmptyResponse()
00181         
00182         #Check no one else has started module
00183         if self.srw.memory.getSubscribers("WordRecognized"):
00184             rospy.logerr("Speech recognition in use by another node")
00185             return EmptyResponse()
00186         
00187         if not self.vocabulary:
00188             rospy.logerr("No vocabulary specified for speech recognition")
00189             return EmptyResponse()
00190         
00191         if not self.audio_expression is None: 
00192             self.srw.proxy.setAudioExpression( self.audio_expression )
00193         
00194         if not self.visual_expression is None:
00195             self.srw.proxy.setVisualExpression( self.visual_expression )
00196         
00197         #Set language and vocabulary
00198         self.srw.proxy.setLanguage( self.language )
00199         self.srw.proxy.setVocabulary( self.vocabulary, self.word_spotting )
00200                 
00201         #Set NaoQi callback to go to the speech recognition wrapper
00202         self.srw.memory.subscribeToEvent(
00203             "WordRecognized",
00204             self.srw.naoqi_name,
00205             "on_word_recognised"
00206             )
00207                     
00208         self.subscribed = True
00209         
00210         return EmptyResponse()
00211         
00212     def stop( self, request ):
00213         if not self.subscribed:
00214             rospy.logerr("Speech recognition wasn't started")
00215             return EmptyResponse()
00216         
00217         self.srw.memory.unsubscribeToEvent( 
00218             "WordRecognized",
00219             self.srw.naoqi_name
00220             )
00221         self.subscribed = False
00222         
00223         return EmptyResponse()
00224         
00225 
00226 #This class is meant to be used only by NaoSpeech
00227 #The speech recognition wrapper is lazily initialised       
00228 class SpeechRecognitionWrapper(ALModule):
00229     """ROS wrapper for Naoqi speech recognition"""
00230     def __init__(self, ip, port, publisher):
00231         
00232         # Get a (unique) name for naoqi module which is based on the node name
00233         # and is a valid Python identifier (will be useful later)
00234         self.naoqi_name = "ros" + rospy.get_name().replace("/", "_")
00235         
00236         #Start ALBroker (needed by ALModule)
00237         self.broker = ALBroker(self.naoqi_name + "_broker",
00238             "0.0.0.0",   # listen to anyone
00239             0,           # find a free port and use it
00240             ip,    # parent broker IP
00241             port   # parent broker port
00242             )
00243         
00244         #Init superclassALModule
00245         ALModule.__init__( self, self.naoqi_name )
00246         
00247         self.memory = ALProxy("ALMemory")
00248         self.proxy = ALProxy("ALSpeechRecognition")
00249         
00250         #Keep publisher to send word recognized
00251         self.pub = publisher
00252         
00253         #Install global variables needed by Naoqi
00254         self.install_naoqi_globals()
00255         
00256     # Install global variables needed for Naoqi callbacks to work
00257     def install_naoqi_globals(self):
00258         globals()[self.naoqi_name] = self
00259         globals()["memory"] = self.memory
00260         
00261     def on_word_recognised(self, key, value, subscriber_id ):
00262         """Publish the words recognized by NAO via ROS """
00263         
00264         #Create dictionary, by grouping into tuples the list in value
00265         temp_dict = dict( value[i:i+2] for i in range(0, len(value), 2) )
00266 
00267         #Delete empty string from dictionary
00268         if '' in temp_dict: 
00269             del(temp_dict[''])
00270         
00271         self.pub.publish(WordRecognized( temp_dict.keys(), temp_dict.values() ))
00272 
00273 
00274 if __name__ == '__main__':
00275     node = NaoSpeech()
00276     rospy.loginfo( node.NODE_NAME + " running..." )
00277     
00278     rospy.spin()
00279     
00280     #If speech recognition was started make sure we stop it
00281     if node.subscribed:
00282         node.stop( None )
00283     rospy.loginfo( node.NODE_NAME + " stopped." )
00284     
00285     exit(0)