nao_speech.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 
3 #
4 # ROS node to interface with Naoqi speech recognition and text-to-speech modules
5 # Tested with NaoQI: 1.12
6 #
7 # Copyright (c) 2012, 2013, Miguel Sarabia
8 # Imperial College London
9 #
10 # Redistribution and use in source and binary forms, with or without
11 # modification, are permitted provided that the following conditions are met:
12 #
13 # # Redistributions of source code must retain the above copyright
14 # notice, this list of conditions and the following disclaimer.
15 # # Redistributions in binary form must reproduce the above copyright
16 # notice, this list of conditions and the following disclaimer in the
17 # documentation and/or other materials provided with the distribution.
18 # # Neither the name of the Imperial College London nor the names of its
19 # contributors may be used to endorse or promote products derived from
20 # this software without specific prior written permission.
21 #
22 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
23 # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
26 # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27 # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28 # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
29 # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
30 # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
31 # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32 # POSSIBILITY OF SUCH DAMAGE.
33 #
34 
35 import rospy
36 import actionlib
37 
38 from dynamic_reconfigure.server import Server as ReConfServer
39 import dynamic_reconfigure.client
40 from naoqi_driver_py.cfg import NaoqiSpeechConfig as NodeConfig
41 from naoqi_driver.naoqi_node import NaoqiNode
42 from naoqi import (ALBroker, ALProxy, ALModule)
43 
44 from std_msgs.msg import( String )
45 from std_srvs.srv import( Empty, EmptyResponse )
46 from naoqi_bridge_msgs.msg import(
47  WordRecognized,
48  SetSpeechVocabularyGoal,
49  SetSpeechVocabularyResult,
50  SetSpeechVocabularyAction,
51  SpeechWithFeedbackGoal,
52  SpeechWithFeedbackResult,
53  SpeechWithFeedbackFeedback,
54  SpeechWithFeedbackAction )
55 
56 
57 class Constants:
58  NODE_NAME = "nao_speech"
59  EVENT = "WordRecognized"
60  TEXT_STARTED_EVENT = "ALTextToSpeech/TextStarted"
61  TEXT_DONE_EVENT = "ALTextToSpeech/TextDone"
62 
63 
64 class Util:
65  @staticmethod
66  def parse_vocabulary( vocabulary ):
67  # Split string
68  vocabulary_list = vocabulary.split("/")
69  # Remove surrounding whitespace
70  vocabulary_list = [ entry.strip() for entry in vocabulary_list]
71  # Remove empty strings
72  return filter(None, vocabulary_list)
73 
74  # Methods for name conversion
75  @staticmethod
76  def to_naoqi_name(name):
77  return "ros{}_{}".format(
78  name.replace("/", "_"),
79  rospy.Time.now().to_sec() )
80 
82  def getOutputVolume(self):
83  return 0
84 
85  def setOutputVolume(self, vol):
86  pass
87 
88 class NaoSpeech(ALModule, NaoqiNode):
89 
90  def __init__( self, moduleName ):
91  # ROS Initialisation
92  NaoqiNode.__init__(self, Constants.NODE_NAME )
93 
94  # NAOQi Module initialization
95  self.moduleName = moduleName
96  # Causes ALBroker to fill in ip and find a unused port
97  self.ip = ""
98  self.port = 0
99  self.init_almodule()
100 
101  # Used for speech with feedback mode only
103 
104  # State variables
105  self.conf = None
106 
107  # Get Audio proxies
108  # Speech-recognition wrapper will be lazily initialized
109  self.srw = None
110 
111  # Subscription to the Proxy events
112  self.subscribe()
113 
114  # Start reconfigure server
115  self.reconf_server = ReConfServer(NodeConfig, self.reconfigure)
116  # Client for receiving the new information
117  self.reconf_client = dynamic_reconfigure.client.Client(rospy.get_name())
118 
119  #Subscribe to speech topic
120  self.sub = rospy.Subscriber("speech", String, self.say )
121 
122  # Advertise word recognise topic
123  self.pub = rospy.Publisher("word_recognized", WordRecognized )
124 
125  # Register ROS services
126  self.start_srv = rospy.Service(
127  "start_recognition",
128  Empty,
129  self.start )
130 
131  self.stop_srv = rospy.Service(
132  "stop_recognition",
133  Empty,
134  self.stop )
135 
136  # Actionlib server for altering the speech recognition vocabulary
137  self.setSpeechVocabularyServer = actionlib.SimpleActionServer("speech_vocabulary_action", SetSpeechVocabularyAction,
138  execute_cb=self.executeSpeechVocabularyAction,
139  auto_start=False)
140 
141  # Actionlib server for having speech with feedback
142  self.speechWithFeedbackServer = actionlib.SimpleActionServer("speech_action", SpeechWithFeedbackAction,
143  execute_cb=self.executeSpeechWithFeedbackAction,
144  auto_start=False)
145  # Start both actionlib servers
146  self.setSpeechVocabularyServer.start()
147  self.speechWithFeedbackServer.start()
148 
149  def init_almodule(self):
150  # before we can instantiate an ALModule, an ALBroker has to be created
151  rospy.loginfo("Connecting to NaoQi at %s:%d", self.pip, self.pport)
152  try:
153  self.broker = ALBroker("%sBroker" % self.moduleName, self.ip, self.port, self.pip, self.pport)
154  except RuntimeError,e:
155  print("Could not connect to NaoQi's main broker")
156  exit(1)
157  ALModule.__init__(self, self.moduleName)
158 
159  self.memProxy = ALProxy("ALMemory",self.pip,self.pport)
160  # TODO: check self.memProxy.version() for > 1.6
161  if self.memProxy is None:
162  rospy.logerr("Could not get a proxy to ALMemory on %s:%d", self.pip, self.pport)
163  exit(1)
164 
165  self.tts = self.get_proxy("ALTextToSpeech")
166  # TODO: check self.memProxy.version() for > 1.6
167  if self.tts is None:
168  rospy.logerr("Could not get a proxy to ALTextToSpeech on %s:%d", self.pip, self.pport)
169  exit(1)
170 
171  self.audio = self.get_proxy("ALAudioDevice")
172  if self.audio is None:
173  # When using simulated naoqi, audio device is not available,
174  # Use a dummy instead
175  rospy.logwarn("Proxy to ALAudioDevice not available, using dummy device (normal in simulation; volume controls disabled)")
176  self.audio = DummyAudioDevice()
177 
178  def subscribe(self):
179  # Subscription to the ALProxies events
180  self.memProxy.subscribeToEvent(Constants.TEXT_DONE_EVENT, self.moduleName, "onTextDone")
181  self.memProxy.subscribeToEvent(Constants.TEXT_STARTED_EVENT, self.moduleName, "onTextStarted")
182 
183  def unsubscribe(self):
184  self.memProxy.unsubscribeToEvent(Constants.TEXT_DONE_EVENT, self.moduleName)
185  self.memProxy.unsubscribeToEvent(Constants.TEXT_STARTED_EVENT, self.moduleName)
186 
187  def onTextStarted(self, strVarName, value, strMessage):
188  # Called when NAO begins or ends the speech. On begin the value = 1
189  # Must work only on speech with feedback mode
190  if value == 0 or self.speech_with_feedback_flag == False:
191  return
192 
193  # Send feedback via the speech actionlib server
194  fb = SpeechWithFeedbackFeedback()
195  self.speechWithFeedbackServer.publish_feedback(fb)
196 
197  def onTextDone(self, strVarName, value, strMessage):
198  # Called when NAO begins or ends the speech. On end the value = 1
199  # Must work only on speech with feedback mode
200  if value == 0 or self.speech_with_feedback_flag == False:
201  return
202 
203  # Change the flag to inform the executeSpeechWithFeedbackAction function that
204  # the speaking process is over
205  self.speech_with_feedback_flag = False
206 
207 
209  # Gets the goal and begins the speech
210  self.speech_with_feedback_flag = True
211  saystr = goal.say
212  self.internalSay(saystr)
213 
214  # Wait till the onTextDone event is called or 2 mins are passed
215  counter = 0
216  while self.speech_with_feedback_flag == True and counter < 1200:
217  rospy.sleep(0.1)
218  counter += 1
219 
220  # Send the success feedback
221  self.speechWithFeedbackServer.set_succeeded()
222 
224  #~ Called by action client
225  rospy.loginfo("SetSpeechVocabulary action executing");
226 
227  words = goal.words
228  words_str = ""
229 
230  #~ Empty word list. Send failure.
231  if len(words) == 0:
232  setVocabularyResult = SetSpeechVocabularyResult()
233  setVocabularyResult.success = False
234  self.setSpeechVocabularyServer.set_succeeded(setVocabularyResult)
235  return
236 
237  #~ Create the vocabulary string
238  for i in range(0, len(words) - 1):
239  words_str += str(words[i]) + "/"
240 
241  words_str += words[len(words) - 1]
242 
243  #~ Update the dynamic reconfigure vocabulary parameter
244  params = { 'vocabulary' : words_str }
245  self.reconf_client.update_configuration(params)
246 
247  #~ Send success
248  setVocabularyResult = SetSpeechVocabularyResult()
249  setVocabularyResult.success = True
250  self.setSpeechVocabularyServer.set_succeeded(setVocabularyResult)
251 
252  # RECONFIGURE THIS PROGRAM
253  def reconfigure( self, request, level ):
254  newConf = {}
255 
256  #Copy values
257  newConf["voice"] = request["voice"]
258  newConf["language"] = request["language"]
259  newConf["volume"] = request["volume"]
260  newConf["vocabulary"] = request["vocabulary"]
261  newConf["audio_expression"] = request["audio_expression"]
262  newConf["visual_expression"] = request["visual_expression"]
263  newConf["word_spotting"] = request["word_spotting"]
264 
265  # Check and update values
266  if not newConf["voice"]:
267  newConf["voice"] = self.tts.getVoice()
268  elif newConf["voice"] not in self.tts.getAvailableVoices():
269  rospy.logwarn(
270  "Unknown voice '{}'. Using current voice instead".format(
271  newConf["voice"] ) )
272  rospy.loginfo("Voices available: {}".format(
273  self.tts.getAvailableVoices()))
274  newConf["voice"] = self.tts.getVoice()
275 
276  if not newConf["language"]:
277  newConf["language"] = self.tts.getLanguage()
278  elif newConf["language"] not in self.tts.getAvailableLanguages():
279  newConf["language"] = self.tts.getLanguage()
280  rospy.logwarn(
281  "Unknown language '{}'. Using current language instead".format(
282  newConf["language"] ) )
283  rospy.loginfo("Languages available: {}".format(
284  self.tts.getAvailableLanguages()))
285 
286  # If first time and parameter not explicitly set
287  if not self.conf and not rospy.has_param("~volume"):
288  newConf["volume"] = self.audio.getOutputVolume()
289 
290  # if srw is running and the vocabulary request is invalid, ignore it
291  if self.srw and not Util.parse_vocabulary(newConf["vocabulary"]):
292  rospy.logwarn("Empty vocabulary. Using current vocabulary instead")
293  newConf["vocabulary"] = self.conf["vocabulary"]
294 
295  # Check if we need to restart srw
296  if self.srw and self.conf and (
297  newConf["language"] != self.conf["language"] or
298  newConf["vocabulary"] != self.conf["language"] or
299  newConf["audio_expression"] != self.conf["audio_expression"] or
300  newConf["visual_expression"] != self.conf["visual_expression"] or
301  newConf["word_spotting"] != self.conf["word_spotting"] ):
302  need_to_restart_speech = True
303  else:
304  need_to_restart_speech = False
305 
306  self.conf = newConf
307 
308  #If we have enabled the speech recognition wrapper, reconfigure it
309  if need_to_restart_speech:
310  self.stop()
311  self.start()
312 
313  return self.conf
314 
315 
316  # CALLBACK FOR SPEECH METHOD
317  def say( self, request ):
318  self.internalSay(request.data)
319 
320  # Used for internal use. Called to say one sentence either from the speech
321  # action goal callback or message callback
322  def internalSay( self, sentence ):
323  #Get current voice parameters
324  current_voice = self.tts.getVoice()
325  current_language = self.tts.getLanguage()
326  current_volume = self.audio.getOutputVolume()
327  current_gain = self.tts.getVolume()
328  target_gain = 1.0
329 
330  #Modify them if needed
331  if self.conf["voice"] != current_voice:
332  self.tts.setVoice( self.conf["voice"] )
333 
334  if self.conf["language"] != current_language:
335  self.tts.setLanguage( self.conf["language"] )
336 
337  if self.conf["volume"] != current_volume:
338  self.audio.setOutputVolume( self.conf["volume"] )
339 
340  if target_gain != current_gain:
341  self.tts.setVolume(target_gain)
342 
343  #Say whatever it is Nao needs to say
344  self.tts.say( sentence )
345 
346  #And restore them
347  if self.conf["voice"] != current_voice:
348  self.tts.setVoice( current_voice )
349 
350  if self.conf["language"] != current_language:
351  self.tts.setLanguage( current_language )
352 
353  if self.conf["volume"] != current_volume:
354  self.audio.setOutputVolume( current_volume )
355 
356  if target_gain != current_gain:
357  self.tts.setVolume(current_gain)
358 
359  # SPEECH RECOGNITION SERVICES
360  def start( self, request = None ):
361  if self.srw:
362  rospy.logwarn("Speech recognition already started. Restarting.")
363  self.srw.close()
364  # Start only if vocabulary is valid
365  if Util.parse_vocabulary( self.conf["vocabulary"] ):
367  self.pip,
368  self.pport,
369  self.pub,
370  self.conf )
371  else:
372  rospy.logwarn("Empty vocabulary. Ignoring request.")
373 
374  return EmptyResponse()
375 
376  def stop( self, request = None ):
377  if not self.srw:
378  rospy.logerr("Speech recognition was not started")
379  else:
380  self.srw.stop()
381  self.srw = None
382 
383  return EmptyResponse()
384 
385  def shutdown(self):
386  self.unsubscribe()
387  # Shutting down broker seems to be not necessary any more
388  # try:
389  # self.broker.shutdown()
390  # except RuntimeError,e:
391  # rospy.logwarn("Could not shut down Python Broker: %s", e)
392 
393 
394 #This class is meant to be used only by NaoSpeech
395 #The speech recognition wrapper is lazily initialised
396 class SpeechRecognitionWrapper(ALModule):
397 
398  """ROS wrapper for Naoqi speech recognition"""
399  def __init__(self, ip, port, publisher, config):
400 
401  # Get a (unique) name for naoqi module which is based on the node name
402  # and is a valid Python identifier (will be useful later)
403  self.naoqi_name = Util.to_naoqi_name( rospy.get_name() )
404 
405  #Start ALBroker (needed by ALModule)
406  self.broker = ALBroker(self.naoqi_name + "_broker",
407  "0.0.0.0", # listen to anyone
408  0, # find a free port and use it
409  ip, # parent broker IP
410  port ) # parent broker port
411 
412 
413  #Init superclass ALModule
414  ALModule.__init__( self, self.naoqi_name )
415 
416  # Start naoqi proxies
417  self.memory = ALProxy("ALMemory")
418  self.proxy = ALProxy("ALSpeechRecognition")
419 
420  #Keep publisher to send word recognized
421  self.pub = publisher
422 
423  #Install global variables needed by Naoqi
424  self.install_naoqi_globals()
425 
426  #Check no one else is subscribed to this event
427  subscribers = self.memory.getSubscribers(Constants.EVENT)
428  if subscribers:
429  rospy.logwarn("Speech recognition already in use by another node")
430  for module in subscribers:
431  self.stop(module)
432 
433  # Configure this instance
434  self.reconfigure(config)
435 
436  #And subscribe to the event raised by speech recognition
437  rospy.loginfo("Subscribing '{}' to NAO speech recognition".format(
438  self.naoqi_name) )
439  self.memory.subscribeToEvent(
440  Constants.EVENT,
441  self.naoqi_name,
442  self.on_word_recognised.func_name )
443 
444 
445  # Install global variables needed for Naoqi callbacks to work
447  globals()[self.naoqi_name] = self
448  globals()["memory"] = self.memory
449 
450 
451  def reconfigure(self, config):
452  self.proxy.setLanguage( config["language"] )
453  self.proxy.setAudioExpression( config["audio_expression"] )
454  self.proxy.setVisualExpression( config["visual_expression"] )
455  self.proxy.setVocabulary(
456  Util.parse_vocabulary( config["vocabulary"].encode('utf-8') ),
457  config["word_spotting"] )
458 
459 
460  def stop(self, module = None):
461  if module is None:
462  module = self.naoqi_name
463 
464  rospy.loginfo("Unsubscribing '{}' from NAO speech recognition".format(
465  module))
466  try:
467  self.memory.unsubscribeToEvent( Constants.EVENT, module )
468  except RuntimeError:
469  rospy.logwarn("Could not unsubscribe from NAO speech recognition")
470 
471 
472  def on_word_recognised(self, key, value, subscriber_id ):
473  """Publish the words recognized by NAO via ROS """
474 
475  #Create dictionary, by grouping into tuples the list in value
476  temp_dict = dict( value[i:i+2] for i in range(0, len(value), 2) )
477 
478  #Delete empty string from dictionary
479  if '' in temp_dict:
480  del(temp_dict[''])
481 
482  self.pub.publish(WordRecognized( temp_dict.keys(), temp_dict.values() ))
483 
484 
485 if __name__ == '__main__':
486 
487  ROSNaoSpeechModule = NaoSpeech("ROSNaoSpeechModule")
488  rospy.loginfo( "ROSNaoSpeechModule running..." )
489 
490  rospy.spin()
491 
492  rospy.loginfo("Stopping ROSNaoSpeechModule ...")
493  #If speech recognition was started make sure we stop it
494  if ROSNaoSpeechModule.srw:
495  ROSNaoSpeechModule.srw.stop()
496  # Shutdown the module
497  ROSNaoSpeechModule.shutdown();
498  rospy.loginfo("ROSNaoSpeechModule stopped.")
499 
500  exit(0)
def stop(self, module=None)
Definition: nao_speech.py:460
def parse_vocabulary(vocabulary)
Definition: nao_speech.py:66
def onTextStarted(self, strVarName, value, strMessage)
Definition: nao_speech.py:187
def stop(self, request=None)
Definition: nao_speech.py:376
def executeSpeechWithFeedbackAction(self, goal)
Definition: nao_speech.py:208
def reconfigure(self, request, level)
Definition: nao_speech.py:253
def internalSay(self, sentence)
Definition: nao_speech.py:322
def unsubscribe(self)
Definition: nao_speech.py:183
def say(self, request)
Definition: nao_speech.py:317
def __init__(self, ip, port, publisher, config)
Definition: nao_speech.py:399
def onTextDone(self, strVarName, value, strMessage)
Definition: nao_speech.py:197
def __init__(self, moduleName)
Definition: nao_speech.py:90
def init_almodule(self)
Definition: nao_speech.py:149
def start(self, request=None)
Definition: nao_speech.py:360
def to_naoqi_name(name)
Definition: nao_speech.py:76
def setOutputVolume(self, vol)
Definition: nao_speech.py:85
def on_word_recognised(self, key, value, subscriber_id)
Definition: nao_speech.py:472
def executeSpeechVocabularyAction(self, goal)
Definition: nao_speech.py:223
def get_proxy(self, name, warn=True)


nao_apps
Author(s):
autogenerated on Mon Jun 10 2019 14:04:55