speech_to_text.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3 # Author: Yuki Furuta <furushchev@jsk.imi.i.u-tokyo.ac.jp>
4 
5 import actionlib
6 import rospy
7 try:
8  import speech_recognition as SR
9 except ImportError as e:
10  raise ImportError(str(e) + '\nplease try "pip install speechrecognition"')
11 
12 from actionlib_msgs.msg import GoalStatus, GoalStatusArray
13 from audio_common_msgs.msg import AudioData
14 from sound_play.msg import SoundRequest, SoundRequestAction, SoundRequestGoal
15 from speech_recognition_msgs.msg import SpeechRecognitionCandidates
16 
17 
18 class SpeechToText(object):
19  def __init__(self):
20  # format of input audio data
21  self.sample_rate = rospy.get_param("~sample_rate", 16000)
22  self.sample_width = rospy.get_param("~sample_width", 2)
23  # language of STT service
24  self.language = rospy.get_param("~language", "ja-JP")
25  # ignore voice input while the robot is speaking
26  self.self_cancellation = rospy.get_param("~self_cancellation", True)
27  # time to assume as SPEAKING after tts service is finished
28  self.tts_tolerance = rospy.Duration.from_sec(
29  rospy.get_param("~tts_tolerance", 1.0))
30  tts_action_names = rospy.get_param(
31  '~tts_action_names', ['sound_play'])
32 
33  self.recognizer = SR.Recognizer()
34 
35  self.tts_action = None
36  self.last_tts = None
37  self.is_canceling = False
38  self.tts_actions = []
39  if self.self_cancellation:
40  for tts_action_name in tts_action_names:
41  tts_action = actionlib.SimpleActionClient(
42  tts_action_name, SoundRequestAction)
43  if tts_action.wait_for_server(rospy.Duration(5.0)):
44  self.tts_actions.append(tts_action)
45  else:
46  rospy.logerr(
47  "action '{}' is not initialized."
48  .format(tts_action_name))
49  self.tts_timer = rospy.Timer(rospy.Duration(0.1), self.tts_timer_cb)
50 
51  self.pub_speech = rospy.Publisher(
52  "speech_to_text", SpeechRecognitionCandidates, queue_size=1)
53  self.sub_audio = rospy.Subscriber("audio", AudioData, self.audio_cb)
54 
55  def tts_timer_cb(self, event):
56  stamp = event.current_real
57  active = False
58  for tts_action in self.tts_actions:
59  for st in tts_action.action_client.last_status_msg.status_list:
60  if st.status == GoalStatus.ACTIVE:
61  active = True
62  break
63  if active:
64  break
65  if active:
66  if not self.is_canceling:
67  rospy.logdebug("START CANCELLATION")
68  self.is_canceling = True
69  self.last_tts = None
70  elif self.is_canceling:
71  if self.last_tts is None:
72  self.last_tts = stamp
73  if stamp - self.last_tts > self.tts_tolerance:
74  rospy.logdebug("END CANCELLATION")
75  self.is_canceling = False
76 
77  def audio_cb(self, msg):
78  if self.is_canceling:
79  rospy.loginfo("Speech is cancelled")
80  return
81  data = SR.AudioData(msg.data, self.sample_rate, self.sample_width)
82  try:
83  rospy.loginfo("Waiting for result %d" % len(data.get_raw_data()))
84  result = self.recognizer.recognize_google(
85  data, language=self.language)
86  msg = SpeechRecognitionCandidates(transcript=[result])
87  self.pub_speech.publish(msg)
88  except SR.UnknownValueError as e:
89  rospy.logerr("Failed to recognize: %s" % str(e))
90  except SR.RequestError as e:
91  rospy.logerr("Failed to recognize: %s" % str(e))
92 
93 
94 if __name__ == '__main__':
95  rospy.init_node("speech_to_text")
96  stt = SpeechToText()
97  rospy.spin()
def tts_timer_cb(self, event)


respeaker_ros
Author(s): Yuki Furuta
autogenerated on Tue May 11 2021 02:55:46