Go to the documentation of this file.00001
00002
00003
00004
00005 import actionlib
00006 import rospy
00007 try:
00008 import speech_recognition as SR
00009 except ImportError as e:
00010 raise ImportError(str(e) + '\nplease try "pip install speechrecognition"')
00011
00012 from actionlib_msgs.msg import GoalStatus, GoalStatusArray
00013 from audio_common_msgs.msg import AudioData
00014 from sound_play.msg import SoundRequest, SoundRequestAction, SoundRequestGoal
00015 from speech_recognition_msgs.msg import SpeechRecognitionCandidates
00016
00017
00018 class SpeechToText(object):
00019 def __init__(self):
00020
00021 self.sample_rate = rospy.get_param("~sample_rate", 16000)
00022 self.sample_width = rospy.get_param("~sample_width", 2L)
00023
00024 self.language = rospy.get_param("~language", "ja-JP")
00025
00026 self.self_cancellation = rospy.get_param("~self_cancellation", True)
00027
00028 self.tts_tolerance = rospy.Duration.from_sec(
00029 rospy.get_param("~tts_tolerance", 1.0))
00030 tts_action_names = rospy.get_param(
00031 '~tts_action_names', ['soundplay'])
00032
00033 self.recognizer = SR.Recognizer()
00034
00035 self.tts_action = None
00036 self.last_tts = None
00037 self.is_canceling = False
00038 self.tts_actions = []
00039 if self.self_cancellation:
00040 for tts_action_name in tts_action_names:
00041 tts_action = actionlib.SimpleActionClient(
00042 tts_action_name, SoundRequestAction)
00043 if tts_action.wait_for_server(rospy.Duration(5.0)):
00044 self.tts_actions.append(tts_action)
00045 else:
00046 rospy.logerr(
00047 "action '{}' is not initialized."
00048 .format(tts_action_name))
00049 self.tts_timer = rospy.Timer(rospy.Duration(0.1), self.tts_timer_cb)
00050
00051 self.pub_speech = rospy.Publisher(
00052 "speech_to_text", SpeechRecognitionCandidates, queue_size=1)
00053 self.sub_audio = rospy.Subscriber("audio", AudioData, self.audio_cb)
00054
00055 def tts_timer_cb(self, event):
00056 stamp = event.current_real
00057 active = False
00058 for tts_action in self.tts_actions:
00059 for st in tts_action.action_client.last_status_msg.status_list:
00060 if st.status == GoalStatus.ACTIVE:
00061 active = True
00062 break
00063 if active:
00064 break
00065 if active:
00066 if not self.is_canceling:
00067 rospy.logdebug("START CANCELLATION")
00068 self.is_canceling = True
00069 self.last_tts = None
00070 elif self.is_canceling:
00071 if self.last_tts is None:
00072 self.last_tts = stamp
00073 if stamp - self.last_tts > self.tts_tolerance:
00074 rospy.logdebug("END CANCELLATION")
00075 self.is_canceling = False
00076
00077 def audio_cb(self, msg):
00078 if self.is_canceling:
00079 rospy.loginfo("Speech is cancelled")
00080 return
00081 data = SR.AudioData(msg.data, self.sample_rate, self.sample_width)
00082 try:
00083 rospy.loginfo("Waiting for result %d" % len(data.get_raw_data()))
00084 result = self.recognizer.recognize_google(
00085 data, language=self.language)
00086 msg = SpeechRecognitionCandidates(transcript=[result])
00087 self.pub_speech.publish(msg)
00088 except SR.UnknownValueError as e:
00089 rospy.logerr("Failed to recognize: %s" % str(e))
00090 except SR.RequestError as e:
00091 rospy.logerr("Failed to recognize: %s" % str(e))
00092
00093
00094 if __name__ == '__main__':
00095 rospy.init_node("speech_to_text")
00096 stt = SpeechToText()
00097 rospy.spin()