7 import speech_recognition
as SR
9 from threading
import Lock
11 from audio_common_msgs.msg
import AudioData
12 from sound_play.msg
import SoundRequest, SoundRequestAction, SoundRequestGoal
14 from speech_recognition_msgs.msg
import SpeechRecognitionCandidates
15 from speech_recognition_msgs.srv
import SpeechRecognition
16 from speech_recognition_msgs.srv
import SpeechRecognitionResponse
18 from dynamic_reconfigure.server
import Server
19 from ros_speech_recognition.cfg
import SpeechRecognitionConfig
as Config
23 def __init__(self, topic_name="audio", depth=16, sample_rate=16000, chunk_size=1024, buffer_size=10240):
24 assert buffer_size > chunk_size
36 raise ValueError(
"depth must be 8, 16 or 32")
44 if self.
stream is not None:
57 def __exit__(self, exc_type, exc_value, traceback):
61 def __init__(self, topic_name, buffer_size=10240):
66 topic_name, AudioData, self.
audio_cb)
75 while not rospy.is_shutdown()
and len(self.
buffer) < size:
81 self.sub_audio.unregister()
88 self.
buffer += bytes(msg.data)
100 depth=rospy.get_param(
"~depth", 16),
101 sample_rate=rospy.get_param(
"~sample_rate", 16000))
105 if not self.act_sound.wait_for_server(rospy.Duration(5.0)):
106 rospy.logwarn(
"Failed to find sound_play action. Disabled audio alert")
109 "start": rospy.get_param(
"~start_signal",
110 "/usr/share/sounds/ubuntu/stereo/bell.ogg"),
111 "recognized": rospy.get_param(
"~recognized_signal",
112 "/usr/share/sounds/ubuntu/stereo/button-toggle-on.ogg"),
113 "success": rospy.get_param(
"~success_signal",
114 "/usr/share/sounds/ubuntu/stereo/message-new-instant.ogg"),
115 "timeout": rospy.get_param(
"~timeout_signal",
116 "/usr/share/sounds/ubuntu/stereo/window-slide.ogg"),
124 rospy.loginfo(
"Enabled continuous mode")
125 self.
pub = rospy.Publisher(
"/Tablet/voice",
126 SpeechRecognitionCandidates,
129 self.
srv = rospy.Service(
"speech_recognition",
136 if self.
engine != config.engine:
138 self.
engine = config.engine
143 config.energy_threshold = self.recognizer.energy_threshold
145 self.recognizer.energy_threshold = config.energy_threshold
146 self.recognizer.dynamic_energy_adjustment_damping = config.dynamic_energy_adjustment_damping
147 self.recognizer.dynamic_energy_ratio = config.dynamic_energy_ratio
150 if config.listen_timeout > 0.0:
154 if config.phrase_time_limit > 0.0:
158 if config.operation_timeout > 0.0:
159 self.recognizer.operation_timeout = config.operation_timeout
161 self.recognizer.operation_timeout =
None 164 if config.pause_threshold < config.non_speaking_duration:
165 config.pause_threshold = config.non_speaking_duration
166 self.recognizer.pause_threshold = config.pause_threshold
167 self.recognizer.non_speaking_duration = config.non_speaking_duration
168 self.recognizer.phrase_threshold = config.phrase_threshold
176 req.sound = SoundRequest.PLAY_FILE
177 req.command = SoundRequest.PLAY_ONCE
179 goal = SoundRequestGoal(sound_request=req)
180 self.act_sound.send_goal_and_wait(goal, rospy.Duration(timeout))
184 if self.
engine == Config.SpeechRecognition_Google:
186 self.
args = {
'key': rospy.get_param(
"~google_key",
None)}
187 recog_func = self.recognizer.recognize_google
188 elif self.
engine == Config.SpeechRecognition_GoogleCloud:
190 credentials_path = rospy.get_param(
"~google_cloud_credentials_json",
None)
191 if credentials_path
is not None:
192 with open(credentials_path)
as j:
193 credentials_json = j.read()
195 credentials_json =
None 196 self.
args = {
'credentials_json': credentials_json,
197 'preferred_phrases': rospy.get_param(
'~google_cloud_preferred_phrases',
None)}
198 recog_func = self.recognizer.recognize_google_cloud
199 elif self.
engine == Config.SpeechRecognition_Sphinx:
200 recog_func = self.recognizer.recognize_sphinx
201 elif self.
engine == Config.SpeechRecognition_Wit:
202 recog_func = self.recognizer.recognize_wit
203 elif self.
engine == Config.SpeechRecognition_Bing:
205 self.
args = {
'key': rospy.get_param(
"~bing_key")}
206 recog_func = self.recognizer.recognize_bing
207 elif self.
engine == Config.SpeechRecognition_Houndify:
208 recog_func = self.recognizer.recognize_houndify
209 elif self.
engine == Config.SpeechRecognition_IBM:
210 recog_func = self.recognizer.recognize_ibm
212 return recog_func(audio_data=audio, language=self.
language, **self.
args)
216 rospy.logdebug(
"Waiting for result... (Sent %d bytes)" % len(audio.get_raw_data()))
218 rospy.loginfo(
"Result: %s" % result.encode(
'utf-8'))
219 msg = SpeechRecognitionCandidates(transcript=[result])
220 self.pub.publish(msg)
221 except SR.UnknownValueError
as e:
223 self.recognizer.adjust_for_ambient_noise(self.
audio)
224 rospy.loginfo(
"Updated energy threshold to %f" % self.recognizer.energy_threshold)
225 except SR.RequestError
as e:
226 rospy.logerr(
"Failed to recognize: %s" % str(e))
230 with self.
audio as src:
231 self.recognizer.adjust_for_ambient_noise(src)
232 rospy.loginfo(
"Set minimum energy threshold to {}".format(
233 self.recognizer.energy_threshold))
234 self.
stop_fn = self.recognizer.listen_in_background(
243 res = SpeechRecognitionResponse()
245 duration = req.duration
249 with self.
audio as src:
251 self.recognizer.adjust_for_ambient_noise(src)
252 rospy.loginfo(
"Set minimum energy threshold to %f" % self.recognizer.energy_threshold)
257 start_time = rospy.Time.now()
258 while (rospy.Time.now() - start_time).to_sec() < duration:
259 rospy.loginfo(
"Waiting for speech...")
261 audio = self.recognizer.listen(
263 except SR.WaitTimeoutError
as e:
268 rospy.loginfo(
"Waiting for result... (Sent %d bytes)" % len(audio.get_raw_data()))
272 rospy.loginfo(
"Result: %s" % result.encode(
'utf-8'))
275 res.result = SpeechRecognitionCandidates(transcript=[result])
277 except SR.UnknownValueError:
279 self.recognizer.adjust_for_ambient_noise(src)
280 rospy.loginfo(
"Set minimum energy threshold to %f" % self.recognizer.energy_threshold)
281 except SR.RequestError
as e:
282 rospy.logerr(
"Failed to recognize: %s" % str(e))
284 if rospy.is_shutdown():
298 if __name__ ==
'__main__':
299 rospy.init_node(
"speech_recognition")
def start_speech_recognition(self)
def play_sound(self, key, timeout=5.0)
def __exit__(self, exc_type, exc_value, traceback)
def speech_recognition_srv_cb(self, req)
def __init__(self, topic_name, buffer_size=10240)
def config_callback(self, config, level)
def read_once(self, size)
def audio_cb(self, _, audio)
def __init__(self, topic_name="audio", depth=16, sample_rate=16000, chunk_size=1024, buffer_size=10240)
def recognize(self, audio)