7 import speech_recognition
as SR
12 from threading
import Lock
14 from audio_common_msgs.msg
import AudioData
15 from sound_play.msg
import SoundRequest, SoundRequestAction, SoundRequestGoal
17 from speech_recognition_msgs.msg
import SpeechRecognitionCandidates
18 from speech_recognition_msgs.srv
import SpeechRecognition
19 from speech_recognition_msgs.srv
import SpeechRecognitionResponse
21 from dynamic_reconfigure.server
import Server
22 from ros_speech_recognition.cfg
import SpeechRecognitionConfig
as Config
26 def __init__(self, topic_name="audio", depth=16, n_channel=1,
27 sample_rate=16000, chunk_size=1024, buffer_size=10240):
28 assert buffer_size > chunk_size
40 raise ValueError(
"depth must be 8, 16 or 32")
49 if self.
stream is not None:
64 def __exit__(self, exc_type, exc_value, traceback):
68 def __init__(self, topic_name, buffer_size=10240, depth=16,
69 n_channel=1, target_channel=0):
77 topic_name, AudioData, self.
audio_cb)
79 for code
in [
'b',
'h',
'i',
'l']:
80 self.
type_code[array.array(code).itemsize] = code
89 while not rospy.is_shutdown()
and len(self.
buffer) < size:
95 self.sub_audio.unregister()
104 data = array.array(dtype, bytes(msg.data)).tolist()
106 self.
buffer += array.array(dtype, chan_data).tostring()
118 depth=rospy.get_param(
"~depth", 16),
119 n_channel=rospy.get_param(
"~n_channel", 1),
120 sample_rate=rospy.get_param(
"~sample_rate", 16000),
121 buffer_size=rospy.get_param(
"~buffer_size", 10240))
125 if not self.act_sound.wait_for_server(rospy.Duration(5.0)):
126 rospy.logwarn(
"Failed to find sound_play action. Disabled audio alert")
129 "start": rospy.get_param(
"~start_signal",
130 "/usr/share/sounds/ubuntu/stereo/bell.ogg"),
131 "recognized": rospy.get_param(
"~recognized_signal",
132 "/usr/share/sounds/ubuntu/stereo/button-toggle-on.ogg"),
133 "success": rospy.get_param(
"~success_signal",
134 "/usr/share/sounds/ubuntu/stereo/message-new-instant.ogg"),
135 "timeout": rospy.get_param(
"~timeout_signal",
136 "/usr/share/sounds/ubuntu/stereo/window-slide.ogg"),
144 rospy.loginfo(
"Enabled continuous mode")
145 self.
pub = rospy.Publisher(
"/Tablet/voice",
146 SpeechRecognitionCandidates,
149 self.
srv = rospy.Service(
"speech_recognition",
156 if self.
engine != config.engine:
158 self.
engine = config.engine
163 config.energy_threshold = self.recognizer.energy_threshold
165 self.recognizer.energy_threshold = config.energy_threshold
166 self.recognizer.dynamic_energy_adjustment_damping = config.dynamic_energy_adjustment_damping
167 self.recognizer.dynamic_energy_ratio = config.dynamic_energy_ratio
170 if config.listen_timeout > 0.0:
174 if config.phrase_time_limit > 0.0:
178 if config.operation_timeout > 0.0:
179 self.recognizer.operation_timeout = config.operation_timeout
181 self.recognizer.operation_timeout =
None 184 if config.pause_threshold < config.non_speaking_duration:
185 config.pause_threshold = config.non_speaking_duration
186 self.recognizer.pause_threshold = config.pause_threshold
187 self.recognizer.non_speaking_duration = config.non_speaking_duration
188 self.recognizer.phrase_threshold = config.phrase_threshold
196 req.sound = SoundRequest.PLAY_FILE
197 req.command = SoundRequest.PLAY_ONCE
198 if hasattr(SoundRequest,
'volume'):
201 goal = SoundRequestGoal(sound_request=req)
202 self.act_sound.send_goal_and_wait(goal, rospy.Duration(timeout))
206 if self.
engine == Config.SpeechRecognition_Google:
208 self.
args = {
'key': rospy.get_param(
"~google_key",
None)}
209 recog_func = self.recognizer.recognize_google
210 elif self.
engine == Config.SpeechRecognition_GoogleCloud:
212 credentials_path = rospy.get_param(
"~google_cloud_credentials_json",
None)
213 if credentials_path
is not None:
214 with open(credentials_path)
as j:
215 credentials_json = j.read()
217 credentials_json =
None 218 self.
args = {
'credentials_json': credentials_json,
219 'preferred_phrases': rospy.get_param(
'~google_cloud_preferred_phrases',
None)}
220 if rospy.has_param(
'~diarizationConfig') :
221 self.args.update({
'user_config': {
'diarizationConfig': rospy.get_param(
'~diarizationConfig') }})
222 recog_func = self.recognizer.recognize_google_cloud
223 elif self.
engine == Config.SpeechRecognition_Sphinx:
224 recog_func = self.recognizer.recognize_sphinx
225 elif self.
engine == Config.SpeechRecognition_Wit:
226 recog_func = self.recognizer.recognize_wit
227 elif self.
engine == Config.SpeechRecognition_Bing:
229 self.
args = {
'key': rospy.get_param(
"~bing_key")}
230 recog_func = self.recognizer.recognize_bing
231 elif self.
engine == Config.SpeechRecognition_Houndify:
232 recog_func = self.recognizer.recognize_houndify
233 elif self.
engine == Config.SpeechRecognition_IBM:
234 recog_func = self.recognizer.recognize_ibm
236 return recog_func(audio_data=audio, language=self.
language, **self.
args)
240 rospy.logdebug(
"Waiting for result... (Sent %d bytes)" % len(audio.get_raw_data()))
242 rospy.loginfo(
"Result: %s" % result.encode(
'utf-8'))
243 msg = SpeechRecognitionCandidates(transcript=[result])
244 self.pub.publish(msg)
245 except SR.UnknownValueError
as e:
247 self.recognizer.adjust_for_ambient_noise(self.
audio)
248 rospy.loginfo(
"Updated energy threshold to %f" % self.recognizer.energy_threshold)
249 except SR.RequestError
as e:
250 rospy.logerr(
"Failed to recognize: %s" % str(e))
251 except SR.UnknownValueError
as e:
252 rospy.logerr(
"Failed to recognize: %s" % str(e))
254 rospy.logerr(
"Unexpected error: %s" % str(sys.exc_info()))
258 with self.
audio as src:
259 self.recognizer.adjust_for_ambient_noise(src)
260 rospy.loginfo(
"Set minimum energy threshold to {}".format(
261 self.recognizer.energy_threshold))
262 self.
stop_fn = self.recognizer.listen_in_background(
271 res = SpeechRecognitionResponse()
273 duration = req.duration
277 with self.
audio as src:
279 self.recognizer.adjust_for_ambient_noise(src)
280 rospy.loginfo(
"Set minimum energy threshold to %f" % self.recognizer.energy_threshold)
285 start_time = rospy.Time.now()
286 while (rospy.Time.now() - start_time).to_sec() < duration:
287 rospy.loginfo(
"Waiting for speech...")
289 audio = self.recognizer.listen(
291 except SR.WaitTimeoutError
as e:
296 rospy.loginfo(
"Waiting for result... (Sent %d bytes)" % len(audio.get_raw_data()))
300 rospy.loginfo(
"Result: %s" % result.encode(
'utf-8'))
303 res.result = SpeechRecognitionCandidates(transcript=[result])
305 except SR.UnknownValueError:
307 self.recognizer.adjust_for_ambient_noise(src)
308 rospy.loginfo(
"Set minimum energy threshold to %f" % self.recognizer.energy_threshold)
309 except SR.RequestError
as e:
310 rospy.logerr(
"Failed to recognize: %s" % str(e))
312 if rospy.is_shutdown():
326 if __name__ ==
'__main__':
327 rospy.init_node(
"speech_recognition")
def start_speech_recognition(self)
def play_sound(self, key, timeout=5.0)
def __exit__(self, exc_type, exc_value, traceback)
def speech_recognition_srv_cb(self, req)
def config_callback(self, config, level)
def read_once(self, size)
def audio_cb(self, _, audio)
def __init__(self, topic_name="audio", depth=16, n_channel=1, sample_rate=16000, chunk_size=1024, buffer_size=10240)
def recognize(self, audio)
def __init__(self, topic_name, buffer_size=10240, depth=16, n_channel=1, target_channel=0)