6 from threading
import Lock
10 from sound_play.msg
import SoundRequest, SoundRequestAction, SoundRequestGoal
17 from speech_recognition_msgs.msg
import Grammar
18 from speech_recognition_msgs.msg
import SpeechRecognitionCandidates
19 from speech_recognition_msgs.msg
import Vocabulary
20 from speech_recognition_msgs.srv
import SpeechRecognition
21 from speech_recognition_msgs.srv
import SpeechRecognitionResponse
22 from std_srvs.srv
import Empty, EmptyResponse
27 start_signal =
"/usr/share/sounds/ubuntu/stereo/bell.ogg" 28 success_signal =
"/usr/share/sounds/ubuntu/stereo/message-new-instant.ogg" 29 timeout_signal =
"/usr/share/sounds/ubuntu/stereo/window-slide.ogg" 33 self.
encoding = rospy.get_param(
"~encoding",
"utf-8")
40 SpeechRecognitionCandidates,
45 if not self.act_sound.wait_for_server(rospy.Duration(5.0)):
46 rospy.logwarn(
"Failed to find sound_play action. Disabled audio alert")
50 host = rospy.get_param(
"~host",
"localhost")
51 module_port = rospy.get_param(
"~module_port", 10500)
52 audio_port = rospy.get_param(
"~audio_port", 10501)
53 max_retry = rospy.get_param(
"~max_connection_retry", 0)
59 self.module.on_received_data(self.
julius_cb)
87 self.module.send_command([
"INPUTONCHANGE",
"PAUSE"])
90 self.module.send_command([
"VERSION"])
91 self.module.send_command([
"STATUS"])
92 self.module.send_command([
"GRAMINFO"])
93 return EmptyResponse()
99 req.sound = SoundRequest.PLAY_FILE
100 req.command = SoundRequest.PLAY_ONCE
101 if hasattr(SoundRequest,
'volume'):
104 goal = SoundRequestGoal(sound_request=req)
105 self.act_sound.send_goal_and_wait(goal, rospy.Duration(timeout))
109 self.module.send_command(cmd)
116 cmd = [
"ACTIVATEGRAM", name]
117 self.module.send_command(cmd)
120 rospy.loginfo(
"Successfully activated grammar")
122 rospy.logerr(
"Failed to activate grammar")
127 cmd = [
"DEACTIVATEGRAM", name]
128 rospy.loginfo(
"Deactivating %s" % name)
129 self.module.send_command(cmd)
132 rospy.loginfo(
"Successfully deactivated grammar")
134 rospy.logerr(
"Failed to deactivate grammar")
142 cmd = [
"%s %s" % (cmd_name, name)]
144 cmd += [
' ' + l.strip()
for l
in dfa
if l.strip()]
146 cmd += [
' ' + l.strip()
for l
in dic
if l.strip()]
150 rospy.loginfo(
"%s Success" % cmd_name)
152 rospy.logerr(
"%s Failed" % cmd_name)
156 ok = self.
do_gram(
"ADDGRAM", name, dfa, dic)
161 return self.
do_gram(
"CHANGEGRAM", name, dfa, dic)
167 rospy.logwarn(
"Name of grammar is empty. Use 'unknown'")
173 rospy.logerr(
"Failed to make dfa from grammar message")
176 ok = self.
add_gram(name, dfa.split(os.linesep), dic.split(os.linesep))
178 self.
vocabularies[name] = list(set(e
for v
in msg.vocabularies
for e
in v.words))
180 rospy.logerr(
"Failed to change vocabulary")
186 rospy.logwarn(
"Name of grammar is empty. Use 'unknown'")
188 if len(msg.phonemes) == 0
or not msg.phonemes[0]:
191 phonemes = msg.phonemes
192 dic = [
" %s\t%s" % (w, p)
for w, p
in zip(msg.words, phonemes)]
197 rospy.logerr(
"Failed to change vocabulary")
200 res = SpeechRecognitionResponse()
207 rospy.logerr(
"failed to activate grammar %s" % req.grammar_name)
211 elif req.grammar.rules:
219 rospy.logerr(
"Failed to make dfa from grammar message")
222 ok = self.
change_gram(g.name, dfa.split(os.linesep), dic.split(os.linesep))
224 rospy.logerr(
"Failed to change grammar")
226 self.
vocabularies[g.name] = list(set(e
for v
in msg.vocabularies
for e
in v.words))
228 elif req.vocabulary.words:
232 if len(v.phonemes) == 0
or not v.phonemes[0]:
234 dic = [
" %s\t%s" % (w, p)
for w, p
in zip(v.words, v.phonemes)]
237 rospy.logerr(
"Failed to change vocabulary")
242 rospy.logerr(
"Invalid request: 'grammar_name', 'grammar' or 'vocabulary' must be filled")
245 duration = req.duration
249 threshold = req.threshold
250 if threshold <= 0.0
or threshold > 1.0:
255 start_time = rospy.Time.now()
257 while (rospy.Time.now() - start_time).to_sec() < duration:
260 if not self.last_speech.transcript:
263 ok = speech.transcript[0]
in candidate_words
and speech.confidence[0] >= threshold
265 ok = speech.confidence[0] >= threshold
267 t0 = speech.transcript[0]
268 c0 = speech.confidence[0]
269 rospy.loginfo(
"Recognized %s (%f)..." % (t0, c0))
276 rospy.logerr(
"Timed out")
283 for shypo
in data.xpath(
"//SHYPO"):
286 for whypo
in shypo.xpath(
"./WHYPO"):
287 if sys.version_info.major < 3:
288 word = whypo.attrib[
"WORD"].encode(self.
encoding)
290 word = whypo.attrib[
"WORD"]
291 if word.startswith(
"<"):
293 transcript.append(word)
294 confidence += float(whypo.attrib[
"CM"])
295 confidence /= len(transcript)
296 transcript =
" ".join(transcript)
297 results[confidence] = transcript
299 msg = SpeechRecognitionCandidates()
300 debug_str = [
"Recognized:"]
301 for i, result
in enumerate(sorted(results.items(), reverse=
True)):
303 debug_str += [
"%d: %s (%.2f)" % (i+1, t, c)]
304 msg.transcript.append(t)
305 msg.confidence.append(c)
306 self.pub_speech_recognition.publish(msg)
308 rospy.logdebug(os.linesep.join(debug_str))
315 status, detail = data
316 rospy.logdebug(
"status: %s" % status)
317 rospy.logdebug(
"detail: %s" % lxml.etree.tostring(detail))
318 if status ==
'ENGINEINFO':
319 version = detail.attrib[
"VERSION"]
320 conf = detail.attrib[
"CONF"]
321 rospy.loginfo(
"Version: %s (%s)" % (version, conf))
322 elif status ==
'SYSINFO':
323 rospy.loginfo(
"Status: %s" % detail.attrib[
"PROCESS"])
324 elif status ==
'GRAMINFO':
325 rospy.loginfo(
"Grammar Information:\n%s" % detail.text.strip())
326 elif status ==
'STARTPROC':
327 rospy.loginfo(
"Julius Engine initialized")
328 elif status ==
'ENDPROC':
329 rospy.loginfo(
"Julius Engine stopped")
330 elif status ==
'STARTRECOG':
331 rospy.logdebug(
"Start Recognize")
332 elif status ==
'ENDRECOG':
333 rospy.logdebug(
"End Recognize")
334 elif status ==
'RECOGFAIL':
335 rospy.logerr(
"Recognition Failed")
336 elif status ==
'RECOGOUT':
338 elif status ==
'INPUT':
339 substat = detail.attrib[
"STATUS"]
340 if substat ==
'STARTREC':
341 rospy.logdebug(
"Detect speak start")
342 elif substat ==
'ENDREC':
343 rospy.logdebug(
"Detect speak end")
344 elif status ==
'INPUTPARAM':
345 input_frame = int(detail.attrib[
"FRAMES"])
346 if input_frame >= 2000:
347 rospy.logwarn(
"Audio segment is too long!! Please volume down your microphone.")
348 elif status ==
'GRAMMAR':
349 substat = detail.attrib[
"STATUS"]
350 if substat ==
'RECEIVED':
352 elif substat ==
"ERROR":
353 reason = detail.attrib[
"REASON"]
354 rospy.logerr(
"Failed to change grammar: %s" % reason)
357 rospy.logwarn(
"Received %s" % status)
358 rospy.logwarn(
"%s", lxml.etree.tostring(detail))
360 if __name__ ==
'__main__':
361 rospy.init_node(
"julius_client")
def grammar_cb(self, msg)
def change_gram(self, name, dfa, dic)
def make_phonemes_from_words(words)
def deactivate_gram(self, name)
def speech_recognition_cb(self, req)
def do_gram(self, cmd_name, name, dfa, dic)
def add_gram(self, name, dfa, dic)
def make_dfa(grammar, voca)
def process_result(self, data)
def make_grammar_from_rules(rules)
def send_grammar_cmd(self, cmd)
start_signal_action_timeout
def vocabulary_cb(self, msg)
def status(self, args=None)
def julius_cb(self, data)
def activate_gram(self, name)
def play_sound(self, path, timeout=5.0)
def make_voca_from_categories(cats, vocas)