00001
00002
00003
00004
00005 import actionlib
00006 from threading import Lock
00007 import os
00008 import rospy
00009 from sound_play.msg import SoundRequest, SoundRequestAction, SoundRequestGoal
00010 from julius_ros.utils import make_phonemes_from_words
00011 from julius_ros.utils import make_grammar_from_rules
00012 from julius_ros.utils import make_voca_from_categories
00013 from julius_ros.utils import make_dfa
00014 from julius_ros.audio_transport import AudioTransport
00015 from julius_ros.module_client import ModuleClient
00016 from speech_recognition_msgs.msg import Grammar
00017 from speech_recognition_msgs.msg import SpeechRecognitionCandidates
00018 from speech_recognition_msgs.msg import Vocabulary
00019 from speech_recognition_msgs.srv import SpeechRecognition
00020 from speech_recognition_msgs.srv import SpeechRecognitionResponse
00021 from std_srvs.srv import Empty, EmptyResponse
00022 import lxml.etree
00023
00024
00025 class JuliusClient(object):
00026 start_signal = "/usr/share/sounds/ubuntu/stereo/bell.ogg"
00027 success_signal = "/usr/share/sounds/ubuntu/stereo/message-new-instant.ogg"
00028 timeout_signal = "/usr/share/sounds/ubuntu/stereo/window-slide.ogg"
00029
00030 def __init__(self):
00031
00032 self.encoding = rospy.get_param("~encoding", "utf-8")
00033 self.default_duration = rospy.get_param("~duration", 3.0)
00034 self.default_threshold = rospy.get_param("~threshold", 0.8)
00035 self.use_isolated_word = rospy.get_param("~use_isolated_word", True)
00036 self.start_signal_action_timeout = rospy.get_param("~start_signal_action_timeout", 0.3)
00037
00038 self.pub_speech_recognition = rospy.Publisher("speech_to_text",
00039 SpeechRecognitionCandidates,
00040 queue_size=1)
00041
00042
00043 self.act_sound = actionlib.SimpleActionClient("sound_play", SoundRequestAction)
00044 if not self.act_sound.wait_for_server(rospy.Duration(5.0)):
00045 rospy.logwarn("Failed to find sound_play action. Disabled audio alert")
00046 self.act_sound = None
00047
00048
00049 host = rospy.get_param("~host", "localhost")
00050 module_port = rospy.get_param("~module_port", 10500)
00051 audio_port = rospy.get_param("~audio_port", 10501)
00052 max_retry = rospy.get_param("~max_connection_retry", 0)
00053
00054 self.module = ModuleClient(host, module_port, max_retry, self.encoding)
00055 self.audio = AudioTransport(host, audio_port, max_retry, "audio")
00056
00057 rospy.on_shutdown(self.shutdown_cb)
00058 self.module.on_received_data(self.julius_cb)
00059
00060 self.module.start()
00061 self.audio.start()
00062
00063 self.status()
00064 self.start()
00065
00066
00067 self.lock = Lock()
00068 self.last_speech = SpeechRecognitionCandidates()
00069 self.vocabularies = {}
00070
00071 self.srv_show_status = rospy.Service("show_julius_status",
00072 Empty, self.status)
00073
00074 if self.use_isolated_word:
00075 self.sub_vocabulary = rospy.Subscriber("vocabulary", Vocabulary,
00076 self.vocabulary_cb)
00077 else:
00078 self.sub_grammar = rospy.Subscriber("grammar", Grammar,
00079 self.grammar_cb)
00080 self.srv_speech_recognition = rospy.Service("speech_recognition",
00081 SpeechRecognition,
00082 self.speech_recognition_cb)
00083
00084 def start(self):
00085 self.grammar_changed = None
00086 self.module.send_command(["INPUTONCHANGE", "PAUSE"])
00087
00088 def status(self, args=None):
00089 self.module.send_command(["VERSION"])
00090 self.module.send_command(["STATUS"])
00091 self.module.send_command(["GRAMINFO"])
00092 return EmptyResponse()
00093
00094 def play_sound(self, path, timeout=5.0):
00095 if self.act_sound is None:
00096 return
00097 req = SoundRequest()
00098 req.sound = SoundRequest.PLAY_FILE
00099 req.command = SoundRequest.PLAY_ONCE
00100 req.arg = path
00101 goal = SoundRequestGoal(sound_request=req)
00102 self.act_sound.send_goal_and_wait(goal, rospy.Duration(timeout))
00103
00104 def send_grammar_cmd(self, cmd):
00105 self.grammar_changed = None
00106 self.module.send_command(cmd)
00107 while self.grammar_changed is None:
00108 rospy.sleep(0.01)
00109 return self.grammar_changed
00110
00111 def activate_gram(self, name):
00112 with self.lock:
00113 cmd = ["ACTIVATEGRAM", name]
00114 self.module.send_command(cmd)
00115 ok = True
00116 if ok:
00117 rospy.loginfo("Successfully activated grammar")
00118 else:
00119 rospy.logerr("Failed to activate grammar")
00120 return ok
00121
00122 def deactivate_gram(self, name):
00123 with self.lock:
00124 cmd = ["DEACTIVATEGRAM", name]
00125 rospy.loginfo("Deactivating %s" % name)
00126 self.module.send_command(cmd)
00127 ok = True
00128 if ok:
00129 rospy.loginfo("Successfully deactivated grammar")
00130 else:
00131 rospy.logerr("Failed to deactivate grammar")
00132 return ok
00133
00134 def do_gram(self, cmd_name, name, dfa, dic):
00135 with self.lock:
00136 assert cmd_name
00137 assert name
00138 assert dic
00139 cmd = ["%s %s" % (cmd_name, name)]
00140 if dfa is not None:
00141 cmd += [' ' + l.strip() for l in dfa if l.strip()]
00142 cmd += ["DFAEND"]
00143 cmd += [' ' + l.strip() for l in dic if l.strip()]
00144 cmd += ["DICEND"]
00145 ok = self.send_grammar_cmd(cmd)
00146 if ok:
00147 rospy.loginfo("%s Success" % cmd_name)
00148 else:
00149 rospy.logerr("%s Failed" % cmd_name)
00150 return ok
00151
00152 def add_gram(self, name, dfa, dic):
00153 ok = self.do_gram("ADDGRAM", name, dfa, dic)
00154 ok &= self.deactivate_gram(name)
00155 return ok
00156
00157 def change_gram(self, name, dfa, dic):
00158 return self.do_gram("CHANGEGRAM", name, dfa, dic)
00159
00160 def grammar_cb(self, msg):
00161 if msg.name:
00162 name = msg.name
00163 else:
00164 rospy.logwarn("Name of grammar is empty. Use 'unknown'")
00165 name = 'unknown'
00166 grammar = make_grammar_from_rules(msg.rules)
00167 voca = make_voca_from_categories(msg.categories, msg.vocabularies)
00168 result = make_dfa(grammar, voca)
00169 if result is None:
00170 rospy.logerr("Failed to make dfa from grammar message")
00171 return
00172 dfa, dic = result
00173 ok = self.add_gram(name, dfa.split(os.linesep), dic.split(os.linesep))
00174 if ok:
00175 self.vocabularies[name] = list(set(e for v in msg.vocabularies for e in v.words))
00176 else:
00177 rospy.logerr("Failed to change vocabulary")
00178
00179 def vocabulary_cb(self, msg):
00180 if msg.name:
00181 name = msg.name
00182 else:
00183 rospy.logwarn("Name of grammar is empty. Use 'unknown'")
00184 name = 'unknown'
00185 if len(msg.phonemes) == 0 or not msg.phonemes[0]:
00186 phonemes = make_phonemes_from_words(msg.words)
00187 else:
00188 phonemes = msg.phonemes
00189 dic = [" %s\t%s" % (w, p) for w, p in zip(msg.words, phonemes)]
00190 ok = self.add_gram(name, None, dic)
00191 if ok:
00192 self.vocabularies[name] = list(set(msg.words))
00193 else:
00194 rospy.logerr("Failed to change vocabulary")
00195
00196 def speech_recognition_cb(self, req):
00197 res = SpeechRecognitionResponse()
00198
00199
00200 candidate_words = []
00201 if req.grammar_name:
00202 ok = self.activate_gram(req.grammar_name)
00203 if not ok:
00204 rospy.logerr("failed to activate grammar %s" % req.grammar_name)
00205 return res
00206 if req.grammar_name in self.vocabularies:
00207 candidate_words = self.vocabularies[req.grammar_name]
00208 elif req.grammar.rules:
00209 g = req.grammar
00210 if not g.name:
00211 g.name = 'unknown'
00212 grammar = make_grammar_from_rules(g.rules)
00213 voca = make_voca_from_categories(g.categories, g.vocabularies)
00214 result = make_dfa(grammar, voca)
00215 if result is None:
00216 rospy.logerr("Failed to make dfa from grammar message")
00217 return res
00218 dfa, dic = result
00219 ok = self.change_gram(g.name, dfa.split(os.linesep), dic.split(os.linesep))
00220 if not ok:
00221 rospy.logerr("Failed to change grammar")
00222 return res
00223 self.vocabularies[g.name] = list(set(e for v in msg.vocabularies for e in v.words))
00224 candidate_words = self.vocabularies[g.name]
00225 elif req.vocabulary.words:
00226 v = req.vocabulary
00227 if not v.name:
00228 v.name = 'unknown'
00229 if len(v.phonemes) == 0 or not v.phonemes[0]:
00230 v.phonemes = make_phonemes_from_words(v.words)
00231 dic = [" %s\t%s" % (w, p) for w, p in zip(v.words, v.phonemes)]
00232 ok = self.change_gram(v.name, None, dic)
00233 if not ok:
00234 rospy.logerr("Failed to change vocabulary")
00235 return res
00236 self.vocabularies[v.name] = list(set(v.words))
00237 candidate_words = self.vocabularies[v.name]
00238 else:
00239 rospy.logerr("Invalid request: 'grammar_name', 'grammar' or 'vocabulary' must be filled")
00240 return res
00241
00242 duration = req.duration
00243 if duration <= 0.0:
00244 duration = self.default_duration
00245
00246 threshold = req.threshold
00247 if threshold <= 0.0 or threshold > 1.0:
00248 threshold = self.default_threshold
00249
00250 if not req.quiet:
00251 self.play_sound(self.start_signal, self.start_signal_action_timeout)
00252 start_time = rospy.Time.now()
00253 self.last_speech = SpeechRecognitionCandidates()
00254 while (rospy.Time.now() - start_time).to_sec() < duration:
00255 rospy.sleep(0.1)
00256 speech = self.last_speech
00257 if not self.last_speech.transcript:
00258 continue
00259 if candidate_words:
00260 ok = speech.transcript[0] in candidate_words and speech.confidence[0] >= threshold
00261 else:
00262 ok = speech.confidence[0] >= threshold
00263 if ok:
00264 t0 = speech.transcript[0]
00265 c0 = speech.confidence[0]
00266 rospy.loginfo("Recognized %s (%f)..." % (t0, c0))
00267 if not req.quiet:
00268 self.play_sound(self.success_signal, 0.1)
00269 res.result = speech
00270 return res
00271
00272
00273 rospy.logerr("Timed out")
00274 if not req.quiet:
00275 self.play_sound(self.timeout_signal, 0.1)
00276 return res
00277
00278 def process_result(self, data):
00279 results = {}
00280 for shypo in data.xpath("//SHYPO"):
00281 transcript = []
00282 confidence = 0.0
00283 for whypo in shypo.xpath("./WHYPO"):
00284 word = whypo.attrib["WORD"].encode(self.encoding)
00285 if word.startswith("<"):
00286 continue
00287 transcript.append(word)
00288 confidence += float(whypo.attrib["CM"])
00289 confidence /= len(transcript)
00290 transcript = " ".join(transcript)
00291 results[confidence] = transcript
00292
00293 msg = SpeechRecognitionCandidates()
00294 debug_str = ["Recognized:"]
00295 for i, result in enumerate(sorted(results.items(), reverse=True)):
00296 c, t = result
00297 debug_str += ["%d: %s (%.2f)" % (i+1, t, c)]
00298 msg.transcript.append(t)
00299 msg.confidence.append(c)
00300 self.pub_speech_recognition.publish(msg)
00301 self.last_speech = msg
00302 rospy.logdebug(os.linesep.join(debug_str))
00303
00304 def shutdown_cb(self):
00305 self.module.join()
00306 self.audio.join()
00307
00308 def julius_cb(self, data):
00309 status, detail = data
00310 rospy.logdebug("status: %s" % status)
00311 rospy.logdebug("detail: %s" % lxml.etree.tostring(detail))
00312 if status == 'ENGINEINFO':
00313 version = detail.attrib["VERSION"]
00314 conf = detail.attrib["CONF"]
00315 rospy.loginfo("Version: %s (%s)" % (version, conf))
00316 elif status == 'SYSINFO':
00317 rospy.loginfo("Status: %s" % detail.attrib["PROCESS"])
00318 elif status == 'GRAMINFO':
00319 rospy.loginfo("Grammar Information:\n%s" % detail.text.strip())
00320 elif status == 'STARTPROC':
00321 rospy.loginfo("Julius Engine initialized")
00322 elif status == 'ENDPROC':
00323 rospy.loginfo("Julius Engine stopped")
00324 elif status == 'STARTRECOG':
00325 rospy.logdebug("Start Recognize")
00326 elif status == 'ENDRECOG':
00327 rospy.logdebug("End Recognize")
00328 elif status == 'RECOGFAIL':
00329 rospy.logerr("Recognition Failed")
00330 elif status == 'RECOGOUT':
00331 self.process_result(detail)
00332 elif status == 'INPUT':
00333 substat = detail.attrib["STATUS"]
00334 if substat == 'STARTREC':
00335 rospy.logdebug("Detect speak start")
00336 elif substat == 'ENDREC':
00337 rospy.logdebug("Detect speak end")
00338 elif status == 'INPUTPARAM':
00339 input_frame = int(detail.attrib["FRAMES"])
00340 if input_frame >= 2000:
00341 rospy.logwarn("Audio segment is too long!! Please volume down your microphone.")
00342 elif status == 'GRAMMAR':
00343 substat = detail.attrib["STATUS"]
00344 if substat == 'RECEIVED':
00345 self.grammar_changed = True
00346 elif substat == "ERROR":
00347 reason = detail.attrib["REASON"]
00348 rospy.logerr("Failed to change grammar: %s" % reason)
00349 self.grammar_changed = False
00350 else:
00351 rospy.logwarn("Received %s" % status)
00352 rospy.logwarn("%s", lxml.etree.tostring(detail))
00353
00354 if __name__ == '__main__':
00355 rospy.init_node("julius_client")
00356 client = JuliusClient()
00357 rospy.spin()