julius_client.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3 # Copyright: Yuki Furuta <furushchev@jsk.imi.i.u-tokyo.ac.jp>
4 
5 import actionlib
6 from threading import Lock
7 import os
8 import rospy
9 from sound_play.msg import SoundRequest, SoundRequestAction, SoundRequestGoal
10 from julius_ros.utils import make_phonemes_from_words
11 from julius_ros.utils import make_grammar_from_rules
12 from julius_ros.utils import make_voca_from_categories
13 from julius_ros.utils import make_dfa
14 from julius_ros.audio_transport import AudioTransport
15 from julius_ros.module_client import ModuleClient
16 from speech_recognition_msgs.msg import Grammar
17 from speech_recognition_msgs.msg import SpeechRecognitionCandidates
18 from speech_recognition_msgs.msg import Vocabulary
19 from speech_recognition_msgs.srv import SpeechRecognition
20 from speech_recognition_msgs.srv import SpeechRecognitionResponse
21 from std_srvs.srv import Empty, EmptyResponse
22 import lxml.etree
23 
24 
25 class JuliusClient(object):
26  start_signal = "/usr/share/sounds/ubuntu/stereo/bell.ogg"
27  success_signal = "/usr/share/sounds/ubuntu/stereo/message-new-instant.ogg"
28  timeout_signal = "/usr/share/sounds/ubuntu/stereo/window-slide.ogg"
29 
30  def __init__(self):
31  # load parameters
32  self.encoding = rospy.get_param("~encoding", "utf-8")
33  self.default_duration = rospy.get_param("~duration", 3.0)
34  self.default_threshold = rospy.get_param("~threshold", 0.8)
35  self.use_isolated_word = rospy.get_param("~use_isolated_word", True)
36  self.start_signal_action_timeout = rospy.get_param("~start_signal_action_timeout", 0.3)
37 
38  self.pub_speech_recognition = rospy.Publisher("speech_to_text",
39  SpeechRecognitionCandidates,
40  queue_size=1)
41 
42  # initialize sound play
43  self.act_sound = actionlib.SimpleActionClient("sound_play", SoundRequestAction)
44  if not self.act_sound.wait_for_server(rospy.Duration(5.0)):
45  rospy.logwarn("Failed to find sound_play action. Disabled audio alert")
46  self.act_sound = None
47 
48  # setup julius
49  host = rospy.get_param("~host", "localhost")
50  module_port = rospy.get_param("~module_port", 10500)
51  audio_port = rospy.get_param("~audio_port", 10501)
52  max_retry = rospy.get_param("~max_connection_retry", 0)
53 
54  self.module = ModuleClient(host, module_port, max_retry, self.encoding)
55  self.audio = AudioTransport(host, audio_port, max_retry, "audio")
56 
57  rospy.on_shutdown(self.shutdown_cb)
58  self.module.on_received_data(self.julius_cb)
59 
60  self.module.start()
61  self.audio.start()
62 
63  self.status()
64  self.start()
65 
66  # start subscribe
67  self.lock = Lock()
68  self.last_speech = SpeechRecognitionCandidates()
69  self.vocabularies = {}
70 
71  self.srv_show_status = rospy.Service("show_julius_status",
72  Empty, self.status)
73 
74  if self.use_isolated_word:
75  self.sub_vocabulary = rospy.Subscriber("vocabulary", Vocabulary,
76  self.vocabulary_cb)
77  else:
78  self.sub_grammar = rospy.Subscriber("grammar", Grammar,
79  self.grammar_cb)
80  self.srv_speech_recognition = rospy.Service("speech_recognition",
81  SpeechRecognition,
83 
84  def start(self):
85  self.grammar_changed = None
86  self.module.send_command(["INPUTONCHANGE", "PAUSE"])
87 
88  def status(self, args=None):
89  self.module.send_command(["VERSION"])
90  self.module.send_command(["STATUS"])
91  self.module.send_command(["GRAMINFO"])
92  return EmptyResponse()
93 
94  def play_sound(self, path, timeout=5.0):
95  if self.act_sound is None:
96  return
97  req = SoundRequest()
98  req.sound = SoundRequest.PLAY_FILE
99  req.command = SoundRequest.PLAY_ONCE
100  req.arg = path
101  goal = SoundRequestGoal(sound_request=req)
102  self.act_sound.send_goal_and_wait(goal, rospy.Duration(timeout))
103 
104  def send_grammar_cmd(self, cmd):
105  self.grammar_changed = None
106  self.module.send_command(cmd)
107  while self.grammar_changed is None:
108  rospy.sleep(0.01)
109  return self.grammar_changed
110 
111  def activate_gram(self, name):
112  with self.lock:
113  cmd = ["ACTIVATEGRAM", name]
114  self.module.send_command(cmd)
115  ok = True
116  if ok:
117  rospy.loginfo("Successfully activated grammar")
118  else:
119  rospy.logerr("Failed to activate grammar")
120  return ok
121 
122  def deactivate_gram(self, name):
123  with self.lock:
124  cmd = ["DEACTIVATEGRAM", name]
125  rospy.loginfo("Deactivating %s" % name)
126  self.module.send_command(cmd)
127  ok = True
128  if ok:
129  rospy.loginfo("Successfully deactivated grammar")
130  else:
131  rospy.logerr("Failed to deactivate grammar")
132  return ok
133 
134  def do_gram(self, cmd_name, name, dfa, dic):
135  with self.lock:
136  assert cmd_name
137  assert name
138  assert dic
139  cmd = ["%s %s" % (cmd_name, name)]
140  if dfa is not None:
141  cmd += [' ' + l.strip() for l in dfa if l.strip()]
142  cmd += ["DFAEND"]
143  cmd += [' ' + l.strip() for l in dic if l.strip()]
144  cmd += ["DICEND"]
145  ok = self.send_grammar_cmd(cmd)
146  if ok:
147  rospy.loginfo("%s Success" % cmd_name)
148  else:
149  rospy.logerr("%s Failed" % cmd_name)
150  return ok
151 
152  def add_gram(self, name, dfa, dic):
153  ok = self.do_gram("ADDGRAM", name, dfa, dic)
154  ok &= self.deactivate_gram(name)
155  return ok
156 
157  def change_gram(self, name, dfa, dic):
158  return self.do_gram("CHANGEGRAM", name, dfa, dic)
159 
160  def grammar_cb(self, msg):
161  if msg.name:
162  name = msg.name
163  else:
164  rospy.logwarn("Name of grammar is empty. Use 'unknown'")
165  name = 'unknown'
166  grammar = make_grammar_from_rules(msg.rules)
167  voca = make_voca_from_categories(msg.categories, msg.vocabularies)
168  result = make_dfa(grammar, voca)
169  if result is None:
170  rospy.logerr("Failed to make dfa from grammar message")
171  return
172  dfa, dic = result
173  ok = self.add_gram(name, dfa.split(os.linesep), dic.split(os.linesep))
174  if ok:
175  self.vocabularies[name] = list(set(e for v in msg.vocabularies for e in v.words))
176  else:
177  rospy.logerr("Failed to change vocabulary")
178 
179  def vocabulary_cb(self, msg):
180  if msg.name:
181  name = msg.name
182  else:
183  rospy.logwarn("Name of grammar is empty. Use 'unknown'")
184  name = 'unknown'
185  if len(msg.phonemes) == 0 or not msg.phonemes[0]:
186  phonemes = make_phonemes_from_words(msg.words)
187  else:
188  phonemes = msg.phonemes
189  dic = [" %s\t%s" % (w, p) for w, p in zip(msg.words, phonemes)]
190  ok = self.add_gram(name, None, dic)
191  if ok:
192  self.vocabularies[name] = list(set(msg.words))
193  else:
194  rospy.logerr("Failed to change vocabulary")
195 
196  def speech_recognition_cb(self, req):
197  res = SpeechRecognitionResponse()
198 
199  # change grammar
200  candidate_words = []
201  if req.grammar_name:
202  ok = self.activate_gram(req.grammar_name)
203  if not ok:
204  rospy.logerr("failed to activate grammar %s" % req.grammar_name)
205  return res
206  if req.grammar_name in self.vocabularies:
207  candidate_words = self.vocabularies[req.grammar_name]
208  elif req.grammar.rules:
209  g = req.grammar
210  if not g.name:
211  g.name = 'unknown'
212  grammar = make_grammar_from_rules(g.rules)
213  voca = make_voca_from_categories(g.categories, g.vocabularies)
214  result = make_dfa(grammar, voca)
215  if result is None:
216  rospy.logerr("Failed to make dfa from grammar message")
217  return res
218  dfa, dic = result
219  ok = self.change_gram(g.name, dfa.split(os.linesep), dic.split(os.linesep))
220  if not ok:
221  rospy.logerr("Failed to change grammar")
222  return res
223  self.vocabularies[g.name] = list(set(e for v in msg.vocabularies for e in v.words))
224  candidate_words = self.vocabularies[g.name]
225  elif req.vocabulary.words:
226  v = req.vocabulary
227  if not v.name:
228  v.name = 'unknown'
229  if len(v.phonemes) == 0 or not v.phonemes[0]:
230  v.phonemes = make_phonemes_from_words(v.words)
231  dic = [" %s\t%s" % (w, p) for w, p in zip(v.words, v.phonemes)]
232  ok = self.change_gram(v.name, None, dic)
233  if not ok:
234  rospy.logerr("Failed to change vocabulary")
235  return res
236  self.vocabularies[v.name] = list(set(v.words))
237  candidate_words = self.vocabularies[v.name]
238  else:
239  rospy.logerr("Invalid request: 'grammar_name', 'grammar' or 'vocabulary' must be filled")
240  return res
241 
242  duration = req.duration
243  if duration <= 0.0:
244  duration = self.default_duration
245 
246  threshold = req.threshold
247  if threshold <= 0.0 or threshold > 1.0:
248  threshold = self.default_threshold
249 
250  if not req.quiet:
252  start_time = rospy.Time.now()
253  self.last_speech = SpeechRecognitionCandidates()
254  while (rospy.Time.now() - start_time).to_sec() < duration:
255  rospy.sleep(0.1)
256  speech = self.last_speech
257  if not self.last_speech.transcript:
258  continue
259  if candidate_words:
260  ok = speech.transcript[0] in candidate_words and speech.confidence[0] >= threshold
261  else:
262  ok = speech.confidence[0] >= threshold
263  if ok:
264  t0 = speech.transcript[0]
265  c0 = speech.confidence[0]
266  rospy.loginfo("Recognized %s (%f)..." % (t0, c0))
267  if not req.quiet:
268  self.play_sound(self.success_signal, 0.1)
269  res.result = speech
270  return res
271 
272  # timeout
273  rospy.logerr("Timed out")
274  if not req.quiet:
275  self.play_sound(self.timeout_signal, 0.1)
276  return res
277 
278  def process_result(self, data):
279  results = {}
280  for shypo in data.xpath("//SHYPO"):
281  transcript = []
282  confidence = 0.0
283  for whypo in shypo.xpath("./WHYPO"):
284  word = whypo.attrib["WORD"].encode(self.encoding)
285  if word.startswith("<"):
286  continue
287  transcript.append(word)
288  confidence += float(whypo.attrib["CM"])
289  confidence /= len(transcript)
290  transcript = " ".join(transcript)
291  results[confidence] = transcript
292 
293  msg = SpeechRecognitionCandidates()
294  debug_str = ["Recognized:"]
295  for i, result in enumerate(sorted(results.items(), reverse=True)):
296  c, t = result
297  debug_str += ["%d: %s (%.2f)" % (i+1, t, c)]
298  msg.transcript.append(t)
299  msg.confidence.append(c)
300  self.pub_speech_recognition.publish(msg)
301  self.last_speech = msg
302  rospy.logdebug(os.linesep.join(debug_str))
303 
304  def shutdown_cb(self):
305  self.module.join()
306  self.audio.join()
307 
308  def julius_cb(self, data):
309  status, detail = data
310  rospy.logdebug("status: %s" % status)
311  rospy.logdebug("detail: %s" % lxml.etree.tostring(detail))
312  if status == 'ENGINEINFO':
313  version = detail.attrib["VERSION"]
314  conf = detail.attrib["CONF"]
315  rospy.loginfo("Version: %s (%s)" % (version, conf))
316  elif status == 'SYSINFO':
317  rospy.loginfo("Status: %s" % detail.attrib["PROCESS"])
318  elif status == 'GRAMINFO':
319  rospy.loginfo("Grammar Information:\n%s" % detail.text.strip())
320  elif status == 'STARTPROC':
321  rospy.loginfo("Julius Engine initialized")
322  elif status == 'ENDPROC':
323  rospy.loginfo("Julius Engine stopped")
324  elif status == 'STARTRECOG':
325  rospy.logdebug("Start Recognize")
326  elif status == 'ENDRECOG':
327  rospy.logdebug("End Recognize")
328  elif status == 'RECOGFAIL':
329  rospy.logerr("Recognition Failed")
330  elif status == 'RECOGOUT':
331  self.process_result(detail)
332  elif status == 'INPUT':
333  substat = detail.attrib["STATUS"]
334  if substat == 'STARTREC':
335  rospy.logdebug("Detect speak start")
336  elif substat == 'ENDREC':
337  rospy.logdebug("Detect speak end")
338  elif status == 'INPUTPARAM':
339  input_frame = int(detail.attrib["FRAMES"])
340  if input_frame >= 2000:
341  rospy.logwarn("Audio segment is too long!! Please volume down your microphone.")
342  elif status == 'GRAMMAR':
343  substat = detail.attrib["STATUS"]
344  if substat == 'RECEIVED':
345  self.grammar_changed = True
346  elif substat == "ERROR":
347  reason = detail.attrib["REASON"]
348  rospy.logerr("Failed to change grammar: %s" % reason)
349  self.grammar_changed = False
350  else:
351  rospy.logwarn("Received %s" % status)
352  rospy.logwarn("%s", lxml.etree.tostring(detail))
353 
354 if __name__ == '__main__':
355  rospy.init_node("julius_client")
356  client = JuliusClient()
357  rospy.spin()
def change_gram(self, name, dfa, dic)
def make_phonemes_from_words(words)
Definition: utils.py:59
def deactivate_gram(self, name)
def speech_recognition_cb(self, req)
def do_gram(self, cmd_name, name, dfa, dic)
def add_gram(self, name, dfa, dic)
def make_dfa(grammar, voca)
Definition: utils.py:110
def process_result(self, data)
def make_grammar_from_rules(rules)
Definition: utils.py:83
def send_grammar_cmd(self, cmd)
def vocabulary_cb(self, msg)
def status(self, args=None)
def activate_gram(self, name)
def play_sound(self, path, timeout=5.0)
def make_voca_from_categories(cats, vocas)
Definition: utils.py:93


julius_ros
Author(s): Yuki Furuta
autogenerated on Wed Jul 10 2019 03:47:05