julius_client.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3 # Copyright: Yuki Furuta <furushchev@jsk.imi.i.u-tokyo.ac.jp>
4 
5 import actionlib
6 from threading import Lock
7 import os
8 import sys
9 import rospy
10 from sound_play.msg import SoundRequest, SoundRequestAction, SoundRequestGoal
11 from julius_ros.utils import make_phonemes_from_words
12 from julius_ros.utils import make_grammar_from_rules
13 from julius_ros.utils import make_voca_from_categories
14 from julius_ros.utils import make_dfa
15 from julius_ros.audio_transport import AudioTransport
16 from julius_ros.module_client import ModuleClient
17 from speech_recognition_msgs.msg import Grammar
18 from speech_recognition_msgs.msg import SpeechRecognitionCandidates
19 from speech_recognition_msgs.msg import Vocabulary
20 from speech_recognition_msgs.srv import SpeechRecognition
21 from speech_recognition_msgs.srv import SpeechRecognitionResponse
22 from std_srvs.srv import Empty, EmptyResponse
23 import lxml.etree
24 
25 
26 class JuliusClient(object):
27  start_signal = "/usr/share/sounds/ubuntu/stereo/bell.ogg"
28  success_signal = "/usr/share/sounds/ubuntu/stereo/message-new-instant.ogg"
29  timeout_signal = "/usr/share/sounds/ubuntu/stereo/window-slide.ogg"
30 
31  def __init__(self):
32  # load parameters
33  self.encoding = rospy.get_param("~encoding", "utf-8")
34  self.default_duration = rospy.get_param("~duration", 3.0)
35  self.default_threshold = rospy.get_param("~threshold", 0.8)
36  self.use_isolated_word = rospy.get_param("~use_isolated_word", True)
37  self.start_signal_action_timeout = rospy.get_param("~start_signal_action_timeout", 0.3)
38 
39  self.pub_speech_recognition = rospy.Publisher("speech_to_text",
40  SpeechRecognitionCandidates,
41  queue_size=1)
42 
43  # initialize sound play
44  self.act_sound = actionlib.SimpleActionClient("sound_play", SoundRequestAction)
45  if not self.act_sound.wait_for_server(rospy.Duration(5.0)):
46  rospy.logwarn("Failed to find sound_play action. Disabled audio alert")
47  self.act_sound = None
48 
49  # setup julius
50  host = rospy.get_param("~host", "localhost")
51  module_port = rospy.get_param("~module_port", 10500)
52  audio_port = rospy.get_param("~audio_port", 10501)
53  max_retry = rospy.get_param("~max_connection_retry", 0)
54 
55  self.module = ModuleClient(host, module_port, max_retry, self.encoding)
56  self.audio = AudioTransport(host, audio_port, max_retry, "audio")
57 
58  rospy.on_shutdown(self.shutdown_cb)
59  self.module.on_received_data(self.julius_cb)
60 
61  self.module.start()
62  self.audio.start()
63 
64  self.status()
65  self.start()
66 
67  # start subscribe
68  self.lock = Lock()
69  self.last_speech = SpeechRecognitionCandidates()
70  self.vocabularies = {}
71 
72  self.srv_show_status = rospy.Service("show_julius_status",
73  Empty, self.status)
74 
75  if self.use_isolated_word:
76  self.sub_vocabulary = rospy.Subscriber("vocabulary", Vocabulary,
77  self.vocabulary_cb)
78  else:
79  self.sub_grammar = rospy.Subscriber("grammar", Grammar,
80  self.grammar_cb)
81  self.srv_speech_recognition = rospy.Service("speech_recognition",
82  SpeechRecognition,
84 
85  def start(self):
86  self.grammar_changed = None
87  self.module.send_command(["INPUTONCHANGE", "PAUSE"])
88 
89  def status(self, args=None):
90  self.module.send_command(["VERSION"])
91  self.module.send_command(["STATUS"])
92  self.module.send_command(["GRAMINFO"])
93  return EmptyResponse()
94 
95  def play_sound(self, path, timeout=5.0):
96  if self.act_sound is None:
97  return
98  req = SoundRequest()
99  req.sound = SoundRequest.PLAY_FILE
100  req.command = SoundRequest.PLAY_ONCE
101  if hasattr(SoundRequest, 'volume'): # volume is added from 0.3.0 https://github.com/ros-drivers/audio_common/commit/da9623414f381642e52f59701c09928c72a54be7#diff-fe2d85580f1ccfed4e23a608df44a7f7
102  req.volume = 1.0
103  req.arg = path
104  goal = SoundRequestGoal(sound_request=req)
105  self.act_sound.send_goal_and_wait(goal, rospy.Duration(timeout))
106 
107  def send_grammar_cmd(self, cmd):
108  self.grammar_changed = None
109  self.module.send_command(cmd)
110  while self.grammar_changed is None:
111  rospy.sleep(0.01)
112  return self.grammar_changed
113 
114  def activate_gram(self, name):
115  with self.lock:
116  cmd = ["ACTIVATEGRAM", name]
117  self.module.send_command(cmd)
118  ok = True
119  if ok:
120  rospy.loginfo("Successfully activated grammar")
121  else:
122  rospy.logerr("Failed to activate grammar")
123  return ok
124 
125  def deactivate_gram(self, name):
126  with self.lock:
127  cmd = ["DEACTIVATEGRAM", name]
128  rospy.loginfo("Deactivating %s" % name)
129  self.module.send_command(cmd)
130  ok = True
131  if ok:
132  rospy.loginfo("Successfully deactivated grammar")
133  else:
134  rospy.logerr("Failed to deactivate grammar")
135  return ok
136 
137  def do_gram(self, cmd_name, name, dfa, dic):
138  with self.lock:
139  assert cmd_name
140  assert name
141  assert dic
142  cmd = ["%s %s" % (cmd_name, name)]
143  if dfa is not None:
144  cmd += [' ' + l.strip() for l in dfa if l.strip()]
145  cmd += ["DFAEND"]
146  cmd += [' ' + l.strip() for l in dic if l.strip()]
147  cmd += ["DICEND"]
148  ok = self.send_grammar_cmd(cmd)
149  if ok:
150  rospy.loginfo("%s Success" % cmd_name)
151  else:
152  rospy.logerr("%s Failed" % cmd_name)
153  return ok
154 
155  def add_gram(self, name, dfa, dic):
156  ok = self.do_gram("ADDGRAM", name, dfa, dic)
157  ok &= self.deactivate_gram(name)
158  return ok
159 
160  def change_gram(self, name, dfa, dic):
161  return self.do_gram("CHANGEGRAM", name, dfa, dic)
162 
163  def grammar_cb(self, msg):
164  if msg.name:
165  name = msg.name
166  else:
167  rospy.logwarn("Name of grammar is empty. Use 'unknown'")
168  name = 'unknown'
169  grammar = make_grammar_from_rules(msg.rules)
170  voca = make_voca_from_categories(msg.categories, msg.vocabularies)
171  result = make_dfa(grammar, voca)
172  if result is None:
173  rospy.logerr("Failed to make dfa from grammar message")
174  return
175  dfa, dic = result
176  ok = self.add_gram(name, dfa.split(os.linesep), dic.split(os.linesep))
177  if ok:
178  self.vocabularies[name] = list(set(e for v in msg.vocabularies for e in v.words))
179  else:
180  rospy.logerr("Failed to change vocabulary")
181 
182  def vocabulary_cb(self, msg):
183  if msg.name:
184  name = msg.name
185  else:
186  rospy.logwarn("Name of grammar is empty. Use 'unknown'")
187  name = 'unknown'
188  if len(msg.phonemes) == 0 or not msg.phonemes[0]:
189  phonemes = make_phonemes_from_words(msg.words)
190  else:
191  phonemes = msg.phonemes
192  dic = [" %s\t%s" % (w, p) for w, p in zip(msg.words, phonemes)]
193  ok = self.add_gram(name, None, dic)
194  if ok:
195  self.vocabularies[name] = list(set(msg.words))
196  else:
197  rospy.logerr("Failed to change vocabulary")
198 
199  def speech_recognition_cb(self, req):
200  res = SpeechRecognitionResponse()
201 
202  # change grammar
203  candidate_words = []
204  if req.grammar_name:
205  ok = self.activate_gram(req.grammar_name)
206  if not ok:
207  rospy.logerr("failed to activate grammar %s" % req.grammar_name)
208  return res
209  if req.grammar_name in self.vocabularies:
210  candidate_words = self.vocabularies[req.grammar_name]
211  elif req.grammar.rules:
212  g = req.grammar
213  if not g.name:
214  g.name = 'unknown'
215  grammar = make_grammar_from_rules(g.rules)
216  voca = make_voca_from_categories(g.categories, g.vocabularies)
217  result = make_dfa(grammar, voca)
218  if result is None:
219  rospy.logerr("Failed to make dfa from grammar message")
220  return res
221  dfa, dic = result
222  ok = self.change_gram(g.name, dfa.split(os.linesep), dic.split(os.linesep))
223  if not ok:
224  rospy.logerr("Failed to change grammar")
225  return res
226  self.vocabularies[g.name] = list(set(e for v in msg.vocabularies for e in v.words))
227  candidate_words = self.vocabularies[g.name]
228  elif req.vocabulary.words:
229  v = req.vocabulary
230  if not v.name:
231  v.name = 'unknown'
232  if len(v.phonemes) == 0 or not v.phonemes[0]:
233  v.phonemes = make_phonemes_from_words(v.words)
234  dic = [" %s\t%s" % (w, p) for w, p in zip(v.words, v.phonemes)]
235  ok = self.change_gram(v.name, None, dic)
236  if not ok:
237  rospy.logerr("Failed to change vocabulary")
238  return res
239  self.vocabularies[v.name] = list(set(v.words))
240  candidate_words = self.vocabularies[v.name]
241  else:
242  rospy.logerr("Invalid request: 'grammar_name', 'grammar' or 'vocabulary' must be filled")
243  return res
244 
245  duration = req.duration
246  if duration <= 0.0:
247  duration = self.default_duration
248 
249  threshold = req.threshold
250  if threshold <= 0.0 or threshold > 1.0:
251  threshold = self.default_threshold
252 
253  if not req.quiet:
255  start_time = rospy.Time.now()
256  self.last_speech = SpeechRecognitionCandidates()
257  while (rospy.Time.now() - start_time).to_sec() < duration:
258  rospy.sleep(0.1)
259  speech = self.last_speech
260  if not self.last_speech.transcript:
261  continue
262  if candidate_words:
263  ok = speech.transcript[0] in candidate_words and speech.confidence[0] >= threshold
264  else:
265  ok = speech.confidence[0] >= threshold
266  if ok:
267  t0 = speech.transcript[0]
268  c0 = speech.confidence[0]
269  rospy.loginfo("Recognized %s (%f)..." % (t0, c0))
270  if not req.quiet:
271  self.play_sound(self.success_signal, 0.1)
272  res.result = speech
273  return res
274 
275  # timeout
276  rospy.logerr("Timed out")
277  if not req.quiet:
278  self.play_sound(self.timeout_signal, 0.1)
279  return res
280 
281  def process_result(self, data):
282  results = {}
283  for shypo in data.xpath("//SHYPO"):
284  transcript = []
285  confidence = 0.0
286  for whypo in shypo.xpath("./WHYPO"):
287  if sys.version_info.major < 3:
288  word = whypo.attrib["WORD"].encode(self.encoding)
289  else:
290  word = whypo.attrib["WORD"]
291  if word.startswith("<"):
292  continue
293  transcript.append(word)
294  confidence += float(whypo.attrib["CM"])
295  confidence /= len(transcript)
296  transcript = " ".join(transcript)
297  results[confidence] = transcript
298 
299  msg = SpeechRecognitionCandidates()
300  debug_str = ["Recognized:"]
301  for i, result in enumerate(sorted(results.items(), reverse=True)):
302  c, t = result
303  debug_str += ["%d: %s (%.2f)" % (i+1, t, c)]
304  msg.transcript.append(t)
305  msg.confidence.append(c)
306  self.pub_speech_recognition.publish(msg)
307  self.last_speech = msg
308  rospy.logdebug(os.linesep.join(debug_str))
309 
310  def shutdown_cb(self):
311  self.module.join()
312  self.audio.join()
313 
314  def julius_cb(self, data):
315  status, detail = data
316  rospy.logdebug("status: %s" % status)
317  rospy.logdebug("detail: %s" % lxml.etree.tostring(detail))
318  if status == 'ENGINEINFO':
319  version = detail.attrib["VERSION"]
320  conf = detail.attrib["CONF"]
321  rospy.loginfo("Version: %s (%s)" % (version, conf))
322  elif status == 'SYSINFO':
323  rospy.loginfo("Status: %s" % detail.attrib["PROCESS"])
324  elif status == 'GRAMINFO':
325  rospy.loginfo("Grammar Information:\n%s" % detail.text.strip())
326  elif status == 'STARTPROC':
327  rospy.loginfo("Julius Engine initialized")
328  elif status == 'ENDPROC':
329  rospy.loginfo("Julius Engine stopped")
330  elif status == 'STARTRECOG':
331  rospy.logdebug("Start Recognize")
332  elif status == 'ENDRECOG':
333  rospy.logdebug("End Recognize")
334  elif status == 'RECOGFAIL':
335  rospy.logerr("Recognition Failed")
336  elif status == 'RECOGOUT':
337  self.process_result(detail)
338  elif status == 'INPUT':
339  substat = detail.attrib["STATUS"]
340  if substat == 'STARTREC':
341  rospy.logdebug("Detect speak start")
342  elif substat == 'ENDREC':
343  rospy.logdebug("Detect speak end")
344  elif status == 'INPUTPARAM':
345  input_frame = int(detail.attrib["FRAMES"])
346  if input_frame >= 2000:
347  rospy.logwarn("Audio segment is too long!! Please volume down your microphone.")
348  elif status == 'GRAMMAR':
349  substat = detail.attrib["STATUS"]
350  if substat == 'RECEIVED':
351  self.grammar_changed = True
352  elif substat == "ERROR":
353  reason = detail.attrib["REASON"]
354  rospy.logerr("Failed to change grammar: %s" % reason)
355  self.grammar_changed = False
356  else:
357  rospy.logwarn("Received %s" % status)
358  rospy.logwarn("%s", lxml.etree.tostring(detail))
359 
360 if __name__ == '__main__':
361  rospy.init_node("julius_client")
362  client = JuliusClient()
363  rospy.spin()
def change_gram(self, name, dfa, dic)
def make_phonemes_from_words(words)
Definition: utils.py:75
def deactivate_gram(self, name)
def speech_recognition_cb(self, req)
def do_gram(self, cmd_name, name, dfa, dic)
def add_gram(self, name, dfa, dic)
def make_dfa(grammar, voca)
Definition: utils.py:141
def process_result(self, data)
def make_grammar_from_rules(rules)
Definition: utils.py:99
def send_grammar_cmd(self, cmd)
def vocabulary_cb(self, msg)
def status(self, args=None)
def activate_gram(self, name)
def play_sound(self, path, timeout=5.0)
def make_voca_from_categories(cats, vocas)
Definition: utils.py:118


julius_ros
Author(s): Yuki Furuta
autogenerated on Wed Sep 2 2020 03:53:29