utils.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3 # Copyright: Yuki Furuta <furushchev@jsk.imi.i.u-tokyo.ac.jp>
4 
5 from cStringIO import StringIO
6 import os
7 import re
8 import rospkg
9 import rospy
10 import subprocess
11 import tempfile
12 from speech_recognition_msgs.msg import Grammar
13 from speech_recognition_msgs.msg import PhraseRule
14 from speech_recognition_msgs.msg import Vocabulary
15 
16 
# Byte-string pattern: one or more UTF-8 encoded hiragana (U+3041..U+3093).
_REGEX_HIRAGANA = re.compile(
    br'^(?:\xE3\x81[\x81-\xBF]|\xE3\x82[\x80-\x93])+$')
# Text pattern covering the same code points, for unicode input.
_REGEX_HIRAGANA_UNICODE = re.compile(u'^[\u3041-\u3093]+$')


def is_hiragana(s):
    """Return True if ``s`` is non-empty and made only of hiragana.

    Args:
        s: Either a UTF-8 encoded byte string or a unicode text string.

    Returns:
        bool: True when every character lies in U+3041..U+3093; False
        otherwise, including for the empty string.
    """
    # The byte pattern cannot match decoded text (and vice versa), so pick
    # the pattern that corresponds to the input's type.
    if isinstance(s, bytes):
        pattern = _REGEX_HIRAGANA
    else:
        pattern = _REGEX_HIRAGANA_UNICODE
    return pattern.search(s) is not None
22 
23 
def readlines_with_utf8(path):
    """Return the content of ``path`` as a list of lines, via ``nkf -w``.

    The file is passed through the external ``nkf`` command with the ``-w``
    option and the command's output is split on ``os.linesep``.  Because the
    output is split rather than line-iterated, a trailing line separator
    yields a final empty string in the list.

    Args:
        path: Path to an existing file.

    Returns:
        list: The lines of the converted output, without line separators.

    Raises:
        AssertionError: If ``path`` does not exist.
        subprocess.CalledProcessError: If ``nkf`` exits with a non-zero code.
    """
    assert os.path.exists(path)
    return subprocess.check_output(["nkf", "-w", path]).split(os.linesep)
27 
28 
def load_grammar(path, name):
    """Load ``<name>.grammar`` and ``<name>.voca`` from ``path`` into a Grammar.

    Each non-empty line of the grammar file must have the form
    ``symbol: word word ...`` and becomes one ``PhraseRule``.  In the voca
    file, a line starting with ``%`` opens a new category; every following
    non-empty line is ``word phoneme phoneme ...`` and is added to the
    ``Vocabulary`` of the current category.

    Args:
        path: Directory containing both files.
        name: Base name of the files (without extension).

    Returns:
        Grammar: Message filled with rules, categories and vocabularies.

    Raises:
        AssertionError: If ``path`` is not a directory (or, from the reader,
            if one of the files does not exist).
        ValueError: If a grammar line does not contain exactly one ``:``, or
            if a vocabulary entry appears before any ``%`` category line.
    """
    assert os.path.isdir(path)
    g = Grammar()

    grammar_path = os.path.join(path, "%s.grammar" % name)
    for line in readlines_with_utf8(grammar_path):
        line = line.strip()
        if not line:
            continue
        sym, defi = line.split(':')
        rule = PhraseRule()
        rule.symbol = sym.strip()
        # split() without arguments already discards surrounding whitespace.
        rule.definition = defi.split()
        g.rules.append(rule)

    voca_path = os.path.join(path, "%s.voca" % name)
    voca = None  # vocabulary of the category currently being read
    for line in readlines_with_utf8(voca_path):
        line = line.strip()
        if line.startswith('%'):
            g.categories.append(line[1:].strip())
            # Flush the previous category's vocabulary before starting anew.
            if voca is not None:
                g.vocabularies.append(voca)
            voca = Vocabulary()
        elif line:
            if voca is None:
                # Previously this failed with an opaque AttributeError on None.
                raise ValueError(
                    "vocabulary entry found before any '%%' category line "
                    "in %s: %r" % (voca_path, line))
            sp = line.split()
            voca.words.append(sp[0])
            voca.phonemes.append(' '.join(sp[1:]))
    if voca is not None:
        g.vocabularies.append(voca)
    return g
57 
58 
def make_phonemes_from_words(words):
    """Convert words to phoneme strings with julius' ``yomi2voca.pl``.

    Each word is written to the converter's stdin as ``"<word> <word>"`` (one
    per line, re-encoded from UTF-8 to EUC-JP).  The second tab-separated
    column of every output line is returned, re-encoded to UTF-8.

    Args:
        words: Sequence of UTF-8 encoded byte strings.

    Returns:
        list: One phoneme string per output line of the converter; an empty
        list when ``words`` is empty.  ``None`` if the converter's stderr
        contains ``"Error:"``.
    """
    if not words:
        # Nothing to convert: skip spawning the external process.
        return []

    cmd = ["rosrun", "julius", "yomi2voca.pl"]
    stdin = os.linesep.join(["%s %s" % (w, w) for w in words]) + os.linesep
    rospy.logdebug("Executing %s" % cmd)
    p = subprocess.Popen(cmd,
                         stdin=subprocess.PIPE,
                         stdout=subprocess.PIPE,
                         stderr=subprocess.PIPE)
    # The script is fed EUC-JP and its output is decoded as EUC-JP below.
    result, error = p.communicate(unicode(stdin, 'utf-8').encode('euc-jp'))
    rospy.logdebug("STDOUT: %s" % result)
    rospy.logdebug("STDERR: %s" % error)

    if error and "Error:" in error:
        error = unicode(error, 'euc-jp').encode('utf-8')
        rospy.logerr("Error: %s" % error)
        return None

    result = unicode(result, 'euc-jp').encode('utf-8')
    # Drop the empty string that follows the final line separator.
    lines = result.split(os.linesep)[:-1]
    return [line.split("\t")[1] for line in lines]
81 
82 
def make_grammar_from_rules(rules):
    """Serialize phrase rules into grammar-file text.

    Args:
        rules: Iterable of objects with a ``symbol`` string attribute and a
            ``definition`` attribute holding a sequence of strings.

    Returns:
        str: One ``"<symbol>: <definition words joined by spaces>"`` line per
        rule, each terminated by ``os.linesep``; ``""`` for no rules.
    """
    return "".join(
        "{symbol}: {definition}{linesep}".format(
            symbol=r.symbol,
            definition=" ".join(r.definition),
            linesep=os.linesep)
        for r in rules)
91 
92 
def make_voca_from_categories(cats, vocas):
    """Serialize categories and their vocabularies into voca-file text.

    For every (category, vocabulary) pair a ``"% <category>"`` header line is
    written, followed by one ``"<word>\\t<phoneme>"`` line per word; each word
    line is followed by an empty line.  When a vocabulary carries no
    phonemes, they are generated with ``make_phonemes_from_words``.

    Args:
        cats: Sequence of category names.
        vocas: Sequence of objects with ``words`` and ``phonemes`` attributes,
            paired with ``cats`` by position.

    Returns:
        str: The voca text; ``""`` when there are no categories.

    Raises:
        TypeError: If phoneme generation fails, because
            ``make_phonemes_from_words`` then returns ``None``, which cannot
            be zipped with the words.
    """
    chunks = []
    for c, vs in zip(cats, vocas):
        chunks.append("% {category}{linesep}".format(
            category=c,
            linesep=os.linesep))
        phonemes = vs.phonemes
        if not phonemes:
            phonemes = make_phonemes_from_words(vs.words)
        for w, p in zip(vs.words, phonemes):
            chunks.append("{word}\t{phoneme}{linesep}{linesep}".format(
                word=w,
                phoneme=p,
                linesep=os.linesep))
    return "".join(chunks)
108 
109 
def make_dfa(grammar, voca):
    """Compile grammar and voca text into DFA and dictionary text.

    The inputs are written to ``data.grammar`` and ``data.voca`` inside a
    fresh temporary directory, ``rosrun julius mkdfa.pl data`` is executed
    there, and the generated ``data.dfa`` and ``data.dict`` are read back.
    The temporary directory is removed before returning.

    Args:
        grammar: Content of the grammar file.
        voca: Content of the voca file.

    Returns:
        tuple: ``(dfa, dic)`` file contents on success, or ``None`` when the
        command's stdout does not contain ``"generated:"``.
    """
    # Local import so the cleanup below does not depend on file-level imports.
    import shutil

    name = "data"
    temp_dir = tempfile.mkdtemp(prefix="mkdfa")
    rospy.logdebug("created temp dir: %s" % temp_dir)
    try:
        grammar_file = os.path.join(temp_dir, "{name}.grammar".format(name=name))
        with open(grammar_file, "w") as f:
            f.write(grammar)
        voca_file = os.path.join(temp_dir, "{name}.voca".format(name=name))
        with open(voca_file, "w") as f:
            f.write(voca)

        cmd = ["rosrun", "julius", "mkdfa.pl", name]
        rospy.logdebug("Executing %s" % cmd)
        p = subprocess.Popen(cmd,
                             stdin=subprocess.PIPE,
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE,
                             cwd=temp_dir)
        # NOTE(review): temp_dir is sent on stdin; it is unclear whether
        # mkdfa.pl reads it. Kept unchanged.
        result, error = p.communicate(temp_dir)
        rospy.logdebug("STDOUT: %s" % result)
        rospy.logdebug("STDERR: %s" % error)

        if "generated:" not in result:
            rospy.logerr(
                "Failed to compile grammar to DFA: %s" % error.strip())
            return None

        with open(os.path.join(temp_dir, "{name}.dfa".format(name=name)), "r") as f:
            dfa = f.read()
        with open(os.path.join(temp_dir, "{name}.dict".format(name=name)), "r") as f:
            dic = f.read()
        return dfa, dic
    finally:
        # mkdtemp() leaves deletion to the caller; without this every call
        # leaked a directory. Results are already in memory at this point.
        shutil.rmtree(temp_dir, ignore_errors=True)
139 
140 
141 if __name__ == '__main__':
142  result = make_phonemes_from_words(["うどん", "そば"])
143  assert result[0] == "u d o N"
144  assert result[1] == "s o b a"
def readlines_with_utf8(path)
Definition: utils.py:24
def make_phonemes_from_words(words)
Definition: utils.py:59
def load_grammar(path, name)
Definition: utils.py:29
def make_dfa(grammar, voca)
Definition: utils.py:110
def is_hiragana(s)
Definition: utils.py:20
def make_grammar_from_rules(rules)
Definition: utils.py:83
def make_voca_from_categories(cats, vocas)
Definition: utils.py:93


julius_ros
Author(s): Yuki Furuta
autogenerated on Wed Jul 10 2019 03:47:05