mimic_tts.py
Go to the documentation of this file.
1 # Copyright 2017 Mycroft AI Inc.
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
6 #
7 # http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14 #
15 import os
16 import stat
17 import subprocess
18 from threading import Thread
19 from time import time, sleep
20 
21 import os.path
22 from os.path import exists, join, expanduser
23 
24 from mycroft import MYCROFT_ROOT_PATH
25 from mycroft.api import DeviceApi
26 from mycroft.configuration import Configuration
27 from mycroft.tts import TTS, TTSValidator
28 from mycroft.util.download import download
29 from mycroft.util.log import LOG
30 
# Mimic section of the "tts" configuration, and the user-expanded data dir.
config = Configuration.get().get("tts").get("mimic")
data_dir = expanduser(Configuration.get()['data_dir'])

# Path of the mimic executable: the configured "path" when present,
# otherwise the copy located under the Mycroft root directory.
BIN = config.get(
    "path", join(MYCROFT_ROOT_PATH, 'mimic', 'bin', 'mimic')
)

if not os.path.isfile(BIN):
    # Nothing at that location, look for mimic on the PATH instead.
    # NOTE(review): find_executable returns None when nothing is found,
    # so BIN may be None after this point -- confirm callers cope.
    import distutils.spawn

    BIN = distutils.spawn.find_executable("mimic")

# Subscriber voices, mapped to the file each one is stored in.
SUBSCRIBER_VOICES = {'trinity': join(data_dir, 'voices/mimic_tn')}
44 
45 
def download_subscriber_voices(selected_voice):
    """
        Download every subscriber voice that is missing on disk.

        The currently selected voice is handled first (when it is a
        subscriber voice), then all remaining entries of SUBSCRIBER_VOICES.

        Arguments:
            selected_voice: name of the voice currently in use
    """

    def mark_executable(path):
        """ Completion callback, adds the owner-execute bit to the file. """
        LOG.info('Make executable')
        mode = os.stat(path).st_mode
        os.chmod(path, mode | stat.S_IEXEC)

    def fetch(name, target):
        """ Download voice `name` to `target` and block until it is done. """
        url = DeviceApi().get_subscriber_voice_url(name)
        if not url:
            # No url was returned for this voice
            LOG.debug('{} is not available for this architecture'
                      .format(name))
            return
        transfer = download(url, target, mark_executable)
        # Poll once a second until the download reports completion
        while not transfer.done:
            sleep(1)

    # The selected voice gets priority
    selected_file = SUBSCRIBER_VOICES.get(selected_voice)
    if selected_file is not None and not exists(selected_file):
        LOG.info("voice doesn't exist, downloading")
        fetch(selected_voice, selected_file)

    # Then any other subscriber voice that is still missing
    for name, target in SUBSCRIBER_VOICES.items():
        if not exists(target):
            fetch(name, target)
88 
89 
class Mimic(TTS):
    """ TTS engine that produces speech by running the mimic executable. """

    def __init__(self, lang, config):
        """
            Set up the engine and start fetching subscriber voices.

            Arguments:
                lang: language passed through to the TTS base class
                config: configuration passed through to the TTS base class
        """
        super(Mimic, self).__init__(
            lang, config, MimicValidator(self), 'wav',
            ssml_tags=["speak", "ssml", "phoneme", "voice", "audio", "prosody"]
        )
        self.dl = None
        self.clear_cache()

        # Download subscriber voices if needed, in a daemon thread so the
        # constructor does not wait for the downloads.
        self.is_subscriber = DeviceApi().is_subscriber
        if self.is_subscriber:
            t = Thread(target=download_subscriber_voices, args=[self.voice])
            t.daemon = True
            t.start()

    def modify_tag(self, tag):
        """
            Rewrite named rate keywords in a tag to numeric values.

            Arguments:
                tag (str): tag text to rewrite

            Returns:
                (str) the tag with each keyword replaced, and 'speed'
                replaced by 'rate'
        """
        # Replacements run in order, so the 'x-' variants must come before
        # their shorter forms; otherwise 'x-high' turns into 'x-1.3'.
        for key, value in [
            ('x-slow', '0.4'),
            ('slow', '0.7'),
            ('medium', '1.0'),
            ('x-high', '1.6'),
            ('high', '1.3'),
            ('speed', 'rate')
        ]:
            tag = tag.replace(key, value)
        return tag

    @property
    def args(self):
        """ Build mimic arguments. """
        if (self.voice in SUBSCRIBER_VOICES and
                exists(SUBSCRIBER_VOICES[self.voice]) and self.is_subscriber):
            # Use subscriber voice
            mimic_bin = SUBSCRIBER_VOICES[self.voice]
            voice = self.voice
        elif self.voice in SUBSCRIBER_VOICES:
            # Premium voice but bin doesn't exist, use ap while downloading
            mimic_bin = BIN
            voice = 'ap'
        else:
            # Normal case use normal binary and selected voice
            mimic_bin = BIN
            voice = self.voice

        args = [mimic_bin, '-voice', voice, '-psdur', '-ssml']

        # This reads the module-level mimic config, not the constructor's
        # `config` argument.
        stretch = config.get('duration_stretch', None)
        if stretch:
            # str() so a numeric value from the configuration does not
            # raise TypeError on concatenation.
            args += ['--setf', 'duration_stretch=' + str(stretch)]
        return args

    def get_tts(self, sentence, wav_file):
        """
            Run mimic to synthesize a sentence.

            Arguments:
                sentence (str): text handed to mimic with '-t'
                wav_file (str): output path handed to mimic with '-o'

            Returns:
                tuple (wav_file, decoded text printed by mimic)

            Raises:
                subprocess.CalledProcessError: mimic exited with an error
        """
        # Generate WAV and phonemes
        phonemes = subprocess.check_output(self.args + ['-o', wav_file,
                                                        '-t', sentence])
        return wav_file, phonemes.decode()

    def viseme(self, output):
        """
            Convert "phoneme:duration" pairs into viseme tuples.

            Arguments:
                output: space separated phoneme:duration pairs

            Returns:
                list of (viseme code, duration as float) tuples; phonemes
                missing from VISIMES get the code '4'
        """
        visemes = []
        pairs = str(output).split(" ")
        for pair in pairs:
            pho_dur = pair.split(":")  # phoneme:duration
            # Anything that is not exactly "phoneme:duration" is skipped
            if len(pho_dur) == 2:
                visemes.append((VISIMES.get(pho_dur[0], '4'),
                                float(pho_dur[1])))
        return visemes
158 
159 
class MimicValidator(TTSValidator):
    """ Validator checking that the mimic executable can be run. """
    # NOTE(review): the `class` line and the `def validate_connection` line
    # were missing from the listing this block was taken from. Both are
    # restored here -- confirm against the upstream file.

    def __init__(self, tts):
        super(MimicValidator, self).__init__(tts)

    def validate_lang(self):
        # TODO: Verify version of mimic can handle the requested language
        pass

    def validate_connection(self):
        """
            Check that mimic can be started.

            Raises:
                Exception: running `BIN --version` raised an error
        """
        try:
            subprocess.call([BIN, '--version'])
        except Exception:
            # BIN can be None when no executable was located; format()
            # avoids a TypeError here that would hide the error below.
            LOG.info("Failed to find mimic at: {}".format(BIN))
            raise Exception(
                'Mimic was not found. Run install-mimic.sh to install it.')

    def get_tts_class(self):
        return Mimic
178 
179 
# Phoneme to viseme table.
#
# Mapping based on Jeffers phoneme to viseme map, seen in table 1 from:
# http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.221.6377&rep=rep1&type=pdf
#
# Mycroft unit visemes based on images found at:
# http://www.web3.lu/wp-content/uploads/2014/09/visemes.jpg
#
# Mapping was created partially based on the "12 mouth shapes" visuals
# seen at:
# https://wolfpaulus.com/journal/software/lipsynchronization/

VISIMES = {
    phoneme: code
    for code, phonemes in (
        ('5', ('v', 'f')),                          # /A group
        ('2', ('uh', 'w', 'uw', 'er', 'r', 'ow')),  # /B group
        ('4', ('b', 'p', 'm')),                     # /C group
        ('1', ('aw',)),                             # /D group
        ('3', ('th', 'dh')),                        # /E group
        ('3', ('zh', 'ch', 'sh', 'jh')),            # /F group
        ('6', ('oy', 'ao')),                        # /G group
        ('3', ('z', 's')),                          # /H group
        ('0', ('ae', 'eh', 'ey', 'ah', 'ih', 'y',   # /I group
               'iy', 'aa', 'ay', 'ax', 'hh')),
        ('3', ('n', 't', 'd', 'l')),                # /J group
        ('3', ('g', 'ng', 'k')),                    # /K group
        ('4', ('pau',)),                            # blank mouth
    )
    for phoneme in phonemes
}
def download_subscriber_voices(selected_voice)
Definition: mimic_tts.py:46
def download(url, dest, complete_action=None, header=None)
Definition: download.py:124
def __init__(self, lang, config)
Definition: mimic_tts.py:91
def viseme(self, output)
Definition: mimic_tts.py:148
def get(phrase, lang=None, context=None)
def modify_tag(self, tag)
Definition: mimic_tts.py:106
def get_tts(self, sentence, wav_file)
Definition: mimic_tts.py:142


mycroft_ros
Author(s):
autogenerated on Mon Apr 26 2021 02:35:40