scripts/mycroft/tts/__init__.py
Go to the documentation of this file.
# Copyright 2017 Mycroft AI Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
15 import hashlib
16 import os
17 import random
18 import re
19 import sys
20 from abc import ABCMeta, abstractmethod
21 from threading import Thread
22 from time import time, sleep
23 
24 import os.path
25 from os.path import dirname, exists, isdir, join
26 
27 import mycroft.util
28 from mycroft.enclosure.api import EnclosureAPI
29 from mycroft.configuration import Configuration
30 from mycroft.messagebus.message import Message
31 from mycroft.metrics import report_timing, Stopwatch
32 from mycroft.util import (
33  play_wav, play_mp3, check_for_signal, create_signal, resolve_resource_file
34 )
35 from mycroft.util.log import LOG
36 from queue import Queue, Empty
37 
38 
def send_playback_metric(stopwatch, ident):
    """Report speech playback timing from a background daemon thread.

    The report is handed to ``report_timing`` under the
    ``'speech_playback'`` system name, on its own thread so the caller
    does not wait for it.

    Args:
        stopwatch: timing object forwarded to ``report_timing``
        ident: identifier forwarded to ``report_timing``
    """
    reporter = Thread(target=report_timing,
                      args=(ident, 'speech_playback', stopwatch))
    # Daemon thread: an unfinished report must not keep the process alive.
    reporter.daemon = True
    reporter.start()
50 
51 
class PlaybackThread(Thread):
    """Thread that plays queued TTS audio and sends viseme data to the
    enclosure.

    Items taken from the queue are ``(snd_type, data, visemes, ident)``
    tuples, where ``snd_type`` is ``'wav'`` or ``'mp3'``.
    """

    def __init__(self, queue):
        """
        Args:
            queue: queue the playback items are read from
        """
        super(PlaybackThread, self).__init__()
        self.queue = queue
        self._terminated = False
        self._processing_queue = False
        # Defined up front so that blink(), show_visemes() and
        # clear_queue() are safe to call before init() has been called and
        # before the owner has attached an enclosure.
        self.tts = None
        self.enclosure = None
        self.p = None  # handle of the most recently started player

    def init(self, tts):
        """Attach the TTS object whose begin_audio()/end_audio() are called
        around playback.
        """
        self.tts = tts

    def clear_queue(self):
        """
        Remove all pending playbacks and terminate the current player.
        """
        # Drain without blocking: an empty()/get() pair can block forever
        # if the queue is emptied between the two calls.
        try:
            while True:
                self.queue.get_nowait()
        except Empty:
            pass

        if self.p is not None:
            try:
                self.p.terminate()
            except Exception:
                # Best effort only; failing to terminate the player must
                # never break the caller.
                LOG.debug('Could not terminate the playback process')

    def run(self):
        """
        Thread main loop. Get audio and viseme data from the queue
        and play it.

        ``begin_audio()`` is called on the TTS object when the first item
        of a batch is picked up, and ``end_audio()`` once the queue is
        empty again (or when playback fails).
        """
        while not self._terminated:
            try:
                # The timeout lets the loop re-check _terminated regularly.
                snd_type, data, visemes, ident = self.queue.get(timeout=2)
                self.blink(0.5)
                if not self._processing_queue:
                    self._processing_queue = True
                    self.tts.begin_audio()

                stopwatch = Stopwatch()
                with stopwatch:
                    if snd_type == 'wav':
                        self.p = play_wav(data)
                    elif snd_type == 'mp3':
                        self.p = play_mp3(data)
                    else:
                        # Previously fell through and reused the handle of
                        # the prior playback; fail loudly instead (logged
                        # by the generic handler below).
                        raise ValueError(
                            'Unsupported audio type: {}'.format(snd_type))

                    if visemes:
                        self.show_visemes(visemes)
                    self.p.communicate()
                    self.p.wait()
                send_playback_metric(stopwatch, ident)

                if self.queue.empty():
                    self.tts.end_audio()
                    self._processing_queue = False
                self.blink(0.2)
            except Empty:
                # Nothing queued within the timeout; poll again.
                pass
            except Exception as e:
                LOG.exception(e)
                if self._processing_queue:
                    self.tts.end_audio()
                    self._processing_queue = False

    def show_visemes(self, pairs):
        """
        Send viseme data to the enclosure, if one is attached.

        Args:
            pairs(list): Viseme and timing pairs
        """
        if self.enclosure:
            self.enclosure.mouth_viseme(time(), pairs)

    def clear(self):
        """ Clear all pending actions for the TTS playback thread. """
        self.clear_queue()

    def blink(self, rate=1.0):
        """ Blink mycroft's eyes.

        Args:
            rate (float): probability (0.0 - 1.0) that a blink is sent
        """
        if self.enclosure and random.random() < rate:
            self.enclosure.eyes_blink("b")

    def stop(self):
        """ Stop thread: flag the main loop to exit and drop pending
        playbacks.
        """
        self._terminated = True
        self.clear_queue()
142 
143 
class TTS:
    """
    TTS abstract class to be implemented by all TTS engines.

    It aggregates the minimum required parameters and exposes
    ``execute(sentence)`` and ``validate_ssml(sentence)`` functions.

    Args:
        lang (str): language code; ``'en-us'`` is used when falsy
        config (dict): Configuration for this specific tts engine
        validator (TTSValidator): Used to verify proper installation
        audio_ext (str): extension / type of the generated audio files
        phonetic_spelling (bool): Whether to spell certain words phonetically
        ssml_tags (list): Supported ssml properties. Ex. ['speak', 'prosody']
    """
    # NOTE(review): ``__metaclass__`` is the Python 2 spelling; Python 3
    # ignores it, so this class is not actually an ABC. Kept as-is to avoid
    # changing how subclasses are constructed - confirm before switching to
    # ``class TTS(metaclass=ABCMeta)``.
    __metaclass__ = ABCMeta

    def __init__(self, lang, config, validator, audio_ext='wav',
                 phonetic_spelling=True, ssml_tags=None):
        super(TTS, self).__init__()
        self.bus = None  # initialized in "init" step
        self.lang = lang or 'en-us'
        self.config = config
        self.validator = validator
        self.phonetic_spelling = phonetic_spelling
        self.audio_ext = audio_ext
        self.ssml_tags = ssml_tags or []

        self.voice = config.get("voice")
        self.filename = '/tmp/tts.wav'
        self.enclosure = None
        random.seed()
        self.queue = Queue()
        # Playback runs on its own thread, fed through self.queue.
        self.playback = PlaybackThread(self.queue)
        self.playback.start()
        self.clear_cache()
        self.spellings = self.load_spellings()
        self.tts_name = type(self).__name__

    def load_spellings(self):
        """Load phonetic spellings of words as dictionary.

        The resource file holds one ``word: spelling`` entry per line.

        Returns:
            dict: word -> phonetic spelling; empty when the resource file
                  is missing or cannot be parsed
        """
        path = join('text', self.lang, 'phonetic_spellings.txt')
        spellings_file = resolve_resource_file(path)
        if not spellings_file:
            return {}
        try:
            with open(spellings_file) as f:
                lines = filter(bool, f.read().split('\n'))
            lines = [i.split(':') for i in lines]
            return {key.strip(): value.strip() for key, value in lines}
        except ValueError:
            # Raised by the unpacking above when a line does not split
            # into exactly one key and one value.
            LOG.exception('Failed to load phonetic spellings.')
            return {}

    def begin_audio(self):
        """Helper function for child classes to call in execute()"""
        # Create signals informing start of speech
        self.bus.emit(Message("recognizer_loop:audio_output_start"))

    def end_audio(self):
        """
        Helper function for child classes to call in execute().

        Sends the recognizer_loop:audio_output_end message, indicating
        that speaking is done for the moment. It also checks if cache
        directory needs cleaning to free up disk space.
        """

        self.bus.emit(Message("recognizer_loop:audio_output_end"))
        # Clean the cache as needed
        cache_dir = mycroft.util.get_cache_directory("tts/" + self.tts_name)
        mycroft.util.curate_cache(cache_dir, min_free_percent=100)

        # This check will clear the "signal"
        check_for_signal("isSpeaking")

    def init(self, bus):
        """ Performs initial setup of TTS object.

        Arguments:
            bus:    Mycroft messagebus connection
        """
        self.bus = bus
        self.playback.init(self)
        self.enclosure = EnclosureAPI(self.bus)
        self.playback.enclosure = self.enclosure

    def get_tts(self, sentence, wav_file):
        """
        Abstract method that a tts implementation needs to implement.
        Should get data from tts.

        Args:
            sentence(str): Sentence to synthesize
            wav_file(str): output file

        Returns:
            tuple: (wav_file, phoneme)
        """
        pass

    def modify_tag(self, tag):
        """Override to modify each supported ssml tag"""
        return tag

    @staticmethod
    def remove_ssml(text):
        """Strip every ``<...>`` tag from text.

        Double spaces left behind by a removed tag are collapsed.

        Args:
            text (str): text possibly containing ssml tags

        Returns:
            str: text without tags
        """
        return re.sub('<[^>]*>', '', text).replace('  ', ' ')

    def validate_ssml(self, utterance):
        """
        Check if engine supports ssml, if not remove all tags
        Remove unsupported / invalid tags

        Args:
            utterance(str): Sentence to validate

        Returns: validated_sentence (str)
        """
        # if ssml is not supported by TTS engine remove all tags
        if not self.ssml_tags:
            return self.remove_ssml(utterance)

        # find ssml tags in string
        tags = re.findall('<[^>]*>', utterance)

        for tag in tags:
            if any(supported in tag for supported in self.ssml_tags):
                utterance = utterance.replace(tag, self.modify_tag(tag))
            else:
                # remove unsupported tag
                utterance = utterance.replace(tag, "")

        # return text with supported ssml tags only
        return utterance.replace("  ", " ")

    def _preprocess_sentence(self, sentence):
        """ Default preprocessing is no preprocessing.

        This method can be overridden to create chunks suitable to the
        TTS engine in question.

        Arguments:
            sentence (str): sentence to preprocess

        Returns:
            list: list of sentence parts
        """
        return [sentence]

    def _apply_phonetic_spellings(self, sentence):
        """Replace whole words that have an entry in ``self.spellings``.

        Lookup is case-insensitive. Substitution is done per matched word
        in a single pass, so a short entry never rewrites part of a longer
        word and an inserted spelling is never substituted again.

        Args:
            sentence (str): text to respell

        Returns:
            str: sentence with known words replaced
        """
        def respell(match):
            word = match.group(0)
            return self.spellings.get(word.lower(), word)

        return re.sub(r"[\w']+", respell, sentence)

    def execute(self, sentence, ident=None):
        """
        Convert sentence to speech, preprocessing out unsupported ssml

        The method caches results if possible using the hash of the
        sentence.

        Args:
            sentence:   Sentence to be spoken
            ident:      Id reference to current interaction
        """
        sentence = self.validate_ssml(sentence)

        create_signal("isSpeaking")
        if self.phonetic_spelling:
            sentence = self._apply_phonetic_spellings(sentence)

        chunks = self._preprocess_sentence(sentence)
        for sentence in chunks:
            # The md5 of the chunk text names both the audio file and the
            # phoneme file in the per-engine cache directory.
            key = str(hashlib.md5(
                sentence.encode('utf-8', 'ignore')).hexdigest())
            wav_file = os.path.join(
                mycroft.util.get_cache_directory("tts/" + self.tts_name),
                key + '.' + self.audio_ext)

            if os.path.exists(wav_file):
                LOG.debug("TTS cache hit")
                phonemes = self.load_phonemes(key)
            else:
                wav_file, phonemes = self.get_tts(sentence, wav_file)
                if phonemes:
                    self.save_phonemes(key, phonemes)

            vis = self.viseme(phonemes)
            self.queue.put((self.audio_ext, wav_file, vis, ident))

    def viseme(self, phonemes):
        """
        Create visemes from phonemes. Needs to be implemented for all
        tts backend

        Args:
            phonemes(str): String with phoneme data
        """
        return None

    def clear_cache(self):
        """ Remove all cached files.

        Deletes the files directly under the ``tts`` cache directory and
        the files inside each of its immediate sub-directories; the
        directories themselves are kept.
        """
        cache_root = mycroft.util.get_cache_directory('tts')
        if not os.path.exists(cache_root):
            return
        for entry in os.listdir(cache_root):
            entry_path = os.path.join(cache_root, entry)
            if os.path.isdir(entry_path):
                for f in os.listdir(entry_path):
                    file_path = os.path.join(entry_path, f)
                    if os.path.isfile(file_path):
                        os.unlink(file_path)
            # If no sub-folders are present, check if it is a file & clear it
            elif os.path.isfile(entry_path):
                os.unlink(entry_path)

    def save_phonemes(self, key, phonemes):
        """
        Cache phonemes

        Failures are logged and otherwise ignored.

        Args:
            key:        Hash key for the sentence
            phonemes:   phoneme string to save
        """
        cache_dir = mycroft.util.get_cache_directory("tts/" + self.tts_name)
        pho_file = os.path.join(cache_dir, key + ".pho")
        try:
            with open(pho_file, "w") as cachefile:
                cachefile.write(phonemes)
        except Exception:
            LOG.exception("Failed to write {} to cache".format(pho_file))

    def load_phonemes(self, key):
        """
        Load phonemes from cache file.

        Args:
            key:    Key identifying phoneme cache

        Returns:
            str: stripped file content, or None when the file is missing
                 or cannot be read
        """
        pho_file = os.path.join(
            mycroft.util.get_cache_directory("tts/" + self.tts_name),
            key + ".pho")
        if os.path.exists(pho_file):
            try:
                with open(pho_file, "r") as cachefile:
                    phonemes = cachefile.read().strip()
                return phonemes
            except Exception:
                LOG.debug("Failed to read .PHO from cache")
        return None

    def __del__(self):
        # __init__ may have failed before the playback thread was created;
        # in that case there is nothing to shut down.
        playback = getattr(self, 'playback', None)
        if playback is None:
            return
        playback.stop()
        playback.join()
396 
397 
class TTSValidator:
    """
    TTS Validator abstract class to be implemented by all TTS engines.

    It exposes and implements ``validate(tts)`` function as a template to
    validate the TTS engines.
    """
    # NOTE(review): ``__metaclass__`` is the Python 2 spelling; Python 3
    # ignores it, so the @abstractmethod markers below are not enforced at
    # instantiation time. Kept as-is to stay backward compatible with
    # existing subclasses - confirm before changing.
    __metaclass__ = ABCMeta

    def __init__(self, tts):
        """
        Args:
            tts: the TTS object to validate
        """
        self.tts = tts

    def validate(self):
        """Run every validation step in order; a failing step raises."""
        self.validate_dependencies()
        self.validate_instance()
        self.validate_filename()
        self.validate_lang()
        self.validate_connection()

    def validate_dependencies(self):
        """Hook for checking engine dependencies; no-op by default."""
        pass

    def validate_instance(self):
        """Check that the TTS object is an instance of get_tts_class().

        Raises:
            AttributeError: if it is not
        """
        clazz = self.get_tts_class()
        if not isinstance(self.tts, clazz):
            raise AttributeError('tts must be instance of ' + clazz.__name__)

    def validate_filename(self):
        """Check that ``tts.filename`` is a .wav path in an existing
        directory.

        Raises:
            AttributeError: if the name is empty, does not end in ``.wav``
                            or its directory does not exist
        """
        filename = self.tts.filename
        if not (filename and filename.endswith('.wav')):
            raise AttributeError('file: %s must be in .wav format!' % filename)

        dir_path = dirname(filename)
        if not (exists(dir_path) and isdir(dir_path)):
            raise AttributeError('filename: %s is not valid!' % filename)

    @abstractmethod
    def validate_lang(self):
        """To be implemented by the engine's validator."""
        pass

    @abstractmethod
    def validate_connection(self):
        """To be implemented by the engine's validator."""
        pass

    @abstractmethod
    def get_tts_class(self):
        """To be implemented by the engine's validator; return the class
        the TTS object must be an instance of.
        """
        pass
445 
446 
class TTSFactory:
    """Creates the TTS engine selected in the configuration."""
    # The engine modules are imported in the class body rather than at the
    # top of the file.
    from mycroft.tts.espeak_tts import ESpeak
    from mycroft.tts.fa_tts import FATTS
    from mycroft.tts.google_tts import GoogleTTS
    from mycroft.tts.mary_tts import MaryTTS
    from mycroft.tts.mimic_tts import Mimic
    from mycroft.tts.spdsay_tts import SpdSay
    from mycroft.tts.bing_tts import BingTTS
    from mycroft.tts.ibm_tts import WatsonTTS
    from mycroft.tts.responsive_voice_tts import ResponsiveVoice
    from mycroft.tts.mimic2_tts import Mimic2

    # Maps the configured "module" name to the engine class.
    CLASSES = {
        "mimic": Mimic,
        "mimic2": Mimic2,
        "google": GoogleTTS,
        "marytts": MaryTTS,
        "fatts": FATTS,
        "espeak": ESpeak,
        "spdsay": SpdSay,
        "watson": WatsonTTS,
        "bing": BingTTS,
        "responsive_voice": ResponsiveVoice
    }

    @staticmethod
    def create():
        """
        Factory method to create a TTS engine based on configuration.

        The configuration file ``mycroft.conf`` contains a ``tts`` section with
        the name of a TTS module to be read by this method.

        "tts": {
            "module": <engine_name>
        }

        The engine defaults to ``mimic``. Its language is taken from the
        engine's own config section when present, otherwise from the global
        ``lang`` setting (default ``en-us``). The engine's validator is run
        before the engine is returned.
        """
        config = Configuration.get()
        lang = config.get("lang", "en-us")
        tts_section = config.get('tts', {})
        tts_module = tts_section.get('module', 'mimic')
        tts_config = tts_section.get(tts_module, {})
        tts_lang = tts_config.get('lang', lang)
        # NOTE(review): a module name missing from CLASSES yields None here
        # and the call below then fails with TypeError - consider a clearer
        # error once it is confirmed no caller depends on that.
        clazz = TTSFactory.CLASSES.get(tts_module)
        tts = clazz(tts_lang, tts_config)
        tts.validator.validate()
        return tts
def report_timing(ident, system, timing, additional_data=None)
def execute(self, sentence, ident=None)
def resolve_resource_file(res_name)
def check_for_signal(signal_name, sec_lifetime=0)
Definition: signal.py:105
def validate_ssml(self, utterance)
def viseme(self, phonemes)
def get_tts(self, sentence, wav_file)
def curate_cache(directory, min_free_percent=5.0, min_free_disk=50)
def create_signal(signal_name)
Definition: signal.py:90
def _preprocess_sentence(self, sentence)
def send_playback_metric(stopwatch, ident)
def save_phonemes(self, key, phonemes)
def __init__(self, lang, config, validator, audio_ext='wav', phonetic_spelling=True, ssml_tags=None)
def get_cache_directory(domain=None)
def get(phrase, lang=None, context=None)


mycroft_ros
Author(s):
autogenerated on Mon Apr 26 2021 02:35:40