scripts/mycroft/stt/__init__.py
Go to the documentation of this file.
1 # Copyright 2017 Mycroft AI Inc.
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
6 #
7 # http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14 #
15 import re
16 import json
17 from abc import ABCMeta, abstractmethod
18 from requests import post, put, exceptions
19 from speech_recognition import Recognizer
20 from queue import Queue
21 from threading import Thread
22 
23 from mycroft.api import STTApi
24 from mycroft.configuration import Configuration
25 from mycroft.util.log import LOG
26 
27 
class STT(metaclass=ABCMeta):
    """Base class for all STT (speech-to-text) backends.

    Loads the shared configuration (language, per-module settings and
    credentials) and exposes the abstract ``execute`` method plus
    optional streaming hooks that streaming-capable backends override.

    Note: the original code set ``__metaclass__ = ABCMeta``, which is
    Python-2-only syntax and is silently ignored on Python 3, so
    abstractness was never actually enforced.
    """

    def __init__(self):
        config_core = Configuration.get()
        self.lang = str(self.init_language(config_core))
        config_stt = config_core.get("stt", {})
        # Module-specific config lives under the module's own name,
        # e.g. config["stt"]["google"] when module == "google".
        self.config = config_stt.get(config_stt.get("module"), {})
        self.credential = self.config.get("credential", {})
        self.recognizer = Recognizer()
        # Subclasses that support chunked streaming set this to True.
        self.can_stream = False

    @staticmethod
    def init_language(config_core):
        """Normalize the configured language code to ``ll-CC`` form.

        Args:
            config_core (dict): core configuration; "lang" is read,
                defaulting to "en-US".

        Returns:
            str: e.g. "en-us" -> "en-US".  Codes that are not exactly
            two dash-separated parts are returned unchanged.
        """
        lang = config_core.get("lang", "en-US")
        langs = lang.split("-")
        if len(langs) == 2:
            return langs[0].lower() + "-" + langs[1].upper()
        return lang

    @abstractmethod
    def execute(self, audio, language=None):
        """Transcribe audio data to text (implemented by subclasses)."""

    def stream_start(self):
        """Begin a streaming session (no-op unless overridden)."""

    def stream_data(self, data):
        """Feed a chunk of audio to an active stream (no-op)."""

    def stream_stop(self):
        """End a streaming session and return the result (no-op)."""
60 
61 
class TokenSTT(STT, metaclass=ABCMeta):
    """Abstract base for STT backends authenticated by a single token.

    Fix: ``__metaclass__ = ABCMeta`` is Python-2-only and ignored on
    Python 3; use the ``metaclass=`` keyword instead.
    """

    def __init__(self):
        super(TokenSTT, self).__init__()
        # Token is read from the module's "credential" config section;
        # str() keeps the previous behavior of stringifying None.
        self.token = str(self.credential.get("token"))
68 
69 
71  __metaclass__ = ABCMeta
72 
73  def __init__(self):
74  super(GoogleJsonSTT, self).__init__()
75  self.json_credentials = json.dumps(self.credential.get("json"))
76 
77 
78 class BasicSTT(STT):
79  __metaclass__ = ABCMeta
80 
81  def __init__(self):
82  super(BasicSTT, self).__init__()
83  self.username = str(self.credential.get("username"))
84  self.password = str(self.credential.get("password"))
85 
86 
87 class KeySTT(STT):
88  __metaclass__ = ABCMeta
89 
90  def __init__(self):
91  super(KeySTT, self).__init__()
92  self.id = str(self.credential.get("client_id"))
93  self.key = str(self.credential.get("client_key"))
94 
95 
97  def __init__(self):
98  super(GoogleSTT, self).__init__()
99 
100  def execute(self, audio, language=None):
101  self.lang = language or self.lang
102  return self.recognizer.recognize_google(audio, self.token, self.lang)
103 
104 
106  def __init__(self):
107  super(GoogleCloudSTT, self).__init__()
108  # override language with module specific language selection
109  self.lang = self.config.get('lang') or self.lang
110 
111  def execute(self, audio, language=None):
112  self.lang = language or self.lang
113  return self.recognizer.recognize_google_cloud(audio,
114  self.json_credentials,
115  self.lang)
116 
117 
119  def __init__(self):
120  super(WITSTT, self).__init__()
121 
122  def execute(self, audio, language=None):
123  LOG.warning("WITSTT language should be configured at wit.ai settings.")
124  return self.recognizer.recognize_wit(audio, self.token)
125 
126 
128  def __init__(self):
129  super(IBMSTT, self).__init__()
130 
131  def execute(self, audio, language=None):
132  self.lang = language or self.lang
133  return self.recognizer.recognize_ibm(audio, self.username,
134  self.password, self.lang)
135 
136 
138  def __init__(self):
139  super(MycroftSTT, self).__init__()
140  self.api = STTApi("stt")
141 
142  def execute(self, audio, language=None):
143  self.lang = language or self.lang
144  try:
145  return self.api.stt(audio.get_flac_data(convert_rate=16000),
146  self.lang, 1)[0]
147  except Exception:
148  return self.api.stt(audio.get_flac_data(), self.lang, 1)[0]
149 
150 
152  """Mycroft Hosted DeepSpeech"""
153  def __init__(self):
154  super(MycroftDeepSpeechSTT, self).__init__()
155  self.api = STTApi("deepspeech")
156 
157  def execute(self, audio, language=None):
158  language = language or self.lang
159  if not language.startswith("en"):
160  raise ValueError("Deepspeech is currently english only")
161  return self.api.stt(audio.get_wav_data(), self.lang, 1)
162 
163 
165  """
166  STT interface for the deepspeech-server:
167  https://github.com/MainRo/deepspeech-server
168  use this if you want to host DeepSpeech yourself
169  """
170  def __init__(self):
171  super(DeepSpeechServerSTT, self).__init__()
172 
173  def execute(self, audio, language=None):
174  language = language or self.lang
175  if not language.startswith("en"):
176  raise ValueError("Deepspeech is currently english only")
177  response = post(self.config.get("uri"), data=audio.get_wav_data())
178  return response.text
179 
180 
class StreamThread(Thread):
    """Background thread that streams queued audio chunks to a URL.

    Chunks are pulled from ``queue`` and uploaded as a chunked POST
    request body; a ``None`` entry in the queue ends the stream.  The
    server's response is stored in ``self.response`` when run() returns.
    """

    def __init__(self, url, queue):
        super().__init__()
        self.url = url
        self.queue = queue
        self.response = None

    def _get_data(self):
        """Yield queue entries until the ``None`` sentinel is seen."""
        while True:
            chunk = self.queue.get()
            if chunk is None:
                return
            yield chunk
            self.queue.task_done()

    def run(self):
        self.response = post(self.url, data=self._get_data(), stream=True)
198 
199 
201  """
202  Streaming STT interface for the deepspeech-server:
203  https://github.com/JPEWdev/deep-dregs
204  use this if you want to host DeepSpeech yourself
205  """
206  def __init__(self):
207  super().__init__()
208  self.stream = None
209  self.can_stream = self.config.get('stream_uri') is not None
210 
211  def execute(self, audio, language=None):
212  if self.stream is None:
213  return super().execute(audio, language)
214  return self.stream_stop()
215 
216  def stream_stop(self):
217  if self.stream is not None:
218  self.queue.put(None)
219  self.stream.join()
220 
221  response = self.stream.response
222 
223  self.stream = None
224  self.queue = None
225  if response is None:
226  return None
227  return response.text
228  return None
229 
    def stream_data(self, data):
        """Queue a chunk of audio data for the active stream upload."""
        self.queue.put(data)
232 
233  def stream_start(self, language=None):
234  self.stream_stop()
235  language = language or self.lang
236  if not language.startswith("en"):
237  raise ValueError("Deepspeech is currently english only")
238  self.queue = Queue()
239  self.stream = StreamThread(self.config.get("stream_uri"), self.queue)
240  self.stream.start()
241 
242 
class KaldiSTT(STT):
    """STT backend posting WAV audio to a Kaldi GStreamer server."""

    def __init__(self):
        super(KaldiSTT, self).__init__()

    def execute(self, audio, language=None):
        """POST the audio to the configured URI and parse the result.

        The ``language`` argument is accepted for interface
        compatibility but is not sent: the Kaldi server's loaded model
        determines the language.  (The previous dead local
        ``language = language or self.lang`` has been removed.)
        """
        response = post(self.config.get("uri"), data=audio.get_wav_data())
        return self.get_response(response)

    def get_response(self, response):
        """Extract the best hypothesis from a Kaldi server response.

        Returns:
            str or None: the top utterance with "[noise]" markers
            stripped, or None if the response could not be parsed.
        """
        try:
            hypotheses = response.json()["hypotheses"]
            return re.sub(r'\s*\[noise\]\s*', '', hypotheses[0]["utterance"])
        except Exception:
            # Malformed or empty responses yield None rather than raising.
            return None
258 
259 
261  def __init__(self):
262  super(BingSTT, self).__init__()
263 
264  def execute(self, audio, language=None):
265  self.lang = language or self.lang
266  return self.recognizer.recognize_bing(audio, self.token,
267  self.lang)
268 
269 
271  def __init__(self):
272  super(HoundifySTT, self).__init__()
273 
274  def execute(self, audio, language=None):
275  self.lang = language or self.lang
276  return self.recognizer.recognize_houndify(audio, self.id, self.key)
277 
278 
280  def __init__(self):
281  super(GoVivaceSTT, self).__init__()
282  self.default_uri = "https://services.govivace.com:49149/telephony"
283 
284  if not self.lang.startswith("en") and not self.lang.startswith("es"):
285  LOG.error("GoVivace STT only supports english and spanish")
286  raise NotImplementedError
287 
288  def execute(self, audio, language=None):
289  url = self.config.get("uri", self.default_uri) + "?key=" + \
290  self.token + "&action=find&format=8K_PCM16&validation_string="
291  response = put(url,
292  data=audio.get_wav_data(convert_rate=8000))
293  return self.get_response(response)
294 
295  def get_response(self, response):
296  return response.json()["result"]["hypotheses"][0]["transcript"]
297 
298 
    # Registry mapping the "stt.module" configuration value to the
    # backend class that implements it; used by create() below.
    CLASSES = {
        "mycroft": MycroftSTT,
        "google": GoogleSTT,
        "google_cloud": GoogleCloudSTT,
        "wit": WITSTT,
        "ibm": IBMSTT,
        "kaldi": KaldiSTT,
        "bing": BingSTT,
        "govivace": GoVivaceSTT,
        "houndify": HoundifySTT,
        "deepspeech_server": DeepSpeechServerSTT,
        "deepspeech_stream_server": DeepSpeechStreamServerSTT,
        "mycroft_deepspeech": MycroftDeepSpeechSTT
    }
314 
315  @staticmethod
316  def create():
317  config = Configuration.get().get("stt", {})
318  module = config.get("module", "mycroft")
319  clazz = STTFactory.CLASSES.get(module)
320  return clazz()
def execute(self, audio, language=None)
def execute(self, audio, language=None)
def execute(self, audio, language=None)
def execute(self, audio, language=None)
def execute(self, audio, language=None)
def execute(self, audio, language=None)
def execute(self, audio, language=None)
def execute(self, audio, language=None)
def execute(self, audio, language=None)
def execute(self, audio, language=None)
def execute(self, audio, language=None)
def get(phrase, lang=None, context=None)
def init_language(config_core)
def execute(self, audio, language=None)


mycroft_ros
Author(s):
autogenerated on Mon Apr 26 2021 02:35:40