listener.py
Go to the documentation of this file.
1 # Copyright 2017 Mycroft AI Inc.
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
6 #
7 # http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14 #
15 import time
16 from threading import Thread
17 import sys
18 import speech_recognition as sr
19 from pyee import EventEmitter
20 from requests import RequestException, HTTPError
21 from requests.exceptions import ConnectionError
22 
23 from mycroft import dialog
24 from mycroft.client.speech.hotword_factory import HotWordFactory
25 from mycroft.client.speech.mic import MutableMicrophone, ResponsiveRecognizer
26 from mycroft.configuration import Configuration
27 from mycroft.metrics import MetricsAggregator, Stopwatch, report_timing
28 from mycroft.session import SessionManager
29 from mycroft.stt import STTFactory
30 from mycroft.util import connected
31 from mycroft.util.log import LOG
32 from mycroft.util import find_input_device
33 from queue import Queue, Empty
34 
# Tags identifying the payload type of items placed on the audio queue.
AUDIO_DATA = 0
STREAM_START = 1
STREAM_DATA = 2
STREAM_STOP = 3


class AudioStreamHandler(object):
    """Forward streaming-STT callbacks onto a shared queue.

    Each callback wraps its payload in a (tag, data) tuple so the
    consumer thread can tell stream control events apart from chunks.
    """

    def __init__(self, queue):
        # Queue shared with the AudioConsumer thread.
        self.queue = queue

    def _enqueue(self, tag, data=None):
        # All stream events share the same (tag, payload) tuple shape.
        self.queue.put((tag, data))

    def stream_start(self):
        """Signal that a new audio stream is starting."""
        self._enqueue(STREAM_START)

    def stream_chunk(self, chunk):
        """Hand one chunk of raw audio data to the consumer."""
        self._enqueue(STREAM_DATA, chunk)

    def stream_stop(self):
        """Signal that the current audio stream has ended."""
        self._enqueue(STREAM_STOP)
53 
54 
class AudioProducer(Thread):
    """
    AudioProducer
    given a mic and a recognizer implementation, continuously listens to the
    mic for potential speech chunks and pushes them onto the queue.
    """

    def __init__(self, state, queue, mic, recognizer, emitter, stream_handler):
        super(AudioProducer, self).__init__()
        self.daemon = True  # don't keep the process alive on shutdown
        self.state = state
        self.queue = queue
        self.mic = mic
        self.recognizer = recognizer
        self.emitter = emitter
        self.stream_handler = stream_handler

    def run(self):
        """Open the mic and capture speech chunks until stopped."""
        with self.mic as source:
            self.recognizer.adjust_for_ambient_noise(source)
            while self.state.running:
                self._listen_once(source)

    def _listen_once(self, source):
        # One capture cycle: record a chunk and hand it to the consumer.
        try:
            audio = self.recognizer.listen(source, self.emitter,
                                           self.stream_handler)
            self.queue.put((AUDIO_DATA, audio))
        except IOError as e:
            # NOTE: Audio stack on raspi is slightly different, throws
            # IOError every other listen, almost like it can't handle
            # buffering audio between listen loops.
            # The internet was not helpful.
            # http://stackoverflow.com/questions/10733903/pyaudio-input-overflowed
            self.emitter.emit("recognizer_loop:ioerror", e)
        finally:
            # Always close out the stream, even after an error.
            if self.stream_handler is not None:
                self.stream_handler.stream_stop()

    def stop(self):
        """
        Stop producer thread.
        """
        self.state.running = False
        self.recognizer.stop()
97 
98 
class AudioConsumer(Thread):
    """
    AudioConsumer
    Consumes AudioData chunks off the queue
    """

    # In seconds, the minimum audio size to be sent to remote STT
    MIN_AUDIO_SIZE = 0.5

    def __init__(self, state, queue, emitter, stt,
                 wakeup_recognizer, wakeword_recognizer):
        super(AudioConsumer, self).__init__()
        self.daemon = True
        self.queue = queue
        self.state = state
        self.emitter = emitter
        self.stt = stt
        self.wakeup_recognizer = wakeup_recognizer
        self.wakeword_recognizer = wakeword_recognizer
        # FIX: wake_up() and process() report through self.metrics, but it
        # was never initialized here, causing an AttributeError on the
        # first wake word / utterance.
        self.metrics = MetricsAggregator()

    def run(self):
        """Consume queue messages for as long as the loop is running."""
        while self.state.running:
            self.read()

    def read(self):
        """Pull one (tag, data) message off the queue and dispatch it."""
        try:
            message = self.queue.get(timeout=0.5)
        except Empty:
            return

        if message is None:
            return

        tag, data = message

        if tag == AUDIO_DATA:
            # A complete utterance was captured; either use it to wake
            # from sleep or transcribe it.
            if self.state.sleeping:
                self.wake_up(data)
            else:
                self.process(data)
        elif tag == STREAM_START:
            self.stt.stream_start()
        elif tag == STREAM_DATA:
            self.stt.stream_data(data)
        elif tag == STREAM_STOP:
            self.stt.stream_stop()
        else:
            LOG.error("Unknown audio queue type %r" % message)

    # TODO: Localization
    def wake_up(self, audio):
        """Leave sleep mode if the wakeup word is found in the audio."""
        if self.wakeup_recognizer.found_wake_word(audio.frame_data):
            SessionManager.touch()
            self.state.sleeping = False
            self.emitter.emit('recognizer_loop:awoken')
            self.metrics.increment("mycroft.wakeup")

    @staticmethod
    def _audio_length(audio):
        """Return the duration of the audio clip in seconds."""
        return float(len(audio.frame_data)) / (
            audio.sample_rate * audio.sample_width)

    # TODO: Localization
    def process(self, audio):
        """Transcribe an audio clip and emit the resulting utterance."""
        SessionManager.touch()
        payload = {
            'utterance': self.wakeword_recognizer.key_phrase,
            'session': SessionManager.get().session_id,
        }
        self.emitter.emit("recognizer_loop:wakeword", payload)

        if self._audio_length(audio) < self.MIN_AUDIO_SIZE:
            LOG.warning("Audio too short to be processed")
        else:
            stopwatch = Stopwatch()
            with stopwatch:
                transcription = self.transcribe(audio)
            if transcription:
                ident = str(stopwatch.timestamp) + str(hash(transcription))
                # STT succeeded, send the transcribed speech on for processing
                payload = {
                    'utterances': [transcription],
                    'lang': self.stt.lang,
                    'session': SessionManager.get().session_id,
                    'ident': ident
                }
                self.emitter.emit("recognizer_loop:utterance", payload)
                self.metrics.attr('utterances', [transcription])
            else:
                ident = str(stopwatch.timestamp)
            # Report timing metrics
            report_timing(ident, 'stt', stopwatch,
                          {'transcription': transcription,
                           'stt': self.stt.__class__.__name__})

    def transcribe(self, audio):
        """Run STT on the clip.

        Returns the lowercased transcription, "pair my device" when the
        backend rejects the key (401), or None on failure.
        """
        try:
            # Invoke the STT engine on the audio clip
            text = self.stt.execute(audio).lower().strip()
            LOG.debug("STT: " + text)
            return text
        except sr.RequestError as e:
            LOG.error("Could not request Speech Recognition {0}".format(e))
        except ConnectionError as e:
            LOG.error("Connection Error: {0}".format(e))

            self.emitter.emit("recognizer_loop:no_internet")
        except HTTPError as e:
            if e.response.status_code == 401:
                LOG.warning("Access Denied at mycroft.ai")
                return "pair my device"  # phrase to start the pairing process
            else:
                LOG.error(e.__class__.__name__ + ': ' + str(e))
        except RequestException as e:
            LOG.error(e.__class__.__name__ + ': ' + str(e))
        except Exception as e:
            self.emitter.emit('recognizer_loop:speech.recognition.unknown')
            if isinstance(e, IndexError):
                LOG.info('no words were transcribed')
            else:
                LOG.error(e)
            LOG.error("Speech Recognition could not understand audio")
            return None
        # Reached only when a service/connectivity error was handled above
        # without returning: tell the user why STT failed.
        if connected():
            dialog_name = 'backend.down'
        else:
            dialog_name = 'not connected to the internet'
        self.emitter.emit('speak', {'utterance': dialog.get(dialog_name)})

    def __speak(self, utterance):
        """Emit a 'speak' message carrying the given utterance."""
        payload = {
            'utterance': utterance,
            'session': SessionManager.get().session_id
        }
        self.emitter.emit("speak", payload)
235 
236 
238  def __init__(self):
239  self.running = False
240  self.sleeping = False
241 
242 
243 class RecognizerLoop(EventEmitter):
244  """
245  EventEmitter loop running speech recognition. Local wake word
246  recognizer and remote general speech recognition.
247  """
248 
249  def __init__(self):
250  super(RecognizerLoop, self).__init__()
251  self.mute_calls = 0
252  self._load_config()
253 
254  def _load_config(self):
255  """
256  Load configuration parameters from configuration
257  """
258  config = Configuration.get()
259  self.config_core = config
260  self._config_hash = hash(str(config))
261  self.lang = config.get('lang')
262  self.config = config.get('listener')
263  rate = self.config.get('sample_rate')
264 
265  device_index = self.config.get('device_index')
266  device_name = self.config.get('device_name')
267  if not device_index and device_name:
268  device_index = find_input_device(device_name)
269 
270  LOG.debug('Using microphone (None = default): '+str(device_index))
271 
272  self.microphone = MutableMicrophone(device_index, rate,
273  mute=self.mute_calls > 0)
274 
276  # TODO - localization
279  self.wakeword_recognizer)
281 
283  # Create a local recognizer to hear the wakeup word, e.g. 'Hey Mycroft'
284  LOG.info("creating wake word engine")
285  word = self.config.get("wake_word", "hey mycroft")
286  # TODO remove this, only for server settings compatibility
287  phonemes = self.config.get("phonemes")
288  thresh = self.config.get("threshold")
289  config = self.config_core.get("hotwords", {word: {}})
290 
291  if word not in config:
292  config[word] = {'module': 'precise'}
293  if phonemes:
294  config[word]["phonemes"] = phonemes
295  if thresh:
296  config[word]["threshold"] = thresh
297  if phonemes is None or thresh is None:
298  config = None
299  return HotWordFactory.create_hotword(
300  word, config, self.lang, loop=self
301  )
302 
304  LOG.info("creating stand up word engine")
305  word = self.config.get("stand_up_word", "wake up")
306  return HotWordFactory.create_hotword(word, lang=self.lang, loop=self)
307 
308  def start_async(self):
309  """
310  Start consumer and producer threads
311  """
312  self.state.running = True
313  stt = STTFactory.create()
314  queue = Queue()
315  stream_handler = None
316  if stt.can_stream:
317  stream_handler = AudioStreamHandler(queue)
318  self.producer = AudioProducer(self.state, queue, self.microphone,
319  self.responsive_recognizer, self,
320  stream_handler)
321  self.producer.start()
322  self.consumer = AudioConsumer(self.state, queue, self,
323  stt, self.wakeup_recognizer,
324  self.wakeword_recognizer)
325  self.consumer.start()
326 
327  def stop(self):
328  self.state.running = False
329  self.producer.stop()
330  # wait for threads to shutdown
331  self.producer.join()
332  self.consumer.join()
333 
334  def mute(self):
335  """
336  Mute microphone and increase number of requests to mute
337  """
338  self.mute_calls += 1
339  if self.microphone:
340  self.microphone.mute()
341 
342  def unmute(self):
343  """
344  Unmute mic if as many unmute calls as mute calls have been
345  received.
346  """
347  if self.mute_calls > 0:
348  self.mute_calls -= 1
349 
350  if self.mute_calls <= 0 and self.microphone:
351  self.microphone.unmute()
352  self.mute_calls = 0
353 
354  def force_unmute(self):
355  """
356  Completely unmute mic regardless of the number of calls to mute
357  """
358  self.mute_calls = 0
359  self.unmute()
360 
361  def is_muted(self):
362  if self.microphone:
363  return self.microphone.is_muted()
364  else:
365  return True # consider 'no mic' muted
366 
367  def sleep(self):
368  self.state.sleeping = True
369 
370  def awaken(self):
371  self.state.sleeping = False
372 
373  def run(self):
374  self.start_async()
375  while self.state.running:
376  try:
377  time.sleep(1)
378  if self._config_hash != hash(
379  str(Configuration().get())):
380  LOG.debug('Config has changed, reloading...')
381  self.reload()
382  except KeyboardInterrupt as e:
383  LOG.error(e)
384  self.stop()
385  raise # Re-raise KeyboardInterrupt
386 
387  def reload(self):
388  """
389  Reload configuration and restart consumer and producer
390  """
391  self.stop()
392  self.wakeword_recognizer.stop()
393  # load config
394  self._load_config()
395  # restart
396  self.start_async()
def report_timing(ident, system, timing, additional_data=None)
def find_input_device(device_name)
def __init__(self, state, queue, mic, recognizer, emitter, stream_handler)
Definition: listener.py:62
def get(phrase, lang=None, context=None)
def __init__(self, state, queue, emitter, stt, wakeup_recognizer, wakeword_recognizer)
Definition: listener.py:109


mycroft_ros
Author(s):
autogenerated on Mon Apr 26 2021 02:35:40