respeaker_node.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3 # Author: furushchev <furushchev@jsk.imi.i.u-tokyo.ac.jp>
4 
5 import angles
6 from contextlib import contextmanager
7 import usb.core
8 import usb.util
9 import pyaudio
10 import math
11 import numpy as np
12 import tf.transformations as T
13 import os
14 import rospy
15 import struct
16 import sys
17 import time
18 from audio_common_msgs.msg import AudioData
19 from geometry_msgs.msg import PoseStamped
20 from std_msgs.msg import Bool, Int32, ColorRGBA
21 from dynamic_reconfigure.server import Server
22 try:
23  from pixel_ring import usb_pixel_ring_v2
24 except IOError as e:
25  rospy.logerr(e)
26  raise RuntimeError("Check the device is connected and recognized")
27 
28 try:
29  from respeaker_ros.cfg import RespeakerConfig
30 except Exception as e:
31  rospy.logerr(e)
32  raise RuntimeError("Need to run respeaker_gencfg.py first")
33 
34 
35 # suppress error messages from ALSA
36 # https://stackoverflow.com/questions/7088672/pyaudio-working-but-spits-out-error-messages-each-time
37 # https://stackoverflow.com/questions/36956083/how-can-the-terminal-output-of-executables-run-by-python-functions-be-silenced-i
38 @contextmanager
39 def ignore_stderr(enable=True):
40  if enable:
41  devnull = None
42  try:
43  devnull = os.open(os.devnull, os.O_WRONLY)
44  stderr = os.dup(2)
45  sys.stderr.flush()
46  os.dup2(devnull, 2)
47  try:
48  yield
49  finally:
50  os.dup2(stderr, 2)
51  os.close(stderr)
52  finally:
53  if devnull is not None:
54  os.close(devnull)
55  else:
56  yield
57 
58 
59 # Partly copied from https://github.com/respeaker/usb_4_mic_array
60 # parameter list
61 # name: (id, offset, type, max, min , r/w, info)
62 PARAMETERS = {
63  'AECFREEZEONOFF': (18, 7, 'int', 1, 0, 'rw', 'Adaptive Echo Canceler updates inhibit.', '0 = Adaptation enabled', '1 = Freeze adaptation, filter only'),
64  'AECNORM': (18, 19, 'float', 16, 0.25, 'rw', 'Limit on norm of AEC filter coefficients'),
65  'AECPATHCHANGE': (18, 25, 'int', 1, 0, 'ro', 'AEC Path Change Detection.', '0 = false (no path change detected)', '1 = true (path change detected)'),
66  'RT60': (18, 26, 'float', 0.9, 0.25, 'ro', 'Current RT60 estimate in seconds'),
67  'HPFONOFF': (18, 27, 'int', 3, 0, 'rw', 'High-pass Filter on microphone signals.', '0 = OFF', '1 = ON - 70 Hz cut-off', '2 = ON - 125 Hz cut-off', '3 = ON - 180 Hz cut-off'),
68  'RT60ONOFF': (18, 28, 'int', 1, 0, 'rw', 'RT60 Estimation for AES. 0 = OFF 1 = ON'),
69  'AECSILENCELEVEL': (18, 30, 'float', 1, 1e-09, 'rw', 'Threshold for signal detection in AEC [-inf .. 0] dBov (Default: -80dBov = 10log10(1x10-8))'),
70  'AECSILENCEMODE': (18, 31, 'int', 1, 0, 'ro', 'AEC far-end silence detection status. ', '0 = false (signal detected) ', '1 = true (silence detected)'),
71  'AGCONOFF': (19, 0, 'int', 1, 0, 'rw', 'Automatic Gain Control. ', '0 = OFF ', '1 = ON'),
72  'AGCMAXGAIN': (19, 1, 'float', 1000, 1, 'rw', 'Maximum AGC gain factor. ', '[0 .. 60] dB (default 30dB = 20log10(31.6))'),
73  'AGCDESIREDLEVEL': (19, 2, 'float', 0.99, 1e-08, 'rw', 'Target power level of the output signal. ', '[-inf .. 0] dBov (default: -23dBov = 10log10(0.005))'),
74  'AGCGAIN': (19, 3, 'float', 1000, 1, 'rw', 'Current AGC gain factor. ', '[0 .. 60] dB (default: 0.0dB = 20log10(1.0))'),
75  'AGCTIME': (19, 4, 'float', 1, 0.1, 'rw', 'Ramps-up / down time-constant in seconds.'),
76  'CNIONOFF': (19, 5, 'int', 1, 0, 'rw', 'Comfort Noise Insertion.', '0 = OFF', '1 = ON'),
77  'FREEZEONOFF': (19, 6, 'int', 1, 0, 'rw', 'Adaptive beamformer updates.', '0 = Adaptation enabled', '1 = Freeze adaptation, filter only'),
78  'STATNOISEONOFF': (19, 8, 'int', 1, 0, 'rw', 'Stationary noise suppression.', '0 = OFF', '1 = ON'),
79  'GAMMA_NS': (19, 9, 'float', 3, 0, 'rw', 'Over-subtraction factor of stationary noise. min .. max attenuation'),
80  'MIN_NS': (19, 10, 'float', 1, 0, 'rw', 'Gain-floor for stationary noise suppression.', '[-inf .. 0] dB (default: -16dB = 20log10(0.15))'),
81  'NONSTATNOISEONOFF': (19, 11, 'int', 1, 0, 'rw', 'Non-stationary noise suppression.', '0 = OFF', '1 = ON'),
82  'GAMMA_NN': (19, 12, 'float', 3, 0, 'rw', 'Over-subtraction factor of non- stationary noise. min .. max attenuation'),
83  'MIN_NN': (19, 13, 'float', 1, 0, 'rw', 'Gain-floor for non-stationary noise suppression.', '[-inf .. 0] dB (default: -10dB = 20log10(0.3))'),
84  'ECHOONOFF': (19, 14, 'int', 1, 0, 'rw', 'Echo suppression.', '0 = OFF', '1 = ON'),
85  'GAMMA_E': (19, 15, 'float', 3, 0, 'rw', 'Over-subtraction factor of echo (direct and early components). min .. max attenuation'),
86  'GAMMA_ETAIL': (19, 16, 'float', 3, 0, 'rw', 'Over-subtraction factor of echo (tail components). min .. max attenuation'),
87  'GAMMA_ENL': (19, 17, 'float', 5, 0, 'rw', 'Over-subtraction factor of non-linear echo. min .. max attenuation'),
88  'NLATTENONOFF': (19, 18, 'int', 1, 0, 'rw', 'Non-Linear echo attenuation.', '0 = OFF', '1 = ON'),
89  'NLAEC_MODE': (19, 20, 'int', 2, 0, 'rw', 'Non-Linear AEC training mode.', '0 = OFF', '1 = ON - phase 1', '2 = ON - phase 2'),
90  'SPEECHDETECTED': (19, 22, 'int', 1, 0, 'ro', 'Speech detection status.', '0 = false (no speech detected)', '1 = true (speech detected)'),
91  'FSBUPDATED': (19, 23, 'int', 1, 0, 'ro', 'FSB Update Decision.', '0 = false (FSB was not updated)', '1 = true (FSB was updated)'),
92  'FSBPATHCHANGE': (19, 24, 'int', 1, 0, 'ro', 'FSB Path Change Detection.', '0 = false (no path change detected)', '1 = true (path change detected)'),
93  'TRANSIENTONOFF': (19, 29, 'int', 1, 0, 'rw', 'Transient echo suppression.', '0 = OFF', '1 = ON'),
94  'VOICEACTIVITY': (19, 32, 'int', 1, 0, 'ro', 'VAD voice activity status.', '0 = false (no voice activity)', '1 = true (voice activity)'),
95  'STATNOISEONOFF_SR': (19, 33, 'int', 1, 0, 'rw', 'Stationary noise suppression for ASR.', '0 = OFF', '1 = ON'),
96  'NONSTATNOISEONOFF_SR': (19, 34, 'int', 1, 0, 'rw', 'Non-stationary noise suppression for ASR.', '0 = OFF', '1 = ON'),
97  'GAMMA_NS_SR': (19, 35, 'float', 3, 0, 'rw', 'Over-subtraction factor of stationary noise for ASR. ', '[0.0 .. 3.0] (default: 1.0)'),
98  'GAMMA_NN_SR': (19, 36, 'float', 3, 0, 'rw', 'Over-subtraction factor of non-stationary noise for ASR. ', '[0.0 .. 3.0] (default: 1.1)'),
99  'MIN_NS_SR': (19, 37, 'float', 1, 0, 'rw', 'Gain-floor for stationary noise suppression for ASR.', '[-inf .. 0] dB (default: -16dB = 20log10(0.15))'),
100  'MIN_NN_SR': (19, 38, 'float', 1, 0, 'rw', 'Gain-floor for non-stationary noise suppression for ASR.', '[-inf .. 0] dB (default: -10dB = 20log10(0.3))'),
101  'GAMMAVAD_SR': (19, 39, 'float', 1000, 0, 'rw', 'Set the threshold for voice activity detection.', '[-inf .. 60] dB (default: 3.5dB 20log10(1.5))'),
102  # 'KEYWORDDETECT': (20, 0, 'int', 1, 0, 'ro', 'Keyword detected. Current value so needs polling.'),
103  'DOAANGLE': (21, 0, 'int', 359, 0, 'ro', 'DOA angle. Current value. Orientation depends on build configuration.')
104 }
105 
106 
107 class RespeakerInterface(object):
108  VENDOR_ID = 0x2886
109  PRODUCT_ID = 0x0018
110  TIMEOUT = 100000
111 
112  def __init__(self):
113  self.dev = usb.core.find(idVendor=self.VENDOR_ID,
114  idProduct=self.PRODUCT_ID)
115  if not self.dev:
116  raise RuntimeError("Failed to find Respeaker device")
117  rospy.loginfo("Initializing Respeaker device")
118  self.dev.reset()
119  self.pixel_ring = usb_pixel_ring_v2.PixelRing(self.dev)
120  self.set_led_think()
121  time.sleep(5) # it will take 5 seconds to re-recognize as audio device
122  self.set_led_trace()
123  rospy.loginfo("Respeaker device initialized (Version: %s)" % self.version)
124 
125  def __del__(self):
126  try:
127  self.close()
128  except:
129  pass
130  finally:
131  self.dev = None
132 
133  def write(self, name, value):
134  try:
135  data = PARAMETERS[name]
136  except KeyError:
137  return
138 
139  if data[5] == 'ro':
140  raise ValueError('{} is read-only'.format(name))
141 
142  id = data[0]
143 
144  # 4 bytes offset, 4 bytes value, 4 bytes type
145  if data[2] == 'int':
146  payload = struct.pack(b'iii', data[1], int(value), 1)
147  else:
148  payload = struct.pack(b'ifi', data[1], float(value), 0)
149 
150  self.dev.ctrl_transfer(
151  usb.util.CTRL_OUT | usb.util.CTRL_TYPE_VENDOR | usb.util.CTRL_RECIPIENT_DEVICE,
152  0, 0, id, payload, self.TIMEOUT)
153 
154  def read(self, name):
155  try:
156  data = PARAMETERS[name]
157  except KeyError:
158  return
159 
160  id = data[0]
161 
162  cmd = 0x80 | data[1]
163  if data[2] == 'int':
164  cmd |= 0x40
165 
166  length = 8
167 
168  try:
169  response = self.dev.ctrl_transfer(
170  usb.util.CTRL_IN | usb.util.CTRL_TYPE_VENDOR | usb.util.CTRL_RECIPIENT_DEVICE,
171  0, cmd, id, length, self.TIMEOUT)
172  except usb.core.USBError as e:
173  rospy.logerr(e)
174  rospy.signal_shutdown('Shutdown this node because of USBError')
175 
176  response = struct.unpack(b'ii', response.tostring())
177 
178  if data[2] == 'int':
179  result = response[0]
180  else:
181  result = response[0] * (2.**response[1])
182 
183  return result
184 
185  def set_led_think(self):
186  self.pixel_ring.set_brightness(10)
187  self.pixel_ring.think()
188 
189  def set_led_trace(self):
190  self.pixel_ring.set_brightness(20)
191  self.pixel_ring.trace()
192 
193  def set_led_color(self, r, g, b, a):
194  self.pixel_ring.set_brightness(int(20 * a))
195  self.pixel_ring.set_color(r=int(r*255), g=int(g*255), b=int(b*255))
196 
197  def set_vad_threshold(self, db):
198  self.write('GAMMAVAD_SR', db)
199 
200  def is_voice(self):
201  return self.read('VOICEACTIVITY')
202 
203  @property
204  def direction(self):
205  return self.read('DOAANGLE')
206 
207  @property
208  def version(self):
209  return self.dev.ctrl_transfer(
210  usb.util.CTRL_IN | usb.util.CTRL_TYPE_VENDOR | usb.util.CTRL_RECIPIENT_DEVICE,
211  0, 0x80, 0, 1, self.TIMEOUT)[0]
212 
213  def close(self):
214  """
215  close the interface
216  """
217  usb.util.dispose_resources(self.dev)
218 
219 
220 class RespeakerAudio(object):
221  def __init__(self, on_audio, channel=0, suppress_error=True):
222  self.on_audio = on_audio
223  with ignore_stderr(enable=suppress_error):
224  self.pyaudio = pyaudio.PyAudio()
225  self.channels = None
226  self.channel = channel
227  self.device_index = None
228  self.rate = 16000
229  self.bitwidth = 2
230  self.bitdepth = 16
231 
232  # find device
233  count = self.pyaudio.get_device_count()
234  rospy.logdebug("%d audio devices found" % count)
235  for i in range(count):
236  info = self.pyaudio.get_device_info_by_index(i)
237  name = info["name"].encode("utf-8")
238  chan = info["maxInputChannels"]
239  rospy.logdebug(" - %d: %s" % (i, name))
240  if name.lower().find("respeaker") >= 0:
241  self.channels = chan
242  self.device_index = i
243  rospy.loginfo("Found %d: %s (channels: %d)" % (i, name, chan))
244  break
245  if self.device_index is None:
246  rospy.logwarn("Failed to find respeaker device by name. Using default input")
247  info = self.pyaudio.get_default_input_device_info()
248  self.channels = info["maxInputChannels"]
249  self.device_index = info["index"]
250 
251  if self.channels != 6:
252  rospy.logwarn("%d channel is found for respeaker" % self.channels)
253  rospy.logwarn("You may have to update firmware.")
254  self.channel = min(self.channels - 1, max(0, self.channel))
255 
256  self.stream = self.pyaudio.open(
257  input=True, start=False,
258  format=pyaudio.paInt16,
259  channels=self.channels,
260  rate=self.rate,
261  frames_per_buffer=1024,
262  stream_callback=self.stream_callback,
263  input_device_index=self.device_index,
264  )
265 
266  def __del__(self):
267  self.stop()
268  try:
269  self.stream.close()
270  except:
271  pass
272  finally:
273  self.stream = None
274  try:
275  self.pyaudio.terminate()
276  except:
277  pass
278 
279  def stream_callback(self, in_data, frame_count, time_info, status):
280  # split channel
281  data = np.fromstring(in_data, dtype=np.int16)
282  chunk_per_channel = len(data) / self.channels
283  data = np.reshape(data, (chunk_per_channel, self.channels))
284  chan_data = data[:, self.channel]
285  # invoke callback
286  self.on_audio(chan_data.tostring())
287  return None, pyaudio.paContinue
288 
289  def start(self):
290  if self.stream.is_stopped():
291  self.stream.start_stream()
292 
293  def stop(self):
294  if self.stream.is_active():
295  self.stream.stop_stream()
296 
297 
298 class RespeakerNode(object):
299  def __init__(self):
300  rospy.on_shutdown(self.on_shutdown)
301  self.update_rate = rospy.get_param("~update_rate", 10.0)
302  self.sensor_frame_id = rospy.get_param("~sensor_frame_id", "respeaker_base")
303  self.doa_xy_offset = rospy.get_param("~doa_xy_offset", 0.0)
304  self.doa_yaw_offset = rospy.get_param("~doa_yaw_offset", 90.0)
305  self.speech_prefetch = rospy.get_param("~speech_prefetch", 0.5)
306  self.speech_continuation = rospy.get_param("~speech_continuation", 0.5)
307  self.speech_max_duration = rospy.get_param("~speech_max_duration", 7.0)
308  self.speech_min_duration = rospy.get_param("~speech_min_duration", 0.1)
309  suppress_pyaudio_error = rospy.get_param("~suppress_pyaudio_error", True)
310  #
312  self.speech_audio_buffer = str()
313  self.is_speeching = False
314  self.speech_stopped = rospy.Time(0)
315  self.prev_is_voice = None
316  self.prev_doa = None
317  # advertise
318  self.pub_vad = rospy.Publisher("is_speeching", Bool, queue_size=1, latch=True)
319  self.pub_doa_raw = rospy.Publisher("sound_direction", Int32, queue_size=1, latch=True)
320  self.pub_doa = rospy.Publisher("sound_localization", PoseStamped, queue_size=1, latch=True)
321  self.pub_audio = rospy.Publisher("audio", AudioData, queue_size=10)
322  self.pub_speech_audio = rospy.Publisher("speech_audio", AudioData, queue_size=10)
323  # init config
324  self.config = None
325  self.dyn_srv = Server(RespeakerConfig, self.on_config)
326  # start
327  self.respeaker_audio = RespeakerAudio(self.on_audio, suppress_error=suppress_pyaudio_error)
329  self.speech_prefetch * self.respeaker_audio.rate * self.respeaker_audio.bitdepth / 8.0)
331  self.respeaker_audio.start()
332  self.info_timer = rospy.Timer(rospy.Duration(1.0 / self.update_rate),
333  self.on_timer)
334  self.timer_led = None
335  self.sub_led = rospy.Subscriber("status_led", ColorRGBA, self.on_status_led)
336 
337  def on_shutdown(self):
338  try:
339  self.respeaker.close()
340  except:
341  pass
342  finally:
343  self.respeaker = None
344  try:
345  self.respeaker_audio.stop()
346  except:
347  pass
348  finally:
349  self.respeaker_audio = None
350 
351  def on_config(self, config, level):
352  if self.config is None:
353  # first get value from device and set them as ros parameters
354  for name in config.keys():
355  config[name] = self.respeaker.read(name)
356  else:
357  # if there is different values, write them to device
358  for name, value in config.items():
359  prev_val = self.config[name]
360  if prev_val != value:
361  self.respeaker.write(name, value)
362  self.config = config
363  return config
364 
365  def on_status_led(self, msg):
366  self.respeaker.set_led_color(r=msg.r, g=msg.g, b=msg.b, a=msg.a)
367  if self.timer_led and self.timer_led.is_alive():
368  self.timer_led.shutdown()
369  self.timer_led = rospy.Timer(rospy.Duration(3.0),
370  lambda e: self.respeaker.set_led_trace(),
371  oneshot=True)
372 
373  def on_audio(self, data):
374  self.pub_audio.publish(AudioData(data=data))
375  if self.is_speeching:
376  if len(self.speech_audio_buffer) == 0:
378  self.speech_audio_buffer += data
379  else:
380  self.speech_prefetch_buffer += data
382 
383  def on_timer(self, event):
384  stamp = event.current_real or rospy.Time.now()
385  is_voice = self.respeaker.is_voice()
386  doa_rad = math.radians(self.respeaker.direction - 180.0)
388  doa_rad, math.radians(self.doa_yaw_offset))
389  doa = math.degrees(doa_rad)
390 
391  # vad
392  if is_voice != self.prev_is_voice:
393  self.pub_vad.publish(Bool(data=is_voice))
394  self.prev_is_voice = is_voice
395 
396  # doa
397  if doa != self.prev_doa:
398  self.pub_doa_raw.publish(Int32(data=doa))
399  self.prev_doa = doa
400 
401  msg = PoseStamped()
402  msg.header.frame_id = self.sensor_frame_id
403  msg.header.stamp = stamp
404  ori = T.quaternion_from_euler(math.radians(doa), 0, 0)
405  msg.pose.position.x = self.doa_xy_offset * np.cos(doa_rad)
406  msg.pose.position.y = self.doa_xy_offset * np.sin(doa_rad)
407  msg.pose.orientation.w = ori[0]
408  msg.pose.orientation.x = ori[1]
409  msg.pose.orientation.y = ori[2]
410  msg.pose.orientation.z = ori[3]
411  self.pub_doa.publish(msg)
412 
413  # speech audio
414  if is_voice:
415  self.speech_stopped = stamp
416  if stamp - self.speech_stopped < rospy.Duration(self.speech_continuation):
417  self.is_speeching = True
418  elif self.is_speeching:
419  buf = self.speech_audio_buffer
420  self.speech_audio_buffer = str()
421  self.is_speeching = False
422  duration = 8.0 * len(buf) * self.respeaker_audio.bitwidth
423  duration = duration / self.respeaker_audio.rate / self.respeaker_audio.bitdepth
424  rospy.loginfo("Speech detected for %.3f seconds" % duration)
425  if self.speech_min_duration <= duration < self.speech_max_duration:
426 
427  self.pub_speech_audio.publish(AudioData(data=buf))
428 
429 
430 if __name__ == '__main__':
431  rospy.init_node("respeaker_node")
433  rospy.spin()
def on_config(self, config, level)
static double shortest_angular_distance(double from, double to)
def __init__(self, on_audio, channel=0, suppress_error=True)
def write(self, name, value)
def ignore_stderr(enable=True)
def stream_callback(self, in_data, frame_count, time_info, status)
def set_led_color(self, r, g, b, a)


respeaker_ros
Author(s): Yuki Furuta
autogenerated on Tue May 11 2021 02:55:46