respeaker_ros: respeaker_node.py Source File

Go to the documentation of this file.
00001 #!/usr/bin/env python
00002 # -*- coding: utf-8 -*-
00003 # Author: furushchev <furushchev@jsk.imi.i.u-tokyo.ac.jp>
00004 
00005 import angles
00006 from contextlib import contextmanager
00007 import usb.core
00008 import usb.util
00009 import pyaudio
00010 import math
00011 import numpy as np
00012 import tf.transformations as T
00013 import os
00014 import rospy
00015 import struct
00016 import sys
00017 import time
00018 from audio_common_msgs.msg import AudioData
00019 from geometry_msgs.msg import PoseStamped
00020 from std_msgs.msg import Bool, Int32, ColorRGBA
00021 from dynamic_reconfigure.server import Server
00022 try:
00023     from pixel_ring import usb_pixel_ring_v2
00024 except IOError as e:
00025     print e
00026     raise RuntimeError("Check the device is connected and recognized")
00027 
00028 try:
00029     from respeaker_ros.cfg import RespeakerConfig
00030 except Exception as e:
00031     print e
00032     raise RuntimeError("Need to run respeaker_gencfg.py first")
00033 
00034 
00035 # suppress error messages from ALSA
00036 # https://stackoverflow.com/questions/7088672/pyaudio-working-but-spits-out-error-messages-each-time
00037 # https://stackoverflow.com/questions/36956083/how-can-the-terminal-output-of-executables-run-by-python-functions-be-silenced-i
00038 @contextmanager
00039 def ignore_stderr(enable=True):
00040     if enable:
00041         devnull = None
00042         try:
00043             devnull = os.open(os.devnull, os.O_WRONLY)
00044             stderr = os.dup(2)
00045             sys.stderr.flush()
00046             os.dup2(devnull, 2)
00047             try:
00048                 yield
00049             finally:
00050                 os.dup2(stderr, 2)
00051                 os.close(stderr)
00052         finally:
00053             if devnull is not None:
00054                 os.close(devnull)
00055     else:
00056         yield
00057 
00058 
00059 # Partly copied from https://github.com/respeaker/usb_4_mic_array
00060 # parameter list
00061 # name: (id, offset, type, max, min , r/w, info)
00062 PARAMETERS = {
00063     'AECFREEZEONOFF': (18, 7, 'int', 1, 0, 'rw', 'Adaptive Echo Canceler updates inhibit.', '0 = Adaptation enabled', '1 = Freeze adaptation, filter only'),
00064     'AECNORM': (18, 19, 'float', 16, 0.25, 'rw', 'Limit on norm of AEC filter coefficients'),
00065     'AECPATHCHANGE': (18, 25, 'int', 1, 0, 'ro', 'AEC Path Change Detection.', '0 = false (no path change detected)', '1 = true (path change detected)'),
00066     'RT60': (18, 26, 'float', 0.9, 0.25, 'ro', 'Current RT60 estimate in seconds'),
00067     'HPFONOFF': (18, 27, 'int', 3, 0, 'rw', 'High-pass Filter on microphone signals.', '0 = OFF', '1 = ON - 70 Hz cut-off', '2 = ON - 125 Hz cut-off', '3 = ON - 180 Hz cut-off'),
00068     'RT60ONOFF': (18, 28, 'int', 1, 0, 'rw', 'RT60 Estimation for AES. 0 = OFF 1 = ON'),
00069     'AECSILENCELEVEL': (18, 30, 'float', 1, 1e-09, 'rw', 'Threshold for signal detection in AEC [-inf .. 0] dBov (Default: -80dBov = 10log10(1x10-8))'),
00070     'AECSILENCEMODE': (18, 31, 'int', 1, 0, 'ro', 'AEC far-end silence detection status. ', '0 = false (signal detected) ', '1 = true (silence detected)'),
00071     'AGCONOFF': (19, 0, 'int', 1, 0, 'rw', 'Automatic Gain Control. ', '0 = OFF ', '1 = ON'),
00072     'AGCMAXGAIN': (19, 1, 'float', 1000, 1, 'rw', 'Maximum AGC gain factor. ', '[0 .. 60] dB (default 30dB = 20log10(31.6))'),
00073     'AGCDESIREDLEVEL': (19, 2, 'float', 0.99, 1e-08, 'rw', 'Target power level of the output signal. ', '[-inf .. 0] dBov (default: -23dBov = 10log10(0.005))'),
00074     'AGCGAIN': (19, 3, 'float', 1000, 1, 'rw', 'Current AGC gain factor. ', '[0 .. 60] dB (default: 0.0dB = 20log10(1.0))'),
00075     'AGCTIME': (19, 4, 'float', 1, 0.1, 'rw', 'Ramps-up / down time-constant in seconds.'),
00076     'CNIONOFF': (19, 5, 'int', 1, 0, 'rw', 'Comfort Noise Insertion.', '0 = OFF', '1 = ON'),
00077     'FREEZEONOFF': (19, 6, 'int', 1, 0, 'rw', 'Adaptive beamformer updates.', '0 = Adaptation enabled', '1 = Freeze adaptation, filter only'),
00078     'STATNOISEONOFF': (19, 8, 'int', 1, 0, 'rw', 'Stationary noise suppression.', '0 = OFF', '1 = ON'),
00079     'GAMMA_NS': (19, 9, 'float', 3, 0, 'rw', 'Over-subtraction factor of stationary noise. min .. max attenuation'),
00080     'MIN_NS': (19, 10, 'float', 1, 0, 'rw', 'Gain-floor for stationary noise suppression.', '[-inf .. 0] dB (default: -16dB = 20log10(0.15))'),
00081     'NONSTATNOISEONOFF': (19, 11, 'int', 1, 0, 'rw', 'Non-stationary noise suppression.', '0 = OFF', '1 = ON'),
00082     'GAMMA_NN': (19, 12, 'float', 3, 0, 'rw', 'Over-subtraction factor of non- stationary noise. min .. max attenuation'),
00083     'MIN_NN': (19, 13, 'float', 1, 0, 'rw', 'Gain-floor for non-stationary noise suppression.', '[-inf .. 0] dB (default: -10dB = 20log10(0.3))'),
00084     'ECHOONOFF': (19, 14, 'int', 1, 0, 'rw', 'Echo suppression.', '0 = OFF', '1 = ON'),
00085     'GAMMA_E': (19, 15, 'float', 3, 0, 'rw', 'Over-subtraction factor of echo (direct and early components). min .. max attenuation'),
00086     'GAMMA_ETAIL': (19, 16, 'float', 3, 0, 'rw', 'Over-subtraction factor of echo (tail components). min .. max attenuation'),
00087     'GAMMA_ENL': (19, 17, 'float', 5, 0, 'rw', 'Over-subtraction factor of non-linear echo. min .. max attenuation'),
00088     'NLATTENONOFF': (19, 18, 'int', 1, 0, 'rw', 'Non-Linear echo attenuation.', '0 = OFF', '1 = ON'),
00089     'NLAEC_MODE': (19, 20, 'int', 2, 0, 'rw', 'Non-Linear AEC training mode.', '0 = OFF', '1 = ON - phase 1', '2 = ON - phase 2'),
00090     'SPEECHDETECTED': (19, 22, 'int', 1, 0, 'ro', 'Speech detection status.', '0 = false (no speech detected)', '1 = true (speech detected)'),
00091     'FSBUPDATED': (19, 23, 'int', 1, 0, 'ro', 'FSB Update Decision.', '0 = false (FSB was not updated)', '1 = true (FSB was updated)'),
00092     'FSBPATHCHANGE': (19, 24, 'int', 1, 0, 'ro', 'FSB Path Change Detection.', '0 = false (no path change detected)', '1 = true (path change detected)'),
00093     'TRANSIENTONOFF': (19, 29, 'int', 1, 0, 'rw', 'Transient echo suppression.', '0 = OFF', '1 = ON'),
00094     'VOICEACTIVITY': (19, 32, 'int', 1, 0, 'ro', 'VAD voice activity status.', '0 = false (no voice activity)', '1 = true (voice activity)'),
00095     'STATNOISEONOFF_SR': (19, 33, 'int', 1, 0, 'rw', 'Stationary noise suppression for ASR.', '0 = OFF', '1 = ON'),
00096     'NONSTATNOISEONOFF_SR': (19, 34, 'int', 1, 0, 'rw', 'Non-stationary noise suppression for ASR.', '0 = OFF', '1 = ON'),
00097     'GAMMA_NS_SR': (19, 35, 'float', 3, 0, 'rw', 'Over-subtraction factor of stationary noise for ASR. ', '[0.0 .. 3.0] (default: 1.0)'),
00098     'GAMMA_NN_SR': (19, 36, 'float', 3, 0, 'rw', 'Over-subtraction factor of non-stationary noise for ASR. ', '[0.0 .. 3.0] (default: 1.1)'),
00099     'MIN_NS_SR': (19, 37, 'float', 1, 0, 'rw', 'Gain-floor for stationary noise suppression for ASR.', '[-inf .. 0] dB (default: -16dB = 20log10(0.15))'),
00100     'MIN_NN_SR': (19, 38, 'float', 1, 0, 'rw', 'Gain-floor for non-stationary noise suppression for ASR.', '[-inf .. 0] dB (default: -10dB = 20log10(0.3))'),
00101     'GAMMAVAD_SR': (19, 39, 'float', 1000, 0, 'rw', 'Set the threshold for voice activity detection.', '[-inf .. 60] dB (default: 3.5dB 20log10(1.5))'),
00102     # 'KEYWORDDETECT': (20, 0, 'int', 1, 0, 'ro', 'Keyword detected. Current value so needs polling.'),
00103     'DOAANGLE': (21, 0, 'int', 359, 0, 'ro', 'DOA angle. Current value. Orientation depends on build configuration.')
00104 }
00105 
00106 
00107 class RespeakerInterface(object):
00108     VENDOR_ID = 0x2886
00109     PRODUCT_ID = 0x0018
00110     TIMEOUT = 100000
00111 
00112     def __init__(self):
00113         self.dev = usb.core.find(idVendor=self.VENDOR_ID,
00114                                  idProduct=self.PRODUCT_ID)
00115         if not self.dev:
00116             raise RuntimeError("Failed to find Respeaker device")
00117         rospy.loginfo("Initializing Respeaker device")
00118         self.dev.reset()
00119         self.pixel_ring = usb_pixel_ring_v2.PixelRing(self.dev)
00120         self.set_led_think()
00121         time.sleep(5)  # it will take 5 seconds to re-recognize as audio device
00122         self.set_led_trace()
00123         rospy.loginfo("Respeaker device initialized (Version: %s)" % self.version)
00124 
00125     def __del__(self):
00126         try:
00127             self.close()
00128         except:
00129             pass
00130         finally:
00131             self.dev = None
00132 
00133     def write(self, name, value):
00134         try:
00135             data = PARAMETERS[name]
00136         except KeyError:
00137             return
00138 
00139         if data[5] == 'ro':
00140             raise ValueError('{} is read-only'.format(name))
00141 
00142         id = data[0]
00143 
00144         # 4 bytes offset, 4 bytes value, 4 bytes type
00145         if data[2] == 'int':
00146             payload = struct.pack(b'iii', data[1], int(value), 1)
00147         else:
00148             payload = struct.pack(b'ifi', data[1], float(value), 0)
00149 
00150         self.dev.ctrl_transfer(
00151             usb.util.CTRL_OUT | usb.util.CTRL_TYPE_VENDOR | usb.util.CTRL_RECIPIENT_DEVICE,
00152             0, 0, id, payload, self.TIMEOUT)
00153 
00154     def read(self, name):
00155         try:
00156             data = PARAMETERS[name]
00157         except KeyError:
00158             return
00159 
00160         id = data[0]
00161 
00162         cmd = 0x80 | data[1]
00163         if data[2] == 'int':
00164             cmd |= 0x40
00165 
00166         length = 8
00167 
00168         response = self.dev.ctrl_transfer(
00169             usb.util.CTRL_IN | usb.util.CTRL_TYPE_VENDOR | usb.util.CTRL_RECIPIENT_DEVICE,
00170             0, cmd, id, length, self.TIMEOUT)
00171 
00172         response = struct.unpack(b'ii', response.tostring())
00173 
00174         if data[2] == 'int':
00175             result = response[0]
00176         else:
00177             result = response[0] * (2.**response[1])
00178 
00179         return result
00180 
00181     def set_led_think(self):
00182         self.pixel_ring.set_brightness(10)
00183         self.pixel_ring.think()
00184 
00185     def set_led_trace(self):
00186         self.pixel_ring.set_brightness(20)
00187         self.pixel_ring.trace()
00188 
00189     def set_led_color(self, r, g, b, a):
00190         self.pixel_ring.set_brightness(int(20 * a))
00191         self.pixel_ring.set_color(r=int(r*255), g=int(g*255), b=int(b*255))
00192 
00193     def set_vad_threshold(self, db):
00194         self.write('GAMMAVAD_SR', db)
00195 
00196     def is_voice(self):
00197         return self.read('VOICEACTIVITY')
00198 
00199     @property
00200     def direction(self):
00201         return self.read('DOAANGLE')
00202 
00203     @property
00204     def version(self):
00205         return self.dev.ctrl_transfer(
00206             usb.util.CTRL_IN | usb.util.CTRL_TYPE_VENDOR | usb.util.CTRL_RECIPIENT_DEVICE,
00207             0, 0x80, 0, 1, self.TIMEOUT)[0]
00208 
00209     def close(self):
00210         """
00211         close the interface
00212         """
00213         usb.util.dispose_resources(self.dev)
00214 
00215 
00216 class RespeakerAudio(object):
00217     def __init__(self, on_audio, channel=0, suppress_error=True):
00218         self.on_audio = on_audio
00219         with ignore_stderr(enable=suppress_error):
00220             self.pyaudio = pyaudio.PyAudio()
00221         self.channels = None
00222         self.channel = channel
00223         self.device_index = None
00224         self.rate = 16000
00225         self.bitwidth = 2
00226         self.bitdepth = 16
00227 
00228         # find device
00229         count = self.pyaudio.get_device_count()
00230         rospy.logdebug("%d audio devices found" % count)
00231         for i in range(count):
00232             info = self.pyaudio.get_device_info_by_index(i)
00233             name = info["name"].encode("utf-8")
00234             chan = info["maxInputChannels"]
00235             rospy.logdebug(" - %d: %s" % (i, name))
00236             if name.lower().find("respeaker") >= 0:
00237                 self.channels = chan
00238                 self.device_index = i
00239                 rospy.loginfo("Found %d: %s (channels: %d)" % (i, name, chan))
00240                 break
00241         if self.device_index is None:
00242             rospy.logwarn("Failed to find respeaker device by name. Using default input")
00243             info = self.pyaudio.get_default_input_device_info()
00244             self.channels = info["maxInputChannels"]
00245             self.device_index = info["index"]
00246 
00247         if self.channels != 6:
00248             rospy.logwarn("%d channel is found for respeaker" % self.channels)
00249             rospy.logwarn("You may have to update firmware.")
00250         self.channel = min(self.channels - 1, max(0, self.channel))
00251 
00252         self.stream = self.pyaudio.open(
00253             input=True, start=False,
00254             format=pyaudio.paInt16,
00255             channels=self.channels,
00256             rate=self.rate,
00257             frames_per_buffer=1024,
00258             stream_callback=self.stream_callback,
00259             input_device_index=self.device_index,
00260         )
00261 
00262     def __del__(self):
00263         self.stop()
00264         try:
00265             self.stream.close()
00266         except:
00267             pass
00268         finally:
00269             self.stream = None
00270         try:
00271             self.pyaudio.terminate()
00272         except:
00273             pass
00274 
00275     def stream_callback(self, in_data, frame_count, time_info, status):
00276         # split channel
00277         data = np.fromstring(in_data, dtype=np.int16)
00278         chunk_per_channel = len(data) / self.channels
00279         data = np.reshape(data, (chunk_per_channel, self.channels))
00280         chan_data = data[:, self.channel]
00281         # invoke callback
00282         self.on_audio(chan_data.tostring())
00283         return None, pyaudio.paContinue
00284 
00285     def start(self):
00286         if self.stream.is_stopped():
00287             self.stream.start_stream()
00288 
00289     def stop(self):
00290         if self.stream.is_active():
00291             self.stream.stop_stream()
00292 
00293 
00294 class RespeakerNode(object):
00295     def __init__(self):
00296         rospy.on_shutdown(self.on_shutdown)
00297         self.update_rate = rospy.get_param("~update_rate", 10.0)
00298         self.sensor_frame_id = rospy.get_param("~sensor_frame_id", "respeaker_base")
00299         self.doa_xy_offset = rospy.get_param("~doa_xy_offset", 0.0)
00300         self.doa_yaw_offset = rospy.get_param("~doa_yaw_offset", 90.0)
00301         self.speech_prefetch = rospy.get_param("~speech_prefetch", 0.5)
00302         self.speech_continuation = rospy.get_param("~speech_continuation", 0.5)
00303         self.speech_max_duration = rospy.get_param("~speech_max_duration", 7.0)
00304         self.speech_min_duration = rospy.get_param("~speech_min_duration", 0.1)
00305         suppress_pyaudio_error = rospy.get_param("~suppress_pyaudio_error", True)
00306         #
00307         self.respeaker = RespeakerInterface()
00308         self.speech_audio_buffer = str()
00309         self.is_speeching = False
00310         self.speech_stopped = rospy.Time(0)
00311         self.prev_is_voice = None
00312         self.prev_doa = None
00313         # advertise
00314         self.pub_vad = rospy.Publisher("is_speeching", Bool, queue_size=1, latch=True)
00315         self.pub_doa_raw = rospy.Publisher("sound_direction", Int32, queue_size=1, latch=True)
00316         self.pub_doa = rospy.Publisher("sound_localization", PoseStamped, queue_size=1, latch=True)
00317         self.pub_audio = rospy.Publisher("audio", AudioData, queue_size=10)
00318         self.pub_speech_audio = rospy.Publisher("speech_audio", AudioData, queue_size=10)
00319         # init config
00320         self.config = None
00321         self.dyn_srv = Server(RespeakerConfig, self.on_config)
00322         # start
00323         self.respeaker_audio = RespeakerAudio(self.on_audio, suppress_error=suppress_pyaudio_error)
00324         self.speech_prefetch_bytes = int(
00325             self.speech_prefetch * self.respeaker_audio.rate * self.respeaker_audio.bitdepth / 8.0)
00326         self.speech_prefetch_buffer = str()
00327         self.respeaker_audio.start()
00328         self.info_timer = rospy.Timer(rospy.Duration(1.0 / self.update_rate),
00329                                       self.on_timer)
00330         self.timer_led = None
00331         self.sub_led = rospy.Subscriber("status_led", ColorRGBA, self.on_status_led)
00332 
00333     def on_shutdown(self):
00334         try:
00335             self.respeaker.close()
00336         except:
00337             pass
00338         finally:
00339             self.respeaker = None
00340         try:
00341             self.respeaker_audio.stop()
00342         except:
00343             pass
00344         finally:
00345             self.respeaker_audio = None
00346 
00347     def on_config(self, config, level):
00348         if self.config is None:
00349             # first get value from device and set them as ros parameters
00350             for name in config.keys():
00351                 config[name] = self.respeaker.read(name)
00352         else:
00353             # if there is different values, write them to device
00354             for name, value in config.items():
00355                 prev_val = self.config[name]
00356                 if prev_val != value:
00357                     self.respeaker.write(name, value)
00358         self.config = config
00359         return config
00360 
00361     def on_status_led(self, msg):
00362         self.respeaker.set_led_color(r=msg.r, g=msg.g, b=msg.b, a=msg.a)
00363         if self.timer_led and self.timer_led.is_alive():
00364             self.timer_led.shutdown()
00365         self.timer_led = rospy.Timer(rospy.Duration(3.0),
00366                                        lambda e: self.respeaker.set_led_trace(),
00367                                        oneshot=True)
00368 
00369     def on_audio(self, data):
00370         self.pub_audio.publish(AudioData(data=data))
00371         if self.is_speeching:
00372             if len(self.speech_audio_buffer) == 0:
00373                 self.speech_audio_buffer = self.speech_prefetch_buffer
00374             self.speech_audio_buffer += data
00375         else:
00376             self.speech_prefetch_buffer += data
00377             self.speech_prefetch_buffer = self.speech_prefetch_buffer[-self.speech_prefetch_bytes:]
00378 
00379     def on_timer(self, event):
00380         stamp = event.current_real or rospy.Time.now()
00381         is_voice = self.respeaker.is_voice()
00382         doa_rad = math.radians(self.respeaker.direction - 180.0)
00383         doa_rad = angles.shortest_angular_distance(
00384             doa_rad, math.radians(self.doa_yaw_offset))
00385         doa = math.degrees(doa_rad)
00386 
00387         # vad
00388         if is_voice != self.prev_is_voice:
00389             self.pub_vad.publish(Bool(data=is_voice))
00390             self.prev_is_voice = is_voice
00391 
00392         # doa
00393         if doa != self.prev_doa:
00394             self.pub_doa_raw.publish(Int32(data=doa))
00395             self.prev_doa = doa
00396 
00397             msg = PoseStamped()
00398             msg.header.frame_id = self.sensor_frame_id
00399             msg.header.stamp = stamp
00400             ori = T.quaternion_from_euler(math.radians(doa), 0, 0)
00401             msg.pose.position.x = self.doa_xy_offset * np.cos(doa_rad)
00402             msg.pose.position.y = self.doa_xy_offset * np.sin(doa_rad)
00403             msg.pose.orientation.w = ori[0]
00404             msg.pose.orientation.x = ori[1]
00405             msg.pose.orientation.y = ori[2]
00406             msg.pose.orientation.z = ori[3]
00407             self.pub_doa.publish(msg)
00408 
00409         # speech audio
00410         if is_voice:
00411             self.speech_stopped = stamp
00412         if stamp - self.speech_stopped < rospy.Duration(self.speech_continuation):
00413             self.is_speeching = True
00414         elif self.is_speeching:
00415             buf = self.speech_audio_buffer
00416             self.speech_audio_buffer = str()
00417             self.is_speeching = False
00418             duration = 8.0 * len(buf) * self.respeaker_audio.bitwidth
00419             duration = duration / self.respeaker_audio.rate / self.respeaker_audio.bitdepth
00420             rospy.loginfo("Speech detected for %.3f seconds" % duration)
00421             if self.speech_min_duration <= duration < self.speech_max_duration:
00422 
00423                 self.pub_speech_audio.publish(AudioData(data=buf))
00424 
00425 
00426 if __name__ == '__main__':
00427     rospy.init_node("respeaker_node")
00428     n = RespeakerNode()
00429     rospy.spin()