00001
00002
00003
00004
00005 import angles
00006 from contextlib import contextmanager
00007 import usb.core
00008 import usb.util
00009 import pyaudio
00010 import math
00011 import numpy as np
00012 import tf.transformations as T
00013 import os
00014 import rospy
00015 import struct
00016 import sys
00017 import time
00018 from audio_common_msgs.msg import AudioData
00019 from geometry_msgs.msg import PoseStamped
00020 from std_msgs.msg import Bool, Int32, ColorRGBA
00021 from dynamic_reconfigure.server import Server
00022 try:
00023 from pixel_ring import usb_pixel_ring_v2
00024 except IOError as e:
00025 print e
00026 raise RuntimeError("Check the device is connected and recognized")
00027
00028 try:
00029 from respeaker_ros.cfg import RespeakerConfig
00030 except Exception as e:
00031 print e
00032 raise RuntimeError("Need to run respeaker_gencfg.py first")
00033
00034
00035
00036
00037
00038 @contextmanager
00039 def ignore_stderr(enable=True):
00040 if enable:
00041 devnull = None
00042 try:
00043 devnull = os.open(os.devnull, os.O_WRONLY)
00044 stderr = os.dup(2)
00045 sys.stderr.flush()
00046 os.dup2(devnull, 2)
00047 try:
00048 yield
00049 finally:
00050 os.dup2(stderr, 2)
00051 os.close(stderr)
00052 finally:
00053 if devnull is not None:
00054 os.close(devnull)
00055 else:
00056 yield
00057
00058
00059
00060
00061
00062 PARAMETERS = {
00063 'AECFREEZEONOFF': (18, 7, 'int', 1, 0, 'rw', 'Adaptive Echo Canceler updates inhibit.', '0 = Adaptation enabled', '1 = Freeze adaptation, filter only'),
00064 'AECNORM': (18, 19, 'float', 16, 0.25, 'rw', 'Limit on norm of AEC filter coefficients'),
00065 'AECPATHCHANGE': (18, 25, 'int', 1, 0, 'ro', 'AEC Path Change Detection.', '0 = false (no path change detected)', '1 = true (path change detected)'),
00066 'RT60': (18, 26, 'float', 0.9, 0.25, 'ro', 'Current RT60 estimate in seconds'),
00067 'HPFONOFF': (18, 27, 'int', 3, 0, 'rw', 'High-pass Filter on microphone signals.', '0 = OFF', '1 = ON - 70 Hz cut-off', '2 = ON - 125 Hz cut-off', '3 = ON - 180 Hz cut-off'),
00068 'RT60ONOFF': (18, 28, 'int', 1, 0, 'rw', 'RT60 Estimation for AES. 0 = OFF 1 = ON'),
00069 'AECSILENCELEVEL': (18, 30, 'float', 1, 1e-09, 'rw', 'Threshold for signal detection in AEC [-inf .. 0] dBov (Default: -80dBov = 10log10(1x10-8))'),
00070 'AECSILENCEMODE': (18, 31, 'int', 1, 0, 'ro', 'AEC far-end silence detection status. ', '0 = false (signal detected) ', '1 = true (silence detected)'),
00071 'AGCONOFF': (19, 0, 'int', 1, 0, 'rw', 'Automatic Gain Control. ', '0 = OFF ', '1 = ON'),
00072 'AGCMAXGAIN': (19, 1, 'float', 1000, 1, 'rw', 'Maximum AGC gain factor. ', '[0 .. 60] dB (default 30dB = 20log10(31.6))'),
00073 'AGCDESIREDLEVEL': (19, 2, 'float', 0.99, 1e-08, 'rw', 'Target power level of the output signal. ', '[-inf .. 0] dBov (default: -23dBov = 10log10(0.005))'),
00074 'AGCGAIN': (19, 3, 'float', 1000, 1, 'rw', 'Current AGC gain factor. ', '[0 .. 60] dB (default: 0.0dB = 20log10(1.0))'),
00075 'AGCTIME': (19, 4, 'float', 1, 0.1, 'rw', 'Ramps-up / down time-constant in seconds.'),
00076 'CNIONOFF': (19, 5, 'int', 1, 0, 'rw', 'Comfort Noise Insertion.', '0 = OFF', '1 = ON'),
00077 'FREEZEONOFF': (19, 6, 'int', 1, 0, 'rw', 'Adaptive beamformer updates.', '0 = Adaptation enabled', '1 = Freeze adaptation, filter only'),
00078 'STATNOISEONOFF': (19, 8, 'int', 1, 0, 'rw', 'Stationary noise suppression.', '0 = OFF', '1 = ON'),
00079 'GAMMA_NS': (19, 9, 'float', 3, 0, 'rw', 'Over-subtraction factor of stationary noise. min .. max attenuation'),
00080 'MIN_NS': (19, 10, 'float', 1, 0, 'rw', 'Gain-floor for stationary noise suppression.', '[-inf .. 0] dB (default: -16dB = 20log10(0.15))'),
00081 'NONSTATNOISEONOFF': (19, 11, 'int', 1, 0, 'rw', 'Non-stationary noise suppression.', '0 = OFF', '1 = ON'),
00082 'GAMMA_NN': (19, 12, 'float', 3, 0, 'rw', 'Over-subtraction factor of non- stationary noise. min .. max attenuation'),
00083 'MIN_NN': (19, 13, 'float', 1, 0, 'rw', 'Gain-floor for non-stationary noise suppression.', '[-inf .. 0] dB (default: -10dB = 20log10(0.3))'),
00084 'ECHOONOFF': (19, 14, 'int', 1, 0, 'rw', 'Echo suppression.', '0 = OFF', '1 = ON'),
00085 'GAMMA_E': (19, 15, 'float', 3, 0, 'rw', 'Over-subtraction factor of echo (direct and early components). min .. max attenuation'),
00086 'GAMMA_ETAIL': (19, 16, 'float', 3, 0, 'rw', 'Over-subtraction factor of echo (tail components). min .. max attenuation'),
00087 'GAMMA_ENL': (19, 17, 'float', 5, 0, 'rw', 'Over-subtraction factor of non-linear echo. min .. max attenuation'),
00088 'NLATTENONOFF': (19, 18, 'int', 1, 0, 'rw', 'Non-Linear echo attenuation.', '0 = OFF', '1 = ON'),
00089 'NLAEC_MODE': (19, 20, 'int', 2, 0, 'rw', 'Non-Linear AEC training mode.', '0 = OFF', '1 = ON - phase 1', '2 = ON - phase 2'),
00090 'SPEECHDETECTED': (19, 22, 'int', 1, 0, 'ro', 'Speech detection status.', '0 = false (no speech detected)', '1 = true (speech detected)'),
00091 'FSBUPDATED': (19, 23, 'int', 1, 0, 'ro', 'FSB Update Decision.', '0 = false (FSB was not updated)', '1 = true (FSB was updated)'),
00092 'FSBPATHCHANGE': (19, 24, 'int', 1, 0, 'ro', 'FSB Path Change Detection.', '0 = false (no path change detected)', '1 = true (path change detected)'),
00093 'TRANSIENTONOFF': (19, 29, 'int', 1, 0, 'rw', 'Transient echo suppression.', '0 = OFF', '1 = ON'),
00094 'VOICEACTIVITY': (19, 32, 'int', 1, 0, 'ro', 'VAD voice activity status.', '0 = false (no voice activity)', '1 = true (voice activity)'),
00095 'STATNOISEONOFF_SR': (19, 33, 'int', 1, 0, 'rw', 'Stationary noise suppression for ASR.', '0 = OFF', '1 = ON'),
00096 'NONSTATNOISEONOFF_SR': (19, 34, 'int', 1, 0, 'rw', 'Non-stationary noise suppression for ASR.', '0 = OFF', '1 = ON'),
00097 'GAMMA_NS_SR': (19, 35, 'float', 3, 0, 'rw', 'Over-subtraction factor of stationary noise for ASR. ', '[0.0 .. 3.0] (default: 1.0)'),
00098 'GAMMA_NN_SR': (19, 36, 'float', 3, 0, 'rw', 'Over-subtraction factor of non-stationary noise for ASR. ', '[0.0 .. 3.0] (default: 1.1)'),
00099 'MIN_NS_SR': (19, 37, 'float', 1, 0, 'rw', 'Gain-floor for stationary noise suppression for ASR.', '[-inf .. 0] dB (default: -16dB = 20log10(0.15))'),
00100 'MIN_NN_SR': (19, 38, 'float', 1, 0, 'rw', 'Gain-floor for non-stationary noise suppression for ASR.', '[-inf .. 0] dB (default: -10dB = 20log10(0.3))'),
00101 'GAMMAVAD_SR': (19, 39, 'float', 1000, 0, 'rw', 'Set the threshold for voice activity detection.', '[-inf .. 60] dB (default: 3.5dB 20log10(1.5))'),
00102
00103 'DOAANGLE': (21, 0, 'int', 359, 0, 'ro', 'DOA angle. Current value. Orientation depends on build configuration.')
00104 }
00105
00106
00107 class RespeakerInterface(object):
00108 VENDOR_ID = 0x2886
00109 PRODUCT_ID = 0x0018
00110 TIMEOUT = 100000
00111
00112 def __init__(self):
00113 self.dev = usb.core.find(idVendor=self.VENDOR_ID,
00114 idProduct=self.PRODUCT_ID)
00115 if not self.dev:
00116 raise RuntimeError("Failed to find Respeaker device")
00117 rospy.loginfo("Initializing Respeaker device")
00118 self.dev.reset()
00119 self.pixel_ring = usb_pixel_ring_v2.PixelRing(self.dev)
00120 self.set_led_think()
00121 time.sleep(5)
00122 self.set_led_trace()
00123 rospy.loginfo("Respeaker device initialized (Version: %s)" % self.version)
00124
00125 def __del__(self):
00126 try:
00127 self.close()
00128 except:
00129 pass
00130 finally:
00131 self.dev = None
00132
00133 def write(self, name, value):
00134 try:
00135 data = PARAMETERS[name]
00136 except KeyError:
00137 return
00138
00139 if data[5] == 'ro':
00140 raise ValueError('{} is read-only'.format(name))
00141
00142 id = data[0]
00143
00144
00145 if data[2] == 'int':
00146 payload = struct.pack(b'iii', data[1], int(value), 1)
00147 else:
00148 payload = struct.pack(b'ifi', data[1], float(value), 0)
00149
00150 self.dev.ctrl_transfer(
00151 usb.util.CTRL_OUT | usb.util.CTRL_TYPE_VENDOR | usb.util.CTRL_RECIPIENT_DEVICE,
00152 0, 0, id, payload, self.TIMEOUT)
00153
00154 def read(self, name):
00155 try:
00156 data = PARAMETERS[name]
00157 except KeyError:
00158 return
00159
00160 id = data[0]
00161
00162 cmd = 0x80 | data[1]
00163 if data[2] == 'int':
00164 cmd |= 0x40
00165
00166 length = 8
00167
00168 response = self.dev.ctrl_transfer(
00169 usb.util.CTRL_IN | usb.util.CTRL_TYPE_VENDOR | usb.util.CTRL_RECIPIENT_DEVICE,
00170 0, cmd, id, length, self.TIMEOUT)
00171
00172 response = struct.unpack(b'ii', response.tostring())
00173
00174 if data[2] == 'int':
00175 result = response[0]
00176 else:
00177 result = response[0] * (2.**response[1])
00178
00179 return result
00180
00181 def set_led_think(self):
00182 self.pixel_ring.set_brightness(10)
00183 self.pixel_ring.think()
00184
00185 def set_led_trace(self):
00186 self.pixel_ring.set_brightness(20)
00187 self.pixel_ring.trace()
00188
00189 def set_led_color(self, r, g, b, a):
00190 self.pixel_ring.set_brightness(int(20 * a))
00191 self.pixel_ring.set_color(r=int(r*255), g=int(g*255), b=int(b*255))
00192
00193 def set_vad_threshold(self, db):
00194 self.write('GAMMAVAD_SR', db)
00195
00196 def is_voice(self):
00197 return self.read('VOICEACTIVITY')
00198
00199 @property
00200 def direction(self):
00201 return self.read('DOAANGLE')
00202
00203 @property
00204 def version(self):
00205 return self.dev.ctrl_transfer(
00206 usb.util.CTRL_IN | usb.util.CTRL_TYPE_VENDOR | usb.util.CTRL_RECIPIENT_DEVICE,
00207 0, 0x80, 0, 1, self.TIMEOUT)[0]
00208
00209 def close(self):
00210 """
00211 close the interface
00212 """
00213 usb.util.dispose_resources(self.dev)
00214
00215
00216 class RespeakerAudio(object):
00217 def __init__(self, on_audio, channel=0, suppress_error=True):
00218 self.on_audio = on_audio
00219 with ignore_stderr(enable=suppress_error):
00220 self.pyaudio = pyaudio.PyAudio()
00221 self.channels = None
00222 self.channel = channel
00223 self.device_index = None
00224 self.rate = 16000
00225 self.bitwidth = 2
00226 self.bitdepth = 16
00227
00228
00229 count = self.pyaudio.get_device_count()
00230 rospy.logdebug("%d audio devices found" % count)
00231 for i in range(count):
00232 info = self.pyaudio.get_device_info_by_index(i)
00233 name = info["name"].encode("utf-8")
00234 chan = info["maxInputChannels"]
00235 rospy.logdebug(" - %d: %s" % (i, name))
00236 if name.lower().find("respeaker") >= 0:
00237 self.channels = chan
00238 self.device_index = i
00239 rospy.loginfo("Found %d: %s (channels: %d)" % (i, name, chan))
00240 break
00241 if self.device_index is None:
00242 rospy.logwarn("Failed to find respeaker device by name. Using default input")
00243 info = self.pyaudio.get_default_input_device_info()
00244 self.channels = info["maxInputChannels"]
00245 self.device_index = info["index"]
00246
00247 if self.channels != 6:
00248 rospy.logwarn("%d channel is found for respeaker" % self.channels)
00249 rospy.logwarn("You may have to update firmware.")
00250 self.channel = min(self.channels - 1, max(0, self.channel))
00251
00252 self.stream = self.pyaudio.open(
00253 input=True, start=False,
00254 format=pyaudio.paInt16,
00255 channels=self.channels,
00256 rate=self.rate,
00257 frames_per_buffer=1024,
00258 stream_callback=self.stream_callback,
00259 input_device_index=self.device_index,
00260 )
00261
00262 def __del__(self):
00263 self.stop()
00264 try:
00265 self.stream.close()
00266 except:
00267 pass
00268 finally:
00269 self.stream = None
00270 try:
00271 self.pyaudio.terminate()
00272 except:
00273 pass
00274
00275 def stream_callback(self, in_data, frame_count, time_info, status):
00276
00277 data = np.fromstring(in_data, dtype=np.int16)
00278 chunk_per_channel = len(data) / self.channels
00279 data = np.reshape(data, (chunk_per_channel, self.channels))
00280 chan_data = data[:, self.channel]
00281
00282 self.on_audio(chan_data.tostring())
00283 return None, pyaudio.paContinue
00284
00285 def start(self):
00286 if self.stream.is_stopped():
00287 self.stream.start_stream()
00288
00289 def stop(self):
00290 if self.stream.is_active():
00291 self.stream.stop_stream()
00292
00293
00294 class RespeakerNode(object):
00295 def __init__(self):
00296 rospy.on_shutdown(self.on_shutdown)
00297 self.update_rate = rospy.get_param("~update_rate", 10.0)
00298 self.sensor_frame_id = rospy.get_param("~sensor_frame_id", "respeaker_base")
00299 self.doa_xy_offset = rospy.get_param("~doa_xy_offset", 0.0)
00300 self.doa_yaw_offset = rospy.get_param("~doa_yaw_offset", 90.0)
00301 self.speech_prefetch = rospy.get_param("~speech_prefetch", 0.5)
00302 self.speech_continuation = rospy.get_param("~speech_continuation", 0.5)
00303 self.speech_max_duration = rospy.get_param("~speech_max_duration", 7.0)
00304 self.speech_min_duration = rospy.get_param("~speech_min_duration", 0.1)
00305 suppress_pyaudio_error = rospy.get_param("~suppress_pyaudio_error", True)
00306
00307 self.respeaker = RespeakerInterface()
00308 self.speech_audio_buffer = str()
00309 self.is_speeching = False
00310 self.speech_stopped = rospy.Time(0)
00311 self.prev_is_voice = None
00312 self.prev_doa = None
00313
00314 self.pub_vad = rospy.Publisher("is_speeching", Bool, queue_size=1, latch=True)
00315 self.pub_doa_raw = rospy.Publisher("sound_direction", Int32, queue_size=1, latch=True)
00316 self.pub_doa = rospy.Publisher("sound_localization", PoseStamped, queue_size=1, latch=True)
00317 self.pub_audio = rospy.Publisher("audio", AudioData, queue_size=10)
00318 self.pub_speech_audio = rospy.Publisher("speech_audio", AudioData, queue_size=10)
00319
00320 self.config = None
00321 self.dyn_srv = Server(RespeakerConfig, self.on_config)
00322
00323 self.respeaker_audio = RespeakerAudio(self.on_audio, suppress_error=suppress_pyaudio_error)
00324 self.speech_prefetch_bytes = int(
00325 self.speech_prefetch * self.respeaker_audio.rate * self.respeaker_audio.bitdepth / 8.0)
00326 self.speech_prefetch_buffer = str()
00327 self.respeaker_audio.start()
00328 self.info_timer = rospy.Timer(rospy.Duration(1.0 / self.update_rate),
00329 self.on_timer)
00330 self.timer_led = None
00331 self.sub_led = rospy.Subscriber("status_led", ColorRGBA, self.on_status_led)
00332
00333 def on_shutdown(self):
00334 try:
00335 self.respeaker.close()
00336 except:
00337 pass
00338 finally:
00339 self.respeaker = None
00340 try:
00341 self.respeaker_audio.stop()
00342 except:
00343 pass
00344 finally:
00345 self.respeaker_audio = None
00346
00347 def on_config(self, config, level):
00348 if self.config is None:
00349
00350 for name in config.keys():
00351 config[name] = self.respeaker.read(name)
00352 else:
00353
00354 for name, value in config.items():
00355 prev_val = self.config[name]
00356 if prev_val != value:
00357 self.respeaker.write(name, value)
00358 self.config = config
00359 return config
00360
00361 def on_status_led(self, msg):
00362 self.respeaker.set_led_color(r=msg.r, g=msg.g, b=msg.b, a=msg.a)
00363 if self.timer_led and self.timer_led.is_alive():
00364 self.timer_led.shutdown()
00365 self.timer_led = rospy.Timer(rospy.Duration(3.0),
00366 lambda e: self.respeaker.set_led_trace(),
00367 oneshot=True)
00368
00369 def on_audio(self, data):
00370 self.pub_audio.publish(AudioData(data=data))
00371 if self.is_speeching:
00372 if len(self.speech_audio_buffer) == 0:
00373 self.speech_audio_buffer = self.speech_prefetch_buffer
00374 self.speech_audio_buffer += data
00375 else:
00376 self.speech_prefetch_buffer += data
00377 self.speech_prefetch_buffer = self.speech_prefetch_buffer[-self.speech_prefetch_bytes:]
00378
00379 def on_timer(self, event):
00380 stamp = event.current_real or rospy.Time.now()
00381 is_voice = self.respeaker.is_voice()
00382 doa_rad = math.radians(self.respeaker.direction - 180.0)
00383 doa_rad = angles.shortest_angular_distance(
00384 doa_rad, math.radians(self.doa_yaw_offset))
00385 doa = math.degrees(doa_rad)
00386
00387
00388 if is_voice != self.prev_is_voice:
00389 self.pub_vad.publish(Bool(data=is_voice))
00390 self.prev_is_voice = is_voice
00391
00392
00393 if doa != self.prev_doa:
00394 self.pub_doa_raw.publish(Int32(data=doa))
00395 self.prev_doa = doa
00396
00397 msg = PoseStamped()
00398 msg.header.frame_id = self.sensor_frame_id
00399 msg.header.stamp = stamp
00400 ori = T.quaternion_from_euler(math.radians(doa), 0, 0)
00401 msg.pose.position.x = self.doa_xy_offset * np.cos(doa_rad)
00402 msg.pose.position.y = self.doa_xy_offset * np.sin(doa_rad)
00403 msg.pose.orientation.w = ori[0]
00404 msg.pose.orientation.x = ori[1]
00405 msg.pose.orientation.y = ori[2]
00406 msg.pose.orientation.z = ori[3]
00407 self.pub_doa.publish(msg)
00408
00409
00410 if is_voice:
00411 self.speech_stopped = stamp
00412 if stamp - self.speech_stopped < rospy.Duration(self.speech_continuation):
00413 self.is_speeching = True
00414 elif self.is_speeching:
00415 buf = self.speech_audio_buffer
00416 self.speech_audio_buffer = str()
00417 self.is_speeching = False
00418 duration = 8.0 * len(buf) * self.respeaker_audio.bitwidth
00419 duration = duration / self.respeaker_audio.rate / self.respeaker_audio.bitdepth
00420 rospy.loginfo("Speech detected for %.3f seconds" % duration)
00421 if self.speech_min_duration <= duration < self.speech_max_duration:
00422
00423 self.pub_speech_audio.publish(AudioData(data=buf))
00424
00425
00426 if __name__ == '__main__':
00427 rospy.init_node("respeaker_node")
00428 n = RespeakerNode()
00429 rospy.spin()