vosk_node.py
Go to the documentation of this file.
1 #!/usr/bin/env python3
2 # -*- coding: utf-8 -*-
3 
4 # Intergrated by Angelo Antikatzidis https://github.com/a-prototype/vosk_ros
5 # Source code based on https://github.com/alphacep/vosk-api/blob/master/python/example/test_microphone.py from VOSK's example code
6 
7 # Tuned for the python flavor of VOSK: vosk-0.3.31
8 # If you do not have vosk then please install it by running $ pip3 install vosk
9 # If you have a previous version of vosk installed then update it by running $ pip3 install vosk --upgrade
10 # Tested on ROS Noetic & Melodic. Please advise the "readme" for using it with ROS Melodic
11 
12 # This is a node that intergrates VOSK with ROS and supports a TTS engine to be used along with it
13 # When the TTS engine is speaking some words, the recognizer will stop listenning to the audio stream so it won't listen to it self :)
14 
15 # It publishes to the topic speech_recognition/vosk_result a custom "speech_recognition" message
16 # It publishes to the topic speech_recognition/final_result a simple string
17 # It publishes to the topic speech_recognition/partial_result a simple string
18 
19 
20 import os
21 import sys
22 import json
23 import queue
24 import vosk
25 import sounddevice as sd
26 from mmap import MAP_SHARED
27 
28 import rospy
29 import rospkg
30 from ros_vosk.msg import speech_recognition
31 from std_msgs.msg import String, Bool
32 
33 import vosk_ros_model_downloader as downloader
34 
35 class vosk_sr():
36  def __init__(self):
37  model_name = rospy.get_param('vosk/model', "vosk-model-small-en-us-0.15")
38 
39  rospack = rospkg.RosPack()
40  rospack.list()
41  package_path = rospack.get_path('ros_vosk')
42 
43  models_dir = os.path.join(package_path, 'models')
44  model_path = os.path.join(models_dir, model_name)
45 
46  if not os.path.exists(model_path):
47  print (f"model '{model_name}' not found in '{models_dir}'! Please use the GUI to download it or configure an available model...")
48  model_downloader = downloader.model_downloader()
49  model_downloader.execute()
50  model_name = model_downloader.model_to_download
51 
52  if not rospy.has_param('vosk/model'):
53  rospy.set_param('vosk/model', model_name)
54 
55  self.tts_status = False
56 
57  # ROS node initialization
58 
59  self.pub_vosk = rospy.Publisher('speech_recognition/vosk_result',speech_recognition, queue_size=10)
60  self.pub_final = rospy.Publisher('speech_recognition/final_result',String, queue_size=10)
61  self.pub_partial = rospy.Publisher('speech_recognition/partial_result',String, queue_size=10)
62 
63  self.rate = rospy.Rate(100)
64 
65  rospy.on_shutdown(self.cleanup)
66 
67  self.msg = speech_recognition()
68 
69  self.q = queue.Queue()
70 
71  self.input_dev_num = sd.query_hostapis()[0]['default_input_device']
72  if self.input_dev_num == -1:
73  rospy.logfatal('No input device found')
74  raise ValueError('No input device found, device number == -1')
75 
76  device_info = sd.query_devices(self.input_dev_num, 'input')
77  # soundfile expects an int, sounddevice provides a float:
78 
79  self.samplerate = int(device_info['default_samplerate'])
80  rospy.set_param('vosk/sample_rate', self.samplerate)
81 
82  self.model = vosk.Model(model_path)
83 
84  #TODO GPUInit automatically selects a CUDA device and allows multithreading.
85  # gpu = vosk.GpuInit() #TODO
86 
87 
88  def cleanup(self):
89  rospy.logwarn("Shutting down VOSK speech recognition node...")
90 
91  def stream_callback(self, indata, frames, time, status):
92  #"""This is called (from a separate thread) for each audio block."""
93  if status:
94  print(status, file=sys.stderr)
95  self.q.put(bytes(indata))
96 
97  def tts_get_status(self,msg):
98  self.tts_status = msg.data
99 
101  rospy.Subscriber('/tts/status', Bool, self.tts_get_status)
102 
103  def speech_recognize(self):
104  try:
105 
106  with sd.RawInputStream(samplerate=self.samplerate, blocksize=16000, device=self.input_dev_num, dtype='int16',
107  channels=1, callback=self.stream_callback):
108  rospy.logdebug('Started recording')
109 
110  rec = vosk.KaldiRecognizer(self.model, self.samplerate)
111  print("Vosk is ready to listen!")
112  isRecognized = False
113  isRecognized_partially = False
114 
115 
116  while not rospy.is_shutdown():
117  self.tts_status_listenner()
118 
119  if self.tts_status == True:
120  # If the text to speech is operating, clear the queue
121  with self.q.mutex:
122  self.q.queue.clear()
123  rec.Reset()
124 
125  elif self.tts_status == False:
126  data = self.q.get()
127  if rec.AcceptWaveform(data):
128 
129  # In case of final result
130  result = rec.FinalResult()
131 
132  diction = json.loads(result)
133  lentext = len(diction["text"])
134 
135  if lentext > 2:
136  result_text = diction["text"]
137  rospy.loginfo(result_text)
138  isRecognized = True
139  else:
140  isRecognized = False
141  # Resets current results so the recognition can continue from scratch
142  rec.Reset()
143  else:
144  # In case of partial result
145  result_partial = rec.PartialResult()
146  if (len(result_partial) > 20):
147 
148  isRecognized_partially = True
149  partial_dict = json.loads(result_partial)
150  partial = partial_dict["partial"]
151 
152  if (isRecognized is True):
153 
154  self.msg.isSpeech_recognized = True
155  self.msg.time_recognized = rospy.Time.now()
156  self.msg.final_result = result_text
157  self.msg.partial_result = "unk"
158  self.pub_vosk.publish(self.msg)
159  rospy.sleep(0.1)
160  self.pub_final.publish(result_text)
161  isRecognized = False
162 
163 
164  elif (isRecognized_partially is True):
165  if partial != "unk":
166  self.msg.isSpeech_recognized = False
167  self.msg.time_recognized = rospy.Time.now()
168  self.msg.final_result = "unk"
169  self.msg.partial_result = partial
170  self.pub_vosk.publish(self.msg)
171  rospy.sleep(0.1)
172  self.pub_partial.publish(partial)
173  partial = "unk"
174  isRecognized_partially = False
175 
176 
177 
178  except Exception as e:
179  exit(type(e).__name__ + ': ' + str(e))
180  except KeyboardInterrupt:
181  rospy.loginfo("Stopping the VOSK speech recognition node...")
182  rospy.sleep(1)
183  print("node terminated")
184 
185 if __name__ == '__main__':
186  try:
187  rospy.init_node('vosk', anonymous=False)
188  rec = vosk_sr()
189  rec.speech_recognize()
190  except (KeyboardInterrupt, rospy.ROSInterruptException) as e:
191  rospy.logfatal("Error occurred! Stopping the vosk speech recognition node...")
192  rospy.sleep(1)
193  print("node terminated")
vosk_node.vosk_sr.model
model
Definition: vosk_node.py:82
vosk_node.vosk_sr.pub_final
pub_final
Definition: vosk_node.py:60
vosk_node.vosk_sr.samplerate
samplerate
Definition: vosk_node.py:79
vosk_node.vosk_sr.pub_vosk
pub_vosk
Definition: vosk_node.py:59
vosk_node.vosk_sr.cleanup
def cleanup(self)
Definition: vosk_node.py:88
vosk_node.vosk_sr.stream_callback
def stream_callback(self, indata, frames, time, status)
Definition: vosk_node.py:91
vosk_node.vosk_sr.tts_get_status
def tts_get_status(self, msg)
Definition: vosk_node.py:97
vosk_node.vosk_sr.__init__
def __init__(self)
Definition: vosk_node.py:36
vosk_node.vosk_sr.input_dev_num
input_dev_num
Definition: vosk_node.py:71
vosk_node.vosk_sr
Definition: vosk_node.py:35
vosk_node.vosk_sr.rate
rate
Definition: vosk_node.py:63
vosk_node.vosk_sr.msg
msg
Definition: vosk_node.py:67
vosk_node.vosk_sr.tts_status_listenner
def tts_status_listenner(self)
Definition: vosk_node.py:100
vosk_node.vosk_sr.tts_status
tts_status
Definition: vosk_node.py:55
vosk_node.vosk_sr.pub_partial
pub_partial
Definition: vosk_node.py:61
vosk_node.vosk_sr.speech_recognize
def speech_recognize(self)
Definition: vosk_node.py:103
vosk_node.vosk_sr.q
q
Definition: vosk_node.py:69


vosk
Author(s): Angelo Antikatzidis. , Alpha Cephei Inc.
autogenerated on Fri May 5 2023 02:23:58