google_client.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 
3 # from google.cloud import speech
4 from google.cloud import speech_v1p1beta1 as speech
5 from google.cloud.speech_v1p1beta1 import enums
6 from google.cloud.speech_v1p1beta1 import types
7 from google.api_core.exceptions import InvalidArgument, OutOfRange
8 import pyaudio
9 import Queue
10 import rospy
11 import rospkg
12 import signal
13 import yaml
14 from std_msgs.msg import String
15 
16 
18  def __init__(self):
19  # Audio stream input setup
20  FORMAT = pyaudio.paInt16
21  CHANNELS = 1
22  RATE = 16000
23  self.CHUNK = 4096
24  self.audio = pyaudio.PyAudio()
25  self.stream = self.audio.open(format=FORMAT, channels=CHANNELS,
26  rate=RATE, input=True,
27  frames_per_buffer=self.CHUNK,
28  stream_callback=self.get_data)
29  self._buff = Queue.Queue() # Buffer to hold audio data
30  self.closed = False
31 
32  # ROS Text Publisher
33  self.text_pub = rospy.Publisher('/google_client/text', String, queue_size=10)
34 
35  # Context clues in yaml file
36  rospack = rospkg.RosPack()
37  yamlFileDir = rospack.get_path('dialogflow_ros') + '/config/context.yaml'
38  with open(yamlFileDir, 'r') as f:
39  self.context = yaml.load(f)
40 
41  def get_data(self, in_data, frame_count, time_info, status):
42  """PyAudio callback to continuously get audio data from the server and put it in a buffer.
43  """
44  self._buff.put(in_data)
45  return None, pyaudio.paContinue
46 
47  def generator(self):
48  """Generator function that continuously yields audio chunks from the buffer.
49  Used to stream data to the Google Speech API Asynchronously.
50  """
51  while not self.closed:
52  # Check first chunk of data
53  chunk = self._buff.get()
54  if chunk is None:
55  return
56  data = [chunk]
57 
58  # Read in a stream till the end using a non-blocking get()
59  while True:
60  try:
61  chunk = self._buff.get(block=False)
62  if chunk is None:
63  return
64  data.append(chunk)
65  except Queue.Empty:
66  break
67 
68  yield b''.join(data)
69 
70  def _listen_print_loop(self, responses):
71  """Iterates through server responses and prints them.
72  The responses passed is a generator that will block until a response
73  is provided by the server.
74  Each response may contain multiple results, and each result may contain
75  multiple alternatives; for details, see https://goo.gl/tjCPAU. Here we
76  print only the transcription for the top alternative of the top result.
77  """
78  try:
79  for response in responses:
80  # If not a valid response, move on to next potential one
81  if not response.results:
82  continue
83 
84  # The `results` list is consecutive. For streaming, we only care about
85  # the first result being considered, since once it's `is_final`, it
86  # moves on to considering the next utterance.
87  result = response.results[0]
88  if not result.alternatives:
89  continue
90 
91  # Display the transcription of the top alternative.
92  transcript = result.alternatives[0].transcript
93 
94  # Parse the final utterance
95  if result.is_final:
96  rospy.logdebug("Google Speech result: {}".format(transcript))
97  # Received data is Unicode, convert it to string
98  transcript = transcript.encode('utf-8')
99  # Strip the initial space if any
100  if transcript.startswith(' '):
101  transcript = transcript[1:]
102  # Exit if needed
103  if transcript.lower() == 'exit' or rospy.is_shutdown():
104  self.shutdown()
105  # Send the rest of the sentence to topic
106  self.text_pub.publish(result[1])
107 
108  except InvalidArgument as e:
109  rospy.logwarn("{} caught in Mic. Client".format(e))
110  self.gspeech_client()
111  except OutOfRange as e:
112  rospy.logwarn("{} caught in Mic. Client".format(e))
113  self.gspeech_client()
114 
115  def gspeech_client(self):
116  """Creates the Google Speech API client, configures it, and sends/gets
117  audio/text data for parsing.
118  """
119  language_code = 'en-US'
120  # Hints for the API
121  context = types.SpeechContext(phrases=self.context)
122  client = speech.SpeechClient()
123  # Create metadata object, helps processing
124  metadata = types.RecognitionMetadata()
125  # Interaction Type:
126  # VOICE_SEARCH: Transcribe spoken questions and queries into text.
127  # VOICE_COMMAND: Transcribe voice commands, such as for controlling a device.
128  metadata.interaction_type = (enums.RecognitionMetadata.InteractionType.VOICE_COMMAND)
129  # Microphone Distance:
130  # NEARFIELD: The audio was captured from a closely placed microphone.
131  # MIDFIELD: The speaker is within 3 meters of the microphone.
132  # FARFIELD: The speaker is more than 3 meters away from the microphone.
133  metadata.microphone_distance = (enums.RecognitionMetadata.MicrophoneDistance.MIDFIELD)
134  # Device Type:
135  # PC: Speech was recorded using a personal computer or tablet.
136  # VEHICLE: Speech was recorded in a vehicle.
137  # OTHER_OUTDOOR_DEVICE: Speech was recorded outdoors.
138  # OTHER_INDOOR_DEVICE: Speech was recorded indoors.
139  metadata.recording_device_type = (enums.RecognitionMetadata.RecordingDeviceType.PC)
140  # Media Type:
141  # AUDIO: The speech data is an audio recording.
142  # VIDEO: The speech data originally recorded on a video.
143  metadata.original_media_type = (enums.RecognitionMetadata.OriginalMediaType.AUDIO)
144  config = types.RecognitionConfig(
145  encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
146  sample_rate_hertz=16000,
147  language_code=language_code,
148  speech_contexts=[context],
149  use_enhanced=True,
150  model='command_and_search',
151  metadata=metadata)
152  streaming_config = types.StreamingRecognitionConfig(
153  config=config,
154  single_utterance=False,
155  interim_results=False)
156  # Hack from Google Speech Python docs, very pythonic c:
157  requests = (types.StreamingRecognizeRequest(audio_content=content) for content in self.generator())
158  responses = client.streaming_recognize(streaming_config, requests)
159  self._listen_print_loop(responses)
160 
161  def __del__(self):
162  """Shut down as cleanly as possible"""
163  rospy.loginfo("Google STT shutting down")
164  self.closed = True
165  self._buff.put(None)
166  self.stream.close()
167  self.audio.terminate()
168  exit()
169 
170  def start_client(self):
171  """Entry function to start the client"""
172  try:
173  rospy.loginfo("Google STT started")
174  self.gspeech_client()
175  except KeyboardInterrupt:
176  self.__del__()
177 
178 
179 def signal_handler(signal, frame):
180  rospy.signal_shutdown("Order 66 Received")
181  exit("Order 66 Received")
182 
183 
184 if __name__ == '__main__':
185  # rospy.init_node('frasier_mic_client', log_level=rospy.DEBUG)
186  rospy.init_node('google_client')
187  signal.signal(signal.SIGINT, signal_handler)
189  g.start_client()
def _listen_print_loop(self, responses)
def get_data(self, in_data, frame_count, time_info, status)
def signal_handler(signal, frame)


dialogflow_ros
Author(s): Anas Abou Allaban
autogenerated on Mon Jun 10 2019 13:02:59