4 from google.cloud
import speech_v1p1beta1
as speech
5 from google.cloud.speech_v1p1beta1
import enums
6 from google.cloud.speech_v1p1beta1
import types
7 from google.api_core.exceptions
import InvalidArgument, OutOfRange
14 from std_msgs.msg
import String
20 FORMAT = pyaudio.paInt16
25 self.
stream = self.audio.open(format=FORMAT, channels=CHANNELS,
26 rate=RATE, input=
True,
27 frames_per_buffer=self.
CHUNK,
33 self.
text_pub = rospy.Publisher(
'/google_client/text', String, queue_size=10)
36 rospack = rospkg.RosPack()
37 yamlFileDir = rospack.get_path(
'dialogflow_ros') +
'/config/context.yaml' 38 with open(yamlFileDir,
'r') as f: 41 def get_data(self, in_data, frame_count, time_info, status):
42 """PyAudio callback to continuously get audio data from the server and put it in a buffer. 44 self._buff.put(in_data)
45 return None, pyaudio.paContinue
48 """Generator function that continuously yields audio chunks from the buffer. 49 Used to stream data to the Google Speech API Asynchronously. 53 chunk = self._buff.get()
61 chunk = self._buff.get(block=
False)
71 """Iterates through server responses and prints them. 72 The responses passed is a generator that will block until a response 73 is provided by the server. 74 Each response may contain multiple results, and each result may contain 75 multiple alternatives; for details, see https://goo.gl/tjCPAU. Here we 76 print only the transcription for the top alternative of the top result. 79 for response
in responses:
81 if not response.results:
87 result = response.results[0]
88 if not result.alternatives:
92 transcript = result.alternatives[0].transcript
96 rospy.logdebug(
"Google Speech result: {}".format(transcript))
98 transcript = transcript.encode(
'utf-8')
100 if transcript.startswith(
' '):
101 transcript = transcript[1:]
103 if transcript.lower() ==
'exit' or rospy.is_shutdown():
106 self.text_pub.publish(result[1])
108 except InvalidArgument
as e:
109 rospy.logwarn(
"{} caught in Mic. Client".format(e))
111 except OutOfRange
as e:
112 rospy.logwarn(
"{} caught in Mic. Client".format(e))
116 """Creates the Google Speech API client, configures it, and sends/gets 117 audio/text data for parsing. 119 language_code =
'en-US' 121 context = types.SpeechContext(phrases=self.
context)
122 client = speech.SpeechClient()
124 metadata = types.RecognitionMetadata()
128 metadata.interaction_type = (enums.RecognitionMetadata.InteractionType.VOICE_COMMAND)
133 metadata.microphone_distance = (enums.RecognitionMetadata.MicrophoneDistance.MIDFIELD)
139 metadata.recording_device_type = (enums.RecognitionMetadata.RecordingDeviceType.PC)
143 metadata.original_media_type = (enums.RecognitionMetadata.OriginalMediaType.AUDIO)
144 config = types.RecognitionConfig(
145 encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
146 sample_rate_hertz=16000,
147 language_code=language_code,
148 speech_contexts=[context],
150 model=
'command_and_search',
152 streaming_config = types.StreamingRecognitionConfig(
154 single_utterance=
False,
155 interim_results=
False)
157 requests = (types.StreamingRecognizeRequest(audio_content=content)
for content
in self.
generator())
158 responses = client.streaming_recognize(streaming_config, requests)
162 """Shut down as cleanly as possible""" 163 rospy.loginfo(
"Google STT shutting down")
167 self.audio.terminate()
171 """Entry function to start the client""" 173 rospy.loginfo(
"Google STT started")
175 except KeyboardInterrupt:
180 rospy.signal_shutdown(
"Order 66 Received")
181 exit(
"Order 66 Received")
184 if __name__ ==
'__main__':
186 rospy.init_node(
'google_client')
187 signal.signal(signal.SIGINT, signal_handler)
def _listen_print_loop(self, responses)
def get_data(self, in_data, frame_count, time_info, status)
def signal_handler(signal, frame)