amazonpolly.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 
3 # Copyright (c) 2018, Amazon.com, Inc. or its affiliates. All Rights Reserved.
4 #
5 # Licensed under the Apache License, Version 2.0 (the "License").
6 # You may not use this file except in compliance with the License.
7 # A copy of the License is located at
8 #
9 # http://aws.amazon.com/apache2.0
10 #
11 # or in the "license" file accompanying this file. This file is distributed
12 # on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
13 # express or implied. See the License for the specific language governing
14 # permissions and limitations under the License.
15 
16 import json
17 import os
18 import sys
19 import wave
20 import traceback
21 import requests
22 from boto3 import Session
23 from botocore.credentials import CredentialProvider, RefreshableCredentials
24 from botocore.session import get_session
25 from botocore.exceptions import UnknownServiceError
26 from contextlib import closing
27 from optparse import OptionParser
28 
29 import rospy
30 from tts.srv import Polly, PollyRequest, PollyResponse
31 
32 
def get_ros_param(param, default=None):
    """Return the value of a ROS parameter, falling back to ``default``.

    The parameter name is first resolved with ``rospy.search_param``; when
    that yields no key, ``default`` is returned without a further lookup.
    This function never raises: any failure is logged as a warning and
    ``default`` is returned.

    :param param: name of the ROS parameter to look up
    :param default: value returned when the parameter cannot be read
    :return: the parameter value, or ``default``
    """
    try:
        key = rospy.search_param(param)
        return default if key is None else rospy.get_param(key, default)
    except Exception as e:
        # The message needs a third placeholder, otherwise the exception text is dropped.
        rospy.logwarn('Failed to get ros param {}, will use default {}. Exception: {}'.format(param, default, e))
        return default
40 
41 
class AwsIotCredentialProvider(CredentialProvider):
    """Botocore credential provider that asks an AWS IoT endpoint for credentials.

    Credentials are requested over HTTPS using a client certificate and key.
    All settings are read from ROS parameters under the ``iot/`` prefix:
    ``certfile``, ``keyfile``, ``endpoint``, ``role``, ``thing_name``,
    ``connect_timeout_ms`` and ``total_timeout_ms``.
    """

    METHOD = 'aws-iot'
    CANONICAL_NAME = 'customIoTwithCertificate'

    # Both defaults are expressed in milliseconds.
    DEFAULT_AUTH_CONNECT_TIMEOUT_MS = 5000
    DEFAULT_AUTH_TOTAL_TIMEOUT_MS = 10000

    def __init__(self):
        super(AwsIotCredentialProvider, self).__init__()
        self.ros_param_prefix = 'iot/'

    def get_param(self, param, default=None):
        """Read ``param`` from the ROS parameter server under this provider's prefix.

        :param param: parameter name without the prefix
        :param default: value returned when the parameter is not available
        :return: the parameter value, or ``default``
        """
        return get_ros_param(self.ros_param_prefix + param, default)

    def retrieve_credentials(self):
        """Fetch a set of credentials from the configured endpoint.

        It never raises. Failures are logged as warnings.

        :return: a dict with ``access_key``, ``secret_key``, ``token`` and ``expiry_time``,
            or None when a required parameter is missing or the request fails
        """
        try:
            cert_file = self.get_param('certfile')
            key_file = self.get_param('keyfile')
            endpoint = self.get_param('endpoint')
            role_alias = self.get_param('role')
            connect_timeout = self.get_param('connect_timeout_ms', self.DEFAULT_AUTH_CONNECT_TIMEOUT_MS)
            total_timeout = self.get_param('total_timeout_ms', self.DEFAULT_AUTH_TOTAL_TIMEOUT_MS)
            thing_name = self.get_param('thing_name', '')

            if any(v is None for v in (cert_file, key_file, endpoint, role_alias, thing_name)):
                return None

            headers = {'x-amzn-iot-thingname': thing_name} if len(thing_name) > 0 else None
            url = 'https://{}/role-aliases/{}/credentials'.format(endpoint, role_alias)

            # The parameters are in milliseconds, but requests expects (connect, read) in seconds.
            # see also: urllib3/util/timeout.py
            connect_timeout_s = float(connect_timeout) / 1000.0
            read_timeout_s = float(total_timeout - connect_timeout) / 1000.0
            timeout = (connect_timeout_s, read_timeout_s)

            response = requests.get(url, cert=(cert_file, key_file), headers=headers, timeout=timeout)
            # Report HTTP errors as such rather than as a KeyError on the payload below.
            response.raise_for_status()
            d = response.json()['credentials']

            rospy.loginfo('Credentials expiry time: {}'.format(d['expiration']))

            return {
                'access_key': d['accessKeyId'],
                'secret_key': d['secretAccessKey'],
                'token': d['sessionToken'],
                'expiry_time': d['expiration'],
            }
        except Exception as e:
            rospy.logwarn('Failed to fetch credentials from AWS IoT: {}'.format(e))
            return None

    def load(self):
        """Create refreshable credentials from the IoT endpoint.

        :return: a ``RefreshableCredentials`` instance, or None when no credentials could be
            retrieved, so that the next provider in the chain gets a chance
        """
        metadata = self.retrieve_credentials()
        if metadata is None:
            # create_from_metadata indexes into the metadata and would fail on None.
            return None

        return RefreshableCredentials.create_from_metadata(
            metadata,
            self.retrieve_credentials,
            'aws-iot-with-certificate'
        )
94 
95 
97  """A TTS engine that can be used in two different ways.
98 
99  Usage
100  -----
101 
102  1. It can run as a ROS service node.
103 
104  Start a polly node::
105 
106  $ rosrun tts polly_node.py
107 
108  Call the service from command line::
109 
110  $ rosservice call /polly SynthesizeSpeech 'hello polly' '' '' '' '' '' '' '' '' [] [] 0 '' '' '' '' '' '' false
111 
112  Call the service programmatically::
113 
114  from tts.srv import Polly
115  rospy.wait_for_service('polly')
116  polly = rospy.ServiceProxy('polly', Polly)
117  res = polly(**kw)
118 
119  2. It can also be used as a normal python class::
120 
121  AmazonPolly().synthesize(text='hi polly')
122 
123  PollyRequest supports many parameters, but the majority of the users can safely ignore most of them and just
124  use the vanilla version which involves only one argument, ``text``.
125 
126  If more control is needed in some use cases, SSML will come in handy. Example::
127 
128  AmazonPolly().synthesize(
129  text='<speak>Mary has a <amazon:effect name="whispered">little lamb.</amazon:effect></speak>',
130  text_type='ssml'
131  )
132 
133  A user can also control the voice, output format and so on. Example::
134 
135  AmazonPolly().synthesize(
136  text='<speak>Mary has a <amazon:effect name="whispered">little lamb.</amazon:effect></speak>',
137  text_type='ssml',
138  voice_id='Joey',
139  output_format='mp3',
140  output_path='/tmp/blah'
141  )
142 
143 
144  Parameters
145  ----------
146 
147  Among the parameters defined in Polly.srv, the following are supported while the others are reserved for future use.
148 
149  * polly_action : currently only ``SynthesizeSpeech`` is supported
150  * text : the text to speak
151  * text_type : can be either ``text`` (default) or ``ssml``
152  * voice_id : any voice id supported by Amazon Polly, default is Joanna
153  * output_format : ogg_vorbis (default), mp3 or pcm
154  * output_path : where the audio file is saved
155  * sample_rate : default is 16000 for pcm or 22050 for mp3 and ogg
156 
157  The following are the reserved ones. Note that ``language_code`` is rarely needed (this may seem counter-intuitive).
158  See official Amazon Polly documentation for details (link can be found below).
159 
160  * language_code
161  * lexicon_content
162  * lexicon_name
163  * lexicon_names
164  * speech_mark_types
165  * max_results
166  * next_token
167  * sns_topic_arn
168  * task_id
169  * task_status
170  * output_s3_bucket_name
171  * output_s3_key_prefix
172  * include_additional_language_codes
173 
174 
175  Links
176  -----
177 
178  Amazon Polly documentation: https://docs.aws.amazon.com/polly/latest/dg/API_SynthesizeSpeech.html
179 
180  """
181 
def __init__(self, aws_access_key_id=None, aws_secret_access_key=None, aws_session_token=None, region_name=None):
    """Create the Amazon Polly client and set the request defaults.

    :param aws_access_key_id: optional access key passed on to the boto3 session
    :param aws_secret_access_key: optional secret key passed on to the boto3 session
    :param aws_session_token: optional session token passed on to the boto3 session
    :param region_name: AWS region; when None it is read from the ROS parameter
        ``aws_client_configuration/region`` and falls back to ``us-west-2``
    """
    if region_name is None:
        region_name = get_ros_param('aws_client_configuration/region', default='us-west-2')

    self.polly = self._get_polly_client(aws_access_key_id, aws_secret_access_key, aws_session_token, region_name)
    self.default_text_type = 'text'
    self.default_voice_id = 'Joanna'
    self.default_output_format = 'ogg_vorbis'
    # Fallbacks read by _make_audio_file_fullpath when the request gives no folder / file name.
    self.default_output_folder = '.'
    self.default_output_file_basename = 'output'
192 
def _get_polly_client(self, aws_access_key_id=None, aws_secret_access_key=None, aws_session_token=None,
                      region_name=None, with_service_model_patch=False):
    """Build a boto3 client for Amazon Polly.

    A fresh botocore session is created on every call, which avoids
    potential problems caused by inner state of a reused session.

    :param aws_access_key_id: optional access key for the boto3 session
    :param aws_secret_access_key: optional secret key for the boto3 session
    :param aws_session_token: optional session token for the boto3 session
    :param region_name: AWS region for the boto3 session
    :param with_service_model_patch: when True, point botocore at the service
        model files located in ``data/models`` next to this file
    :return: the polly client
    :raises UnknownServiceError: if polly is unknown even with the patched model path
    """
    core_session = get_session()

    if with_service_model_patch:
        # Older versions of botocore don't have polly. Pointing the data path at
        # our own copy of the polly service model files may fix that.
        here = os.path.dirname(os.path.abspath(__file__))
        model_dir = os.path.join(here, 'data', 'models')
        core_session.set_config_variable('data_path', model_dir)
        rospy.loginfo('patching service model data path: {}'.format(model_dir))

    providers = core_session.get_component('credential_provider')
    providers.insert_after('boto-config', AwsIotCredentialProvider())

    core_session.user_agent_extra = self._generate_user_agent_suffix()

    boto_session = Session(
        aws_access_key_id=aws_access_key_id,
        aws_secret_access_key=aws_secret_access_key,
        aws_session_token=aws_session_token,
        region_name=region_name,
        botocore_session=core_session,
    )

    try:
        return boto_session.client("polly")
    except UnknownServiceError:
        if with_service_model_patch:
            # the patched attempt failed as well, nothing more we can do
            rospy.logerr('Amazon Polly is not available. Please install the latest boto3.')
            raise
        # first failure: retry once with the bundled service model
        return self._get_polly_client(aws_access_key_id, aws_secret_access_key, aws_session_token, region_name,
                                      with_service_model_patch=True)
227 
def _generate_user_agent_suffix(self):
    """Build the extra user-agent text attached to the botocore session.

    The values come from the ROS parameters ``exec_env``, ``robomaker_version``,
    ``rosdistro`` and ``rosversion``. The version is appended to the execution
    environment only when that environment contains ``AWS_RoboMaker``.

    :return: a string of the form ``exec-env/<env> ros-<distro>/<version>``
    """
    # ROS parameters are typed, so a value such as 1.0 arrives as a number;
    # convert to str before stripping.
    exec_env = str(get_ros_param('exec_env', 'AWS_RoboMaker')).strip()
    if 'AWS_RoboMaker' in exec_env:
        ver = get_ros_param('robomaker_version', None)
        if ver:
            exec_env += '-' + str(ver).strip()
    ros_distro = str(get_ros_param('rosdistro', 'Unknown_ROS_DISTRO')).strip()
    ros_version = str(get_ros_param('rosversion', 'Unknown_ROS_VERSION')).strip()
    return 'exec-env/{} ros-{}/{}'.format(exec_env, ros_distro, ros_version)
237 
238  def _pcm2wav(self, audio_data, wav_filename, sample_rate):
239  """per Amazon Polly official doc, the pcm in a signed 16-bit, 1 channel (mono), little-endian format."""
240  wavf = wave.open(wav_filename, 'w')
241  wavf.setframerate(int(sample_rate))
242  wavf.setnchannels(1) # 1 channel
243  wavf.setsampwidth(2) # 2 bytes == 16 bits
244  wavf.writeframes(audio_data)
245  wavf.close()
246 
247  def _make_audio_file_fullpath(self, output_path, output_format):
248  """Makes a full path for audio file based on given output path and format.
249 
250  If ``output_path`` doesn't have a path, current path is used.
251 
252  :param output_path: the output path received
253  :param output_format: the audio format, e.g., mp3, ogg_vorbis, pcm
254  :return: a full path for the output audio file. File ext will be constructed from audio format.
255  """
256  head, tail = os.path.split(output_path)
257  if not head:
258  head = self.default_output_folder
259  if not tail:
260  tail = self.default_output_file_basename
261 
262  file_ext = {'pcm': '.wav', 'mp3': '.mp3', 'ogg_vorbis': '.ogg'}[output_format.lower()]
263  if not tail.endswith(file_ext):
264  tail += file_ext
265 
266  return os.path.realpath(os.path.join(head, tail))
267 
def _synthesize_speech_and_save(self, request):
    """Ask Amazon Polly to synthesize speech and store the audio in a local file.

    Empty request fields are replaced by the defaults held on this object. When
    no sample rate is given, 16000 is used for pcm and 22050 otherwise. PCM audio
    is saved as a WAV file, other formats are written out as received.

    If the response carries no audio stream, the audio file name will be an
    empty string and the audio type will be "N/A".

    Please see https://boto3.readthedocs.io/reference/services/polly.html#Polly.Client.synthesize_speech
    for more details on Amazon Polly API.

    :param request: an instance of PollyRequest
    :return: a string in JSON form with three attributes, "Audio File", "Audio Type"
        and "Amazon Polly Response Metadata".
    """
    polly_args = {
        'LexiconNames': request.lexicon_names or [],
        'OutputFormat': request.output_format or self.default_output_format,
        'SampleRate': request.sample_rate,
        'SpeechMarkTypes': request.speech_mark_types or [],
        'Text': request.text,
        'TextType': request.text_type or self.default_text_type,
        'VoiceId': request.voice_id or self.default_voice_id
    }
    audio_format = polly_args['OutputFormat']

    if not polly_args['SampleRate']:
        polly_args['SampleRate'] = '16000' if audio_format.lower() == 'pcm' else '22050'

    rospy.loginfo('Amazon Polly Request: {}'.format(polly_args))
    response = self.polly.synthesize_speech(**polly_args)
    rospy.loginfo('Amazon Polly Response: {}'.format(response))

    # values reported when the response has no audio
    saved_path, content_type = '', 'N/A'

    if "AudioStream" in response:
        saved_path = self._make_audio_file_fullpath(request.output_path, audio_format)
        rospy.loginfo('will save audio as {}'.format(saved_path))

        with closing(response["AudioStream"]) as stream:
            if audio_format.lower() != 'pcm':
                with open(saved_path, "wb") as out:
                    out.write(stream.read())
            else:
                self._pcm2wav(stream.read(), saved_path, polly_args['SampleRate'])

        content_type = response['ContentType']

    summary = {
        'Audio File': saved_path,
        'Audio Type': content_type,
        'Amazon Polly Response Metadata': str(response['ResponseMetadata'])
    }
    return json.dumps(summary)
320 
321  def _dispatch(self, request):
322  """Amazon Polly supports a number of APIs. This will call the right one based on the content of request.
323 
324  Currently "SynthesizeSpeech" is the only recognized action. Basically this method just delegates the work
325  to ``self._synthesize_speech_and_save`` and returns the result as is. It will simply raise if a different
326  action is passed in.
327 
328  :param request: an instance of PollyRequest
329  :return: whatever returned by the delegate
330  """
331  actions = {
332  'SynthesizeSpeech': self._synthesize_speech_and_save
333  # ... more actions could go in here ...
334  }
335 
336  if request.polly_action not in actions:
337  raise RuntimeError('bad or unsupported Amazon Polly action: "' + request.polly_action + '".')
338 
339  return actions[request.polly_action](request)
340 
def _node_request_handler(self, request):
    """Service callback: process one request and always answer with a PollyResponse.

    It never raises. When processing fails, the response carries a JSON string
    with the exception details, the traceback and the path of an error sound
    located in ``data`` next to this file.

    :param request: an instance of PollyRequest
    :return: a PollyResponse
    """
    rospy.loginfo('Amazon Polly Request: {}'.format(request))

    try:
        result = self._dispatch(request)
        rospy.loginfo('will return {}'.format(result))
        return PollyResponse(result=result)
    except Exception as err:
        here = os.path.dirname(os.path.abspath(__file__))
        err_type = sys.exc_info()[0]

        # The class name is matched as text instead of using
        # `issubclass(err_type, ConnectionError)` because some versions of urllib3
        # raise when doing `from requests.exceptions import ConnectionError`.
        if 'ConnectionError' in err_type.__name__:
            sound_file = 'connerror.ogg'
        else:
            sound_file = 'error.ogg'

        exception_info = {
            'Type': str(err_type),
            'Module': err_type.__module__,
            'Name': err_type.__name__,
            'Value': str(err),
        }
        report = json.dumps({
            'Audio File': os.path.join(here, 'data', sound_file),
            'Audio Type': 'ogg',
            'Exception': exception_info,
            'Traceback': traceback.format_exc()
        })

        rospy.logerr(report)
        return PollyResponse(result=report)
378 
def synthesize(self, **kws):
    """Synthesize speech directly, without starting a ROS node.

    The keyword arguments are used to build a PollyRequest for the
    ``SynthesizeSpeech`` action, which is then processed by the same handler
    that serves the ROS service.

    :param kws: input as defined in Polly.srv
    :return: whatever the request handler returns for the request
    """
    return self._node_request_handler(PollyRequest(polly_action='SynthesizeSpeech', **kws))
387 
def start(self, node_name='polly_node', service_name='polly'):
    """Run this object as a ROS service node.

    Initializes the node, registers the service with ``_node_request_handler``
    as its callback, logs the service URI and then hands control to
    ``rospy.spin()``. Details of the service API can be found in Polly.srv.

    :param node_name: name of ROS node
    :param service_name: name of ROS service
    :return: it doesn't return
    """
    rospy.init_node(node_name)

    polly_service = rospy.Service(service_name, Polly, self._node_request_handler)
    rospy.loginfo('polly running: {}'.format(polly_service.uri))

    rospy.spin()
404 
405 
def main():
    """Parse the command line options and start the polly node.

    ``-n/--node-name`` sets the ROS node name (default ``polly_node``) and
    ``-s/--service-name`` sets the ROS service name (default ``polly``).
    """
    usage = '''usage: %prog [options]
    '''

    parser = OptionParser(usage)
    parser.add_option(
        "-n", "--node-name",
        dest="node_name",
        default='polly_node',
        help="name of the ROS node",
        metavar="NODE_NAME",
    )
    parser.add_option(
        "-s", "--service-name",
        dest="service_name",
        default='polly',
        help="name of the ROS service",
        metavar="SERVICE_NAME",
    )

    options, _ = parser.parse_args()

    AmazonPolly().start(node_name=options.node_name, service_name=options.service_name)


if __name__ == "__main__":
    main()
def _get_polly_client(self, aws_access_key_id=None, aws_secret_access_key=None, aws_session_token=None, region_name=None, with_service_model_patch=False)
Definition: amazonpolly.py:194
def _dispatch(self, request)
Definition: amazonpolly.py:321
def _node_request_handler(self, request)
Definition: amazonpolly.py:341
def start(self, node_name='polly_node', service_name='polly')
Definition: amazonpolly.py:388
def _synthesize_speech_and_save(self, request)
Definition: amazonpolly.py:268
def _generate_user_agent_suffix(self)
Definition: amazonpolly.py:228
def get_ros_param(param, default=None)
Definition: amazonpolly.py:33
def get_param(self, param, default=None)
Definition: amazonpolly.py:53
def _make_audio_file_fullpath(self, output_path, output_format)
Definition: amazonpolly.py:247
def _pcm2wav(self, audio_data, wav_filename, sample_rate)
Definition: amazonpolly.py:238
def __init__(self, aws_access_key_id=None, aws_secret_access_key=None, aws_session_token=None, region_name=None)
Definition: amazonpolly.py:182


tts
Author(s): AWS RoboMaker
autogenerated on Fri Mar 5 2021 03:06:38