synthesizer.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 
3 # Copyright (c) 2018, Amazon.com, Inc. or its affiliates. All Rights Reserved.
4 #
5 # Licensed under the Apache License, Version 2.0 (the "License").
6 # You may not use this file except in compliance with the License.
7 # A copy of the License is located at
8 #
9 # http://aws.amazon.com/apache2.0
10 #
11 # or in the "license" file accompanying this file. This file is distributed
12 # on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
13 # express or implied. See the License for the specific language governing
14 # permissions and limitations under the License.
15 
16 import os
17 import time
18 import json
19 import rospy
20 import hashlib
21 from optparse import OptionParser
22 from tts.srv import Synthesizer, SynthesizerResponse
23 
24 
26  """This class serves as a ROS service node that should be an entry point of a TTS task.
27 
28  Although the current implementation uses Amazon Polly as the synthesis engine, it is not hard to let it support
29  more heterogeneous engines while keeping the API the same.
30 
31  In order to support a variety of engines, the SynthesizerRequest was designed with flexibility in mind. It
32  has two fields: text and metadata. Both are strings. In most cases, a user can ignore the metadata and call
33  the service with some plain text. If the use case needs any control or engine-specific feature, the extra
34  information can be put into the JSON-form metadata. This class will use the information when calling the engine.
35 
36  The decoupling of the synthesizer and the actual synthesis engine will benefit the users in many ways.
37 
38  First, a user will be able to use a unified interface to do the TTS job and have the freedom to use different
39  engines available with no or very little change from the client side.
40 
41  Second, by applying some design patterns, the synthesizer can choose an engine dynamically. For example, a user
42  may prefer to use Amazon Polly but is also OK with an offline solution when network is not reliable.
43 
44  Third, engines can be complicated and thus difficult to use. As an example, Amazon Polly supports dozens of parameters
45  and is able to accomplish nontrivial synthesis jobs, but the majority of users never need those features. This
46  class provides a clean interface with only two parameters, so that it is much easier and more pleasant to use. If by
47  any chance the advanced features are required, the user can always leverage the metadata field or even go to the
48  backend engine directly.
49 
50  Also, from an engineering perspective, simple and decoupled modules are easier to maintain.
51 
52  This class supports two modes of using polly. It can either call a service node or use AmazonPolly as a library.
53 
54  Start the service node::
55 
56  $ rosrun tts synthesizer_node.py # use default configuration
57  $ rosrun tts synthesizer_node.py -e POLLY_LIBRARY # will not call polly service node
58 
59  Call the service::
60 
61  $ rosservice call /synthesizer 'hello' ''
62  $ rosservice call /synthesizer '<speak>hello</speak>' '"{\"text_type\":\"ssml\"}"'
63  """
64 
class PollyViaNode:
    """Engine adapter that delegates synthesis to a Polly ROS service.

    :param polly_service_name: name of the ROS service to call
    """

    def __init__(self, polly_service_name='polly'):
        self.service_name = polly_service_name

    def __call__(self, **kwargs):
        """Send ``kwargs`` to the Polly service as a 'SynthesizeSpeech' action.

        :param kwargs: request fields, passed through to the service unchanged
        :return: whatever the service proxy call returns
        """
        target = self.service_name
        rospy.loginfo('will call service {}'.format(target))

        # Imported here rather than at module level, so the service type is
        # only loaded when this engine is actually called.
        from tts.srv import Polly

        rospy.wait_for_service(target)
        proxy = rospy.ServiceProxy(target, Polly)
        return proxy(polly_action='SynthesizeSpeech', **kwargs)
75 
class PollyDirect:
    """Engine adapter that uses AmazonPolly as a library, in this process."""

    def __init__(self):
        """Nothing to configure: an AmazonPolly object is created per call."""

    def __call__(self, **kwargs):
        """Synthesize by calling ``AmazonPolly().synthesize(**kwargs)``.

        :param kwargs: arguments passed through to ``synthesize`` unchanged
        :return: whatever ``synthesize`` returns
        """
        rospy.loginfo('will import amazonpolly.AmazonPolly')

        # Imported on demand, after the log line announcing it.
        from tts.amazonpolly import AmazonPolly

        return AmazonPolly().synthesize(**kwargs)
85 
# Engine names accepted by the constructor, mapped to the adapter class that
# implements each one.
ENGINES = dict(
    POLLY_SERVICE=PollyViaNode,
    POLLY_LIBRARY=PollyDirect,
)
90 
class BadEngineError(NameError):
    """Raised when the requested engine name is not a key of ENGINES."""
93 
94  def __init__(self, engine='POLLY_SERVICE', polly_service_name='polly'):
95  if engine not in self.ENGINES:
96  msg = 'bad engine {} which is not one of {}'.format(engine, ', '.join(SpeechSynthesizer.ENGINES.keys()))
97  raise SpeechSynthesizer.BadEngineError(msg)
98 
99  engine_kwargs = {'polly_service_name': polly_service_name} if engine == 'POLLY_SERVICE' else {}
100  self.engine = self.ENGINES[engine](**engine_kwargs)
101 
102  self.default_text_type = 'text'
103  self.default_voice_id = 'Joanna'
104  self.default_output_format = 'ogg_vorbis'
105 
def _call_engine(self, **kw):
    """Call engine to do the job.

    If no output path is found from input, the audio file will be put into /tmp and the file name will
    contain the md5 hash of the text followed by the current timestamp.

    :param kw: what AmazonPolly needs to synthesize; must contain 'text' when 'output_path' is absent
    :return: response from AmazonPolly
    """
    if 'output_path' not in kw:
        text = kw['text']
        # hashlib only digests bytes. Encode text first so that Python 3 str and
        # non-ASCII Python 2 unicode do not raise; byte strings pass through as-is.
        if not isinstance(text, bytes):
            text = text.encode('utf-8')
        tmp_filename = hashlib.md5(text).hexdigest()
        # The timestamp keeps repeated requests for the same text from sharing a file.
        tmp_filepath = os.path.join(os.sep, 'tmp', 'voice_{}_{}'.format(tmp_filename, str(time.time())))
        kw['output_path'] = os.path.abspath(tmp_filepath)
    rospy.loginfo('audio will be saved as {}'.format(kw['output_path']))

    return self.engine(**kw)
122 
123  def _parse_request_or_raise(self, request):
124  """It will raise if request is malformed.
125 
126  :param request: an instance of SynthesizerRequest
127  :return: a dict
128  """
129  md = json.loads(request.metadata) if request.metadata else {}
130 
131  md['output_format'] = md.get('output_format', self.default_output_format)
132  md['voice_id'] = md.get('voice_id', self.default_voice_id)
133  md['sample_rate'] = md.get('sample_rate', '16000' if md['output_format'].lower() == 'pcm' else '22050')
134  md['text_type'] = md.get('text_type', self.default_text_type)
135  md['text'] = request.text
136 
137  return md
138 
def _node_request_handler(self, request):
    """Service callback: parse the request, run the engine, wrap the outcome.

    Any exception raised while parsing or synthesizing is caught and returned to the caller as the text
    of the response, prefixed with 'Exception: '.

    :param request: an instance of SynthesizerRequest
    :return: a SynthesizerResponse
    """
    rospy.loginfo(request)
    try:
        engine_kwargs = self._parse_request_or_raise(request)
        engine_response = self._call_engine(**engine_kwargs)
        return SynthesizerResponse(engine_response.result)
    except Exception as err:
        # Report the failure in-band instead of letting the service call itself fail.
        return SynthesizerResponse('Exception: {}'.format(err))
155 
def start(self, node_name='synthesizer_node', service_name='synthesizer'):
    """Initialise the ROS node, advertise the service and spin.

    :param node_name: name of ROS node
    :param service_name: name of ROS service
    :return: it doesn't return
    """
    rospy.init_node(node_name)

    handler = self._node_request_handler
    advertised = rospy.Service(service_name, Synthesizer, handler)
    rospy.loginfo('{} running: {}'.format(node_name, advertised.uri))

    # Hand control to rospy; requests are served through the handler from here on.
    rospy.spin()
170 
171 
def main():
    """Parse the command-line options, build the synthesizer and start the node."""
    usage = '''usage: %prog [options]
    '''

    # (short flag, long flag, dest, default, help, metavar) for each option.
    option_table = (
        ("-n", "--node-name", "node_name", 'synthesizer_node', "name of the ROS node", "NODE_NAME"),
        ("-s", "--service-name", "service_name", 'synthesizer', "name of the ROS service", "SERVICE_NAME"),
        ("-e", "--engine", "engine", 'POLLY_SERVICE', "name of the synthesis engine", "ENGINE"),
        ("-p", "--polly-service-name", "polly_service_name", 'polly', "name of the polly service",
         "POLLY_SERVICE_NAME"),
    )

    parser = OptionParser(usage)
    for short_flag, long_flag, dest, default, help_text, metavar in option_table:
        parser.add_option(short_flag, long_flag, dest=dest, default=default, help=help_text, metavar=metavar)

    options, _unused_args = parser.parse_args()

    # The polly service name is only forwarded when the service-backed engine is selected.
    synthesizer_kwargs = {'engine': options.engine}
    if options.engine == 'POLLY_SERVICE':
        synthesizer_kwargs['polly_service_name'] = options.polly_service_name

    synthesizer = SpeechSynthesizer(**synthesizer_kwargs)
    synthesizer.start(node_name=options.node_name, service_name=options.service_name)


if __name__ == "__main__":
    main()
def _parse_request_or_raise(self, request)
Definition: synthesizer.py:123
def __init__(self, engine='POLLY_SERVICE', polly_service_name='polly')
Definition: synthesizer.py:94
def start(self, node_name='synthesizer_node', service_name='synthesizer')
Definition: synthesizer.py:156
def __init__(self, polly_service_name='polly')
Definition: synthesizer.py:66
def _node_request_handler(self, request)
Definition: synthesizer.py:139


tts
Author(s): AWS RoboMaker
autogenerated on Fri Mar 5 2021 03:06:38