21 from optparse
import OptionParser
22 from tts.srv
import Synthesizer, SynthesizerResponse
26 """This class serves as a ROS service node that should be the entry point of a TTS task. 28 Although the current implementation uses Amazon Polly as the synthesis engine, it is not hard to make it support 29 more heterogeneous engines while keeping the API the same. 31 In order to support a variety of engines, the SynthesizerRequest was designed with flexibility in mind. It 32 has two fields: text and metadata. Both are strings. In most cases, a user can ignore the metadata and call 33 the service with some plain text. If the use case needs any control or engine-specific feature, the extra 34 information can be put into the JSON-formatted metadata. This class will use that information when calling the engine. 36 The decoupling of the synthesizer from the actual synthesis engine benefits users in many ways. 38 First, a user will be able to use a unified interface to do the TTS job and have the freedom to use different 39 engines available with little or no change on the client side. 41 Second, by applying some design patterns, the synthesizer can choose an engine dynamically. For example, a user 42 may prefer to use Amazon Polly but is also OK with an offline solution when the network is not reliable. 44 Third, engines can be complicated and thus difficult to use. As an example, Amazon Polly supports dozens of parameters 45 and is able to accomplish nontrivial synthesis jobs, but the majority of users never need those features. This 46 class provides a clean interface with only two parameters, so that it is much easier and more pleasant to use. If by 47 any chance the advanced features are required, the user can always leverage the metadata field or even go to the 48 backend engine directly. 50 Also, from an engineering perspective, simple and decoupled modules are easier to maintain. 52 This class supports two modes of using Polly. It can either call a service node or use AmazonPolly as a library. 
54 Start the service node:: 56 $ rosrun tts synthesizer_node.py # use default configuration 57 $ rosrun tts synthesizer_node.py -e POLLY_LIBRARY # will not call polly service node 61 $ rosservice call /synthesizer 'hello' '' 62 $ rosservice call /synthesizer '<speak>hello</speak>' '"{\"text_type\":\"ssml\"}"' 66 def __init__(self, polly_service_name='polly'):
70 rospy.loginfo(
'will call service {}'.format(self.
service_name))
71 from tts.srv
import Polly
74 return polly(polly_action=
'SynthesizeSpeech', **kwargs)
81 rospy.loginfo(
'will import amazonpolly.AmazonPolly')
84 return node.synthesize(**kwargs)
87 'POLLY_SERVICE': PollyViaNode,
88 'POLLY_LIBRARY': PollyDirect,
94 def __init__(self, engine='POLLY_SERVICE', polly_service_name='polly'):
95 if engine
not in self.ENGINES:
96 msg =
'bad engine {} which is not one of {}'.format(engine,
', '.join(SpeechSynthesizer.ENGINES.keys()))
97 raise SpeechSynthesizer.BadEngineError(msg)
99 engine_kwargs = {
'polly_service_name': polly_service_name}
if engine ==
'POLLY_SERVICE' else {}
107 """Call the engine to do the job. 109 If the input contains no output path, the audio file will be put into /tmp and the file name will be 110 ``voice_<md5 hash of the text>_<current time>``. 112 :param kw: what AmazonPolly needs to synthesize 113 :return: response from AmazonPolly 115 if 'output_path' not in kw:
116 tmp_filename = hashlib.md5(kw[
'text']).hexdigest()
117 tmp_filepath = os.path.join(os.sep,
'tmp',
'voice_{}_{}'.format(tmp_filename, str(time.time())))
118 kw[
'output_path'] = os.path.abspath(tmp_filepath)
119 rospy.loginfo(
'audio will be saved as {}'.format(kw[
'output_path']))
124 """It will raise if the request is malformed (for example, if its metadata is not valid JSON). 126 :param request: an instance of SynthesizerRequest 129 md = json.loads(request.metadata)
if request.metadata
else {}
133 md[
'sample_rate'] = md.get(
'sample_rate',
'16000' if md[
'output_format'].lower() ==
'pcm' else '22050')
135 md[
'text'] = request.text
140 """The callback function for processing a service request. 142 It never raises. If anything unexpected happens, it will return a SynthesizerResponse built from the string 'Exception: ' followed by the exception message. 144 :param request: an instance of SynthesizerRequest 145 :return: a SynthesizerResponse 147 rospy.loginfo(request)
152 return SynthesizerResponse(res)
153 except Exception
as e:
154 return SynthesizerResponse(
'Exception: {}'.format(e))
156 def start(self, node_name='synthesizer_node', service_name='synthesizer'):
157 """The entry point of a ROS service node. 159 :param node_name: name of the ROS node 160 :param service_name: name of the ROS service 161 :return: it doesn't return 163 rospy.init_node(node_name)
167 rospy.loginfo(
'{} running: {}'.format(node_name, service.uri))
173 usage =
'''usage: %prog [options] 176 parser = OptionParser(usage)
178 parser.add_option(
"-n",
"--node-name", dest=
"node_name", default=
'synthesizer_node',
179 help=
"name of the ROS node",
181 parser.add_option(
"-s",
"--service-name", dest=
"service_name", default=
'synthesizer',
182 help=
"name of the ROS service",
183 metavar=
"SERVICE_NAME")
184 parser.add_option(
"-e",
"--engine", dest=
"engine", default=
'POLLY_SERVICE',
185 help=
"name of the synthesis engine",
187 parser.add_option(
"-p",
"--polly-service-name", dest=
"polly_service_name", default=
'polly',
188 help=
"name of the polly service",
189 metavar=
"POLLY_SERVICE_NAME")
191 (options, args) = parser.parse_args()
193 node_name = options.node_name
194 service_name = options.service_name
195 engine = options.engine
196 polly_service_name = options.polly_service_name
198 if engine ==
'POLLY_SERVICE':
199 synthesizer =
SpeechSynthesizer(engine=engine, polly_service_name=polly_service_name)
202 synthesizer.start(node_name=node_name, service_name=service_name)
205 if __name__ ==
"__main__":
def _parse_request_or_raise(self, request)
def __init__(self, engine='POLLY_SERVICE', polly_service_name='polly')
def __call__(self, kwargs)
def __call__(self, kwargs)
def start(self, node_name='synthesizer_node', service_name='synthesizer')
def __init__(self, polly_service_name='polly')
def _node_request_handler(self, request)
def _call_engine(self, kw)