interface.py
Go to the documentation of this file.
00001 #!/usr/bin/env python
00002 # -*- coding: utf-8 -*-
00003 
00004 # Import ROS packages
00005 import os
00006 import rospy
00007 import rospkg
00008 import subprocess
00009 
00010 # Import ROS messages
00011 from rospeex_msgs.msg import SpeechRecognitionRequest
00012 from rospeex_msgs.msg import SpeechSynthesisRequest
00013 from rospeex_msgs.msg import SpeechRecognitionResponse
00014 from rospeex_msgs.msg import SignalProcessingResponse
00015 from rospeex_msgs.msg import SpeechSynthesisResponse
00016 from rospeex_msgs.msg import SpeechSynthesisHeader
00017 from rospeex_msgs.msg import SpeechSynthesisState
00018 from rospeex_msgs.srv import SpeechRecognitionConfig
00019 
00020 
class ROSpeexInterface(object):
    """
    class:
        ROSpeexInterface class
    brief:
        Provides rospeex interface for python.

        Wraps the publishers, subscribers and service proxy used to send
        speech synthesis (ss) / speech recognition (sr) requests and to
        receive their responses. Call init() after the ROS node has been
        initialized to create them; until then every interface is
        reported as disabled.
    """
    # topic / service names
    _SR_REQUEST_TOPIC_NAME = 'sr_req'
    _SR_RESPONSE_TOPIC_NAME = 'sr_res'
    _SS_REQUEST_TOPIC_NAME = 'ss_req'
    _SS_RESPONSE_TOPIC_NAME = 'ss_res'
    _SPI_RESPONSE_TOPIC_NAME = 'spi_res'
    _SPI_STATE_TOPIC_NAME = 'ss_state'
    _SPI_CONFIG_SERVICE_NAME = 'spi_config'

    # sound files played from the package's "sound" directory
    _NO_MESSAGE_WAV_FILENAME = 'nomessage.wav'
    _ACCEPT_MESSAGE_WAV_FILENAME = 'accept.wav'

    # inclusive [min, max] number of characters accepted by say()
    TEXT_RANGE = [0, 100]

    def __init__(self):
        # user callbacks (set through register_sr_response /
        # register_ss_response)
        self._sr_response = None
        self._ss_response = None

        # publishers (created in init())
        self._pub_sr = None
        self._pub_ss = None
        self._pub_ss_state = None

        # spi config service proxy (created in init())
        self._spi_config_srv = None

        # request id count
        self._ss_req_id = 0
        self._sr_req_id = 0

        # spi settings
        self._spi_language = 'ja'
        self._spi_engine = 'nict'

        # enable flags: everything is disabled until init() is called, so
        # that say()/tts()/recognize() report "disabled" instead of
        # failing on a missing attribute.
        self._ss_enable = False
        self._sr_enable = False
        self._spi_enable = False

    def init(self, ss=True, sr=True, spi=True):
        """
        brief:
            Initializing rospeex.
        param[in]:
            ss:  set true to enable speech synthesis
            sr: set true to enable speech recognition
            spi: set true to enable signal processing interface (waveform monitor)
        """
        # enable flags
        self._ss_enable = ss
        self._sr_enable = sr
        self._spi_enable = spi

        # publish topic for ss/sr request
        if self._sr_enable:
            rospy.loginfo('enable speech recognition.')
            self._pub_sr = rospy.Publisher(
                self._SR_REQUEST_TOPIC_NAME,
                SpeechRecognitionRequest,
                queue_size=5
            )
            rospy.Subscriber(
                self._SR_RESPONSE_TOPIC_NAME,
                SpeechRecognitionResponse,
                self._sr_response_callback
            )

        if self._ss_enable:
            rospy.loginfo('enable speech synthesis.')
            self._pub_ss = rospy.Publisher(
                self._SS_REQUEST_TOPIC_NAME,
                SpeechSynthesisRequest,
                queue_size=5
            )
            rospy.Subscriber(
                self._SS_RESPONSE_TOPIC_NAME,
                SpeechSynthesisResponse,
                self._ss_response_callback
            )

        if self._spi_enable:
            rospy.loginfo('enable signal processing interface.')
            self._pub_ss_state = rospy.Publisher(
                self._SPI_STATE_TOPIC_NAME,
                SpeechSynthesisState,
                queue_size=5
            )

            rospy.Subscriber(
                self._SPI_RESPONSE_TOPIC_NAME,
                SignalProcessingResponse,
                self._spi_response_callback
            )

            # the proxy is only created once the config service is available
            rospy.wait_for_service(self._SPI_CONFIG_SERVICE_NAME)
            self._spi_config_srv = rospy.ServiceProxy(
                self._SPI_CONFIG_SERVICE_NAME,
                SpeechRecognitionConfig
            )

    def play_sound(self, sound_path):
        """
        brief:
            playing audio file with the "aplay" command.
            The ss state is published as True before playback and as
            False afterwards, whether or not playback succeeded.
        param[in]:
            sound_path: sound file path
        """
        # disable mic input
        self._publish_ss_state(True)

        cmd = ['aplay', '-q', sound_path]
        try:
            subprocess.check_call(cmd)

        except subprocess.CalledProcessError as err:
            # a non-zero exit status of aplay is only reported, not raised
            rospy.logwarn(str(err))

        finally:
            self._publish_ss_state(False)

    def _publish_ss_state(self, state):
        """
        brief:
            send ss state to spi node.
            Does nothing when the state publisher has not been created.
        param[in]:
            state: ss state flag.
        """
        if self._pub_ss_state:
            msg = SpeechSynthesisState()
            msg.header.request_type = SpeechSynthesisHeader.REQUEST_TYPE_SAY
            msg.header.engine = ''
            msg.header.voice_font = ''
            msg.header.language = ''
            msg.header.user = rospy.get_name()
            msg.header.request_id = ''
            msg.play_state = state
            self._pub_ss_state.publish(msg)

    def _play_package_sound(self, file_name):
        """
        brief:
            playing audio file shipped in the "sound" directory of the
            rospeex_if package.
        param[in]:
            file_name: sound file name (file location: rospeex_if/sound)
        """
        rp = rospkg.RosPack()
        rospeex_dir = rp.get_path('rospeex_if')
        sound_path = os.path.join(rospeex_dir, 'sound', file_name)
        self.play_sound(sound_path)

    def _spi_response_callback(self, response):
        """
        brief:
            Response from signal processing interface (wave monitor).
            When spi is enabled, advances the sr request id and plays the
            accept sound.
        param[in]:
            response:  response from ros node (its content is not used)
        """
        if self._spi_enable:
            self._sr_req_id += 1
            self._play_package_sound(self._ACCEPT_MESSAGE_WAV_FILENAME)

    def _ss_response_callback(self, response):
        """
        brief:
            Response from speech synthesis node.
            The registered callback is called with response.data, only
            for responses whose header.user is this node's name.
        param[in]:
            response:  speech synthesis result audio file (wave)
        """
        if self._ss_response and response.header.user == rospy.get_name():
            self._ss_response(response.data)

    def register_ss_response(self, func):
        """
        brief:
            Registering a speech synthesis callback function.
        param[in]:
            func:  form: func(data)

                data: speech synthesis result audio file (wave)
        """
        self._ss_response = func

    def _sr_response_callback(self, response):
        """
        brief:
            Response from speech recognition node.
            The registered callback is called with response.message when
            header.user is this node's name or 'spi'. The "no message"
            sound is played whenever response.message is empty.
        param[in]:
            response:  speech recognition result surface string
        """
        if self._sr_response:
            if response.header.user in (rospy.get_name(), 'spi'):
                self._sr_response(response.message)

        if not response.message:
            self._play_package_sound(self._NO_MESSAGE_WAV_FILENAME)

    def register_sr_response(self, func):
        """
        brief:
            Registering a speech recognition callback function.
        param[in]:
            func:  form: func(message)

                message: speech recognition result surface string
        """
        self._sr_response = func

    def set_spi_config(self, language='ja', engine='nict'):
        """
        brief:
            Setting signal processing interface (wave monitor) parameters.
            The values are always stored; the config service is only
            called when its proxy has been created by init().
        param[in]:
            language:  language setting (ja, en, zh, ko, id, my, th, vi, fr, es)

            engine:  speech recognition engine setting (nict or google)

                    nict: supports ja, en, zh, ko, id, my, th, vi, fr, es

                    google: supports ja, en
        """
        self._spi_language = language
        self._spi_engine = engine
        if self._spi_config_srv:
            self._spi_config_srv(self._spi_engine, self._spi_language)

    def _publish_ss_request(self, request_type, message, memo,
                            language, engine, voice_font):
        """
        brief:
            build and publish one speech synthesis request, then advance
            the ss request id.
        param[in]:
            request_type: SpeechSynthesisHeader request type constant
            message: text for performing speech synthesis
            memo: value stored in the memo field of the request
            language: language setting
            engine: speech synthesis engine setting
            voice_font: voice font setting
        """
        msg = SpeechSynthesisRequest()
        msg.header.request_type = request_type
        msg.header.engine = engine
        msg.header.voice_font = voice_font
        msg.header.language = language
        msg.header.user = rospy.get_name()
        msg.header.request_id = str(self._ss_req_id)
        msg.message = message
        msg.memo = memo
        self._pub_ss.publish(msg)
        self._ss_req_id += 1

    def say(self, message, language='ja', engine='nict', voice_font='*', limit=True):
        """
        brief:
            Performs speech synthesis, and outputs to the speaker.
            Nothing is published when speech synthesis is disabled or
            when the message fails the length check.
        param[in]:
            message:  text for performing speech synthesis

            language:  language setting (ja, en, zh, ko, id, my, th, vi)

            engine:  speech synthesis engine setting (nict or google)

                    nict: supports ja, en, zh, ko, id, my, th, vi

                    google: supports ja, en

            voice_font:  voice font setting

                    nict(ja): F128 or F117

                    nict(en): EF007

                    nict(zh): CJF101

                    nict(ko): KF001

                    nict(id/my/th/vi): None

                    google(ja/en): None

            limit: limit for message string length

                    (True:limit to 100 characters, False:no limit)
        """
        if not self._ss_enable:
            rospy.loginfo('ss interface is disabled.')
            return

        if limit and not self._check_text(message):
            # _check_text has already logged why the message was rejected;
            # do not additionally claim that the interface is disabled.
            return

        self._publish_ss_request(
            SpeechSynthesisHeader.REQUEST_TYPE_SAY,
            message, "", language, engine, voice_font
        )

    def tts(self, message, file_name, language='ja', engine='nict', voice_font='*'):
        """
        brief:
            Performs speech synthesis, and outputs to the wave file.
            The output file path is sent in the memo field of the request.
        param[in]:
            message:  text to perform speech synthesis

            file_name:  filepath to output

            language:  language setting (ja, en, zh, ko, id, my, th, vi)

            engine:  speech synthesis engine setting (nict or google)

                    nict: supports ja, en, zh, ko, id, my, th, vi

                    google: supports ja, en

            voice_font:  voice font setting

                    nict(ja): F128 or F117

                    nict(en): EF007

                    nict(zh): CJF101

                    nict(ko): KF001

                    nict(id/my/th/vi): None

                    google(ja/en): None

        """
        if not self._ss_enable:
            rospy.loginfo('ss interface is disabled.')
            return

        self._publish_ss_request(
            SpeechSynthesisHeader.REQUEST_TYPE_TTS,
            message, file_name, language, engine, voice_font
        )

    def recognize(self, data, language='ja', engine='nict'):
        """
        brief:
            Recognizing voice data.
        param[in]:
            data:  voice file (wave format: 16kHz, 16bit, mono, LE)

            language:  language setting (ja, en, zh, ko, id, my, th, vi, fr, es)

            engine:  speech recognition engine setting (nict or google)

                    nict: supports ja, en, zh, ko, id, my, th, vi, fr, es

                    google: supports ja, en
        """
        if not self._sr_enable:
            rospy.loginfo('sr interface is disabled.')
            return

        msg = SpeechRecognitionRequest()
        msg.header.user = rospy.get_name()
        msg.header.request_id = str(self._sr_req_id)
        msg.header.language = language
        msg.header.engine = engine
        msg.data = data
        self._pub_sr.publish(msg)
        self._sr_req_id += 1

    @staticmethod
    def _text_length(text):
        """
        brief:
            count the characters of a text.
            Byte strings are decoded as UTF-8 first so that multi-byte
            characters count as one character each.
        param[in]:
            text: input message (text string or UTF-8 encoded byte string)
        return:
            number of characters
        """
        # "bytes" is "str" on Python 2, so Python 2 byte strings are still
        # decoded; on Python 3 a text "str" has no decode() and is used as is.
        if isinstance(text, bytes):
            text = text.decode('utf-8')
        return len(text)

    def _check_text(self, text):
        """
        brief:
            check tts text length against TEXT_RANGE (both ends inclusive).
            An error is logged when the length is out of range.
        param[in]:
            text: input message
        return:
            True when the length is within range, otherwise False
        """
        lower, upper = self.TEXT_RANGE
        length = self._text_length(text)
        if lower <= length <= upper:
            return True

        msg = (
            'parameter failed. text length is not in range. '
            'Expected: {bounds[0]} <= text_length:{value} <= {bounds[1]}'
        ).format(bounds=self.TEXT_RANGE, value=length)
        rospy.logerr(msg)
        return False


rospeex_if
Author(s): Komei Sugiura
autogenerated on Thu Jun 6 2019 18:53:13