rospeex_if: interface.py Source File

Go to the documentation of this file.
00001 #!/usr/bin/env python
00002 # -*- coding: utf-8 -*-
00003 
00004 # Import ROS packages
00005 try:
00006     import roslib; roslib.load_manifest('rospeex_if')
00007 except:
00008     pass
00009 
00010 import os
00011 import rospy
00012 import rospkg
00013 import subprocess
00014 
00015 # Import ROS messages
00016 from rospeex_msgs.msg import SpeechRecognitionRequest
00017 from rospeex_msgs.msg import SpeechSynthesisRequest
00018 from rospeex_msgs.msg import SpeechRecognitionResponse
00019 from rospeex_msgs.msg import SpeechSynthesisResponse
00020 from rospeex_msgs.msg import SignalProcessingResponse
00021 from rospeex_msgs.msg import SpeechSynthesisHeader
00022 from rospeex_msgs.msg import SpeechSynthesisState
00023 
00024 class ROSpeexInterface(object):
00025     """
00026     class:
00027         ROSpeexInterface class
00028     brief:
00029         Provides rospeex interface for python.
00030     """
00031     _SR_REQUEST_TOPIC_NAME = 'sr_req'
00032     _SR_RESPONSE_TOPIC_NAME = 'sr_res'
00033     _SS_REQUEST_TOPIC_NAME = 'ss_req'
00034     _SS_RESPONSE_TOPIC_NAME = 'ss_res'
00035     _SPI_RESPONSE_TOPIC_NAME = 'spi_res'
00036     _SPI_STATE_TOPIC_NAME = 'ss_state'
00037     _NO_MESSAGE_WAV_FILENAME = 'nomessage.wav'
00038     _ACCEPT_MESSAGE_WAV_FILENAME = 'accept.wav'
00039 
00040     def __init__(self):
00041         # define topic names
00042         # callback lists
00043         self._sr_response = None
00044         self._ss_response = None
00045 
00046         # publisher lists
00047         self._pub_sr = None
00048         self._pub_ss = None
00049         self._pub_ss_state = None
00050 
00051         # request id count
00052         self._ss_req_id = 0
00053         self._sr_req_id = 0
00054 
00055         self._sr_queue_num = 0
00056         self._sr_queue_disp = False
00057 
00058         # spi settings
00059         self._spi_language = 'ja'
00060         self._spi_engine = 'nict'
00061 
00062     def init(self, ss=True, sr=True, spi=True):
00063         """
00064         brief:
00065             Initializing rospeex.
00066         param[in]:
00067             ss:  set true to enable speech synthesis
00068 
00069             sr: set true to enable speech recognition
00070 
00071             spi: set true to enable signal processing interface (waveform monitor)
00072         """
00073         # enable flags
00074         self._ss_enable = ss
00075         self._sr_enable = sr
00076         self._spi_enable = spi
00077 
00078         # publish topic for ss/sr request
00079         if self._sr_enable == True:
00080             rospy.loginfo('enable speech recognition.')
00081             self._pub_sr = rospy.Publisher(self._SR_REQUEST_TOPIC_NAME, SpeechRecognitionRequest)
00082             rospy.Subscriber(self._SR_RESPONSE_TOPIC_NAME, SpeechRecognitionResponse, self._sr_response_callback)
00083 
00084         if self._ss_enable == True:
00085             rospy.loginfo('enable speech synthesis.')
00086             self._pub_ss = rospy.Publisher(self._SS_REQUEST_TOPIC_NAME, SpeechSynthesisRequest)
00087             rospy.Subscriber(self._SS_RESPONSE_TOPIC_NAME, SpeechSynthesisResponse, self._ss_response_callback)
00088 
00089         if self._spi_enable == True:
00090             rospy.loginfo('enable signal processing interface.')
00091             self._pub_ss_state = rospy.Publisher(self._SPI_STATE_TOPIC_NAME, SpeechSynthesisState)
00092             rospy.Subscriber(self._SPI_RESPONSE_TOPIC_NAME, SignalProcessingResponse, self._spi_response_callback)
00093 
00094 
00095     def play_sound(self, sound_path):
00096         """
00097         brief:
00098             playing audio file.
00099         param[in]:
00100             sound_path: sound file path
00101         """
00102         # disable mic input
00103         self._publish_ss_state(True)
00104 
00105         cmd = ['aplay', '-q', sound_path]
00106         try:
00107             subprocess.check_call(cmd)
00108         except subprocess.CalledProcessError as err:
00109             rospy.logwarn(str(err))
00110         finally:
00111             self._publish_ss_state(False)
00112 
00113 
00114     def _publish_ss_state(self, state):
00115         """
00116         brief:
00117             send ss state to spi node.
00118         param[in]:
00119             state: ss state flag.
00120         """
00121         if self._pub_ss_state:
00122             msg = SpeechSynthesisState()
00123             msg.header.request_type = SpeechSynthesisHeader.REQUEST_TYPE_SAY
00124             msg.header.engine = ''
00125             msg.header.voice_font = ''
00126             msg.header.language = ''
00127             msg.header.user = rospy.get_name()
00128             msg.header.request_id = ''
00129             msg.play_state = state
00130             self._pub_ss_state.publish(msg)
00131 
00132 
00133     def _play_package_sound(self, file_name):
00134         """
00135         brief:
00136             playing audio file.
00137         param[in]:
00138             file_name: sound file name (file location: rospeex/sound)
00139         """
00140         rp = rospkg.RosPack()
00141         rospeex_dir = rp.get_path('rospeex_if')
00142         sound_path  = os.path.join(rospeex_dir, 'sound', file_name)
00143         self.play_sound(sound_path)
00144 
00145 
00146     def _spi_response_callback(self, response):
00147         """
00148         brief:
00149             Response from signal processing interface (wave monitor).
00150         param[in]:
00151             response:  response from ros node
00152         """
00153 
00154         if self._spi_enable == True and self._pub_sr != None:
00155             # process messsage data
00156             msg = SpeechRecognitionRequest()
00157             msg.header.user = rospy.get_name()
00158             msg.header.request_id = str(self._sr_req_id)
00159             msg.header.language = self._spi_language
00160             msg.header.engine = self._spi_engine
00161             msg.data = response.data
00162             self._pub_sr.publish(msg)
00163             self._sr_req_id += 1
00164             self._sr_queue_num += 1
00165             if self._sr_queue_num > 1:
00166                 self._sr_queue_disp = True
00167             if self._sr_queue_disp == True:
00168                 rospy.loginfo('speech recognition in progress: %d' %self._sr_queue_num)
00169 
00170             # notice to user
00171             self._play_package_sound("accept.wav")
00172 
00173     def _ss_response_callback(self, response):
00174         """
00175         brief:
00176             Response from speech synthesis node.
00177         param[in]:
00178             response:  speech synthesis result audio file (wave)
00179         """
00180         if self._ss_response and response.header.user == rospy.get_name():
00181             self._ss_response(response.data)
00182 
00183 
00184     def register_ss_response(self, func):
00185         """
00186         brief:
00187             Registering a speech synthesis callback function.
00188         param[in]:
00189             func:  form: func(data)
00190 
00191                 data: speech synthesis result audio file (wave)
00192         """
00193         self._ss_response = func
00194 
00195 
00196     def _sr_response_callback(self, response):
00197         """
00198         brief:
00199             Response from speech recognition node.
00200         param[in]:
00201             response:  speech recognition result surface string
00202         """
00203 
00204         self._sr_queue_num -= 1
00205         if self._sr_queue_disp == True:
00206             rospy.loginfo('speech recognition in progress: %d' %self._sr_queue_num)
00207         if self._sr_queue_num == 0:
00208             self._sr_queue_disp = False
00209 
00210         if self._sr_response and response.header.user == rospy.get_name():
00211             self._sr_response(response.message)
00212 
00213         if not response.message:
00214             self._play_package_sound(self._NO_MESSAGE_WAV_FILENAME)
00215         elif self._spi_enable != True:
00216             self._play_package_sound(self._ACCEPT_MESSAGE_WAV_FILENAME)
00217 
00218 
00219     def register_sr_response(self, func):
00220         """
00221         brief:
00222             Registering a speech recognition callback function.
00223         param[in]:
00224             func:  form: func(message)
00225 
00226                 message: speech recognition result surface string
00227         """
00228         self._sr_response = func
00229 
00230 
00231     def set_spi_config(self, language='ja', engine='nict'):
00232         """
00233         brief:
00234             Setting signal processing interface (wave monitor) parameters.
00235         param[in]:
00236             language:  language setting (ja, en, zh, ko)
00237 
00238             engine:  speech recognition engine setting (nict or google)
00239 
00240                     nict: supports ja, en, zh, ko
00241 
00242                     google: supports ja, en
00243         """
00244         self._spi_language = language
00245         self._spi_engine = engine
00246 
00247 
00248     def say(self, message, language='ja', engine='nict', voice_font='*'):
00249         """
00250         brief:
00251             Performs speech synthesis, and outputs to the speaker.
00252         param[in]:
00253             message:  text for performing speech synthesis
00254 
00255             language:  language setting (ja, en, zh, ko)
00256 
00257             engine:  speech synthesis engine setting (nict or google or voicetext)
00258 
00259                     nict: supports ja, en, zh, ko
00260 
00261                     google: supports ja, en
00262 
00263                     voicetext: supports ja
00264 
00265             voice_font:  voice font setting
00266 
00267                     nict(ja): F128 or F117
00268 
00269                     nict(en): EF007
00270 
00271                     nict(zh): CJF101
00272 
00273                     nict(ko): KF001
00274 
00275                     google(ja/en): None
00276 
00277                     voicetext(ja): show, haruka, hikari, takeru, santa, bear
00278         """
00279         if self._ss_enable == True:
00280             msg = SpeechSynthesisRequest()
00281             msg.header.request_type = SpeechSynthesisHeader.REQUEST_TYPE_SAY
00282             msg.header.engine = engine
00283             msg.header.voice_font = voice_font
00284             msg.header.language = language
00285             msg.header.user = rospy.get_name()
00286             msg.header.request_id = str(self._ss_req_id)
00287             msg.message = message
00288             msg.memo = ""
00289             self._pub_ss.publish(msg)
00290             self._ss_req_id += 1
00291         else:
00292             rospy.loginfo('ss interface is disabled.')
00293 
00294 
00295     def tts(self, message, file_name, language='ja', engine='nict', voice_font='*'):
00296         """
00297         brief:
00298             Performs speech synthesis, and outputs to the wave file.
00299         param[in]:
00300             message:  text to perform speech synthesis
00301 
00302             file_name:  filepath to output
00303 
00304             language:  language setting (ja, en, zh, ko)
00305 
00306             engine:  speech synthesis engine setting (nict or google)
00307 
00308                     nict: supports ja, en, zh, ko
00309 
00310                     google: supports ja, en
00311 
00312             voice_font:  voice font setting
00313 
00314                     nict(ja): F128 or F117
00315 
00316                     nict(en): EF007
00317 
00318                     nict(zh): CJF101
00319 
00320                     nict(ko): KF001
00321 
00322                     google(ja/en): None
00323 
00324         """
00325         if self._ss_enable == True:
00326             msg = SpeechSynthesisRequest()
00327             msg.header.request_type = SpeechSynthesisHeader.REQUEST_TYPE_TTS
00328             msg.header.engine = engine
00329             msg.header.voice_font = voice_font
00330             msg.header.language = language
00331             msg.header.user = rospy.get_name()
00332             msg.header.request_id = str(self._ss_req_id)
00333             msg.message = message
00334             msg.memo = file_name
00335             self._pub_ss.publish(msg)
00336             self._ss_req_id += 1
00337         else:
00338             rospy.loginfo('ss interface is disabled.')
00339 
00340 
00341     def recognize(self, data, language='ja', engine='nict'):
00342         """
00343         brief:
00344             Recognizing voice data.
00345         param[in]:
00346             data:  voice file (wave format: 16kHz, 16bit, mono, LE)
00347 
00348             language:  language setting (ja, en, zh, ko)
00349 
00350             engine:  speech recognition engine setting (nict or google)
00351 
00352                     nict: supports ja, en, zh, ko
00353 
00354                     google: supports ja, en
00355         """
00356         if self._sr_enable == True:
00357             msg = SpeechRecognitionRequest()
00358             msg.header.user = rospy.get_name()
00359             msg.header.request_id = str(self._sr_req_id)
00360             msg.header.language = language
00361             msg.header.engine = engine
00362             msg.data = data
00363             self._pub_sr.publish(msg)
00364             self._sr_req_id += 1
00365             self._sr_queue_num += 1
00366             if self._sr_queue_num > 1:
00367                 self._sr_queue_disp = True
00368             if self._sr_queue_disp == True:
00369                 rospy.loginfo('speech recognition in progress: %d' %self._sr_queue_num)
00370         else:
00371             rospy.loginfo('sr interface is disabled.')
00372