00001 #include "rospeex_if/rospeex.h"
00002 
00003 #include <stdio.h>
00004 #include <stdint.h>
00005 #include <sstream>
00006 
00007 #include <sys/stat.h>
00008 
00009 #include <ros/ros.h>
00010 #include <ros/console.h>
00011 #include <ros/package.h>
00012 
00013 #include <boost/format.hpp>
00014 #include <boost/filesystem.hpp>
00015 
00016 
00017 #include "rospeex_msgs/SpeechSynthesisResponse.h"
00018 #include "rospeex_msgs/SpeechSynthesisRequest.h"
00019 #include "rospeex_msgs/SpeechSynthesisHeader.h"
00020 #include "rospeex_msgs/SpeechSynthesisState.h"
00021 #include "rospeex_msgs/SpeechRecognitionResponse.h"
00022 #include "rospeex_msgs/SpeechRecognitionRequest.h"
00023 #include "rospeex_msgs/SignalProcessingResponse.h"
00024 
00025 using namespace rospeex;
00026 using namespace rospeex_msgs;
00027 
00028 #define SR_RESPONSE_TOPIC_NAME "sr_res"
00029 #define SS_RESPONSE_TOPIC_NAME "ss_res"
00030 #define SPI_RESPONSE_TOPIC_NAME "spi_res"
00031 #define SR_REQUEST_TOPIC_NAME "sr_req"
00032 #define SS_REQUEST_TOPIC_NAME "ss_req"
00033 #define SS_STATE_TOPIC_NAME "ss_state"
00034 
00035 
00036 class Interface::Impl
00037 {
00038 public:
00039         Impl()
00040                 : sr_func_()
00041                 , ss_func_()
00042                 , ss_request_id_(0)
00043                 , sr_request_id_(0)
00044                 , sr_queue_num_(0)
00045                 , sr_pub_()
00046                 , ss_pub_()
00047                 , ss_state_pub_()
00048                 , sr_sub_()
00049                 , ss_sub_()
00050                 , spi_sub_()
00051                 , ss_enable_()
00052                 , sr_enable_()
00053                 , spi_enable_()
00054         {}
00055 
00060         void SPIResponse( const SignalProcessingResponse::ConstPtr& response );
00061 
00066         void SRResponse( const SpeechRecognitionResponse::ConstPtr& response );
00067 
00072         void SSResponse( const SpeechSynthesisResponse::ConstPtr& response );
00073 
00079         void playPackageSound( const std::string& package_name );
00080 
00085         void playSound( const std::string& file_name );
00086 
00091         void sendSpeechSynthesisState( bool state );
00092 
00093         uint32_t ss_request_id_;                                        
00094         uint32_t sr_request_id_;                                        
00095 
00096         uint32_t sr_queue_num_;                                 
00097 
00098         SpeechRecognizeCallback sr_func_;                       
00099         SpeechSynthesisCallback ss_func_;                       
00100 
00101         ros::Publisher sr_pub_;                                         
00102         ros::Publisher ss_pub_;                                         
00103         ros::Publisher ss_state_pub_;                           
00104 
00105         ros::Subscriber sr_sub_;                                        
00106         ros::Subscriber ss_sub_;                                        
00107         ros::Subscriber spi_sub_;                                       
00108 
00109         std::string spi_engine_;                                        
00110         std::string spi_language_;                                      
00111 
00112         bool ss_enable_, sr_enable_, spi_enable_;
00113 };
00114 
00115 
00116 void Interface::Impl::SPIResponse( const SignalProcessingResponse::ConstPtr& response )
00117 {
00118         if ( sr_func_.empty() == false ) {
00119                 playPackageSound("accept.wav");
00120 
00121                 
00122                 ros::NodeHandle n;
00123                 std::stringstream ss;
00124                 ss << sr_request_id_;
00125                 SpeechRecognitionRequest request;
00126                 request.header.language = spi_language_;
00127                 request.header.engine = spi_engine_;
00128                 request.header.user = n.getNamespace();
00129                 request.header.request_id = ss.str();
00130                 request.data = response->data;
00131                 sr_pub_.publish(request);
00132                 sr_request_id_++;
00133 
00134                 sr_queue_num_++;
00135                 ROS_INFO("speech recognition in progress: %d", sr_queue_num_);
00136 
00137         } else {
00138                 
00139         }
00140 }
00141 
00142 
00143 void Interface::Impl::SRResponse( const SpeechRecognitionResponse::ConstPtr& response )
00144 {
00145 
00146         sr_queue_num_--;
00147         ROS_INFO("speech recognition in progress: %d", sr_queue_num_);
00148 
00149         if(response->message == ""){
00150                 playPackageSound("nomessage.wav");
00151         } else if ( spi_enable_ != true ) {
00152                 playPackageSound("accept.wav");
00153         }
00154         ros::NodeHandle n;
00155         if ( sr_func_.empty() == false && response->header.user == n.getNamespace() ) {
00156                 sr_func_( response->message );
00157 
00158         } else {
00159                 
00160         }
00161 }
00162 
00163 
00164 void Interface::Impl::SSResponse( const SpeechSynthesisResponse::ConstPtr& response )
00165 {
00166         ros::NodeHandle n;
00167         if ( ss_func_.empty() == false && response->header.user == n.getNamespace() ) {
00168                 ss_func_( response->data );
00169 
00170         } else {
00171                 
00172         }
00173 }
00174 
00175 
00176 Interface::Interface()
00177         : impl_( new Impl() )
00178 {}
00179 
00180 
00181 Interface::~Interface()
00182 {}
00183 
00184 
00185 bool Interface::init( bool ss_enable, bool sr_enable, bool spi_enable )
00186 {
00187         impl_ = boost::shared_ptr<Impl>(new Impl());
00188 
00189         ros::NodeHandle n;
00190         impl_->ss_enable_ = ss_enable;
00191         impl_->sr_enable_ = sr_enable;
00192         impl_->spi_enable_ = spi_enable;
00193 
00194         
00195         if ( sr_enable == true ) {
00196                 ROS_INFO("enable speeech recognition.");
00197                 impl_->sr_pub_ = n.advertise<SpeechRecognitionRequest>(SR_REQUEST_TOPIC_NAME, 10);
00198                 boost::function<void(const SpeechRecognitionResponse::ConstPtr&)> sr_func = boost::bind(&Impl::SRResponse, this->impl_, _1);
00199                 impl_->sr_sub_ = n.subscribe(SR_RESPONSE_TOPIC_NAME, 10, sr_func);
00200         }
00201 
00202         
00203         if ( ss_enable == true ) {
00204                 ROS_INFO("enable speech synthsis.");
00205                 impl_->ss_pub_ = n.advertise<SpeechSynthesisRequest>(SS_REQUEST_TOPIC_NAME, 10);
00206                 boost::function<void(const SpeechSynthesisResponse::ConstPtr&)> ss_func = boost::bind(&Impl::SSResponse, this->impl_, _1);
00207                 impl_->ss_sub_ = n.subscribe(SS_RESPONSE_TOPIC_NAME, 10, ss_func);
00208         }
00209 
00210         
00211         if ( spi_enable == true ) {
00212                 ROS_INFO("enable signal processing interface.");
00213                 boost::function<void(const SignalProcessingResponse::ConstPtr&)> spi_func = boost::bind(&Impl::SPIResponse, this->impl_, _1);
00214                 impl_->ss_state_pub_ = n.advertise<SpeechSynthesisState>(SS_STATE_TOPIC_NAME, 10);
00215                 impl_->spi_sub_ = n.subscribe(SPI_RESPONSE_TOPIC_NAME, 10, spi_func);
00216         }
00217 
00218         return true;
00219 }
00220 
00221 
00222 void Interface::Impl::playPackageSound( const std::string& sound_file )
00223 {
00224         
00225         sendSpeechSynthesisState(true);
00226 
00227         
00228         std::string package_path = ros::package::getPath("rospeex_if");
00229         boost::filesystem::path sound_path(package_path + "/sound/" + sound_file);
00230         playSound(sound_path.string());
00231 
00232         
00233         sendSpeechSynthesisState(false);
00234 }
00235 
00236 
00237 void Interface::Impl::sendSpeechSynthesisState( bool state )
00238 {
00239         
00240         if ( ss_state_pub_ ) {
00241                 ros::NodeHandle n;
00242                 SpeechSynthesisState ss_state;
00243                 ss_state.header.language = "";
00244                 ss_state.header.voice_font = "";
00245                 ss_state.header.engine = "";
00246                 ss_state.header.user = n.getNamespace();
00247                 ss_state.header.request_id = "0";
00248                 ss_state.header.request_type = SpeechSynthesisHeader::REQUEST_TYPE_SAY;
00249                 ss_state.play_state = state;
00250                 ss_state_pub_.publish(ss_state);
00251         }
00252 }
00253 
00254 
00255 void Interface::Impl::playSound( const std::string& file_path )
00256 {
00257         
00258         boost::system::error_code error;
00259         bool is_exist = boost::filesystem::exists(file_path, error);
00260 
00261         if ( error ) {
00262                 ROS_ERROR("file[%s] open error. %s",file_path.c_str() ,error.message().c_str());
00263                 return;
00264         }
00265 
00266         if ( !is_exist ) {
00267                 ROS_ERROR("[%s] is not exist.", file_path.c_str());
00268                 return;
00269         }
00270 
00271         
00272         std::string command = (boost::format("aplay -q %s") % file_path).str();
00273         int ret = system(command.c_str());
00274         if(ret != 0){
00275                 ROS_ERROR("playing sound error.");
00276         }
00277 }
00278 
00279 
00280 void Interface::say( const std::string& msg,
00281                                 const std::string& language,
00282                                 const std::string& engine,
00283                                 const std::string& voice_font)
00284 {
00285         if ( impl_->ss_pub_ ) {
00286                 ros::NodeHandle n;
00287                 std::stringstream ss;
00288                 ss << impl_->ss_request_id_;
00289                 SpeechSynthesisRequest request;
00290                 request.header.language = language;
00291                 request.header.voice_font = voice_font;
00292                 request.header.engine = engine;
00293                 request.header.user = n.getNamespace();
00294                 request.header.request_id = ss.str();
00295                 request.header.request_type = SpeechSynthesisHeader::REQUEST_TYPE_SAY;
00296                 request.message = msg;
00297                 impl_->ss_pub_.publish(request);
00298                 impl_->ss_request_id_++;
00299 
00300         } else {
00301                 ROS_INFO("ss interface is disabled.");
00302         }
00303 }
00304 
00305 
00306 void Interface::tts( const std::string& msg,
00307                                 const std::string& file,
00308                                 const std::string& language,
00309                                 const std::string& engine,
00310                                 const std::string& voice_font )
00311 {
00312         if ( impl_->ss_pub_ ) {
00313                 ros::NodeHandle n;
00314                 std::stringstream ss;
00315                 ss << impl_->ss_request_id_;
00316                 SpeechSynthesisRequest request;
00317                 request.header.language = language;
00318                 request.header.voice_font = voice_font;
00319                 request.header.engine = engine;
00320                 request.header.user = n.getNamespace();
00321                 request.header.request_id = ss.str();
00322                 request.header.request_type = SpeechSynthesisHeader::REQUEST_TYPE_TTS;
00323                 request.message = msg;
00324                 impl_->ss_pub_.publish(request);
00325                 impl_->ss_request_id_++;
00326 
00327         } else {
00328                 ROS_INFO("ss interface is disabled.");
00329         }
00330 }
00331 
00332 
00333 void Interface::recognize( const std::string& data,
00334                                                 const std::string& language,
00335                                                 const std::string& engine )
00336 {
00337         if ( impl_->sr_pub_ ) {
00338                 ros::NodeHandle n;
00339                 std::stringstream ss;
00340                 ss << impl_->sr_request_id_;
00341                 SpeechRecognitionRequest request;
00342                 request.header.language = language;
00343                 request.header.engine = engine;
00344                 request.header.user = n.getNamespace();
00345                 request.header.request_id = ss.str();
00346                 request.data = data;
00347                 impl_->sr_pub_.publish(request);
00348                 impl_->sr_request_id_++;
00349                 impl_->sr_queue_num_++;
00350                 ROS_INFO("speech recognition in progress: %d", impl_->sr_queue_num_);
00351 
00352         } else {
00353                 ROS_INFO("sr interface is disabled.");
00354         }
00355 }
00356 
00357 
00358 void Interface::registerSRResponse( const SpeechRecognizeCallback& callback )
00359 {
00360         impl_->sr_func_ = callback;
00361 }
00362 
00363 
00364 void Interface::registerSRResponse( void(*fp)(const std::string& msg) )
00365 {
00366         impl_->sr_func_ = SpeechRecognizeCallback(fp);
00367 }
00368 
00369 
00370 void Interface::registerSSResponse( const SpeechSynthesisCallback& callback )
00371 {
00372         impl_->ss_func_ = callback;
00373 }
00374 
00375 
00376 void Interface::registerSSResponse( void(*fp)(const std::string& data) )
00377 {
00378         impl_->ss_func_ = SpeechSynthesisCallback(fp);
00379 }
00380 
00381 
00382 void Interface::setSPIConfig( const std::string& language, const std::string& engine)
00383 {
00384         impl_->spi_language_ = language;
00385         impl_->spi_engine_ = engine;
00386 }
00387 
00388 
00389 void Interface::playSound( const std::string& file_name )
00390 {
00391         impl_->playSound(file_name);
00392 }