speech_database: speech_database.cpp Source File

Go to the documentation of this file.
00001 /*      Node that performs text-to-speech conversion by using Google's
00002         speech synthesiser.
00003         Alexander Reiter, Institute for Robotics (ROBIN), JKU Linz
00004         November-December 2013
00005         
00006         This node sends request to the soundplay_node to play audio files
00007         that contain synthesised speech. It checks with a dictonary if the 
00008         to-be-synthesised text has been handled before. If so, the existing
00009         audio file is used, otherwise, synthesised speech is downloaded from
00010         Google. The filename is a 50 character string and is chosen randomly.
00011         To determine whether an audio file that corresponds to the
00012         text already exists, the node accesses a json dictionary that 
00013         contains pre-downloaded files with synthesised speech. It holds 
00014         strings as keys and filenames as values.
00015         
00016         
00017         PARAMETERS:
00018         audioPath: path to folder with audio files (does not need to end with '/')
00019         jsonPath: path to json file with dictionary
00020         language: language code for speech synthesis (en, de, etc.)
00021         
00022         
00023         References:
00024         Google TTS API
00025         http://elinux.org/RPi_Text_to_Speech_%28Speech_Synthesis%29#Google_Text_to_Speech
00026         Jansson API
00027         https://jansson.readthedocs.org/en/2.5/apiref.html
00028 */
00029 
00030 #include <iostream>
00031 #include <sstream>
00032 #include <string>
00033 #include "ros/ros.h"
00034 #include "std_msgs/String.h"
00035 #include "sound_play/SoundRequest.h"
00036 #include <ctype.h>
00037 #include <stdio.h>
00038 #include <curl/curl.h>
00039 #include <jansson.h>
00040 #include <time.h> 
00041 
00042 using namespace std;
00043 
00044 void stringCallback(const std_msgs::String msg);
00045 void createFilename();
00046 
00047 ros::Publisher soundPub;        // publisher for sound_play requests
00048 
00049 // ROS parameters variables
00050 string audioPath;       // path to audio folder
00051 string jsonPath;        // path to json file with audio references
00052 string language;        // language code
00053 bool mplayer;           // using mplayer for audio output
00054 
00055 json_t* root;   // root node of json structure
00056 
00057 // curl variables
00058 CURL *curl;
00059 CURLcode res;
00060 char filename[51];
00061  
00062 int main(int argc, char** argv) {
00063   
00064   // init ROS
00065   ros::init(argc, argv, "speech_database");
00066   ros::NodeHandle n("~");
00067   ros::spinOnce();
00068   ros::Subscriber sub;
00069   sub = n.subscribe("/speech", 10, stringCallback);
00070   soundPub = n.advertise<sound_play::SoundRequest>("/robotsound", 10);
00071   
00072   ros::Rate loop_rate(10);
00073   if(!n.getParam("audioPath", audioPath)) {
00074           ROS_ERROR("Parameter audioPath not found");
00075           ros::shutdown();
00076   }
00077   // check if audioPath ends with '/'
00078   if(audioPath.find_last_of("/") != audioPath.length()-1) {
00079     audioPath.append("/");
00080   }
00081   if(!n.getParam("language", language)) {
00082           ROS_ERROR("Parameter language not found");
00083           ros::shutdown();
00084   }
00085   if(!n.getParam("jsonPath", jsonPath)) {
00086           ROS_ERROR("Parameter jsonPath not found");
00087           ros::shutdown();
00088   }
00089   if(!n.getParam("mplayer", mplayer)) {
00090           ROS_ERROR("Parameter mplayer not found");
00091           ros::shutdown();
00092   }
00093   
00094   // prepare rand
00095   srand(time(NULL));
00096   srand(time(NULL) + rand());
00097   
00098   // init json file
00099   json_error_t err;
00100   root = json_load_file(jsonPath.c_str(), 0, &err);
00101   if(!root) {
00102           ROS_DEBUG("json file %s does not exist, a new file will be created", jsonPath.c_str());
00103           root = json_object();
00104   } else {
00105           ROS_DEBUG("json file %s successfully opened", jsonPath.c_str());
00106   }
00107   
00108   // init curl
00109   curl = curl_easy_init();
00110  
00111   while (curl && ros::ok()) {
00112     ros::spinOnce();
00113     loop_rate.sleep();
00114   }
00115   
00116   curl_easy_cleanup(curl);
00117  
00118   return 0;
00119 }
00120 
00121 /* Callback function that checks if the current json structure already
00122  * contains an entry that corresponds to the string from the message. If
00123  * no entry exists, it a new a new speech file is downloaded and stored
00124  * using curl and a new json entry is created.
00125  * The path of the audio file that corresponds to the string in message
00126  * is stored in a SoundRequest message and advertised on the robotsound
00127  * topic.
00128  * Since Google TTS only supports string with a length of up to 100 
00129  * characters, a warning is thrown when a string is to be synthesised
00130  * the exceeds the maximum length.
00131  * 
00132  * INPUT:       const std_msgs::String msg: message with string to be translated
00133  * OUTPUT:      none
00134  */
00135 void stringCallback(const std_msgs::String msg) {
00136         string data = msg.data;
00137         if(data.length() >  100) {
00138                 ROS_WARN("The exceeds the maximum length of 100 characters and will not be synthesised.");
00139                 return;
00140         }
00141         
00142         // convert string to lowercase
00143         for(int i = 0; i < data.length(); i++) {
00144                 data[i] = (char) tolower((int) data[i]);
00145         }
00146         
00147         // check if entry for this string exists in json file
00148         json_t* entry = json_object_get(root, data.c_str());    // holds json_string object with audio file path
00149         if(!entry) {
00150                 ROS_DEBUG("Entry does not exist, downloading new file");
00151                 
00152                 // prepare URL for curl
00153                 stringstream urlStream;
00154                 urlStream << "http://translate.google.com/translate_tts?tl=";
00155                 urlStream << language << "&q=";
00156                 char* str = curl_easy_escape(curl, data.c_str(), 0);
00157                 urlStream << str;
00158                 curl_free(str);
00159                 
00160                 stringstream filenameStream;
00161                 FILE* file = fopen("/dev/null", "r");   // some existing file
00162                 while(file) {   // generate new filenames until corresponding file does not exist
00163                         curl_easy_setopt(curl, CURLOPT_URL, urlStream.str().c_str());
00164                         filenameStream.str("");
00165                         filenameStream.clear();
00166                         filenameStream << audioPath;
00167                         createFilename();
00168                         filenameStream << filename;
00169                         file = fopen(filenameStream.str().c_str(), "r");
00170                 }
00171                 // file does not exist, create it
00172                 file = fopen(filenameStream.str().c_str(), "w");
00173                 if(!file) {
00174                         ROS_ERROR("Could not open file %s", filenameStream.str().c_str());
00175                         fclose(file);
00176                         return;
00177                 }
00178                 
00179                 curl_easy_setopt(curl, CURLOPT_WRITEDATA, file) ;
00180                 res = curl_easy_perform(curl);
00181                 fclose(file);
00182                 
00183                 // check for curl errors
00184                 if(res == CURLE_OK) {
00185                         ROS_DEBUG("File downloaded and stored in %s", filenameStream.str().c_str());
00186                 } else {
00187                         ROS_ERROR("Download failed, curl_easy_perform() error: %s", curl_easy_strerror(res));
00188                         return;
00189                 }
00190                 
00191                 // add json object
00192                 entry = json_string(filename);
00193                 json_object_set(root, data.c_str(), entry);
00194                 
00195                 if(json_dump_file(root, jsonPath.c_str(), 0) == 0) {
00196                         ROS_DEBUG("json file successfully written to %s", jsonPath.c_str());
00197                 } else {
00198                         ROS_ERROR("json file could not be written, all changes are lost");
00199                 }
00200         }
00201         
00202         // create stop message
00203         sound_play::SoundRequest req;
00204         req.sound = req.ALL;
00205         req.command = req.PLAY_STOP;
00206         if(!mplayer) {
00207                 soundPub.publish(req);
00208         }
00209         
00210         // create speech message
00211         req.sound = req.PLAY_FILE;
00212         req.command = req.PLAY_ONCE;
00213         string path_to_file = audioPath;
00214         path_to_file.append(json_string_value(entry));
00215         req.arg = path_to_file;
00216         
00217         if(mplayer) {
00218                 string cmd_str = "mplayer ";
00219                 cmd_str = cmd_str + path_to_file;
00220                 system(cmd_str.c_str());
00221         } else {
00222                 soundPub.publish(req);
00223         }
00224 }
00225 
00226 /* Function that writes a random sequence of (human readable) characters
00227  * to the global filename c string (0 terminated).
00228  * 
00229  * INPUT:       none
00230  * OUTPUT:      none
00231  */
00232 void createFilename() {
00233         int i;
00234         char ch;
00235         for(i = 0; i < 50; i++) {
00236                 int ran = rand() % 62;
00237                 ch = (char) ran;
00238                 if(ch < 10) {
00239                         ch = (char) ch + '0'; // numbers
00240                 } else if(ch < 36) {
00241                         ch = (ch - 10) + 'A'; // capital letters
00242                 } else {
00243                         ch = (ch - 36) + 'a'; // small letters
00244                 }
00245                 filename[i] = ch;
00246         }
00247         filename[50] = 0;
00248 }