#include "voice_recognition_alg.h"

#include <vector>
00002
00003 static const arg_module_t online_module = {NULL, "General options",
00004 {
00005 {"save-lattices", ARG_BOOL, "false", 0, "Enables saving lattices"},
00006 {"lattice-directory", ARG_DIR, NULL, 0, "Directory where lattices will be saved"},
00007 {"print-time", ARG_BOOL, "false", 0, "Print realtime factor"},
00008 {"print-score", ARG_BOOL, "false", 0, "Print hypothesis score"},
00009 {"verbosity", ARG_INT, "0", 0, "Set verbosity level"},
00010 {"statistics-verbosity", ARG_INT, "0", 0, "Set statistics verbosity level"},
00011 {"print-default", ARG_BOOL, "false", 0, "Print default config file"},
00012 {"print-config", ARG_BOOL, "false", 0, "Print final config file"},
00013 {"preprocess-config", ARG_STRING, NULL, 0, "Preprocess config file"},
00014 {NULL, ARG_END_MODULE, NULL, 0, NULL}
00015 }
00016 };
00017
00018 static const arg_shortcut_t shortcuts[] = {
00019 {"v", "verbosity"},
00020 {"s", "statistics-verbosity"},
00021 {"p", "preprocess-config"},
00022 {"t", "print-time"},
00023 {"d", "print-default"},
00024 {"l", "decoder.grammar-scale-factor"},
00025 {"w", "decoder.word-insertion-penalty"},
00026 {NULL, NULL}
00027 };
00028
00029 VoiceRecognitionAlgorithm::VoiceRecognitionAlgorithm(void)
00030 {
00031 }
00032
00033 VoiceRecognitionAlgorithm::~VoiceRecognitionAlgorithm(void)
00034 {
00035 }
00036
00037 void VoiceRecognitionAlgorithm::config_update(Config& new_cfg, uint32_t level)
00038 {
00039 this->lock();
00040
00041
00042 this->config_=new_cfg;
00043
00044 this->unlock();
00045 }
00046
00047
00048
00049 void VoiceRecognitionAlgorithm::initialize()
00050 {
00051 this->answerCode_=0;
00052
00053 std::string conf1;
00054 std::string conf2;
00055
00056 conf1=filePath_ + "/hcu_notask.cnf";
00057 conf2=filePath_ + "/conf.feat";
00058
00059 char *argv[] = {"iatros", "-c", conf1.c_str(), "-p", conf2.c_str(), NULL};
00060 int argc = sizeof(argv) / sizeof(char*) - 1;
00061
00062 arg_error_t aerr = ARG_OK;
00063
00064 args = args_create();
00065 args_set_summary(args, "Online speech text recognizer");
00066 args_set_doc(args, "For more info see http://prhlt.iti.es");
00067 args_set_version(args, IATROS_SPEECH_PROJECT_STRING"\n"IATROS_SPEECH_BUILD_INFO"\n\n"
00068 IATROS_PROJECT_STRING"\n"IATROS_BUILD_INFO);
00069 args_set_bug_report(args, "Report bugs to "IATROS_SPEECH_PROJECT_BUGREPORT".");
00070 args_add_module(args, &online_module);
00071 args_add_module(args, &decoder_module);
00072 args_add_module(args, &lattice_module);
00073 args_add_shortcuts(args, shortcuts);
00074
00075 args_parse_command_line(args, argc, argv);
00076
00077
00078 if (args_get_bool(args, "print-default", &aerr))
00079 {
00080 args_write_default_config_file(args, stdout);
00081 args_delete(args);
00082
00083 }
00084
00085 if (args_get_bool(args, "print-config", &aerr))
00086 {
00087 args_dump(args, stderr);
00088 }
00089
00090
00091 SET_STATISTICS_VERBOSITY(args_get_int(args, "statistics-verbosity", &aerr));
00092
00093 INIT_TRACE(args_get_int(args, "verbosity", &aerr));
00094
00095 const char *prep_conf_filename = args_get_string(args, "preprocess-config", &aerr);
00096 REQUIRE(aerr == ARG_OK && prep_conf_filename != NULL, "Preprocess config file is missing");
00097
00098
00099 decoder = decoder_create_from_args(args);
00100
00101 acquisitor = acquisitor_create_from_file(prep_conf_filename);
00102
00103 }
00104
00105 void VoiceRecognitionAlgorithm::finalize()
00106 {
00107 decoder_delete(decoder);
00108 acquisitor_delete(acquisitor);
00109 args_delete(args);
00110 }
00111
00112 void VoiceRecognitionAlgorithm::setFilePath(std::string path)
00113 {
00114 filePath_=path;
00115 }
00116
00117 std::string VoiceRecognitionAlgorithm::iteration()
00118 {
00119
00120
00121 std::string generatedAnswer_;
00122 search_t *search = search_create(decoder);
00123 lattice_t *lattice = lattice_create_from_args(args, decoder);
00124
00125 clock_t tim = clock();
00126 decode_online(search, acquisitor, lattice);
00127
00128 clock_t tim2 = clock();
00129 printf("time %f\n", ((float) ((tim2 - tim) / CLOCKS_PER_SEC) / search->n_frames) / 0.01);
00130 fflush(stdout);
00131
00132 generatedAnswer_ = outputs(args, lattice);
00133
00134 lattice_delete(lattice);
00135 search_delete(search);
00136
00137 acquisitor_clear(acquisitor);
00138
00139 return generatedAnswer_;
00140 }
00141
00142 std::string VoiceRecognitionAlgorithm::answer_type(std::string answerfn)
00143 {
00144 FILE *f;
00145 char aux[2048];
00146 int n, r, i;
00147
00148 answerfn = filePath_ + "/" + answerfn;
00149
00150 std::cout <<"answer_type: "<< answerfn << std::endl;
00151
00152 f=fopen(answerfn.c_str(),"r");
00153 if (f==NULL)
00154 return "Se me han frito los circuitos!!!";
00155
00156 n=0;
00157 while (fgets(aux,2047,f)!=NULL)
00158 n++;
00159
00160 fclose(f);
00161
00162 r=rand()%n;
00163
00164 f=fopen(answerfn.c_str(),"r");
00165 for (i=0;i<r;i++)
00166 fgets(aux,2047,f);
00167 fclose(f);
00168
00169 return std::string(aux);
00170 }
00171
00172 std::string VoiceRecognitionAlgorithm::answer_type_location(std::string answerfn, std::string loc)
00173 {
00174 FILE *f;
00175 char aux[2048];
00176 char *auxloc;
00177 int n, r, i;
00178
00179 answerfn = filePath_ + "/" + answerfn;
00180
00181 std::cout <<"answer_type_location: "<< answerfn << std::endl;
00182
00183
00184 auxloc=(char *) malloc((strlen(loc.c_str())+2)*sizeof(char));
00185 strcpy(auxloc,loc.c_str());
00186 auxloc[strlen(auxloc)+1]='\0';
00187 auxloc[strlen(auxloc)]='#';
00188
00189 f=fopen(answerfn.c_str(),"r");
00190 if (f==NULL)
00191 return "Se me han frito los circuitos!!!";
00192
00193 n=0;
00194 while (fgets(aux,2047,f)!=NULL)
00195 if (!strncmp(aux,auxloc,strlen(auxloc)))
00196 n++;
00197
00198 fclose(f);
00199
00200 r=rand()%n+1;
00201
00202 f=fopen(answerfn.c_str(),"r");
00203 i=0;
00204 while (i<r)
00205 {
00206 fgets(aux,2047,f);
00207 if (!strncmp(aux,auxloc,strlen(auxloc)))
00208 i++;
00209 }
00210 fclose(f);
00211
00212 return std::string(aux+strlen(loc.c_str())+1);
00213 }
00214
00215 std::string VoiceRecognitionAlgorithm::generate_answer(double *probs, char **sentences_str, int n)
00216 {
00217 int i, j, k, classs, nb=0, nc=0;
00218 int classv[NCLASSES];
00219 double classp[NCLASSES];
00220 char **build;
00221 char **classrooms;
00222 int *buildc, *classroomsc;
00223 char *auxloc;
00224 int loccode;
00225 std::string auxstring;
00226 this->answerCode_=0;
00227
00228 std::string currentAnswer;
00229
00230 build=(char **) malloc(n*sizeof(char *));
00231 classrooms=(char **) malloc(n*sizeof(char *));
00232
00233 for (i=0;i<n;i++)
00234 {
00235 build[i]=NULL; classrooms[i]=NULL;
00236 }
00237
00238 for (i=0;i<NCLASSES;i++)
00239 {
00240 classv[i]=0; classp[i]=0.0;
00241 }
00242
00243 for (i=0;i<n;i++)
00244 {
00245
00246 if (sentences_str[i]!=NULL)
00247 {
00248 classs=NONE;
00249 for (j=0;j<strlen(sentences_str[i]);j++)
00250 {
00251 if (!strncmp("padre",sentences_str[i]+j,5)) classs=AUTORES;
00252 if (!strncmp("madre",sentences_str[i]+j,5)) classs=AUTORES;
00253 if (!strncmp("dise~n",sentences_str[i]+j,6)) classs=AUTORES;
00254 if (!strncmp("creo",sentences_str[i]+j,4)) classs=AUTORES;
00255 if (!strncmp("creado",sentences_str[i]+j,6)) classs=AUTORES;
00256 if (!strncmp("hizo",sentences_str[i]+j,4)) classs=AUTORES;
00257 if (!strncmp("hecho",sentences_str[i]+j,5)) classs=AUTORES;
00258 if (!strncmp("constru",sentences_str[i]+j,7)) classs=AUTORES;
00259
00260 if (!strncmp("hacer",sentences_str[i]+j,5)) classs=HABILIDADES;
00261 if (!strncmp("dedicas",sentences_str[i]+j,7)) classs=HABILIDADES;
00262
00263 if (!strncmp("nombre",sentences_str[i]+j,6)) classs=IDENTIFICACION;
00264 if (!strncmp("llamas",sentences_str[i]+j,6)) classs=IDENTIFICACION;
00265 if (!strncmp("quien eres",sentences_str[i]+j,10)) classs=IDENTIFICACION;
00266
00267 if (!strncmp("donde estamos",sentences_str[i]+j,13)) classs=LUGAR;
00268
00269 if (!strncmp("proyecto",sentences_str[i]+j,8)) classs=OTROS_PROYECTOS;
00270 if (!strncmp("investiga",sentences_str[i]+j,9)) classs=OTROS_PROYECTOS;
00271 if (!strncmp("realiza",sentences_str[i]+j,7)) classs=OTROS_PROYECTOS;
00272 if (!strncmp("cosas",sentences_str[i]+j,5)) classs=OTROS_PROYECTOS;
00273
00274 if (!strncmp("familia",sentences_str[i]+j,7)) classs=OTROS_ROBOTS;
00275 if (!strncmp("herman",sentences_str[i]+j,6)) classs=OTROS_ROBOTS;
00276 if (!strncmp("otros",sentences_str[i]+j,5)) classs=OTROS_ROBOTS;
00277 if (!strncmp("otras",sentences_str[i]+j,5)) classs=OTROS_ROBOTS;
00278 if (!strncmp("conoces",sentences_str[i]+j,7)) classs=OTROS_ROBOTS;
00279
00280 if (!strncmp("vives",sentences_str[i]+j,5)) classs=ZONA;
00281 if (!strncmp("trabajas",sentences_str[i]+j,8)) classs=ZONA;
00282 if (!strncmp("habitas",sentences_str[i]+j,7)) classs=ZONA;
00283
00284 if (!strncmp("EDIF#",sentences_str[i]+j,5))
00285 {
00286 build[nb]=(char *) malloc((strlen(sentences_str[i]+j+5)+1)*sizeof(char));
00287 strcpy(build[nb],sentences_str[i]+j+5);
00288 k=0;
00289 while (build[nb][k]!=' ' && build[nb][k]!='\0')
00290 k++;
00291 build[nb][k]='\0';
00292 nb++;
00293 classs=EDIF;
00294 }
00295 if (!strncmp("AUL#",sentences_str[i]+j,4))
00296 {
00297 classrooms[nc]=(char *) malloc((strlen(sentences_str[i]+j+4)+1)*sizeof(char));
00298 strcpy(classrooms[nc],sentences_str[i]+j+4);
00299 k=0;
00300 while (classrooms[nc][k]!=' ' && classrooms[nc][k]!='\0')
00301 k++;
00302 classrooms[nc][k]='\0';
00303 nc++;
00304 classs=AUL;
00305 }
00306 }
00307 classv[classs]++;
00308 classp[classs]+=probs[i];
00309 }
00310 }
00311
00312
00313 for (j=0;j<NCLASSES;j++)
00314 if (classv[j]>0)
00315 classp[j]=classp[j]/classv[j];
00316
00317 classs=0;
00318 for (j=1;j<NCLASSES;j++)
00319 {
00320 if (classp[j]!=0.0 && (classp[classs]<classp[j] || classp[classs]==0.0) )
00321 {
00322 classs=j;
00323 }
00324 }
00325
00326 switch(classs)
00327 {
00328 case NONE:
00329 this->answerCode_ = 10;
00330 return answer_type("answers/NONE.txt");
00331 break;
00332 case AUTORES:
00333 this->answerCode_ = 11;
00334 return answer_type("answers/AUTORES.txt");
00335 break;
00336 case HABILIDADES:
00337 this->answerCode_ = 12;
00338 return answer_type("answers/HABILIDADES.txt");
00339 break;
00340 case IDENTIFICACION:
00341 this->answerCode_ = 13;
00342 return answer_type("answers/IDENTIFICACION.txt");
00343 break;
00344 case LUGAR:
00345 this->answerCode_ = 14;
00346 return answer_type("answers/LUGAR.txt");
00347 break;
00348 case OTROS_PROYECTOS:
00349 this->answerCode_ = 15;
00350 return answer_type("answers/OTROS_PROYECTOS.txt");
00351 break;
00352 case OTROS_ROBOTS:
00353 this->answerCode_ = 16;
00354 return answer_type("answers/OTROS_ROBOTS.txt");
00355 break;
00356 case ZONA:
00357 this->answerCode_ = 17;
00358 return answer_type("answers/ZONA.txt");
00359 break;
00360 case EDIF:
00361
00362 buildc=(int *) malloc(nb*sizeof(int));
00363 for (j=0;j<nb;j++)
00364 {
00365 buildc[j]=0;
00366 for (k=j;k<nb;k++)
00367 if (!strcmp(build[j],build[k]))
00368 buildc[j]++;
00369 }
00370 k=0;
00371 for (j=1;j<nb;j++)
00372 if (buildc[j]>buildc[k])
00373 k=j;
00374 auxstring = answer_type_location("answers/EDIF.txt", build[k]).c_str();
00375 auxloc = auxstring.c_str();
00376 currentAnswer.clear();
00377 j=0;
00378 while (auxloc[j]!='\0' && auxloc[j]!='#')
00379 {
00380
00381 currentAnswer+=auxloc[j];
00382 j++;
00383 }
00384 if (auxloc[j]=='#')
00385 loccode=atoi(auxloc+j+1);
00386 else loccode=0;
00387
00388
00389 this->answerCode_=loccode;
00390 std::cout << "!!! mapCode: " << this->answerCode_ << std::endl;
00391 free(buildc);
00392 return currentAnswer;
00393 break;
00394 case AUL:
00395
00396 classroomsc=(int *) malloc(nc*sizeof(int));
00397 for (j=0;j<nc;j++)
00398 {
00399 classroomsc[j]=0;
00400 for (k=j;k<nc;k++)
00401 if (!strcmp(classrooms[j],classrooms[k]))
00402 classroomsc[j]++;
00403 }
00404 k=0;
00405 for (j=1;j<nc;j++)
00406 if (classroomsc[j]>classroomsc[k])
00407 k=j;
00408 auxstring=answer_type_location("answers/AUL.txt", classrooms[k]).c_str();
00409 auxloc = auxstring.c_str();
00410 currentAnswer.clear();
00411 j=0;
00412 while (auxloc[j]!='\0' && auxloc[j]!='#')
00413 {
00414
00415 currentAnswer+=auxloc[j];
00416 j++;
00417 }
00418 if (auxloc[j]=='#')
00419 loccode=atoi(auxloc+j+1);
00420 else loccode=0;
00421
00422
00423 this->answerCode_=loccode;
00424 free(classroomsc);
00425 return currentAnswer;
00426 break;
00427 default:
00428 this->answerCode_ = 1000;
00429 return answer_type("answers/NONE.txt");
00430 }
00431 }
00432
00433 std::string VoiceRecognitionAlgorithm::outputs(const args_t *args, lattice_t *lattice)
00434 {
00435 std::string generatedAnswer_;
00436 int i, n=lattice->nbest/3;
00437 symbol_t **sentences;
00438 double *probs;
00439 char **sentences_str;
00440
00441 if (n==0) n=1;
00442
00443
00444 if (args_get_bool(args, "save-lattices", NULL))
00445 {
00446 char filename[MAX_LINE];
00447 char path[MAX_LINE];
00448
00449 time_t tm = time(NULL);
00450 strftime(filename, sizeof(filename), "%y-%m-%d_%M:%S", localtime(&tm));
00451 sprintf(filename, "%s_%d_PID%d", filename, (int)clock(), getpid());
00452
00453 char const *lattices_dn = args_get_string(args, "lattice-directory", NULL);
00454 if (lattices_dn == NULL) lattices_dn = ".";
00455 printf("The word_graph is in %s/%s.lat.gz\n", lattices_dn, filename);
00456
00457
00458 sprintf(path, "%s/%s.lat.gz", lattices_dn, filename);
00459 FILE *lattice_file = gzopen(path, "w");
00460 REQUIRE(lattice_file != NULL, "Couldn't open lattice file '%s'\n", path);
00461 lattice_write(lattice, lattice_file, path);
00462 gzclose(lattice_file);
00463 }
00464
00465
00466 lattice_nbest_hyp(lattice, n, &sentences, &probs);
00467 sentences_str=(char **) malloc(n*sizeof(char *));
00468 if (sentences[0] != NULL)
00469 {
00470 for (i=0;i<n;i++)
00471 {
00472 if (sentences[i]!=NULL)
00473 {
00474 sentences_str[i] = NULL;
00475 extended_vocab_symbols_to_string(sentences[i], lattice->decoder->vocab, &(sentences_str[i]));
00476
00477
00478 }
00479 else sentences_str[i] = NULL;
00480 }
00481
00482 generatedAnswer_ = generate_answer(probs,sentences_str, n);
00483 for (i=0;i<n;i++)
00484 {
00485 free(sentences[i]);
00486 free(sentences_str[i]);
00487 }
00488 free(probs);
00489 }
00490 else
00491 {
00492 printf("Sentence not recognized\n");
00493 fflush(stdout);
00494 }
00495 return generatedAnswer_;
00496
00497 }
00498