svm-predict.c
Go to the documentation of this file.
00001 #include <stdio.h>
00002 #include <ctype.h>
00003 #include <stdlib.h>
00004 #include <string.h>
00005 #include <errno.h>
00006 #include "svm.h"
00007 
/*
 * Silent stand-in for printf: accepts a printf-style argument list, prints
 * nothing and reports zero characters written. main() installs it as `info`
 * when the -q option is given.
 *
 * s: format string (ignored, as are all variadic arguments).
 * Returns 0.
 */
int print_null(const char *s,...)
{
	(void)s;	// deliberately unused
	return 0;	// the original fell off the end of a non-void function
}
00009 
/* Model used for prediction; assigned in main() from svm_load_model(). */
struct svm_model *model;

/* Set from the -b option in main(); non-zero requests probability output. */
int predict_probability = 0;

/*
 * Feature array for the instance currently being parsed, and its capacity
 * in elements. Allocated in main() and doubled on demand in predict().
 */
struct svm_node *x;
int max_nr_attr = 64;

/*
 * Sink for informational messages: printf by default, replaced by
 * print_null when the -q option is given.
 */
static int (*info)(const char *fmt, ...) = printf;
00017 
/* Shared line buffer and its current capacity in bytes (set up by predict()). */
static char *line = NULL;
static int max_line_len;

/*
 * Read one complete line from `input` into the global `line` buffer,
 * doubling the buffer until the whole line (including its '\n', if any)
 * fits.
 *
 * The caller must have pointed `line` at a heap buffer of `max_line_len`
 * bytes before the first call.
 *
 * Returns `line`, or NULL when nothing more can be read. A final line
 * without a trailing newline is still returned. Terminates the program
 * with status 1 if the buffer cannot be enlarged.
 */
static char *readline(FILE *input)
{
	if(fgets(line,max_line_len,input) == NULL)
		return NULL;

	// No newline yet means the line was longer than the buffer: grow and
	// keep appending until the newline shows up or the input ends.
	while(strrchr(line,'\n') == NULL)
	{
		int len;
		char *grown;

		// Keep the old pointer until realloc succeeds, so a failure is
		// reported instead of leaking the buffer and crashing in strlen.
		grown = realloc(line,(size_t)max_line_len*2);
		if(grown == NULL)
		{
			fprintf(stderr,"can't allocate memory for input line\n");
			exit(1);
		}
		line = grown;
		max_line_len *= 2;

		len = (int) strlen(line);
		if(fgets(line+len,max_line_len-len,input) == NULL)
			break;
	}
	return line;
}
00038 
/*
 * Report a malformed line of the test file on stderr and terminate the
 * program with exit status 1.
 *
 * line_num: line number to show in the message (predict() passes the
 *           count of lines processed so far plus one).
 */
void exit_input_error(int line_num)
{
	fprintf(stderr,
	        "Wrong input format at line %d\n",
	        line_num);

	exit(1);
}
00044 
00045 void predict(FILE *input, FILE *output)
00046 {
00047         int correct = 0;
00048         int total = 0;
00049         double error = 0;
00050         double sump = 0, sumt = 0, sumpp = 0, sumtt = 0, sumpt = 0;
00051 
00052         int svm_type=svm_get_svm_type(model);
00053         int nr_class=svm_get_nr_class(model);
00054         double *prob_estimates=NULL;
00055         int j;
00056 
00057         if(predict_probability)
00058         {
00059                 if (svm_type==NU_SVR || svm_type==EPSILON_SVR)
00060                         info("Prob. model for test data: target value = predicted value + z,\nz: Laplace distribution e^(-|z|/sigma)/(2sigma),sigma=%g\n",svm_get_svr_probability(model));
00061                 else
00062                 {
00063                         int *labels=(int *) malloc(nr_class*sizeof(int));
00064                         svm_get_labels(model,labels);
00065                         prob_estimates = (double *) malloc(nr_class*sizeof(double));
00066                         fprintf(output,"labels");               
00067                         for(j=0;j<nr_class;j++)
00068                                 fprintf(output," %d",labels[j]);
00069                         fprintf(output,"\n");
00070                         free(labels);
00071                 }
00072         }
00073 
00074         max_line_len = 1024;
00075         line = (char *)malloc(max_line_len*sizeof(char));
00076         while(readline(input) != NULL)
00077         {
00078                 int i = 0;
00079                 double target_label, predict_label;
00080                 char *idx, *val, *label, *endptr;
00081                 int inst_max_index = -1; // strtol gives 0 if wrong format, and precomputed kernel has <index> start from 0
00082 
00083                 label = strtok(line," \t\n");
00084                 if(label == NULL) // empty line
00085                         exit_input_error(total+1);
00086 
00087                 target_label = strtod(label,&endptr);
00088                 if(endptr == label || *endptr != '\0')
00089                         exit_input_error(total+1);
00090 
00091                 while(1)
00092                 {
00093                         if(i>=max_nr_attr-1)    // need one more for index = -1
00094                         {
00095                                 max_nr_attr *= 2;
00096                                 x = (struct svm_node *) realloc(x,max_nr_attr*sizeof(struct svm_node));
00097                         }
00098 
00099                         idx = strtok(NULL,":");
00100                         val = strtok(NULL," \t");
00101 
00102                         if(val == NULL)
00103                                 break;
00104                         errno = 0;
00105                         x[i].index = (int) strtol(idx,&endptr,10);
00106                         if(endptr == idx || errno != 0 || *endptr != '\0' || x[i].index <= inst_max_index)
00107                                 exit_input_error(total+1);
00108                         else
00109                                 inst_max_index = x[i].index;
00110 
00111                         errno = 0;
00112                         x[i].value = strtod(val,&endptr);
00113                         if(endptr == val || errno != 0 || (*endptr != '\0' && !isspace(*endptr)))
00114                                 exit_input_error(total+1);
00115 
00116                         ++i;
00117                 }
00118                 x[i].index = -1;
00119 
00120                 if (predict_probability && (svm_type==C_SVC || svm_type==NU_SVC))
00121                 {
00122                         predict_label = svm_predict_probability(model,x,prob_estimates);
00123                         fprintf(output,"%g",predict_label);
00124                         for(j=0;j<nr_class;j++)
00125                                 fprintf(output," %g",prob_estimates[j]);
00126                         fprintf(output,"\n");
00127                 }
00128                 else
00129                 {
00130                         predict_label = svm_predict(model,x);
00131                         fprintf(output,"%g\n",predict_label);
00132                 }
00133 
00134                 if(predict_label == target_label)
00135                         ++correct;
00136                 error += (predict_label-target_label)*(predict_label-target_label);
00137                 sump += predict_label;
00138                 sumt += target_label;
00139                 sumpp += predict_label*predict_label;
00140                 sumtt += target_label*target_label;
00141                 sumpt += predict_label*target_label;
00142                 ++total;
00143         }
00144         if (svm_type==NU_SVR || svm_type==EPSILON_SVR)
00145         {
00146                 info("Mean squared error = %g (regression)\n",error/total);
00147                 info("Squared correlation coefficient = %g (regression)\n",
00148                         ((total*sumpt-sump*sumt)*(total*sumpt-sump*sumt))/
00149                         ((total*sumpp-sump*sump)*(total*sumtt-sumt*sumt))
00150                         );
00151         }
00152         else
00153                 info("Accuracy = %g%% (%d/%d) (classification)\n",
00154                         (double)correct/total*100,correct,total);
00155         if(predict_probability)
00156                 free(prob_estimates);
00157 }
00158 
/*
 * Print the command-line usage summary to standard output and terminate
 * the program with exit status 1.
 */
void exit_with_help()
{
	// The text contains no conversion specifiers, so writing it verbatim
	// produces exactly what printf would.
	fputs(
		"Usage: svm-predict [options] test_file model_file output_file\n"
		"options:\n"
		"-b probability_estimates: whether to predict probability estimates, 0 or 1 (default 0); for one-class SVM only 0 is supported\n"
		"-q : quiet mode (no outputs)\n",
		stdout);

	exit(1);
}
00169 
00170 int main(int argc, char **argv)
00171 {
00172         FILE *input, *output;
00173         int i;
00174         // parse options
00175         for(i=1;i<argc;i++)
00176         {
00177                 if(argv[i][0] != '-') break;
00178                 ++i;
00179                 switch(argv[i-1][1])
00180                 {
00181                         case 'b':
00182                                 predict_probability = atoi(argv[i]);
00183                                 break;
00184                         case 'q':
00185                                 info = &print_null;
00186                                 i--;
00187                                 break;
00188                         default:
00189                                 fprintf(stderr,"Unknown option: -%c\n", argv[i-1][1]);
00190                                 exit_with_help();
00191                 }
00192         }
00193 
00194         if(i>=argc-2)
00195                 exit_with_help();
00196 
00197         input = fopen(argv[i],"r");
00198         if(input == NULL)
00199         {
00200                 fprintf(stderr,"can't open input file %s\n",argv[i]);
00201                 exit(1);
00202         }
00203 
00204         output = fopen(argv[i+2],"w");
00205         if(output == NULL)
00206         {
00207                 fprintf(stderr,"can't open output file %s\n",argv[i+2]);
00208                 exit(1);
00209         }
00210 
00211         if((model=svm_load_model(argv[i+1]))==0)
00212         {
00213                 fprintf(stderr,"can't open model file %s\n",argv[i+1]);
00214                 exit(1);
00215         }
00216 
00217         x = (struct svm_node *) malloc(max_nr_attr*sizeof(struct svm_node));
00218         if(predict_probability)
00219         {
00220                 if(svm_check_probability_model(model)==0)
00221                 {
00222                         fprintf(stderr,"Model does not support probabiliy estimates\n");
00223                         exit(1);
00224                 }
00225         }
00226         else
00227         {
00228                 if(svm_check_probability_model(model)!=0)
00229                         info("Model supports probability estimates, but disabled in prediction.\n");
00230         }
00231 
00232         predict(input,output);
00233         svm_free_and_destroy_model(&model);
00234         free(x);
00235         free(line);
00236         fclose(input);
00237         fclose(output);
00238         return 0;
00239 }


ml_classifiers
Author(s): Scott Niekum
autogenerated on Fri Jan 3 2014 11:30:23