svm-predict.c
Go to the documentation of this file.
00001 #include <stdio.h>
00002 #include <ctype.h>
00003 #include <stdlib.h>
00004 #include <string.h>
00005 #include <errno.h>
00006 #include "svm.h"
00007 
// Feature vector of the instance currently being predicted.
// main() allocates it with max_nr_attr entries; predict() grows it on demand.
struct svm_node *x = NULL;

// Number of svm_node entries currently allocated for x.
int max_nr_attr = 64;

// Model loaded by main() through svm_load_model() and queried by predict().
struct svm_model *model = NULL;

// Non-zero when the "-b" option asked for probability estimates.
int predict_probability = 0;
00013 
// Shared line buffer used by readline() and its capacity in bytes.
// predict() normally allocates it and main() frees it.
static char *line = NULL;
static int max_line_len;

// Read one complete line from `input` into the global `line` buffer.
//
// The buffer is doubled (and max_line_len updated) until the whole line,
// including its terminating '\n' if present, fits.  If the buffer has not
// been allocated yet, a 1024-byte buffer is created first.
//
// Returns `line`, or NULL when nothing could be read (end of file or read
// error).  The last line of a file is returned even when it lacks a '\n'.
// On allocation failure a message is printed to stderr and the program
// exits with status 1.
static char* readline(FILE *input)
{
	size_t len;

	// Tolerate callers that did not set the buffer up beforehand.
	if(line == NULL || max_line_len <= 0)
	{
		char *fresh = (char *) realloc(line,1024);
		if(fresh == NULL)
		{
			fprintf(stderr,"can't allocate memory for input line\n");
			exit(1);
		}
		line = fresh;
		max_line_len = 1024;
	}

	if(fgets(line,max_line_len,input) == NULL)
		return NULL;

	// fgets stops right after a newline, so only the last character read
	// needs to be inspected to know whether the line is complete.
	len = strlen(line);
	while(len == 0 || line[len-1] != '\n')
	{
		// Keep the old pointer until realloc is known to have succeeded.
		char *grown = (char *) realloc(line,(size_t)max_line_len*2);
		if(grown == NULL)
		{
			fprintf(stderr,"can't allocate memory for input line\n");
			exit(1);
		}
		line = grown;
		max_line_len *= 2;

		// Continue reading where the previous chunk ended.
		if(fgets(line+len,max_line_len-(int)len,input) == NULL)
			break;
		len += strlen(line+len);
	}
	return line;
}
00034 
// Report a malformed test file on stderr and terminate with exit status 1.
//
// line_num: number printed in the message; predict() passes the 1-based
//           position of the offending instance line.
void exit_input_error(int line_num)
{
	FILE *err = stderr;

	fprintf(err, "Wrong input format at line %d\n", line_num);
	exit(1);
}
00040 
00041 void predict(FILE *input, FILE *output)
00042 {
00043         int correct = 0;
00044         int total = 0;
00045         double error = 0;
00046         double sump = 0, sumt = 0, sumpp = 0, sumtt = 0, sumpt = 0;
00047 
00048         int svm_type=svm_get_svm_type(model);
00049         int nr_class=svm_get_nr_class(model);
00050         double *prob_estimates=NULL;
00051         int j;
00052 
00053         if(predict_probability)
00054         {
00055                 if (svm_type==NU_SVR || svm_type==EPSILON_SVR)
00056                         printf("Prob. model for test data: target value = predicted value + z,\nz: Laplace distribution e^(-|z|/sigma)/(2sigma),sigma=%g\n",svm_get_svr_probability(model));
00057                 else
00058                 {
00059                         int *labels=(int *) malloc(nr_class*sizeof(int));
00060                         svm_get_labels(model,labels);
00061                         prob_estimates = (double *) malloc(nr_class*sizeof(double));
00062                         fprintf(output,"labels");               
00063                         for(j=0;j<nr_class;j++)
00064                                 fprintf(output," %d",labels[j]);
00065                         fprintf(output,"\n");
00066                         free(labels);
00067                 }
00068         }
00069 
00070         max_line_len = 1024;
00071         line = (char *)malloc(max_line_len*sizeof(char));
00072         while(readline(input) != NULL)
00073         {
00074                 int i = 0;
00075                 double target_label, predict_label;
00076                 char *idx, *val, *label, *endptr;
00077                 int inst_max_index = -1; // strtol gives 0 if wrong format, and precomputed kernel has <index> start from 0
00078 
00079                 label = strtok(line," \t");
00080                 target_label = strtod(label,&endptr);
00081                 if(endptr == label)
00082                         exit_input_error(total+1);
00083 
00084                 while(1)
00085                 {
00086                         if(i>=max_nr_attr-1)    // need one more for index = -1
00087                         {
00088                                 max_nr_attr *= 2;
00089                                 x = (struct svm_node *) realloc(x,max_nr_attr*sizeof(struct svm_node));
00090                         }
00091 
00092                         idx = strtok(NULL,":");
00093                         val = strtok(NULL," \t");
00094 
00095                         if(val == NULL)
00096                                 break;
00097                         errno = 0;
00098                         x[i].index = (int) strtol(idx,&endptr,10);
00099                         if(endptr == idx || errno != 0 || *endptr != '\0' || x[i].index <= inst_max_index)
00100                                 exit_input_error(total+1);
00101                         else
00102                                 inst_max_index = x[i].index;
00103 
00104                         errno = 0;
00105                         x[i].value = strtod(val,&endptr);
00106                         if(endptr == val || errno != 0 || (*endptr != '\0' && !isspace(*endptr)))
00107                                 exit_input_error(total+1);
00108 
00109                         ++i;
00110                 }
00111                 x[i].index = -1;
00112 
00113                 if (predict_probability && (svm_type==C_SVC || svm_type==NU_SVC))
00114                 {
00115                         predict_label = svm_predict_probability(model,x,prob_estimates);
00116                         fprintf(output,"%g",predict_label);
00117                         for(j=0;j<nr_class;j++)
00118                                 fprintf(output," %g",prob_estimates[j]);
00119                         fprintf(output,"\n");
00120                 }
00121                 else
00122                 {
00123                         predict_label = svm_predict(model,x);
00124                         fprintf(output,"%g\n",predict_label);
00125                 }
00126 
00127                 if(predict_label == target_label)
00128                         ++correct;
00129                 error += (predict_label-target_label)*(predict_label-target_label);
00130                 sump += predict_label;
00131                 sumt += target_label;
00132                 sumpp += predict_label*predict_label;
00133                 sumtt += target_label*target_label;
00134                 sumpt += predict_label*target_label;
00135                 ++total;
00136         }
00137         if (svm_type==NU_SVR || svm_type==EPSILON_SVR)
00138         {
00139                 printf("Mean squared error = %g (regression)\n",error/total);
00140                 printf("Squared correlation coefficient = %g (regression)\n",
00141                        ((total*sumpt-sump*sumt)*(total*sumpt-sump*sumt))/
00142                        ((total*sumpp-sump*sump)*(total*sumtt-sumt*sumt))
00143                        );
00144         }
00145         else
00146                 printf("Accuracy = %g%% (%d/%d) (classification)\n",
00147                        (double)correct/total*100,correct,total);
00148         if(predict_probability)
00149                 free(prob_estimates);
00150 }
00151 
// Print the command-line synopsis to stdout and terminate with exit status 1.
void exit_with_help()
{
	static const char usage[] =
		"Usage: svm-predict [options] test_file model_file output_file\n"
		"options:\n"
		"-b probability_estimates: whether to predict probability estimates, 0 or 1 (default 0); for one-class SVM only 0 is supported\n";

	fputs(usage, stdout);
	exit(1);
}
00161 
00162 int main(int argc, char **argv)
00163 {
00164         FILE *input, *output;
00165         int i;
00166 
00167         // parse options
00168         for(i=1;i<argc;i++)
00169         {
00170                 if(argv[i][0] != '-') break;
00171                 ++i;
00172                 switch(argv[i-1][1])
00173                 {
00174                         case 'b':
00175                                 predict_probability = atoi(argv[i]);
00176                                 break;
00177                         default:
00178                                 fprintf(stderr,"Unknown option: -%c\n", argv[i-1][1]);
00179                                 exit_with_help();
00180                 }
00181         }
00182         if(i>=argc-2)
00183                 exit_with_help();
00184         
00185         input = fopen(argv[i],"r");
00186         if(input == NULL)
00187         {
00188                 fprintf(stderr,"can't open input file %s\n",argv[i]);
00189                 exit(1);
00190         }
00191 
00192         output = fopen(argv[i+2],"w");
00193         if(output == NULL)
00194         {
00195                 fprintf(stderr,"can't open output file %s\n",argv[i+2]);
00196                 exit(1);
00197         }
00198 
00199         if((model=svm_load_model(argv[i+1]))==0)
00200         {
00201                 fprintf(stderr,"can't open model file %s\n",argv[i+1]);
00202                 exit(1);
00203         }
00204 
00205         x = (struct svm_node *) malloc(max_nr_attr*sizeof(struct svm_node));
00206         if(predict_probability)
00207         {
00208                 if(svm_check_probability_model(model)==0)
00209                 {
00210                         fprintf(stderr,"Model does not support probabiliy estimates\n");
00211                         exit(1);
00212                 }
00213         }
00214         else
00215         {
00216                 if(svm_check_probability_model(model)!=0)
00217                         printf("Model supports probability estimates, but disabled in prediction.\n");
00218         }
00219         predict(input,output);
00220         svm_free_and_destroy_model(&model);
00221         free(x);
00222         free(line);
00223         fclose(input);
00224         fclose(output);
00225         return 0;
00226 }


libsvm3
Author(s): various
autogenerated on Wed Nov 27 2013 11:36:23