00001 #include <stdio.h>
00002 #include <ctype.h>
00003 #include <stdlib.h>
00004 #include <string.h>
00005 #include <errno.h>
00006 #include "svm.h"
00007
/*
 * Do-nothing stand-in for printf: accepts a printf-style format string and
 * arguments and ignores them.  main() installs it as the `info` callback
 * when the -q option is given.
 *
 * Returns 0.
 */
int print_null(const char *s,...)
{
	(void)s;	/* intentionally unused */
	return 0;
}
00009
/* Routine used for informational messages; printf unless replaced. */
static int (*info)(const char *fmt, ...) = printf;

/* Buffer of feature nodes for one parsed instance. */
struct svm_node *x;
/* Capacity of `x`, counted in nodes. */
int max_nr_attr = 64;

/* Model used for prediction. */
struct svm_model *model;
/* Non-zero when probability estimates are requested. */
int predict_probability = 0;

/* Buffer holding the input line currently being read. */
static char *line = NULL;
/* Size of `line` in bytes. */
static int max_line_len;
00020
00021 static char* readline(FILE *input)
00022 {
00023 int len;
00024
00025 if(fgets(line,max_line_len,input) == NULL)
00026 return NULL;
00027
00028 while(strrchr(line,'\n') == NULL)
00029 {
00030 max_line_len *= 2;
00031 line = (char *) realloc(line,max_line_len);
00032 len = (int) strlen(line);
00033 if(fgets(line+len,max_line_len-len,input) == NULL)
00034 break;
00035 }
00036 return line;
00037 }
00038
/*
 * Report a malformed input line on stderr and terminate the process with
 * exit status 1.
 *
 * line_num: line number shown in the message.
 */
void exit_input_error(int line_num)
{
	const int failure_status = 1;

	fprintf(stderr, "Wrong input format at line %d\n", line_num);
	exit(failure_status);
}
00044
00045 void predict(FILE *input, FILE *output)
00046 {
00047 int correct = 0;
00048 int total = 0;
00049 double error = 0;
00050 double sump = 0, sumt = 0, sumpp = 0, sumtt = 0, sumpt = 0;
00051
00052 int svm_type=svm_get_svm_type(model);
00053 int nr_class=svm_get_nr_class(model);
00054 double *prob_estimates=NULL;
00055 int j;
00056
00057 if(predict_probability)
00058 {
00059 if (svm_type==NU_SVR || svm_type==EPSILON_SVR)
00060 info("Prob. model for test data: target value = predicted value + z,\nz: Laplace distribution e^(-|z|/sigma)/(2sigma),sigma=%g\n",svm_get_svr_probability(model));
00061 else
00062 {
00063 int *labels=(int *) malloc(nr_class*sizeof(int));
00064 svm_get_labels(model,labels);
00065 prob_estimates = (double *) malloc(nr_class*sizeof(double));
00066 fprintf(output,"labels");
00067 for(j=0;j<nr_class;j++)
00068 fprintf(output," %d",labels[j]);
00069 fprintf(output,"\n");
00070 free(labels);
00071 }
00072 }
00073
00074 max_line_len = 1024;
00075 line = (char *)malloc(max_line_len*sizeof(char));
00076 while(readline(input) != NULL)
00077 {
00078 int i = 0;
00079 double target_label, predict_label;
00080 char *idx, *val, *label, *endptr;
00081 int inst_max_index = -1;
00082
00083 label = strtok(line," \t\n");
00084 if(label == NULL)
00085 exit_input_error(total+1);
00086
00087 target_label = strtod(label,&endptr);
00088 if(endptr == label || *endptr != '\0')
00089 exit_input_error(total+1);
00090
00091 while(1)
00092 {
00093 if(i>=max_nr_attr-1)
00094 {
00095 max_nr_attr *= 2;
00096 x = (struct svm_node *) realloc(x,max_nr_attr*sizeof(struct svm_node));
00097 }
00098
00099 idx = strtok(NULL,":");
00100 val = strtok(NULL," \t");
00101
00102 if(val == NULL)
00103 break;
00104 errno = 0;
00105 x[i].index = (int) strtol(idx,&endptr,10);
00106 if(endptr == idx || errno != 0 || *endptr != '\0' || x[i].index <= inst_max_index)
00107 exit_input_error(total+1);
00108 else
00109 inst_max_index = x[i].index;
00110
00111 errno = 0;
00112 x[i].value = strtod(val,&endptr);
00113 if(endptr == val || errno != 0 || (*endptr != '\0' && !isspace(*endptr)))
00114 exit_input_error(total+1);
00115
00116 ++i;
00117 }
00118 x[i].index = -1;
00119
00120 if (predict_probability && (svm_type==C_SVC || svm_type==NU_SVC))
00121 {
00122 predict_label = svm_predict_probability(model,x,prob_estimates);
00123 fprintf(output,"%g",predict_label);
00124 for(j=0;j<nr_class;j++)
00125 fprintf(output," %g",prob_estimates[j]);
00126 fprintf(output,"\n");
00127 }
00128 else
00129 {
00130 predict_label = svm_predict(model,x);
00131 fprintf(output,"%g\n",predict_label);
00132 }
00133
00134 if(predict_label == target_label)
00135 ++correct;
00136 error += (predict_label-target_label)*(predict_label-target_label);
00137 sump += predict_label;
00138 sumt += target_label;
00139 sumpp += predict_label*predict_label;
00140 sumtt += target_label*target_label;
00141 sumpt += predict_label*target_label;
00142 ++total;
00143 }
00144 if (svm_type==NU_SVR || svm_type==EPSILON_SVR)
00145 {
00146 info("Mean squared error = %g (regression)\n",error/total);
00147 info("Squared correlation coefficient = %g (regression)\n",
00148 ((total*sumpt-sump*sumt)*(total*sumpt-sump*sumt))/
00149 ((total*sumpp-sump*sump)*(total*sumtt-sumt*sumt))
00150 );
00151 }
00152 else
00153 info("Accuracy = %g%% (%d/%d) (classification)\n",
00154 (double)correct/total*100,correct,total);
00155 if(predict_probability)
00156 free(prob_estimates);
00157 }
00158
/*
 * Print the command-line usage summary on standard output and terminate
 * the process with exit status 1.
 */
void exit_with_help()
{
	static const char usage_text[] =
		"Usage: svm-predict [options] test_file model_file output_file\n"
		"options:\n"
		"-b probability_estimates: whether to predict probability estimates, 0 or 1 (default 0); for one-class SVM only 0 is supported\n"
		"-q : quiet mode (no outputs)\n";

	fputs(usage_text, stdout);
	exit(1);
}
00169
00170 int main(int argc, char **argv)
00171 {
00172 FILE *input, *output;
00173 int i;
00174
00175 for(i=1;i<argc;i++)
00176 {
00177 if(argv[i][0] != '-') break;
00178 ++i;
00179 switch(argv[i-1][1])
00180 {
00181 case 'b':
00182 predict_probability = atoi(argv[i]);
00183 break;
00184 case 'q':
00185 info = &print_null;
00186 i--;
00187 break;
00188 default:
00189 fprintf(stderr,"Unknown option: -%c\n", argv[i-1][1]);
00190 exit_with_help();
00191 }
00192 }
00193
00194 if(i>=argc-2)
00195 exit_with_help();
00196
00197 input = fopen(argv[i],"r");
00198 if(input == NULL)
00199 {
00200 fprintf(stderr,"can't open input file %s\n",argv[i]);
00201 exit(1);
00202 }
00203
00204 output = fopen(argv[i+2],"w");
00205 if(output == NULL)
00206 {
00207 fprintf(stderr,"can't open output file %s\n",argv[i+2]);
00208 exit(1);
00209 }
00210
00211 if((model=svm_load_model(argv[i+1]))==0)
00212 {
00213 fprintf(stderr,"can't open model file %s\n",argv[i+1]);
00214 exit(1);
00215 }
00216
00217 x = (struct svm_node *) malloc(max_nr_attr*sizeof(struct svm_node));
00218 if(predict_probability)
00219 {
00220 if(svm_check_probability_model(model)==0)
00221 {
00222 fprintf(stderr,"Model does not support probabiliy estimates\n");
00223 exit(1);
00224 }
00225 }
00226 else
00227 {
00228 if(svm_check_probability_model(model)!=0)
00229 info("Model supports probability estimates, but disabled in prediction.\n");
00230 }
00231
00232 predict(input,output);
00233 svm_free_and_destroy_model(&model);
00234 free(x);
00235 free(line);
00236 fclose(input);
00237 fclose(output);
00238 return 0;
00239 }