#include <stdio.h>
#include <ctype.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <limits.h>
#include "svm.h"
00007
/* Model used for every prediction; main() assigns it from svm_load_model(). */
struct svm_model *model;
/* Non-zero when probability estimates were requested with the -b option. */
int predict_probability = 0;

/* Number of svm_node slots currently allocated in x; doubled on demand. */
int max_nr_attr = 64;
/* Scratch array holding the sparse feature vector of the current record. */
struct svm_node *x;
00013
/* Buffer that receives the most recently read line; readline() enlarges it
 * whenever a line does not fit. The caller allocates and frees it. */
static char *line = NULL;
/* Capacity of `line` in bytes; the caller sets it when allocating `line`. */
static int max_line_len;

/*
 * Read one complete line from `input` into the global `line` buffer.
 *
 * The buffer is doubled (and max_line_len updated) until the whole line,
 * including its terminating '\n', fits. A final line that lacks a newline
 * is returned as-is.
 *
 * Returns `line`, or NULL when fgets() reports end of file / a read error
 * before storing anything, or when the buffer has not been set up.
 * Terminates the program with exit(1) if the buffer cannot be enlarged.
 */
static char* readline(FILE *input)
{
	size_t len;

	/* fgets() on a missing or zero-sized buffer would be undefined. */
	if(line == NULL || max_line_len < 1)
		return NULL;

	if(fgets(line,max_line_len,input) == NULL)
		return NULL;

	/* fgets() stops right after a newline, so only the last stored
	 * character has to be inspected; len is carried across iterations
	 * instead of rescanning the whole buffer. */
	len = strlen(line);
	while(len == 0 || line[len-1] != '\n')
	{
		char *grown;

		if(max_line_len > INT_MAX/2)
		{
			fprintf(stderr,"input line is too long\n");
			exit(1);
		}

		/* Keep the old pointer until realloc() is known to have succeeded. */
		grown = (char *) realloc(line,(size_t)max_line_len*2);
		if(grown == NULL)
		{
			fprintf(stderr,"can't allocate enough memory to read a line\n");
			exit(1);
		}
		line = grown;
		max_line_len *= 2;

		/* Continue reading right after what has been stored so far. */
		if(fgets(line+len,max_line_len-(int)len,input) == NULL)
			break;
		len += strlen(line+len);
	}
	return line;
}
00034
/*
 * Report a malformed record on stderr and terminate with exit status 1.
 *
 * line_num: number printed in the diagnostic to identify the bad line.
 */
void exit_input_error(int line_num)
{
	fprintf(stderr,
	        "Wrong input format"
	        " at line %d\n",
	        line_num);
	exit(1);
}
00040
00041 void predict(FILE *input, FILE *output)
00042 {
00043 int correct = 0;
00044 int total = 0;
00045 double error = 0;
00046 double sump = 0, sumt = 0, sumpp = 0, sumtt = 0, sumpt = 0;
00047
00048 int svm_type=svm_get_svm_type(model);
00049 int nr_class=svm_get_nr_class(model);
00050 double *prob_estimates=NULL;
00051 int j;
00052
00053 if(predict_probability)
00054 {
00055 if (svm_type==NU_SVR || svm_type==EPSILON_SVR)
00056 printf("Prob. model for test data: target value = predicted value + z,\nz: Laplace distribution e^(-|z|/sigma)/(2sigma),sigma=%g\n",svm_get_svr_probability(model));
00057 else
00058 {
00059 int *labels=(int *) malloc(nr_class*sizeof(int));
00060 svm_get_labels(model,labels);
00061 prob_estimates = (double *) malloc(nr_class*sizeof(double));
00062 fprintf(output,"labels");
00063 for(j=0;j<nr_class;j++)
00064 fprintf(output," %d",labels[j]);
00065 fprintf(output,"\n");
00066 free(labels);
00067 }
00068 }
00069
00070 max_line_len = 1024;
00071 line = (char *)malloc(max_line_len*sizeof(char));
00072 while(readline(input) != NULL)
00073 {
00074 int i = 0;
00075 double target_label, predict_label;
00076 char *idx, *val, *label, *endptr;
00077 int inst_max_index = -1;
00078
00079 label = strtok(line," \t\n");
00080 if(label == NULL)
00081 exit_input_error(total+1);
00082
00083 target_label = strtod(label,&endptr);
00084 if(endptr == label || *endptr != '\0')
00085 exit_input_error(total+1);
00086
00087 while(1)
00088 {
00089 if(i>=max_nr_attr-1)
00090 {
00091 max_nr_attr *= 2;
00092 x = (struct svm_node *) realloc(x,max_nr_attr*sizeof(struct svm_node));
00093 }
00094
00095 idx = strtok(NULL,":");
00096 val = strtok(NULL," \t");
00097
00098 if(val == NULL)
00099 break;
00100 errno = 0;
00101 x[i].index = (int) strtol(idx,&endptr,10);
00102 if(endptr == idx || errno != 0 || *endptr != '\0' || x[i].index <= inst_max_index)
00103 exit_input_error(total+1);
00104 else
00105 inst_max_index = x[i].index;
00106
00107 errno = 0;
00108 x[i].value = strtod(val,&endptr);
00109 if(endptr == val || errno != 0 || (*endptr != '\0' && !isspace(*endptr)))
00110 exit_input_error(total+1);
00111
00112 ++i;
00113 }
00114 x[i].index = -1;
00115
00116 if (predict_probability && (svm_type==C_SVC || svm_type==NU_SVC))
00117 {
00118 predict_label = svm_predict_probability(model,x,prob_estimates);
00119 fprintf(output,"%g",predict_label);
00120 for(j=0;j<nr_class;j++)
00121 fprintf(output," %g",prob_estimates[j]);
00122 fprintf(output,"\n");
00123 }
00124 else
00125 {
00126 predict_label = svm_predict(model,x);
00127 fprintf(output,"%g\n",predict_label);
00128 }
00129
00130 if(predict_label == target_label)
00131 ++correct;
00132 error += (predict_label-target_label)*(predict_label-target_label);
00133 sump += predict_label;
00134 sumt += target_label;
00135 sumpp += predict_label*predict_label;
00136 sumtt += target_label*target_label;
00137 sumpt += predict_label*target_label;
00138 ++total;
00139 }
00140 if (svm_type==NU_SVR || svm_type==EPSILON_SVR)
00141 {
00142 printf("Mean squared error = %g (regression)\n",error/total);
00143 printf("Squared correlation coefficient = %g (regression)\n",
00144 ((total*sumpt-sump*sumt)*(total*sumpt-sump*sumt))/
00145 ((total*sumpp-sump*sump)*(total*sumtt-sumt*sumt))
00146 );
00147 }
00148 else
00149 printf("Accuracy = %g%% (%d/%d) (classification)\n",
00150 (double)correct/total*100,correct,total);
00151 if(predict_probability)
00152 free(prob_estimates);
00153 }
00154
/* Print the command-line usage text to stdout and terminate with status 1. */
void exit_with_help()
{
	/* The text holds no format directives, so it is written verbatim. */
	static const char usage[] =
		"Usage: svm-predict [options] test_file model_file output_file\n"
		"options:\n"
		"-b probability_estimates: whether to predict probability estimates, 0 or 1 (default 0); for one-class SVM only 0 is supported\n";

	fputs(usage,stdout);
	exit(1);
}
00164
00165 int main(int argc, char **argv)
00166 {
00167 FILE *input, *output;
00168 int i;
00169
00170
00171 for(i=1;i<argc;i++)
00172 {
00173 if(argv[i][0] != '-') break;
00174 ++i;
00175 switch(argv[i-1][1])
00176 {
00177 case 'b':
00178 predict_probability = atoi(argv[i]);
00179 break;
00180 default:
00181 fprintf(stderr,"Unknown option: -%c\n", argv[i-1][1]);
00182 exit_with_help();
00183 }
00184 }
00185 if(i>=argc-2)
00186 exit_with_help();
00187
00188 input = fopen(argv[i],"r");
00189 if(input == NULL)
00190 {
00191 fprintf(stderr,"can't open input file %s\n",argv[i]);
00192 exit(1);
00193 }
00194
00195 output = fopen(argv[i+2],"w");
00196 if(output == NULL)
00197 {
00198 fprintf(stderr,"can't open output file %s\n",argv[i+2]);
00199 exit(1);
00200 }
00201
00202 if((model=svm_load_model(argv[i+1]))==0)
00203 {
00204 fprintf(stderr,"can't open model file %s\n",argv[i+1]);
00205 exit(1);
00206 }
00207
00208 x = (struct svm_node *) malloc(max_nr_attr*sizeof(struct svm_node));
00209 if(predict_probability)
00210 {
00211 if(svm_check_probability_model(model)==0)
00212 {
00213 fprintf(stderr,"Model does not support probabiliy estimates\n");
00214 exit(1);
00215 }
00216 }
00217 else
00218 {
00219 if(svm_check_probability_model(model)!=0)
00220 printf("Model supports probability estimates, but disabled in prediction.\n");
00221 }
00222 predict(input,output);
00223 svm_free_and_destroy_model(&model);
00224 free(x);
00225 free(line);
00226 fclose(input);
00227 fclose(output);
00228 return 0;
00229 }