#include <stdio.h>
#include <ctype.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <limits.h>
#include "svm.h"
00007
/* Feature-vector buffer: allocated in main() with max_nr_attr entries and
 * enlarged by predict() whenever an input line needs more entries. */
struct svm_node *x;
/* Current capacity of x, counted in svm_node entries. */
int max_nr_attr = 64;

/* Model returned by svm_load_model() in main(); read by predict(). */
struct svm_model *model;
/* Value of the -b option; non-zero requests probability output. */
int predict_probability = 0;
00013
/* Line buffer used by readline(). The caller allocates it and sets
 * max_line_len to its capacity in bytes; readline() enlarges both on demand. */
static char *line = NULL;
static int max_line_len;

/*
 * Read one complete line from `input` into the shared `line` buffer.
 *
 * The buffer is doubled until the newline has been read or the stream is
 * exhausted, so lines of arbitrary length are supported. The newline, when
 * present, is kept in the buffer.
 *
 * Returns `line`, or NULL when nothing could be read (end of file or read
 * error on the first fgets). The returned pointer is only valid until the
 * next call, because the buffer may be reallocated.
 *
 * Terminates the program with exit(1) if the buffer has not been allocated,
 * if it cannot be enlarged, or if its size would overflow an int.
 */
static char* readline(FILE *input)
{
	int len = 0;

	if(line == NULL || max_line_len <= 0)
	{
		fprintf(stderr,"readline: line buffer is not allocated\n");
		exit(1);
	}

	if(fgets(line,max_line_len,input) == NULL)
		return NULL;

	/* Only the part read by the latest fgets can contain the newline, so
	 * search from `len` instead of rescanning the whole buffer. */
	while(strchr(line+len,'\n') == NULL)
	{
		char *grown;

		len += (int) strlen(line+len);

		if(max_line_len > INT_MAX/2)
		{
			fprintf(stderr,"readline: input line is too long\n");
			exit(1);
		}
		/* Keep the old pointer until realloc is known to have succeeded. */
		grown = (char *) realloc(line,(size_t)max_line_len*2);
		if(grown == NULL)
		{
			fprintf(stderr,"readline: out of memory\n");
			exit(1);
		}
		line = grown;
		max_line_len *= 2;

		/* Last line without a trailing newline: return what was read. */
		if(fgets(line+len,max_line_len-len,input) == NULL)
			break;
	}
	return line;
}
00034
/*
 * Report a malformed input line on stderr and terminate the program.
 *
 * line_num: 1-based number of the offending line, printed in the message.
 * Never returns; the process exits with status 1.
 */
void exit_input_error(int line_num)
{
	fprintf(stderr, "Wrong input format at line %d\n", line_num);
	exit(1);
}
00040
00041 void predict(FILE *input, FILE *output)
00042 {
00043 int correct = 0;
00044 int total = 0;
00045 double error = 0;
00046 double sump = 0, sumt = 0, sumpp = 0, sumtt = 0, sumpt = 0;
00047
00048 int svm_type=svm_get_svm_type(model);
00049 int nr_class=svm_get_nr_class(model);
00050 double *prob_estimates=NULL;
00051 int j;
00052
00053 if(predict_probability)
00054 {
00055 if (svm_type==NU_SVR || svm_type==EPSILON_SVR)
00056 printf("Prob. model for test data: target value = predicted value + z,\nz: Laplace distribution e^(-|z|/sigma)/(2sigma),sigma=%g\n",svm_get_svr_probability(model));
00057 else
00058 {
00059 int *labels=(int *) malloc(nr_class*sizeof(int));
00060 svm_get_labels(model,labels);
00061 prob_estimates = (double *) malloc(nr_class*sizeof(double));
00062 fprintf(output,"labels");
00063 for(j=0;j<nr_class;j++)
00064 fprintf(output," %d",labels[j]);
00065 fprintf(output,"\n");
00066 free(labels);
00067 }
00068 }
00069
00070 max_line_len = 1024;
00071 line = (char *)malloc(max_line_len*sizeof(char));
00072 while(readline(input) != NULL)
00073 {
00074 int i = 0;
00075 double target_label, predict_label;
00076 char *idx, *val, *label, *endptr;
00077 int inst_max_index = -1;
00078
00079 label = strtok(line," \t");
00080 target_label = strtod(label,&endptr);
00081 if(endptr == label)
00082 exit_input_error(total+1);
00083
00084 while(1)
00085 {
00086 if(i>=max_nr_attr-1)
00087 {
00088 max_nr_attr *= 2;
00089 x = (struct svm_node *) realloc(x,max_nr_attr*sizeof(struct svm_node));
00090 }
00091
00092 idx = strtok(NULL,":");
00093 val = strtok(NULL," \t");
00094
00095 if(val == NULL)
00096 break;
00097 errno = 0;
00098 x[i].index = (int) strtol(idx,&endptr,10);
00099 if(endptr == idx || errno != 0 || *endptr != '\0' || x[i].index <= inst_max_index)
00100 exit_input_error(total+1);
00101 else
00102 inst_max_index = x[i].index;
00103
00104 errno = 0;
00105 x[i].value = strtod(val,&endptr);
00106 if(endptr == val || errno != 0 || (*endptr != '\0' && !isspace(*endptr)))
00107 exit_input_error(total+1);
00108
00109 ++i;
00110 }
00111 x[i].index = -1;
00112
00113 if (predict_probability && (svm_type==C_SVC || svm_type==NU_SVC))
00114 {
00115 predict_label = svm_predict_probability(model,x,prob_estimates);
00116 fprintf(output,"%g",predict_label);
00117 for(j=0;j<nr_class;j++)
00118 fprintf(output," %g",prob_estimates[j]);
00119 fprintf(output,"\n");
00120 }
00121 else
00122 {
00123 predict_label = svm_predict(model,x);
00124 fprintf(output,"%g\n",predict_label);
00125 }
00126
00127 if(predict_label == target_label)
00128 ++correct;
00129 error += (predict_label-target_label)*(predict_label-target_label);
00130 sump += predict_label;
00131 sumt += target_label;
00132 sumpp += predict_label*predict_label;
00133 sumtt += target_label*target_label;
00134 sumpt += predict_label*target_label;
00135 ++total;
00136 }
00137 if (svm_type==NU_SVR || svm_type==EPSILON_SVR)
00138 {
00139 printf("Mean squared error = %g (regression)\n",error/total);
00140 printf("Squared correlation coefficient = %g (regression)\n",
00141 ((total*sumpt-sump*sumt)*(total*sumpt-sump*sumt))/
00142 ((total*sumpp-sump*sump)*(total*sumtt-sumt*sumt))
00143 );
00144 }
00145 else
00146 printf("Accuracy = %g%% (%d/%d) (classification)\n",
00147 (double)correct/total*100,correct,total);
00148 if(predict_probability)
00149 free(prob_estimates);
00150 }
00151
/*
 * Print the command-line usage text to stdout and terminate the program
 * with exit status 1. Never returns.
 */
void exit_with_help()
{
	static const char usage[] =
		"Usage: svm-predict [options] test_file model_file output_file\n"
		"options:\n"
		"-b probability_estimates: whether to predict probability estimates, 0 or 1 (default 0); for one-class SVM only 0 is supported\n";

	fputs(usage, stdout);
	exit(1);
}
00161
00162 int main(int argc, char **argv)
00163 {
00164 FILE *input, *output;
00165 int i;
00166
00167
00168 for(i=1;i<argc;i++)
00169 {
00170 if(argv[i][0] != '-') break;
00171 ++i;
00172 switch(argv[i-1][1])
00173 {
00174 case 'b':
00175 predict_probability = atoi(argv[i]);
00176 break;
00177 default:
00178 fprintf(stderr,"Unknown option: -%c\n", argv[i-1][1]);
00179 exit_with_help();
00180 }
00181 }
00182 if(i>=argc-2)
00183 exit_with_help();
00184
00185 input = fopen(argv[i],"r");
00186 if(input == NULL)
00187 {
00188 fprintf(stderr,"can't open input file %s\n",argv[i]);
00189 exit(1);
00190 }
00191
00192 output = fopen(argv[i+2],"w");
00193 if(output == NULL)
00194 {
00195 fprintf(stderr,"can't open output file %s\n",argv[i+2]);
00196 exit(1);
00197 }
00198
00199 if((model=svm_load_model(argv[i+1]))==0)
00200 {
00201 fprintf(stderr,"can't open model file %s\n",argv[i+1]);
00202 exit(1);
00203 }
00204
00205 x = (struct svm_node *) malloc(max_nr_attr*sizeof(struct svm_node));
00206 if(predict_probability)
00207 {
00208 if(svm_check_probability_model(model)==0)
00209 {
00210 fprintf(stderr,"Model does not support probabiliy estimates\n");
00211 exit(1);
00212 }
00213 }
00214 else
00215 {
00216 if(svm_check_probability_model(model)!=0)
00217 printf("Model supports probability estimates, but disabled in prediction.\n");
00218 }
00219 predict(input,output);
00220 svm_free_and_destroy_model(&model);
00221 free(x);
00222 free(line);
00223 fclose(input);
00224 fclose(output);
00225 return 0;
00226 }