svm-scale.c
Go to the documentation of this file.
00001 #include <float.h>
00002 #include <stdio.h>
00003 #include <stdlib.h>
00004 #include <ctype.h>
00005 #include <string.h>
00006 
/*
 * Print the command-line usage summary to standard output and terminate
 * the process with exit status 1. Never returns.
 */
void exit_with_help()
{
	static const char usage_text[] =
		"Usage: svm-scale [options] data_filename\n"
		"options:\n"
		"-l lower : x scaling lower limit (default -1)\n"
		"-u upper : x scaling upper limit (default +1)\n"
		"-y y_lower y_upper : y scaling limits (default: no y scaling)\n"
		"-s save_filename : save scaling parameters to save_filename\n"
		"-r restore_filename : restore scaling parameters from restore_filename\n";

	/* The text contains no conversion specifiers, so fputs emits it verbatim. */
	fputs(usage_text, stdout);
	exit(1);
}
00020 
/* Line buffer filled by readline(); max_line_len is its current capacity. */
char *line = NULL;
int max_line_len = 1024;

/* Output range for scaled feature values (set by -l / -u or a restore file). */
double lower = -1.0;
double upper = 1.0;

/* Output range for scaled targets; only meaningful when y_scaling is non-zero. */
double y_lower;
double y_upper;
int y_scaling = 0;

/* Per-feature extrema, indexed by feature index (allocated in main). */
double *feature_max;
double *feature_min;

/* Extrema of the target column, seeded so that any real value replaces them. */
double y_max = -DBL_MAX;
double y_min = DBL_MAX;

/* Largest feature index seen in the data file or the restore file. */
int max_index;

/* Counts of feature entries read from the input and written to the output. */
long int num_nonzeros = 0;
long int new_num_nonzeros = 0;

/* Classic two-argument max/min; note that each argument may be evaluated twice. */
#define max(x,y) (((x)>(y))?(x):(y))
#define min(x,y) (((x)<(y))?(x):(y))

/* Forward declarations of the helpers defined after main(). */
void output_target(double value);
void output(int index, double value);
char* readline(FILE *input);
00039 
00040 int main(int argc,char **argv)
00041 {
00042         int i,index;
00043         FILE *fp, *fp_restore = NULL;
00044         char *save_filename = NULL;
00045         char *restore_filename = NULL;
00046 
00047         for(i=1;i<argc;i++)
00048         {
00049                 if(argv[i][0] != '-') break;
00050                 ++i;
00051                 switch(argv[i-1][1])
00052                 {
00053                         case 'l': lower = atof(argv[i]); break;
00054                         case 'u': upper = atof(argv[i]); break;
00055                         case 'y':
00056                                 y_lower = atof(argv[i]);
00057                                 ++i;
00058                                 y_upper = atof(argv[i]);
00059                                 y_scaling = 1;
00060                                 break;
00061                         case 's': save_filename = argv[i]; break;
00062                         case 'r': restore_filename = argv[i]; break;
00063                         default:
00064                                 fprintf(stderr,"unknown option\n");
00065                                 exit_with_help();
00066                 }
00067         }
00068 
00069         if(!(upper > lower) || (y_scaling && !(y_upper > y_lower)))
00070         {
00071                 fprintf(stderr,"inconsistent lower/upper specification\n");
00072                 exit(1);
00073         }
00074         
00075         if(restore_filename && save_filename)
00076         {
00077                 fprintf(stderr,"cannot use -r and -s simultaneously\n");
00078                 exit(1);
00079         }
00080 
00081         if(argc != i+1) 
00082                 exit_with_help();
00083 
00084         fp=fopen(argv[i],"r");
00085         
00086         if(fp==NULL)
00087         {
00088                 fprintf(stderr,"can't open file %s\n", argv[i]);
00089                 exit(1);
00090         }
00091 
00092         line = (char *) malloc(max_line_len*sizeof(char));
00093 
00094 #define SKIP_TARGET\
00095         while(isspace(*p)) ++p;\
00096         while(!isspace(*p)) ++p;
00097 
00098 #define SKIP_ELEMENT\
00099         while(*p!=':') ++p;\
00100         ++p;\
00101         while(isspace(*p)) ++p;\
00102         while(*p && !isspace(*p)) ++p;
00103         
00104         /* assumption: min index of attributes is 1 */
00105         /* pass 1: find out max index of attributes */
00106         max_index = 0;
00107 
00108         if(restore_filename)
00109         {
00110                 int idx, c;
00111 
00112                 fp_restore = fopen(restore_filename,"r");
00113                 if(fp_restore==NULL)
00114                 {
00115                         fprintf(stderr,"can't open file %s\n", restore_filename);
00116                         exit(1);
00117                 }
00118 
00119                 c = fgetc(fp_restore);
00120                 if(c == 'y')
00121                 {
00122                         readline(fp_restore);
00123                         readline(fp_restore);
00124                         readline(fp_restore);
00125                 }
00126                 readline(fp_restore);
00127                 readline(fp_restore);
00128 
00129                 while(fscanf(fp_restore,"%d %*f %*f\n",&idx) == 1)
00130                         max_index = max(idx,max_index);
00131                 rewind(fp_restore);
00132         }
00133 
00134         while(readline(fp)!=NULL)
00135         {
00136                 char *p=line;
00137 
00138                 SKIP_TARGET
00139 
00140                 while(sscanf(p,"%d:%*f",&index)==1)
00141                 {
00142                         max_index = max(max_index, index);
00143                         SKIP_ELEMENT
00144                         num_nonzeros++;
00145                 }               
00146         }
00147         rewind(fp);
00148         
00149         feature_max = (double *)malloc((max_index+1)* sizeof(double));
00150         feature_min = (double *)malloc((max_index+1)* sizeof(double));
00151         
00152         if(feature_max == NULL || feature_min == NULL)
00153         {
00154                 fprintf(stderr,"can't allocate enough memory\n");
00155                 exit(1);
00156         }
00157 
00158         for(i=0;i<=max_index;i++)
00159         {
00160                 feature_max[i]=-DBL_MAX;
00161                 feature_min[i]=DBL_MAX;
00162         }
00163 
00164         /* pass 2: find out min/max value */
00165         while(readline(fp)!=NULL)
00166         {
00167                 char *p=line;
00168                 int next_index=1;
00169                 double target;
00170                 double value;
00171 
00172                 sscanf(p,"%lf",&target);
00173                 y_max = max(y_max,target);
00174                 y_min = min(y_min,target);
00175                 
00176                 SKIP_TARGET
00177 
00178                 while(sscanf(p,"%d:%lf",&index,&value)==2)
00179                 {
00180                         for(i=next_index;i<index;i++)
00181                         {
00182                                 feature_max[i]=max(feature_max[i],0);
00183                                 feature_min[i]=min(feature_min[i],0);
00184                         }
00185                         
00186                         feature_max[index]=max(feature_max[index],value);
00187                         feature_min[index]=min(feature_min[index],value);
00188 
00189                         SKIP_ELEMENT
00190                         next_index=index+1;
00191                 }               
00192 
00193                 for(i=next_index;i<=max_index;i++)
00194                 {
00195                         feature_max[i]=max(feature_max[i],0);
00196                         feature_min[i]=min(feature_min[i],0);
00197                 }       
00198         }
00199 
00200         rewind(fp);
00201 
00202         /* pass 2.5: save/restore feature_min/feature_max */
00203         
00204         if(restore_filename)
00205         {
00206                 /* fp_restore rewinded in finding max_index */
00207                 int idx, c;
00208                 double fmin, fmax;
00209                 
00210                 if((c = fgetc(fp_restore)) == 'y')
00211                 {
00212                         fscanf(fp_restore, "%lf %lf\n", &y_lower, &y_upper);
00213                         fscanf(fp_restore, "%lf %lf\n", &y_min, &y_max);
00214                         y_scaling = 1;
00215                 }
00216                 else
00217                         ungetc(c, fp_restore);
00218 
00219                 if (fgetc(fp_restore) == 'x') {
00220                         fscanf(fp_restore, "%lf %lf\n", &lower, &upper);
00221                         while(fscanf(fp_restore,"%d %lf %lf\n",&idx,&fmin,&fmax)==3)
00222                         {
00223                                 if(idx<=max_index)
00224                                 {
00225                                         feature_min[idx] = fmin;
00226                                         feature_max[idx] = fmax;
00227                                 }
00228                         }
00229                 }
00230                 fclose(fp_restore);
00231         }
00232         
00233         if(save_filename)
00234         {
00235                 FILE *fp_save = fopen(save_filename,"w");
00236                 if(fp_save==NULL)
00237                 {
00238                         fprintf(stderr,"can't open file %s\n", save_filename);
00239                         exit(1);
00240                 }
00241                 if(y_scaling)
00242                 {
00243                         fprintf(fp_save, "y\n");
00244                         fprintf(fp_save, "%.16g %.16g\n", y_lower, y_upper);
00245                         fprintf(fp_save, "%.16g %.16g\n", y_min, y_max);
00246                 }
00247                 fprintf(fp_save, "x\n");
00248                 fprintf(fp_save, "%.16g %.16g\n", lower, upper);
00249                 for(i=1;i<=max_index;i++)
00250                 {
00251                         if(feature_min[i]!=feature_max[i])
00252                                 fprintf(fp_save,"%d %.16g %.16g\n",i,feature_min[i],feature_max[i]);
00253                 }
00254                 fclose(fp_save);
00255         }
00256         
00257         /* pass 3: scale */
00258         while(readline(fp)!=NULL)
00259         {
00260                 char *p=line;
00261                 int next_index=1;
00262                 double target;
00263                 double value;
00264                 
00265                 sscanf(p,"%lf",&target);
00266                 output_target(target);
00267 
00268                 SKIP_TARGET
00269 
00270                 while(sscanf(p,"%d:%lf",&index,&value)==2)
00271                 {
00272                         for(i=next_index;i<index;i++)
00273                                 output(i,0);
00274                         
00275                         output(index,value);
00276 
00277                         SKIP_ELEMENT
00278                         next_index=index+1;
00279                 }               
00280 
00281                 for(i=next_index;i<=max_index;i++)
00282                         output(i,0);
00283 
00284                 printf("\n");
00285         }
00286 
00287         if (new_num_nonzeros > num_nonzeros)
00288                 fprintf(stderr, 
00289                         "WARNING: original #nonzeros %ld\n"
00290                         "         new      #nonzeros %ld\n"
00291                         "Use -l 0 if many original feature values are zeros\n",
00292                         num_nonzeros, new_num_nonzeros);
00293 
00294         free(line);
00295         free(feature_max);
00296         free(feature_min);
00297         fclose(fp);
00298         return 0;
00299 }
00300 
00301 char* readline(FILE *input)
00302 {
00303         int len;
00304         
00305         if(fgets(line,max_line_len,input) == NULL)
00306                 return NULL;
00307 
00308         while(strrchr(line,'\n') == NULL)
00309         {
00310                 max_line_len *= 2;
00311                 line = (char *) realloc(line, max_line_len);
00312                 len = (int) strlen(line);
00313                 if(fgets(line+len,max_line_len-len,input) == NULL)
00314                         break;
00315         }
00316         return line;
00317 }
00318 
00319 void output_target(double value)
00320 {
00321         if(y_scaling)
00322         {
00323                 if(value == y_min)
00324                         value = y_lower;
00325                 else if(value == y_max)
00326                         value = y_upper;
00327                 else value = y_lower + (y_upper-y_lower) *
00328                              (value - y_min)/(y_max-y_min);
00329         }
00330         printf("%g ",value);
00331 }
00332 
00333 void output(int index, double value)
00334 {
00335         /* skip single-valued attribute */
00336         if(feature_max[index] == feature_min[index])
00337                 return;
00338 
00339         if(value == feature_min[index])
00340                 value = lower;
00341         else if(value == feature_max[index])
00342                 value = upper;
00343         else
00344                 value = lower + (upper-lower) * 
00345                         (value-feature_min[index])/
00346                         (feature_max[index]-feature_min[index]);
00347 
00348         if(value != 0)
00349         {
00350                 printf("%d:%g ",index, value);
00351                 new_num_nonzeros++;
00352         }
00353 }


haf_grasping
Author(s): David Fischinger
autogenerated on Wed Jan 11 2017 03:48:49