svm_scale.java
Go to the documentation of this file.
00001 import libsvm.*;
00002 import java.io.*;
00003 import java.util.*;
00004 import java.text.DecimalFormat;
00005 
/**
 * Command-line tool that linearly rescales the attributes (and optionally the
 * targets) of a sparse data file and prints the scaled data to standard output.
 *
 * Each data line is read as "target index:value index:value ...". Attribute
 * indices are assumed to start at 1, and attributes that do not appear on a
 * line are treated as 0. The gap-filling logic assumes indices appear in
 * ascending order within a line.
 *
 * The data file is read three times: once to find the largest attribute index,
 * once to find the minimum and maximum of every attribute and of the target,
 * and once to write the scaled output. The scaling parameters can be written
 * to a file (-s) or taken from a previously written file (-r).
 */
class svm_scale
{
        /* most recently read line of the data file; set by readline() */
        private String line = null;
        /* target range for scaled attribute values (-l / -u) */
        private double lower = -1.0;
        private double upper = 1.0;
        /* target range for scaled target values; only used when y_scaling is true */
        private double y_lower;
        private double y_upper;
        private boolean y_scaling = false;
        /* per-attribute extremes, indexed by attribute index (slot 0 is unused by the scaling passes) */
        private double[] feature_max;
        private double[] feature_min;
        private double y_max = -Double.MAX_VALUE;
        private double y_min = Double.MAX_VALUE;
        private int max_index;
        /* index:value pairs seen in the input vs. pairs written to the output */
        private long num_nonzeros = 0;
        private long new_num_nonzeros = 0;

        /** Prints the usage text to standard output and terminates the JVM with status 1. */
        private static void exit_with_help()
        {
                System.out.print(
                 "Usage: svm-scale [options] data_filename\n"
                +"options:\n"
                +"-l lower : x scaling lower limit (default -1)\n"
                +"-u upper : x scaling upper limit (default +1)\n"
                +"-y y_lower y_upper : y scaling limits (default: no y scaling)\n"
                +"-s save_filename : save scaling parameters to save_filename\n"
                +"-r restore_filename : restore scaling parameters from restore_filename\n"
                );
                System.exit(1);
        }

        /**
         * Closes fp and returns a fresh reader positioned at the start of filename.
         *
         * @throws IOException if closing fails or the file cannot be reopened
         */
        private BufferedReader rewind(BufferedReader fp, String filename) throws IOException
        {
                fp.close();
                return new BufferedReader(new FileReader(filename));
        }

        /**
         * Opens filename for reading; on failure prints "can't open file ..." to
         * standard error and terminates the JVM with status 1.
         */
        private static BufferedReader open_or_exit(String filename)
        {
                BufferedReader reader = null;
                try {
                        reader = new BufferedReader(new FileReader(filename));
                } catch (Exception e) {
                        System.err.println("can't open file " + filename);
                        System.exit(1);
                }
                return reader;
        }

        /** Prints the (optionally rescaled) target value followed by a space. */
        private void output_target(double value)
        {
                if(y_scaling)
                {
                        /* map the extremes exactly so rounding cannot move them off the limits */
                        if(value == y_min)
                                value = y_lower;
                        else if(value == y_max)
                                value = y_upper;
                        else
                                value = y_lower + (y_upper-y_lower) *
                                (value-y_min) / (y_max-y_min);
                }

                System.out.print(value + " ");
        }

        /**
         * Prints "index:scaled_value " for one attribute. Nothing is printed for an
         * attribute whose minimum equals its maximum, or when the scaled value is 0.
         */
        private void output(int index, double value)
        {
                /* skip single-valued attribute */
                if(feature_max[index] == feature_min[index])
                        return;

                if(value == feature_min[index])
                        value = lower;
                else if(value == feature_max[index])
                        value = upper;
                else
                        value = lower + (upper-lower) *
                                (value-feature_min[index])/
                                (feature_max[index]-feature_min[index]);

                if(value != 0)
                {
                        System.out.print(index + ":" + value + " ");
                        new_num_nonzeros++;
                }
        }

        /** Reads the next line into the line field and returns it (null at end of file). */
        private String readline(BufferedReader fp) throws IOException
        {
                line = fp.readLine();
                return line;
        }

        /**
         * Raises max_index to the largest attribute index listed in a restore file.
         * The reader must be positioned at the start of the file.
         */
        private void scan_restore_max_index(BufferedReader fp_restore) throws IOException
        {
                /* a leading 'y' means three y-scaling lines come before the x section */
                if(fp_restore.read() == 'y')
                {
                        fp_restore.readLine();
                        fp_restore.readLine();
                        fp_restore.readLine();
                }
                /* skip the "x" marker line and the "lower upper" line */
                fp_restore.readLine();
                fp_restore.readLine();

                String restore_line = null;
                while((restore_line = fp_restore.readLine())!=null)
                {
                        StringTokenizer st = new StringTokenizer(restore_line);
                        max_index = Math.max(max_index, Integer.parseInt(st.nextToken()));
                }
        }

        /** Pass 1: raises max_index to the largest attribute index in the data and counts index:value pairs. */
        private void scan_data_max_index(BufferedReader fp) throws IOException
        {
                while(readline(fp) != null)
                {
                        StringTokenizer st = new StringTokenizer(line," \t\n\r\f:");
                        st.nextToken();         // the target is not needed in this pass
                        while(st.hasMoreTokens())
                        {
                                int index = Integer.parseInt(st.nextToken());
                                max_index = Math.max(max_index, index);
                                st.nextToken(); // the value is not needed in this pass
                                num_nonzeros++;
                        }
                }
        }

        /** Pass 2: records the minimum and maximum of the target and of every attribute. */
        private void scan_min_max(BufferedReader fp) throws IOException
        {
                while(readline(fp) != null)
                {
                        int next_index = 1;
                        StringTokenizer st = new StringTokenizer(line," \t\n\r\f:");
                        double target = Double.parseDouble(st.nextToken());
                        y_max = Math.max(y_max, target);
                        y_min = Math.min(y_min, target);

                        while(st.hasMoreTokens())
                        {
                                int index = Integer.parseInt(st.nextToken());
                                double value = Double.parseDouble(st.nextToken());

                                /* attributes skipped on this line count as 0 */
                                for(int i=next_index;i<index;i++)
                                {
                                        feature_max[i] = Math.max(feature_max[i], 0);
                                        feature_min[i] = Math.min(feature_min[i], 0);
                                }

                                feature_max[index] = Math.max(feature_max[index], value);
                                feature_min[index] = Math.min(feature_min[index], value);
                                next_index = index + 1;
                        }

                        /* trailing attributes missing from this line also count as 0 */
                        for(int i=next_index;i<=max_index;i++)
                        {
                                feature_max[i] = Math.max(feature_max[i], 0);
                                feature_min[i] = Math.min(feature_min[i], 0);
                        }
                }
        }

        /**
         * Pass 2.5 (restore): overwrites the scaling limits and the recorded
         * minimum/maximum values with those stored in the restore file, then closes
         * the reader. The reader must be positioned at the start of the file.
         */
        private void restore_parameters(BufferedReader fp_restore) throws IOException
        {
                fp_restore.mark(2);                             // for reset
                if(fp_restore.read() == 'y')
                {
                        fp_restore.readLine();          // pass the '\n' after 'y'
                        StringTokenizer st = new StringTokenizer(fp_restore.readLine());
                        y_lower = Double.parseDouble(st.nextToken());
                        y_upper = Double.parseDouble(st.nextToken());
                        st = new StringTokenizer(fp_restore.readLine());
                        y_min = Double.parseDouble(st.nextToken());
                        y_max = Double.parseDouble(st.nextToken());
                        y_scaling = true;
                }
                else
                        fp_restore.reset();

                if(fp_restore.read() == 'x')
                {
                        fp_restore.readLine();          // pass the '\n' after 'x'
                        StringTokenizer st = new StringTokenizer(fp_restore.readLine());
                        lower = Double.parseDouble(st.nextToken());
                        upper = Double.parseDouble(st.nextToken());
                        String restore_line = null;
                        while((restore_line = fp_restore.readLine())!=null)
                        {
                                StringTokenizer st2 = new StringTokenizer(restore_line);
                                int idx = Integer.parseInt(st2.nextToken());
                                double fmin = Double.parseDouble(st2.nextToken());
                                double fmax = Double.parseDouble(st2.nextToken());
                                if(idx <= max_index)
                                {
                                        feature_min[idx] = fmin;
                                        feature_max[idx] = fmax;
                                }
                        }
                }
                fp_restore.close();
        }

        /**
         * Pass 2.5 (save): writes the scaling limits and the minimum/maximum of every
         * attribute that is not single-valued to save_filename.
         *
         * Numbers are formatted with Locale.ROOT so the decimal separator is always
         * '.', which is what Double.parseDouble expects when the file is restored.
         */
        private void save_parameters(String save_filename) throws IOException
        {
                Formatter formatter = new Formatter(new StringBuilder(), Locale.ROOT);
                BufferedWriter fp_save = null;

                try {
                        fp_save = new BufferedWriter(new FileWriter(save_filename));
                } catch(IOException e) {
                        System.err.println("can't open file " + save_filename);
                        System.exit(1);
                }

                try {
                        if(y_scaling)
                        {
                                formatter.format("y\n");
                                formatter.format("%.16g %.16g\n", y_lower, y_upper);
                                formatter.format("%.16g %.16g\n", y_min, y_max);
                        }
                        formatter.format("x\n");
                        formatter.format("%.16g %.16g\n", lower, upper);
                        for(int i=1;i<=max_index;i++)
                        {
                                if(feature_min[i] != feature_max[i])
                                        formatter.format("%d %.16g %.16g\n", i, feature_min[i], feature_max[i]);
                        }
                        fp_save.write(formatter.toString());
                } finally {
                        fp_save.close();
                }
        }

        /** Pass 3: writes every data line, rescaled, to standard output. */
        private void scale_data(BufferedReader fp) throws IOException
        {
                while(readline(fp) != null)
                {
                        int next_index = 1;
                        StringTokenizer st = new StringTokenizer(line," \t\n\r\f:");
                        output_target(Double.parseDouble(st.nextToken()));

                        while(st.hasMoreTokens())
                        {
                                int index = Integer.parseInt(st.nextToken());
                                double value = Double.parseDouble(st.nextToken());
                                /* an implicit 0 may scale to a non-zero value, so emit the gaps too */
                                for(int i=next_index;i<index;i++)
                                        output(i, 0);
                                output(index, value);
                                next_index = index + 1;
                        }

                        for(int i=next_index;i<=max_index;i++)
                                output(i, 0);
                        System.out.print("\n");
                }
        }

        /**
         * Parses the command line, runs the three passes over the data file and
         * prints the scaled data.
         *
         * Malformed options (unknown letter, missing value, wrong number of file
         * arguments) print the usage text and exit with status 1, as do inconsistent
         * limits and files that cannot be opened.
         *
         * @param argv command-line arguments: options followed by exactly one data file name
         * @throws IOException if reading the data or restore file, or writing the save file, fails
         */
        private void run(String []argv) throws IOException
        {
                int i;
                String save_filename = null;
                String restore_filename = null;
                String data_filename = null;

                for(i=0;i<argv.length;i++)
                {
                        String arg = argv[i];
                        /* the first non-option argument is the data file name */
                        if(arg.length() == 0 || arg.charAt(0) != '-')   break;
                        /* every option needs a letter and at least one value after it */
                        if(arg.length() < 2 || i+1 >= argv.length)
                                exit_with_help();
                        ++i;
                        switch(arg.charAt(1))
                        {
                                case 'l': lower = Double.parseDouble(argv[i]);  break;
                                case 'u': upper = Double.parseDouble(argv[i]);  break;
                                case 'y':
                                          /* -y takes two values */
                                          if(i+1 >= argv.length)
                                                  exit_with_help();
                                          y_lower = Double.parseDouble(argv[i]);
                                          ++i;
                                          y_upper = Double.parseDouble(argv[i]);
                                          y_scaling = true;
                                          break;
                                case 's': save_filename = argv[i];      break;
                                case 'r': restore_filename = argv[i];   break;
                                default:
                                          System.err.println("unknown option");
                                          exit_with_help();
                        }
                }

                if(!(upper > lower) || (y_scaling && !(y_upper > y_lower)))
                {
                        System.err.println("inconsistent lower/upper specification");
                        System.exit(1);
                }
                if(restore_filename != null && save_filename != null)
                {
                        System.err.println("cannot use -r and -s simultaneously");
                        System.exit(1);
                }

                /* exactly one argument (the data file) must remain */
                if(argv.length != i+1)
                        exit_with_help();

                data_filename = argv[i];
                BufferedReader fp = open_or_exit(data_filename);
                BufferedReader fp_restore = null;

                try {
                        /* assumption: min index of attributes is 1 */
                        /* pass 1: find out max index of attributes */
                        max_index = 0;

                        if(restore_filename != null)
                        {
                                fp_restore = open_or_exit(restore_filename);
                                scan_restore_max_index(fp_restore);
                                fp_restore = rewind(fp_restore, restore_filename);
                        }

                        scan_data_max_index(fp);

                        try {
                                feature_max = new double[(max_index+1)];
                                feature_min = new double[(max_index+1)];
                        } catch(OutOfMemoryError e) {
                                System.err.println("can't allocate enough memory");
                                System.exit(1);
                        }

                        for(i=0;i<=max_index;i++)
                        {
                                feature_max[i] = -Double.MAX_VALUE;
                                feature_min[i] = Double.MAX_VALUE;
                        }

                        fp = rewind(fp, data_filename);

                        /* pass 2: find out min/max value */
                        scan_min_max(fp);

                        fp = rewind(fp, data_filename);

                        /* pass 2.5: save/restore feature_min/feature_max */
                        if(restore_filename != null)
                                restore_parameters(fp_restore);

                        if(save_filename != null)
                                save_parameters(save_filename);

                        /* pass 3: scale */
                        scale_data(fp);

                        if (new_num_nonzeros > num_nonzeros)
                                System.err.print(
                                 "WARNING: original #nonzeros " + num_nonzeros+"\n"
                                +"         new      #nonzeros " + new_num_nonzeros+"\n"
                                +"Use -l 0 if many original feature values are zeros\n");
                } finally {
                        /* release both readers even when a pass fails; closing twice is harmless */
                        try {
                                if(fp_restore != null)
                                        fp_restore.close();
                        } finally {
                                if(fp != null)
                                        fp.close();
                        }
                }
        }

        /** Program entry point; see exit_with_help() for the accepted arguments. */
        public static void main(String argv[]) throws IOException
        {
                svm_scale s = new svm_scale();
                s.run(argv);
        }
}


ml_classifiers
Author(s): Scott Niekum
autogenerated on Fri Jan 3 2014 11:30:23