import libsvm.*;
import java.io.*;
import java.util.*;
import java.text.DecimalFormat;

/**
 * Command-line tool that linearly rescales the feature values (and optionally
 * the target values) of a data file whose lines have the form
 * {@code target index:value index:value ...}.
 *
 * <p>The data file is read three times: once to find the largest feature
 * index, once to collect the per-feature minimum/maximum, and once to print
 * the scaled data to standard output. Scaling parameters can be written to a
 * file ({@code -s}) or read back from one ({@code -r}).
 */
class svm_scale
{
    /** Most recent line returned by {@link #readline(BufferedReader)}. */
    private String line = null;
    /** Lower limit of the scaled feature range ({@code -l}). */
    private double lower = -1.0;
    /** Upper limit of the scaled feature range ({@code -u}). */
    private double upper = 1.0;
    /** Lower limit of the scaled target range ({@code -y}). */
    private double y_lower;
    /** Upper limit of the scaled target range ({@code -y}). */
    private double y_upper;
    /** Whether target values are scaled as well. */
    private boolean y_scaling = false;
    /** Largest value seen (or restored) for each feature index. */
    private double[] feature_max;
    /** Smallest value seen (or restored) for each feature index. */
    private double[] feature_min;
    /** Largest target value seen (or restored). */
    private double y_max = -Double.MAX_VALUE;
    /** Smallest target value seen (or restored). */
    private double y_min = Double.MAX_VALUE;
    /** Largest feature index found in the data file or the restore file. */
    private int max_index;
    /** Number of index:value pairs present in the input data. */
    private long num_nonzeros = 0;
    /** Number of index:value pairs written to the scaled output. */
    private long new_num_nonzeros = 0;

    /** Prints the usage text to standard output and terminates with status 1. */
    private static void exit_with_help()
    {
        System.out.print(
         "Usage: svm-scale [options] data_filename\n"
        +"options:\n"
        +"-l lower : x scaling lower limit (default -1)\n"
        +"-u upper : x scaling upper limit (default +1)\n"
        +"-y y_lower y_upper : y scaling limits (default: no y scaling)\n"
        +"-s save_filename : save scaling parameters to save_filename\n"
        +"-r restore_filename : restore scaling parameters from restore_filename\n"
        );
        System.exit(1);
    }

    /**
     * Closes {@code fp} and returns a fresh reader positioned at the start of
     * {@code filename}.
     */
    private BufferedReader rewind(BufferedReader fp, String filename) throws IOException
    {
        fp.close();
        return new BufferedReader(new FileReader(filename));
    }

    /**
     * Prints one target value followed by a space, scaling it into
     * [y_lower, y_upper] first when target scaling is enabled.
     */
    private void output_target(double value)
    {
        if(y_scaling)
        {
            // The end points are mapped exactly to avoid rounding error.
            if(value == y_min)
                value = y_lower;
            else if(value == y_max)
                value = y_upper;
            else
                value = y_lower + (y_upper-y_lower) *
                    (value-y_min) / (y_max-y_min);
        }

        System.out.print(value + " ");
    }

    /**
     * Scales one feature value into [lower, upper] and prints it as
     * {@code index:value } unless the scaled value is zero.
     */
    private void output(int index, double value)
    {
        /* skip single-valued attribute */
        if(feature_max[index] == feature_min[index])
            return;

        // The end points are mapped exactly to avoid rounding error.
        if(value == feature_min[index])
            value = lower;
        else if(value == feature_max[index])
            value = upper;
        else
            value = lower + (upper-lower) *
                (value-feature_min[index])/
                (feature_max[index]-feature_min[index]);

        // Zeros are omitted from the output.
        if(value != 0)
        {
            System.out.print(index + ":" + value + " ");
            new_num_nonzeros++;
        }
    }

    /** Reads the next line into {@link #line} and returns it (null at end of file). */
    private String readline(BufferedReader fp) throws IOException
    {
        line = fp.readLine();
        return line;
    }

    /**
     * First pass over the restore file: skips the header lines and raises
     * {@code max_index} to the largest feature index listed.
     */
    private void scan_restore_max_index(BufferedReader fp_restore) throws IOException
    {
        // An optional "y" section occupies three lines before the "x" section.
        if(fp_restore.read() == 'y')
        {
            fp_restore.readLine();
            fp_restore.readLine();
            fp_restore.readLine();
        }
        // Skip the "x" marker line and the "lower upper" line.
        fp_restore.readLine();
        fp_restore.readLine();

        String restore_line;
        while((restore_line = fp_restore.readLine()) != null)
        {
            StringTokenizer st2 = new StringTokenizer(restore_line);
            int idx = Integer.parseInt(st2.nextToken());
            max_index = Math.max(max_index, idx);
        }
    }

    /**
     * Pass 1 over the data: raises {@code max_index} to the largest feature
     * index present and counts the index:value pairs.
     */
    private void scan_data_max_index(BufferedReader fp) throws IOException
    {
        while(readline(fp) != null)
        {
            StringTokenizer st = new StringTokenizer(line," \t\n\r\f:");
            st.nextToken();    // target value, not needed in this pass
            while(st.hasMoreTokens())
            {
                int index = Integer.parseInt(st.nextToken());
                max_index = Math.max(max_index, index);
                st.nextToken();    // feature value, not needed in this pass
                num_nonzeros++;
            }
        }
    }

    /**
     * Pass 2 over the data: records the minimum and maximum of the target and
     * of every feature. A feature index missing from a line counts as 0.
     */
    private void scan_data_ranges(BufferedReader fp) throws IOException
    {
        while(readline(fp) != null)
        {
            int next_index = 1;

            StringTokenizer st = new StringTokenizer(line," \t\n\r\f:");
            double target = Double.parseDouble(st.nextToken());
            y_max = Math.max(y_max, target);
            y_min = Math.min(y_min, target);

            while(st.hasMoreTokens())
            {
                int index = Integer.parseInt(st.nextToken());
                double value = Double.parseDouble(st.nextToken());

                // Indices skipped between two listed features are zeros.
                for(int i = next_index; i<index; i++)
                {
                    feature_max[i] = Math.max(feature_max[i], 0);
                    feature_min[i] = Math.min(feature_min[i], 0);
                }

                feature_max[index] = Math.max(feature_max[index], value);
                feature_min[index] = Math.min(feature_min[index], value);
                next_index = index + 1;
            }

            // Indices after the last listed feature are zeros as well.
            for(int i = next_index; i<=max_index; i++)
            {
                feature_max[i] = Math.max(feature_max[i], 0);
                feature_min[i] = Math.min(feature_min[i], 0);
            }
        }
    }

    /**
     * Second pass over the (rewound) restore file: loads the target limits,
     * the feature limits and the per-feature min/max, replacing the values
     * computed from the data.
     */
    private void restore_parameters(BufferedReader fp_restore) throws IOException
    {
        fp_restore.mark(2);    // allows un-reading the first character
        if(fp_restore.read() == 'y')
        {
            fp_restore.readLine();    // rest of the "y" marker line
            StringTokenizer st = new StringTokenizer(fp_restore.readLine());
            y_lower = Double.parseDouble(st.nextToken());
            y_upper = Double.parseDouble(st.nextToken());
            st = new StringTokenizer(fp_restore.readLine());
            y_min = Double.parseDouble(st.nextToken());
            y_max = Double.parseDouble(st.nextToken());
            y_scaling = true;
        }
        else
            fp_restore.reset();

        if(fp_restore.read() == 'x')
        {
            fp_restore.readLine();    // rest of the "x" marker line
            StringTokenizer st = new StringTokenizer(fp_restore.readLine());
            lower = Double.parseDouble(st.nextToken());
            upper = Double.parseDouble(st.nextToken());

            String restore_line;
            while((restore_line = fp_restore.readLine()) != null)
            {
                StringTokenizer st2 = new StringTokenizer(restore_line);
                int idx = Integer.parseInt(st2.nextToken());
                double fmin = Double.parseDouble(st2.nextToken());
                double fmax = Double.parseDouble(st2.nextToken());
                if(idx <= max_index)
                {
                    feature_min[idx] = fmin;
                    feature_max[idx] = fmax;
                }
            }
        }
    }

    /**
     * Writes the current scaling parameters to {@code save_filename} in the
     * layout that {@link #restore_parameters(BufferedReader)} reads.
     */
    private void save_parameters(String save_filename) throws IOException
    {
        BufferedWriter fp_save = null;

        try {
            fp_save = new BufferedWriter(new FileWriter(save_filename));
        } catch(IOException e) {
            System.err.println("can't open file " + save_filename);
            System.exit(1);
        }

        try
        {
            // Locale.ROOT forces '.' as the decimal separator. With the default
            // locale a comma-decimal environment would write numbers that
            // Double.parseDouble cannot read back when restoring.
            Formatter formatter = new Formatter(new StringBuilder(), Locale.ROOT);

            if(y_scaling)
            {
                formatter.format("y\n");
                formatter.format("%.16g %.16g\n", y_lower, y_upper);
                formatter.format("%.16g %.16g\n", y_min, y_max);
            }
            formatter.format("x\n");
            formatter.format("%.16g %.16g\n", lower, upper);
            for(int i = 1; i<=max_index; i++)
            {
                // Single-valued features are never output, so they are not saved.
                if(feature_min[i] != feature_max[i])
                    formatter.format("%d %.16g %.16g\n", i, feature_min[i], feature_max[i]);
            }
            fp_save.write(formatter.toString());
        }
        finally
        {
            fp_save.close();
        }
    }

    /** Pass 3 over the data: prints every line with scaled target and features. */
    private void write_scaled_data(BufferedReader fp) throws IOException
    {
        while(readline(fp) != null)
        {
            int next_index = 1;

            StringTokenizer st = new StringTokenizer(line," \t\n\r\f:");
            double target = Double.parseDouble(st.nextToken());
            output_target(target);
            while(st.hasMoreTokens())
            {
                int index = Integer.parseInt(st.nextToken());
                double value = Double.parseDouble(st.nextToken());
                // A zero may scale to a non-zero value, so skipped indices
                // are passed through output() too.
                for(int i = next_index; i<index; i++)
                    output(i, 0);
                output(index, value);
                next_index = index + 1;
            }

            for(int i = next_index; i<=max_index; i++)
                output(i, 0);
            System.out.print("\n");
        }
    }

    /**
     * Parses the command line, determines the scaling parameters (from the
     * data or from a restore file), optionally saves them, and prints the
     * scaled data to standard output.
     *
     * @param argv command-line arguments: options followed by the data file name
     * @throws IOException if reading the data/restore file or writing the save file fails
     */
    private void run(String []argv) throws IOException
    {
        int i;
        BufferedReader fp = null, fp_restore = null;
        String save_filename = null;
        String restore_filename = null;
        String data_filename = null;

        for(i=0;i<argv.length;i++)
        {
            String option = argv[i];
            // The first argument that is not an option is the data file.
            if(option.length() == 0 || option.charAt(0) != '-') break;
            // Every option is "-<letter>" followed by at least one value. Show
            // the usage text instead of failing with an index exception.
            if(option.length() < 2 || i+1 >= argv.length)
                exit_with_help();
            ++i;
            try {
                switch(option.charAt(1))
                {
                    case 'l': lower = Double.parseDouble(argv[i]); break;
                    case 'u': upper = Double.parseDouble(argv[i]); break;
                    case 'y':
                        // -y takes two values.
                        if(i+1 >= argv.length)
                            exit_with_help();
                        y_lower = Double.parseDouble(argv[i]);
                        ++i;
                        y_upper = Double.parseDouble(argv[i]);
                        y_scaling = true;
                        break;
                    case 's': save_filename = argv[i]; break;
                    case 'r': restore_filename = argv[i]; break;
                    default:
                        System.err.println("unknown option");
                        exit_with_help();
                }
            } catch(NumberFormatException e) {
                System.err.println("invalid numeric value for option " + option);
                exit_with_help();
            }
        }

        if(!(upper > lower) || (y_scaling && !(y_upper > y_lower)))
        {
            System.err.println("inconsistent lower/upper specification");
            System.exit(1);
        }
        if(restore_filename != null && save_filename != null)
        {
            System.err.println("cannot use -r and -s simultaneously");
            System.exit(1);
        }

        // Exactly one argument (the data file) must remain.
        if(argv.length != i+1)
            exit_with_help();

        data_filename = argv[i];
        try {
            fp = new BufferedReader(new FileReader(data_filename));
        } catch (IOException e) {
            System.err.println("can't open file " + data_filename);
            System.exit(1);
        }

        try
        {
            /* assumption: min index of attributes is 1 */
            /* pass 1: find out max index of attributes */
            max_index = 0;

            if(restore_filename != null)
            {
                try {
                    fp_restore = new BufferedReader(new FileReader(restore_filename));
                } catch (IOException e) {
                    System.err.println("can't open file " + restore_filename);
                    System.exit(1);
                }
                scan_restore_max_index(fp_restore);
                fp_restore = rewind(fp_restore, restore_filename);
            }

            scan_data_max_index(fp);

            try {
                feature_max = new double[(max_index+1)];
                feature_min = new double[(max_index+1)];
            } catch(OutOfMemoryError e) {
                System.err.println("can't allocate enough memory");
                System.exit(1);
            }

            for(i=0;i<=max_index;i++)
            {
                feature_max[i] = -Double.MAX_VALUE;
                feature_min[i] = Double.MAX_VALUE;
            }

            /* pass 2: find out min/max value */
            fp = rewind(fp, data_filename);
            scan_data_ranges(fp);

            fp = rewind(fp, data_filename);

            /* pass 2.5: save/restore feature_min/feature_max */
            if(restore_filename != null)
                restore_parameters(fp_restore);

            if(save_filename != null)
                save_parameters(save_filename);

            /* pass 3: scale */
            write_scaled_data(fp);

            if (new_num_nonzeros > num_nonzeros)
                System.err.print(
                 "Warning: original #nonzeros " + num_nonzeros+"\n"
                +" new #nonzeros " + new_num_nonzeros+"\n"
                +"Use -l 0 if many original feature values are zeros\n");
        }
        finally
        {
            // Release both readers even when parsing or I/O failed part-way.
            // Closing an already closed BufferedReader has no effect.
            try {
                if(fp_restore != null)
                    fp_restore.close();
            } finally {
                if(fp != null)
                    fp.close();
            }
        }
    }

    /**
     * Program entry point.
     *
     * @param argv command-line arguments, see {@link #exit_with_help()}
     * @throws IOException if reading or writing a file fails
     */
    public static void main(String argv[]) throws IOException
    {
        svm_scale s = new svm_scale();
        s.run(argv);
    }
}