svm_scale.java
Go to the documentation of this file.
1 import libsvm.*;
2 import java.io.*;
3 import java.util.*;
4 import java.text.DecimalFormat;
5 
/**
 * Command-line tool that linearly rescales the feature values (and optionally
 * the target values) of a sparse data file and prints the result to standard
 * output.
 *
 * <p>Each input line is tokenized on whitespace and ':' and is read as a
 * target value followed by {@code index value} pairs. Indices are assumed to
 * start at 1 and to be in increasing order within a line; features that are
 * absent from a line are treated as 0.
 *
 * <p>The data file is read three times: once to find the largest feature
 * index, once to find the per-feature minimum/maximum, and once to emit the
 * scaled values. Scaling parameters can be written with {@code -s} and read
 * back with {@code -r}.
 *
 * <p>Malformed input (blank lines, non-numeric tokens, an index without a
 * value) is not diagnosed; it surfaces as the runtime exception thrown by the
 * tokenizer or the number parser.
 */
class svm_scale
{
    /** Characters that separate tokens on a data line. */
    private static final String DELIMITERS = " \t\n\r\f:";

    /** Most recent line returned by {@link #readline(BufferedReader)}. */
    private String line = null;
    /** Lower bound of the scaled feature range (option -l). */
    private double lower = -1.0;
    /** Upper bound of the scaled feature range (option -u). */
    private double upper = 1.0;
    /** Lower bound of the scaled target range (option -y). */
    private double y_lower;
    /** Upper bound of the scaled target range (option -y). */
    private double y_upper;
    /** True when target values are scaled as well. */
    private boolean y_scaling = false;
    /** Per-feature maximum, indexed by feature index (slot 0 is unused by the scaling loops). */
    private double[] feature_max;
    /** Per-feature minimum, indexed by feature index (slot 0 is unused by the scaling loops). */
    private double[] feature_min;
    private double y_max = -Double.MAX_VALUE;
    private double y_min = Double.MAX_VALUE;
    /** Largest feature index seen in the data file or the restore file. */
    private int max_index;
    /** Number of index:value pairs present in the input. */
    private long num_nonzeros = 0;
    /** Number of index:value pairs written to the output. */
    private long new_num_nonzeros = 0;

    /** Prints the usage message to standard output and terminates with status 1. */
    private static void exit_with_help()
    {
        System.out.print(
         "Usage: svm-scale [options] data_filename\n"
        +"options:\n"
        +"-l lower : x scaling lower limit (default -1)\n"
        +"-u upper : x scaling upper limit (default +1)\n"
        +"-y y_lower y_upper : y scaling limits (default: no y scaling)\n"
        +"-s save_filename : save scaling parameters to save_filename\n"
        +"-r restore_filename : restore scaling parameters from restore_filename\n"
        );
        System.exit(1);
    }

    /**
     * Closes {@code fp} and returns a new reader positioned at the start of
     * {@code filename}.
     *
     * @throws IOException if closing or reopening fails
     */
    private BufferedReader rewind(BufferedReader fp, String filename) throws IOException
    {
        fp.close();
        return new BufferedReader(new FileReader(filename));
    }

    /**
     * Opens {@code filename} for reading; on failure prints
     * "can't open file ..." to standard error and terminates with status 1.
     */
    private static BufferedReader open_or_exit(String filename)
    {
        try {
            return new BufferedReader(new FileReader(filename));
        } catch (Exception e) {
            System.err.println("can't open file " + filename);
            System.exit(1);
            return null; // not reached; satisfies the compiler
        }
    }

    /**
     * Returns {@code argv[pos]}, or shows the usage message and exits when an
     * option is missing its value (instead of failing with an
     * ArrayIndexOutOfBoundsException).
     */
    private static String option_value(String[] argv, int pos)
    {
        if(pos >= argv.length)
            exit_with_help();
        return argv[pos];
    }

    /**
     * Prints the (optionally scaled) target value followed by a space.
     * The extremes are mapped to the exact bounds to avoid rounding error.
     */
    private void output_target(double value)
    {
        if(y_scaling)
        {
            if(value == y_min)
                value = y_lower;
            else if(value == y_max)
                value = y_upper;
            else
                value = y_lower + (y_upper-y_lower) *
                    (value-y_min) / (y_max-y_min);
        }

        System.out.print(value + " ");
    }

    /**
     * Prints one scaled feature as {@code index:value } unless the feature is
     * constant or its scaled value is exactly 0.
     */
    private void output(int index, double value)
    {
        /* skip single-valued attribute */
        if(feature_max[index] == feature_min[index])
            return;

        if(value == feature_min[index])
            value = lower;
        else if(value == feature_max[index])
            value = upper;
        else
            value = lower + (upper-lower) *
                (value-feature_min[index])/
                (feature_max[index]-feature_min[index]);

        if(value != 0)
        {
            System.out.print(index + ":" + value + " ");
            new_num_nonzeros++;
        }
    }

    /** Reads the next line into {@link #line} and returns it (null at end of file). */
    private String readline(BufferedReader fp) throws IOException
    {
        line = fp.readLine();
        return line;
    }

    /**
     * Returns the largest feature index listed in a saved-parameter file
     * (0 when it lists none). The optional three-line "y" section and the
     * two-line "x" header are skipped.
     */
    private int max_restore_index(String restore_filename) throws IOException
    {
        int largest = 0;
        BufferedReader fp_restore = open_or_exit(restore_filename);
        try {
            if(fp_restore.read() == 'y')
            {
                fp_restore.readLine(); // rest of the "y" line
                fp_restore.readLine(); // y_lower y_upper
                fp_restore.readLine(); // y_min y_max
            }
            fp_restore.readLine(); // rest of the "x" line
            fp_restore.readLine(); // lower upper

            String restore_line;
            while((restore_line = fp_restore.readLine()) != null)
            {
                StringTokenizer st = new StringTokenizer(restore_line);
                largest = Math.max(largest, Integer.parseInt(st.nextToken()));
            }
        } finally {
            fp_restore.close();
        }
        return largest;
    }

    /** Pass 1: raises {@link #max_index} to the largest index in the data and counts the pairs. */
    private void scan_indices(BufferedReader fp) throws IOException
    {
        while(readline(fp) != null)
        {
            StringTokenizer st = new StringTokenizer(line, DELIMITERS);
            st.nextToken(); // target value, not needed in this pass
            while(st.hasMoreTokens())
            {
                int index = Integer.parseInt(st.nextToken());
                max_index = Math.max(max_index, index);
                st.nextToken(); // feature value, not needed in this pass
                num_nonzeros++;
            }
        }
    }

    /** Widens the recorded range of feature {@code index} so that it contains 0. */
    private void include_zero(int index)
    {
        feature_max[index] = Math.max(feature_max[index], 0);
        feature_min[index] = Math.min(feature_min[index], 0);
    }

    /** Pass 2: records the min/max of the target and of every feature. */
    private void collect_ranges(BufferedReader fp) throws IOException
    {
        while(readline(fp) != null)
        {
            int next_index = 1;

            StringTokenizer st = new StringTokenizer(line, DELIMITERS);
            double target = Double.parseDouble(st.nextToken());
            y_max = Math.max(y_max, target);
            y_min = Math.min(y_min, target);

            while(st.hasMoreTokens())
            {
                int index = Integer.parseInt(st.nextToken());
                double value = Double.parseDouble(st.nextToken());

                // indices skipped on this line are implicit zeros
                for(int i = next_index; i < index; i++)
                    include_zero(i);

                feature_max[index] = Math.max(feature_max[index], value);
                feature_min[index] = Math.min(feature_min[index], value);
                next_index = index + 1;
            }

            for(int i = next_index; i <= max_index; i++)
                include_zero(i);
        }
    }

    /**
     * Pass 2.5 (restore): overrides the scaling bounds and the per-feature
     * ranges with the values stored in {@code restore_filename}. Features the
     * file does not list keep the range computed from the data.
     */
    private void restore_parameters(String restore_filename) throws IOException
    {
        BufferedReader fp_restore = new BufferedReader(new FileReader(restore_filename));
        try {
            fp_restore.mark(2); // for reset
            if(fp_restore.read() == 'y')
            {
                fp_restore.readLine(); // pass the '\n' after 'y'
                StringTokenizer st = new StringTokenizer(fp_restore.readLine());
                y_lower = Double.parseDouble(st.nextToken());
                y_upper = Double.parseDouble(st.nextToken());
                st = new StringTokenizer(fp_restore.readLine());
                y_min = Double.parseDouble(st.nextToken());
                y_max = Double.parseDouble(st.nextToken());
                y_scaling = true;
            }
            else
                fp_restore.reset();

            if(fp_restore.read() == 'x')
            {
                fp_restore.readLine(); // pass the '\n' after 'x'
                StringTokenizer st = new StringTokenizer(fp_restore.readLine());
                lower = Double.parseDouble(st.nextToken());
                upper = Double.parseDouble(st.nextToken());

                String restore_line;
                while((restore_line = fp_restore.readLine()) != null)
                {
                    StringTokenizer st2 = new StringTokenizer(restore_line);
                    int idx = Integer.parseInt(st2.nextToken());
                    double fmin = Double.parseDouble(st2.nextToken());
                    double fmax = Double.parseDouble(st2.nextToken());
                    if(idx <= max_index)
                    {
                        feature_min[idx] = fmin;
                        feature_max[idx] = fmax;
                    }
                }
            }
        } finally {
            fp_restore.close();
        }
    }

    /**
     * Pass 2.5 (save): writes the scaling bounds and the range of every
     * non-constant feature to {@code save_filename}.
     *
     * <p>Numbers are formatted with {@link Locale#ROOT} so the decimal
     * separator is always '.', which is what {@code Double.parseDouble}
     * expects when the file is restored; the default locale may use ','.
     */
    private void save_parameters(String save_filename) throws IOException
    {
        BufferedWriter fp_save = null;
        try {
            fp_save = new BufferedWriter(new FileWriter(save_filename));
        } catch(IOException e) {
            System.err.println("can't open file " + save_filename);
            System.exit(1);
        }

        try {
            Formatter formatter = new Formatter(new StringBuilder(), Locale.ROOT);
            if(y_scaling)
            {
                formatter.format("y\n");
                formatter.format("%.16g %.16g\n", y_lower, y_upper);
                formatter.format("%.16g %.16g\n", y_min, y_max);
            }
            formatter.format("x\n");
            formatter.format("%.16g %.16g\n", lower, upper);
            for(int i = 1; i <= max_index; i++)
            {
                if(feature_min[i] != feature_max[i])
                    formatter.format("%d %.16g %.16g\n", i, feature_min[i], feature_max[i]);
            }
            fp_save.write(formatter.toString());
        } finally {
            fp_save.close();
        }
    }

    /** Pass 3: prints every data line with its target and features scaled. */
    private void write_scaled(BufferedReader fp) throws IOException
    {
        while(readline(fp) != null)
        {
            int next_index = 1;

            StringTokenizer st = new StringTokenizer(line, DELIMITERS);
            output_target(Double.parseDouble(st.nextToken()));
            while(st.hasMoreTokens())
            {
                int index = Integer.parseInt(st.nextToken());
                double value = Double.parseDouble(st.nextToken());
                // implicit zeros may become non-zero after scaling
                for(int i = next_index; i < index; i++)
                    output(i, 0);
                output(index, value);
                next_index = index + 1;
            }

            for(int i = next_index; i <= max_index; i++)
                output(i, 0);
            System.out.print("\n");
        }
    }

    /**
     * Parses the command line, runs the three passes over the data file and
     * writes the scaled data to standard output.
     *
     * @param argv options followed by exactly one data file name
     * @throws IOException if a file cannot be read or written after it was
     *         first opened successfully
     */
    private void run(String []argv) throws IOException
    {
        int i;
        String save_filename = null;
        String restore_filename = null;

        for(i=0;i<argv.length;i++)
        {
            String option = argv[i];
            if(option.length() == 0 || option.charAt(0) != '-') break;
            if(option.length() < 2)
            {
                // a bare "-" carries no option letter
                System.err.println("unknown option");
                exit_with_help();
            }
            switch(option.charAt(1))
            {
                case 'l': lower = Double.parseDouble(option_value(argv, ++i)); break;
                case 'u': upper = Double.parseDouble(option_value(argv, ++i)); break;
                case 'y':
                    y_lower = Double.parseDouble(option_value(argv, ++i));
                    y_upper = Double.parseDouble(option_value(argv, ++i));
                    y_scaling = true;
                    break;
                case 's': save_filename = option_value(argv, ++i); break;
                case 'r': restore_filename = option_value(argv, ++i); break;
                default:
                    System.err.println("unknown option");
                    exit_with_help();
            }
        }

        if(!(upper > lower) || (y_scaling && !(y_upper > y_lower)))
        {
            System.err.println("inconsistent lower/upper specification");
            System.exit(1);
        }
        if(restore_filename != null && save_filename != null)
        {
            System.err.println("cannot use -r and -s simultaneously");
            System.exit(1);
        }

        if(argv.length != i+1)
            exit_with_help();

        String data_filename = argv[i];
        BufferedReader fp = open_or_exit(data_filename);
        try {
            /* assumption: min index of attributes is 1 */
            /* pass 1: find out max index of attributes */
            max_index = 0;
            if(restore_filename != null)
                max_index = Math.max(max_index, max_restore_index(restore_filename));
            scan_indices(fp);

            try {
                feature_max = new double[(max_index+1)];
                feature_min = new double[(max_index+1)];
            } catch(OutOfMemoryError e) {
                System.err.println("can't allocate enough memory");
                System.exit(1);
            }
            Arrays.fill(feature_max, -Double.MAX_VALUE);
            Arrays.fill(feature_min, Double.MAX_VALUE);

            fp = rewind(fp, data_filename);

            /* pass 2: find out min/max value */
            collect_ranges(fp);

            fp = rewind(fp, data_filename);

            /* pass 2.5: save/restore feature_min/feature_max */
            if(restore_filename != null)
                restore_parameters(restore_filename);
            if(save_filename != null)
                save_parameters(save_filename);

            /* pass 3: scale */
            write_scaled(fp);

            if (new_num_nonzeros > num_nonzeros)
                System.err.print(
                 "WARNING: original #nonzeros " + num_nonzeros+"\n"
                +" new #nonzeros " + new_num_nonzeros+"\n"
                +"Use -l 0 if many original feature values are zeros\n");
        } finally {
            // closing an already-closed reader is a no-op, so this is safe
            // even when a rewind failed half-way
            if(fp != null)
                fp.close();
        }
    }

    /** Entry point: scales the file named on the command line. */
    public static void main(String argv[]) throws IOException
    {
        svm_scale s = new svm_scale();
        s.run(argv);
    }
}
long int new_num_nonzeros
Definition: svm-scale.c:32
void output_target(double value)
Definition: svm-scale.c:353
XmlRpcServer s
int y_scaling
Definition: svm-scale.c:24
void exit_with_help()
Definition: libsvmread.c:21
double y_min
Definition: svm-scale.c:28
Definition: svm.java:5
long int num_nonzeros
Definition: svm-scale.c:31
char * readline(FILE *input)
Definition: svm-scale.c:335
double y_lower
Definition: svm-scale.c:23
void output(int index, double value)
Definition: svm-scale.c:367
double * feature_min
Definition: svm-scale.c:26
int max_index
Definition: svm-scale.c:29
double * feature_max
Definition: svm-scale.c:25
double lower
Definition: svm-scale.c:23
double upper
Definition: svm-scale.c:23
char * line
Definition: svm-scale.c:21
double y_upper
Definition: svm-scale.c:23
c
Definition: easy.py:61
void run(ClassLoader *loader)
int main(int argc, char **argv)
double y_max
Definition: svm-scale.c:27


ml_classifiers
Author(s): Scott Niekum, Joshua Whitley
autogenerated on Tue May 14 2019 02:28:35