"""Dimensionality reduction utilities: PCA (with the eigenface trick for
high-dimensional data), random projections, and a Dataset subclass that
applies them."""

import roslib
roslib.load_manifest('ml_lib')

import numpy as np
import dataset as ds
import hrl_lib.util as ut

def pca_gain_threshold(s, percentage_change_threshold=.15):
    """Return the index of the first singular value whose relative drop to
    the next value is smaller than percentage_change_threshold."""
    if s.__class__ != np.ndarray:
        raise ValueError('Need ndarray as input.')
    # Shift the spectrum left by one (duplicating the last element) so that
    # diff[i] = s[i] - s[i+1].  s is 1D, so concatenate along axis 0.
    shifted = np.concatenate((s[1:].copy(), np.array([s[-1]])), axis=0)
    diff = s - shifted
    percent_diff = diff / s
    positions = np.where(percent_diff < percentage_change_threshold)
    return positions[0][0]

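# Example (a sketch with made-up singular values):
#   >>> pca_gain_threshold(np.array([10., 8., 1., .95, .9]))
#   2
# The drop from 1. to .95 is the first one below the default 15% threshold.
# Note that positions[0][0] raises an IndexError if no drop qualifies.
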
def pca_variance_threshold(eigen_values, percent_variance=.9):
    """Return the index of the component at which the cumulative fraction
    of total variance first reaches percent_variance."""
    eigen_sum = np.sum(eigen_values)
    eigen_normed = np.cumsum(eigen_values) / eigen_sum
    positions = np.where(eigen_normed >= percent_variance)
    print 'pca_variance_threshold: percent_variance', percent_variance
    if positions[0].shape[0] == 0:
        # Threshold never reached: keep all components.
        return eigen_normed.shape[0] - 1
    else:
        return positions[0][0]

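# Example: with eigenvalues [5., 3., 1., 1.] the cumulative variance
# fractions are [.5, .8, .9, 1.], so the default .9 threshold gives index 2:
#   >>> pca_variance_threshold(np.array([5., 3., 1., 1.]))
#   2
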
def pca(data):
    """Plain PCA: SVD of the covariance of data (one column per sample,
    one row per dimension)."""
    cov_data = np.cov(data)
    u, s, vh = np.linalg.svd(cov_data)
    return u, s, vh

def pca_eigenface_trick(data):
    """PCA via the 'eigenface trick': when there are fewer samples than
    dimensions, take the SVD of the small Gram matrix data.T * data and map
    the resulting vectors back into the original space.  Assumes data is an
    np.matrix with one column per sample."""
    u, s, vh = np.linalg.svd(data.T * data)
    orig_u = data * u
    orig_u = orig_u / ut.norm(orig_u)
    return orig_u, s, None

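# Sketch of the trick on tall data (more dimensions than samples):
#   >>> data = np.matrix(np.random.random_sample((100, 5)))
#   >>> u, s, _ = pca_eigenface_trick(data)
#   >>> u.shape    # the basis lives in the original 100-d space
#   (100, 5)
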
def pca_vectors(data, percent_variance):
    """Return a matrix whose columns are the principal components needed to
    capture percent_variance of the variance in data (an np.matrix with one
    column per sample)."""
    if data.shape[1] < data.shape[0]:
        print 'pca_vectors: using pca_eigenface_trick since number of data points is less than dim'
        u, s, _ = pca_eigenface_trick(data)
    else:
        print 'pca_vectors: using normal PCA...'
        u, s, _ = pca(data)
    number_of_vectors = pca_variance_threshold(s, percent_variance=percent_variance)
    # Keep components 0..number_of_vectors inclusive.
    return np.matrix(u[:, 0:number_of_vectors + 1])

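# Example (random wide data, one column per sample):
#   >>> data = np.matrix(np.random.random_sample((5, 100)))
#   >>> basis = pca_vectors(data, percent_variance=.9)
#   >>> basis.shape[0]    # one 5-d principal component per column
#   5
# New points are then projected with basis.T * points.
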
def randomized_vectors(dataset, number_of_vectors):
    """Return number_of_vectors random unit-length vectors (one per column)
    for random-projection dimensionality reduction."""
    rvectors = np.matrix(np.random.random_sample((dataset.num_attributes(), number_of_vectors))) * 2 - 1.0
    # Rescale each column to unit length.  asarray so that np.diag builds a
    # diagonal matrix from the 1D vector of norms instead of extracting the
    # diagonal of a 1 x n matrix.
    norms = np.power(np.sum(np.power(np.asarray(rvectors), 2), axis=0), 0.5)
    return rvectors * np.diag(1.0 / norms)

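# Example (hypothetical dataset d with d.num_attributes() == 10):
#   >>> basis = randomized_vectors(d, 3)    # 10 x 3, unit-length columns
#   >>> np.sum(np.power(np.asarray(basis), 2), axis=0)
#   array([ 1.,  1.,  1.])
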
class LinearDimReduceDataset(ds.Dataset):
    """Dataset whose inputs can be projected onto a lower-dimensional basis
    obtained from PCA or from random projections."""

    def __init__(self, inputs, outputs):
        ds.Dataset.__init__(self, inputs, outputs)
        self.original_inputs = inputs

    def reduce_input(self):
        """Project self.inputs onto the current basis in place, keeping a
        copy of the originals.  A basis must have been set first via
        set_projection_vectors, pca_reduce or randomized_vectors_reduce."""
        self.original_inputs = self.inputs
        self.inputs = self.projection_basis.T * self.inputs

    def set_projection_vectors(self, vec):
        """Set the projection basis: an np.matrix with one basis vector per
        column."""
        self.projection_basis = vec

    def reduce(self, data_points):
        """Project data_points (one column per point) onto the basis."""
        return self.projection_basis.T * data_points

    def pca_reduce(self, percent_variance):
        """Set a PCA basis capturing percent_variance of the variance."""
        self.set_projection_vectors(pca_vectors(self.inputs, percent_variance))

    def randomized_vectors_reduce(self, number_of_vectors):
        """Set a random-projection basis with number_of_vectors columns."""
        # Pass the dataset itself, not self.inputs: randomized_vectors
        # calls num_attributes() on its argument.
        self.set_projection_vectors(randomized_vectors(self, number_of_vectors))

    def get_original_inputs(self):
        return self.original_inputs
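
# A minimal end-to-end sketch (assumes ds.Dataset accepts an np.matrix of
# inputs with one column per sample and matching outputs; not part of the
# original module):
if __name__ == '__main__':
    inputs = np.matrix(np.random.random_sample((10, 50)))   # 10 dims, 50 samples
    outputs = np.matrix(np.zeros((1, 50)))
    d = LinearDimReduceDataset(inputs, outputs)
    d.pca_reduce(percent_variance=.95)
    d.reduce_input()
    print 'reduced inputs:', d.inputs.shape, 'originals:', d.get_original_inputs().shape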