tabletop_pushing: rbf_control.py Source File

Go to the documentation of this file.
00001 import roslib; roslib.load_manifest('tabletop_pushing')
00002 import rospy
00003 import numpy as np
00004 
00005 class RBFController:
00006     '''
00007     Class to implement an RBF network controller based on PILCO learning for use in control of the robot.
00008     '''
00009 
00010     def feedbackControl(self, X):
00011         inp = np.zeros((self.N,self.D))
00012         for r in xrange(self.N):
00013             inp[r,:] = self.X_pi.T[r,:]-X.T
00014 
00015         k = None
00016         # Compute predicted mean
00017         U = np.zeros((self.E,1))
00018         for i in xrange(self.E):
00019             Lambda = self.lambdas[i] # NOTE: iL
00020             sf2 = self.sf2s[i] # NOTE: c
00021             t = np.matrix(inp)*Lambda # NOTE: in & t
00022             tb = np.sum(np.multiply(t,t),1)/2
00023             l = np.exp(-tb)
00024             lb = np.reshape(np.multiply(l.squeeze(),self.beta[:,i]),(self.N,1))
00025             U[i] = sf2*np.sum(lb)
00026             k_i = 2*self.Hyp[i,self.D]-tb;
00027             if k is None:
00028                 k = k_i
00029             else:
00030                 k = np.hstack([k, k_i])
00031 
00032         # Compute predicted covariance
00033         S = np.zeros((self.E, self.E))
00034         for i in xrange(self.E):
00035             ii = inp/np.exp(2.*self.Hyp[i,:self.D])
00036             for j in xrange(i+1):
00037                 R = np.eye(self.D)
00038                 ij = inp/np.exp(2.*self.Hyp[j,:self.D])
00039                 L = np.asmatrix(np.exp(k[:,i]+k[:,j].T))
00040                 S[i,j] = np.asmatrix(self.beta[:,i])*L*np.asmatrix(self.beta[:,j]).T
00041                 S[j,i] = S[i,j]
00042             S[i,i] += 1e-6
00043         S = S - U*U.T
00044 
00045         # Keep values in range [-max_U, max_U]
00046         if self.max_U is not None:
00047             F = self.D+self.E
00048             j = range(self.D, F)
00049             M = np.vstack([X, U])
00050             V = np.zeros((F,F))
00051             V[np.ix_(j,j)] = S
00052             return self.gaussian_saturate(M, V, j, self.max_U)
00053         return U
00054 
00055     def computeBetaPi(self):
00056         '''
00057         Compute the RBF network weights in GP form
00058         '''
00059         self.beta = np.zeros((self.N, self.E))
00060         self.lambdas = []
00061         self.sf2s = []
00062         for i in xrange(self.E):
00063             Lambda = np.diag(np.exp(-1*self.Hyp[i,:self.D])) # Length scale
00064             sf2 = np.exp(2*self.Hyp[i,self.D]) # signal variance
00065             sigmaBeta2 = np.exp(2*self.Hyp[i,self.D+1]) # kernel noise
00066 
00067 
00068             inp = self.X_pi.T / np.exp(self.Hyp[i,:self.D]).T
00069             KX = np.exp(2*self.Hyp[i,self.D]-self.maha(inp, inp)/2.)
00070 
00071             L = np.linalg.cholesky(KX+sigmaBeta2*np.eye(self.N))
00072             betai = np.linalg.solve(L.T, np.linalg.solve(L, self.Y_pi[:,i]))
00073             self.lambdas.append(Lambda)
00074             self.sf2s.append(sf2)
00075             self.beta[:,i] = betai.squeeze()
00076 
00077     def squaredExponentialKernelARD(self, x, c, ell, sf2):
00078         '''
00079         Squared exponential with automatic relavence determination pre-computed parameters 'self.ell_diag'
00080         '''
00081         return sf2*np.exp(-self.squaredDist(ell*x, ell*c)/2)
00082 
00083     def squaredExponentialKernel(self, x, c, sf2):
00084         '''
00085         Squared exponential without the length scale hyperparemeters
00086         '''
00087         return sf2*np.exp(-self.squaredDist(x,c)/2)
00088 
00089     def squaredDist(self, x1, x2):
00090         '''
00091         Get the squared distance between two vectors
00092         TODO: Make this work for full matrices
00093         '''
00094         xx = x1-x2
00095         xx = np.multiply(xx,xx)
00096         xx = np.sum(xx)
00097         return xx
00098 
00099     def maha(self, a, b):
00100         A = np.sum(np.multiply(a,a),1)
00101         B = np.sum(np.multiply(b,b),1)
00102         C = -2*a*b.T
00103         return A + B.T + C
00104 
00105     def gaussian_saturate(self, m, v, i, e):
00106         m = np.asmatrix(m)
00107         d = len(m)
00108         I = len(i)
00109         i = np.hstack([i, np.asarray(i)+d])
00110         P = np.asmatrix(np.vstack([np.eye(d), 3.*np.eye(d)]))
00111         m = P*m
00112         e = np.asmatrix(np.hstack([9.*e, e])/8).T
00113         va = P*v*P.T
00114         va = (va+va.T)/2.
00115         vi = va[i][:,i]
00116         vii = np.asmatrix(np.diag(vi)).T
00117         mi = m[i,:]
00118 
00119         # Get mean
00120         M2 = np.multiply(np.multiply(e, np.exp(-vii/2)), np.sin(mi));
00121         # Combine back to correct dimensions
00122         P = np.asmatrix(np.hstack([np.eye(I), np.eye(I)]))
00123         return P*M2
00124 
00125 #
00126 # I/O Functions
00127 #
00128     def loadRBFController(self, controller_path):
00129         '''
00130         Read controller data from disk that was learned by PILCO
00131         '''
00132         M_LINE = 0
00133         N_LINE = 1
00134         E_LINE = 2
00135         D_LINE = 3
00136         MAX_U_LINE = 4
00137         HYP_LINE = 5
00138         TARGET_LINE = 6
00139 
00140         rospy.logwarn('controller_path:'+controller_path)
00141         controller_file = file(controller_path,'r')
00142         data_in = controller_file.readlines()
00143         controller_file.close()
00144         M = int(data_in[M_LINE].split()[1])
00145         N = int(data_in[N_LINE].split()[1])
00146         E = int(data_in[E_LINE].split()[1]) # Length of policy output
00147         D = int(data_in[D_LINE].split()[1]) # Length of policy input
00148         self.max_U = np.asarray([float(u) for u in data_in[MAX_U_LINE].split()[1:]])
00149         Hyp = np.asarray([float(h) for h in data_in[HYP_LINE].split()])
00150         self.Hyp = np.reshape(Hyp, (E, D+2))
00151         data_in = data_in[TARGET_LINE:]
00152         Y_pi = np.asmatrix([d.split() for d in data_in[:N]],'float')
00153         X_pi = np.asmatrix([d.split() for d in data_in[N:]],'float').T
00154         self.X_pi = X_pi
00155         self.Y_pi = Y_pi
00156         self.N = N
00157         self.D = D
00158         self.E = E
00159         # Precompute stuff
00160         self.computeBetaPi()