mpc.cpp
#include <mpc/mpc.h>

#include <Eigen/Core>
#include <Eigen/Eigenvalues>
#include <algorithm>
#include <cmath>

namespace MPC {

        // Register the user-supplied dynamics and cost callbacks together
        // with their derivatives; returns false if either registration fails.
        bool DDP_init(Dynamics dyn, DynamicsD dynd, CostFunction cost, CostFunctionD costd) {
                bool res = true;
                res &= setDynamicFunction(dyn, dynd);
                res &= setCostFunction(cost, costd);
                return res;
        }

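        // Forward pass: roll out the dynamics under the affine control law
        //   u_i = u_bar_i + alpha * l_i + L_i * (x_i - x_bar_i),
        // where (l_i, L_i) are the feedforward/feedback terms computed by the
        // backward pass and alpha is the line-search step. Returns 1 if the
        // state diverges, 0 otherwise.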
        int forwardPass(const VectorT &x0, const MatrixT &u, const LocalValueFunction &v, const Trajectory &in, Trajectory &out, float alpha) {

                const float tolDiv = 1e6f;
                int n = x0.rows();
                int m = u.rows();
                int N = u.cols();

                out.x.setZero(n, N+1);
                out.x.col(0) = x0;
                out.u.setZero(m, N);
                out.cost.setZero(N+1);

                for (int i = 0; i < N; ++i) {
                        VectorT dx = out.x.col(i) - in.x.col(i);
                        out.u.col(i) = u.col(i) + alpha * v.l.col(i) + v.L[i] * dx;

                        dynamicsAndCost(out.x.col(i), out.u.col(i), out.x.col(i+1), out.cost(i));

                        // abort if the rollout diverges
                        if (out.x.col(i+1).array().abs().maxCoeff() > tolDiv) {
                                return 1;
                        }
                }

                // terminal cost: no control is applied at the final state
                out.cost(N) = dynamicsAndCost(out.x.col(N), VectorT::Zero(m));
                return 0;
        }

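        // Contract a third-order tensor, stored as a vector of matrices B[i],
        // with a weight vector a:  res = sum_i a[i] * B[i]. Used below to fold
        // the dynamics second derivatives (fxx, fxu, fuu) into the Q-terms.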
        MatrixT weightedMatrixAvg(const VectorT &a, const std::vector<MatrixT> &B) {
                assert(!B.empty());
                assert(a.size() == int(B.size()));
                int n = B[0].rows();
                int m = B[0].cols();
                MatrixT res = MatrixT::Zero(n, m);
                for (size_t i = 0; i < B.size(); ++i) {
                        res += a[i] * B[i];
                }
                return res;
        }

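        // Backward pass: Riccati-like recursion for the local value function.
        // Writing V' for the value function at step k+1, the Q-expansion is
        //   Qx  = cx + fx' Vx'          Qu  = cu + fu' Vx'
        //   Qxx = cxx + fx' Vxx' fx + Vx'.fxx
        //   Quu = cuu + fu' Vxx' fu + Vx'.fuu
        //   Qxu = cxu + fx' Vxx' fu + Vx'.fxu
        // The regularized variants QxuF/QuuF use Vxx' + lam * I
        // (Levenberg-Marquardt), giving the control law
        //   l = -QuuF^-1 Qu  (feedforward),   L = -QuuF^-1 QxuF'  (feedback).
        // Returns the first time index at which QuuF is not positive
        // definite, or -1 on success.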
        int backwardPass(const TrajectoryDeriv &td, LocalValueFunction &v, float lam) {
                // get dimensions
                int n = td[0].cx.rows();
                int N = td.size() - 1;
                int m = td[0].cu.rows();

                // init the local value function
                v.dV.setZero(2);
                v.l.setZero(m, N);
                v.Vx.setZero(n, N+1);

                assert(int(v.L.size()) == N);
                assert(int(v.Vxx.size()) == N+1);
                for (int k = 0; k < N; ++k) {
                        v.L[k].setZero(m, n);
                        v.Vxx[k].setZero(n, n);
                }

                // boundary condition: value function at the final time step
                v.Vx.col(N) = td[N].cx;
                v.Vxx[N] = td[N].cxx;

                int diverge = -1;

                // traverse time backwards
                for (int k = N-1; k >= 0; --k) {
                        VectorT Qu = (v.Vx.col(k+1).transpose() * td[k].fu);
                        Qu += td[k].cu;
                        VectorT Qx = (v.Vx.col(k+1).transpose() * td[k].fx);
                        Qx += td[k].cx;
                        MatrixT Qxu = td[k].cxu + td[k].fx.transpose() * (v.Vxx[k+1].transpose() * td[k].fu) + weightedMatrixAvg(v.Vx.col(k+1), td[k].fxu);
                        MatrixT Quu = td[k].cuu + td[k].fu.transpose() * (v.Vxx[k+1].transpose() * td[k].fu) + weightedMatrixAvg(v.Vx.col(k+1), td[k].fuu);
                        MatrixT Qxx = td[k].cxx + td[k].fx.transpose() * (v.Vxx[k+1].transpose() * td[k].fx) + weightedMatrixAvg(v.Vx.col(k+1), td[k].fxx);

                        // regularized versions used for computing the gains
                        MatrixT VxxF = v.Vxx[k+1] + lam * MatrixT::Identity(n, n);
                        MatrixT QxuF = td[k].cxu + td[k].fx.transpose() * (VxxF * td[k].fu) + weightedMatrixAvg(v.Vx.col(k+1), td[k].fxu);
                        MatrixT QuuF = td[k].cuu + td[k].fu.transpose() * (VxxF * td[k].fu) + weightedMatrixAvg(v.Vx.col(k+1), td[k].fuu);

                        // QuuF is symmetric, so use the self-adjoint solver and
                        // require positive definiteness before inverting; a
                        // non-positive eigenvalue means lam must be increased.
                        Eigen::SelfAdjointEigenSolver<MatrixT> eigen_solver(QuuF);
                        if (eigen_solver.eigenvalues().minCoeff() <= 0.f) {
                                diverge = k;
                                break;
                        }

                        MatrixT Quu_inv = QuuF.inverse();
                        v.l.col(k) = -1.f * Quu_inv * Qu;             // feedforward
                        v.L[k] = -1.f * (Quu_inv * QxuF.transpose()); // feedback gain

                        // expected cost reduction, split into its linear and
                        // quadratic parts: dV = [l'Qu, 0.5 * l'Quu l]
                        v.dV(0) = v.dV(0) + v.l.col(k).dot(Qu);
                        v.dV(1) = v.dV(1) + 0.5 * (v.l.col(k).dot(Quu * v.l.col(k)));

                        v.Vx.col(k) = Qx + v.L[k].transpose() * Qu + Qxu * v.l.col(k) + v.L[k].transpose() * (Quu * v.l.col(k));

                        v.Vxx[k] = Qxx + v.L[k].transpose() * Qxu.transpose() + Qxu * v.L[k] + v.L[k].transpose() * (Quu * v.L[k]);

                        // enforce symmetry against numerical drift
                        v.Vxx[k] = 0.5 * (v.Vxx[k] + v.Vxx[k].transpose());
                }

                return diverge;
        }


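        // Increase the Levenberg-Marquardt regularizer: dlam grows
        // geometrically by lambdaFactor and lam follows, clamped below by
        // lambdaMin. Returns true once lam exceeds lambdaMax, signalling
        // the caller to give up.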
        bool update_lam_dlam(float &lam, float &dlam, const DDPParams &params) {
                dlam = std::max(dlam * params.lambdaFactor, params.lambdaFactor);
                lam = std::max(lam * dlam, params.lambdaMin);
                if (lam > params.lambdaMax) {
                        LOG("reached lambdaMax curr: " << lam << " max " << params.lambdaMax);
                        return true;
                }
                return false;
        }

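        // Main DDP/iLQG loop. Each iteration: (1) differentiate dynamics and
        // cost along the current trajectory, (2) run the backward pass,
        // increasing lam whenever it fails, (3) line-search the forward pass
        // over alpha, accepting the new trajectory when the actual cost
        // reduction is at least zMin times the expected reduction
        //   -alpha * (dV(0) + alpha * dV(1)).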
        void DDP(const VectorT &x0, const MatrixT &u0, const DDPParams &params, const int maxIter, Trajectory &curr_traj) {
                bool DEBUG = false;
                float lam = params.lamInit;
                float dlam = params.dlamInit;
                MatrixT grad_norm_tmp;

                LocalValueFunction v;
                Trajectory next_traj;
                TrajectoryDeriv deriv;

                int n = x0.size();
                int m = u0.rows();
                int N = u0.cols();

                // setup trajectory and v (x and cost have N+1 entries, one per
                // state along the rollout; u has one entry per time step)
                curr_traj.x.setZero(n, N+1);
                curr_traj.u.setZero(m, N);
                curr_traj.cost.setZero(N+1);

                next_traj.x.setZero(n, N+1);
                next_traj.u.setZero(m, N);
                next_traj.cost.setZero(N+1);

                v.Vx.setZero(n, N+1);
                v.dV.setZero(2);
                v.l.setZero(m, N);
                v.Vxx.resize(N+1);
                v.L.resize(N);

                grad_norm_tmp.setZero(m, N);

                for (int k = 0; k < N; ++k) {
                        v.L[k].setZero(m, n);
                        v.Vxx[k].setZero(n, n);
                }
                v.Vxx[N].setZero(n, n);

                // first do a forward simulation of the initial trajectory; the
                // gains in v are still zero, so this simply rolls out u0
                int diverge = forwardPass(x0, u0, v, curr_traj, curr_traj, 1.);

                for (int i = 0; i < maxIter; ++i) {
                        // first step: differentiate dynamics and cost along the current trajectory
                        deriv.resize(curr_traj.x.cols());
                        dynamicsAndCostDerivative(curr_traj, deriv);

                        // second step: backward pass computing optimal control law and cost
                        while (true) {
                                diverge = backwardPass(deriv, v, lam);
                                if (diverge > -1) {
                                        LOG("increasing lambda, diverging in step: " << diverge);
                                        if (update_lam_dlam(lam, dlam, params))
                                                break;
                                } else {
                                        // DONE yehaw :)
                                        break;
                                }
                        }

                        // convergence check: mean over time steps of the maximum
                        // of |l| relative to |u| must drop below tolGrad
                        grad_norm_tmp.array() = v.l.array().abs() / (curr_traj.u.array().abs() + 1.);
                        float g_norm = grad_norm_tmp.colwise().maxCoeff().mean();
                        if (g_norm < params.tolGrad) {
                                LOG("Success: gradient norm converged!");
                                break;
                        }
                        // third step: forward pass again to find new policy and cost
                        float alpha = 1.;
                        while (true) {
                                diverge = forwardPass(x0, curr_traj.u, v, curr_traj, next_traj, alpha);

                                // actual vs. expected cost reduction; a diverged
                                // rollout is penalized so the step gets rejected
                                float dcost = curr_traj.cost.sum() - next_traj.cost.sum() - 1e10 * diverge;
                                float expected = -alpha * (v.dV(0) + alpha * v.dV(1));
                                if (std::fabs(expected) < 1e-8) {
                                        float sign = (expected > 0.) ? 1. : -1.;
                                        expected = sign * 1e-8;
                                }

                                float z = 1.;
                                if (std::fabs(dcost - expected) > 1e-4)
                                        z = dcost / expected;
                                LOG("iteration " << i << " cost expected ratio: dcost = " << dcost << " expected = " << expected << " z = " << z);
                                if (params.zMin > z) {
                                        // insufficient improvement: shrink the step,
                                        // and once alpha is exhausted increase lam
                                        alpha = alpha / 2.;
                                        if (alpha < 1e-4) {
                                                update_lam_dlam(lam, dlam, params);
                                                break;
                                        } else {
                                                continue;
                                        }
                                } else {
                                        // accept the step and decrease the regularizer;
                                        // lam is zeroed once it falls below lambdaMin
                                        curr_traj = next_traj;
                                        dlam = std::min(dlam / params.lambdaFactor, 1.f / params.lambdaFactor);
                                        lam = lam * dlam * (lam > params.lambdaMin);
                                        break;
                                }
                        }
                        if (lam > params.lambdaMax) {
                                LOG("reached lambdaMax!");
                                break;
                        }
                        if (DEBUG) {
                                // TODO: possibly add some debug output here
                        }
                }
        }
}

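A minimal usage sketch follows. It assumes the callback types (Dynamics, DynamicsD, CostFunction, CostFunctionD) and the matrix typedefs (VectorT, MatrixT) declared in <mpc/mpc.h> but not shown in this file; myDynamics/myDynamicsD/myCost/myCostD are hypothetical callbacks, and the parameter values below are illustrative, not package defaults.

        #include <mpc/mpc.h>

        int main() {
                MPC::DDPParams params;
                params.lamInit      = 1.f;    // illustrative values only
                params.dlamInit     = 1.f;
                params.lambdaFactor = 1.6f;
                params.lambdaMin    = 1e-6f;
                params.lambdaMax    = 1e10f;
                params.tolGrad      = 1e-4f;
                params.zMin         = 0.f;

                // myDynamics etc. are assumed to match the callback types
                // declared in <mpc/mpc.h>.
                if (MPC::DDP_init(myDynamics, myDynamicsD, myCost, myCostD)) {
                        MPC::VectorT x0 = MPC::VectorT::Zero(4);     // initial state (n = 4)
                        MPC::MatrixT u0 = MPC::MatrixT::Zero(1, 50); // initial controls (m = 1, N = 50)
                        MPC::Trajectory traj;
                        MPC::DDP(x0, u0, params, 100, traj);         // traj.x/u/cost hold the result
                }
                return 0;
        }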


mpc
Author(s): Jost Tobias Springenberg