#include <mpc/mpc.h>

#include <Eigen/Cholesky>
#include <Eigen/Core>
#include <algorithm>
#include <cassert>
#include <cmath>

namespace MPC {

bool DDP_init(Dynamics dyn, DynamicsD dynd, CostFunction cost, CostFunctionD costd) {
    bool res = true;
    res &= setDynamicFunction(dyn, dynd);
    res &= setCostFunction(cost, costd);
    return res;
}

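// Rolls the system forward from x0 under the locally linear DDP policy
//   u_i = u_nom_i + alpha * l_i + L_i * (x_i - x_in_i),
// where l_i / L_i are the feedforward/feedback terms from the backward pass
// and alpha is the line-search step size. Returns 0 on success, 1 if the
// state magnitude exceeds tolDiv (rollout diverged).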
int forwardPass(const VectorT &x0, const MatrixT &u, const LocalValueFunction &v, const Trajectory &in, Trajectory &out, float alpha) {
    const float tolDiv = 1e6f;  // state magnitude beyond which the rollout is declared divergent
    int n = x0.rows();
    int m = u.rows();
    int N = u.cols();

    out.x.setZero(n, N + 1);
    out.x.col(0) = x0;
    out.u.setZero(m, N);
    out.cost.setZero(N + 1);

    for (int i = 0; i < N; ++i) {
        // Closed-loop control law: nominal input plus scaled feedforward term
        // and linear state feedback around the previous trajectory.
        VectorT dx = out.x.col(i) - in.x.col(i);
        out.u.col(i) = u.col(i) + alpha * v.l.col(i) + v.L[i] * dx;

        dynamicsAndCost(out.x.col(i), out.u.col(i), out.x.col(i + 1), out.cost(i));

        if (out.x.col(i + 1).array().abs().maxCoeff() > tolDiv) {
            return 1;
        }
    }

    // Terminal cost at the final state (zero input).
    out.cost(N) = dynamicsAndCost(out.x.col(N), VectorT::Zero(m));
    return 0;
}

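// Contraction of a weight vector with a stack of matrices: sum_i a[i] * B[i].
// The backward pass uses it to fold the value gradient Vx into the second
// derivative tensors of the dynamics (fxx, fxu, fuu). Despite the name it is
// a weighted sum, not an average: the weights are not normalized.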
MatrixT weightedMatrixAvg(const VectorT &a, const std::vector<MatrixT> &B) {
    assert(!B.empty());
    assert(a.size() == int(B.size()));
    int n = B[0].rows();
    int m = B[0].cols();
    MatrixT res = MatrixT::Zero(n, m);
    for (size_t i = 0; i < B.size(); ++i) {
        res += a[i] * B[i];
    }
    return res;
}

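// Backward sweep computing a quadratic approximation of the value function.
// At step k the Q-function expansion about the current trajectory is
//   Qx  = cx  + fx' * Vx,    Qu  = cu  + fu' * Vx,
//   Qxx = cxx + fx' * Vxx * fx + Vx . fxx,
//   Qxu = cxu + fx' * Vxx * fu + Vx . fxu,
//   Quu = cuu + fu' * Vxx * fu + Vx . fuu,
// where Vx/Vxx are taken at k+1 and '.' is the tensor contraction computed
// by weightedMatrixAvg. Returns the step index at which the regularized Quu
// failed to be positive definite, or -1 if the sweep completed.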
int backwardPass(const TrajectoryDeriv &td, LocalValueFunction &v, float lam) {
    int n = td[0].cx.rows();
    int m = td[0].cu.rows();
    int N = int(td.size()) - 1;

    v.dV.setZero(2);
    v.l.setZero(m, N);
    v.Vx.setZero(n, N + 1);

    assert(int(v.L.size()) == N);
    assert(int(v.Vxx.size()) == N + 1);
    for (int k = 0; k < N; ++k) {
        v.L[k].setZero(m, n);
        v.Vxx[k].setZero(n, n);
    }

    // Boundary condition: the value function at the horizon is the final cost.
    v.Vx.col(N) = td[N].cx;
    v.Vxx[N] = td[N].cxx;

    int diverge = -1;

    for (int k = N - 1; k >= 0; --k) {
        VectorT Qu = td[k].cu + td[k].fu.transpose() * v.Vx.col(k + 1);
        VectorT Qx = td[k].cx + td[k].fx.transpose() * v.Vx.col(k + 1);
        MatrixT Qxu = td[k].cxu + td[k].fx.transpose() * (v.Vxx[k + 1] * td[k].fu) + weightedMatrixAvg(v.Vx.col(k + 1), td[k].fxu);
        MatrixT Quu = td[k].cuu + td[k].fu.transpose() * (v.Vxx[k + 1] * td[k].fu) + weightedMatrixAvg(v.Vx.col(k + 1), td[k].fuu);
        MatrixT Qxx = td[k].cxx + td[k].fx.transpose() * (v.Vxx[k + 1] * td[k].fx) + weightedMatrixAvg(v.Vx.col(k + 1), td[k].fxx);

        // Regularized variants (Levenberg-Marquardt term added to Vxx) are
        // used only for the gains; the unregularized Q terms feed the value
        // function update below.
        MatrixT VxxF = v.Vxx[k + 1] + lam * MatrixT::Identity(n, n);
        MatrixT QxuF = td[k].cxu + td[k].fx.transpose() * (VxxF * td[k].fu) + weightedMatrixAvg(v.Vx.col(k + 1), td[k].fxu);
        MatrixT QuuF = td[k].cuu + td[k].fu.transpose() * (VxxF * td[k].fu) + weightedMatrixAvg(v.Vx.col(k + 1), td[k].fuu);

        // QuuF must be positive definite for the gains to define a descent
        // direction; a failed Cholesky factorization tells the caller to
        // increase lambda and rerun the sweep.
        Eigen::LLT<MatrixT> Quu_llt(QuuF);
        if (Quu_llt.info() != Eigen::Success) {
            diverge = k;
            break;
        }

        // Feedforward term and feedback gain.
        v.l.col(k) = -Quu_llt.solve(Qu);
        v.L[k] = -Quu_llt.solve(QxuF.transpose());

        // Expected cost reduction, split into linear and quadratic parts for
        // the line search.
        v.dV(0) += v.l.col(k).dot(Qu);
        v.dV(1) += 0.5f * v.l.col(k).dot(Quu * v.l.col(k));

        // Value function update.
        v.Vx.col(k) = Qx + v.L[k].transpose() * Qu + Qxu * v.l.col(k) + v.L[k].transpose() * (Quu * v.l.col(k));
        v.Vxx[k] = Qxx + v.L[k].transpose() * Qxu.transpose() + Qxu * v.L[k] + v.L[k].transpose() * (Quu * v.L[k]);
        v.Vxx[k] = 0.5f * (v.Vxx[k] + v.Vxx[k].transpose());  // enforce symmetry
    }

    return diverge;
}

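// Grows the Levenberg-Marquardt regularization: dlam accumulates the growth
// factor and lam is scaled by it (clamped from below by lambdaMin). Returns
// true once lam exceeds lambdaMax, signalling the caller to give up.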
bool update_lam_dlam(float &lam, float &dlam, const DDPParams &params) {
    dlam = std::max(dlam * params.lambdaFactor, params.lambdaFactor);
    lam = std::max(lam * dlam, params.lambdaMin);
    if (lam > params.lambdaMax) {
        LOG("reached lambdaMax curr: " << lam << " max " << params.lambdaMax);
        return true;
    }
    return false;
}

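// Main DDP/iLQG loop: roll out the initial control sequence u0, then
// alternate backward passes (with adaptive regularization lam) and
// line-searched forward passes until the normalized gradient converges,
// lam saturates, or maxIter iterations are exhausted. The optimized
// trajectory is returned in curr_traj.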
void DDP(const VectorT &x0, const MatrixT &u0, const DDPParams &params, const int maxIter, Trajectory &curr_traj) {
    float lam = params.lamInit;
    float dlam = params.dlamInit;
    MatrixT grad_norm_tmp;

    LocalValueFunction v;
    Trajectory next_traj;
    TrajectoryDeriv deriv;

    int n = x0.size();
    int m = u0.rows();
    int N = u0.cols();

    // States and costs carry one more entry than the controls: the terminal
    // state and the terminal cost.
    curr_traj.x.setZero(n, N + 1);
    curr_traj.u.setZero(m, N);
    curr_traj.cost.setZero(N + 1);

    next_traj.x.setZero(n, N + 1);
    next_traj.u.setZero(m, N);
    next_traj.cost.setZero(N + 1);

    v.Vx.setZero(n, N + 1);
    v.dV.setZero(2);
    v.l.setZero(m, N);
    v.Vxx.resize(N + 1);
    v.L.resize(N);

    grad_norm_tmp.setZero(m, N);

    for (int k = 0; k < N; ++k) {
        v.L[k].setZero(m, n);
        v.Vxx[k].setZero(n, n);
    }
    v.Vxx[N].setZero(n, n);

    // Initial rollout of u0: the gains are zero and in/out alias, so this
    // simply integrates the nominal control sequence.
    int diverge = forwardPass(x0, u0, v, curr_traj, curr_traj, 1.f);

    for (int i = 0; i < maxIter; ++i) {
        deriv.resize(curr_traj.x.cols());
        dynamicsAndCostDerivative(curr_traj, deriv);

        // Backward pass; if Quu is not positive definite at some step,
        // increase lambda and retry until it succeeds or lambda saturates.
        while (true) {
            diverge = backwardPass(deriv, v, lam);
            if (diverge > -1) {
                LOG("increasing lambda, diverging in step: " << diverge);
                if (update_lam_dlam(lam, dlam, params))
                    break;
            } else {
                break;
            }
        }

        // Convergence check on the feedforward gradient, normalized by the
        // control magnitude.
        grad_norm_tmp.array() = v.l.array().abs() / (curr_traj.u.array().abs() + 1.f);
        float g_norm = grad_norm_tmp.colwise().maxCoeff().mean();
        if (g_norm < params.tolGrad) {
            LOG("Success: gradient norm converged!");
            break;
        }

        // Backtracking line search on the feedforward step size alpha.
        float alpha = 1.f;
        while (true) {
            diverge = forwardPass(x0, curr_traj.u, v, curr_traj, next_traj, alpha);

            // A diverged rollout is penalized heavily so the step is rejected.
            float dcost = curr_traj.cost.sum() - next_traj.cost.sum() - 1e10f * diverge;
            float expected = -alpha * (v.dV(0) + alpha * v.dV(1));
            if (std::fabs(expected) < 1e-8f) {
                float sign = (expected > 0.f) ? 1.f : -1.f;
                expected = sign * 1e-8f;
            }

            float z = 1.f;
            if (std::fabs(dcost - expected) > 1e-4f)
                z = dcost / expected;
            LOG("iteration " << i << " cost expected ratio: dcost = " << dcost << " expected = " << expected << " z = " << z);

            if (params.zMin > z) {
                // Insufficient actual-vs-expected improvement: shrink the step
                // until alpha bottoms out, then raise lambda instead.
                alpha = alpha / 2.f;
                if (alpha < 1e-4f) {
                    update_lam_dlam(lam, dlam, params);
                    break;
                }
            } else {
                // Accept the step and relax the regularization; lambda is
                // dropped to zero once it falls below lambdaMin.
                curr_traj = next_traj;
                dlam = std::min(dlam / params.lambdaFactor, 1.f / params.lambdaFactor);
                lam = (lam > params.lambdaMin) ? lam * dlam : 0.f;
                break;
            }
        }
        if (lam > params.lambdaMax) {
            LOG("reached lambdaMax!");
            break;
        }
    }
}

}  // namespace MPC
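
// Hedged usage sketch (kept out of the build): how a caller might drive this
// solver. The callback names and the DDPParams values below are illustrative
// assumptions; the real Dynamics/CostFunction signatures and any namespace
// qualification of VectorT/MatrixT/Trajectory are declared in mpc/mpc.h.
#if 0
int main() {
    const int n = 4;   // state dimension (hypothetical)
    const int m = 2;   // control dimension (hypothetical)
    const int N = 50;  // horizon length (hypothetical)

    // Register user-supplied dynamics/cost callbacks and their derivatives.
    if (!MPC::DDP_init(myDynamics, myDynamicsDeriv, myCost, myCostDeriv))
        return 1;

    DDPParams params;            // field values are illustrative only
    params.lamInit = 1.f;
    params.dlamInit = 1.f;
    params.lambdaFactor = 1.6f;
    params.lambdaMin = 1e-6f;
    params.lambdaMax = 1e10f;
    params.tolGrad = 1e-4f;
    params.zMin = 0.f;

    VectorT x0 = VectorT::Zero(n);     // initial state
    MatrixT u0 = MatrixT::Zero(m, N);  // initial control guess
    Trajectory traj;
    MPC::DDP(x0, u0, params, /*maxIter=*/100, traj);
    // traj.x, traj.u and traj.cost now hold the optimized trajectory.
    return 0;
}
#endif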
00259