abstract_ddp_solver.cpp
Go to the documentation of this file.
1 //
2 // Copyright (c) 2019-2020, University of Edinburgh, University of Oxford
3 // All rights reserved.
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are met:
7 //
8 // * Redistributions of source code must retain the above copyright notice,
9 // this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of nor the names of its contributors may be used to
14 // endorse or promote products derived from this software without specific
15 // prior written permission.
16 //
17 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21 // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25 // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26 // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 // POSSIBILITY OF SUCH DAMAGE.
28 //
29 
31 
32 namespace exotica
33 {
34 void AbstractDDPSolver::Solve(Eigen::MatrixXd& solution)
35 {
36  if (!prob_) ThrowNamed("Solver has not been initialized!");
37  Timer planning_timer, backward_pass_timer, line_search_timer;
38 
39  T_ = prob_->get_T();
40  NU_ = prob_->GetScene()->get_num_controls();
41  NX_ = prob_->GetScene()->get_num_state();
42  NDX_ = prob_->GetScene()->get_num_state_derivative();
43  NV_ = prob_->GetScene()->get_num_velocities();
44  dt_ = dynamics_solver_->get_dt();
45  lambda_ = base_parameters_.RegularizationRate;
46  prob_->ResetCostEvolution(GetNumberOfMaxIterations() + 1);
47  prob_->PreUpdate();
48  solution.resize(T_ - 1, NU_);
49 
50  // Resizing and allocating logged variables
51  control_cost_evolution_.assign(GetNumberOfMaxIterations() + 1, std::numeric_limits<double>::quiet_NaN());
52  steplength_evolution_.assign(GetNumberOfMaxIterations() + 1, std::numeric_limits<double>::quiet_NaN());
53  regularization_evolution_.assign(GetNumberOfMaxIterations() + 1, std::numeric_limits<double>::quiet_NaN());
54 
55  // Perform initial roll-out
56  cost_ = 0.0;
57  control_cost_ = 0.0;
58  for (int t = 0; t < T_ - 1; ++t)
59  {
60  prob_->Update(prob_->get_U(t), t);
61 
62  // Running cost
63  control_cost_ += dt_ * prob_->GetControlCost(t);
64  cost_ += dt_ * prob_->GetStateCost(t);
65  }
66 
67  // Add terminal cost
68  cost_ += prob_->GetStateCost(T_ - 1) + control_cost_;
69  prob_->SetCostEvolution(0, cost_);
71 
72  // Initialize gain matrices
73  K_.assign(T_, Eigen::MatrixXd(NU_, NDX_));
74  k_.assign(T_, Eigen::VectorXd(NU_));
75 
76  // Allocate memory by resizing commonly reused matrices:
77  X_ref_.assign(T_, Eigen::VectorXd::Zero(NX_));
78  U_ref_.assign(T_ - 1, Eigen::VectorXd::Zero(NU_));
79  X_try_.assign(T_, Eigen::VectorXd::Zero(NX_));
80  U_try_.assign(T_ - 1, Eigen::VectorXd::Zero(NU_));
81  for (int t = 0; t < T_; ++t)
82  {
83  X_ref_[t] = prob_->get_X(t);
84  X_try_[t] = prob_->get_X(t);
85  }
86  for (int t = 0; t < T_ - 1; ++t)
87  {
88  U_ref_[t] = prob_->get_U(t);
89  U_try_[t] = prob_->get_U(t);
90  }
91 
92  Vxx_.assign(T_, Eigen::MatrixXd::Zero(NDX_, NDX_));
93  Vx_.assign(T_, Eigen::VectorXd::Zero(NDX_));
94 
95  Qx_.assign(T_ - 1, Eigen::VectorXd::Zero(NDX_));
96  Qu_.assign(T_ - 1, Eigen::VectorXd::Zero(NU_));
97  Qxx_.assign(T_ - 1, Eigen::MatrixXd::Zero(NDX_, NDX_));
98  Qux_.assign(T_ - 1, Eigen::MatrixXd::Zero(NU_, NDX_));
99  Quu_.assign(T_ - 1, Eigen::MatrixXd::Zero(NU_, NU_));
100  Quu_inv_.assign(T_ - 1, Eigen::MatrixXd::Zero(NU_, NU_));
101  fx_.assign(T_ - 1, Eigen::MatrixXd::Zero(NDX_, NDX_));
102  fu_.assign(T_ - 1, Eigen::MatrixXd::Zero(NDX_, NU_));
103 
104  if (debug_) HIGHLIGHT_NAMED("DDPSolver", "Running DDP solver for max " << GetNumberOfMaxIterations() << " iterations");
105 
106  cost_prev_ = cost_;
107  int last_best_iteration = 0;
108 
109  for (int iteration = 1; iteration <= GetNumberOfMaxIterations(); ++iteration)
110  {
111  // Check whether user interrupted (Ctrl+C)
112  if (Server::IsRos() && !ros::ok())
113  {
114  if (debug_) HIGHLIGHT("Solving cancelled by user");
115  prob_->termination_criterion = TerminationCriterion::UserDefined;
116  break;
117  }
118 
119  // Backward-pass computes the gains
120  backward_pass_timer.Reset();
121  BackwardPass();
122  time_taken_backward_pass_ = backward_pass_timer.GetDuration();
123 
124  // Forward-pass to compute new control trajectory
125  line_search_timer.Reset();
126 
127  double rollout_cost = cost_prev_;
128  // Perform a linear search to find the best rate
129  for (int ai = 0; ai < alpha_space_.size(); ++ai)
130  {
131  const double& alpha = alpha_space_(ai);
132  rollout_cost = ForwardPass(alpha);
133 
134  // TODO: More advanced line-search acceptance
135  if (rollout_cost < cost_)
136  {
137  cost_ = rollout_cost;
139  for (int t = 0; t < T_ - 1; ++t) U_try_[t] = prob_->get_U(t);
140  alpha_best_ = alpha;
141  break;
142  }
143  }
144  time_taken_forward_pass_ = line_search_timer.GetDuration();
145 
146  // Finiteness checks
147  // if (!U_try_.allFinite())
148  // {
149  // prob_->termination_criterion = TerminationCriterion::Divergence;
150  // WARNING_NAMED("DDPSolver", "Divergence: Controls are non-finite");
151  // return;
152  // }
153  if (!std::isfinite(cost_))
154  {
155  prob_->termination_criterion = TerminationCriterion::Divergence;
156  WARNING_NAMED("DDPSolver", "Divergence: Cost is non-finite: " << cost_);
157  return;
158  }
159 
160  if (debug_)
161  {
162  HIGHLIGHT_NAMED("DDPSolver", "Iteration " << iteration << std::setprecision(3) << ":\tBackward pass: " << time_taken_backward_pass_ << " s\tForward pass: " << time_taken_forward_pass_ << " s\tCost: " << cost_ << "\talpha: " << alpha_best_ << "\tRegularization: " << lambda_);
163  }
164 
165  //
166  // Stopping criteria checks
167  //
168 
169  // Relative function tolerance
170  // (f_t-1 - f_t) <= functionTolerance * max(1, abs(f_t))
171  if ((cost_prev_ - cost_) < base_parameters_.FunctionTolerance * std::max(1.0, std::abs(cost_)))
172  {
173  // Function tolerance patience check
174  if (base_parameters_.FunctionTolerancePatience > 0)
175  {
176  if (iteration - last_best_iteration > base_parameters_.FunctionTolerancePatience)
177  {
178  if (debug_) HIGHLIGHT_NAMED("DDPSolver", "Early stopping criterion reached (" << cost_ << " < " << cost_prev_ << "). Time: " << planning_timer.GetDuration());
179  prob_->termination_criterion = TerminationCriterion::FunctionTolerance;
180  break;
181  }
182  }
183  else
184  {
185  if (debug_) HIGHLIGHT_NAMED("DDPSolver", "Function tolerance reached (" << cost_ << " < " << cost_prev_ << "). Time: " << planning_timer.GetDuration());
186  prob_->termination_criterion = TerminationCriterion::FunctionTolerance;
187  break;
188  }
189  }
190  else
191  {
192  // Reset function tolerance patience
193  last_best_iteration = iteration;
194  }
195 
196  // Regularization
197  if (lambda_ != 0.0 && lambda_ > 1e9)
198  {
199  prob_->termination_criterion = TerminationCriterion::Divergence;
200  WARNING_NAMED("DDPSolver", "Divergence: Regularization too large (" << lambda_ << ")");
201  return;
202  }
203 
204  steplength_evolution_.at(iteration) = alpha_best_;
205  regularization_evolution_.at(iteration) = lambda_;
206 
207  // If better than previous iteration, copy solutions for next iteration
208  if (cost_ < cost_prev_)
209  {
210  cost_prev_ = cost_;
211  U_ref_ = U_try_;
212 
213  if (alpha_best_ < base_parameters_.ThresholdRegularizationDecrease)
214  {
216  }
217  else if (alpha_best_ > base_parameters_.ThresholdRegularizationIncrease)
218  {
219  if (lambda_ > base_parameters_.MinimumRegularization) DecreaseRegularization();
220  }
221  }
222  else
223  {
224  cost_ = cost_prev_;
225  control_cost_ = control_cost_evolution_.at(iteration - 1);
226  // Revert by not storing U_try_ as U_ref_ (maintain U_ref_)
227 
229  }
230 
231  // Roll-out and store reference state trajectory
232  // TODO: This roll-out may not be required => The line-search already does a roll-out.
233  for (int t = 0; t < T_ - 1; ++t)
234  prob_->Update(U_ref_[t], t);
235  for (int t = 0; t < T_; ++t)
236  X_ref_[t] = prob_->get_X(t);
237 
238  prob_->SetCostEvolution(iteration, cost_);
239  control_cost_evolution_.at(iteration) = control_cost_;
240 
241  // Iteration limit
242  if (iteration == GetNumberOfMaxIterations())
243  {
244  if (debug_) HIGHLIGHT_NAMED("DDPSolver", "Max iterations reached. Time: " << planning_timer.GetDuration());
245  prob_->termination_criterion = TerminationCriterion::IterationLimit;
246  }
247 
248  prob_->OnSolverIterationEnd();
249  }
250 
251  // Store the best solution found over all iterations
252  for (int t = 0; t < T_ - 1; ++t)
253  {
254  solution.row(t) = U_ref_[t].transpose();
255  prob_->Update(U_ref_[t], t); // TODO: This roll-out may also not be required...
256  }
257 
258  planning_time_ = planning_timer.GetDuration();
259 }
260 
262 {
263  if (pointer->type() != "exotica::DynamicTimeIndexedShootingProblem")
264  {
265  ThrowNamed("This DDPSolver can't solve problem of type '" << pointer->type() << "'!");
266  }
268  prob_ = std::static_pointer_cast<DynamicTimeIndexedShootingProblem>(pointer);
269  dynamics_solver_ = prob_->GetScene()->GetDynamicsSolver();
270 
271  // Set up backtracking line-search coefficients
272  alpha_space_ = Eigen::VectorXd::LinSpaced(11, 0.0, -3.0);
273  for (int ai = 0; ai < alpha_space_.size(); ++ai)
274  {
275  alpha_space_(ai) = std::pow(10.0, alpha_space_(ai));
276  }
277 
278  if (debug_) HIGHLIGHT_NAMED("DDPSolver", "initialized");
279 }
280 
281 double AbstractDDPSolver::ForwardPass(const double alpha)
282 {
283  cost_try_ = 0.0;
284  control_cost_try_ = 0.0;
285 
286  Eigen::VectorXd u_hat(NU_); // TODO: allocate outside
287  Eigen::VectorXd xdiff(NDX_);
288 
289  for (int t = 0; t < T_ - 1; ++t)
290  {
291  dynamics_solver_->StateDelta(prob_->get_X(t), X_ref_[t], xdiff);
292  u_hat = U_ref_[t];
293  u_hat.noalias() += alpha * k_[t];
294  u_hat.noalias() += K_[t] * xdiff;
295 
296  // Clamp controls, if desired:
297  if (base_parameters_.ClampControlsInForwardPass)
298  {
299  u_hat = u_hat.cwiseMax(dynamics_solver_->get_control_limits().col(0)).cwiseMin(dynamics_solver_->get_control_limits().col(1));
300  }
301 
302  prob_->Update(u_hat, t);
303  control_cost_try_ += dt_ * prob_->GetControlCost(t);
304  cost_try_ += dt_ * prob_->GetStateCost(t);
305  }
306 
307  // add terminal cost
308  cost_try_ += prob_->GetStateCost(T_ - 1) + control_cost_try_;
309  return cost_try_;
310 }
311 
313 {
314  Eigen::VectorXd u = U_ref_[t] + k_[t] + K_[t] * dynamics_solver_->StateDelta(x, X_ref_[t]);
315  return u.cwiseMax(dynamics_solver_->get_control_limits().col(0)).cwiseMin(dynamics_solver_->get_control_limits().col(1));
316 }
317 
318 const std::vector<Eigen::MatrixXd>& AbstractDDPSolver::get_Vxx() const { return Vxx_; }
319 const std::vector<Eigen::VectorXd>& AbstractDDPSolver::get_Vx() const { return Vx_; }
320 const std::vector<Eigen::MatrixXd>& AbstractDDPSolver::get_Qxx() const { return Qxx_; }
321 const std::vector<Eigen::MatrixXd>& AbstractDDPSolver::get_Qux() const { return Qux_; }
322 const std::vector<Eigen::MatrixXd>& AbstractDDPSolver::get_Quu() const { return Quu_; }
323 const std::vector<Eigen::VectorXd>& AbstractDDPSolver::get_Qx() const { return Qx_; }
324 const std::vector<Eigen::VectorXd>& AbstractDDPSolver::get_Qu() const { return Qu_; }
325 const std::vector<Eigen::MatrixXd>& AbstractDDPSolver::get_K() const { return K_; }
326 const std::vector<Eigen::VectorXd>& AbstractDDPSolver::get_k() const { return k_; }
327 const std::vector<Eigen::VectorXd>& AbstractDDPSolver::get_X_try() const { return X_try_; }
328 const std::vector<Eigen::VectorXd>& AbstractDDPSolver::get_U_try() const { return U_try_; }
329 const std::vector<Eigen::VectorXd>& AbstractDDPSolver::get_X_ref() const { return X_ref_; }
330 const std::vector<Eigen::VectorXd>& AbstractDDPSolver::get_U_ref() const { return U_ref_; }
331 const std::vector<Eigen::MatrixXd>& AbstractDDPSolver::get_Quu_inv() const { return Quu_inv_; }
332 const std::vector<Eigen::MatrixXd>& AbstractDDPSolver::get_fx() const { return fx_; }
333 const std::vector<Eigen::MatrixXd>& AbstractDDPSolver::get_fu() const { return fu_; }
335 {
336  std::vector<double> ret;
337  ret.reserve(control_cost_evolution_.size());
338  for (size_t position = 0; position < control_cost_evolution_.size(); ++position)
339  {
340  if (std::isnan(control_cost_evolution_[position])) break;
341  ret.push_back(control_cost_evolution_[position]);
342  }
343  return ret;
344 }
345 
346 void AbstractDDPSolver::set_control_cost_evolution(const int index, const double cost)
347 {
348  if (index > -1 && index < static_cast<int>(control_cost_evolution_.size()))
349  {
350  control_cost_evolution_[index] = cost;
351  }
352  else if (index == -1)
353  {
355  }
356  else
357  {
358  ThrowPretty("Out of range: " << index << " where length=" << control_cost_evolution_.size());
359  }
360 }
361 
363 {
364  std::vector<double> ret;
365  ret.reserve(steplength_evolution_.size());
366  for (size_t position = 1; position < steplength_evolution_.size(); ++position)
367  {
368  if (std::isnan(steplength_evolution_[position])) break;
369  ret.push_back(steplength_evolution_[position]);
370  }
371  return ret;
372 }
373 
375 {
376  std::vector<double> ret;
377  ret.reserve(regularization_evolution_.size());
378  for (size_t position = 1; position < regularization_evolution_.size(); ++position)
379  {
380  if (std::isnan(regularization_evolution_[position])) break;
381  ret.push_back(regularization_evolution_[position]);
382  }
383  return ret;
384 }
385 } // namespace exotica
const std::vector< Eigen::VectorXd > & get_U_try() const
const std::vector< Eigen::VectorXd > & get_Qu() const
#define HIGHLIGHT(x)
const std::vector< Eigen::VectorXd > & get_U_ref() const
std::vector< double > steplength_evolution_
Evolution of the steplength.
const std::vector< Eigen::VectorXd > & get_X_ref() const
const std::vector< Eigen::MatrixXd > & get_Quu() const
Eigen::VectorXd alpha_space_
Backtracking line-search steplengths.
const std::vector< Eigen::MatrixXd > & get_fx() const
double dt_
Integration time-step.
const std::vector< Eigen::MatrixXd > & get_fu() const
const std::vector< Eigen::MatrixXd > & get_K() const
std::vector< double > regularization_evolution_
Evolution of the regularization (xreg/ureg)
double lambda_
Regularization (Vxx, Quu)
#define WARNING_NAMED(name, x)
int NV_
Size of velocity vector (tangent vector to the configuration)
int NX_
Size of state vector.
std::vector< Eigen::MatrixXd > Quu_
Hessian of the Hamiltonian.
double cost_try_
Total cost computed by line-search procedure.
const std::vector< Eigen::VectorXd > & get_Vx() const
#define ThrowPretty(m)
Eigen::VectorXd GetFeedbackControl(Eigen::VectorXdRefConst x, int t) const override
const std::vector< Eigen::MatrixXd > & get_Qux() const
virtual void SpecifyProblem(PlanningProblemPtr pointer)
int NU_
Size of control vector.
std::vector< double > get_steplength_evolution() const
std::vector< Eigen::MatrixXd > fx_
Derivative of the dynamics f w.r.t. x.
void set_control_cost_evolution(const int index, const double cost)
void SpecifyProblem(PlanningProblemPtr pointer) override
Binds the solver to a specific problem which must be pre-initalised.
const std::vector< Eigen::MatrixXd > & get_Vxx() const
int T_
Length of shooting problem, i.e., state trajectory. The control trajectory has length T_-1...
std::vector< Eigen::MatrixXd > Vxx_
Hessian of the Value function.
double control_cost_
Control cost during iteration.
std::vector< double > control_cost_evolution_
Evolution of the control cost (control regularization)
std::vector< Eigen::VectorXd > k_
Feed-forward terms.
std::vector< Eigen::MatrixXd > Qxx_
Hessian of the Hamiltonian.
std::vector< Eigen::VectorXd > X_ref_
Reference state trajectory for feedback control.
double cost_
Cost during iteration.
#define ThrowNamed(m)
std::vector< Eigen::MatrixXd > fu_
Derivative of the dynamics f w.r.t. u.
std::vector< Eigen::MatrixXd > Qux_
Hessian of the Hamiltonian.
ROSCPP_DECL bool ok()
double alpha_best_
Line-search step taken.
double cost_prev_
Cost during previous iteration.
std::vector< Eigen::VectorXd > U_ref_
Reference control trajectory for feedback control.
std::vector< Eigen::VectorXd > U_try_
Updated control trajectory during iteration (computed by line-search)
std::vector< Eigen::VectorXd > Vx_
Gradient of the Value function.
std::vector< double > get_regularization_evolution() const
void Solve(Eigen::MatrixXd &solution) override
Solves the problem.
double control_cost_try_
Total control cost computed by line-search procedure.
const Eigen::Ref< const Eigen::VectorXd > & VectorXdRefConst
const std::vector< Eigen::MatrixXd > & get_Quu_inv() const
std::vector< Eigen::VectorXd > Qu_
Gradient of the Hamiltonian.
std::vector< Eigen::MatrixXd > Quu_inv_
Inverse of the Hessian of the Hamiltonian.
std::vector< Eigen::VectorXd > Qx_
Gradient of the Hamiltonian.
#define HIGHLIGHT_NAMED(name, x)
const std::vector< Eigen::VectorXd > & get_Qx() const
DynamicsSolverPtr dynamics_solver_
Shared pointer to the dynamics solver.
int NDX_
Size of tangent vector to the state vector.
double GetDuration() const
virtual void BackwardPass()=0
Computes the control gains for a the trajectory in the associated DynamicTimeIndexedProblem.
std::shared_ptr< PlanningProblem > PlanningProblemPtr
const std::vector< Eigen::VectorXd > & get_k() const
DynamicTimeIndexedShootingProblemPtr prob_
Shared pointer to the planning problem.
std::vector< Eigen::VectorXd > X_try_
Updated state trajectory during iteration (computed by line-search)
static bool IsRos()
const std::vector< Eigen::VectorXd > & get_X_try() const
AbstractDDPSolverInitializer base_parameters_
const std::vector< Eigen::MatrixXd > & get_Qxx() const
double ForwardPass(const double alpha)
Forward simulates the dynamics using the gains computed in the last BackwardPass;.
std::vector< Eigen::MatrixXd > K_
Feedback gains.
std::vector< double > get_control_cost_evolution() const


exotica_ddp_solver
Author(s): Traiko Dinev
autogenerated on Sat Apr 10 2021 02:36:21