ergodic_control.hpp
Go to the documentation of this file.
1 /*********************************************************************
2  * BSD 3-Clause License
3  *
4  * Copyright (c) 2020 Northwestern University
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions are met:
9  *
10  * * Redistributions of source code must retain the above copyright notice, this
11  * list of conditions and the following disclaimer.
12  *
13  * * Redistributions in binary form must reproduce the above copyright notice,
14  * this list of conditions and the following disclaimer in the documentation
15  * and/or other materials provided with the distribution.
16  *
17  * * Neither the name of the copyright holder nor the names of its
18  * contributors may be used to endorse or promote products derived from
19  * this software without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
24  * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
27  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28  * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
29  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31  *********************************************************************/
38 #ifndef ERGODIC_CONTROL_HPP
39 #define ERGODIC_CONTROL_HPP
40 
41 #include <cmath>
42 #include <stdexcept>
43 #include <algorithm>
44 
45 #include <nav_msgs/Path.h>
47 
52 
53 namespace ergodic_exploration
54 {
55 using arma::span;
56 
65 inline vec rhodot(const vec& rho, const vec& gdx, const vec& dbar, const mat& fdx)
66 {
67  return -gdx - dbar - fdx.t() * rho;
68  // return -gdx - fdx.t() * rho;
69 }
70 
72 template <class ModelT>
74 {
75 public:
90  ErgodicControl(const ModelT& model, const Collision& collision, double dt,
91  double horizon, double resolution, double exploration_weight,
92  unsigned int num_basis, unsigned int buffer_size,
93  unsigned int batch_size, const mat& Rinv, const vec& umin,
94  const vec& umax);
95 
102  vec control(const GridMap& grid, const vec& x);
103 
107  mat optTraj() const;
108 
112  nav_msgs::Path path(const std::string& map_frame_id) const;
113 
118  void addStateMemory(const vec& x);
119 
121  double timeStep() const;
122 
127  void setTarget(const Target& target);
128 
134  void configTarget(const GridMap& grid);
135 
136 private:
144  mat gradErgodicMetric(const vec& ck, const mat& xt);
145 
152  void updateControl(const mat& xt, const mat& rhot);
153 
162  mat gradBarrier(const mat& xt);
163 
164 private:
165  ModelT model_; // robot's dynamic model
166  Collision collision_; // collision detection
167  double dt_; // time step in integration
168  double horizon_; // control horizon
169  double resolution_; // target grid resolution in meters
170  double expl_weight_; // ergodic exploration weight
171  unsigned int steps_; // number of steps used in integration
172  mat Rinv_; // inverse of the control weight matrix
173  mat ut_; // control signal
174  vec phik_; // target distribution fourier coefficients
175  vec rhoT_; // co-state terminal condition
176  vec map_pos_; // position of map
177  vec umin_; // lower limit on controls
178  vec umax_; // upper limit on controls
179  vec pose_; // current state
180  Basis basis_; // fourier basis
181  ReplayBuffer buffer_; // store past states in frame of occupancy map
182  RungeKutta rk4_; // Runge-Kutta integrator
183  CoStateFunc rhodot_; // co-state function
184  Target target_; // target distribution
185 };
186 
187 template <class ModelT>
188 ErgodicControl<ModelT>::ErgodicControl(const ModelT& model, const Collision& collision,
189  double dt, double horizon, double resolution,
190  double exploration_weight, unsigned int num_basis,
191  unsigned int buffer_size, unsigned int batch_size,
192  const mat& Rinv, const vec& umin, const vec& umax)
193  : model_(model)
194  , collision_(collision)
195  , dt_(dt)
196  , horizon_(horizon)
197  , resolution_(resolution)
198  , expl_weight_(exploration_weight)
199  , steps_(static_cast<unsigned int>(std::abs(horizon / dt)))
200  , Rinv_(Rinv)
201  , ut_(3, steps_, arma::fill::zeros) // body twist Vb = [vx, vy, w]
202  , phik_(num_basis * num_basis)
203  , rhoT_(model.state_space, arma::fill::zeros)
204  , map_pos_(2, arma::fill::zeros)
205  , umin_(umin)
206  , umax_(umax)
207  , pose_(model.state_space, arma::fill::zeros)
208  , basis_(0.0, 0.0, num_basis) // fourier domain init to 0
209  , buffer_(buffer_size, batch_size)
210  , rk4_(dt)
211 {
212  if (steps_ == 1)
213  {
214  throw std::invalid_argument("Need at least two steps in forward simulation. \
215  Increase the horizon or decrease the time step.");
216  }
217 
218  // arma::arma_rng::set_seed_random();
219 
220  rhodot_ = std::bind(rhodot, std::placeholders::_1, std::placeholders::_2,
221  std::placeholders::_3, std::placeholders::_4);
222 }
223 
224 template <class ModelT>
225 vec ErgodicControl<ModelT>::control(const GridMap& grid, const vec& x)
226 {
227  pose_ = x;
228 
229  // Update target grid if needed
230  configTarget(grid);
231 
232  // Shift columns to the left by 1 and set last column to zeros
233  ut_.cols(0, ut_.n_cols - 2) = ut_.cols(1, ut_.n_cols - 1);
234  ut_.col(ut_.n_cols - 1).fill(0.0);
235 
236  // Forward simulation
237  const mat traj = rk4_.solve(model_, pose_, ut_, horizon_);
238 
239  // Sample past states
240  mat xt_total = buffer_.sampleMemory(traj);
241 
242  // Transform from map frame to fourier frame
243  xt_total.row(0) -= map_pos_(0);
244  xt_total.row(1) -= map_pos_(1);
245 
247  // DEBUG
248  // for (unsigned int i = 0; i < xt_total.n_cols; i++)
249  // {
250  // if (xt_total(2, i) < -PI|| xt_total(2, i) > PI)
251  // {
252  // std::cout << "WARNING: heading is not normalized" << std::endl;
253  // }
254  //
255  // // if (xt_total(0, i) < 0.0 || xt_total(1, i) < 0.0)
256  // // {
257  // // std::cout << "WARNING: Trajectory is not within fourier domain" << std::endl;
258  // // // xt_total.rows(0, 1).print();
259  // // }
260  // }
262 
263  // Extract optimized trajectory in fourier domain
264  const mat xt = xt_total.cols(xt_total.n_cols - steps_, xt_total.n_cols - 1);
265 
266  // Update the trajectory fourier coefficients
267  const vec ck = basis_.trajCoeff(xt_total);
268 
269  // Gradient of the ergodic measure w.r.t the state
270  const mat edx = gradErgodicMetric(ck, xt);
271 
272  // Gradient of the barrier function w.r.t the state
273  const mat bdx = gradBarrier(xt);
274  // bdx_.print("bdx:");
275 
276  // Backwards pass
277  const mat rhot = rk4_.solve(rhodot_, model_, rhoT_, xt, ut_, edx, bdx, horizon_);
278 
280  // DEBUG
281  // const auto max_rho = max(max(rhot, 1));
282  // const auto min_rho = min(min(rhot, 1));
283  //
284  // std::cout << "max rho: " << max(max(rhot, 1)) << std::endl;
285  // std::cout << "min rho: " << min(min(rhot, 1)) << std::endl;
286 
287  // if (max_rho > 100.0)
288  // {
289  // std::cout << "OVERFLOW max rho: " << max_rho << std::endl;
290  // }
291  //
292  // if (min_rho < -100.0)
293  // {
294  // std::cout << "OVERFLOW min rho: " << min_rho << std::endl;
295  // }
296  //
297  // // if (any(abs(rhot.row(0)) < 1.0e-12) || any(abs(rhot.row(1)) < 1.0e-12) ||
298  // // any(abs(rhot.row(2)) < 1.0e-12))
299  // // {
300  // // std::cout << "UNDERFLOW: gradient is zero" << std::endl;
301  // // }
303 
304  // Update control signal
305  updateControl(xt, rhot);
306 
307  // Store current state in frame of map
308  // buffer_.append(x);
309 
310  return ut_.col(0);
311 }
312 
313 template <class ModelT>
315 {
316  return rk4_.solve(model_, pose_, ut_, horizon_);
317 }
318 
319 template <class ModelT>
320 nav_msgs::Path ErgodicControl<ModelT>::path(const std::string& map_frame_id) const
321 {
322  nav_msgs::Path path;
323  path.header.frame_id = map_frame_id;
324  path.poses.resize(steps_);
325 
326  const mat opt_traj = rk4_.solve(model_, pose_, ut_, horizon_);
327  for (unsigned int i = 0; i < opt_traj.n_cols; i++)
328  {
329  path.poses.at(i).pose.position.x = opt_traj(0, i);
330  path.poses.at(i).pose.position.y = opt_traj(1, i);
331 
332  tf2::Quaternion quat;
333  quat.setRPY(0.0, 0.0, normalize_angle_PI(opt_traj(2, i)));
334 
335  path.poses.at(i).pose.orientation.x = quat.x();
336  path.poses.at(i).pose.orientation.y = quat.y();
337  path.poses.at(i).pose.orientation.z = quat.z();
338  path.poses.at(i).pose.orientation.w = quat.w();
339  }
340 
341  return path;
342 }
343 
344 template <class ModelT>
346 {
347  buffer_.append(x);
348 }
349 
350 template <class ModelT>
352 {
353  return dt_;
354 }
355 
356 template <class ModelT>
358 {
359  target_ = target;
360 }
361 
362 template <class ModelT>
364 {
365  // translation from map to fourier domain
366  map_pos_(0) = grid.xmin();
367  map_pos_(1) = grid.ymin();
368 
369  // size of map
370  const auto mx = grid.xmax() - grid.xmin();
371  const auto my = grid.ymax() - grid.ymin();
372 
373  // Map is the same size
374  if (almost_equal(mx, basis_.lx_) && almost_equal(my, basis_.ly_))
375  {
376  return;
377  }
378 
379  std::cout << "Updating target distribution grid... ";
380 
381  // update phi_grid if map has grown
382  basis_.lx_ = mx;
383  basis_.ly_ = my;
384 
385  // Add 1 to include the boundary
386  // Use resolution that is specified for the target grid
387  const auto nx = axis_length(0.0, basis_.lx_, resolution_) + 1;
388  const auto ny = axis_length(0.0, basis_.ly_, resolution_) + 1;
389 
390  // Construct the grid
391  mat phi_grid(2, nx * ny);
392 
393  unsigned int col = 0;
394  auto y = 0.0;
395  for (unsigned int i = 0; i < ny; i++)
396  {
397  auto x = 0.0;
398  for (unsigned int j = 0; j < nx; j++)
399  {
400  phi_grid(0, col) = x;
401  phi_grid(1, col) = y;
402  // std::cout << x << " " << y << std::endl;
403 
404  col++;
405  x += resolution_;
406  }
407  y += resolution_;
408  }
409 
410  // Evaluate each grid cell
411  const vec phi_vals = target_.fill(map_pos_, phi_grid);
412 
413  phik_ = basis_.spatialCoeff(phi_vals, phi_grid);
414 
415  std::cout << "done" << std::endl;
416 }
417 
418 template <class ModelT>
419 mat ErgodicControl<ModelT>::gradErgodicMetric(const vec& ck, const mat& xt)
420 {
421  // element wise multiplication
422  const vec fourier_diff = basis_.lamdak_ % (ck - phik_);
423 
424  mat edx(model_.state_space, steps_);
425  for (unsigned int i = 0; i < steps_; i++)
426  {
427  edx(span(0, 1), span(i, i)) = basis_.gradFourierBasis(xt.col(i)) * fourier_diff;
428 
429  // set heading to zero
430  edx(2, i) = 0.0;
431  }
432 
433  edx.rows(0, 1) *= expl_weight_;
434  // edx_ *= 1.0 / steps_ * expl_weight_;
435  return edx;
436 }
437 
438 template <class ModelT>
439 void ErgodicControl<ModelT>::updateControl(const mat& xt, const mat& rhot)
440 {
441  for (unsigned int i = 0; i < xt.n_cols; i++)
442  {
443  // rhot is already sorted from t0 to tf
444  ut_.col(i) = -Rinv_ * model_.fdu(xt.col(i)).t() * rhot.col(i);
445  // ut_.col(i).print("u:");
446 
447  ut_(0, i) = std::clamp(ut_(0, i), umin_(0), umax_(0));
448  ut_(1, i) = std::clamp(ut_(1, i), umin_(1), umax_(1));
449  ut_(2, i) = std::clamp(ut_(2, i), umin_(2), umax_(2));
450  }
451 }
452 
453 template <class ModelT>
455 {
456  // TODO: load from param sever
457  const auto weight = 25.0;
458  const auto eps = 0.05;
459 
460  // set heading column to zeros
461  mat bdx(model_.state_space, steps_, arma::fill::zeros);
462  for (unsigned int i = 0; i < steps_; i++)
463  {
464  bdx(0, i) += 2.0 * (xt(0, i) > basis_.lx_ - eps) * (xt(0, i) - (basis_.lx_ - eps));
465  bdx(1, i) += 2.0 * (xt(1, i) > basis_.ly_ - eps) * (xt(1, i) - (basis_.ly_ - eps));
466 
467  bdx(0, i) += 2.0 * (xt(0, i) < eps) * (xt(0, i) - eps);
468  bdx(1, i) += 2.0 * (xt(1, i) < eps) * (xt(1, i) - eps);
469  }
470 
471  bdx.rows(0, 1) *= weight;
472 
473  return bdx;
474 }
475 } // namespace ergodic_exploration
476 #endif
ergodic_exploration::ErgodicControl::control
vec control(const GridMap &grid, const vec &x)
Update the control signal.
Definition: ergodic_control.hpp:225
target.hpp
Target distribution.
ergodic_exploration::ReplayBuffer
Store and smaple past states.
Definition: buffer.hpp:50
ergodic_exploration::ErgodicControl::umin_
vec umin_
Definition: ergodic_control.hpp:177
ergodic_exploration::ErgodicControl::buffer_
ReplayBuffer buffer_
Definition: ergodic_control.hpp:181
tf2::Quaternion::setRPY
void setRPY(const tf2Scalar &roll, const tf2Scalar &pitch, const tf2Scalar &yaw)
ergodic_exploration::ErgodicControl::ErgodicControl
ErgodicControl(const ModelT &model, const Collision &collision, double dt, double horizon, double resolution, double exploration_weight, unsigned int num_basis, unsigned int buffer_size, unsigned int batch_size, const mat &Rinv, const vec &umin, const vec &umax)
Constructor.
Definition: ergodic_control.hpp:188
ergodic_exploration::ErgodicControl::path
nav_msgs::Path path(const std::string &map_frame_id) const
Return optimized trajectory.
Definition: ergodic_control.hpp:320
ergodic_exploration::ErgodicControl::rhodot_
CoStateFunc rhodot_
Definition: ergodic_control.hpp:183
integrator.hpp
Numerical integration methods.
ergodic_exploration::ErgodicControl::horizon_
double horizon_
Definition: ergodic_control.hpp:168
ergodic_exploration::ErgodicControl::dt_
double dt_
Definition: ergodic_control.hpp:167
ergodic_exploration::ErgodicControl::basis_
Basis basis_
Definition: ergodic_control.hpp:180
ergodic_exploration::GridMap::ymax
double ymax() const
Return grid ymax.
Definition: grid.hpp:232
ergodic_exploration::ErgodicControl::steps_
unsigned int steps_
Definition: ergodic_control.hpp:171
ergodic_exploration::ErgodicControl::phik_
vec phik_
Definition: ergodic_control.hpp:174
ergodic_exploration::GridMap::xmin
double xmin() const
Return grid xmin.
Definition: grid.hpp:214
ergodic_exploration::normalize_angle_PI
double normalize_angle_PI(double rad)
Wraps angle between -pi and pi.
Definition: numerics.hpp:77
ergodic_exploration::ErgodicControl::pose_
vec pose_
Definition: ergodic_control.hpp:179
ergodic_exploration
Definition: basis.hpp:43
ergodic_exploration::GridMap
Constructs an 2D grid.
Definition: grid.hpp:79
ergodic_exploration::Target
Target distribution.
Definition: target.hpp:110
ergodic_exploration::Basis
Fourier cosine basis.
Definition: basis.hpp:51
Quaternion.h
ergodic_exploration::ErgodicControl::rk4_
RungeKutta rk4_
Definition: ergodic_control.hpp:182
ergodic_exploration::ErgodicControl::map_pos_
vec map_pos_
Definition: ergodic_control.hpp:176
ergodic_exploration::ErgodicControl::collision_
Collision collision_
Definition: ergodic_control.hpp:166
ergodic_exploration::ErgodicControl::addStateMemory
void addStateMemory(const vec &x)
Add the robot's state to memory.
Definition: ergodic_control.hpp:345
ergodic_exploration::almost_equal
bool almost_equal(double d1, double d2, double epsilon=1.0e-12)
approximately compare two floating-point numbers
Definition: numerics.hpp:67
ergodic_exploration::ErgodicControl::gradErgodicMetric
mat gradErgodicMetric(const vec &ck, const mat &xt)
Compose the gradient of the ergodic metric.
Definition: ergodic_control.hpp:419
ergodic_exploration::ErgodicControl::rhoT_
vec rhoT_
Definition: ergodic_control.hpp:175
ergodic_exploration::ErgodicControl::model_
ModelT model_
Definition: ergodic_control.hpp:165
ergodic_exploration::GridMap::ymin
double ymin() const
Return grid ymin.
Definition: grid.hpp:220
ergodic_exploration::ErgodicControl::updateControl
void updateControl(const mat &xt, const mat &rhot)
Update the control signal.
Definition: ergodic_control.hpp:439
ergodic_exploration::ErgodicControl::Rinv_
mat Rinv_
Definition: ergodic_control.hpp:172
ergodic_exploration::ErgodicControl::expl_weight_
double expl_weight_
Definition: ergodic_control.hpp:170
ergodic_exploration::CoStateFunc
std::function< vec(const vec &, const vec &, const vec &, const mat &)> CoStateFunc
Function representing the time derivatve of the co-state variable.
Definition: integrator.hpp:60
ergodic_exploration::ErgodicControl::ut_
mat ut_
Definition: ergodic_control.hpp:173
ergodic_exploration::GridMap::xmax
double xmax() const
Return grid xmax.
Definition: grid.hpp:226
ergodic_exploration::RungeKutta
4th order Runge-Kutta integration
Definition: integrator.hpp:63
std
basis.hpp
Fourier cosine basis.
ergodic_exploration::ErgodicControl::configTarget
void configTarget(const GridMap &grid)
Compose target distribution grid and spatial coefficients.
Definition: ergodic_control.hpp:363
ergodic_exploration::ErgodicControl::resolution_
double resolution_
Definition: ergodic_control.hpp:169
ergodic_exploration::ErgodicControl::timeStep
double timeStep() const
return time step
Definition: ergodic_control.hpp:351
tf2::Quaternion
buffer.hpp
Stores past states.
ergodic_exploration::ErgodicControl::optTraj
mat optTraj() const
Return optimized trajectory.
Definition: ergodic_control.hpp:314
ergodic_exploration::axis_length
unsigned int axis_length(double lower, double upper, double resolution)
Length of a grid axis.
Definition: grid.hpp:61
ergodic_exploration::ErgodicControl
Receding horizon ergodic trajectory optimization.
Definition: ergodic_control.hpp:73
ergodic_exploration::ErgodicControl::umax_
vec umax_
Definition: ergodic_control.hpp:178
ergodic_exploration::ErgodicControl::target_
Target target_
Definition: ergodic_control.hpp:184
ergodic_exploration::ErgodicControl::setTarget
void setTarget(const Target &target)
Set the target distribution.
Definition: ergodic_control.hpp:357
ergodic_exploration::ErgodicControl::gradBarrier
mat gradBarrier(const mat &xt)
Gradient of the barrier function.
Definition: ergodic_control.hpp:454
ergodic_exploration::rhodot
vec rhodot(const vec &rho, const vec &gdx, const vec &dbar, const mat &fdx)
Time derivatve of the co-state variable.
Definition: ergodic_control.hpp:65
ergodic_exploration::Collision
2D collision detection
Definition: collision.hpp:85


ergodic_exploration
Author(s): bostoncleek
autogenerated on Wed Mar 2 2022 00:17:13