stomp_core: stomp.cpp Source File

Go to the documentation of this file.
00001 
00027 #include <ros/console.h>
00028 #include <limits.h>
00029 #include <Eigen/LU>
00030 #include <Eigen/Cholesky>
00031 #include <math.h>
00032 #include <stomp_core/utils.h>
00033 #include <numeric>
00034 #include "stomp_core/stomp.h"
00035 
00036 static const double DEFAULT_NOISY_COST_IMPORTANCE_WEIGHT = 1.0; 
00037 static const double MIN_COST_DIFFERENCE = 1e-8; 
00038 static const double MIN_CONTROL_COST_WEIGHT = 1e-8; 
00047 static void computeLinearInterpolation(const std::vector<double>& first,const std::vector<double>& last,
00048                          int num_timesteps,
00049                          Eigen::MatrixXd& trajectory_joints)
00050 {
00051   trajectory_joints.setZero(first.size(),num_timesteps);
00052   for(int unsigned i = 0; i < first.size(); i++)
00053   {
00054     double dtheta = (last[i] - first[i])/(num_timesteps - 1);
00055     for(unsigned int j = 0; j < num_timesteps; j++)
00056     {
00057       trajectory_joints(i,j) = first[i] + j * dtheta;
00058     }
00059   }
00060 }
00061 
00070 static void computeCubicInterpolation(const std::vector<double>& first,const std::vector<double>& last,
00071                          int num_points,double dt,
00072                          Eigen::MatrixXd& trajectory_joints)
00073 {
00074   std::vector<double> coeffs(4,0);
00075   double total_time = (num_points - 1) * dt;
00076   for(int unsigned i = 0; i < first.size(); i++)
00077   {
00078     coeffs[0] = first[i];
00079     coeffs[2] = (3/(pow(total_time,2))) * (last[i] - first[i]);
00080     coeffs[3] = (-2/(pow(total_time,3))) * (last[i] - first[i]);
00081 
00082     double t;
00083     for(unsigned j = 0; j < num_points; j++)
00084     {
00085       t = j*dt;
00086       trajectory_joints(i,j) = coeffs[0] + coeffs[2]*pow(t,2) + coeffs[3]*pow(t,3);
00087     }
00088   }
00089 }
00090 
00100 bool computeMinCostTrajectory(const std::vector<double>& first,
00101                               const std::vector<double>& last,
00102                               const Eigen::MatrixXd& control_cost_matrix_R_padded,
00103                               const Eigen::MatrixXd& inv_control_cost_matrix_R,
00104                               Eigen::MatrixXd& trajectory_joints)
00105 {
00106   using namespace stomp_core;
00107 
00108   if(control_cost_matrix_R_padded.rows() != control_cost_matrix_R_padded.cols())
00109   {
00110     ROS_ERROR("Control Cost Matrix is not square");
00111     return false;
00112   }
00113 
00114 
00115   int timesteps = control_cost_matrix_R_padded.rows() - 2*(FINITE_DIFF_RULE_LENGTH - 1);
00116   int start_index_padded = FINITE_DIFF_RULE_LENGTH - 1;
00117   int end_index_padded = start_index_padded + timesteps-1;
00118   std::vector<Eigen::VectorXd> linear_control_cost(first.size(),Eigen::VectorXd::Zero(timesteps));
00119   trajectory_joints.setZero(first.size(),timesteps);
00120 
00121 
00122   for(unsigned int d = 0; d < first.size(); d++)
00123   {
00124     linear_control_cost[d].transpose() = first[d] * Eigen::VectorXd::Ones(FINITE_DIFF_RULE_LENGTH - 1).transpose() *
00125         control_cost_matrix_R_padded.block(0,start_index_padded,FINITE_DIFF_RULE_LENGTH - 1,timesteps);
00126 
00127     linear_control_cost[d].transpose() += last[d] * Eigen::VectorXd::Ones(FINITE_DIFF_RULE_LENGTH - 1).transpose() *
00128         control_cost_matrix_R_padded.block(end_index_padded + 1,start_index_padded,FINITE_DIFF_RULE_LENGTH - 1,
00129                                     timesteps);
00130 
00131     linear_control_cost[d] *=2;
00132 
00133     trajectory_joints.row(d) = -0.5*inv_control_cost_matrix_R*linear_control_cost[d];
00134     trajectory_joints(d,0) = first[d];
00135     trajectory_joints(d,timesteps - 1) = last[d];
00136   }
00137 
00138   return true;
00139 }
00140 
00149 void computeParametersControlCosts(const Eigen::MatrixXd& parameters,
00150                                           double dt,
00151                                           double control_cost_weight,
00152                                           const Eigen::MatrixXd& control_cost_matrix_R,
00153                                           Eigen::MatrixXd& control_costs)
00154 {
00155   std::size_t num_timesteps = parameters.cols();
00156   double cost = 0;
00157   for(auto d = 0u; d < parameters.rows(); d++)
00158   {
00159     cost = double(parameters.row(d)*(control_cost_matrix_R*parameters.row(d).transpose()));
00160     control_costs.row(d).setConstant( 0.5*(1/dt)*cost );
00161   }
00162 
00163   double max_coeff = control_costs.maxCoeff();
00164   control_costs /= (max_coeff > 1e-8) ? max_coeff : 1;
00165   control_costs *= control_cost_weight;
00166 }
00167 
00168 
00169 namespace stomp_core {
00170 
00171 
00172 Stomp::Stomp(const StompConfiguration& config,TaskPtr task):
00173     config_(config),
00174     task_(task)
00175 {
00176 
00177   resetVariables();
00178 
00179 }
00180 
00181 bool Stomp::clear()
00182 {
00183   return resetVariables();
00184 }
00185 
00186 void Stomp::setConfig(const StompConfiguration& config)
00187 {
00188   config_ = config;
00189   resetVariables();
00190 }
00191 
00192 bool Stomp::solve(const std::vector<double>& first,const std::vector<double>& last,
00193                   Eigen::MatrixXd& parameters_optimized)
00194 {
00195   // initialize trajectory
00196   if(!computeInitialTrajectory(first,last))
00197   {
00198     ROS_ERROR("Unable to generate initial trajectory");
00199   }
00200 
00201   return solve(parameters_optimized_,parameters_optimized);
00202 }
00203 
00204 bool Stomp::solve(const Eigen::VectorXd& first,const Eigen::VectorXd& last,
00205                   Eigen::MatrixXd& parameters_optimized)
00206 {
00207   // converting to std vectors
00208   std::vector<double> start(first.size());
00209   std::vector<double> end(last.size());
00210 
00211   Eigen::VectorXd::Map(&start[0],first.size()) = first;
00212   Eigen::VectorXd::Map(&end[0],last.size()) = last;
00213 
00214   return solve(start,end,parameters_optimized);
00215 }
00216 
00217 bool Stomp::solve(const Eigen::MatrixXd& initial_parameters,
00218                   Eigen::MatrixXd& parameters_optimized)
00219 {
00220   if(parameters_optimized_.isZero())
00221   {
00222     parameters_optimized_ = initial_parameters;
00223   }
00224 
00225   // check initial trajectory size
00226   if(initial_parameters.rows() != config_.num_dimensions || initial_parameters.cols() != config_.num_timesteps)
00227   {
00228     ROS_ERROR("Initial trajectory dimensions is incorrect");
00229     return false;
00230   }
00231   else
00232   {
00233     if(initial_parameters.cols() != config_.num_timesteps)
00234     {
00235       ROS_ERROR("Initial trajectory number of time steps is incorrect");
00236       return false;
00237     }
00238   }
00239 
00240   current_iteration_ = 1;
00241   unsigned int valid_iterations = 0;
00242   current_lowest_cost_ = std::numeric_limits<double>::max();
00243 
00244   // computing initialial trajectory cost
00245   if(!computeOptimizedCost())
00246   {
00247     ROS_ERROR("Failed to calculate initial trajectory cost");
00248     return false;
00249   }
00250 
00251   while(current_iteration_ <= config_.num_iterations && runSingleIteration())
00252   {
00253 
00254     ROS_DEBUG("STOMP completed iteration %i with cost %f",current_iteration_,current_lowest_cost_);
00255 
00256 
00257     if(parameters_valid_)
00258     {
00259       ROS_DEBUG("Found valid solution, will iterate %i more time(s) ",
00260                config_.num_iterations_after_valid - valid_iterations);
00261 
00262       valid_iterations++;
00263     }
00264     else
00265     {
00266       valid_iterations = 0;
00267     }
00268 
00269     if(valid_iterations > config_.num_iterations_after_valid)
00270     {
00271       break;
00272     }
00273 
00274     current_iteration_++;
00275   }
00276 
00277   if(parameters_valid_)
00278   {
00279     ROS_INFO("STOMP found a valid solution with cost %f after %i iterations",
00280              current_lowest_cost_,current_iteration_);
00281   }
00282   else
00283   {
00284     if (proceed_)
00285       ROS_ERROR("STOMP failed to find a valid solution after %i iterations",current_iteration_);
00286     else
00287       ROS_ERROR_STREAM("Stomp was terminated");
00288   }
00289 
00290   parameters_optimized = parameters_optimized_;
00291 
00292   // notifying task
00293   task_->done(parameters_valid_,current_iteration_,current_lowest_cost_,parameters_optimized);
00294 
00295   return parameters_valid_;
00296 }
00297 
00298 bool Stomp::resetVariables()
00299 {
00300   proceed_= true;
00301   parameters_total_cost_ = 0;
00302   parameters_valid_ = false;
00303   num_active_rollouts_ = 0;
00304   current_iteration_ = 0;
00305 
00306   // verifying configuration
00307   if(config_.max_rollouts <= config_.num_rollouts)
00308   {
00309     ROS_DEBUG_STREAM("'max_rollouts' must be greater than 'num_rollouts_per_iteration'.");
00310     config_.max_rollouts = config_.num_rollouts + 1; // one more to accommodate optimized trajectory
00311   }
00312 
00313   // noisy rollouts allocation
00314   int d = config_.num_dimensions;
00315   num_active_rollouts_ = 0;
00316   noisy_rollouts_.resize(config_.max_rollouts);
00317   reused_rollouts_.resize(config_.max_rollouts);
00318 
00319   // initializing rollout
00320   Rollout rollout;
00321   rollout.noise.resize(d, config_.num_timesteps);
00322   rollout.noise.setZero();
00323 
00324   rollout.parameters_noise.resize(d, config_.num_timesteps);
00325   rollout.parameters_noise.setZero();
00326 
00327   rollout.probabilities.resize(d, config_.num_timesteps);
00328   rollout.probabilities.setZero();
00329 
00330   rollout.full_probabilities.clear();
00331   rollout.full_probabilities.resize(d);
00332 
00333   rollout.full_costs.clear();
00334   rollout.full_costs.resize(d);
00335 
00336   rollout.control_costs.resize(d, config_.num_timesteps);
00337   rollout.control_costs.setZero();
00338 
00339   rollout.total_costs.resize(d, config_.num_timesteps);
00340   rollout.total_costs.setZero();
00341 
00342   rollout.state_costs.resize(config_.num_timesteps);
00343   rollout.state_costs.setZero();
00344 
00345   rollout.importance_weight = DEFAULT_NOISY_COST_IMPORTANCE_WEIGHT;
00346 
00347   for(unsigned int r = 0; r < config_.max_rollouts ; r++)
00348   {
00349     noisy_rollouts_[r] = rollout;
00350     reused_rollouts_[r] = rollout;
00351   }
00352 
00353   // parameter updates
00354   parameters_updates_.resize(d, config_.num_timesteps);
00355   parameters_updates_.setZero();
00356 
00357   parameters_control_costs_.resize(d, config_.num_timesteps);
00358   parameters_control_costs_.setZero();
00359 
00360   parameters_state_costs_.resize(config_.num_timesteps);
00361   parameters_state_costs_.setZero();
00362 
00363   parameters_optimized_.resize(config_.num_dimensions,config_.num_timesteps);
00364   parameters_optimized_.setZero();
00365 
00366   // generate finite difference matrix
00367   start_index_padded_ = FINITE_DIFF_RULE_LENGTH-1;
00368   num_timesteps_padded_ = config_.num_timesteps + 2*(FINITE_DIFF_RULE_LENGTH-1);
00369   generateFiniteDifferenceMatrix(num_timesteps_padded_,DerivativeOrders::STOMP_ACCELERATION,
00370                                  config_.delta_t,finite_diff_matrix_A_padded_);
00371 
00372   /* control cost matrix (R = A_transpose * A):
00373    * Note: Original code multiplies the A product by the time interval.  However this is not
00374    * what was described in the literature
00375    */
00376   control_cost_matrix_R_padded_ = config_.delta_t*finite_diff_matrix_A_padded_.transpose() * finite_diff_matrix_A_padded_;
00377   control_cost_matrix_R_ = control_cost_matrix_R_padded_.block(
00378       start_index_padded_,start_index_padded_,config_.num_timesteps,config_.num_timesteps);
00379   inv_control_cost_matrix_R_ = control_cost_matrix_R_.fullPivLu().inverse();
00380 
00381   /*
00382    * Applying scale factor to ensure that max(R^-1)==1
00383    */
00384   double maxVal = std::abs(inv_control_cost_matrix_R_.maxCoeff());
00385   control_cost_matrix_R_padded_ *= maxVal;
00386   control_cost_matrix_R_ *= maxVal;
00387   inv_control_cost_matrix_R_ /= maxVal; // used in computing the minimum control cost initial trajectory
00388 
00389   return true;
00390 }
00391 
00392 bool Stomp::computeInitialTrajectory(const std::vector<double>& first,const std::vector<double>& last)
00393 {
00394   bool valid = true;
00395 
00396   switch(config_.initialization_method)
00397   {
00398     case TrajectoryInitializations::CUBIC_POLYNOMIAL_INTERPOLATION:
00399 
00400       computeCubicInterpolation(first,last,config_.num_timesteps,config_.delta_t,parameters_optimized_);
00401       break;
00402     case TrajectoryInitializations::LINEAR_INTERPOLATION:
00403 
00404       computeLinearInterpolation(first,last,config_.num_timesteps,parameters_optimized_);
00405       break;
00406     case TrajectoryInitializations::MININUM_CONTROL_COST:
00407 
00408       valid = computeMinCostTrajectory(first,last,control_cost_matrix_R_padded_,inv_control_cost_matrix_R_,parameters_optimized_);
00409       break;
00410   }
00411 
00412   return valid;
00413 }
00414 
00415 bool Stomp::cancel()
00416 {
00417   ROS_WARN("Interrupting STOMP");
00418   proceed_ = false;
00419   return !proceed_;
00420 }
00421 
00422 bool Stomp::runSingleIteration()
00423 {
00424   if(!proceed_)
00425   {
00426     return false;
00427   }
00428 
00429   bool proceed = generateNoisyRollouts() &&
00430       computeNoisyRolloutsCosts() &&
00431       filterNoisyRollouts() &&
00432       computeProbabilities() &&
00433       updateParameters() &&
00434       computeOptimizedCost();
00435 
00436   // notifying end of iteration
00437   task_->postIteration(0,config_.num_timesteps,current_iteration_,current_lowest_cost_,parameters_optimized_);
00438 
00439   return proceed;
00440 }
00441 
00442 bool Stomp::generateNoisyRollouts()
00443 {
00444   // calculating number of rollouts to reuse from previous iteration
00445   std::vector< std::pair<double,int> > rollout_cost_sorter; // Used to sort noisy trajectories in ascending order wrt their total cost
00446   double h = config_.exponentiated_cost_sensitivity;
00447   int rollouts_stored = num_active_rollouts_-1; // don't take the optimized rollout into account
00448   rollouts_stored = rollouts_stored < 0 ? 0 : rollouts_stored;
00449   int rollouts_generate = config_.num_rollouts;
00450   int rollouts_total = rollouts_generate + rollouts_stored +1;
00451   int rollouts_reuse =  rollouts_total < config_.max_rollouts  ? rollouts_stored :  config_.max_rollouts - (rollouts_generate + 1) ; // +1 for optimized params
00452 
00453   // selecting least costly rollouts from previous iteration
00454   if(rollouts_reuse > 0)
00455   {
00456     // find min and max cost for exponential cost scaling
00457     double min_cost = std::numeric_limits<double>::max();
00458     double max_cost = std::numeric_limits<double>::min();
00459     for (int r=1; r<rollouts_stored; ++r)
00460     {
00461       double c = noisy_rollouts_[r].total_cost;
00462       if (c < min_cost)
00463         min_cost = c;
00464       if (c > max_cost)
00465         max_cost = c;
00466     }
00467 
00468     double cost_denom = max_cost - min_cost;
00469     if (cost_denom < 1e-8)
00470       cost_denom = 1e-8;
00471 
00472     // compute weighted cost on all rollouts
00473     double cost_prob;
00474     double weighted_prob;
00475     for (auto r = 0u; r<rollouts_stored; ++r)
00476     {
00477 
00478       // Apply noise generated on the previous iteration onto the current trajectory
00479       noisy_rollouts_[r].noise = noisy_rollouts_[r].parameters_noise
00480           - parameters_optimized_;
00481 
00482       cost_prob = exp(-h*(noisy_rollouts_[r].total_cost - min_cost)/cost_denom);
00483       weighted_prob = cost_prob * noisy_rollouts_[r].importance_weight;
00484       rollout_cost_sorter.push_back(std::make_pair(-weighted_prob,r));
00485     }
00486 
00487 
00488     std::sort(rollout_cost_sorter.begin(), rollout_cost_sorter.end());
00489 
00490     // use the best ones: (copy them into reused_rollouts)
00491     for (auto r = 0u; r<rollouts_stored; ++r)
00492     {
00493       int reuse_index = rollout_cost_sorter[r].second;
00494       reused_rollouts_[r] = noisy_rollouts_[reuse_index];
00495     }
00496 
00497     // copy them back from reused_rollouts_ into rollouts_
00498     for (auto r = 0u; r<rollouts_reuse; ++r)
00499     {
00500       noisy_rollouts_[rollouts_generate + r ] = reused_rollouts_[r];
00501     }
00502   }
00503 
00504   // adding optimized trajectory as the last rollout
00505   noisy_rollouts_[rollouts_generate + rollouts_reuse].parameters_noise = parameters_optimized_;
00506   noisy_rollouts_[rollouts_generate + rollouts_reuse].noise.setZero();
00507   noisy_rollouts_[rollouts_generate + rollouts_reuse].state_costs = parameters_state_costs_;
00508   noisy_rollouts_[rollouts_generate + rollouts_reuse].control_costs = parameters_control_costs_;
00509 
00510 
00511   // generate new noisy rollouts
00512   for(auto r = 0u; r < rollouts_generate; r++)
00513   {
00514     if(!task_->generateNoisyParameters(parameters_optimized_,
00515                                       0,config_.num_timesteps,
00516                                       current_iteration_,r,
00517                                       noisy_rollouts_[r].parameters_noise,
00518                                       noisy_rollouts_[r].noise))
00519     {
00520       ROS_ERROR("Failed to generate noisy parameters at iteration %i",current_iteration_);
00521       return false;
00522     }
00523 
00524   }
00525 
00526   // update total active rollouts
00527   num_active_rollouts_ = rollouts_reuse + rollouts_generate + 1;
00528 
00529   return true;
00530 }
00531 
00532 bool Stomp::filterNoisyRollouts()
00533 {
00534   // apply post noise generation filters
00535   bool filtered = false;
00536   for(auto r = 0u ; r < config_.num_rollouts; r++)
00537   {
00538     if(!task_->filterNoisyParameters(0,config_.num_timesteps,current_iteration_,r,noisy_rollouts_[r].parameters_noise,filtered))
00539     {
00540       ROS_ERROR_STREAM("Failed to filter noisy parameters");
00541       return false;
00542     }
00543 
00544     if(filtered)
00545     {
00546       noisy_rollouts_[r].noise = noisy_rollouts_[r].parameters_noise - parameters_optimized_;
00547     }
00548   }
00549 
00550   return true;
00551 }
00552 
00553 bool Stomp::computeNoisyRolloutsCosts()
00554 {
00555   // computing state and control costs
00556   bool valid = computeRolloutsStateCosts() && computeRolloutsControlCosts();
00557 
00558   if(valid)
00559   {
00560     // compute total costs
00561     double total_state_cost ;
00562     double total_control_cost;
00563 
00564     for(auto r = 0u ; r < num_active_rollouts_;r++)
00565     {
00566       Rollout& rollout = noisy_rollouts_[r];
00567       total_state_cost = rollout.state_costs.sum();
00568 
00569       // Compute control + state cost for each joint
00570       total_control_cost = 0;
00571       double ccost = 0;
00572       for(auto d = 0u; d < config_.num_dimensions; d++)
00573       {
00574         ccost = rollout.control_costs.row(d).sum();
00575         total_control_cost += ccost;
00576         rollout.full_costs[d] = ccost + total_state_cost;
00577       }
00578       rollout.total_cost = total_state_cost + total_control_cost;
00579 
00580       // Compute total cost for each time step
00581       for(auto d = 0u; d < config_.num_dimensions; d++)
00582       {
00583         rollout.total_costs.row(d) = rollout.state_costs.transpose() + rollout.control_costs.row(d);
00584       }
00585     }
00586   }
00587 
00588   return valid;
00589 }
00590 
00591 bool Stomp::computeRolloutsStateCosts()
00592 {
00593 
00594   bool all_valid = true;
00595   bool proceed = true;
00596   for(auto r = 0u ; r < config_.num_rollouts; r++)
00597   {
00598     if(!proceed_)
00599     {
00600       proceed = false;
00601       break;
00602     }
00603 
00604     Rollout& rollout = noisy_rollouts_[r];
00605     if(!task_->computeNoisyCosts(rollout.parameters_noise,0,
00606                             config_.num_timesteps,
00607                             current_iteration_,r,
00608                             rollout.state_costs,all_valid))
00609     {
00610       ROS_ERROR("Trajectory cost computation failed for rollout %i.",r);
00611       proceed = false;
00612       break;
00613     }
00614   }
00615 
00616   return proceed;
00617 }
00618 bool Stomp::computeRolloutsControlCosts()
00619 {
00620   Eigen::ArrayXXd Ax; // accelerations
00621   for(auto r = 0u ; r < num_active_rollouts_; r++)
00622   {
00623     Rollout& rollout = noisy_rollouts_[r];
00624 
00625     if(config_.control_cost_weight < MIN_CONTROL_COST_WEIGHT)
00626     {
00627       for(auto d = 0u; d < config_.num_dimensions; d++)
00628       {
00629         rollout.control_costs.row(d).setConstant(0);
00630       }
00631     }
00632     else
00633     {
00634       computeParametersControlCosts(rollout.parameters_noise,
00635                                     config_.delta_t,
00636                                     config_.control_cost_weight,
00637                                     control_cost_matrix_R_,rollout.control_costs);
00638     }
00639   }
00640   return true;
00641 }
00642 
00643 bool Stomp::computeProbabilities()
00644 {
00645 
00646   double cost;
00647   double min_cost;
00648   double max_cost;
00649   double denom;
00650   double numerator;
00651   double probl_sum = 0.0; // total probability sum of all rollouts for each joint
00652   const double h = config_.exponentiated_cost_sensitivity;
00653   double exponent = 0;
00654 
00655   for (auto d = 0u; d<config_.num_dimensions; ++d)
00656   {
00657 
00658     for (auto t = 0u; t<config_.num_timesteps; t++)
00659     {
00660 
00661       // find min and max cost over all rollouts at timestep 't':
00662       min_cost = noisy_rollouts_[0].total_costs(d,t);
00663       max_cost = min_cost;
00664       for (auto r=0u; r<num_active_rollouts_; ++r)
00665       {
00666           cost = noisy_rollouts_[r].total_costs(d,t);
00667           if (cost < min_cost)
00668               min_cost = cost;
00669           if (cost > max_cost)
00670               max_cost = cost;
00671       }
00672 
00673       denom = max_cost - min_cost;
00674 
00675       // prevent division by zero:
00676       if (denom < MIN_COST_DIFFERENCE)
00677       {
00678         denom = MIN_COST_DIFFERENCE;
00679       }
00680 
00681       probl_sum = 0.0;
00682       for (auto r = 0u; r<num_active_rollouts_; ++r)
00683       {
00684         // this is the exponential term in the probability calculation described in the literature
00685         exponent = -h*(noisy_rollouts_[r].total_costs(d,t) - min_cost)/denom;
00686         noisy_rollouts_[r].probabilities(d,t) = noisy_rollouts_[r].importance_weight *
00687             exp(exponent);
00688 
00689         probl_sum += noisy_rollouts_[r].probabilities(d,t);
00690       }
00691 
00692       // scaling each probability value by the sum of all probabilities corresponding to all rollouts at time "t"
00693       for (auto r = 0u; r<num_active_rollouts_; ++r)
00694       {
00695         noisy_rollouts_[r].probabilities(d,t) /= probl_sum;
00696       }
00697     }
00698 
00699 
00700     // computing full probabilities
00701     min_cost = noisy_rollouts_[0].full_costs[d];
00702     max_cost = min_cost;
00703     double c = 0.0;
00704     for (int r=1; r<num_active_rollouts_; ++r)
00705     {
00706       c = noisy_rollouts_[r].full_costs[d];
00707       if (c < min_cost)
00708         min_cost = c;
00709       if (c > max_cost)
00710         max_cost = c;
00711     }
00712 
00713     denom = max_cost - min_cost;
00714     denom = denom < MIN_COST_DIFFERENCE ? MIN_COST_DIFFERENCE : denom;
00715 
00716     probl_sum = 0.0;
00717     for (int r=0; r<num_active_rollouts_; ++r)
00718     {
00719       noisy_rollouts_[r].full_probabilities[d] = noisy_rollouts_[r].importance_weight *
00720           exp(-h*(noisy_rollouts_[r].full_costs[d] - min_cost)/denom);
00721       probl_sum += noisy_rollouts_[r].full_probabilities[d];
00722     }
00723     for (int r=0; r<num_active_rollouts_; ++r)
00724     {
00725         noisy_rollouts_[r].full_probabilities[d] /= probl_sum;
00726     }
00727   }
00728 
00729   return true;
00730 }
00731 
00732 bool Stomp::updateParameters()
00733 {
00734   // computing updates from probabilities using convex combination
00735   parameters_updates_.setZero();
00736   for(auto d = 0u; d < config_.num_dimensions ; d++)
00737   {
00738 
00739     for(auto r = 0u; r < num_active_rollouts_; r++)
00740     {
00741       auto& rollout = noisy_rollouts_[r];
00742       parameters_updates_.row(d) +=  (rollout.noise.row(d).array() * rollout.probabilities.row(d).array()).matrix();
00743     }
00744 
00745   }
00746 
00747   // filtering updates
00748   if(!task_->filterParameterUpdates(0,config_.num_timesteps,current_iteration_,parameters_optimized_,parameters_updates_))
00749   {
00750     ROS_ERROR("Updates filtering step failed");
00751     return false;
00752   }
00753 
00754   // updating parameters
00755   parameters_optimized_ += parameters_updates_;
00756 
00757   return true;
00758 }
00759 
00760 bool Stomp::computeOptimizedCost()
00761 {
00762 
00763   // control costs
00764   parameters_total_cost_ = 0;
00765   if(config_.control_cost_weight > MIN_CONTROL_COST_WEIGHT)
00766   {
00767     computeParametersControlCosts(parameters_optimized_,
00768                                   config_.delta_t,
00769                                   config_.control_cost_weight,
00770                                   control_cost_matrix_R_,
00771                                   parameters_control_costs_);
00772 
00773     // adding all costs
00774     parameters_total_cost_ = parameters_control_costs_.rowwise().sum().sum();
00775 
00776   }
00777 
00778   // state costs
00779   if(task_->computeCosts(parameters_optimized_,
00780                          0,config_.num_timesteps,current_iteration_,parameters_state_costs_,parameters_valid_))
00781   {
00782 
00783 
00784     parameters_total_cost_ += parameters_state_costs_.sum();
00785   }
00786   else
00787   {
00788     return false;
00789   }
00790 
00791   if(current_lowest_cost_ > parameters_total_cost_)
00792   {
00793     current_lowest_cost_ = parameters_total_cost_;
00794   }
00795   else
00796   {
00797     // reverting updates as no improvement was made
00798     parameters_optimized_ -= parameters_updates_;
00799   }
00800 
00801   return true;
00802 }
00803 
00804 } /* namespace stomp */