RobotCarVel.cc
Go to the documentation of this file.
00001 
00011 #include <rl_env/RobotCarVel.hh>
00012 
00013 // normal: true values of each
00014 RobotCarVel::RobotCarVel(Random &rand, bool randomVel, bool upVel, bool tenToSix, bool lag):
00015   rng(rand),
00016   s(4),
00017   hidden(2),
00018   targetVel(s[0]),
00019   currVel(s[1]),
00020   trueThrottle(hidden[0]),
00021   trueBrake(hidden[1]),
00022   throttleTarget(s[2]),
00023   brakeTarget(s[3]),
00024   randomVel(randomVel),
00025   upVel(upVel),
00026   tenToSix(tenToSix),
00027   lag(lag)
00028 {
00029   reset();
00030 }
00031  
00032 
00033 
00034 RobotCarVel::~RobotCarVel() { }
00035 
00036 const std::vector<float> &RobotCarVel::sensation() const { 
00037   return s; 
00038 }
00039 
00040 float RobotCarVel::apply(int action) {
00041 
00042   float HZ = 10.0;
00043 
00044   actNum++;
00045 
00046   // figure out reward based on target/curr vel
00047   float reward = -10.0 * fabs(currVel - targetVel);
00048 
00049   float throttleChangePct = 1.0;//0.9; //1.0;
00050   float brakeChangePct = 1.0;//0.9; //1.0;
00051   if (lag){
00052     brakeChangePct = brakePosVel / HZ;
00053     float brakeVelTarget = 3.0*(brakeTarget - trueBrake);
00054     brakePosVel += (brakeVelTarget - brakePosVel) * 3.0 / HZ;
00055     trueBrake += brakeChangePct;
00056   } else {
00057     trueBrake += (brakeTarget-trueBrake) * brakeChangePct;
00058   }
00059   trueBrake = bound(trueBrake, 0.0, 1.0);
00060 
00061   // figure out the change of true brake/throttle position based on last targets
00062   trueThrottle += (throttleTarget-trueThrottle) * throttleChangePct;
00063   trueThrottle = bound(trueThrottle, 0.0, 0.4);
00064 
00065   // figure out new velocity based on those positions 
00066   // from the stage simulation
00067   float g = 9.81;         // acceleration due to gravity
00068   float throttle_accel = g;
00069   float brake_decel = g;
00070   float rolling_resistance = 0.01 * g;
00071   float drag_coeff = 0.01;
00072   float idle_accel = (rolling_resistance
00073                       + drag_coeff * 3.1 * 3.1);
00074   float wind_resistance = drag_coeff * currVel * currVel;
00075   float accel = (idle_accel
00076                   + trueThrottle * throttle_accel
00077                   - trueBrake * brake_decel
00078                   - rolling_resistance
00079                   - wind_resistance);
00080   currVel += (accel / HZ);
00081   currVel = bound(currVel, 0.0, 12.0);
00082   
00083 
00084   // figure out action's adjustment to throttle/brake targets
00085   if (action == THROTTLE_UP){
00086     brakeTarget = 0.0;
00087     if (throttleTarget < 0.4)
00088       throttleTarget += 0.1;
00089   }
00090   else if (action == THROTTLE_DOWN){
00091     brakeTarget = 0.0;
00092     if (throttleTarget > 0.0)
00093       throttleTarget -= 0.1;
00094   }
00095   else if (action == BRAKE_UP){
00096     throttleTarget = 0.0;
00097     if (brakeTarget < 1.0)
00098       brakeTarget += 0.1;
00099   }
00100   else if (action == BRAKE_DOWN){
00101     throttleTarget = 0.0;
00102     if (brakeTarget > 0.0)
00103       brakeTarget -= 0.1;
00104   }
00105   else if (action != NOTHING){
00106     cout << "invalid action " << action << endl;
00107   }
00108   throttleTarget = bound(throttleTarget, 0.0, 0.4);
00109   brakeTarget = bound(brakeTarget, 0.0, 1.0);
00110   throttleTarget = 0.1 * (float)((int)(throttleTarget*10.0));
00111   brakeTarget = 0.1 * (float)((int)(brakeTarget*10.0));
00112   
00113   /*
00114   cout << action << ", throt: " << throttleTarget << ", brake: " << brakeTarget
00115        << ", trueBrake: " << trueBrake 
00116        << ", currVel: " << currVel << ", reward: " << reward << endl;
00117   */
00118 
00119   return reward;
00120 
00121 }
00122 
00123 
00124 bool RobotCarVel::terminal() const {
00125   return false;
00126 }
00127 
00128 
00129 
00130 void RobotCarVel::reset() {
00131 
00132   // for now
00133   if (randomVel){
00134     targetVel = rng.uniformDiscrete(0, 11);
00135     currVel = rng.uniformDiscrete(0, 11);
00136   } else {
00137     if (tenToSix){ // 10 to 6
00138       if (upVel){
00139         targetVel = 10.0;
00140         currVel = 6.0;
00141       } else {
00142         targetVel = 6.0;
00143         currVel = 10.0;
00144       } 
00145     } else { // 7 to 2
00146       if (upVel){
00147         targetVel = 7.0;
00148         currVel = 2.0;
00149       } else {
00150         targetVel = 2.0;
00151         currVel = 7.0;
00152       } 
00153     }
00154   }
00155 
00156   actNum = 0;
00157   throttleTarget = rng.uniformDiscrete(0,4) * 0.1;
00158   brakeTarget = 0.0;
00159   trueThrottle = throttleTarget;
00160   trueBrake = brakeTarget;
00161   brakePosVel = 0.0;
00162 
00163 }
00164 
00165 
00166 
00167 int RobotCarVel::getNumActions(){
00168   return 5;
00169 }
00170 
00171 
00172 void RobotCarVel::setSensation(std::vector<float> newS){
00173   if (s.size() != newS.size()){
00174     cerr << "Error in sensation sizes" << endl;
00175   }
00176 
00177   for (unsigned i = 0; i < newS.size(); i++){
00178     s[i] = newS[i];
00179   }
00180 }
00181 
00182 std::vector<experience> RobotCarVel::getSeedings() {
00183 
00184   // return seedings
00185   std::vector<experience> seeds;
00186   //return seeds;
00187 
00188   
00189   reset();
00190 
00191   /*
00192   // seeds of perfect velocity
00193   if (randomVel){
00194     for (int i = 0; i < 12; i++){
00195       // 3 random of each
00196       for (int j = 0; j < 3; j++)
00197         seeds.push_back(getRandomVelSeed(i));
00198     } 
00199   } else {
00200     // just for target velocity
00201     // 5 random seeds
00202     for (int j = 0; j < 5; j++){
00203       seeds.push_back(getRandomVelSeed(targetVel));
00204     }
00205   }
00206   */
00207 
00208   
00209   // some completely random (non target)
00210   for (int i = 0; i < 25; i++){
00211     float vel = rng.uniform(0,11);
00212 
00213     float throt = 0;
00214     float brake = 0;
00215     if (rng.bernoulli(0.5)){
00216       throt = rng.uniformDiscrete(0,4)*0.1;
00217     } else {
00218       brake = rng.uniformDiscrete(0,9)*0.1;
00219     } 
00220     int act = i%getNumActions();
00221     seeds.push_back(getExp(targetVel,vel,throt,brake,act));
00222   }
00223 
00224   reset();
00225 
00226   return seeds;
00227 }
00228 
00229 experience RobotCarVel::getRandomVelSeed(float target){
00230   float throt = 0;
00231   float brake = 0;
00232   if (rng.bernoulli(0.5)){
00233     throt = rng.uniformDiscrete(0,4)*0.1;
00234   } else {
00235     brake = rng.uniformDiscrete(0,4)*0.1;
00236   } 
00237 
00238   return getExp(target, target, throt, brake, rng.uniformDiscrete(0,4));
00239 }
00240 
00241 experience RobotCarVel::getExp(float s0, float s1, float s2, float s3, int a){
00242 
00243   if (!randomVel) s0 = targetVel;
00244 
00245   experience e;
00246 
00247   e.s.resize(4, 0.0);
00248   e.next.resize(4, 0.0);
00249 
00250   targetVel = s0;
00251   currVel = s1;
00252   throttleTarget = s2;
00253   brakeTarget = s3;
00254   trueThrottle = throttleTarget;
00255   trueBrake = brakeTarget;
00256   brakePosVel = 0.0;
00257 
00258   e.act = a;
00259   e.s = sensation();
00260   e.reward = apply(e.act);
00261 
00262   e.terminal = terminal();
00263   e.next = sensation();
00264 
00265   /*
00266   cout << "seed from state: ";
00267   for (unsigned i = 0; i < e.s.size(); i++){
00268     cout << e.s[i] << ", ";
00269   } 
00270   cout << "act: " << e.act << " reward: " << e.reward << " next: ";
00271   for (unsigned i = 0; i < e.next.size(); i++){
00272     cout << e.next[i] << ", ";
00273   } 
00274   cout << endl;
00275   */
00276 
00277   reset();
00278 
00279   return e;
00280 }
00281 
00282 
00283 void RobotCarVel::getMinMaxFeatures(std::vector<float> *minFeat,
00284                                     std::vector<float> *maxFeat){
00285   
00286   minFeat->resize(s.size(), 0.0);
00287   maxFeat->resize(s.size(), 12.0);
00288 
00289   (*maxFeat)[2] = 0.4;
00290   (*maxFeat)[3] = 1.0;
00291 
00292 }
00293 
00294 void RobotCarVel::getMinMaxReward(float *minR,
00295                                   float *maxR){
00296   
00297   *minR = -120.0;
00298   *maxR = 0.0;    
00299   
00300 }
00301 
00302 float RobotCarVel::bound(float val, float min, float max){
00303   if (val < min)
00304     return min;
00305   if (val > max)
00306     return max;
00307   return val;
00308 }


rl_env
Author(s):
autogenerated on Thu Jun 6 2019 22:00:24