rl_env: MountainCar.cc Source File

Go to the documentation of this file.
00001 
00007 #include <rl_env/MountainCar.hh>
00008 
00009  
00010 MountainCar::MountainCar(Random &rand):
00011   noisy(false),
00012   rng(rand),
00013   s(2),
00014   pos(s[0]),
00015   vel(s[1]),
00016   linear(false),
00017   delay(0)
00018 {
00019   reset();
00020   //cout << *this << endl;
00021 }
00022  
00023 
00024 MountainCar::MountainCar(Random &rand, bool stochastic, bool lin, int delay):
00025   noisy(stochastic),
00026   rng(rand),
00027   s(2),
00028   pos(s[0]),
00029   vel(s[1]),
00030   linear(lin),
00031   delay(delay)
00032 {
00033   reset();
00034 }
00035 
00036 
00037 MountainCar::~MountainCar() { }
00038 
00039 const std::vector<float> &MountainCar::sensation() const { 
00040   //cout << "At state " << s[0] << ", " << s[1] << endl;
00041 
00042   return s; 
00043 }
00044 
00045 float MountainCar::apply(int action) {
00046 
00047   //cout << "Taking action " << action << endl;
00048 
00049   float actVal = ((float)action-1.0);
00050   if (noisy){
00051     actVal += rng.uniform(-0.5, 0.5);
00052   }
00053 
00054   float newVel = vel;
00055   if (linear){
00056     // for now, make this linear
00057     newVel = vel + 0.001 * actVal + -0.0075*pos;
00058   } else {
00059     newVel = vel + 0.001 * actVal + -0.0025*cos(3.0*pos);
00060   }
00061 
00062   newVel = bound(newVel, -0.07, 0.07);
00063 
00064   float newPos = pos + vel;
00065   if (newPos < -1.2f && newVel < 0.0f)
00066     newVel = 0.0;
00067   newPos = bound(newPos, -1.2, 0.6);
00068 
00069   pos = newPos;
00070   vel = newVel;
00071 
00072   if (delay > 0){
00073     posHistory.push_back(newPos);
00074     pos = posHistory.front();
00075     posHistory.pop_front();
00076     velHistory.push_back(newVel);
00077     vel = velHistory.front();
00078     velHistory.pop_front();
00079     //    cout << "new pos: " << newPos << " observed: " << pos << endl;
00080     //cout << "new vel: " << newVel << " observed: " << vel << endl;
00081   }
00082 
00083   return reward();
00084 
00085 }
00086 
00087 float MountainCar::bound(float val, float min, float max){
00088   if (val < min)
00089     return min;
00090   if (val > max)
00091     return max;
00092   return val;
00093 }
00094 
00095 
00096 float MountainCar::reward() {
00097   
00098   // normally -1 and 0 on goal
00099   if (terminal())
00100     return 0;
00101   else 
00102     return -1;
00103   
00104 }
00105 
00106 
00107 bool MountainCar::terminal() const {
00108   // current position equal to goal??
00109   return (pos >= 0.6); 
00110 }
00111 
00112 
00113 
00114 void MountainCar::reset() {
00115 
00116   if (noisy){
00117     pos = rng.uniform(-1.2, 0.59);
00118     vel = rng.uniform(-0.07, 0.07);
00119   } else {
00120     pos = 0;
00121     vel = 0;
00122   }
00123 
00124   pos = rng.uniform(-1.2, 0.59);
00125   vel = rng.uniform(-0.07, 0.07);
00126 
00127   if (delay > 0){
00128     posHistory.clear();
00129     velHistory.clear();
00130     for (int i = 0; i < delay; i++){
00131       posHistory.push_back(pos);
00132       velHistory.push_back(vel);
00133     }
00134   }
00135 
00136 }
00137 
00138 
00139 int MountainCar::getNumActions(){
00140   return 3;
00141 }
00142 
00143 
00144 void MountainCar::setSensation(std::vector<float> newS){
00145   if (s.size() != newS.size()){
00146     cerr << "Error in sensation sizes" << endl;
00147   }
00148 
00149   for (unsigned i = 0; i < newS.size(); i++){
00150     s[i] = newS[i];
00151   }
00152 }
00153 
00154 std::vector<experience> MountainCar::getSeedings() {
00155 
00156   int origDelay = delay;
00157   delay = 0;
00158 
00159   // return seedings
00160   std::vector<experience> seeds;
00161 
00162   // two seeds of terminal state
00163   seeds.push_back(getExp(0.58, 0.03, 2));
00164   //seeds.push_back(getExp(0.57, 0.06, 2));
00165 
00166   // random seed of each action
00167   for (int i = 0; i < getNumActions(); i++){
00168     float p = rng.uniform(-1.2, 0.6);
00169     float v = rng.uniform(-0.07, 0.07);
00170     seeds.push_back(getExp(p, v, i));
00171   }
00172 
00173   delay = origDelay;
00174 
00175   reset();
00176 
00177   return seeds;
00178 
00179 }
00180 
00181 experience MountainCar::getExp(float s0, float s1, int a){
00182 
00183   experience e;
00184 
00185   e.s.resize(2, 0.0);
00186   e.next.resize(2, 0.0);
00187 
00188   pos = s0;
00189   vel = s1;
00190 
00191   e.act = a;
00192   e.s = sensation();
00193   e.reward = apply(e.act);
00194 
00195   e.terminal = terminal();
00196   e.next = sensation();
00197 
00198   reset();
00199 
00200   return e;
00201 }
00202 
00203 
00204 void MountainCar::getMinMaxFeatures(std::vector<float> *minFeat,
00205                                     std::vector<float> *maxFeat){
00206   
00207   minFeat->resize(s.size(), 0.0);
00208   maxFeat->resize(s.size(), 1.0);
00209 
00210   (*minFeat)[0] = -1.2;
00211   (*maxFeat)[0] = 0.6;
00212 
00213   (*minFeat)[1] = -0.07;
00214   (*maxFeat)[1] = 0.07;
00215 
00216 }
00217 
00218 void MountainCar::getMinMaxReward(float *minR,
00219                               float *maxR){
00220   
00221   *minR = -1.0;
00222   *maxR = 0.0;    
00223   
00224 }