taxi.cc
Go to the documentation of this file.
00001 
00008 #include <rl_env/taxi.hh>
00009 
00010 const Taxi::DefaultLandmarks Taxi::defaultlandmarks;
00011 
00012 Taxi::DefaultLandmarks::DefaultLandmarks() {
00013   push_back(value_type(4.,0.));
00014   push_back(value_type(0.,3.));
00015   push_back(value_type(4.,4.));
00016   push_back(value_type(0.,0.));
00017 }
00018 
00019 Taxi::Taxi(Random &rand, const Gridworld *gridworld, bool stochastic):
00020   grid(gridworld), landmarks(4), noisy(stochastic), rng(rand),
00021   s(4),
00022   ns(s[0]),
00023   ew(s[1]),
00024   pass(s[2]),
00025   dest(s[3])
00026 {
00027   randomize_landmarks_to_corners();
00028   reset();
00029 }
00030 
00031 Taxi::Taxi(Random &rand):
00032   grid(create_default_map()),
00033   landmarks(defaultlandmarks),
00034   noisy(false),
00035   rng(rand),
00036   s(4),
00037   ns(s[0]),
00038   ew(s[1]),
00039   pass(s[2]),
00040   dest(s[3])
00041 {
00042   reset();
00043 }
00044 
00045 Taxi::Taxi(Random &rand, bool stochastic):
00046   grid(create_default_map()),
00047   landmarks(defaultlandmarks),
00048   noisy(stochastic),
00049   rng(rand),
00050   s(4),
00051   ns(s[0]),
00052   ew(s[1]),
00053   pass(s[2]),
00054   dest(s[3])
00055 {
00056   reset();
00057 }
00058 
00059 Taxi::Taxi(Random &rand, unsigned width, unsigned height, bool stochastic):
00060   grid(new Gridworld(height, width, rand)),
00061   landmarks(4), noisy(stochastic), rng(rand),
00062   s(4),
00063   ns(s[0]),
00064   ew(s[1]),
00065   pass(s[2]),
00066   dest(s[3])
00067 {
00068   randomize_landmarks_to_corners();
00069   reset();
00070 }
00071 
00072 Taxi::~Taxi() { delete grid; }
00073 
00074 const std::vector<float> &Taxi::sensation() const { return s; }
00075 
00076 float Taxi::apply(int action) {
00077   const taxi_action_t effect =
00078     noisy
00079     ? add_noise(static_cast<taxi_action_t>(action))
00080     : static_cast<taxi_action_t>(action);
00081   switch(effect) {
00082   case NORTH:
00083     if (!grid->wall(static_cast<unsigned>(ns),
00084                     static_cast<unsigned>(ew),
00085                     effect))
00086       {
00087         ++ns;
00088         apply_fickle_passenger();
00089       }
00090     return -1;
00091   case SOUTH:
00092     if (!grid->wall(static_cast<unsigned>(ns),
00093                     static_cast<unsigned>(ew),
00094                     effect))
00095       {
00096         --ns;
00097         apply_fickle_passenger();
00098       }
00099     return -1;
00100   case EAST:
00101     if (!grid->wall(static_cast<unsigned>(ns),
00102                     static_cast<unsigned>(ew),
00103                     effect))
00104       {
00105         ++ew;
00106         apply_fickle_passenger();
00107       }
00108     return -1;
00109   case WEST:
00110     if (!grid->wall(static_cast<unsigned>(ns),
00111                     static_cast<unsigned>(ew),
00112                     effect))
00113       {
00114         --ew;
00115         apply_fickle_passenger();
00116       }
00117     return -1;
00118   case PICKUP: {
00119     if (pass < landmarks.size()
00120         && coord_t(ns,ew) == landmarks[static_cast<unsigned>(pass)])
00121       {
00122         pass = landmarks.size();
00123         fickle = noisy;
00124         return -1;
00125       } else
00126       return -10;
00127   }
00128   case PUTDOWN:
00129     if (pass == landmarks.size()
00130         && coord_t(ns,ew) == landmarks[static_cast<unsigned>(dest)]) {
00131       pass = dest;
00132       return 20;
00133     } else
00134       return -10;
00135   }
00136   std::cerr << "Unreachable point reached in Taxi::apply!!!\n";
00137   return 0; // unreachable, I hope
00138 }
00139 
00140 bool Taxi::terminal() const {
00141   return pass == dest;
00142 }
00143 
00144 void Taxi::reset() {
00145   ns = rng.uniformDiscrete(1, grid->height()) - 1;
00146   ew = rng.uniformDiscrete(1, grid->width()) - 1;
00147   pass = rng.uniformDiscrete(1, landmarks.size()) - 1;
00148   do dest = rng.uniformDiscrete(1, landmarks.size()) - 1;
00149   while (dest == pass);
00150   fickle = false;
00151 }
00152 
00153 
00154 
00155 int Taxi::getNumActions() {
00156   return 6;
00157 }
00158 
00159 
00160 const Gridworld *Taxi::create_default_map() {
00161   std::vector<std::vector<bool> > nsv(5, std::vector<bool>(4,false));
00162   std::vector<std::vector<bool> > ewv(5, std::vector<bool>(4,false));
00163   ewv[0][0] = true;
00164   ewv[0][2] = true;
00165   ewv[1][0] = true;
00166   ewv[1][2] = true;
00167   ewv[3][1] = true;
00168   ewv[4][1] = true;
00169   return new Gridworld(5,5,nsv,ewv);
00170 }
00171 
00172 Taxi::taxi_action_t Taxi::add_noise(taxi_action_t action) {
00173   switch(action) {
00174   case NORTH:
00175   case SOUTH:
00176     return rng.bernoulli(0.8) ? action : (rng.bernoulli(0.5) ? EAST : WEST);
00177   case EAST:
00178   case WEST:
00179     return rng.bernoulli(0.8) ? action : (rng.bernoulli(0.5) ? NORTH : SOUTH);
00180   default:
00181     return action;
00182   }
00183 }
00184 
00185 void Taxi::apply_fickle_passenger() {
00186 
00187   if (fickle) {
00188     fickle = false;
00189     if (rng.bernoulli(0.3)) {
00190       dest += rng.uniformDiscrete(1, landmarks.size() - 1);
00191       dest = static_cast<int>(dest) % landmarks.size();
00192     }
00193   }
00194 
00195 }
00196 
00197 void Taxi::randomize_landmarks() {
00198   std::vector<unsigned> indices(landmarks.size());
00199   const unsigned n = grid->height() * grid->width();
00200   for (unsigned i = 0; i < indices.size(); ++i) {
00201     unsigned index;
00202     bool duplicate;
00203     do {
00204       index = rng.uniformDiscrete(1,n) - 1;
00205       duplicate = false;
00206       for (unsigned j = 0; j < i; ++j)
00207         if (index == indices[j])
00208           duplicate = true;
00209     } while (duplicate);
00210     indices[i] = index;
00211   }
00212   for (unsigned i = 0; i < indices.size(); ++i)
00213     landmarks[i] = coord_t(indices[i] / grid->width(),
00214                            indices[i] % grid->width());
00215 }
00216 
00217 void Taxi::randomize_landmarks_to_corners() {
00218   for (unsigned i = 0; i < landmarks.size(); ++i) {
00219     int ns = rng.uniformDiscrete(0,1);
00220     int ew = rng.uniformDiscrete(0,1);
00221     if (1 == i/2)
00222       ns = grid->height() - ns - 1;
00223     if (1 == i%2)
00224       ew = grid->width() - ew - 1;
00225     landmarks[i] = coord_t(ns,ew);
00226   }
00227 }
00228 
00229 
00230 void Taxi::setSensation(std::vector<float> newS){
00231   if (s.size() != newS.size()){
00232     cerr << "Error in sensation sizes" << endl;
00233   }
00234 
00235   for (unsigned i = 0; i < newS.size(); i++){
00236     s[i] = (int)newS[i];
00237   }
00238 }
00239 
00240 std::vector<experience> Taxi::getSeedings() {
00241 
00242   // return seedings
00243   std::vector<experience> seeds;
00244 
00245   if (true)
00246     return seeds;
00247   // REMOVE THIS TO USE SEEDINGS
00248 
00249   // single seed for each of 4 drop off and pickup cases
00250   for (int i = 0; i < 4; i++){
00251     // drop off
00252     seeds.push_back(getExp(landmarks[i].first, landmarks[i].second, 4, i, PUTDOWN));
00253     // pick up
00254     seeds.push_back(getExp(landmarks[i].first, landmarks[i].second, i, rng.uniformDiscrete(0,3), PICKUP));
00255   }
00256 
00257   reset();
00258 
00259   return seeds;
00260 
00261 }
00262 
00263 experience Taxi::getExp(float s0, float s1, float s2, float s3, int a){
00264 
00265   experience e;
00266 
00267   e.s.resize(4, 0.0);
00268   e.next.resize(4, 0.0);
00269 
00270   ns = s0;
00271   ew = s1;
00272   pass = s2;
00273   dest = s3;
00274 
00275   e.act = a;
00276   e.s = sensation();
00277   e.reward = apply(e.act);
00278 
00279   e.terminal = terminal();
00280   e.next = sensation();
00281 
00282   return e;
00283 }
00284 
00285 
00286 void Taxi::getMinMaxFeatures(std::vector<float> *minFeat,
00287                              std::vector<float> *maxFeat){
00288 
00289   minFeat->resize(s.size(), 0.0);
00290   maxFeat->resize(s.size(), 1.0);
00291 
00292   (*minFeat)[0] = 0.0;
00293   (*maxFeat)[0] = 4.0;
00294   (*minFeat)[1] = 0.0;
00295   (*maxFeat)[1] = 4.0;
00296   (*minFeat)[2] = 0.0;
00297   (*maxFeat)[2] = 4.0;
00298   (*minFeat)[3] = 0.0;
00299   (*maxFeat)[3] = 3.0;
00300 
00301 }
00302 
00303 void Taxi::getMinMaxReward(float *minR,
00304                            float *maxR){
00305 
00306   *minR = -10.0;
00307   *maxR = 20.0;
00308 
00309 }


rl_env
Author(s):
autogenerated on Thu Jun 6 2019 22:00:24