energyrooms.cc
Go to the documentation of this file.
00001 #include <rl_env/energyrooms.hh>
00002 
00003 /*
00004   EnergyRooms::EnergyRooms(Random &rand, const Gridworld *gridworld, bool stochastic):
00005   grid(gridworld), goal(coord_t(2.,2.)), noisy(stochastic), rng(rand),
00006   s(2),
00007   ns(s[0]),
00008   ew(s[1])
00009   {
00010   randomize_goal();
00011   reset();
00012   }
00013 */
00014 
00015 EnergyRooms::EnergyRooms(Random &rand, bool negReward):
00016   grid(create_default_map()),
00017   goal(coord_t(1.,10.)),
00018   negReward(negReward),
00019   noisy(false),
00020   rng(rand),
00021   doorway(coord_t(2.,4.)),
00022   s(3),
00023   ns(s[0]),
00024   ew(s[1]),
00025   energy(s[2]),
00026   goalOption(false),
00027   fuel(false)
00028 {
00029   reset();
00030   //cout << *this << endl;
00031 }
00032 
00033 EnergyRooms::EnergyRooms(Random &rand, bool stochastic, bool negReward,
00034                          bool goalOption):
00035   grid(create_default_map()),
00036   goal(coord_t(1.,10.)),
00037   negReward(negReward),
00038   noisy(stochastic),
00039   rng(rand),
00040   doorway(coord_t(2.,4.)),
00041   s(3),
00042   ns(s[0]),
00043   ew(s[1]),
00044   energy(s[2]),
00045   goalOption(goalOption),
00046   fuel(false)
00047 {
00048   reset();
00049 }
00050 
00051 EnergyRooms::EnergyRooms(Random &rand, bool stochastic, bool negReward,
00052                          bool goalOption, bool fuel):
00053   grid(create_default_map()),
00054   goal(coord_t(1.,10.)),
00055   negReward(negReward),
00056   noisy(stochastic),
00057   rng(rand),
00058   doorway(coord_t(2.,4.)),
00059   s(3),
00060   ns(s[0]),
00061   ew(s[1]),
00062   energy(s[2]),
00063   goalOption(goalOption),
00064   fuel(fuel)
00065 {
00066   reset();
00067 }
00068 
00069 
00070 /*
00071   EnergyRooms::EnergyRooms(Random &rand, unsigned width, unsigned height, bool stochastic):
00072   grid(new Gridworld(height, width, rand)),
00073   goal(coord_t(2.,2.)),
00074   noisy(stochastic), rng(rand),
00075   doorway(NULL),
00076   s(2),
00077   ns(s[0]),
00078   ew(s[1])
00079   {
00080   randomize_goal();
00081   reset();
00082   }
00083 */
00084 
00085 EnergyRooms::~EnergyRooms() { delete grid; }
00086 
00087 const std::vector<float> &EnergyRooms::sensation() const {
00088   //cout << "At state " << s[0] << ", " << s[1] << endl;
00089 
00090   return s;
00091 }
00092 
00093 float EnergyRooms::apply(int action) {
00094 
00095   //cout << "Taking action " << static_cast<room_action_t>(action) << endl;
00096 
00097   // 80% of the time, lose one energy
00098   if (!noisy || rng.bernoulli(0.8))
00099     energy--;
00100 
00101   // many fuel squares, with varying amounts of fuel and reward
00102   if (fuel){
00103     if ((int)ns % 3 == 0 && (int)ew % 3 == 0){
00104       energy += (int)(5 + ns/3 + (11-ew)/3);
00105     }
00106     if (energy > 20.0)
00107       energy = 20.0;
00108   }
00109   else {
00110     // certain squares reset you to 10
00111     if (ns == 7 && ew == 3)
00112       energy = 10;
00113     if (ns == 7 && ew == 7)
00114       energy = 10;
00115     if (ns == 3 && ew == 3)
00116       energy = 10;
00117     if (ns == 7 && ew == 7)
00118       energy = 10;
00119   }
00120 
00121   // never go below 0
00122   if (energy < 0.0)
00123     energy = 0;
00124 
00125 
00126   const room_action_t effect =
00127     noisy
00128     ? add_noise(static_cast<room_action_t>(action))
00129     : static_cast<room_action_t>(action);
00130   switch(effect) {
00131   case NORTH:
00132     if (!grid->wall(static_cast<unsigned>(ns),
00133                     static_cast<unsigned>(ew),
00134                     effect))
00135       {
00136         ++ns;
00137       }
00138     return reward();
00139   case SOUTH:
00140     if (!grid->wall(static_cast<unsigned>(ns),
00141                     static_cast<unsigned>(ew),
00142                     effect))
00143       {
00144         --ns;
00145       }
00146     return reward();
00147   case EAST:
00148     if (!grid->wall(static_cast<unsigned>(ns),
00149                     static_cast<unsigned>(ew),
00150                     effect))
00151       {
00152         ++ew;
00153       }
00154     return reward();
00155   case WEST:
00156     if (!grid->wall(static_cast<unsigned>(ns),
00157                     static_cast<unsigned>(ew),
00158                     effect))
00159       {
00160         --ew;
00161       }
00162     return reward();
00163   }
00164 
00165   std::cerr << "Unreachable point reached in EnergyRooms::apply!!!\n";
00166   return 0; // unreachable, I hope
00167 }
00168 
00169 
00170 float EnergyRooms::reward() {
00171 
00172   if (negReward){
00173     // normally -1 and 0 on goal
00174     if (terminal())
00175       return 0;
00176     else if (energy <= 0.1){
00177       if (fuel)
00178         return -10;
00179       else
00180         return -2;
00181     }
00182     // normal square and normal energy
00183     else{
00184       // many squares of random cost
00185       if (fuel){
00186         if ((int)ns % 3 == 0 && (int)ew % 3 == 0)
00187           return -2 + (int)(-ew/5 -((int)ns%4));
00188 
00189         else
00190           return -1;
00191       }
00192       else
00193         return -1;
00194     }
00195 
00196   } // not negReward
00197   else{
00198 
00199     // or we could do 0 and 1 on goal
00200     if (terminal())
00201       return 1;
00202     else if (energy <= 0.1){
00203       if (fuel)
00204         return -10;
00205       else
00206         return -2;
00207     }else
00208       return 0;
00209   }
00210 }
00211 
00212 
00213 bool EnergyRooms::terminal() const {
00214   // current position equal to goal??
00215   return coord_t(ns,ew) == goal;
00216 }
00217 
00218 
00219 void EnergyRooms::reset() {
00220   // start randomly in upper left room (goal is lower right)
00221   ns = rng.uniformDiscrete(6, grid->height()-1);
00222   ew = rng.uniformDiscrete(0, 4);
00223   energy = 10;
00224 
00225   // if fuel, start with random amount of fuel
00226   if (fuel) {
00227     energy = rng.uniformDiscrete(1, 20);
00228   }
00229 
00230   //ns = 8;
00231   //ew = 2;
00232 
00233   //ns = 4;
00234   //ew = 9;
00235 }
00236 
00237 
00238 std::vector<std::vector<float> >  EnergyRooms::getSubgoals(){
00239 
00240   //cout << "Getting room subgoals " << endl;
00241 
00242   // Create vector of state representations, each is a subgoal
00243   std::vector<std::vector<float> > subgoals;
00244 
00245 
00246   std::vector<float> subgoal(2);
00247 
00248   // between two left rooms
00249   subgoal[0] = 5;
00250   subgoal[1] = 1;
00251   subgoals.push_back(subgoal);
00252 
00253   // between two right rooms
00254   subgoal[0] = 4;
00255   subgoal[1] = 8;
00256   subgoals.push_back(subgoal);
00257 
00258   // between two top rooms
00259   subgoal[0] = 8;
00260   subgoal[1] = 5;
00261   subgoals.push_back(subgoal);
00262 
00263   // between two lower rooms
00264   subgoal[0] = 1;
00265   subgoal[1] = 5;
00266   subgoals.push_back(subgoal);
00267 
00268   if (goalOption){
00269     // actual goal
00270     subgoal[0] = 1;
00271     subgoal[1] = 10;
00272     subgoals.push_back(subgoal);
00273   }
00274 
00275   return subgoals;
00276 
00277 }
00278 
00279 
00280 int EnergyRooms::getNumActions(){
00281   return 4;
00282 }
00283 
00284 
00285 std::ostream &operator<<(std::ostream &out, const EnergyRooms &rooms) {
00286   out << "Map:\n" << *rooms.grid;
00287 
00288   // print goal
00289   out << "Goal: row " << rooms.goal.first
00290       << ", column " << rooms.goal.second << "\n";
00291 
00292   // print doorway
00293   out << "Doorway: row " << rooms.doorway.first
00294       << ", column " << rooms.doorway.second << "\n";
00295 
00296   return out;
00297 }
00298 
00299 const Gridworld *EnergyRooms::create_default_map() {
00300   int width = 11;
00301   int height = 11;
00302   std::vector<std::vector<bool> > nsv(width, std::vector<bool>(height-1,false));
00303   std::vector<std::vector<bool> > ewv(height, std::vector<bool>(width-1,false));
00304 
00305   // put the vertical wall between the two rooms
00306   for (int j = 0; j < height; j++){
00307     // skip doorways at 1 and 8
00308     if (j == 1 || j == 8)
00309       continue;
00310     ewv[j][4] = true;
00311     ewv[j][5] = true;
00312   }
00313 
00314   nsv[5][0] = true;
00315   nsv[5][1] = true;
00316   nsv[5][7] = true;
00317   nsv[5][8] = true;
00318 
00319   // put the horizontal wall for the left room
00320   for (int i = 0; i < 6; i++){
00321     // skip doorway at 1
00322     if (i == 1)
00323       continue;
00324     nsv[i][4] = true;
00325     nsv[i][5] = true;
00326   }
00327 
00328   ewv[5][0] = true;
00329   ewv[5][1] = true;
00330 
00331   // put the horizontal wall for the right room
00332   for (int i = 5; i < width; i++){
00333     // skip doorway at 8
00334     if (i == 8)
00335       continue;
00336     nsv[i][3] = true;
00337     nsv[i][4] = true;
00338   }
00339 
00340   ewv[4][7] = true;
00341   ewv[4][8] = true;
00342 
00343   return new Gridworld(height, width, nsv, ewv);
00344 }
00345 
00346 EnergyRooms::room_action_t EnergyRooms::add_noise(room_action_t action) {
00347   switch(action) {
00348   case NORTH:
00349   case SOUTH:
00350     return rng.bernoulli(0.8) ? action : (rng.bernoulli(0.5) ? EAST : WEST);
00351   case EAST:
00352   case WEST:
00353     return rng.bernoulli(0.8) ? action : (rng.bernoulli(0.5) ? NORTH : SOUTH);
00354   default:
00355     return action;
00356   }
00357 }
00358 
00359 
00360 void EnergyRooms::randomize_goal() {
00361   const unsigned n = grid->height() * grid->width();
00362   unsigned index = rng.uniformDiscrete(1,n) - 1;
00363   goal = coord_t(index / grid->width(), index % grid->width());
00364 }
00365 
00366 
00367 void EnergyRooms::getMinMaxFeatures(std::vector<float> *minFeat,
00368                                     std::vector<float> *maxFeat){
00369 
00370   minFeat->resize(s.size(), 0.0);
00371   maxFeat->resize(s.size(), 10.0);
00372 
00373   (*maxFeat)[2] = 20.0;
00374 
00375 }
00376 
00377 void EnergyRooms::getMinMaxReward(float *minR,
00378                                  float *maxR){
00379 
00380   *minR = -10.0;
00381   *maxR = 1.0;
00382 
00383 }


rl_env
Author(s):
autogenerated on Thu Jun 6 2019 22:00:23