fourrooms.cc
Go to the documentation of this file.
00001 #include <rl_env/fourrooms.hh>
00002 
00003 /*
00004 FourRooms::FourRooms(Random &rand, const Gridworld *gridworld, bool stochastic):
00005   grid(gridworld), goal(coord_t(2.,2.)), noisy(stochastic), rng(rand),
00006   s(2),
00007   ns(s[0]),
00008   ew(s[1])
00009 {
00010   randomize_goal();
00011   reset();
00012 }
00013 */
00014 
00015  
00016 FourRooms::FourRooms(Random &rand):
00017   grid(create_default_map()),
00018   goal(coord_t(1.,10.)), 
00019   negReward(true),
00020   noisy(false),
00021   extraReward(false),
00022   rewardSensor(false),
00023   rng(rand),
00024   doorway(coord_t(2.,4.)),
00025   s(2),
00026   unused(6),
00027   ns(s[0]),
00028   ew(s[1]),
00029   distN(unused[0]),
00030   distS(unused[1]),
00031   distE(unused[2]),
00032   distW(unused[3]),
00033   rewardEW(unused[4]),
00034   rewardNS(unused[5]),
00035   goalOption(false)
00036 {
00037   reset();
00038   //cout << *this << endl;
00039 }
00040  
00041 
00042 FourRooms::FourRooms(Random &rand, bool stochastic, bool negReward, 
00043                      bool exReward):
00044   grid(create_default_map()),
00045   goal(coord_t(1.,10.)), 
00046   negReward(negReward),
00047   noisy(stochastic),
00048   extraReward(exReward),
00049   rewardSensor(false),
00050   rng(rand),
00051   doorway(coord_t(2.,4.)),
00052   s(2),
00053   unused(6),
00054   ns(s[0]),
00055   ew(s[1]),
00056   distN(unused[0]),
00057   distS(unused[1]),
00058   distE(unused[2]),
00059   distW(unused[3]),
00060   rewardEW(unused[4]),
00061   rewardNS(unused[5]),
00062   goalOption(goalOption)
00063 {
00064   reset();
00065 }
00066 
00067 // Create the version with extra state features for wall distances
00068 FourRooms::FourRooms(Random &rand, bool stochastic, bool negReward):
00069   grid(create_default_map()),
00070   goal(coord_t(1.,10.)), 
00071   negReward(negReward),
00072   noisy(stochastic),
00073   extraReward(true), //false),
00074   rewardSensor(false),
00075   rng(rand),
00076   doorway(coord_t(2.,4.)),
00077   s(6),
00078   unused(2),
00079   ns(s[0]),
00080   ew(s[1]),
00081   distN(s[2]),
00082   distS(s[3]),
00083   distE(s[4]),
00084   distW(s[5]),
00085   rewardEW(unused[0]),
00086   rewardNS(unused[1]),
00087   goalOption(false)
00088 {
00089   reset();
00090 }
00091 
00092 
00093 // Create the version with extra state features for wall distances and 
00094 // reward distance
00095 FourRooms::FourRooms(Random &rand, bool stochastic):
00096   grid(create_default_map()),
00097   goal(coord_t(1.,10.)),
00098   negReward(true),
00099   noisy(stochastic),
00100   extraReward(false),
00101   rewardSensor(false),
00102   rng(rand),
00103   doorway(coord_t(2.,4.)),
00104   s(8),
00105   unused(0),
00106   ns(s[0]),
00107   ew(s[1]),
00108   distN(s[2]),
00109   distS(s[3]),
00110   distE(s[4]),
00111   distW(s[5]),
00112   rewardEW(s[6]),
00113   rewardNS(s[7]),
00114   goalOption(false)
00115 {
00116   //  cout <<  "Four room with wall dist and reward sensor" << endl;
00117   reset();
00118 }
00119 
00120 
00121 
00122 /*
00123 FourRooms::FourRooms(Random &rand, unsigned width, unsigned height, bool stochastic):
00124   grid(new Gridworld(height, width, rand)),
00125   goal(coord_t(2.,2.)), 
00126   noisy(stochastic), rng(rand),
00127   doorway(NULL), 
00128   s(2),
00129   ns(s[0]),
00130   ew(s[1])
00131 {
00132   randomize_goal();
00133   reset();
00134 }
00135 */
00136 
00137 FourRooms::~FourRooms() { delete grid; }
00138 
00139 const std::vector<float> &FourRooms::sensation() const { 
00140   //cout << "At state " << s[0] << ", " << s[1] << endl;
00141 
00142   return s; 
00143 }
00144 
00145 float FourRooms::apply(int action) {
00146 
00147   //cout << "Taking action " << static_cast<room_action_t>(action) << endl;
00148 
00149   const room_action_t effect =
00150     noisy
00151     ? add_noise(static_cast<room_action_t>(action)) 
00152     : static_cast<room_action_t>(action);
00153   switch(effect) {
00154   case NORTH:
00155     if (!grid->wall(static_cast<unsigned>(ns),
00156                     static_cast<unsigned>(ew),
00157                     effect))
00158       {
00159         ++ns;
00160         calcWallDistances();
00161       }
00162     return reward(effect);
00163   case SOUTH:
00164     if (!grid->wall(static_cast<unsigned>(ns),
00165                     static_cast<unsigned>(ew),
00166                     effect))
00167       {
00168         --ns;
00169         calcWallDistances();
00170       }
00171     return reward(effect);
00172   case EAST:
00173     if (!grid->wall(static_cast<unsigned>(ns),
00174                     static_cast<unsigned>(ew),
00175                     effect))
00176       {
00177         ++ew;
00178         calcWallDistances();
00179       }
00180     return reward(effect);
00181   case WEST:
00182     if (!grid->wall(static_cast<unsigned>(ns),
00183                     static_cast<unsigned>(ew),
00184                     effect))
00185       {
00186         --ew;
00187         calcWallDistances();
00188       }
00189     return reward(effect);
00190   }
00191   std::cerr << "Unreachable point reached in FourRooms::apply!!!\n";
00192   return 0; // unreachable, I hope
00193 }
00194 
00195 
00196 float FourRooms::reward(int effect) {
00197   
00198   if (extraReward){
00199     // 0 on goal
00200     if (terminal())
00201       return 0;
00202 
00203     // 0 when heading right dir towards door
00204     // towards top middle door
00205     if (ew < 6 && ns == 8 && effect == EAST){
00206       return -1;
00207     }
00208 
00209     // towards left door
00210     if (ew == 1 && ns > 4 && effect == SOUTH){
00211       return -1;
00212     }
00213 
00214     // towards right door
00215     if (ew == 8 && ns > 3 && effect == SOUTH){
00216       return -1;
00217     }
00218 
00219     // towrads bottom door
00220     if (ew < 6 && ns == 1 && effect == EAST){
00221       return -1;
00222     }
00223 
00224     // 0 when heading towards goal
00225     if (ns == 1 && effect == EAST){
00226       return -1;
00227     }
00228     if (ew == 10 && ns < 4 && effect == SOUTH){
00229       return -1;
00230     }
00231 
00232     // normally -2
00233     return -2;
00234 
00235   }
00236 
00237 
00238   if (negReward){
00239     // normally -1 and 0 on goal
00240     if (terminal())
00241       return 0;
00242     else 
00243       return -1;
00244     
00245   }else{
00246 
00247     // or we could do 0 and 1 on goal
00248     if (terminal())
00249       return 1;
00250     else 
00251       return 0;
00252   }
00253 }
00254 
00255 
00256 bool FourRooms::terminal() const {
00257   // current position equal to goal??
00258   return coord_t(ns,ew) == goal;
00259 }
00260 
00261 
00262 void FourRooms::calcWallDistances(){
00263 
00264   // calculate distances East and West
00265   // if we're not in the same row as a doorway
00266   if (ns != 1 && ns != 8){
00267     // left side of wall
00268     if (ew < 5){
00269       distW = ew;
00270       distE = 4 - ew;
00271     }
00272     // right side of wall
00273     else {
00274       distW = ew - 6;
00275       distE = 10 - ew;
00276     }
00277   } 
00278   // doorway
00279   else {
00280     distW = ew;
00281     distE = 10 - ew;
00282   }
00283 
00284   // in a vertical doorway
00285   if (ns == 5 && ew == 1){
00286     distW = 0;
00287     distE = 0;
00288   }
00289   if (ns == 4 && ew == 8){
00290     distW = 0;
00291     distE = 0;
00292   }
00293 
00294   // calculate NS
00295   // left side
00296   if (ew < 5){
00297     // not in doorway column
00298     if (ew != 1){
00299       // top room
00300       if (ns > 5){
00301         distN = 10 - ns;
00302         distS = ns - 6;
00303       }
00304       // bottom room
00305       else {
00306         distN = 4 - ns;
00307         distS = ns;
00308       }
00309     }
00310     // doorway column
00311     else {
00312       distN = 10 - ns;
00313       distS = ns;
00314     }
00315   }
00316   // right side
00317   else {
00318     // not in doorway column
00319     if (ew != 8){
00320       // top room
00321       if (ns > 4){
00322         distN = 10-ns;
00323         distS = ns - 5;
00324       }
00325       // bottom room
00326       else {
00327         distN = 3 - ns;
00328         distS = ns;
00329       }
00330     }
00331     // doorway column
00332     else {
00333       distN = 10-ns;
00334       distS = ns;
00335     }
00336   }
00337 
00338   // in horiz doorway
00339   if (ew == 5 && (ns == 1 || ns == 8)){
00340     distN = 0;
00341     distS = 0;
00342   }
00343 
00344 
00345   // calculate reward distances
00346   // can see it e/w
00347   if (ns == 1){
00348     rewardEW = 10 - ew;
00349   }
00350   else {
00351     rewardEW = 100;
00352   }
00353 
00354   // can see ns
00355   if (ew == 10 && ns < 4){
00356     rewardNS = 1 - ns;
00357   }
00358   else {
00359     rewardNS = 100;
00360   }
00361   
00362   /*
00363   cout << "x,y: " << ew << ", " << ns << " N,S,E,W: " 
00364        << distN << ", " << distS << ", " 
00365        << distE << ", " << distW << " reward EW, NS: " 
00366        << rewardEW << ", " << rewardNS << endl;
00367   */
00368 }
00369 
00370 
00371 void FourRooms::reset() {
00372   // start randomly in upper left room (goal is lower right)
00373   ns = rng.uniformDiscrete(6, grid->height()-1);
00374   ew = rng.uniformDiscrete(0, 4);
00375 
00376   //ns = 8;
00377   //ew = 2;
00378 
00379   //ns = 4;
00380   //ew = 9;
00381 
00382   calcWallDistances();
00383 }
00384 
00385 
00386 std::vector<std::vector<float> >  FourRooms::getSubgoals(){
00387 
00388   //cout << "Getting room subgoals " << endl;
00389 
00390   // Create vector of state representations, each is a subgoal
00391   std::vector<std::vector<float> > subgoals;
00392 
00393   
00394   std::vector<float> subgoal(2);
00395 
00396   // between two left rooms
00397   subgoal[0] = 5;
00398   subgoal[1] = 1;
00399   subgoals.push_back(subgoal);
00400   
00401   // between two right rooms
00402   subgoal[0] = 4;
00403   subgoal[1] = 8;
00404   subgoals.push_back(subgoal);
00405   
00406   // between two top rooms
00407   subgoal[0] = 8;
00408   subgoal[1] = 5;
00409   subgoals.push_back(subgoal);
00410   
00411   // between two lower rooms
00412   subgoal[0] = 1;
00413   subgoal[1] = 5;
00414   subgoals.push_back(subgoal);
00415 
00416   if (goalOption){
00417     // actual goal
00418     subgoal[0] = 1;
00419     subgoal[1] = 10;
00420     subgoals.push_back(subgoal);
00421   }
00422 
00423   return subgoals;
00424 
00425 }
00426 
00427 
00428 int FourRooms::getNumActions(){
00429   return 4;
00430 }
00431 
00432 
00433 std::ostream &operator<<(std::ostream &out, const FourRooms &rooms) {
00434   out << "Map:\n" << *rooms.grid;
00435 
00436   // print goal
00437   out << "Goal: row " << rooms.goal.first
00438       << ", column " << rooms.goal.second << "\n";
00439 
00440   // print doorway
00441   out << "Doorway: row " << rooms.doorway.first
00442       << ", column " << rooms.doorway.second << "\n";
00443 
00444   return out;
00445 }
00446 
00447 const Gridworld *FourRooms::create_default_map() {
00448   int width = 11;
00449   int height = 11;
00450   std::vector<std::vector<bool> > nsv(width, std::vector<bool>(height-1,false));
00451   std::vector<std::vector<bool> > ewv(height, std::vector<bool>(width-1,false));
00452 
00453   // put the vertical wall between the two rooms
00454   for (int j = 0; j < height; j++){
00455         // skip doorways at 1 and 8
00456         if (j == 1 || j == 8)
00457                 continue;
00458     ewv[j][4] = true;
00459     ewv[j][5] = true;
00460   }
00461   
00462   nsv[5][0] = true;
00463   nsv[5][1] = true;
00464   nsv[5][7] = true;
00465   nsv[5][8] = true;
00466 
00467   // put the horizontal wall for the left room
00468   for (int i = 0; i < 6; i++){
00469         // skip doorway at 1
00470         if (i == 1)
00471                 continue;
00472         nsv[i][4] = true;
00473         nsv[i][5] = true;
00474   }
00475 
00476   ewv[5][0] = true;
00477   ewv[5][1] = true;
00478 
00479   // put the horizontal wall for the right room
00480   for (int i = 5; i < width; i++){
00481         // skip doorway at 8
00482         if (i == 8)
00483                 continue;
00484         nsv[i][3] = true;
00485         nsv[i][4] = true;
00486   }     
00487   
00488   ewv[4][7] = true;
00489   ewv[4][8] = true;
00490 
00491   return new Gridworld(height, width, nsv, ewv);
00492 }
00493 
00494 FourRooms::room_action_t FourRooms::add_noise(room_action_t action) {
00495   switch(action) {
00496   case NORTH:
00497   case SOUTH:
00498     return rng.bernoulli(0.8) ? action : (rng.bernoulli(0.5) ? EAST : WEST);
00499   case EAST:
00500   case WEST:
00501     return rng.bernoulli(0.8) ? action : (rng.bernoulli(0.5) ? NORTH : SOUTH);
00502   default:
00503     return action;
00504   }
00505 }
00506 
00507 
00508 void FourRooms::randomize_goal() {
00509   const unsigned n = grid->height() * grid->width();
00510   unsigned index = rng.uniformDiscrete(1,n) - 1;
00511   goal = coord_t(index / grid->width(), index % grid->width());
00512 }
00513 
00514 
00516 void FourRooms::setSensation(std::vector<float> newS){
00517   if (s.size() != newS.size()){
00518     cerr << "Error in sensation sizes" << endl;
00519   }
00520 
00521   for (unsigned i = 0; i < newS.size(); i++){
00522     s[i] = (int)newS[i];
00523   }
00524 }
00525 
00526 
00527 void FourRooms::getMinMaxFeatures(std::vector<float> *minFeat,
00528                                   std::vector<float> *maxFeat){
00529   
00530   minFeat->resize(s.size(), 0.0);
00531   maxFeat->resize(s.size(), 10.0);
00532   
00533   if (s.size() > 2) {
00534     for (unsigned i = 2; i < s.size(); i++){
00535       (*minFeat)[i] = -10.0;
00536     }
00537   }
00538 
00539 }
00540 
00541 void FourRooms::getMinMaxReward(float *minR,
00542                                float *maxR){
00543   
00544   if (extraReward){
00545     *minR = -2.0;
00546     *maxR = 0.0;
00547   }
00548   else if (negReward){
00549     *minR = -1.0;
00550     *maxR = 0.0;    
00551   }else{
00552     *minR = 0.0;
00553     *maxR = 1.0;
00554   }
00555 
00556 }


rl_env
Author(s):
autogenerated on Thu Jun 6 2019 22:00:23