tworooms.cc
Go to the documentation of this file.
00001 
00007 #include <rl_env/tworooms.hh>
00008 
00009 
00010 TwoRooms::TwoRooms(Random &rand, bool stochastic, bool rewardType, 
00011                    int actDelay, bool multiGoal):
00012   grid(create_default_map()),
00013   goal(coord_t(1.,1.)), 
00014   goal2(coord_t(4.,1.)),
00015   negReward(rewardType),
00016   noisy(stochastic),
00017   actDelay(actDelay),
00018   multiGoal(multiGoal),
00019   rng(rand),
00020   doorway(coord_t(2.,5.)),
00021   s(2),
00022   ns(s[0]),
00023   ew(s[1])
00024 {
00025   reset();
00026 }
00027 
00028 
00029 TwoRooms::~TwoRooms() { delete grid; }
00030 
00031 const std::vector<float> &TwoRooms::sensation() const { 
00032   //cout << "At state " << s[0] << ", " << s[1] << endl;
00033 
00034   return s; 
00035 }
00036 
00037 float TwoRooms::apply(int action) {
00038 
00039   //cout << "Taking action " << static_cast<room_action_t>(action) << endl;
00040 
00041   int actUsed = action;
00042 
00043   if (actDelay > 0){
00044     actUsed = actHistory.front();
00045     actHistory.push_back(action);
00046     actHistory.pop_front();
00047   }
00048 
00049   if (actUsed > -1){
00050 
00051     const room_action_t effect =
00052       noisy
00053       ? add_noise(static_cast<room_action_t>(actUsed)) 
00054       : static_cast<room_action_t>(actUsed);
00055     switch(effect) {
00056     case NORTH:
00057       if (!grid->wall(static_cast<unsigned>(ns),
00058                       static_cast<unsigned>(ew),
00059                       effect))
00060         {
00061           ++ns;
00062         }
00063       return reward();
00064     case SOUTH:
00065       if (!grid->wall(static_cast<unsigned>(ns),
00066                       static_cast<unsigned>(ew),
00067                       effect))
00068         {
00069           --ns;
00070         }
00071       return reward();
00072     case EAST:
00073       if (!grid->wall(static_cast<unsigned>(ns),
00074                       static_cast<unsigned>(ew),
00075                       effect))
00076         {
00077           ++ew;
00078         }
00079       return reward();
00080     case WEST:
00081       if (!grid->wall(static_cast<unsigned>(ns),
00082                       static_cast<unsigned>(ew),
00083                       effect))
00084         {
00085           --ew;
00086         }
00087       return reward();
00088     }
00089 
00090     std::cerr << "Unreachable point reached in TwoRooms::apply!!!\n";
00091   }
00092   
00093   return 0; 
00094 }
00095 
00096 // return the reward for this move
00097 float TwoRooms::reward() {
00098 
00099   /*
00100   if (coord_t(ns,ew) == goal2)
00101     cout << "At goal 2, " << useGoal2 << endl;
00102   if (coord_t(ns,ew) == goal)
00103     cout << "At goal 1, " << !useGoal2 << endl;
00104   */
00105 
00106   if (negReward){
00107     // normally -1 and 0 on goal
00108     if (terminal())
00109       return 0;
00110     else 
00111       return -1;
00112     
00113   }else{
00114 
00115     // or we could do 0 and 1 on goal
00116     if (terminal())
00117       return 1;
00118     else 
00119       return 0;
00120   }
00121 }
00122 
00123 
00124 
00125 bool TwoRooms::terminal() const {
00126   // current position equal to goal??
00127   if (useGoal2)
00128     return coord_t(ns,ew) == goal2;
00129   else
00130     return coord_t(ns,ew) == goal;
00131 }
00132 
00133 
00134 void TwoRooms::reset() {
00135   // start randomly in right room
00136   ns = rng.uniformDiscrete(0, grid->height() - 1 );
00137   ew = rng.uniformDiscrete(6, grid->width() - 1);
00138 
00139   // a history of no_acts
00140   actHistory.clear();
00141   actHistory.assign(actDelay, -1);
00142 
00143   if (multiGoal){
00144     useGoal2 = rng.bernoulli(0.5);
00145     //cout << "goal2? " << useGoal2 << endl;
00146   }
00147   else {
00148     useGoal2 = false;
00149   }
00150 
00151   //ns = 4;
00152   //ew = 9;
00153 }
00154 
00155 
00156 
00157 int TwoRooms::getNumActions(){
00158   return 4;
00159 }
00160 
00161 
00162 const Gridworld *TwoRooms::create_default_map() {
00163   int width = 11;
00164   int height = 5;
00165   std::vector<std::vector<bool> > nsv(width, std::vector<bool>(height-1,false));
00166   std::vector<std::vector<bool> > ewv(height, std::vector<bool>(width-1,false));
00167 
00168   // put the wall between the two rooms
00169   for (int j = 0; j < height; j++){
00170     // skip doorway
00171     if (j == 2)
00172       continue;
00173     ewv[j][4] = true;
00174     ewv[j][5] = true;
00175   }
00176 
00177   nsv[5][1] = true;
00178   nsv[5][2] = true;
00179 
00180   // add a doorway
00181   doorway = coord_t(2, 5);
00182 
00183   return new Gridworld(height, width, nsv, ewv);
00184 }
00185 
00186 TwoRooms::room_action_t TwoRooms::add_noise(room_action_t action) {
00187   switch(action) {
00188   case NORTH:
00189   case SOUTH:
00190     return rng.bernoulli(0.8) ? action : (rng.bernoulli(0.5) ? EAST : WEST);
00191   case EAST:
00192   case WEST:
00193     return rng.bernoulli(0.8) ? action : (rng.bernoulli(0.5) ? NORTH : SOUTH);
00194   default:
00195     return action;
00196   }
00197 }
00198 
00199 
00200 void TwoRooms::randomize_goal() {
00201   const unsigned n = grid->height() * grid->width();
00202   unsigned index = rng.uniformDiscrete(1,n) - 1;
00203   goal = coord_t(index / grid->width(), index % grid->width());
00204 }
00205 
00206 
00207 void TwoRooms::getMinMaxFeatures(std::vector<float> *minFeat,
00208                                  std::vector<float> *maxFeat){
00209   
00210   minFeat->resize(s.size(), 0.0);
00211   maxFeat->resize(s.size(), 10.0);
00212 
00213   (*maxFeat)[0] = 5.0;
00214 
00215 }
00216 
00217 void TwoRooms::getMinMaxReward(float *minR,
00218                               float *maxR){
00219   if (negReward){
00220     *minR = -1.0;
00221     *maxR = 0.0;    
00222   }else{
00223     *minR = 0.0;
00224     *maxR = 1.0;
00225   }
00226 }
00227 
00228 
00229 std::vector<experience> TwoRooms::getSeedings() {
00230 
00231   // return seedings
00232   std::vector<experience> seeds;
00233 
00234   //if (true)
00235   // return seeds;
00236   // REMOVE THIS TO USE SEEDINGS
00237 
00238   // single seed of terminal state
00239   useGoal2 = false;
00240   actHistory.clear();
00241   actHistory.assign(actDelay, SOUTH);
00242   seeds.push_back(getExp(2,1,SOUTH));
00243   
00244   // possible seed of 2nd goal
00245   if (multiGoal){
00246     useGoal2 = true;
00247     actHistory.clear();
00248     actHistory.assign(actDelay, NORTH);
00249     seeds.push_back(getExp(3,1,NORTH));
00250   }
00251 
00252   // single seed of doorway
00253   actHistory.clear();
00254   actHistory.assign(actDelay, WEST);
00255   seeds.push_back(getExp(2,6,WEST));
00256 
00257   reset();
00258 
00259   return seeds;
00260 
00261 }
00262 
00263 experience TwoRooms::getExp(float s0, float s1, int a){
00264 
00265   experience e;
00266 
00267   e.s.resize(2, 0.0);
00268   e.next.resize(2, 0.0);
00269 
00270   ns = s0;
00271   ew = s1;
00272 
00273   e.act = a;
00274   e.s = sensation();
00275   e.reward = apply(e.act);
00276 
00277   e.terminal = terminal();
00278   e.next = sensation();
00279 
00280   /*
00281   cout << "Seed from " << e.s[0] << "," << e.s[1] << " a: " << e.act
00282        << " r: " << e.reward << " term: " << e.terminal << endl;
00283   */
00284 
00285   reset();
00286 
00287   return e;
00288 }


rl_env
Author(s):
autogenerated on Thu Jun 6 2019 22:00:24