FuelRooms.cc
Go to the documentation of this file.
00001 
00008 #include <rl_env/FuelRooms.hh>
00009 
00010 
00011 FuelRooms::FuelRooms(Random &rand, bool extraVariation, bool stoch):
00012   height(20), width(30),
00013   goal(coord_t(11.0,24.0)), 
00014   extraVar(extraVariation),
00015   noisy(stoch),
00016   rng(rand),
00017   s(3),
00018   ns(s[0]),
00019   ew(s[1]),
00020   energy(s[2])
00021 {
00022 
00023   fuelVisited = 0;
00024   totalVisited = 0;
00025 
00026   stateVisits = new int*[21];
00027   for (int i = 0; i < 21; i++){
00028     stateVisits[i] = new int[31];
00029   }
00030 
00031   reset();
00032   resetVisits();
00033 }
00034 
00035 
00036 
00037 FuelRooms::~FuelRooms() { 
00038   for (int i = 0; i < 21; i++){
00039     delete [] stateVisits[i];
00040   }
00041   delete [] stateVisits;
00042 }
00043 
00044 const std::vector<float> &FuelRooms::sensation() const { 
00045   return s; 
00046 }
00047 
00048 float FuelRooms::apply(int action) {
00049 
00050   checkVisits();
00051 
00052   if (terminal())
00053     return 0.0;
00054 
00055   //cout << "Taking action " << static_cast<room_action_t>(action) << endl;
00056 
00057   //cout << "state: " << s[0] << ", " << s[1] << ", " << s[2] 
00058   //    << " act: " << action << endl;
00059 
00060   // 20% lose none
00061   // 20% lose two
00062   // 80% of the time, lose one energy
00063   //  if (!noisy || rng.bernoulli(0.8))
00064   energy--;
00065 
00066   // many fuel squares, with varying amounts reward
00067   if ((int)ns == 0 || (int)ns == height){
00068     energy += 20.0;
00069   }
00070   
00071 
00072   if (energy > 60.0) 
00073     energy = 60.0; 
00074 
00075   const room_action_t effect =
00076     noisy
00077     ? add_noise(static_cast<room_action_t>(action)) 
00078     : static_cast<room_action_t>(action);
00079 
00080   float r = reward(effect);
00081 
00082   if (effect == NORTH || effect == NORTHWEST || effect == NORTHEAST)
00083     if (ns < height)
00084       ns++;
00085 
00086   if (effect == SOUTH || effect == SOUTHWEST || effect == SOUTHEAST)
00087     if (ns > 0)
00088       ns--;
00089 
00090   if (effect == EAST || effect == SOUTHEAST || effect == NORTHEAST)
00091     if (ew < width)
00092       ew++;
00093 
00094   if (effect == WEST || effect == SOUTHWEST || effect == NORTHWEST)
00095     if (ew > 0)
00096       ew--;
00097   
00098   return r;
00099   
00100   std::cerr << "Unreachable point reached in FuelRooms::apply!!!\n";
00101   return 0; // unreachable, I hope
00102 }
00103 
00104 
00105 
00106 float FuelRooms::reward(int effect) {
00107   
00108   if (energy < 0.0){
00109     return -400.0;
00110   }
00111 
00112   if (terminal()){
00113     //cout << "Found goal!!!!" << endl;
00114     return 0.0;
00115   }
00116 
00117   // extra cost at fuel stations
00118   /*
00119   if (ns == 0 || ns == height){
00120     return -5.0;
00121   }
00122   */
00123 
00124   if (ns == 0 || ns == height){
00125     float base = -10.0;
00126     if (ns == 0)
00127       base = -13.0;
00128   
00129     // extra variation
00130     float var = 1.0;
00131     if (extraVar)
00132       var = 5.0;
00133     else
00134       base -= 8.0;
00135 
00136     return base - (((int)ew % 5) * var);
00137       
00138   }
00139  
00140   if (effect == NORTH || effect == SOUTH || effect == EAST || effect == WEST)
00141     return -1.0;
00142   else
00143     return -1.4;
00144   
00145 }
00146 
00147 
00148 bool FuelRooms::terminal() const {
00149   // current position equal to goal??
00150   // or out of fuel
00151   return (coord_t(ns,ew) == goal) || (energy < 0.0);
00152 }
00153 
00154 
00155 void FuelRooms::reset() {
00156   // start randomly in left region
00157   ns = rng.uniformDiscrete(7, 12);
00158   ew = rng.uniformDiscrete(0, 4);
00159 
00160   // start with random amount of fuel
00161   // enough to get to gas stations, not enough to get to goal
00162   // gas stations up to 9 steps away, goal at least 20 steps away
00163   energy = rng.uniformDiscrete(14, 18);
00164 
00165 }
00166 
00167 
00168 void FuelRooms::resetVisits(){
00169   fuelVisited = 0;
00170   totalVisited = 0;
00171 
00172   for (int i = 0; i < 21; i++)
00173     for (int j = 0; j < 31; j++)
00174       stateVisits[i][j] = 0;
00175 }
00176 
00177 void FuelRooms::checkVisits(){
00178   stateVisits[(int)ns][(int)ew]++;
00179   // first visit to a state
00180   if (stateVisits[(int)ns][(int)ew] == 1){
00181     totalVisited++;
00182     if (ns == 0 || ns == 20)
00183       fuelVisited++;
00184   }
00185 }
00186 
00187 void FuelRooms::printVisits(){
00188   float totalStates = 31.0 * 21.0;
00189   float fuelStates = 31.0 * 2.0;
00190   float otherStates = totalStates - fuelStates;
00191   cout << (fuelVisited/fuelStates) << endl << ((totalVisited-fuelVisited)/otherStates) << endl << (totalVisited/totalStates) << endl;
00192 }
00193 
00194 void FuelRooms::printVisitMap(string filename){
00195  ofstream fout(filename.c_str());
00196   for (int i = 0; i < 21; i++){
00197     for (int j = 0; j < 31; j++){
00198       fout << stateVisits[i][j] << "\t";
00199     }
00200     fout << endl;
00201   }
00202   fout.close();
00203 }
00204 
00205 
00206 int FuelRooms::getNumActions(){
00207   return 8;
00208 }
00209 
00210 
00211 FuelRooms::room_action_t FuelRooms::add_noise(room_action_t action) {
00212 
00213   int newAct = rng.bernoulli(0.8) ? action : (rng.bernoulli(0.5) ? action+1 : action-1);
00214 
00215   if (newAct < 0)
00216     newAct = getNumActions()-1;
00217   if (newAct >= getNumActions())
00218     newAct = 0;
00219 
00220   return (room_action_t)newAct;
00221 }
00222 
00223 
00224 
00225 std::vector<experience> FuelRooms::getSeedings() {
00226 
00227   // return seedings
00228   std::vector<experience> seeds;
00229 
00230   // how many copies of each?
00231   for (int i = 0; i < 1; i++){
00232 
00233     // single seed of terminal state
00234     seeds.push_back(getExp(11,24,rng.uniformDiscrete(2,40),rng.uniformDiscrete(0,7)));
00235     
00236     // one seed from each fuel row
00237     seeds.push_back(getExp(0, rng.uniformDiscrete(1,29),rng.uniformDiscrete(2,40),rng.uniformDiscrete(0,7)));  
00238     seeds.push_back(getExp(20,rng.uniformDiscrete(1,29),rng.uniformDiscrete(2,40),rng.uniformDiscrete(0,7)));
00239 
00240     // seed of terminal
00241     seeds.push_back(getExp(rng.uniformDiscrete(1,19),rng.uniformDiscrete(1,22),0,rng.uniformDiscrete(0,7)));
00242   }
00243 
00244     /*
00245   // two seeds around the goal state
00246   seeds.push_back(getExp(10,24,4,NORTH));
00247   seeds.push_back(getExp(11,25,42,EAST));
00248 
00249   // one of death
00250   seeds.push_back(getExp(9,15,0,SOUTHEAST));
00251   */
00252 
00253   // lots of seeds of various shit
00254   /*
00255     for (int i = 0; i < 3; i++){
00256 
00257     // each wall
00258     //seeds.push_back(getExp(0,11,10,SOUTH));
00259     //seeds.push_back(getExp(0,27,14,SOUTH));
00260     //seeds.push_back(getExp(0,17,10,NORTH));
00261     
00262     //seeds.push_back(getExp(20,12,20,SOUTH));
00263     //seeds.push_back(getExp(20,28,24,NORTH));
00264     //seeds.push_back(getExp(20,18,20,NORTH));
00265     
00266     //seeds.push_back(getExp(10,30,30,EAST));
00267     //seeds.push_back(getExp(12,30,34,EAST));
00268     //seeds.push_back(getExp(10,30,30,WEST));
00269     
00270     //seeds.push_back(getExp(13,0,2,WEST));
00271     //seeds.push_back(getExp(17,0,4,WEST));
00272     //seeds.push_back(getExp(13,0,2,EAST));
00273     
00274     // experiences showing where the goal state is (11,24)
00275     seeds.push_back(getExp(10,24,20,NORTH));
00276     seeds.push_back(getExp(12,24,44,SOUTH));
00277     seeds.push_back(getExp(11,23,52,EAST));
00278     seeds.push_back(getExp(11,25,7,WEST));
00279     seeds.push_back(getExp(10,23,11,NORTHEAST));
00280     seeds.push_back(getExp(10,25,16,NORTHWEST));
00281     seeds.push_back(getExp(12,23,21,SOUTHEAST));
00282     seeds.push_back(getExp(12,25,36,SOUTHWEST));
00283 
00284     // near the goal state
00285     seeds.push_back(getExp(11,23,45,NORTH));
00286     seeds.push_back(getExp(10,24,31,NORTHEAST));
00287     seeds.push_back(getExp(11,25,11,SOUTH));
00288     seeds.push_back(getExp(12,24,18,WEST));
00289     seeds.push_back(getExp(10,23,18,SOUTHWEST));
00290 
00291     // a few normal
00292     seeds.push_back(getExp(17,14,52,SOUTH));
00293     seeds.push_back(getExp(1,6,43,EAST));
00294     seeds.push_back(getExp(9,18,24,NORTH));
00295     seeds.push_back(getExp(12,8,3,WEST));
00296     seeds.push_back(getExp(7,1,42,SOUTHEAST));
00297     seeds.push_back(getExp(6,9,7,NORTHEAST));
00298     seeds.push_back(getExp(19,28,28,NORTHWEST));
00299     seeds.push_back(getExp(2,18,33,SOUTHWEST));
00300 
00301     // actions do different things from one state!
00302     seeds.push_back(getExp(9,3,19,SOUTHEAST));
00303     seeds.push_back(getExp(9,3,19,EAST));
00304     seeds.push_back(getExp(9,3,19,NORTHWEST));
00305     seeds.push_back(getExp(9,3,19,WEST));
00306     seeds.push_back(getExp(9,3,19,NORTH));
00307     seeds.push_back(getExp(9,3,19,SOUTHWEST));
00308 
00309     // and fuel running out
00310     seeds.push_back(getExp(7,4,0,NORTH));
00311     seeds.push_back(getExp(19,21,0,SOUTHWEST));
00312     seeds.push_back(getExp(3,16,0,NORTHEAST));
00313     seeds.push_back(getExp(13,1,0,SOUTH));
00314     
00315     // general gas stations 
00316     seeds.push_back(getExp(0,5,12,EAST));
00317     seeds.push_back(getExp(0,23,9,WEST));
00318     seeds.push_back(getExp(0,26,3,NORTHWEST));
00319     seeds.push_back(getExp(20,7,7,SOUTHEAST));
00320     seeds.push_back(getExp(20,18,4,SOUTH));
00321     seeds.push_back(getExp(20,25,4,SOUTHWEST));
00322 
00323     // terminal states
00324     seeds.push_back(getExp(3,24,-1,WEST));
00325     seeds.push_back(getExp(9,14,-1,SOUTH));
00326     seeds.push_back(getExp(7,4,-1,EAST));
00327     seeds.push_back(getExp(14,18,-1,NORTH));
00328     seeds.push_back(getExp(7,23,-1,NORTHWEST));
00329     seeds.push_back(getExp(16,5,-1,SOUTHWEST));
00330     seeds.push_back(getExp(17,14,-1,NORTHEAST));
00331     seeds.push_back(getExp(4,28,-1,SOUTHEAST));
00332 
00333     seeds.push_back(getExp(11,24,12,NORTH));
00334     seeds.push_back(getExp(11,24,22,WEST));
00335     seeds.push_back(getExp(11,24,32,EAST));
00336     seeds.push_back(getExp(11,24,2,SOUTH));
00337     seeds.push_back(getExp(11,24,17,NORTHWEST));
00338     seeds.push_back(getExp(11,24,27,SOUTHWEST));
00339     seeds.push_back(getExp(11,24,37,NORTHEAST));
00340     seeds.push_back(getExp(11,24,7,SOUTHEAST));
00341   }
00342   */
00343 
00344   /*
00345   // a bunch of random seeds
00346   for (int j = 0; j < 1000; j++){
00347     seeds.push_back(getExp(rng.uniformDiscrete(0,20),rng.uniformDiscrete(0,30),rng.uniformDiscrete(-1,60),rng.uniformDiscrete(0,3)));
00348   }
00349   */
00350 
00351   reset();
00352   resetVisits();
00353 
00354   return seeds;
00355 
00356 }
00357 
00358 
00359 experience FuelRooms::getExp(int s0, int s1, int s2, int a){
00360 
00361   experience e;
00362 
00363   e.s.resize(3, 0.0);
00364   e.next.resize(3, 0.0);
00365 
00366   ns = s0;
00367   ew = s1;
00368   energy = s2;
00369   e.act = a;
00370   e.s = sensation();
00371   e.reward = apply(e.act);
00372 
00373   e.terminal = terminal();
00374   e.next = sensation();
00375 
00376   /*
00377   cout << "Seed experience from state (" << e.s[0] << ", "
00378        << e.s[1] << ", " << e.s[2] << ") action: " << e.act
00379        << " to (" << e.next[0] << ", " << e.next[1] << ", " << e.next[2] 
00380        << ") with reward " << e.reward << " and term: " << e.terminal << endl;
00381   */
00382 
00383   return e;
00384 }
00385 
00386 void FuelRooms::getMinMaxFeatures(std::vector<float> *minFeat,
00387                                     std::vector<float> *maxFeat){
00388   
00389   minFeat->resize(s.size(), 0.0);
00390   maxFeat->resize(s.size(), 10.0);
00391 
00392   (*maxFeat)[0] = 20.0;
00393   (*maxFeat)[1] = 30.0;
00394   (*maxFeat)[2] = 60.0;
00395   (*minFeat)[2] = -1.0;
00396 }
00397 
00398 void FuelRooms::getMinMaxReward(float *minR,
00399                                float *maxR){
00400   
00401   *minR = -400.0;
00402   *maxR = 0.0;    
00403   
00404 }


rl_env
Author(s):
autogenerated on Thu Jun 6 2019 22:00:24