00001
00008 #include <rl_env/FuelRooms.hh>
00009
00010
00011 FuelRooms::FuelRooms(Random &rand, bool extraVariation, bool stoch):
00012 height(20), width(30),
00013 goal(coord_t(11.0,24.0)),
00014 extraVar(extraVariation),
00015 noisy(stoch),
00016 rng(rand),
00017 s(3),
00018 ns(s[0]),
00019 ew(s[1]),
00020 energy(s[2])
00021 {
00022
00023 fuelVisited = 0;
00024 totalVisited = 0;
00025
00026 stateVisits = new int*[21];
00027 for (int i = 0; i < 21; i++){
00028 stateVisits[i] = new int[31];
00029 }
00030
00031 reset();
00032 resetVisits();
00033 }
00034
00035
00036
00037 FuelRooms::~FuelRooms() {
00038 for (int i = 0; i < 21; i++){
00039 delete [] stateVisits[i];
00040 }
00041 delete [] stateVisits;
00042 }
00043
00044 const std::vector<float> &FuelRooms::sensation() const {
00045 return s;
00046 }
00047
00048 float FuelRooms::apply(int action) {
00049
00050 checkVisits();
00051
00052 if (terminal())
00053 return 0.0;
00054
00055
00056
00057
00058
00059
00060
00061
00062
00063
00064 energy--;
00065
00066
00067 if ((int)ns == 0 || (int)ns == height){
00068 energy += 20.0;
00069 }
00070
00071
00072 if (energy > 60.0)
00073 energy = 60.0;
00074
00075 const room_action_t effect =
00076 noisy
00077 ? add_noise(static_cast<room_action_t>(action))
00078 : static_cast<room_action_t>(action);
00079
00080 float r = reward(effect);
00081
00082 if (effect == NORTH || effect == NORTHWEST || effect == NORTHEAST)
00083 if (ns < height)
00084 ns++;
00085
00086 if (effect == SOUTH || effect == SOUTHWEST || effect == SOUTHEAST)
00087 if (ns > 0)
00088 ns--;
00089
00090 if (effect == EAST || effect == SOUTHEAST || effect == NORTHEAST)
00091 if (ew < width)
00092 ew++;
00093
00094 if (effect == WEST || effect == SOUTHWEST || effect == NORTHWEST)
00095 if (ew > 0)
00096 ew--;
00097
00098 return r;
00099
00100 std::cerr << "Unreachable point reached in FuelRooms::apply!!!\n";
00101 return 0;
00102 }
00103
00104
00105
00106 float FuelRooms::reward(int effect) {
00107
00108 if (energy < 0.0){
00109 return -400.0;
00110 }
00111
00112 if (terminal()){
00113
00114 return 0.0;
00115 }
00116
00117
00118
00119
00120
00121
00122
00123
00124 if (ns == 0 || ns == height){
00125 float base = -10.0;
00126 if (ns == 0)
00127 base = -13.0;
00128
00129
00130 float var = 1.0;
00131 if (extraVar)
00132 var = 5.0;
00133 else
00134 base -= 8.0;
00135
00136 return base - (((int)ew % 5) * var);
00137
00138 }
00139
00140 if (effect == NORTH || effect == SOUTH || effect == EAST || effect == WEST)
00141 return -1.0;
00142 else
00143 return -1.4;
00144
00145 }
00146
00147
00148 bool FuelRooms::terminal() const {
00149
00150
00151 return (coord_t(ns,ew) == goal) || (energy < 0.0);
00152 }
00153
00154
00155 void FuelRooms::reset() {
00156
00157 ns = rng.uniformDiscrete(7, 12);
00158 ew = rng.uniformDiscrete(0, 4);
00159
00160
00161
00162
00163 energy = rng.uniformDiscrete(14, 18);
00164
00165 }
00166
00167
00168 void FuelRooms::resetVisits(){
00169 fuelVisited = 0;
00170 totalVisited = 0;
00171
00172 for (int i = 0; i < 21; i++)
00173 for (int j = 0; j < 31; j++)
00174 stateVisits[i][j] = 0;
00175 }
00176
00177 void FuelRooms::checkVisits(){
00178 stateVisits[(int)ns][(int)ew]++;
00179
00180 if (stateVisits[(int)ns][(int)ew] == 1){
00181 totalVisited++;
00182 if (ns == 0 || ns == 20)
00183 fuelVisited++;
00184 }
00185 }
00186
00187 void FuelRooms::printVisits(){
00188 float totalStates = 31.0 * 21.0;
00189 float fuelStates = 31.0 * 2.0;
00190 float otherStates = totalStates - fuelStates;
00191 cout << (fuelVisited/fuelStates) << endl << ((totalVisited-fuelVisited)/otherStates) << endl << (totalVisited/totalStates) << endl;
00192 }
00193
00194 void FuelRooms::printVisitMap(string filename){
00195 ofstream fout(filename.c_str());
00196 for (int i = 0; i < 21; i++){
00197 for (int j = 0; j < 31; j++){
00198 fout << stateVisits[i][j] << "\t";
00199 }
00200 fout << endl;
00201 }
00202 fout.close();
00203 }
00204
00205
00206 int FuelRooms::getNumActions(){
00207 return 8;
00208 }
00209
00210
00211 FuelRooms::room_action_t FuelRooms::add_noise(room_action_t action) {
00212
00213 int newAct = rng.bernoulli(0.8) ? action : (rng.bernoulli(0.5) ? action+1 : action-1);
00214
00215 if (newAct < 0)
00216 newAct = getNumActions()-1;
00217 if (newAct >= getNumActions())
00218 newAct = 0;
00219
00220 return (room_action_t)newAct;
00221 }
00222
00223
00224
00225 std::vector<experience> FuelRooms::getSeedings() {
00226
00227
00228 std::vector<experience> seeds;
00229
00230
00231 for (int i = 0; i < 1; i++){
00232
00233
00234 seeds.push_back(getExp(11,24,rng.uniformDiscrete(2,40),rng.uniformDiscrete(0,7)));
00235
00236
00237 seeds.push_back(getExp(0, rng.uniformDiscrete(1,29),rng.uniformDiscrete(2,40),rng.uniformDiscrete(0,7)));
00238 seeds.push_back(getExp(20,rng.uniformDiscrete(1,29),rng.uniformDiscrete(2,40),rng.uniformDiscrete(0,7)));
00239
00240
00241 seeds.push_back(getExp(rng.uniformDiscrete(1,19),rng.uniformDiscrete(1,22),0,rng.uniformDiscrete(0,7)));
00242 }
00243
00244
00245
00246
00247
00248
00249
00250
00251
00252
00253
00254
00255
00256
00257
00258
00259
00260
00261
00262
00263
00264
00265
00266
00267
00268
00269
00270
00271
00272
00273
00274
00275
00276
00277
00278
00279
00280
00281
00282
00283
00284
00285
00286
00287
00288
00289
00290
00291
00292
00293
00294
00295
00296
00297
00298
00299
00300
00301
00302
00303
00304
00305
00306
00307
00308
00309
00310
00311
00312
00313
00314
00315
00316
00317
00318
00319
00320
00321
00322
00323
00324
00325
00326
00327
00328
00329
00330
00331
00332
00333
00334
00335
00336
00337
00338
00339
00340
00341
00342
00343
00344
00345
00346
00347
00348
00349
00350
00351 reset();
00352 resetVisits();
00353
00354 return seeds;
00355
00356 }
00357
00358
00359 experience FuelRooms::getExp(int s0, int s1, int s2, int a){
00360
00361 experience e;
00362
00363 e.s.resize(3, 0.0);
00364 e.next.resize(3, 0.0);
00365
00366 ns = s0;
00367 ew = s1;
00368 energy = s2;
00369 e.act = a;
00370 e.s = sensation();
00371 e.reward = apply(e.act);
00372
00373 e.terminal = terminal();
00374 e.next = sensation();
00375
00376
00377
00378
00379
00380
00381
00382
00383 return e;
00384 }
00385
00386 void FuelRooms::getMinMaxFeatures(std::vector<float> *minFeat,
00387 std::vector<float> *maxFeat){
00388
00389 minFeat->resize(s.size(), 0.0);
00390 maxFeat->resize(s.size(), 10.0);
00391
00392 (*maxFeat)[0] = 20.0;
00393 (*maxFeat)[1] = 30.0;
00394 (*maxFeat)[2] = 60.0;
00395 (*minFeat)[2] = -1.0;
00396 }
00397
00398 void FuelRooms::getMinMaxReward(float *minR,
00399 float *maxR){
00400
00401 *minR = -400.0;
00402 *maxR = 0.0;
00403
00404 }