00001 #include <rl_env/energyrooms.hh>
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015 EnergyRooms::EnergyRooms(Random &rand, bool negReward):
00016 grid(create_default_map()),
00017 goal(coord_t(1.,10.)),
00018 negReward(negReward),
00019 noisy(false),
00020 rng(rand),
00021 doorway(coord_t(2.,4.)),
00022 s(3),
00023 ns(s[0]),
00024 ew(s[1]),
00025 energy(s[2]),
00026 goalOption(false),
00027 fuel(false)
00028 {
00029 reset();
00030
00031 }
00032
00033 EnergyRooms::EnergyRooms(Random &rand, bool stochastic, bool negReward,
00034 bool goalOption):
00035 grid(create_default_map()),
00036 goal(coord_t(1.,10.)),
00037 negReward(negReward),
00038 noisy(stochastic),
00039 rng(rand),
00040 doorway(coord_t(2.,4.)),
00041 s(3),
00042 ns(s[0]),
00043 ew(s[1]),
00044 energy(s[2]),
00045 goalOption(goalOption),
00046 fuel(false)
00047 {
00048 reset();
00049 }
00050
00051 EnergyRooms::EnergyRooms(Random &rand, bool stochastic, bool negReward,
00052 bool goalOption, bool fuel):
00053 grid(create_default_map()),
00054 goal(coord_t(1.,10.)),
00055 negReward(negReward),
00056 noisy(stochastic),
00057 rng(rand),
00058 doorway(coord_t(2.,4.)),
00059 s(3),
00060 ns(s[0]),
00061 ew(s[1]),
00062 energy(s[2]),
00063 goalOption(goalOption),
00064 fuel(fuel)
00065 {
00066 reset();
00067 }
00068
00069
00070
00071
00072
00073
00074
00075
00076
00077
00078
00079
00080
00081
00082
00083
00084
00085 EnergyRooms::~EnergyRooms() { delete grid; }
00086
00087 const std::vector<float> &EnergyRooms::sensation() const {
00088
00089
00090 return s;
00091 }
00092
00093 float EnergyRooms::apply(int action) {
00094
00095
00096
00097
00098 if (!noisy || rng.bernoulli(0.8))
00099 energy--;
00100
00101
00102 if (fuel){
00103 if ((int)ns % 3 == 0 && (int)ew % 3 == 0){
00104 energy += (int)(5 + ns/3 + (11-ew)/3);
00105 }
00106 if (energy > 20.0)
00107 energy = 20.0;
00108 }
00109 else {
00110
00111 if (ns == 7 && ew == 3)
00112 energy = 10;
00113 if (ns == 7 && ew == 7)
00114 energy = 10;
00115 if (ns == 3 && ew == 3)
00116 energy = 10;
00117 if (ns == 7 && ew == 7)
00118 energy = 10;
00119 }
00120
00121
00122 if (energy < 0.0)
00123 energy = 0;
00124
00125
00126 const room_action_t effect =
00127 noisy
00128 ? add_noise(static_cast<room_action_t>(action))
00129 : static_cast<room_action_t>(action);
00130 switch(effect) {
00131 case NORTH:
00132 if (!grid->wall(static_cast<unsigned>(ns),
00133 static_cast<unsigned>(ew),
00134 effect))
00135 {
00136 ++ns;
00137 }
00138 return reward();
00139 case SOUTH:
00140 if (!grid->wall(static_cast<unsigned>(ns),
00141 static_cast<unsigned>(ew),
00142 effect))
00143 {
00144 --ns;
00145 }
00146 return reward();
00147 case EAST:
00148 if (!grid->wall(static_cast<unsigned>(ns),
00149 static_cast<unsigned>(ew),
00150 effect))
00151 {
00152 ++ew;
00153 }
00154 return reward();
00155 case WEST:
00156 if (!grid->wall(static_cast<unsigned>(ns),
00157 static_cast<unsigned>(ew),
00158 effect))
00159 {
00160 --ew;
00161 }
00162 return reward();
00163 }
00164
00165 std::cerr << "Unreachable point reached in EnergyRooms::apply!!!\n";
00166 return 0;
00167 }
00168
00169
00170 float EnergyRooms::reward() {
00171
00172 if (negReward){
00173
00174 if (terminal())
00175 return 0;
00176 else if (energy <= 0.1){
00177 if (fuel)
00178 return -10;
00179 else
00180 return -2;
00181 }
00182
00183 else{
00184
00185 if (fuel){
00186 if ((int)ns % 3 == 0 && (int)ew % 3 == 0)
00187 return -2 + (int)(-ew/5 -((int)ns%4));
00188
00189 else
00190 return -1;
00191 }
00192 else
00193 return -1;
00194 }
00195
00196 }
00197 else{
00198
00199
00200 if (terminal())
00201 return 1;
00202 else if (energy <= 0.1){
00203 if (fuel)
00204 return -10;
00205 else
00206 return -2;
00207 }else
00208 return 0;
00209 }
00210 }
00211
00212
00213 bool EnergyRooms::terminal() const {
00214
00215 return coord_t(ns,ew) == goal;
00216 }
00217
00218
00219 void EnergyRooms::reset() {
00220
00221 ns = rng.uniformDiscrete(6, grid->height()-1);
00222 ew = rng.uniformDiscrete(0, 4);
00223 energy = 10;
00224
00225
00226 if (fuel) {
00227 energy = rng.uniformDiscrete(1, 20);
00228 }
00229
00230
00231
00232
00233
00234
00235 }
00236
00237
00238 std::vector<std::vector<float> > EnergyRooms::getSubgoals(){
00239
00240
00241
00242
00243 std::vector<std::vector<float> > subgoals;
00244
00245
00246 std::vector<float> subgoal(2);
00247
00248
00249 subgoal[0] = 5;
00250 subgoal[1] = 1;
00251 subgoals.push_back(subgoal);
00252
00253
00254 subgoal[0] = 4;
00255 subgoal[1] = 8;
00256 subgoals.push_back(subgoal);
00257
00258
00259 subgoal[0] = 8;
00260 subgoal[1] = 5;
00261 subgoals.push_back(subgoal);
00262
00263
00264 subgoal[0] = 1;
00265 subgoal[1] = 5;
00266 subgoals.push_back(subgoal);
00267
00268 if (goalOption){
00269
00270 subgoal[0] = 1;
00271 subgoal[1] = 10;
00272 subgoals.push_back(subgoal);
00273 }
00274
00275 return subgoals;
00276
00277 }
00278
00279
00280 int EnergyRooms::getNumActions(){
00281 return 4;
00282 }
00283
00284
00285 std::ostream &operator<<(std::ostream &out, const EnergyRooms &rooms) {
00286 out << "Map:\n" << *rooms.grid;
00287
00288
00289 out << "Goal: row " << rooms.goal.first
00290 << ", column " << rooms.goal.second << "\n";
00291
00292
00293 out << "Doorway: row " << rooms.doorway.first
00294 << ", column " << rooms.doorway.second << "\n";
00295
00296 return out;
00297 }
00298
00299 const Gridworld *EnergyRooms::create_default_map() {
00300 int width = 11;
00301 int height = 11;
00302 std::vector<std::vector<bool> > nsv(width, std::vector<bool>(height-1,false));
00303 std::vector<std::vector<bool> > ewv(height, std::vector<bool>(width-1,false));
00304
00305
00306 for (int j = 0; j < height; j++){
00307
00308 if (j == 1 || j == 8)
00309 continue;
00310 ewv[j][4] = true;
00311 ewv[j][5] = true;
00312 }
00313
00314 nsv[5][0] = true;
00315 nsv[5][1] = true;
00316 nsv[5][7] = true;
00317 nsv[5][8] = true;
00318
00319
00320 for (int i = 0; i < 6; i++){
00321
00322 if (i == 1)
00323 continue;
00324 nsv[i][4] = true;
00325 nsv[i][5] = true;
00326 }
00327
00328 ewv[5][0] = true;
00329 ewv[5][1] = true;
00330
00331
00332 for (int i = 5; i < width; i++){
00333
00334 if (i == 8)
00335 continue;
00336 nsv[i][3] = true;
00337 nsv[i][4] = true;
00338 }
00339
00340 ewv[4][7] = true;
00341 ewv[4][8] = true;
00342
00343 return new Gridworld(height, width, nsv, ewv);
00344 }
00345
00346 EnergyRooms::room_action_t EnergyRooms::add_noise(room_action_t action) {
00347 switch(action) {
00348 case NORTH:
00349 case SOUTH:
00350 return rng.bernoulli(0.8) ? action : (rng.bernoulli(0.5) ? EAST : WEST);
00351 case EAST:
00352 case WEST:
00353 return rng.bernoulli(0.8) ? action : (rng.bernoulli(0.5) ? NORTH : SOUTH);
00354 default:
00355 return action;
00356 }
00357 }
00358
00359
00360 void EnergyRooms::randomize_goal() {
00361 const unsigned n = grid->height() * grid->width();
00362 unsigned index = rng.uniformDiscrete(1,n) - 1;
00363 goal = coord_t(index / grid->width(), index % grid->width());
00364 }
00365
00366
00367 void EnergyRooms::getMinMaxFeatures(std::vector<float> *minFeat,
00368 std::vector<float> *maxFeat){
00369
00370 minFeat->resize(s.size(), 0.0);
00371 maxFeat->resize(s.size(), 10.0);
00372
00373 (*maxFeat)[2] = 20.0;
00374
00375 }
00376
00377 void EnergyRooms::getMinMaxReward(float *minR,
00378 float *maxR){
00379
00380 *minR = -10.0;
00381 *maxR = 1.0;
00382
00383 }