00001 #include <rl_env/fourrooms.hh>
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016 FourRooms::FourRooms(Random &rand):
00017 grid(create_default_map()),
00018 goal(coord_t(1.,10.)),
00019 negReward(true),
00020 noisy(false),
00021 extraReward(false),
00022 rewardSensor(false),
00023 rng(rand),
00024 doorway(coord_t(2.,4.)),
00025 s(2),
00026 unused(6),
00027 ns(s[0]),
00028 ew(s[1]),
00029 distN(unused[0]),
00030 distS(unused[1]),
00031 distE(unused[2]),
00032 distW(unused[3]),
00033 rewardEW(unused[4]),
00034 rewardNS(unused[5]),
00035 goalOption(false)
00036 {
00037 reset();
00038
00039 }
00040
00041
00042 FourRooms::FourRooms(Random &rand, bool stochastic, bool negReward,
00043 bool exReward):
00044 grid(create_default_map()),
00045 goal(coord_t(1.,10.)),
00046 negReward(negReward),
00047 noisy(stochastic),
00048 extraReward(exReward),
00049 rewardSensor(false),
00050 rng(rand),
00051 doorway(coord_t(2.,4.)),
00052 s(2),
00053 unused(6),
00054 ns(s[0]),
00055 ew(s[1]),
00056 distN(unused[0]),
00057 distS(unused[1]),
00058 distE(unused[2]),
00059 distW(unused[3]),
00060 rewardEW(unused[4]),
00061 rewardNS(unused[5]),
00062 goalOption(goalOption)
00063 {
00064 reset();
00065 }
00066
00067
00068 FourRooms::FourRooms(Random &rand, bool stochastic, bool negReward):
00069 grid(create_default_map()),
00070 goal(coord_t(1.,10.)),
00071 negReward(negReward),
00072 noisy(stochastic),
00073 extraReward(true),
00074 rewardSensor(false),
00075 rng(rand),
00076 doorway(coord_t(2.,4.)),
00077 s(6),
00078 unused(2),
00079 ns(s[0]),
00080 ew(s[1]),
00081 distN(s[2]),
00082 distS(s[3]),
00083 distE(s[4]),
00084 distW(s[5]),
00085 rewardEW(unused[0]),
00086 rewardNS(unused[1]),
00087 goalOption(false)
00088 {
00089 reset();
00090 }
00091
00092
00093
00094
00095 FourRooms::FourRooms(Random &rand, bool stochastic):
00096 grid(create_default_map()),
00097 goal(coord_t(1.,10.)),
00098 negReward(true),
00099 noisy(stochastic),
00100 extraReward(false),
00101 rewardSensor(false),
00102 rng(rand),
00103 doorway(coord_t(2.,4.)),
00104 s(8),
00105 unused(0),
00106 ns(s[0]),
00107 ew(s[1]),
00108 distN(s[2]),
00109 distS(s[3]),
00110 distE(s[4]),
00111 distW(s[5]),
00112 rewardEW(s[6]),
00113 rewardNS(s[7]),
00114 goalOption(false)
00115 {
00116
00117 reset();
00118 }
00119
00120
00121
00122
00123
00124
00125
00126
00127
00128
00129
00130
00131
00132
00133
00134
00135
00136
00137 FourRooms::~FourRooms() { delete grid; }
00138
00139 const std::vector<float> &FourRooms::sensation() const {
00140
00141
00142 return s;
00143 }
00144
00145 float FourRooms::apply(int action) {
00146
00147
00148
00149 const room_action_t effect =
00150 noisy
00151 ? add_noise(static_cast<room_action_t>(action))
00152 : static_cast<room_action_t>(action);
00153 switch(effect) {
00154 case NORTH:
00155 if (!grid->wall(static_cast<unsigned>(ns),
00156 static_cast<unsigned>(ew),
00157 effect))
00158 {
00159 ++ns;
00160 calcWallDistances();
00161 }
00162 return reward(effect);
00163 case SOUTH:
00164 if (!grid->wall(static_cast<unsigned>(ns),
00165 static_cast<unsigned>(ew),
00166 effect))
00167 {
00168 --ns;
00169 calcWallDistances();
00170 }
00171 return reward(effect);
00172 case EAST:
00173 if (!grid->wall(static_cast<unsigned>(ns),
00174 static_cast<unsigned>(ew),
00175 effect))
00176 {
00177 ++ew;
00178 calcWallDistances();
00179 }
00180 return reward(effect);
00181 case WEST:
00182 if (!grid->wall(static_cast<unsigned>(ns),
00183 static_cast<unsigned>(ew),
00184 effect))
00185 {
00186 --ew;
00187 calcWallDistances();
00188 }
00189 return reward(effect);
00190 }
00191 std::cerr << "Unreachable point reached in FourRooms::apply!!!\n";
00192 return 0;
00193 }
00194
00195
00196 float FourRooms::reward(int effect) {
00197
00198 if (extraReward){
00199
00200 if (terminal())
00201 return 0;
00202
00203
00204
00205 if (ew < 6 && ns == 8 && effect == EAST){
00206 return -1;
00207 }
00208
00209
00210 if (ew == 1 && ns > 4 && effect == SOUTH){
00211 return -1;
00212 }
00213
00214
00215 if (ew == 8 && ns > 3 && effect == SOUTH){
00216 return -1;
00217 }
00218
00219
00220 if (ew < 6 && ns == 1 && effect == EAST){
00221 return -1;
00222 }
00223
00224
00225 if (ns == 1 && effect == EAST){
00226 return -1;
00227 }
00228 if (ew == 10 && ns < 4 && effect == SOUTH){
00229 return -1;
00230 }
00231
00232
00233 return -2;
00234
00235 }
00236
00237
00238 if (negReward){
00239
00240 if (terminal())
00241 return 0;
00242 else
00243 return -1;
00244
00245 }else{
00246
00247
00248 if (terminal())
00249 return 1;
00250 else
00251 return 0;
00252 }
00253 }
00254
00255
00256 bool FourRooms::terminal() const {
00257
00258 return coord_t(ns,ew) == goal;
00259 }
00260
00261
00262 void FourRooms::calcWallDistances(){
00263
00264
00265
00266 if (ns != 1 && ns != 8){
00267
00268 if (ew < 5){
00269 distW = ew;
00270 distE = 4 - ew;
00271 }
00272
00273 else {
00274 distW = ew - 6;
00275 distE = 10 - ew;
00276 }
00277 }
00278
00279 else {
00280 distW = ew;
00281 distE = 10 - ew;
00282 }
00283
00284
00285 if (ns == 5 && ew == 1){
00286 distW = 0;
00287 distE = 0;
00288 }
00289 if (ns == 4 && ew == 8){
00290 distW = 0;
00291 distE = 0;
00292 }
00293
00294
00295
00296 if (ew < 5){
00297
00298 if (ew != 1){
00299
00300 if (ns > 5){
00301 distN = 10 - ns;
00302 distS = ns - 6;
00303 }
00304
00305 else {
00306 distN = 4 - ns;
00307 distS = ns;
00308 }
00309 }
00310
00311 else {
00312 distN = 10 - ns;
00313 distS = ns;
00314 }
00315 }
00316
00317 else {
00318
00319 if (ew != 8){
00320
00321 if (ns > 4){
00322 distN = 10-ns;
00323 distS = ns - 5;
00324 }
00325
00326 else {
00327 distN = 3 - ns;
00328 distS = ns;
00329 }
00330 }
00331
00332 else {
00333 distN = 10-ns;
00334 distS = ns;
00335 }
00336 }
00337
00338
00339 if (ew == 5 && (ns == 1 || ns == 8)){
00340 distN = 0;
00341 distS = 0;
00342 }
00343
00344
00345
00346
00347 if (ns == 1){
00348 rewardEW = 10 - ew;
00349 }
00350 else {
00351 rewardEW = 100;
00352 }
00353
00354
00355 if (ew == 10 && ns < 4){
00356 rewardNS = 1 - ns;
00357 }
00358 else {
00359 rewardNS = 100;
00360 }
00361
00362
00363
00364
00365
00366
00367
00368 }
00369
00370
00371 void FourRooms::reset() {
00372
00373 ns = rng.uniformDiscrete(6, grid->height()-1);
00374 ew = rng.uniformDiscrete(0, 4);
00375
00376
00377
00378
00379
00380
00381
00382 calcWallDistances();
00383 }
00384
00385
00386 std::vector<std::vector<float> > FourRooms::getSubgoals(){
00387
00388
00389
00390
00391 std::vector<std::vector<float> > subgoals;
00392
00393
00394 std::vector<float> subgoal(2);
00395
00396
00397 subgoal[0] = 5;
00398 subgoal[1] = 1;
00399 subgoals.push_back(subgoal);
00400
00401
00402 subgoal[0] = 4;
00403 subgoal[1] = 8;
00404 subgoals.push_back(subgoal);
00405
00406
00407 subgoal[0] = 8;
00408 subgoal[1] = 5;
00409 subgoals.push_back(subgoal);
00410
00411
00412 subgoal[0] = 1;
00413 subgoal[1] = 5;
00414 subgoals.push_back(subgoal);
00415
00416 if (goalOption){
00417
00418 subgoal[0] = 1;
00419 subgoal[1] = 10;
00420 subgoals.push_back(subgoal);
00421 }
00422
00423 return subgoals;
00424
00425 }
00426
00427
00428 int FourRooms::getNumActions(){
00429 return 4;
00430 }
00431
00432
00433 std::ostream &operator<<(std::ostream &out, const FourRooms &rooms) {
00434 out << "Map:\n" << *rooms.grid;
00435
00436
00437 out << "Goal: row " << rooms.goal.first
00438 << ", column " << rooms.goal.second << "\n";
00439
00440
00441 out << "Doorway: row " << rooms.doorway.first
00442 << ", column " << rooms.doorway.second << "\n";
00443
00444 return out;
00445 }
00446
00447 const Gridworld *FourRooms::create_default_map() {
00448 int width = 11;
00449 int height = 11;
00450 std::vector<std::vector<bool> > nsv(width, std::vector<bool>(height-1,false));
00451 std::vector<std::vector<bool> > ewv(height, std::vector<bool>(width-1,false));
00452
00453
00454 for (int j = 0; j < height; j++){
00455
00456 if (j == 1 || j == 8)
00457 continue;
00458 ewv[j][4] = true;
00459 ewv[j][5] = true;
00460 }
00461
00462 nsv[5][0] = true;
00463 nsv[5][1] = true;
00464 nsv[5][7] = true;
00465 nsv[5][8] = true;
00466
00467
00468 for (int i = 0; i < 6; i++){
00469
00470 if (i == 1)
00471 continue;
00472 nsv[i][4] = true;
00473 nsv[i][5] = true;
00474 }
00475
00476 ewv[5][0] = true;
00477 ewv[5][1] = true;
00478
00479
00480 for (int i = 5; i < width; i++){
00481
00482 if (i == 8)
00483 continue;
00484 nsv[i][3] = true;
00485 nsv[i][4] = true;
00486 }
00487
00488 ewv[4][7] = true;
00489 ewv[4][8] = true;
00490
00491 return new Gridworld(height, width, nsv, ewv);
00492 }
00493
00494 FourRooms::room_action_t FourRooms::add_noise(room_action_t action) {
00495 switch(action) {
00496 case NORTH:
00497 case SOUTH:
00498 return rng.bernoulli(0.8) ? action : (rng.bernoulli(0.5) ? EAST : WEST);
00499 case EAST:
00500 case WEST:
00501 return rng.bernoulli(0.8) ? action : (rng.bernoulli(0.5) ? NORTH : SOUTH);
00502 default:
00503 return action;
00504 }
00505 }
00506
00507
00508 void FourRooms::randomize_goal() {
00509 const unsigned n = grid->height() * grid->width();
00510 unsigned index = rng.uniformDiscrete(1,n) - 1;
00511 goal = coord_t(index / grid->width(), index % grid->width());
00512 }
00513
00514
00516 void FourRooms::setSensation(std::vector<float> newS){
00517 if (s.size() != newS.size()){
00518 cerr << "Error in sensation sizes" << endl;
00519 }
00520
00521 for (unsigned i = 0; i < newS.size(); i++){
00522 s[i] = (int)newS[i];
00523 }
00524 }
00525
00526
00527 void FourRooms::getMinMaxFeatures(std::vector<float> *minFeat,
00528 std::vector<float> *maxFeat){
00529
00530 minFeat->resize(s.size(), 0.0);
00531 maxFeat->resize(s.size(), 10.0);
00532
00533 if (s.size() > 2) {
00534 for (unsigned i = 2; i < s.size(); i++){
00535 (*minFeat)[i] = -10.0;
00536 }
00537 }
00538
00539 }
00540
00541 void FourRooms::getMinMaxReward(float *minR,
00542 float *maxR){
00543
00544 if (extraReward){
00545 *minR = -2.0;
00546 *maxR = 0.0;
00547 }
00548 else if (negReward){
00549 *minR = -1.0;
00550 *maxR = 0.0;
00551 }else{
00552 *minR = 0.0;
00553 *maxR = 1.0;
00554 }
00555
00556 }