Go to the documentation of this file.00001
00007 #include <rl_env/LightWorld.hh>
00008
00009
00010 LightWorld::LightWorld(Random &rand, bool stochastic, int nrooms):
00011 noisy(stochastic),
00012 nrooms(nrooms),
00013 rng(rand),
00014 s(17),
00015 ns(s[0]),
00016 ew(s[1]),
00017 have_key(s[2]),
00018 door_open(s[3]),
00019 room_id(s[4]),
00020 key_n(s[5]),
00021 key_e(s[6]),
00022 key_w(s[7]),
00023 key_s(s[8]),
00024 lock_n(s[9]),
00025 lock_e(s[10]),
00026 lock_w(s[11]),
00027 lock_s(s[12]),
00028 door_n(s[13]),
00029 door_e(s[14]),
00030 door_w(s[15]),
00031 door_s(s[16])
00032 {
00033
00034 LWDEBUG = false;
00035 MAX_SENSE = 10;
00036
00037
00038 totalVisited = 0;
00039 keyVisited = 0;
00040 lockVisited = 0;
00041 doorVisited = 0;
00042 haveKey = 0;
00043 doorOpen = 0;
00044 leaveRoom = 0;
00045 pressKey = 0;
00046 pressLockCorrect = 0;
00047 pressLockIncorrect = 0;
00048 pressDoor = 0;
00049 pressOther = 0;
00050 pickupKeyCorrect = 0;
00051 pickupKeyIncorrect = 0;
00052 pickupLock = 0;
00053 pickupDoor = 0;
00054 pickupOther = 0;
00055
00056 reset();
00057 }
00058
00059
00060
00061 LightWorld::~LightWorld() { }
00062
00063
00064 const std::vector<float> &LightWorld::sensation() const {
00065 if (LWDEBUG) print_map();
00066 return s;
00067 }
00068
00069
00070 int LightWorld::applyNoise(int action){
00071 switch(action) {
00072 case NORTH:
00073 case SOUTH:
00074 return rng.bernoulli(0.9) ? action : (rng.bernoulli(0.5) ? EAST : WEST);
00075 case EAST:
00076 case WEST:
00077 return rng.bernoulli(0.9) ? action : (rng.bernoulli(0.5) ? NORTH : SOUTH);
00078 case PRESS:
00079 case PICKUP:
00080 return rng.bernoulli(0.9) ? action : -1;
00081 default:
00082 return action;
00083 }
00084 }
00085
00086 float LightWorld::apply(int origAction) {
00087
00088 int reward = 0;
00089
00090 int action = origAction;
00091 if (noisy)
00092 action = applyNoise(origAction);
00093
00094 if (action == NORTH){
00095 if (((ns < rooms[room_id].height-2) || (rooms[room_id].lock_ns == rooms[room_id].height-1 && ew == rooms[room_id].lock_ew) || (rooms[room_id].door_ns == rooms[room_id].height-1 && ew == rooms[room_id].door_ew)) && ew > 0 && ew < rooms[room_id].width-1 && ns < rooms[room_id].height-1) {
00096 ns++;
00097 } else if (door_open && ns == rooms[room_id].door_ns && ew == rooms[room_id].door_ew && rooms[room_id].door_ns == rooms[room_id].height-1){
00098 leaveRoom++;
00099 room_id++;
00100 if (room_id >= nrooms) room_id = 0;
00101 have_key = false;
00102 door_open = false;
00103 if (rooms[room_id].key_ns < 0) have_key = true;
00104 ns = 0;
00105 resetKey();
00106 reward+=10;
00107 }
00108 }
00109 if (action == EAST){
00110 if (((ew < rooms[room_id].width-2) || (rooms[room_id].lock_ew == rooms[room_id].width-1 && ns == rooms[room_id].lock_ns) || (rooms[room_id].door_ew == rooms[room_id].width-1 && ns == rooms[room_id].door_ns)) && ns > 0 && ns < rooms[room_id].height-1 && ew < rooms[room_id].width-1) {
00111 ew++;
00112 } else if (door_open && ew == rooms[room_id].door_ew && ns == rooms[room_id].door_ns && rooms[room_id].door_ew == rooms[room_id].width-1){
00113 leaveRoom++;
00114 room_id++;
00115 if (room_id >= nrooms) room_id = 0;
00116 have_key = false;
00117 door_open = false;
00118 if (rooms[room_id].key_ns < 0) have_key = true;
00119 ew = 0;
00120 if (room_id == 1) ns = 3;
00121 resetKey();
00122 reward+=10;
00123 }
00124 }
00125 if (action == SOUTH){
00126 if (((ns > 1) || (rooms[room_id].lock_ns == 0 && ew == rooms[room_id].lock_ew) || (rooms[room_id].door_ns == 0 && ew == rooms[room_id].door_ew)) && ew > 0 && ew < rooms[room_id].width-1 && ns > 0) {
00127 ns--;
00128 } else if (door_open && ns == rooms[room_id].door_ns && ew == rooms[room_id].door_ew && rooms[room_id].door_ns == 0){
00129 leaveRoom++;
00130 room_id++;
00131 if (room_id >= nrooms) room_id = 0;
00132 have_key = false;
00133 door_open = false;
00134 if (rooms[room_id].key_ns < 0) have_key = true;
00135 ns = rooms[room_id].height-1;
00136 resetKey();
00137 reward+=10;
00138 }
00139 }
00140 if (action == WEST){
00141 if (((ew > 1) || (rooms[room_id].lock_ew == 0 && ns == rooms[room_id].lock_ns) || (rooms[room_id].door_ew == 0 && ns == rooms[room_id].door_ns)) && ns > 0 && ns < rooms[room_id].height-1 && ew > 0) {
00142 ew--;
00143 } else if (door_open && ew == rooms[room_id].door_ew && ns == rooms[room_id].door_ns && rooms[room_id].door_ew == 0){
00144 leaveRoom++;
00145 room_id++;
00146 if (room_id >= nrooms) room_id = 0;
00147 have_key = false;
00148 door_open = false;
00149 if (rooms[room_id].key_ns < 0) have_key = true;
00150 ew = rooms[room_id].width-1;
00151 resetKey();
00152 reward+=10;
00153 }
00154 }
00155
00156
00157 if (action == PICKUP){
00158 if (ns == rooms[room_id].key_ns && ew == rooms[room_id].key_ew){
00159 if (!have_key) pickupKeyCorrect++;
00160 else pickupKeyIncorrect++;
00161 have_key = true;
00162 }
00163 else if (ns == rooms[room_id].lock_ns && ew == rooms[room_id].lock_ew)
00164 pickupLock++;
00165 else if (ns == rooms[room_id].door_ns && ew == rooms[room_id].door_ew)
00166 pickupDoor++;
00167 else
00168 pickupOther++;
00169 }
00170
00171 if (action == PRESS){
00172 if (ns == rooms[room_id].lock_ns && ew == rooms[room_id].lock_ew){
00173 if (have_key) {
00174 if (!door_open) pressLockCorrect++;
00175 else pressLockIncorrect++;
00176 door_open = true;
00177 } else {
00178 pressLockIncorrect++;
00179 }
00180 }
00181 else if (ns == rooms[room_id].key_ns && ew == rooms[room_id].key_ew)
00182 pressKey++;
00183 else if (ns == rooms[room_id].door_ns && ew == rooms[room_id].door_ew)
00184 pressDoor++;
00185 else
00186 pressOther++;
00187 }
00188
00189 updateSensors();
00190
00191 return reward;
00192
00193 }
00194
00195 void LightWorld::updateSensors() {
00196
00197
00198 key_n = 0;
00199 key_e = 0;
00200 key_w = 0;
00201 key_s = 0;
00202 lock_n = 0;
00203 lock_e = 0;
00204 lock_w = 0;
00205 lock_s = 0;
00206 door_n = 0;
00207 door_e = 0;
00208 door_w = 0;
00209 door_s = 0;
00210
00211 if (!have_key){
00212 if (rooms[room_id].key_ns <= ns){
00213 key_s = MAX_SENSE - (ns - rooms[room_id].key_ns);
00214 }
00215 if (rooms[room_id].key_ns >= ns){
00216 key_n = MAX_SENSE - (rooms[room_id].key_ns - ns);
00217 }
00218 if (rooms[room_id].key_ew <= ew){
00219 key_w = MAX_SENSE - (ew - rooms[room_id].key_ew);
00220 }
00221 if (rooms[room_id].key_ew >= ew){
00222 key_e = MAX_SENSE - (rooms[room_id].key_ew - ew);
00223 }
00224 }
00225
00226 if (door_open){
00227 if (rooms[room_id].door_ns <= ns){
00228 door_s = MAX_SENSE - (ns - rooms[room_id].door_ns);
00229 }
00230 if (rooms[room_id].door_ns >= ns){
00231 door_n = MAX_SENSE - (rooms[room_id].door_ns - ns);
00232 }
00233 if (rooms[room_id].door_ew <= ew){
00234 door_w = MAX_SENSE - (ew - rooms[room_id].door_ew);
00235 }
00236 if (rooms[room_id].door_ew >= ew){
00237 door_e = MAX_SENSE - (rooms[room_id].door_ew - ew);
00238 }
00239 }
00240
00241 if (rooms[room_id].lock_ns <= ns){
00242 lock_s = MAX_SENSE - (ns - rooms[room_id].lock_ns);
00243 }
00244 if (rooms[room_id].lock_ns >= ns){
00245 lock_n = MAX_SENSE - (rooms[room_id].lock_ns - ns);
00246 }
00247 if (rooms[room_id].lock_ew <= ew){
00248 lock_w = MAX_SENSE - (ew - rooms[room_id].lock_ew);
00249 }
00250 if (rooms[room_id].lock_ew >= ew){
00251 lock_e = MAX_SENSE - (rooms[room_id].lock_ew - ew);
00252 }
00253 }
00254
00255
00256 void LightWorld::resetKey() {
00257 if (!have_key && rooms[room_id].key_ns > -1){
00258 rooms[room_id].key_ns = rng.uniformDiscrete(1, rooms[room_id].height-2);
00259 rooms[room_id].key_ew = rng.uniformDiscrete(1, rooms[room_id].width-2);
00260 }
00261 }
00262
00263 void LightWorld::setKey(std::vector<float> testS){
00264 if (!have_key){
00265 float nsDist = 0;
00266 if (testS[5] > 0)
00267 nsDist = MAX_SENSE - testS[5];
00268 else
00269 nsDist = -MAX_SENSE + testS[8];
00270 rooms[room_id].key_ns = testS[0] + nsDist;
00271 float ewDist = 0;
00272 if (testS[6] > 0)
00273 ewDist = MAX_SENSE - testS[6];
00274 else
00275 ewDist = -MAX_SENSE + testS[7];
00276 rooms[room_id].key_ew = testS[1] + ewDist;
00277 }
00278 }
00279
00280
00281
00282 bool LightWorld::terminal() const {
00283
00284
00285 return false;
00286 }
00287
00288 void LightWorld::reset() {
00289
00290
00291 rooms.resize(nrooms);
00292
00293 rooms[0].height = 8;
00294 rooms[0].width = 7;
00295 rooms[0].key_ns = 3;
00296 rooms[0].key_ew = 1;
00297 rooms[0].lock_ns = 2;
00298 rooms[0].lock_ew = 6;
00299 rooms[0].door_ns = 5;
00300 rooms[0].door_ew = 6;
00301
00302 rooms[1].height = 6;
00303 rooms[1].width = 5;
00304 rooms[1].key_ns = 3;
00305 rooms[1].key_ew = 2;
00306 rooms[1].lock_ns = 0;
00307 rooms[1].lock_ew = 3;
00308 rooms[1].door_ns = 0;
00309 rooms[1].door_ew = 1;
00310
00311 rooms[2].height = 8;
00312 rooms[2].width = 5;
00313 rooms[2].key_ns = -1;
00314 rooms[2].key_ew = -1;
00315 rooms[2].lock_ns = 3;
00316 rooms[2].lock_ew = 4;
00317 rooms[2].door_ns = 3;
00318 rooms[2].door_ew = 0;
00319
00320 rooms[3].height = 6;
00321 rooms[3].width = 7;
00322 rooms[3].key_ns = 1;
00323 rooms[3].key_ew = 1;
00324 rooms[3].lock_ns = 0;
00325 rooms[3].lock_ew = 4;
00326 rooms[3].door_ns = 5;
00327 rooms[3].door_ew = 2;
00328
00329 for (int i = 4; i < nrooms; i++){
00330 rooms[i].height = i+3;
00331 rooms[i].width = i+2;
00332 rooms[i].key_ns = i;
00333 rooms[i].key_ew = i-1;
00334 rooms[i].lock_ns = 0;
00335 rooms[i].lock_ew = i-2;
00336 rooms[i].door_ns = 0;
00337 rooms[i].door_ew = i;
00338 }
00339
00340
00341 room_id = 0;
00342 ns = rng.uniformDiscrete(1, rooms[0].height-2);
00343 ew = rng.uniformDiscrete(1, rooms[0].width-2);
00344 have_key = false;
00345 door_open = false;
00346 resetKey();
00347 updateSensors();
00348
00349 if (LWDEBUG) print_map();
00350
00351 }
00352
00353
00354 int LightWorld::getNumActions() {
00355 if (LWDEBUG) cout << "Return number of actions: " << NUM_ACTIONS << endl;
00356 return NUM_ACTIONS;
00357 }
00358
00359
00360 void LightWorld::print_map() const{
00361
00362
00363 cout << "\nLightWorld, Room " << room_id << endl;
00364
00365
00366 for (int j = rooms[room_id].height-1; j >= 0; --j){
00367
00368 for (int i = 0; i < rooms[room_id].width; i++){
00369 if (ns == j && ew == i) cout << "A";
00370 else if (j == rooms[room_id].key_ns && i == rooms[room_id].key_ew && !have_key) cout << "K";
00371 else if (j == rooms[room_id].lock_ns && i == rooms[room_id].lock_ew) cout << "L";
00372 else if (j == rooms[room_id].door_ns && i == rooms[room_id].door_ew) cout << "D";
00373 else if (j == 0 || i == 0 || j == rooms[room_id].height-1 || i == rooms[room_id].width-1) cout << "X";
00374 else cout << ".";
00375 }
00376 cout << endl;
00377 }
00378
00379 cout << "at " << ns << ", " << ew << endl;
00380 cout << "Key: " << have_key << " door: "<< door_open << endl;
00381 cout << "NORTH: key: " << key_n << ", door: " << door_n << ", lock: " << lock_n << endl;
00382 cout << "EAST: key: " << key_e << ", door: " << door_e << ", lock: " << lock_e << endl;
00383 cout << "SOUTH: key: " << key_s << ", door: " << door_s << ", lock: " << lock_s << endl;
00384 cout << "WEST: key: " << key_w << ", door: " << door_w << ", lock: " << lock_w << endl;
00385
00386
00387 }
00388
00389
00390
00391 void LightWorld::getMinMaxFeatures(std::vector<float> *minFeat,
00392 std::vector<float> *maxFeat){
00393
00394 minFeat->resize(s.size(), 0.0);
00395 maxFeat->resize(s.size(), MAX_SENSE);
00396
00397
00398 (*maxFeat)[4] = 2;
00399
00400
00401 (*maxFeat)[2] = 1;
00402 (*maxFeat)[3] = 1;
00403
00404
00405 (*maxFeat)[0] = 8;
00406 (*maxFeat)[1] = 8;
00407
00408 }
00409
00410 void LightWorld::getMinMaxReward(float *minR,
00411 float *maxR){
00412
00413 *minR = 0.0;
00414 *maxR = 10.0;
00415
00416 }
00417
00418