00001
00007 #include <rl_env/tworooms.hh>
00008
00009
00010 TwoRooms::TwoRooms(Random &rand, bool stochastic, bool rewardType,
00011 int actDelay, bool multiGoal):
00012 grid(create_default_map()),
00013 goal(coord_t(1.,1.)),
00014 goal2(coord_t(4.,1.)),
00015 negReward(rewardType),
00016 noisy(stochastic),
00017 actDelay(actDelay),
00018 multiGoal(multiGoal),
00019 rng(rand),
00020 doorway(coord_t(2.,5.)),
00021 s(2),
00022 ns(s[0]),
00023 ew(s[1])
00024 {
00025 reset();
00026 }
00027
00028
00029 TwoRooms::~TwoRooms() { delete grid; }
00030
00031 const std::vector<float> &TwoRooms::sensation() const {
00032
00033
00034 return s;
00035 }
00036
00037 float TwoRooms::apply(int action) {
00038
00039
00040
00041 int actUsed = action;
00042
00043 if (actDelay > 0){
00044 actUsed = actHistory.front();
00045 actHistory.push_back(action);
00046 actHistory.pop_front();
00047 }
00048
00049 if (actUsed > -1){
00050
00051 const room_action_t effect =
00052 noisy
00053 ? add_noise(static_cast<room_action_t>(actUsed))
00054 : static_cast<room_action_t>(actUsed);
00055 switch(effect) {
00056 case NORTH:
00057 if (!grid->wall(static_cast<unsigned>(ns),
00058 static_cast<unsigned>(ew),
00059 effect))
00060 {
00061 ++ns;
00062 }
00063 return reward();
00064 case SOUTH:
00065 if (!grid->wall(static_cast<unsigned>(ns),
00066 static_cast<unsigned>(ew),
00067 effect))
00068 {
00069 --ns;
00070 }
00071 return reward();
00072 case EAST:
00073 if (!grid->wall(static_cast<unsigned>(ns),
00074 static_cast<unsigned>(ew),
00075 effect))
00076 {
00077 ++ew;
00078 }
00079 return reward();
00080 case WEST:
00081 if (!grid->wall(static_cast<unsigned>(ns),
00082 static_cast<unsigned>(ew),
00083 effect))
00084 {
00085 --ew;
00086 }
00087 return reward();
00088 }
00089
00090 std::cerr << "Unreachable point reached in TwoRooms::apply!!!\n";
00091 }
00092
00093 return 0;
00094 }
00095
00096
00097 float TwoRooms::reward() {
00098
00099
00100
00101
00102
00103
00104
00105
00106 if (negReward){
00107
00108 if (terminal())
00109 return 0;
00110 else
00111 return -1;
00112
00113 }else{
00114
00115
00116 if (terminal())
00117 return 1;
00118 else
00119 return 0;
00120 }
00121 }
00122
00123
00124
00125 bool TwoRooms::terminal() const {
00126
00127 if (useGoal2)
00128 return coord_t(ns,ew) == goal2;
00129 else
00130 return coord_t(ns,ew) == goal;
00131 }
00132
00133
00134 void TwoRooms::reset() {
00135
00136 ns = rng.uniformDiscrete(0, grid->height() - 1 );
00137 ew = rng.uniformDiscrete(6, grid->width() - 1);
00138
00139
00140 actHistory.clear();
00141 actHistory.assign(actDelay, -1);
00142
00143 if (multiGoal){
00144 useGoal2 = rng.bernoulli(0.5);
00145
00146 }
00147 else {
00148 useGoal2 = false;
00149 }
00150
00151
00152
00153 }
00154
00155
00156
00157 int TwoRooms::getNumActions(){
00158 return 4;
00159 }
00160
00161
00162 const Gridworld *TwoRooms::create_default_map() {
00163 int width = 11;
00164 int height = 5;
00165 std::vector<std::vector<bool> > nsv(width, std::vector<bool>(height-1,false));
00166 std::vector<std::vector<bool> > ewv(height, std::vector<bool>(width-1,false));
00167
00168
00169 for (int j = 0; j < height; j++){
00170
00171 if (j == 2)
00172 continue;
00173 ewv[j][4] = true;
00174 ewv[j][5] = true;
00175 }
00176
00177 nsv[5][1] = true;
00178 nsv[5][2] = true;
00179
00180
00181 doorway = coord_t(2, 5);
00182
00183 return new Gridworld(height, width, nsv, ewv);
00184 }
00185
00186 TwoRooms::room_action_t TwoRooms::add_noise(room_action_t action) {
00187 switch(action) {
00188 case NORTH:
00189 case SOUTH:
00190 return rng.bernoulli(0.8) ? action : (rng.bernoulli(0.5) ? EAST : WEST);
00191 case EAST:
00192 case WEST:
00193 return rng.bernoulli(0.8) ? action : (rng.bernoulli(0.5) ? NORTH : SOUTH);
00194 default:
00195 return action;
00196 }
00197 }
00198
00199
00200 void TwoRooms::randomize_goal() {
00201 const unsigned n = grid->height() * grid->width();
00202 unsigned index = rng.uniformDiscrete(1,n) - 1;
00203 goal = coord_t(index / grid->width(), index % grid->width());
00204 }
00205
00206
00207 void TwoRooms::getMinMaxFeatures(std::vector<float> *minFeat,
00208 std::vector<float> *maxFeat){
00209
00210 minFeat->resize(s.size(), 0.0);
00211 maxFeat->resize(s.size(), 10.0);
00212
00213 (*maxFeat)[0] = 5.0;
00214
00215 }
00216
00217 void TwoRooms::getMinMaxReward(float *minR,
00218 float *maxR){
00219 if (negReward){
00220 *minR = -1.0;
00221 *maxR = 0.0;
00222 }else{
00223 *minR = 0.0;
00224 *maxR = 1.0;
00225 }
00226 }
00227
00228
00229 std::vector<experience> TwoRooms::getSeedings() {
00230
00231
00232 std::vector<experience> seeds;
00233
00234
00235
00236
00237
00238
00239 useGoal2 = false;
00240 actHistory.clear();
00241 actHistory.assign(actDelay, SOUTH);
00242 seeds.push_back(getExp(2,1,SOUTH));
00243
00244
00245 if (multiGoal){
00246 useGoal2 = true;
00247 actHistory.clear();
00248 actHistory.assign(actDelay, NORTH);
00249 seeds.push_back(getExp(3,1,NORTH));
00250 }
00251
00252
00253 actHistory.clear();
00254 actHistory.assign(actDelay, WEST);
00255 seeds.push_back(getExp(2,6,WEST));
00256
00257 reset();
00258
00259 return seeds;
00260
00261 }
00262
00263 experience TwoRooms::getExp(float s0, float s1, int a){
00264
00265 experience e;
00266
00267 e.s.resize(2, 0.0);
00268 e.next.resize(2, 0.0);
00269
00270 ns = s0;
00271 ew = s1;
00272
00273 e.act = a;
00274 e.s = sensation();
00275 e.reward = apply(e.act);
00276
00277 e.terminal = terminal();
00278 e.next = sensation();
00279
00280
00281
00282
00283
00284
00285 reset();
00286
00287 return e;
00288 }