00001
00008 #include <rl_env/taxi.hh>
00009
00010 const Taxi::DefaultLandmarks Taxi::defaultlandmarks;
00011
00012 Taxi::DefaultLandmarks::DefaultLandmarks() {
00013 push_back(value_type(4.,0.));
00014 push_back(value_type(0.,3.));
00015 push_back(value_type(4.,4.));
00016 push_back(value_type(0.,0.));
00017 }
00018
00019 Taxi::Taxi(Random &rand, const Gridworld *gridworld, bool stochastic):
00020 grid(gridworld), landmarks(4), noisy(stochastic), rng(rand),
00021 s(4),
00022 ns(s[0]),
00023 ew(s[1]),
00024 pass(s[2]),
00025 dest(s[3])
00026 {
00027 randomize_landmarks_to_corners();
00028 reset();
00029 }
00030
00031 Taxi::Taxi(Random &rand):
00032 grid(create_default_map()),
00033 landmarks(defaultlandmarks),
00034 noisy(false),
00035 rng(rand),
00036 s(4),
00037 ns(s[0]),
00038 ew(s[1]),
00039 pass(s[2]),
00040 dest(s[3])
00041 {
00042 reset();
00043 }
00044
00045 Taxi::Taxi(Random &rand, bool stochastic):
00046 grid(create_default_map()),
00047 landmarks(defaultlandmarks),
00048 noisy(stochastic),
00049 rng(rand),
00050 s(4),
00051 ns(s[0]),
00052 ew(s[1]),
00053 pass(s[2]),
00054 dest(s[3])
00055 {
00056 reset();
00057 }
00058
00059 Taxi::Taxi(Random &rand, unsigned width, unsigned height, bool stochastic):
00060 grid(new Gridworld(height, width, rand)),
00061 landmarks(4), noisy(stochastic), rng(rand),
00062 s(4),
00063 ns(s[0]),
00064 ew(s[1]),
00065 pass(s[2]),
00066 dest(s[3])
00067 {
00068 randomize_landmarks_to_corners();
00069 reset();
00070 }
00071
00072 Taxi::~Taxi() { delete grid; }
00073
00074 const std::vector<float> &Taxi::sensation() const { return s; }
00075
00076 float Taxi::apply(int action) {
00077 const taxi_action_t effect =
00078 noisy
00079 ? add_noise(static_cast<taxi_action_t>(action))
00080 : static_cast<taxi_action_t>(action);
00081 switch(effect) {
00082 case NORTH:
00083 if (!grid->wall(static_cast<unsigned>(ns),
00084 static_cast<unsigned>(ew),
00085 effect))
00086 {
00087 ++ns;
00088 apply_fickle_passenger();
00089 }
00090 return -1;
00091 case SOUTH:
00092 if (!grid->wall(static_cast<unsigned>(ns),
00093 static_cast<unsigned>(ew),
00094 effect))
00095 {
00096 --ns;
00097 apply_fickle_passenger();
00098 }
00099 return -1;
00100 case EAST:
00101 if (!grid->wall(static_cast<unsigned>(ns),
00102 static_cast<unsigned>(ew),
00103 effect))
00104 {
00105 ++ew;
00106 apply_fickle_passenger();
00107 }
00108 return -1;
00109 case WEST:
00110 if (!grid->wall(static_cast<unsigned>(ns),
00111 static_cast<unsigned>(ew),
00112 effect))
00113 {
00114 --ew;
00115 apply_fickle_passenger();
00116 }
00117 return -1;
00118 case PICKUP: {
00119 if (pass < landmarks.size()
00120 && coord_t(ns,ew) == landmarks[static_cast<unsigned>(pass)])
00121 {
00122 pass = landmarks.size();
00123 fickle = noisy;
00124 return -1;
00125 } else
00126 return -10;
00127 }
00128 case PUTDOWN:
00129 if (pass == landmarks.size()
00130 && coord_t(ns,ew) == landmarks[static_cast<unsigned>(dest)]) {
00131 pass = dest;
00132 return 20;
00133 } else
00134 return -10;
00135 }
00136 std::cerr << "Unreachable point reached in Taxi::apply!!!\n";
00137 return 0;
00138 }
00139
00140 bool Taxi::terminal() const {
00141 return pass == dest;
00142 }
00143
00144 void Taxi::reset() {
00145 ns = rng.uniformDiscrete(1, grid->height()) - 1;
00146 ew = rng.uniformDiscrete(1, grid->width()) - 1;
00147 pass = rng.uniformDiscrete(1, landmarks.size()) - 1;
00148 do dest = rng.uniformDiscrete(1, landmarks.size()) - 1;
00149 while (dest == pass);
00150 fickle = false;
00151 }
00152
00153
00154
00155 int Taxi::getNumActions() {
00156 return 6;
00157 }
00158
00159
00160 const Gridworld *Taxi::create_default_map() {
00161 std::vector<std::vector<bool> > nsv(5, std::vector<bool>(4,false));
00162 std::vector<std::vector<bool> > ewv(5, std::vector<bool>(4,false));
00163 ewv[0][0] = true;
00164 ewv[0][2] = true;
00165 ewv[1][0] = true;
00166 ewv[1][2] = true;
00167 ewv[3][1] = true;
00168 ewv[4][1] = true;
00169 return new Gridworld(5,5,nsv,ewv);
00170 }
00171
00172 Taxi::taxi_action_t Taxi::add_noise(taxi_action_t action) {
00173 switch(action) {
00174 case NORTH:
00175 case SOUTH:
00176 return rng.bernoulli(0.8) ? action : (rng.bernoulli(0.5) ? EAST : WEST);
00177 case EAST:
00178 case WEST:
00179 return rng.bernoulli(0.8) ? action : (rng.bernoulli(0.5) ? NORTH : SOUTH);
00180 default:
00181 return action;
00182 }
00183 }
00184
00185 void Taxi::apply_fickle_passenger() {
00186
00187 if (fickle) {
00188 fickle = false;
00189 if (rng.bernoulli(0.3)) {
00190 dest += rng.uniformDiscrete(1, landmarks.size() - 1);
00191 dest = static_cast<int>(dest) % landmarks.size();
00192 }
00193 }
00194
00195 }
00196
00197 void Taxi::randomize_landmarks() {
00198 std::vector<unsigned> indices(landmarks.size());
00199 const unsigned n = grid->height() * grid->width();
00200 for (unsigned i = 0; i < indices.size(); ++i) {
00201 unsigned index;
00202 bool duplicate;
00203 do {
00204 index = rng.uniformDiscrete(1,n) - 1;
00205 duplicate = false;
00206 for (unsigned j = 0; j < i; ++j)
00207 if (index == indices[j])
00208 duplicate = true;
00209 } while (duplicate);
00210 indices[i] = index;
00211 }
00212 for (unsigned i = 0; i < indices.size(); ++i)
00213 landmarks[i] = coord_t(indices[i] / grid->width(),
00214 indices[i] % grid->width());
00215 }
00216
00217 void Taxi::randomize_landmarks_to_corners() {
00218 for (unsigned i = 0; i < landmarks.size(); ++i) {
00219 int ns = rng.uniformDiscrete(0,1);
00220 int ew = rng.uniformDiscrete(0,1);
00221 if (1 == i/2)
00222 ns = grid->height() - ns - 1;
00223 if (1 == i%2)
00224 ew = grid->width() - ew - 1;
00225 landmarks[i] = coord_t(ns,ew);
00226 }
00227 }
00228
00229
00230 void Taxi::setSensation(std::vector<float> newS){
00231 if (s.size() != newS.size()){
00232 cerr << "Error in sensation sizes" << endl;
00233 }
00234
00235 for (unsigned i = 0; i < newS.size(); i++){
00236 s[i] = (int)newS[i];
00237 }
00238 }
00239
00240 std::vector<experience> Taxi::getSeedings() {
00241
00242
00243 std::vector<experience> seeds;
00244
00245 if (true)
00246 return seeds;
00247
00248
00249
00250 for (int i = 0; i < 4; i++){
00251
00252 seeds.push_back(getExp(landmarks[i].first, landmarks[i].second, 4, i, PUTDOWN));
00253
00254 seeds.push_back(getExp(landmarks[i].first, landmarks[i].second, i, rng.uniformDiscrete(0,3), PICKUP));
00255 }
00256
00257 reset();
00258
00259 return seeds;
00260
00261 }
00262
00263 experience Taxi::getExp(float s0, float s1, float s2, float s3, int a){
00264
00265 experience e;
00266
00267 e.s.resize(4, 0.0);
00268 e.next.resize(4, 0.0);
00269
00270 ns = s0;
00271 ew = s1;
00272 pass = s2;
00273 dest = s3;
00274
00275 e.act = a;
00276 e.s = sensation();
00277 e.reward = apply(e.act);
00278
00279 e.terminal = terminal();
00280 e.next = sensation();
00281
00282 return e;
00283 }
00284
00285
00286 void Taxi::getMinMaxFeatures(std::vector<float> *minFeat,
00287 std::vector<float> *maxFeat){
00288
00289 minFeat->resize(s.size(), 0.0);
00290 maxFeat->resize(s.size(), 1.0);
00291
00292 (*minFeat)[0] = 0.0;
00293 (*maxFeat)[0] = 4.0;
00294 (*minFeat)[1] = 0.0;
00295 (*maxFeat)[1] = 4.0;
00296 (*minFeat)[2] = 0.0;
00297 (*maxFeat)[2] = 4.0;
00298 (*minFeat)[3] = 0.0;
00299 (*maxFeat)[3] = 3.0;
00300
00301 }
00302
00303 void Taxi::getMinMaxReward(float *minR,
00304 float *maxR){
00305
00306 *minR = -10.0;
00307 *maxR = 20.0;
00308
00309 }