Go to the documentation of this file.00001
00007 #include <rl_env/MountainCar.hh>
00008
00009
00010 MountainCar::MountainCar(Random &rand):
00011 noisy(false),
00012 rng(rand),
00013 s(2),
00014 pos(s[0]),
00015 vel(s[1]),
00016 linear(false),
00017 delay(0)
00018 {
00019 reset();
00020
00021 }
00022
00023
00024 MountainCar::MountainCar(Random &rand, bool stochastic, bool lin, int delay):
00025 noisy(stochastic),
00026 rng(rand),
00027 s(2),
00028 pos(s[0]),
00029 vel(s[1]),
00030 linear(lin),
00031 delay(delay)
00032 {
00033 reset();
00034 }
00035
00036
00037 MountainCar::~MountainCar() { }
00038
00039 const std::vector<float> &MountainCar::sensation() const {
00040
00041
00042 return s;
00043 }
00044
00045 float MountainCar::apply(int action) {
00046
00047
00048
00049 float actVal = ((float)action-1.0);
00050 if (noisy){
00051 actVal += rng.uniform(-0.5, 0.5);
00052 }
00053
00054 float newVel = vel;
00055 if (linear){
00056
00057 newVel = vel + 0.001 * actVal + -0.0075*pos;
00058 } else {
00059 newVel = vel + 0.001 * actVal + -0.0025*cos(3.0*pos);
00060 }
00061
00062 newVel = bound(newVel, -0.07, 0.07);
00063
00064 float newPos = pos + vel;
00065 if (newPos < -1.2f && newVel < 0.0f)
00066 newVel = 0.0;
00067 newPos = bound(newPos, -1.2, 0.6);
00068
00069 pos = newPos;
00070 vel = newVel;
00071
00072 if (delay > 0){
00073 posHistory.push_back(newPos);
00074 pos = posHistory.front();
00075 posHistory.pop_front();
00076 velHistory.push_back(newVel);
00077 vel = velHistory.front();
00078 velHistory.pop_front();
00079
00080
00081 }
00082
00083 return reward();
00084
00085 }
00086
00087 float MountainCar::bound(float val, float min, float max){
00088 if (val < min)
00089 return min;
00090 if (val > max)
00091 return max;
00092 return val;
00093 }
00094
00095
00096 float MountainCar::reward() {
00097
00098
00099 if (terminal())
00100 return 0;
00101 else
00102 return -1;
00103
00104 }
00105
00106
00107 bool MountainCar::terminal() const {
00108
00109 return (pos >= 0.6);
00110 }
00111
00112
00113
00114 void MountainCar::reset() {
00115
00116 if (noisy){
00117 pos = rng.uniform(-1.2, 0.59);
00118 vel = rng.uniform(-0.07, 0.07);
00119 } else {
00120 pos = 0;
00121 vel = 0;
00122 }
00123
00124 pos = rng.uniform(-1.2, 0.59);
00125 vel = rng.uniform(-0.07, 0.07);
00126
00127 if (delay > 0){
00128 posHistory.clear();
00129 velHistory.clear();
00130 for (int i = 0; i < delay; i++){
00131 posHistory.push_back(pos);
00132 velHistory.push_back(vel);
00133 }
00134 }
00135
00136 }
00137
00138
00139 int MountainCar::getNumActions(){
00140 return 3;
00141 }
00142
00143
00144 void MountainCar::setSensation(std::vector<float> newS){
00145 if (s.size() != newS.size()){
00146 cerr << "Error in sensation sizes" << endl;
00147 }
00148
00149 for (unsigned i = 0; i < newS.size(); i++){
00150 s[i] = newS[i];
00151 }
00152 }
00153
00154 std::vector<experience> MountainCar::getSeedings() {
00155
00156 int origDelay = delay;
00157 delay = 0;
00158
00159
00160 std::vector<experience> seeds;
00161
00162
00163 seeds.push_back(getExp(0.58, 0.03, 2));
00164
00165
00166
00167 for (int i = 0; i < getNumActions(); i++){
00168 float p = rng.uniform(-1.2, 0.6);
00169 float v = rng.uniform(-0.07, 0.07);
00170 seeds.push_back(getExp(p, v, i));
00171 }
00172
00173 delay = origDelay;
00174
00175 reset();
00176
00177 return seeds;
00178
00179 }
00180
00181 experience MountainCar::getExp(float s0, float s1, int a){
00182
00183 experience e;
00184
00185 e.s.resize(2, 0.0);
00186 e.next.resize(2, 0.0);
00187
00188 pos = s0;
00189 vel = s1;
00190
00191 e.act = a;
00192 e.s = sensation();
00193 e.reward = apply(e.act);
00194
00195 e.terminal = terminal();
00196 e.next = sensation();
00197
00198 reset();
00199
00200 return e;
00201 }
00202
00203
00204 void MountainCar::getMinMaxFeatures(std::vector<float> *minFeat,
00205 std::vector<float> *maxFeat){
00206
00207 minFeat->resize(s.size(), 0.0);
00208 maxFeat->resize(s.size(), 1.0);
00209
00210 (*minFeat)[0] = -1.2;
00211 (*maxFeat)[0] = 0.6;
00212
00213 (*minFeat)[1] = -0.07;
00214 (*maxFeat)[1] = 0.07;
00215
00216 }
00217
00218 void MountainCar::getMinMaxReward(float *minR,
00219 float *maxR){
00220
00221 *minR = -1.0;
00222 *maxR = 0.0;
00223
00224 }