00001
00011 #include <rl_env/RobotCarVel.hh>
00012
00013
00014 RobotCarVel::RobotCarVel(Random &rand, bool randomVel, bool upVel, bool tenToSix, bool lag):
00015 rng(rand),
00016 s(4),
00017 hidden(2),
00018 targetVel(s[0]),
00019 currVel(s[1]),
00020 trueThrottle(hidden[0]),
00021 trueBrake(hidden[1]),
00022 throttleTarget(s[2]),
00023 brakeTarget(s[3]),
00024 randomVel(randomVel),
00025 upVel(upVel),
00026 tenToSix(tenToSix),
00027 lag(lag)
00028 {
00029 reset();
00030 }
00031
00032
00033
00034 RobotCarVel::~RobotCarVel() { }
00035
00036 const std::vector<float> &RobotCarVel::sensation() const {
00037 return s;
00038 }
00039
00040 float RobotCarVel::apply(int action) {
00041
00042 float HZ = 10.0;
00043
00044 actNum++;
00045
00046
00047 float reward = -10.0 * fabs(currVel - targetVel);
00048
00049 float throttleChangePct = 1.0;
00050 float brakeChangePct = 1.0;
00051 if (lag){
00052 brakeChangePct = brakePosVel / HZ;
00053 float brakeVelTarget = 3.0*(brakeTarget - trueBrake);
00054 brakePosVel += (brakeVelTarget - brakePosVel) * 3.0 / HZ;
00055 trueBrake += brakeChangePct;
00056 } else {
00057 trueBrake += (brakeTarget-trueBrake) * brakeChangePct;
00058 }
00059 trueBrake = bound(trueBrake, 0.0, 1.0);
00060
00061
00062 trueThrottle += (throttleTarget-trueThrottle) * throttleChangePct;
00063 trueThrottle = bound(trueThrottle, 0.0, 0.4);
00064
00065
00066
00067 float g = 9.81;
00068 float throttle_accel = g;
00069 float brake_decel = g;
00070 float rolling_resistance = 0.01 * g;
00071 float drag_coeff = 0.01;
00072 float idle_accel = (rolling_resistance
00073 + drag_coeff * 3.1 * 3.1);
00074 float wind_resistance = drag_coeff * currVel * currVel;
00075 float accel = (idle_accel
00076 + trueThrottle * throttle_accel
00077 - trueBrake * brake_decel
00078 - rolling_resistance
00079 - wind_resistance);
00080 currVel += (accel / HZ);
00081 currVel = bound(currVel, 0.0, 12.0);
00082
00083
00084
00085 if (action == THROTTLE_UP){
00086 brakeTarget = 0.0;
00087 if (throttleTarget < 0.4)
00088 throttleTarget += 0.1;
00089 }
00090 else if (action == THROTTLE_DOWN){
00091 brakeTarget = 0.0;
00092 if (throttleTarget > 0.0)
00093 throttleTarget -= 0.1;
00094 }
00095 else if (action == BRAKE_UP){
00096 throttleTarget = 0.0;
00097 if (brakeTarget < 1.0)
00098 brakeTarget += 0.1;
00099 }
00100 else if (action == BRAKE_DOWN){
00101 throttleTarget = 0.0;
00102 if (brakeTarget > 0.0)
00103 brakeTarget -= 0.1;
00104 }
00105 else if (action != NOTHING){
00106 cout << "invalid action " << action << endl;
00107 }
00108 throttleTarget = bound(throttleTarget, 0.0, 0.4);
00109 brakeTarget = bound(brakeTarget, 0.0, 1.0);
00110 throttleTarget = 0.1 * (float)((int)(throttleTarget*10.0));
00111 brakeTarget = 0.1 * (float)((int)(brakeTarget*10.0));
00112
00113
00114
00115
00116
00117
00118
00119 return reward;
00120
00121 }
00122
00123
00124 bool RobotCarVel::terminal() const {
00125 return false;
00126 }
00127
00128
00129
00130 void RobotCarVel::reset() {
00131
00132
00133 if (randomVel){
00134 targetVel = rng.uniformDiscrete(0, 11);
00135 currVel = rng.uniformDiscrete(0, 11);
00136 } else {
00137 if (tenToSix){
00138 if (upVel){
00139 targetVel = 10.0;
00140 currVel = 6.0;
00141 } else {
00142 targetVel = 6.0;
00143 currVel = 10.0;
00144 }
00145 } else {
00146 if (upVel){
00147 targetVel = 7.0;
00148 currVel = 2.0;
00149 } else {
00150 targetVel = 2.0;
00151 currVel = 7.0;
00152 }
00153 }
00154 }
00155
00156 actNum = 0;
00157 throttleTarget = rng.uniformDiscrete(0,4) * 0.1;
00158 brakeTarget = 0.0;
00159 trueThrottle = throttleTarget;
00160 trueBrake = brakeTarget;
00161 brakePosVel = 0.0;
00162
00163 }
00164
00165
00166
00167 int RobotCarVel::getNumActions(){
00168 return 5;
00169 }
00170
00171
00172 void RobotCarVel::setSensation(std::vector<float> newS){
00173 if (s.size() != newS.size()){
00174 cerr << "Error in sensation sizes" << endl;
00175 }
00176
00177 for (unsigned i = 0; i < newS.size(); i++){
00178 s[i] = newS[i];
00179 }
00180 }
00181
00182 std::vector<experience> RobotCarVel::getSeedings() {
00183
00184
00185 std::vector<experience> seeds;
00186
00187
00188
00189 reset();
00190
00191
00192
00193
00194
00195
00196
00197
00198
00199
00200
00201
00202
00203
00204
00205
00206
00207
00208
00209
00210 for (int i = 0; i < 25; i++){
00211 float vel = rng.uniform(0,11);
00212
00213 float throt = 0;
00214 float brake = 0;
00215 if (rng.bernoulli(0.5)){
00216 throt = rng.uniformDiscrete(0,4)*0.1;
00217 } else {
00218 brake = rng.uniformDiscrete(0,9)*0.1;
00219 }
00220 int act = i%getNumActions();
00221 seeds.push_back(getExp(targetVel,vel,throt,brake,act));
00222 }
00223
00224 reset();
00225
00226 return seeds;
00227 }
00228
00229 experience RobotCarVel::getRandomVelSeed(float target){
00230 float throt = 0;
00231 float brake = 0;
00232 if (rng.bernoulli(0.5)){
00233 throt = rng.uniformDiscrete(0,4)*0.1;
00234 } else {
00235 brake = rng.uniformDiscrete(0,4)*0.1;
00236 }
00237
00238 return getExp(target, target, throt, brake, rng.uniformDiscrete(0,4));
00239 }
00240
00241 experience RobotCarVel::getExp(float s0, float s1, float s2, float s3, int a){
00242
00243 if (!randomVel) s0 = targetVel;
00244
00245 experience e;
00246
00247 e.s.resize(4, 0.0);
00248 e.next.resize(4, 0.0);
00249
00250 targetVel = s0;
00251 currVel = s1;
00252 throttleTarget = s2;
00253 brakeTarget = s3;
00254 trueThrottle = throttleTarget;
00255 trueBrake = brakeTarget;
00256 brakePosVel = 0.0;
00257
00258 e.act = a;
00259 e.s = sensation();
00260 e.reward = apply(e.act);
00261
00262 e.terminal = terminal();
00263 e.next = sensation();
00264
00265
00266
00267
00268
00269
00270
00271
00272
00273
00274
00275
00276
00277 reset();
00278
00279 return e;
00280 }
00281
00282
00283 void RobotCarVel::getMinMaxFeatures(std::vector<float> *minFeat,
00284 std::vector<float> *maxFeat){
00285
00286 minFeat->resize(s.size(), 0.0);
00287 maxFeat->resize(s.size(), 12.0);
00288
00289 (*maxFeat)[2] = 0.4;
00290 (*maxFeat)[3] = 1.0;
00291
00292 }
00293
00294 void RobotCarVel::getMinMaxReward(float *minR,
00295 float *maxR){
00296
00297 *minR = -120.0;
00298 *maxR = 0.0;
00299
00300 }
00301
00302 float RobotCarVel::bound(float val, float min, float max){
00303 if (val < min)
00304 return min;
00305 if (val > max)
00306 return max;
00307 return val;
00308 }