00001
00006 #include "ValueIteration.hh"
00007 #include <algorithm>
00008
00009 #include <sys/time.h>
00010
00011
00012 ValueIteration::ValueIteration(int numactions, float gamma,
00013 int MAX_LOOPS, float MAX_TIME, int modelType,
00014 const std::vector<float> &fmax,
00015 const std::vector<float> &fmin,
00016 const std::vector<int> &n, Random newRng):
00017 numactions(numactions), gamma(gamma),
00018 MAX_LOOPS(MAX_LOOPS), MAX_TIME(MAX_TIME), modelType(modelType),
00019 statesPerDim(n)
00020 {
00021 rng = newRng;
00022
00023 nstates = 0;
00024 nactions = 0;
00025
00026 timingType = false;
00027
00028 model = NULL;
00029 planTime = getSeconds();
00030
00031
00032 MAX_STEPS = 100;
00033
00034 PLANNERDEBUG = false;
00035 POLICYDEBUG = false;
00036 ACTDEBUG = false;
00037 MODELDEBUG = false;
00038
00039 featmax = fmax;
00040 featmin = fmin;
00041
00042 if (statesPerDim[0] > 0){
00043 cout << "Planner VI using discretization of " << statesPerDim[0] << endl;
00044 }
00045
00046
00047 }
00048
00049 ValueIteration::~ValueIteration() {
00050 for (std::map<state_t, state_info>::iterator i = statedata.begin();
00051 i != statedata.end(); i++){
00052
00053
00054
00055 state_info* info = &((*i).second);
00056
00057 deleteInfo(info);
00058 }
00059
00060 statedata.clear();
00061
00062 }
00063
00064 void ValueIteration::setModel(MDPModel* m){
00065
00066 model = m;
00067
00068
00069
00070 }
00071
00072
00073
00074 void ValueIteration::initStates(){
00075 cout << "init states" << endl;
00076 std::vector<float> s(featmin.size());
00077
00078 fillInState(s,0);
00079 cout << "init states complete" << endl;
00080 }
00081
00082 void ValueIteration::fillInState(std::vector<float>s, int depth){
00083
00084
00085 if (depth == (int)featmin.size()){
00086 canonicalize(s);
00087 return;
00088 }
00089
00090
00091 for (float i = featmin[depth]; i < featmax[depth]+1; i++){
00092 s[depth] = i;
00093 fillInState(s, depth+1);
00094 }
00095 }
00096
00097
00099
00101
00102
00103 void ValueIteration::initNewState(state_t s){
00104 if (PLANNERDEBUG) cout << "initNewState(s = " << s
00105 << ") size = " << s->size() << endl;
00106
00107 if (MODELDEBUG) cout << "New State: " << endl;
00108
00109
00110 state_info* info = &(statedata[s]);
00111 initStateInfo(info);
00112
00113
00114 for (int i = 0; i < numactions; i++){
00115 model->getStateActionInfo(*s, i, &(info->modelInfo[i]));
00116 }
00117
00118
00119 if (PLANNERDEBUG) cout << "done with initNewState()" << endl;
00120
00121 }
00122
00123 bool ValueIteration::updateModelWithExperience(const std::vector<float> &laststate,
00124 int lastact,
00125 const std::vector<float> &currstate,
00126 float reward, bool term){
00127 if (PLANNERDEBUG) cout << "updateModelWithExperience(last = " << &laststate
00128 << ", curr = " << &currstate
00129 << ", lastact = " << lastact
00130 << ", r = " << reward
00131 << ")" << endl;
00132
00133 if (!timingType)
00134 planTime = getSeconds();
00135
00136
00137 state_t last = canonicalize(laststate);
00138 state_t curr = canonicalize(currstate);
00139
00140 prevstate = laststate;
00141 prevact = lastact;
00142
00143
00144 if (curr == NULL)
00145 return false;
00146
00147
00148 state_info* info = &(statedata[last]);
00149
00150
00151 info->visits[lastact]++;
00152
00153
00154 if (model == NULL){
00155 cout << "ERROR IN MODEL OR MODEL SIZE" << endl;
00156 exit(-1);
00157 }
00158
00159 experience e;
00160 e.s = laststate;
00161 e.next = currstate;
00162 e.act = lastact;
00163 e.reward = reward;
00164 e.terminal = term;
00165 bool modelChanged = model->updateWithExperience(e);
00166
00167 if (PLANNERDEBUG) cout << "VI Added exp: " << modelChanged << endl;
00168 if (timingType)
00169 planTime = getSeconds();
00170
00171 return modelChanged;
00172
00173 }
00174
00175
00176 void ValueIteration::updateStateActionFromModel(const std::vector<float> &state, int a){
00177 if (PLANNERDEBUG) cout << "updateStateActionFromModel()" << endl;
00178
00179 state_t s = canonicalize(state);
00180
00181
00182 state_info* info = &(statedata[s]);
00183
00184
00185
00186 model->getStateActionInfo(state, a, &(info->modelInfo[a]));
00187
00188 info->fresh = false;
00189
00190 }
00191
00192
00193 void ValueIteration::updateStatesFromModel(){
00194 if (PLANNERDEBUG) cout << "updateStatesFromModel()" << endl;
00195
00196
00197 for (std::set<std::vector<float> >::iterator i = statespace.begin();
00198 i != statespace.end(); i++){
00199
00200 if (PLANNERDEBUG){
00201 cout << "updateStatesFromModel i = " << &(*i) << endl;
00202 cout << "State is ";
00203 for (unsigned j = 0; j < (*i).size(); j++){
00204 cout << (*i)[j] << ", ";
00205 }
00206 cout << endl;
00207 }
00208
00209 state_t s = canonicalize(*i);
00210
00211
00212 state_info* info = &(statedata[s]);
00213
00214
00215
00216 for (int j = 0; j < numactions; j++){
00217 model->getStateActionInfo(*s, j, &(info->modelInfo[j]));
00218 }
00219
00220
00221
00222 info->fresh = false;
00223
00224 if (PLANNERDEBUG) cout << "updateStatesFromModel i = " << &i << " complete" << endl;
00225
00226 }
00227
00228 if (PLANNERDEBUG) cout << "updateStatesFromModel " << " totally complete" << endl;
00229
00230 }
00231
00232
00233 int ValueIteration::getBestAction(const std::vector<float> &state){
00234 if (PLANNERDEBUG) cout << "getBestAction(s = " << &state
00235 << ")" << endl;
00236
00237 state_t s = canonicalize(state);
00238
00239
00240 state_info* info = &(statedata[s]);
00241
00242
00243 std::vector<float> &Q = info->Q;
00244
00245
00246 const std::vector<float>::iterator a =
00247 random_max_element(Q.begin(), Q.end());
00248
00249 int act = a - Q.begin();
00250 float val = *a;
00251 if (ACTDEBUG){
00252 cout << endl << "chooseAction State " << (*s)[0] << "," << (*s)[1]
00253 << " act: " << act << " val: " << val << endl;
00254 for (int iAct = 0; iAct < numactions; iAct++){
00255 cout << " Action: " << iAct
00256 << " val: " << Q[iAct]
00257 << " visits: " << info->visits[iAct]
00258 << " modelsAgree: " << info->modelInfo[iAct].known << endl;
00259 }
00260 }
00261
00262 nactions++;
00263
00264
00265 return act;
00266 }
00267
00268
00269
00270
00271
00272 void ValueIteration::createPolicy(){
00273 if (PLANNERDEBUG || POLICYDEBUG) cout << endl << "createPolicy()" << endl;
00274
00275 float maxError = 5000;
00276 int nloops = 0;
00277
00278 calculateReachableStates();
00279
00280 float MIN_ERROR = 0.0001;
00281
00282
00283 int statesUpdated = 0;
00284
00285
00286 while (maxError > MIN_ERROR){
00287
00288
00289
00290
00291
00292
00293
00294 if (POLICYDEBUG)
00295 cout << "max error: " << maxError << " nloops: " << nloops
00296 << endl;
00297
00298 maxError = 0;
00299 nloops++;
00300
00301
00302 for (std::set<std::vector<float> >::iterator i = statespace.begin();
00303 i != statespace.end(); i++){
00304
00305
00306
00307
00308 statesUpdated++;
00309 state_t s = canonicalize(*i);
00310
00311
00312 state_info* info = &(statedata[s]);
00313
00314 if (POLICYDEBUG){
00315 cout << endl << " State: id: " << info->id << ": " ;
00316 for (unsigned si = 0; si < s->size(); si++){
00317 cout << (*s)[si] << ",";
00318 }
00319 cout << " Steps: " << info->stepsAway << endl;
00320
00321 }
00322
00323
00324 if (info->stepsAway > 99999){
00325 if (POLICYDEBUG)
00326 cout << "State not reachable, ignoring." << endl;
00327 continue;
00328 }
00329
00330
00331 for (int act = 0; act < numactions; act++){
00332
00333
00334
00335
00336 StateActionInfo *modelInfo = &(info->modelInfo[act]);
00337
00338 if (POLICYDEBUG)
00339 cout << " Action: " << act
00340 << " State visits: " << info->visits[act]
00341 << " reward: " << modelInfo->reward
00342 << " term: " << modelInfo->termProb << endl;
00343
00344
00345
00346 float newQ = modelInfo->reward;
00347
00348 float probSum = modelInfo->termProb;
00349
00350
00351
00352 for (std::map<std::vector<float>, float>::iterator outIt
00353 = modelInfo->transitionProbs.begin();
00354 outIt != modelInfo->transitionProbs.end(); outIt++){
00355
00356 std::vector<float> nextstate = (*outIt).first;
00357
00358 if (POLICYDEBUG){
00359 cout << " Next state was: ";
00360 for (unsigned oi = 0; oi < nextstate.size(); oi++){
00361 cout << nextstate[oi] << ",";
00362 }
00363 cout << endl;
00364 }
00365
00366
00367 float transitionProb = (1.0-modelInfo->termProb) *
00368 modelInfo->transitionProbs[nextstate];
00369
00370 probSum += transitionProb;
00371
00372 if (POLICYDEBUG)
00373 cout << " prob: " << transitionProb << endl;
00374
00375 if (transitionProb < 0 || transitionProb > 1.0001){
00376 cout << "Error with transitionProb: " << transitionProb << endl;
00377 exit(-1);
00378 }
00379
00380
00381 if (transitionProb > 0.0){
00382
00383 float maxval = 0.0;
00384
00385
00386 bool realState = true;
00387
00388 for (unsigned b = 0; b < nextstate.size(); b++){
00389 if (nextstate[b] < (featmin[b]-EPSILON)
00390 || nextstate[b] > (featmax[b]+EPSILON)){
00391 realState = false;
00392 if (POLICYDEBUG)
00393 cout << " Next state is not valid (feature "
00394 << b << " out of range)" << endl;
00395 break;
00396 }
00397 }
00398
00399 state_t next;
00400
00401
00402 if (info->stepsAway >= MAX_STEPS || !realState){
00403 next = s;
00404 } else {
00405 next = canonicalize(nextstate);
00406 }
00407
00408 state_info* nextinfo = &(statedata[next]);
00409
00410
00411 int newSteps = info->stepsAway + 1;
00412 if (newSteps < nextinfo->stepsAway){
00413 if (POLICYDEBUG) {
00414 cout << " Setting state to "
00415 << newSteps << " steps away." << endl;
00416 }
00417 nextinfo->stepsAway = newSteps;
00418 }
00419
00420
00421 std::vector<float>::iterator maxAct =
00422 std::max_element(nextinfo->Q.begin(),
00423 nextinfo->Q.end());
00424 maxval = *maxAct;
00425
00426 nextstate.clear();
00427
00428 if (POLICYDEBUG) cout << " Max value: " << maxval << endl;
00429
00430
00431 newQ += (gamma * transitionProb * maxval);
00432
00433 }
00434
00435 }
00436
00437
00438 if (probSum < 0.9999 || probSum > 1.0001){
00439 cout << "Error: transition probabilities do not add to 1: Sum: "
00440 << probSum << endl;
00441 exit(-1);
00442 }
00443
00444
00445
00446 float tdError = fabs(info->Q[act] - newQ);
00447 if (POLICYDEBUG) cout << " NewQ: " << newQ
00448 << " OldQ: " << info->Q[act] << endl;
00449 info->Q[act] = newQ;
00450
00451
00452 if (tdError > maxError)
00453 maxError = tdError;
00454
00455 if (POLICYDEBUG)
00456 cout << " TD error: " << tdError
00457 << " Max error: " << maxError << endl;
00458
00459 }
00460
00461 }
00462
00463 }
00464
00465 if (false || nloops >= MAX_LOOPS){
00466 cout << nactions << " Policy creation ended with maxError: " << maxError
00467 << " nloops: " << nloops << " time: " << (getSeconds()-planTime)
00468 << " states: " << statesUpdated
00469 << endl;
00470 }
00471
00472
00473 removeUnreachableStates();
00474
00475 if (POLICYDEBUG) cout << nactions
00476 << " policy creation complete: maxError: "
00477 << maxError << " nloops: " << nloops
00478 << endl;
00479
00480
00481 }
00482
00483
00484 void ValueIteration::planOnNewModel(){
00485
00486
00487
00488 if (modelType == RMAX){
00489 updateStateActionFromModel(prevstate, prevact);
00490 }
00491 else {
00492 updateStatesFromModel();
00493 }
00494
00495
00496 createPolicy();
00497
00498 }
00499
00500
00502
00504
00505 ValueIteration::state_t ValueIteration::canonicalize(const std::vector<float> &s) {
00506 if (PLANNERDEBUG) cout << "canonicalize(s = " << s[0] << ", "
00507 << s[1] << ")" << endl;
00508
00509 std::vector<float> s2;
00510 if (statesPerDim[0] > 0){
00511 s2 = discretizeState(s);
00512 } else {
00513 s2 = s;
00514 }
00515
00516 if (PLANNERDEBUG) cout << "discretized(" << s2[0] << ", " << s2[1] << ")" << endl;
00517
00518
00519 const std::pair<std::set<std::vector<float> >::iterator, bool> result =
00520 statespace.insert(s2);
00521 state_t retval = &*result.first;
00522
00523 if (PLANNERDEBUG) cout << " returns " << retval
00524 << " New: " << result.second << endl;
00525
00526
00527 if (result.second) {
00528 initNewState(retval);
00529 if (PLANNERDEBUG) cout << " New state initialized" << endl;
00530 }
00531
00532 return retval;
00533 }
00534
00535
00536
00537
00538 void ValueIteration::initStateInfo(state_info* info){
00539 if (PLANNERDEBUG) cout << "initStateInfo()";
00540
00541 info->id = nstates++;
00542 if (PLANNERDEBUG) cout << " id = " << info->id << endl;
00543
00544 info->fresh = true;
00545 info->stepsAway = 100000;
00546
00547
00548 info->modelInfo = new StateActionInfo[numactions];
00549
00550
00551 info->visits.resize(numactions, 0);
00552 info->Q.resize(numactions, 0);
00553
00554 for (int i = 0; i < numactions; i++){
00555 info->Q[i] = rng.uniform(0,0.01);
00556 }
00557
00558 if (PLANNERDEBUG) cout << "done with initStateInfo()" << endl;
00559
00560 }
00561
00562
00563 void ValueIteration::printStates(){
00564
00565 for (std::set< std::vector<float> >::iterator i = statespace.begin();
00566 i != statespace.end(); i++){
00567
00568 state_t s = canonicalize(*i);
00569
00570 state_info* info = &(statedata[s]);
00571
00572 cout << "State " << info->id << ": ";
00573 for (unsigned j = 0; j < s->size(); j++){
00574 cout << (*s)[j] << ", ";
00575 }
00576 cout << endl;
00577
00578 for (int act = 0; act < numactions; act++){
00579 cout << " visits[" << act << "] = " << info->visits[act]
00580 << " Q: " << info->Q[act]
00581 << " R: " << info->modelInfo[act].reward << endl;
00582 }
00583
00584 }
00585 }
00586
00587
00588
00589
00590 void ValueIteration::calculateReachableStates(){
00591
00592
00593 for (std::set<std::vector<float> >::iterator i = statespace.begin();
00594 i != statespace.end(); i++){
00595
00596 state_t s = canonicalize(*i);
00597
00598
00599 state_info* info = &(statedata[s]);
00600
00601 info->stepsAway = 100000;
00602
00603
00604
00605
00606
00607
00608 for (int j = 0; j < numactions; j++){
00609 if (info->visits[j] > 0){
00610 info->stepsAway = 0;
00611 break;
00612 }
00613 }
00614 }
00615 }
00616
00617 void ValueIteration::removeUnreachableStates(){
00618
00619 return;
00620
00621
00622
00623 for (std::set<std::vector<float> >::iterator i = statespace.begin();
00624 i != statespace.end(); i++){
00625
00626 state_t s = canonicalize(*i);
00627
00628
00629 state_info* info = &(statedata[s]);
00630
00631
00632 if (info->stepsAway > MAX_STEPS){
00633
00634 if (POLICYDEBUG){
00635 cout << "Removing unreachable state: " << (*s)[0];
00636 for (unsigned i = 1; i < s->size(); i++){
00637 cout << ", " << (*s)[i];
00638 }
00639 cout << endl;
00640 }
00641
00642
00643 deleteInfo(info);
00644
00645
00646 statespace.erase(*s);
00647
00648
00649 statedata.erase(s);
00650
00651 }
00652 }
00653 }
00654
00655
00656
00657
00658
00659
00660 void ValueIteration::deleteInfo(state_info* info){
00661
00662 delete [] info->modelInfo;
00663
00664 }
00665
00666
00667 double ValueIteration::getSeconds(){
00668 struct timezone tz;
00669 timeval timeT;
00670 gettimeofday(&timeT, &tz);
00671 return timeT.tv_sec + (timeT.tv_usec / 1000000.0);
00672 }
00673
00674
00675 void ValueIteration::savePolicy(const char* filename){
00676
00677 ofstream policyFile(filename, ios::out | ios::binary | ios::trunc);
00678
00679
00680 int fsize = featmin.size();
00681 policyFile.write((char*)&fsize, sizeof(int));
00682
00683
00684 policyFile.write((char*)&numactions, sizeof(int));
00685
00686
00687 for (std::set< std::vector<float> >::iterator i = statespace.begin();
00688 i != statespace.end(); i++){
00689
00690 state_t s = canonicalize(*i);
00691 state_info* info = &(statedata[s]);
00692
00693
00694 policyFile.write((char*)&((*i)[0]), sizeof(float)*fsize);
00695
00696
00697 policyFile.write((char*)&(info->Q[0]), sizeof(float)*numactions);
00698
00699 }
00700
00701 policyFile.close();
00702 }
00703
00704
00705
00706
00707 std::vector<float> ValueIteration::discretizeState(const std::vector<float> &s){
00708 std::vector<float> ds(s.size());
00709
00710 for (unsigned i = 0; i < s.size(); i++){
00711
00712
00713
00714
00715
00716 float factor = (featmax[i] - featmin[i]) / (float)statesPerDim[i];
00717 int bin = 0;
00718 if (s[i] > 0){
00719 bin = (int)((s[i]+factor/2) / factor);
00720 } else {
00721 bin = (int)((s[i]-factor/2) / factor);
00722 }
00723
00724 ds[i] = factor*bin;
00725
00726
00727 }
00728
00729 return ds;
00730 }