00001
00010 #include "PO_ETUCT.hh"
00011 #include <algorithm>
00012
00013 #include <sys/time.h>
00014
00015
00016 PO_ETUCT::PO_ETUCT(int numactions, float gamma, float rrange, float lambda,
00017 int MAX_ITER, float MAX_TIME, int MAX_DEPTH, int modelType,
00018 const std::vector<float> &fmax, const std::vector<float> &fmin,
00019 const std::vector<int> &nstatesPerDim, bool trackActual,
00020 int historySize, Random r):
00021 numactions(numactions), gamma(gamma), rrange(rrange), lambda(lambda),
00022 MAX_ITER(MAX_ITER), MAX_TIME(MAX_TIME),
00023 MAX_DEPTH(MAX_DEPTH), modelType(modelType), statesPerDim(nstatesPerDim),
00024 trackActual(trackActual), HISTORY_SIZE(historySize),
00025 HISTORY_FL_SIZE(historySize*numactions)
00026 {
00027 rng = r;
00028
00029 nstates = 0;
00030 nactions = 0;
00031 lastUpdate = -1;
00032 seedMode = false;
00033
00034 timingType = true;
00035
00036 model = NULL;
00037 planTime = getSeconds();
00038
00039 PLANNERDEBUG = false;
00040 ACTDEBUG = false;
00041 MODELDEBUG = false;
00042 UCTDEBUG = false;
00043 REALSTATEDEBUG = false;
00044 HISTORYDEBUG = false;
00045
00046 featmax = fmax;
00047 featmin = fmin;
00048
00049 if (statesPerDim[0] > 0){
00050 cout << "Planner PO_ETUCT using discretization of " << statesPerDim[0] << endl;
00051 }
00052 if (trackActual){
00053 cout << "PO_ETUCT tracking real state values" << endl;
00054 }
00055
00056 if (HISTORY_SIZE == 0){
00057 saHistory.push_back(0.0);
00058 }
00059 else {
00060 if (HISTORYDEBUG) {
00061 cout << "History size of " << HISTORY_SIZE
00062 << " float size of " << HISTORY_FL_SIZE
00063 << " with state size: " << fmin.size()
00064 << " and numact: " << numactions << endl;
00065 }
00066 for (int i = 0; i < HISTORY_FL_SIZE; i++){
00067 saHistory.push_back(0.0);
00068 }
00069 }
00070
00071 }
00072
00073 PO_ETUCT::~PO_ETUCT() {
00074
00075
00076
00077 for (std::map<state_t, state_info>::iterator i = statedata.begin();
00078 i != statedata.end(); i++){
00079
00080
00081
00082 state_info* info = &((*i).second);
00083
00084 deleteInfo(info);
00085 }
00086
00087 featmax.clear();
00088 featmin.clear();
00089
00090 statespace.clear();
00091 statedata.clear();
00092
00093 }
00094
00095 void PO_ETUCT::setModel(MDPModel* m){
00096
00097 model = m;
00098
00099 }
00100
00101
00103
00105
00106
00107 void PO_ETUCT::initNewState(state_t s){
00108
00109
00110
00111
00112 state_info* info = &(statedata[s]);
00113 initStateInfo(s, info);
00114
00115
00116
00117
00118 }
00119
00120 bool PO_ETUCT::updateModelWithExperience(const std::vector<float> &laststate,
00121 int lastact,
00122 const std::vector<float> &currstate,
00123 float reward, bool term){
00124
00125
00126
00127
00128
00129
00130
00131 if (!timingType)
00132 planTime = getSeconds();
00133
00134 state_t last = NULL;
00135
00136
00137 if (HISTORY_SIZE > 0){
00138 std::vector<float> modState = laststate;
00139 if (HISTORYDEBUG) {
00140 cout << "Original state vector (size " << modState.size() << ": " << modState[0];
00141 for (unsigned i = 1; i < modState.size(); i++){
00142 cout << "," << modState[i];
00143 }
00144 cout << endl;
00145 }
00146
00147 for (int i = 0; i < HISTORY_FL_SIZE; i++){
00148 modState.push_back(saHistory[i]);
00149 }
00150
00151 if (HISTORYDEBUG) {
00152 cout << "New state vector (size " << modState.size() << ": " << modState[0];
00153 for (unsigned i = 1; i < modState.size(); i++){
00154 cout << "," << modState[i];
00155 }
00156 cout << endl;
00157 }
00158
00159 last = canonicalize(modState);
00160
00161 if (!seedMode){
00162
00163
00164
00165
00166
00167
00168
00169 for (int i = 0; i < numactions; i++){
00170 if (i == lastact)
00171 saHistory.push_back(1.0);
00172 else
00173 saHistory.push_back(0.0);
00174 saHistory.pop_front();
00175 }
00176 if (HISTORYDEBUG) {
00177 cout << "New history vector (size " << saHistory.size() << ": " << saHistory[0];
00178 for (unsigned i = 1; i < saHistory.size(); i++){
00179 cout << "," << saHistory[i];
00180 }
00181 cout << endl;
00182 }
00183 }
00184 }
00185
00186
00187 else {
00188
00189
00190 last = canonicalize(laststate);
00191 }
00192
00193 prevstate = last;
00194 prevact = lastact;
00195
00196
00197 previnfo = &(statedata[last]);
00198
00199
00200 if (model == NULL){
00201 cout << "ERROR IN MODEL OR MODEL SIZE" << endl;
00202 exit(-1);
00203 }
00204
00205 if (MODELDEBUG){
00206 cout << "Update with exp from state: ";
00207 for (unsigned i = 0; i < last->size(); i++){
00208 cout << (*last)[i] << ", ";
00209 }
00210 cout << " action: " << lastact;
00211 cout << " to state: ";
00212 for (unsigned i = 0; i < currstate.size(); i++){
00213 cout << currstate[i] << ", ";
00214 }
00215 cout << " and reward: " << reward << endl;
00216 }
00217
00218 experience e;
00219 e.s = *last;
00220 e.next = currstate;
00221 e.act = lastact;
00222 e.reward = reward;
00223 e.terminal = term;
00224
00225 bool modelChanged = model->updateWithExperience(e);
00226
00227 if (timingType)
00228 planTime = getSeconds();
00229
00230 return modelChanged;
00231
00232 }
00233
00234 void PO_ETUCT::updateStateActionFromModel(state_t s, int a, state_info* info){
00235
00236 StateActionInfo* newModel = NULL;
00237 newModel = &(info->model[a]);
00238
00239 updateStateActionHistoryFromModel(*s, a, newModel);
00240
00241 }
00242
00243 void PO_ETUCT::updateStateActionHistoryFromModel(const std::vector<float> modState, int a, StateActionInfo *newModel){
00244
00245
00246
00247 model->getStateActionInfo(modState, a, newModel);
00248 newModel->frameUpdated = nactions;
00249
00250 if (HISTORY_SIZE > 0){
00251
00252
00253 std::deque<float> newHistory;
00254 int stateSize = modState.size() - HISTORY_FL_SIZE;
00255
00256 if (HISTORYDEBUG) cout << "input history was: ";
00257 for (int i = 0; i < HISTORY_FL_SIZE; i++){
00258 newHistory.push_back(modState[i+stateSize]);
00259 if (HISTORYDEBUG) cout << modState[i+stateSize] << ", ";
00260 }
00261 if (HISTORYDEBUG) cout << endl;
00262
00263
00264 for (int i = 0; i < numactions; i++){
00265 if (i == a)
00266 newHistory.push_back(1.0);
00267 else
00268 newHistory.push_back(0.0);
00269 newHistory.pop_front();
00270 }
00271
00272 if (HISTORYDEBUG){
00273 cout << "act: " << a << ", new history:";
00274 for (unsigned i = 0; i < newHistory.size(); i++){
00275 cout << newHistory[i] << ", ";
00276 }
00277 cout << endl;
00278 }
00279
00280
00281 std::map< std::vector<float>, float> oldProbs = newModel->transitionProbs;
00282 newModel->transitionProbs.clear();
00283
00284 for (std::map<std::vector<float>, float>::iterator outIt
00285 = oldProbs.begin();
00286 outIt != oldProbs.end(); outIt++){
00287
00288 float prob = (*outIt).second;
00289 std::vector<float> next = (*outIt).first;
00290
00291 for (unsigned i = 0; i < newHistory.size(); i++){
00292 next.push_back(newHistory[i]);
00293 }
00294
00295 if (HISTORYDEBUG){
00296 cout << "add history onto prediction of state: ";
00297 for (unsigned i = 0; i < next.size(); i++){
00298 cout << next[i] << ", ";
00299 }
00300 cout << " with prob " << prob << endl;
00301 }
00302
00303 newModel->transitionProbs[next] = prob;
00304 }
00305 }
00306
00307
00308
00309 }
00310
00311
00312
00313 void PO_ETUCT::canonNextStates(StateActionInfo* modelInfo){
00314
00315
00316 for (std::map<std::vector<float>, float>::iterator outIt
00317 = modelInfo->transitionProbs.begin();
00318 outIt != modelInfo->transitionProbs.end(); outIt++){
00319
00320 std::vector<float> nextstate = (*outIt).first;
00321
00322
00323 bool badState = false;
00324 for (unsigned j = 0; j < featmax.size(); j++){
00325 if (nextstate[j] < (featmin[j]-EPSILON)
00326 || nextstate[j] > (featmax[j]+EPSILON)){
00327
00328 badState = true;
00329 break;
00330 }
00331 }
00332
00333 if (!badState){
00334 canonicalize(nextstate);
00335 }
00336 }
00337 }
00338
00339
00340
00341
00342 int PO_ETUCT::getBestAction(const std::vector<float> &state){
00343
00344
00345
00346
00347
00348 std::vector<float> modState = state;
00349 for (int i = 0; i < HISTORY_FL_SIZE; i++){
00350 modState.push_back(saHistory[i]);
00351 }
00352
00353 state_t s = canonicalize(modState);
00354
00355 int i = 0;
00356 for (i = 0; i < MAX_ITER; i++){
00357
00358 uctSearch(modState, s, 0);
00359
00360
00361 if ((getSeconds() - planTime) > MAX_TIME){
00362 break;
00363 }
00364
00365 }
00366 double currTime = getSeconds();
00367 if (false || UCTDEBUG){
00368 cout << "Search complete after " << (currTime-planTime) << " seconds and "
00369 << i << " iterations." << endl;
00370 }
00371
00372
00373 state_info* info = &(statedata[s]);
00374
00375
00376 std::vector<float> &Q = info->Q;
00377
00378
00379 const std::vector<float>::iterator a =
00380 random_max_element(Q.begin(), Q.end());
00381
00382 int act = a - Q.begin();
00383 nactions++;
00384
00385 if (UCTDEBUG){
00386 cout << "State " << (*s)[0];
00387 for (unsigned i = 1; i < s->size(); i++){
00388 cout << "," << (*s)[i];
00389 }
00390 cout << ", Took action " << act << ", "
00391 << "value: " << *a << endl;
00392 }
00393
00394
00395 return act;
00396 }
00397
00398
00399
00400
00401
00402
00403 void PO_ETUCT::planOnNewModel(){
00404
00405
00406 resetUCTCounts();
00407
00408
00409 if (modelType == RMAX){
00410 updateStateActionFromModel(prevstate, prevact, previnfo);
00411 }
00412
00413
00414 else {
00415
00416
00417
00418 for (std::set<std::vector<float> >::iterator i = statespace.begin();
00419 i != statespace.end(); i++){
00420 state_t s = canonicalize(*i);
00421 state_info* info = &(statedata[s]);
00422 if (info->needsUpdate){
00423 for (int j = 0; j < numactions; j++){
00424 updateStateActionFromModel(s, j, info);
00425 }
00426 info->needsUpdate = false;
00427 }
00428 }
00429 lastUpdate = nactions;
00430 }
00431
00432 }
00433
00434
00435 void PO_ETUCT::resetUCTCounts(){
00436
00437 const int MIN_VISITS = 10;
00438
00439
00440 for (std::set<std::vector<float> >::iterator i = statespace.begin();
00441 i != statespace.end(); i++){
00442 state_t s = canonicalize(*i);
00443
00444 state_info* info = &(statedata[s]);
00445
00446 if (info->uctVisits > (MIN_VISITS * numactions))
00447 info->uctVisits = MIN_VISITS * numactions;
00448
00449 for (int j = 0; j < numactions; j++){
00450 if (info->uctActions[j] > MIN_VISITS)
00451 info->uctActions[j] = MIN_VISITS;
00452 }
00453
00454 }
00455
00456 }
00457
00458
00459
00460
00462
00464
00465 PO_ETUCT::state_t PO_ETUCT::canonicalize(const std::vector<float> &s) {
00466
00467
00468
00469
00470 std::vector<float> s2;
00471 if (statesPerDim[0] > 0){
00472 s2 = discretizeState(s);
00473 } else {
00474 s2 = s;
00475 }
00476
00477
00478 const std::pair<std::set<std::vector<float> >::iterator, bool> result =
00479 statespace.insert(s2);
00480 state_t retval = &*result.first;
00481
00482
00483
00484
00485
00486 if (result.second) {
00487 initNewState(retval);
00488 if (PLANNERDEBUG) {
00489 cout << " New state initialized "
00490 << " orig:(" << s[0] << "," << s[1] << ")"
00491 << " disc:(" << s2[0] << "," << s2[1] << ")" << endl;
00492 }
00493 }
00494
00495
00496 return retval;
00497 }
00498
00499
00500
00501 void PO_ETUCT::initStateInfo(state_t s, state_info* info){
00502
00503
00504 info->id = nstates++;
00505 if (PLANNERDEBUG){
00506 cout << " id = " << info->id;
00507 cout << ", (" << (*s)[0] << "," << (*s)[1] << ")" << endl;
00508 }
00509
00510 info->model = new StateActionInfo[numactions];
00511
00512
00513 info->Q.resize(numactions, 0);
00514 info->uctActions.resize(numactions, 1);
00515 info->uctVisits = 1;
00516 info->visited = 0;
00517 info->needsUpdate = true;
00518
00519 for (int i = 0; i < numactions; i++){
00520 info->Q[i] = rng.uniform(0,0.01);
00521 }
00522
00523
00524
00525 }
00526
00527
00528 void PO_ETUCT::printStates(){
00529
00530 for (std::set< std::vector<float> >::iterator i = statespace.begin();
00531 i != statespace.end(); i++){
00532
00533 state_t s = canonicalize(*i);
00534
00535 state_info* info = &(statedata[s]);
00536
00537 cout << "State " << info->id << ": ";
00538 for (unsigned j = 0; j < s->size(); j++){
00539 cout << (*s)[j] << ", ";
00540 }
00541 cout << endl;
00542
00543 for (int act = 0; act < numactions; act++){
00544 cout << " Q: " << info->Q[act] << endl;
00545 }
00546
00547 }
00548 }
00549
00550
00551 void PO_ETUCT::deleteInfo(state_info* info){
00552
00553 delete [] info->model;
00554
00555 }
00556
00557
00558
00559 double PO_ETUCT::getSeconds(){
00560 struct timezone tz;
00561 timeval timeT;
00562 gettimeofday(&timeT, &tz);
00563 return timeT.tv_sec + (timeT.tv_usec / 1000000.0);
00564 }
00565
00566
00567
00568 float PO_ETUCT::uctSearch(const std::vector<float> &actS, state_t discS, int depth){
00569 if (UCTDEBUG){
00570 cout << " uctSearch state ";
00571 for (unsigned i = 0; i < actS.size(); i++){
00572 cout << actS[i] << ", ";
00573 }
00574 cout << " at depth " << depth << endl;
00575 }
00576
00577 state_info* info = &(statedata[discS]);
00578
00579
00580
00581
00582
00583
00584 if (depth > MAX_DEPTH){
00585
00586 std::vector<float>::iterator maxAct =
00587 std::max_element(info->Q.begin(),
00588 info->Q.end());
00589 float maxval = *maxAct;
00590
00591 if (UCTDEBUG)
00592 cout << "Terminated after depth: " << depth
00593
00594 << " Q: " << maxval
00595 << " visited: " << info->visited << endl;
00596
00597 return maxval;
00598 }
00599
00600
00601 int action = selectUCTAction(info);
00602
00603
00604
00605 float reward = 0;
00606 bool term = false;
00607
00608 float learnRate;
00609
00610
00611
00612 learnRate = 10.0 / (info->uctActions[action] + 10.0);
00613
00614
00615
00616
00617
00618
00619 info->needsUpdate = true;
00620
00621
00622 std::vector<float> actualNext = simulateNextState(actS, discS, info, action, &reward, &term);
00623
00624
00625 if (term){
00626
00627 if (UCTDEBUG) cout << " Terminated on exploration condition" << endl;
00628 info->Q[action] += learnRate * (reward - info->Q[action]);
00629 info->uctVisits++;
00630 info->uctActions[action]++;
00631 if (UCTDEBUG)
00632 cout << " Depth: " << depth << " Selected action " << action
00633 << " r: " << reward
00634 << " StateVisits: " << info->uctVisits
00635 << " ActionVisits: " << info->uctActions[action] << endl;
00636
00637 return reward;
00638 }
00639
00640
00641 state_t discNext = canonicalize(actualNext);
00642
00643 if (UCTDEBUG)
00644 cout << " Depth: " << depth << " Selected action " << action
00645 << " r: " << reward << endl;
00646
00647 info->visited++;
00648
00649
00650 float newQ = reward + gamma * uctSearch(actualNext, discNext, depth+1);
00651
00652 if (info->visited == 1){
00653
00654
00655 info->Q[action] += learnRate * (newQ - info->Q[action]);
00656 info->uctVisits++;
00657 info->uctActions[action]++;
00658
00659 if (UCTDEBUG)
00660 cout << " Depth: " << depth << " newQ: " << newQ
00661 << " StateVisits: " << info->uctVisits
00662 << " ActionVisits: " << info->uctActions[action] << endl;
00663
00664 if (lambda < 1.0){
00665
00666
00667 std::vector<float>::iterator maxAct =
00668 std::max_element(info->Q.begin(),
00669 info->Q.end());
00670 float maxval = *maxAct;
00671
00672 if (UCTDEBUG)
00673 cout << " Replacing newQ: " << newQ;
00674
00675
00676 newQ = (lambda * newQ) + ((1.0-lambda) * maxval);
00677
00678 if (UCTDEBUG)
00679 cout << " with wAvg: " << newQ << endl;
00680 }
00681
00682 }
00683
00684 info->visited--;
00685
00686
00687 return newQ;
00688
00689 }
00690
00691
00692 int PO_ETUCT::selectUCTAction(state_info* info){
00693
00694
00695 std::vector<float> &Q = info->Q;
00696
00697
00698 float rewardBound = rrange;
00699 if (rewardBound < 1.0)
00700 rewardBound = 1.0;
00701 rewardBound /= (1.0 - gamma);
00702 if (UCTDEBUG) cout << "Reward bound: " << rewardBound << endl;
00703
00704 std::vector<float> uctQ(numactions, 0.0);
00705
00706 for (int i = 0; i < numactions; i++){
00707
00708
00709 uctQ[i] = Q[i] +
00710 rewardBound * 2.0 * sqrt(log((float)info->uctVisits) /
00711 (float)info->uctActions[i]);
00712
00713 if (UCTDEBUG)
00714 cout << " Action: " << i << " Q: " << Q[i]
00715 << " visits: " << info->uctActions[i]
00716 << " value: " << uctQ[i] << endl;
00717 }
00718
00719
00720 std::vector<float>::iterator maxAct =
00721 max_element(uctQ.begin(), uctQ.end());
00722 float maxval = *maxAct;
00723 int act = maxAct - uctQ.begin();
00724
00725 if (UCTDEBUG)
00726 cout << " Selected " << act << " val: " << maxval << endl;
00727
00728 return act;
00729
00730 }
00731
00732
00733
00734 std::vector<float> PO_ETUCT::simulateNextState(const std::vector<float> &actualState, state_t discState, state_info* info, int action, float* reward, bool* term){
00735
00736 StateActionInfo* modelInfo = &(info->model[action]);
00737 bool upToDate = modelInfo->frameUpdated >= lastUpdate;
00738
00739 if (!upToDate){
00740
00741 updateStateActionHistoryFromModel(*discState, action, modelInfo);
00742
00743 }
00744
00745
00746 *reward = modelInfo->reward;
00747 *term = (rng.uniform() < modelInfo->termProb);
00748
00749 if (*term){
00750 return actualState;
00751 }
00752
00753 float randProb = rng.uniform();
00754
00755 float probSum = 0.0;
00756 std::vector<float> nextstate;
00757
00758 if (REALSTATEDEBUG) cout << "randProb: " << randProb << " numNext: " << modelInfo->transitionProbs.size() << endl;
00759
00760 if (modelInfo->transitionProbs.size() == 0)
00761 nextstate = actualState;
00762
00763 for (std::map<std::vector<float>, float>::iterator outIt
00764 = modelInfo->transitionProbs.begin();
00765 outIt != modelInfo->transitionProbs.end(); outIt++){
00766
00767 float prob = (*outIt).second;
00768 probSum += prob;
00769 if (REALSTATEDEBUG) cout << randProb << ", " << probSum << ", " << prob << endl;
00770
00771 if (randProb <= probSum){
00772 nextstate = (*outIt).first;
00773 if (REALSTATEDEBUG) cout << "selected state " << randProb << ", " << probSum << ", " << prob << endl;
00774 break;
00775 }
00776 }
00777
00778 if (trackActual){
00779
00780
00781 std::vector<float> relChange = subVec(nextstate, *discState);
00782
00783
00784 nextstate = addVec(actualState, relChange);
00785
00786
00787 }
00788
00789 if (UCTDEBUG){
00790 cout << "initial prediction: ";
00791 for (unsigned i = 0; i < nextstate.size(); i++){
00792 cout << nextstate[i] << ", ";
00793 }
00794 cout << endl;
00795 }
00796
00797
00798 for (unsigned j = 0; j < featmax.size(); j++){
00799 if (nextstate[j] < (featmin[j]-EPSILON)
00800 || nextstate[j] > (featmax[j]+EPSILON)){
00801
00802 if (HISTORY_SIZE == 0) return actualState;
00803
00804
00805 std::vector<float> modState = actualState;
00806 int stateOnlySize = modState.size()-HISTORY_FL_SIZE;
00807 for (int i = stateOnlySize; i < (int)modState.size(); i++){
00808 if (action == (i - stateOnlySize))
00809 modState[i] = 1;
00810 else
00811 modState[i] = 0;
00812 }
00813 return modState;
00814 }
00815 }
00816
00817 if (UCTDEBUG || HISTORYDEBUG){
00818 cout << "predicted next state: ";
00819 for (unsigned i = 0; i < nextstate.size(); i++){
00820 cout << nextstate[i] << ", ";
00821 }
00822 cout << endl;
00823 }
00824
00825
00826 return nextstate;
00827
00828 }
00829
00830
00831 void PO_ETUCT::savePolicy(const char* filename){
00832
00833 ofstream policyFile(filename, ios::out | ios::binary | ios::trunc);
00834
00835
00836 int fsize = featmin.size();
00837 policyFile.write((char*)&fsize, sizeof(int));
00838
00839
00840 policyFile.write((char*)&numactions, sizeof(int));
00841
00842
00843 for (std::set< std::vector<float> >::iterator i = statespace.begin();
00844 i != statespace.end(); i++){
00845
00846 state_t s = canonicalize(*i);
00847 state_info* info = &(statedata[s]);
00848
00849
00850 policyFile.write((char*)&((*i)[0]), sizeof(float)*fsize);
00851
00852
00853 policyFile.write((char*)&(info->Q[0]), sizeof(float)*numactions);
00854
00855 }
00856
00857 policyFile.close();
00858 }
00859
00860 void PO_ETUCT::logValues(ofstream *of, int xmin, int xmax, int ymin, int ymax){
00861 std::vector<float> state(2, 0.0);
00862 for (int i = xmin ; i < xmax; i++){
00863 for (int j = ymin; j < ymax; j++){
00864 state[0] = j;
00865 state[1] = i;
00866 state_t s = canonicalize(state);
00867 state_info* info = &(statedata[s]);
00868 std::vector<float> &Q_s = info->Q;
00869 const std::vector<float>::iterator max =
00870 random_max_element(Q_s.begin(), Q_s.end());
00871 *of << (*max) << ",";
00872 }
00873 }
00874 }
00875
00876
00877
00878
00879 std::vector<float> PO_ETUCT::discretizeState(const std::vector<float> &s){
00880 std::vector<float> ds(s.size());
00881
00882 for (unsigned i = 0; i < statesPerDim.size(); i++){
00883
00884
00885
00886
00887
00888 float factor = (featmax[i] - featmin[i]) / (float)statesPerDim[i];
00889 int bin = 0;
00890 if (s[i] > 0){
00891 bin = (int)((s[i]+factor/2) / factor);
00892 } else {
00893 bin = (int)((s[i]-factor/2) / factor);
00894 }
00895
00896 ds[i] = factor*bin;
00897
00898
00899 }
00900 for (unsigned i = statesPerDim.size(); i < s.size(); i++){
00901 ds[i] = s[i];
00902 }
00903
00904 return ds;
00905 }
00906
00907 std::vector<float> PO_ETUCT::addVec(const std::vector<float> &a, const std::vector<float> &b){
00908 if (a.size() != b.size())
00909 cout << "ERROR: add vector sizes wrong " << a.size() << ", " << b.size() << endl;
00910
00911 std::vector<float> c(a.size(), 0.0);
00912 for (unsigned i = 0; i < a.size(); i++){
00913 c[i] = a[i] + b[i];
00914 }
00915
00916 return c;
00917 }
00918
00919 std::vector<float> PO_ETUCT::subVec(const std::vector<float> &a, const std::vector<float> &b){
00920 if (a.size() != b.size())
00921 cout << "ERROR: sub vector sizes wrong " << a.size() << ", " << b.size() << endl;
00922
00923 std::vector<float> c(a.size(), 0.0);
00924 for (unsigned i = 0; i < a.size(); i++){
00925 c[i] = a[i] - b[i];
00926 }
00927
00928 return c;
00929 }
00930
00931
00932 void PO_ETUCT::setFirst(){
00933 if (HISTORY_SIZE == 0) return;
00934
00935 if (HISTORYDEBUG) cout << "first action, set sahistory to 0s" << endl;
00936
00937
00938 saHistory.resize(saHistory.size(), 0.0);
00939 }
00940
00941 void PO_ETUCT::setSeeding(bool seeding){
00942
00943 if (HISTORYDEBUG) cout << "set seed mode to " << seeding << endl;
00944 seedMode = seeding;
00945
00946 }