00001
00011 #include "ParallelETUCT.hh"
00012 #include <algorithm>
00013
00014 #include <sys/time.h>
00015
00016
00017 ParallelETUCT::ParallelETUCT(int numactions, float gamma, float rrange, float lambda,
00018 int MAX_ITER, float MAX_TIME, int MAX_DEPTH, int modelType,
00019 const std::vector<float> &fmax, const std::vector<float> &fmin,
00020 const std::vector<int> &nstatesPerDim, bool trackActual, int historySize, Random r):
00021 numactions(numactions), gamma(gamma), rrange(rrange), lambda(lambda),
00022 MAX_ITER(MAX_ITER), MAX_TIME(MAX_TIME),
00023 MAX_DEPTH(MAX_DEPTH), modelType(modelType), statesPerDim(nstatesPerDim),
00024 trackActual(trackActual), HISTORY_SIZE(historySize),
00025 HISTORY_FL_SIZE(historySize*numactions),
00026 CLEAR_SIZE(25)
00027 {
00028 rng = r;
00029
00030 nstates = 0;
00031 nsaved = 0;
00032 nactions = 0;
00033 lastUpdate = -1;
00034
00035 seedMode = false;
00036 timingType = true;
00037
00038 previnfo = NULL;
00039 model = NULL;
00040 planTime = getSeconds();
00041 initTime = getSeconds();
00042 setTime = getSeconds();
00043
00044 PLANNERDEBUG = false;
00045 POLICYDEBUG = false;
00046 ACTDEBUG = false;
00047 MODELDEBUG = false;
00048 UCTDEBUG = false;
00049 PTHREADDEBUG = false;
00050 ATHREADDEBUG = false;
00051 MTHREADDEBUG = false;
00052 TIMINGDEBUG = false;
00053 REALSTATEDEBUG = false;
00054 HISTORYDEBUG = false;
00055
00056 if (statesPerDim[0] > 0){
00057 cout << "Planner Parallel ETUCT using discretization of " << statesPerDim[0] << endl;
00058 }
00059 if (trackActual){
00060 cout << "Parallel ETUCT tracking real state values" << endl;
00061 }
00062 cout << "Planner using history size: " << HISTORY_SIZE << endl;
00063
00064 featmax = fmax;
00065 featmin = fmin;
00066
00067 pthread_mutex_init(&update_mutex, NULL);
00068 pthread_mutex_init(&history_mutex, NULL);
00069 pthread_mutex_init(&nactions_mutex, NULL);
00070 pthread_mutex_init(&plan_state_mutex, NULL);
00071 pthread_mutex_init(&statespace_mutex, NULL);
00072 pthread_mutex_init(&model_mutex, NULL);
00073 pthread_mutex_init(&list_mutex, NULL);
00074 pthread_cond_init(&list_cond, NULL);
00075
00076
00077 actualPlanState = std::vector<float>(featmax.size());
00078 discPlanState = NULL;
00079 modelThreadStarted = false;
00080 planThreadStarted = false;
00081 expList.clear();
00082
00083 if (HISTORY_SIZE == 0){
00084 saHistory.push_back(0.0);
00085 }
00086 else {
00087 if (HISTORYDEBUG) {
00088 cout << "History size of " << HISTORY_SIZE
00089 << " float size of " << HISTORY_FL_SIZE
00090 << " with state size: " << fmin.size()
00091 << " and numact: " << numactions << endl;
00092 }
00093 for (int i = 0; i < HISTORY_FL_SIZE; i++){
00094 saHistory.push_back(0.0);
00095 }
00096 }
00097
00098
00099 expfile.initFile("experiences.bin", featmax.size());
00100 }
00101
00102 ParallelETUCT::~ParallelETUCT() {
00103
00104
00105
00106
00107
00108 pthread_detach(planThread);
00109 pthread_detach(modelThread);
00110
00111 pthread_cancel(planThread);
00112 pthread_cancel(modelThread);
00113
00114
00115
00116
00117
00118
00119
00120
00121 pthread_mutex_lock(&plan_state_mutex);
00122 pthread_mutex_lock(&statespace_mutex);
00123 pthread_mutex_lock(&model_mutex);
00124 pthread_mutex_lock(&list_mutex);
00125
00126
00127 expList.clear();
00128
00129 for (std::map<state_t, state_info>::iterator i = statedata.begin();
00130 i != statedata.end(); i++){
00131
00132
00133
00134 state_info* info = &((*i).second);
00135
00136 deleteInfo(info);
00137 }
00138
00139 featmax.clear();
00140 featmin.clear();
00141
00142 statespace.clear();
00143 statedata.clear();
00144
00145 pthread_mutex_unlock(&plan_state_mutex);
00146 pthread_mutex_unlock(&statespace_mutex);
00147 pthread_mutex_unlock(&model_mutex);
00148 pthread_mutex_unlock(&list_mutex);
00149
00150 }
00151
00152 void ParallelETUCT::setModel(MDPModel* m){
00153
00154 model = m;
00155
00156 }
00157
00158
00160
00162
00163
00164
00166 bool ParallelETUCT::updateModelWithExperience(const std::vector<float> &laststate,
00167 int lastact,
00168 const std::vector<float> &currstate,
00169 float reward, bool term){
00170
00171
00172
00173
00174
00175
00176
00177
00178
00179 if (!timingType)
00180 planTime = getSeconds();
00181 initTime = getSeconds();
00182
00183
00184 state_t last = canonicalize(laststate);
00185
00186 prevstate = last;
00187 prevact = lastact;
00188
00189
00190 pthread_mutex_lock(&statespace_mutex);
00191 previnfo = &(statedata[last]);
00192 pthread_mutex_unlock(&statespace_mutex);
00193
00194 if (MODELDEBUG){
00195 cout << "Update with exp from state: ";
00196 for (unsigned i = 0; i < last->size(); i++){
00197 cout << (laststate)[i] << ", ";
00198 }
00199 cout << " action: " << lastact;
00200 cout << " to state: ";
00201 for (unsigned i = 0; i < currstate.size(); i++){
00202 cout << (currstate)[i] << ", ";
00203 }
00204 cout << " and reward: " << reward << endl;
00205 }
00206
00207
00208 if (ATHREADDEBUG)
00209 cout << "*** Action thread wants list lock ***" << endl << flush;
00210 if (TIMINGDEBUG) cout << "Want list mutex, time: " << (getSeconds()-initTime) << endl;
00211 pthread_mutex_lock(&list_mutex);
00212 if (TIMINGDEBUG) cout << "got list mutex, time: " << (getSeconds()-initTime) << endl;
00213 experience e;
00214 e.s = laststate;
00215 e.next = currstate;
00216 e.act = lastact;
00217 e.reward = reward;
00218 e.terminal = term;
00219
00220 if (HISTORY_SIZE > 0){
00221 if (HISTORYDEBUG) {
00222 cout << "Original state vector (size " << e.s.size() << ": " << e.s[0];
00223 for (unsigned i = 1; i < e.s.size(); i++){
00224 cout << "," << e.s[i];
00225 }
00226 cout << endl;
00227 }
00228
00229 pthread_mutex_lock(&history_mutex);
00230 for (int i = 0; i < HISTORY_FL_SIZE; i++){
00231 e.s.push_back(saHistory[i]);
00232 }
00233 pthread_mutex_unlock(&history_mutex);
00234
00235 if (HISTORYDEBUG) {
00236 cout << "New state vector (size " << e.s.size() << ": " << e.s[0];
00237 for (unsigned i = 1; i < e.s.size(); i++){
00238 cout << "," << e.s[i];
00239 }
00240 cout << endl;
00241 }
00242
00243 if (!seedMode){
00244 pthread_mutex_lock(&history_mutex);
00245
00246
00247
00248
00249
00250
00251
00252
00253
00254
00255 for (int i = 0; i < numactions; i++){
00256 if (i == lastact)
00257 saHistory.push_back(1.0);
00258 else
00259 saHistory.push_back(0.0);
00260 saHistory.pop_front();
00261 }
00262
00263
00264
00265 if (HISTORYDEBUG) {
00266 cout << "New history vector (size " << saHistory.size() << ": " << saHistory[0];
00267 for (unsigned i = 1; i < saHistory.size(); i++){
00268 cout << "," << saHistory[i];
00269 }
00270 cout << endl;
00271 }
00272 pthread_mutex_unlock(&history_mutex);
00273 }
00274 }
00275
00276 expList.push_back(e);
00277
00278 if (ATHREADDEBUG || MTHREADDEBUG)
00279 cout << "added exp to list, size: " << expList.size() << endl << flush;
00280 if (TIMINGDEBUG) cout << "list updated, time: " << (getSeconds()-initTime) << endl;
00281 pthread_cond_signal(&list_cond);
00282 pthread_mutex_unlock(&list_mutex);
00283
00284
00285
00286
00287
00288
00289
00290
00291
00292 if (timingType)
00293 planTime = getSeconds();
00294
00295 if (TIMINGDEBUG) cout << "leaving updateModel, time: " << (getSeconds()-initTime) << endl;
00296
00297
00298 return false;
00299
00300 }
00301
00303 void ParallelETUCT::updateStateActionFromModel(state_t s, int a, state_info* info){
00304
00305 if (HISTORY_SIZE == 0){
00306 pthread_mutex_lock(&info->statemodel_mutex);
00307
00308 std::deque<float> history(1,0.0);
00309 StateActionInfo* newModel = NULL;
00310 newModel = &(info->historyModel[a][history]);
00311
00312 updateStateActionHistoryFromModel(*s, a, newModel);
00313 pthread_mutex_unlock(&info->statemodel_mutex);
00314
00315 }
00316
00317 else {
00318 pthread_mutex_lock(&info->statemodel_mutex);
00319
00320
00321 if (info->historyModel[a].size() > CLEAR_SIZE){
00322
00323
00324
00325
00326 info->historyModel[a].clear();
00327
00328 } else {
00329
00330
00331 for (std::map< std::deque<float>, StateActionInfo>::iterator it = info->historyModel[a].begin();
00332 it != info->historyModel[a].end(); it++){
00333
00334 std::deque<float> oneHist = (*it).first;
00335 StateActionInfo* newModel = &((*it).second);
00336
00337
00338 std::vector<float> modState = *s;
00339 for (int i = 0; i < HISTORY_FL_SIZE; i++){
00340 modState.push_back(oneHist[i]);
00341 }
00342 updateStateActionHistoryFromModel(modState, a, newModel);
00343 }
00344 }
00345
00346 pthread_mutex_unlock(&info->statemodel_mutex);
00347 }
00348
00349 }
00350
00352 void ParallelETUCT::updateStateActionHistoryFromModel(const std::vector<float> modState, int a, StateActionInfo *newModel){
00353
00354
00355
00356 pthread_mutex_lock(&model_mutex);
00357
00358 model->getStateActionInfo(modState, a, newModel);
00359
00360 pthread_mutex_lock(&nactions_mutex);
00361 newModel->frameUpdated = nactions;
00362 pthread_mutex_unlock(&nactions_mutex);
00363
00364 pthread_mutex_unlock(&model_mutex);
00365
00366
00367
00368 }
00369
00370 void ParallelETUCT::canonNextStates(StateActionInfo* modelInfo){
00371
00372
00373
00374 for (std::map<std::vector<float>, float>::iterator outIt
00375 = modelInfo->transitionProbs.begin();
00376 outIt != modelInfo->transitionProbs.end(); outIt++){
00377
00378 std::vector<float> nextstate = (*outIt).first;
00379 bool badState = false;
00380
00381
00382 for (unsigned j = 0; j < nextstate.size(); j++){
00383 float factor = EPSILON;
00384 if (statesPerDim[j] > 0)
00385 factor = (featmax[j] - featmin[j]) / (float)statesPerDim[j];
00386 if (nextstate[j] < (featmin[j]-factor)
00387 || nextstate[j] > (featmax[j]+factor)){
00388
00389 badState = true;
00390 break;
00391 }
00392 }
00393
00394 if (!badState){
00395
00396 canonicalize(nextstate);
00397 }
00398 }
00399 }
00400
00401
00402
00403
00405 int ParallelETUCT::getBestAction(const std::vector<float> &state){
00406
00407
00408 pthread_mutex_lock(&nactions_mutex);
00409 nactions++;
00410 pthread_mutex_unlock(&nactions_mutex);
00411
00412 if (TIMINGDEBUG) cout << "getBestAction, time: " << (getSeconds()-initTime) << endl;
00413
00414
00415 state_t s = canonicalize(state);
00416
00417
00418 if (ATHREADDEBUG)
00419 cout << "*** Action thread wants plan state lock ***" << endl << flush;
00420 if (TIMINGDEBUG) cout << "want planStateMut, time: " << (getSeconds()-initTime) << endl;
00421
00422 pthread_mutex_lock(&(plan_state_mutex));
00423 if (TIMINGDEBUG) cout << "got planStateMut, time: " << (getSeconds()-initTime) << endl;
00424
00425 actualPlanState = state;
00426 discPlanState = s;
00427 setTime = getSeconds();
00428
00429 if (ATHREADDEBUG){
00430 cout << "Set planning state as: ";
00431 for (unsigned i = 0; i < state.size(); i++){
00432 cout << state[i] << ", ";
00433 }
00434 cout << endl << flush;
00435 }
00436
00437
00438 pthread_mutex_unlock(&(plan_state_mutex));
00439 if (TIMINGDEBUG) cout << "set planState, time: " << (getSeconds()-initTime) << endl;
00440
00441
00442 pthread_mutex_lock(&statespace_mutex);
00443 state_info* info = &(statedata[s]);
00444 pthread_mutex_unlock(&statespace_mutex);
00445
00446
00447
00448
00449
00450
00451
00452 while (((getSeconds()- initTime) < MAX_TIME)){
00453 if (TIMINGDEBUG)
00454 cout << "waiting for time: " << (getSeconds()-initTime) << endl;
00455
00456 pthread_yield();
00457 }
00458
00459 if (TIMINGDEBUG) cout << "time up: " << (getSeconds()-initTime) << endl;
00460
00461 if (TIMINGDEBUG && (getSeconds()-initTime) > 0.15) cout << "**********" << endl;
00462
00463 pthread_mutex_lock(&info->stateinfo_mutex);
00464
00465
00466 std::vector<float> &Q = info->Q;
00467
00468
00469 if (ATHREADDEBUG) {
00470 if (previnfo != NULL)
00471 cout << " ... now " << previnfo->uctVisits << " times." << endl;
00472 cout << "Getting best action from state ";
00473 for (unsigned i = 0; i < s->size(); i++){
00474 cout << (*s)[i] << ", ";
00475 }
00476 cout << " sampled " << info->uctVisits << " times.";
00477 }
00478
00479
00480 const std::vector<float>::iterator a =
00481 random_max_element(Q.begin(), Q.end());
00482 int act = a - Q.begin();
00483
00484 if (TIMINGDEBUG) cout << "got action: " << (getSeconds()-initTime) << endl;
00485
00486 pthread_mutex_unlock(&info->stateinfo_mutex);
00487
00488
00489 return act;
00490 }
00491
00492
00493
00494
00495
00496
00497 void ParallelETUCT::planOnNewModel(){
00498
00499
00500
00501 if (!modelThreadStarted){
00502 modelThreadStarted = true;
00503 pthread_create(&modelThread, NULL, parallelModelLearningStart, this);
00504 }
00505
00506 if (!planThreadStarted){
00507 planThreadStarted = true;
00508 pthread_create(&(planThread), NULL, parallelSearchStart, this);
00509 }
00510
00511 }
00512
00513
00514 void* parallelModelLearningStart(void* arg){
00515 cout << "Start model learning thread" << endl << flush;
00516 ParallelETUCT* pe = reinterpret_cast<ParallelETUCT*>(arg);
00517 while(true){
00518 pe->parallelModelLearning();
00519
00520
00521
00522
00523
00524
00525 }
00526 return NULL;
00527 }
00528
00529 void ParallelETUCT::parallelModelLearning(){
00530
00531
00532
00533 pthread_mutex_lock(&list_mutex);
00534 while (expList.size() == 0){
00535 pthread_cond_wait(&list_cond,&list_mutex);
00536 }
00537 pthread_mutex_unlock(&list_mutex);
00538
00539
00540 std::vector<experience> updateList;
00541 if (MTHREADDEBUG) cout << " *** Model thread wants list lock ***" << endl << flush;
00542 pthread_mutex_lock(&list_mutex);
00543 updateList = expList;
00544 expList.clear();
00545 if (MTHREADDEBUG) cout << " *** Model thread done with list lock ***" << endl << flush;
00546 pthread_mutex_unlock(&list_mutex);
00547
00548
00549
00550
00551
00552
00553
00554
00555
00556
00557
00558
00559 modelcopy = model->getCopy();
00560
00561
00562
00563 bool modelChanged = modelcopy->updateWithExperiences(updateList);
00564
00565
00566 pthread_mutex_lock(&model_mutex);
00567
00568
00569 delete model;
00570 model = modelcopy;
00571 if (MTHREADDEBUG) cout << " Model updated" << endl << flush;
00572
00573 pthread_mutex_unlock(&model_mutex);
00574
00575
00576
00577
00578 if (modelChanged) resetAndUpdateStateActions();
00579
00580 pthread_yield();
00581
00582
00583 }
00584
00585
00586
00587
00588 void ParallelETUCT::resetAndUpdateStateActions(){
00589
00590 const int MIN_VISITS = 10;
00591
00592 pthread_mutex_lock(&nactions_mutex);
00593 int updateTime = nactions;
00594 pthread_mutex_unlock(&nactions_mutex);
00595
00596
00597
00598 pthread_mutex_lock(&statespace_mutex);
00599
00600 for (std::set<std::vector<float> >::iterator i = statespace.begin();
00601 i != statespace.end(); i++){
00602 pthread_mutex_unlock(&statespace_mutex);
00603
00604 state_t s = canonicalize(*i);
00605
00606 if (MTHREADDEBUG) cout << " *** Model thread wants search lock ***" << endl;
00607
00608 if (MTHREADDEBUG) cout << " *** Model thread got search lock " << endl;
00609
00610 pthread_mutex_lock(&statespace_mutex);
00611 state_info* info = &(statedata[s]);
00612 pthread_mutex_unlock(&statespace_mutex);
00613
00614 pthread_mutex_lock(&info->stateinfo_mutex);
00615
00616 if (info->uctVisits > (MIN_VISITS * numactions))
00617 info->uctVisits = MIN_VISITS * numactions;
00618
00619 for (int j = 0; j < numactions; j++){
00620 if (info->uctActions[j] > MIN_VISITS)
00621 info->uctActions[j] = MIN_VISITS;
00622 if (info->needsUpdate || info->historyModel[j].size() > CLEAR_SIZE){
00623 updateStateActionFromModel(s, j, info);
00624 }
00625 }
00626 info->needsUpdate = false;
00627 pthread_mutex_unlock(&info->stateinfo_mutex);
00628
00629 pthread_yield();
00630
00631 pthread_mutex_lock(&statespace_mutex);
00632
00633 }
00634 pthread_mutex_unlock(&statespace_mutex);
00635
00636 pthread_mutex_lock(&update_mutex);
00637 lastUpdate = updateTime;
00638 pthread_mutex_unlock(&update_mutex);
00639
00640 }
00641
00642
00643
00644
00646
00648
00649 ParallelETUCT::state_t ParallelETUCT::canonicalize(const std::vector<float> &s) {
00650 if (PLANNERDEBUG) cout << "canonicalize(s = " << s[0] << ", "
00651 << s[1] << ")" << endl;
00652
00653
00654 std::vector<float> s2;
00655 if (statesPerDim[0] > 0){
00656 s2 = discretizeState(s);
00657 } else {
00658 s2 = s;
00659 }
00660
00661 pthread_mutex_lock(&statespace_mutex);
00662
00663
00664 const std::pair<std::set<std::vector<float> >::iterator, bool> result =
00665 statespace.insert(s2);
00666 state_t retval = &*result.first;
00667
00668
00669
00670 if (result.second) {
00671 state_info* info = &(statedata[retval]);
00672 int id = nstates++;
00673 pthread_mutex_unlock(&statespace_mutex);
00674 initStateInfo(retval, info, id);
00675 } else {
00676 pthread_mutex_unlock(&statespace_mutex);
00677 }
00678
00679 return retval;
00680 }
00681
00682
00683
00684 void ParallelETUCT::initStateInfo(state_t s, state_info* info, int id){
00685
00686
00687
00688 pthread_mutex_init(&info->statemodel_mutex, NULL);
00689 pthread_mutex_init(&info->stateinfo_mutex, NULL);
00690
00691 pthread_mutex_lock(&info->stateinfo_mutex);
00692
00693
00694
00695 pthread_mutex_lock(&info->statemodel_mutex);
00696 info->historyModel = new std::map< std::deque<float>, StateActionInfo>[numactions];
00697 pthread_mutex_unlock(&info->statemodel_mutex);
00698
00699 info->id = id;
00700 if (PLANNERDEBUG) cout << " id = " << info->id << endl;
00701
00702
00703 info->Q.resize(numactions, 0);
00704 info->uctActions.resize(numactions, 1);
00705 info->uctVisits = 1;
00706 info->visited = 0;
00707
00708 for (int i = 0; i < numactions; i++){
00709 info->Q[i] = rng.uniform(0,0.01);
00710 }
00711
00712 info->needsUpdate = true;
00713
00714 pthread_mutex_unlock(&info->stateinfo_mutex);
00715
00716
00717
00718 }
00719
00720
00722 void ParallelETUCT::printStates(){
00723
00724 pthread_mutex_lock(&statespace_mutex);
00725 for (std::set< std::vector<float> >::iterator i = statespace.begin();
00726 i != statespace.end(); i++){
00727 pthread_mutex_unlock(&statespace_mutex);
00728
00729 state_t s = canonicalize(*i);
00730
00731 pthread_mutex_lock(&statespace_mutex);
00732 state_info* info = &(statedata[s]);
00733 pthread_mutex_unlock(&statespace_mutex);
00734
00735 cout << "State " << info->id << ": ";
00736 for (unsigned j = 0; j < s->size(); j++){
00737 cout << (*s)[j] << ", ";
00738 }
00739 cout << endl;
00740
00741 pthread_mutex_lock(&info->stateinfo_mutex);
00742
00743 for (int act = 0; act < numactions; act++){
00744 cout << " Q: " << info->Q[act] << endl;
00745
00746 }
00747
00748 pthread_mutex_unlock(&info->stateinfo_mutex);
00749
00750 pthread_mutex_lock(&statespace_mutex);
00751
00752 }
00753 pthread_mutex_unlock(&statespace_mutex);
00754
00755 }
00756
00757
00758 void ParallelETUCT::deleteInfo(state_info* info){
00759
00760 delete [] info->historyModel;
00761
00762 }
00763
00764
00765
00766 double ParallelETUCT::getSeconds(){
00767 struct timezone tz;
00768 timeval timeT;
00769 gettimeofday(&timeT, &tz);
00770 return timeT.tv_sec + (timeT.tv_usec / 1000000.0);
00771 }
00772
00773
00785 float ParallelETUCT::uctSearch(const std::vector<float> &actS, state_t discS, int depth, std::deque<float> &searchHistory){
00786 if (UCTDEBUG){
00787 cout << " uctSearch state ";
00788 for (unsigned i = 0; i < actS.size(); i++){
00789 cout << actS[i] << ", ";
00790 }
00791 cout << " at depth " << depth << endl;
00792 }
00793
00794 pthread_mutex_lock(&statespace_mutex);
00795 state_info* info = &(statedata[discS]);
00796 pthread_mutex_unlock(&statespace_mutex);
00797
00798
00799
00800
00801
00802
00803 if (depth > MAX_DEPTH){
00804 pthread_mutex_lock(&info->stateinfo_mutex);
00805
00806
00807 std::vector<float>::iterator maxAct =
00808 std::max_element(info->Q.begin(),
00809 info->Q.end());
00810 float maxval = *maxAct;
00811
00812 if (UCTDEBUG)
00813 cout << "Terminated after depth: " << depth
00814
00815 << " Q: " << maxval
00816 << " visited: " << info->visited << endl;
00817
00818 pthread_mutex_unlock(&info->stateinfo_mutex);
00819
00820 return maxval;
00821 }
00822
00823
00824 int action = selectUCTAction(info);
00825
00826
00827
00828 float reward = 0;
00829 bool term = false;
00830
00831 pthread_mutex_lock(&info->stateinfo_mutex);
00832
00833 float learnRate;
00834
00835
00836
00837 learnRate = 10.0 / (info->uctActions[action] + 10.0);
00838
00839
00840
00841
00842
00843
00844 info->needsUpdate = true;
00845
00846 pthread_mutex_unlock(&info->stateinfo_mutex);
00847
00848 std::vector<float> actualNext = simulateNextState(actS, discS, info, searchHistory, action, &reward, &term);
00849
00850
00851 if (term){
00852
00853 if (UCTDEBUG) cout << " Terminated on exploration condition" << endl;
00854 pthread_mutex_lock(&info->stateinfo_mutex);
00855
00856 info->Q[action] += learnRate * (reward - info->Q[action]);
00857 info->uctVisits++;
00858 info->uctActions[action]++;
00859
00860 if (UCTDEBUG)
00861 cout << " Depth: " << depth << " Selected action " << action
00862 << " r: " << reward
00863 << " StateVisits: " << info->uctVisits
00864 << " ActionVisits: " << info->uctActions[action] << endl;
00865
00866 pthread_mutex_unlock(&info->stateinfo_mutex);
00867
00868 return reward;
00869 }
00870
00871
00872 state_t discNext = canonicalize(actualNext);
00873
00874 if (UCTDEBUG)
00875 cout << " Depth: " << depth << " Selected action " << action
00876 << " r: " << reward << endl;
00877
00878 pthread_mutex_lock(&info->stateinfo_mutex);
00879 info->visited++;
00880 pthread_mutex_unlock(&info->stateinfo_mutex);
00881
00882 if (HISTORY_SIZE > 0){
00883
00884
00885
00886
00887
00888
00889
00890
00891
00892
00893 for (int i = 0; i < numactions; i++){
00894 if (i == action)
00895 searchHistory.push_back(1.0);
00896 else
00897 searchHistory.push_back(0.0);
00898 searchHistory.pop_front();
00899 }
00900
00901
00902
00903 if (HISTORYDEBUG) {
00904 cout << "New planning history vector (size " << searchHistory.size() << ": " << searchHistory[0];
00905 for (unsigned i = 1; i < searchHistory.size(); i++){
00906 cout << "," << searchHistory[i];
00907 }
00908 cout << endl;
00909 }
00910 }
00911
00912
00913 float newQ = reward + gamma * uctSearch(actualNext, discNext, depth+1, searchHistory);
00914
00915 pthread_mutex_lock(&info->stateinfo_mutex);
00916
00917 if (info->visited == 1){
00918
00919
00920 info->Q[action] += learnRate * (newQ - info->Q[action]);
00921 info->uctVisits++;
00922 info->uctActions[action]++;
00923
00924 if (UCTDEBUG)
00925 cout << " Depth: " << depth << " newQ: " << newQ
00926 << " StateVisits: " << info->uctVisits
00927 << " ActionVisits: " << info->uctActions[action] << endl;
00928
00929 if (lambda < 1.0){
00930
00931
00932 std::vector<float>::iterator maxAct =
00933 std::max_element(info->Q.begin(),
00934 info->Q.end());
00935 float maxval = *maxAct;
00936
00937 if (UCTDEBUG)
00938 cout << " Replacing newQ: " << newQ;
00939
00940
00941 newQ = (lambda * newQ) + ((1.0-lambda) * maxval);
00942
00943 if (UCTDEBUG)
00944 cout << " with wAvg: " << newQ << endl;
00945 }
00946
00947 }
00948
00949 info->visited--;
00950 pthread_mutex_unlock(&info->stateinfo_mutex);
00951
00952
00953 return newQ;
00954
00955 }
00956
00957
00958 int ParallelETUCT::selectUCTAction(state_info* info){
00959
00960
00961 pthread_mutex_lock(&info->stateinfo_mutex);
00962
00963 std::vector<float> &Q = info->Q;
00964
00965 if (info->uctActions.size() < (unsigned)numactions){
00966 cout << "ERROR: uctActions has size " << info->uctActions.size() << endl << flush;
00967 info->uctActions.resize(numactions);
00968 }
00969
00970
00971 float rewardBound = rrange;
00972 if (rewardBound < 1.0)
00973 rewardBound = 1.0;
00974 rewardBound /= (1.0 - gamma);
00975 if (UCTDEBUG) cout << "Reward bound: " << rewardBound << endl;
00976
00977 std::vector<float> uctQ(numactions, 0.0);
00978
00979 for (int i = 0; i < numactions; i++){
00980
00981
00982 uctQ[i] = Q[i] +
00983 rewardBound * 2.0 * sqrt(log((float)info->uctVisits) /
00984 (float)info->uctActions[i]);
00985
00986 if (UCTDEBUG)
00987 cout << " Action: " << i << " Q: " << Q[i]
00988 << " visits: " << info->uctActions[i]
00989 << " value: " << uctQ[i] << endl;
00990 }
00991
00992
00993 std::vector<float>::iterator maxAct =
00994 max_element(uctQ.begin(), uctQ.end());
00995 float maxval = *maxAct;
00996 int act = maxAct - uctQ.begin();
00997
00998 if (UCTDEBUG)
00999 cout << " Selected " << act << " val: " << maxval << endl;
01000
01001 pthread_mutex_unlock(&info->stateinfo_mutex);
01002
01003 return act;
01004
01005 }
01006
01008 std::vector<float> ParallelETUCT::simulateNextState(const std::vector<float> &actualState, state_t discState, state_info* info, const std::deque<float> &history, int action, float* reward, bool* term){
01009
01010
01011
01012
01013 pthread_mutex_lock(&info->statemodel_mutex);
01014 StateActionInfo* modelInfo = NULL;
01015 modelInfo = &(info->historyModel[action][history]);
01016
01017 pthread_mutex_lock(&update_mutex);
01018 bool upToDate = modelInfo->frameUpdated >= lastUpdate;
01019 pthread_mutex_unlock(&update_mutex);
01020
01021 if (!upToDate){
01022
01023 if (HISTORY_SIZE > 0){
01024 std::vector<float> modState = *discState;
01025 for (int i = 0; i < HISTORY_FL_SIZE; i++){
01026 modState.push_back(history[i]);
01027 }
01028 updateStateActionHistoryFromModel(modState, action, modelInfo);
01029 } else {
01030 updateStateActionHistoryFromModel(*discState, action, modelInfo);
01031 }
01032 }
01033
01034 *reward = modelInfo->reward;
01035 *term = (rng.uniform() < modelInfo->termProb);
01036
01037 if (*term){
01038 pthread_mutex_unlock(&info->statemodel_mutex);
01039 return actualState;
01040 }
01041
01042 float randProb = rng.uniform();
01043
01044 float probSum = 0.0;
01045 std::vector<float> nextstate;
01046
01047 if (REALSTATEDEBUG) cout << "randProb: " << randProb << " numNext: " << modelInfo->transitionProbs.size() << endl;
01048
01049
01050 nextstate = actualState;
01051
01052 for (std::map<std::vector<float>, float>::iterator outIt
01053 = modelInfo->transitionProbs.begin();
01054 outIt != modelInfo->transitionProbs.end(); outIt++){
01055
01056 float prob = (*outIt).second;
01057 probSum += prob;
01058 if (REALSTATEDEBUG) cout << randProb << ", " << probSum << ", " << prob << endl;
01059
01060 if (randProb <= probSum){
01061 nextstate = (*outIt).first;
01062 if (REALSTATEDEBUG) cout << "selected state " << randProb << ", " << probSum << ", " << prob << endl;
01063 break;
01064 }
01065 }
01066
01067 pthread_mutex_unlock(&info->statemodel_mutex);
01068
01069 if (trackActual){
01070
01071
01072 std::vector<float> relChange = subVec(nextstate, *discState);
01073
01074
01075 nextstate = addVec(actualState, relChange);
01076
01077 }
01078
01079
01080 for (unsigned j = 0; j < nextstate.size(); j++){
01081 float factor = EPSILON;
01082 if (statesPerDim[j] > 0)
01083 factor = (featmax[j] - featmin[j]) / (float)statesPerDim[j];
01084 if (nextstate[j] < (featmin[j]-factor)
01085 || nextstate[j] > (featmax[j]+factor)){
01086 return actualState;
01087 }
01088 }
01089
01090
01091 return nextstate;
01092
01093 }
01094
01095 std::vector<float> ParallelETUCT::selectRandomState(){
01096
01097 pthread_mutex_lock(&statespace_mutex);
01098 if (statespace.size() == 0){
01099 pthread_mutex_unlock(&statespace_mutex);
01100 return std::vector<float>(featmax.size());
01101 }
01102 pthread_mutex_unlock(&statespace_mutex);
01103
01104
01105 int index = 0;
01106 std::vector<float> state;
01107
01108 pthread_mutex_lock(&statespace_mutex);
01109 if (statespace.size() > 1){
01110 index = rng.uniformDiscrete(0, statespace.size()-1);
01111 }
01112 pthread_mutex_unlock(&statespace_mutex);
01113
01114 int cnt = 0;
01115
01116 if (PTHREADDEBUG) cout << "*** Planning thread wants search lock (randomstate) ***" << endl << flush;
01117
01118 pthread_mutex_lock(&statespace_mutex);
01119 for (std::set<std::vector<float> >::iterator i = statespace.begin();
01120 i != statespace.end(); i++, cnt++){
01121 if (cnt == index){
01122 state = *i;
01123 break;
01124 }
01125 }
01126 pthread_mutex_unlock(&statespace_mutex);
01127
01128 return state;
01129 }
01130
01131
01132 void* parallelSearchStart(void* arg){
01133 ParallelETUCT* pe = reinterpret_cast<ParallelETUCT*>(arg);
01134
01135 cout << "start parallel uct planning search thread" << endl << flush;
01136
01137 while(true){
01138 pe->parallelSearch();
01139 }
01140
01141 return NULL;
01142 }
01143
01144 void ParallelETUCT::parallelSearch(){
01145
01146 std::vector<float> actS;
01147 state_t discS;
01148 std::deque<float> searchHistory;
01149
01150
01151 if (PTHREADDEBUG) {
01152 cout << "*** Planning thread wants planning state lock ***" << endl << flush;
01153 }
01154 pthread_mutex_lock(&(plan_state_mutex));
01155 if (HISTORY_SIZE > 0) pthread_mutex_lock(&history_mutex);
01156
01157
01158 actS = actualPlanState;
01159 discS = discPlanState;
01160 searchHistory = saHistory;
01161
01162
01163
01164 if (discS == NULL){
01165 pthread_mutex_unlock(&(plan_state_mutex));
01166 if (HISTORY_SIZE > 0) pthread_mutex_unlock(&history_mutex);
01167 return;
01168 }
01169
01170 if (PTHREADDEBUG){
01171 pthread_mutex_lock(&statespace_mutex);
01172 cout << " uct search from state s ("
01173 << statedata[discS].uctVisits <<"): ";
01174 pthread_mutex_unlock(&statespace_mutex);
01175
01176 for (unsigned i = 0; i < discS->size(); i++){
01177 cout << (*discS)[i] << ", ";
01178 }
01179 cout << endl << flush;
01180 }
01181
01182
01183 pthread_mutex_unlock(&(plan_state_mutex));
01184 if (HISTORY_SIZE > 0) pthread_mutex_unlock(&history_mutex);
01185
01186 if (PTHREADDEBUG) cout << "*** Planning thread wants search lock ***" << endl;
01187 uctSearch(actS, discS, 0, searchHistory);
01188
01189 pthread_yield();
01190
01191 }
01192
01193
01194
01195 void ParallelETUCT::initStates(){
01196 cout << "init states" << endl;
01197 std::vector<float> s(featmin.size());
01198
01199 fillInState(s,0);
01200 }
01201
01202 void ParallelETUCT::fillInState(std::vector<float>s, int depth){
01203
01204
01205 if (depth == (int)featmin.size()){
01206 canonicalize(s);
01207 return;
01208 }
01209
01210
01211 for (float i = featmin[depth]; i < featmax[depth]+1; i++){
01212 s[depth] = i;
01213 fillInState(s, depth+1);
01214 }
01215 }
01216
01217
01218
01219 void ParallelETUCT::savePolicy(const char* filename){
01220
01221 ofstream policyFile(filename, ios::out | ios::binary | ios::trunc);
01222
01223
01224 int fsize = featmin.size();
01225 policyFile.write((char*)&fsize, sizeof(int));
01226
01227
01228 policyFile.write((char*)&numactions, sizeof(int));
01229
01230
01231 pthread_mutex_lock(&statespace_mutex);
01232
01233 for (std::set< std::vector<float> >::iterator i = statespace.begin();
01234 i != statespace.end(); i++){
01235 pthread_mutex_unlock(&statespace_mutex);
01236
01237 state_t s = canonicalize(*i);
01238
01239 pthread_mutex_lock(&statespace_mutex);
01240 state_info* info = &(statedata[s]);
01241 pthread_mutex_unlock(&statespace_mutex);
01242
01243
01244 policyFile.write((char*)&((*i)[0]), sizeof(float)*fsize);
01245
01246
01247 pthread_mutex_lock(&info->stateinfo_mutex);
01248 policyFile.write((char*)&(info->Q[0]), sizeof(float)*numactions);
01249 pthread_mutex_unlock(&info->stateinfo_mutex);
01250
01251 pthread_mutex_lock(&statespace_mutex);
01252 }
01253 pthread_mutex_unlock(&statespace_mutex);
01254
01255 policyFile.close();
01256 }
01257
01258
01259
01260 void ParallelETUCT::loadPolicy(const char* filename){
01261
01262 ifstream policyFile(filename, ios::in | ios::binary);
01263
01264
01265 int fsize;
01266 policyFile.read((char*)&fsize, sizeof(int));
01267 cout << "Numfeats loaded: " << fsize << endl << flush;
01268
01269
01270 int nact;
01271 policyFile.read((char*)&nact, sizeof(int));
01272 cout << "nact loaded: " << nact << endl << flush;
01273 cout << " numactions: " << numactions << endl << flush;
01274
01275 if (nact != numactions){
01276 cout << "this policy is not valid loaded nact: " << nact
01277 << " was told: " << numactions << endl << flush;
01278 exit(-1);
01279 }
01280
01281
01282 while(!policyFile.eof()){
01283 std::vector<float> state(fsize, 0.0);
01284
01285
01286 policyFile.read((char*)&(state[0]), sizeof(float)*fsize);
01287
01288
01289
01290
01291
01292 state_t s = canonicalize(state);
01293
01294 pthread_mutex_lock(&statespace_mutex);
01295 state_info* info = &(statedata[s]);
01296 pthread_mutex_unlock(&statespace_mutex);
01297
01298 if (policyFile.eof()) break;
01299
01300
01301 pthread_mutex_lock(&info->stateinfo_mutex);
01302
01303 policyFile.read((char*)&(info->Q[0]), sizeof(float)*numactions);
01304
01305 info->uctVisits = numactions * 100;
01306
01307 for (int j = 0; j < numactions; j++){
01308 info->uctActions[j] = 100;
01309 }
01310
01311 info->needsUpdate = true;
01312
01313 pthread_mutex_unlock(&info->stateinfo_mutex);
01314
01315
01316
01317
01318
01319
01320
01321 }
01322
01323 policyFile.close();
01324 cout << "Policy loaded!!!" << endl << flush;
01325 }
01326
01327 void ParallelETUCT::logValues(ofstream *of, int xmin, int xmax, int ymin, int ymax){
01328 std::vector<float> state(2, 0.0);
01329 for (int i = xmin ; i < xmax; i++){
01330 for (int j = ymin; j < ymax; j++){
01331 state[0] = j;
01332 state[1] = i;
01333 state_t s = canonicalize(state);
01334
01335 pthread_mutex_lock(&statespace_mutex);
01336 state_info* info = &(statedata[s]);
01337 pthread_mutex_unlock(&statespace_mutex);
01338
01339 pthread_mutex_lock(&info->stateinfo_mutex);
01340
01341 std::vector<float> &Q_s = info->Q;
01342 const std::vector<float>::iterator max =
01343 random_max_element(Q_s.begin(), Q_s.end());
01344 *of << (*max) << ",";
01345
01346 pthread_mutex_unlock(&info->stateinfo_mutex);
01347
01348 }
01349 }
01350 }
01351
01352
01353
01354
01355 std::vector<float> ParallelETUCT::discretizeState(const std::vector<float> &s){
01356 std::vector<float> ds(s.size());
01357
01358 for (unsigned i = 0; i < s.size(); i++){
01359
01360
01361
01362
01363
01364 float factor = (featmax[i] - featmin[i]) / (float)statesPerDim[i];
01365 int bin = 0;
01366 if (s[i] > 0){
01367 bin = (int)((s[i]+factor/2) / factor);
01368 } else {
01369 bin = (int)((s[i]-factor/2) / factor);
01370 }
01371
01372 ds[i] = factor*bin;
01373
01374
01375 }
01376
01377 return ds;
01378 }
01379
01380
01381 std::vector<float> ParallelETUCT::addVec(const std::vector<float> &a, const std::vector<float> &b){
01382 if (a.size() != b.size())
01383 cout << "ERROR: add vector sizes wrong " << a.size() << ", " << b.size() << endl;
01384
01385 std::vector<float> c(a.size(), 0.0);
01386 for (unsigned i = 0; i < a.size(); i++){
01387 c[i] = a[i] + b[i];
01388 }
01389
01390 return c;
01391 }
01392
01393 std::vector<float> ParallelETUCT::subVec(const std::vector<float> &a, const std::vector<float> &b){
01394 if (a.size() != b.size())
01395 cout << "ERROR: sub vector sizes wrong " << a.size() << ", " << b.size() << endl;
01396
01397 std::vector<float> c(a.size(), 0.0);
01398 for (unsigned i = 0; i < a.size(); i++){
01399 c[i] = a[i] - b[i];
01400 }
01401
01402 return c;
01403 }
01404
01405 void ParallelETUCT::setFirst(){
01406 if (HISTORY_SIZE == 0) return;
01407
01408 if (HISTORYDEBUG) cout << "first action, set sahistory to 0s" << endl;
01409
01410 pthread_mutex_lock(&(history_mutex));
01411
01412 saHistory.resize(saHistory.size(), 0.0);
01413 pthread_mutex_unlock(&(history_mutex));
01414 }
01415
01416 void ParallelETUCT::setSeeding(bool seeding){
01417
01418 if (HISTORYDEBUG) cout << "set seed mode to " << seeding << endl;
01419 seedMode = seeding;
01420
01421 }