rl_agent: ETUCT.cc Source File

Go to the documentation of this file.
00001 
00009 #include "ETUCT.hh"
00010 #include <algorithm>
00011 
00012 #include <sys/time.h>
00013 
00014 
00015 ETUCT::ETUCT(int numactions, float gamma, float rrange, float lambda,
00016              int MAX_ITER, float MAX_TIME, int MAX_DEPTH, int modelType,
00017              const std::vector<float> &fmax, const std::vector<float> &fmin,
00018              const std::vector<int> &nstatesPerDim, bool trackActual,
00019              int historySize, Random r):
00020   numactions(numactions), gamma(gamma), rrange(rrange), lambda(lambda),
00021   MAX_ITER(MAX_ITER), MAX_TIME(MAX_TIME),
00022   MAX_DEPTH(MAX_DEPTH), modelType(modelType), statesPerDim(nstatesPerDim),
00023   trackActual(trackActual), HISTORY_SIZE(historySize),
00024   HISTORY_FL_SIZE(historySize*numactions)//fmax.size())
00025 {
00026   rng = r;
00027 
00028   nstates = 0;
00029   nactions = 0;
00030   lastUpdate = -1;
00031   seedMode = false;
00032 
00033   timingType = true;
00034 
00035   model = NULL;
00036   planTime = getSeconds();
00037 
00038   PLANNERDEBUG = false;//true;
00039   ACTDEBUG = false; //true;
00040   MODELDEBUG = false;//true;//false;
00041   UCTDEBUG = false; //true;//false;
00042   REALSTATEDEBUG = false;
00043   HISTORYDEBUG = false; //true; //false;
00044 
00045   featmax = fmax;
00046   featmin = fmin;
00047 
00048   if (statesPerDim[0] > 0){
00049     cout << "Planner ETUCT using discretization of " << statesPerDim[0] << endl;
00050   }
00051   if (trackActual){
00052     cout << "ETUCT tracking real state values" << endl;
00053   }
00054   cout << "Planner using history size: " << HISTORY_SIZE << endl;
00055 
00056   if (HISTORY_SIZE == 0){
00057     saHistory.push_back(0.0);
00058   }
00059   else {
00060     if (HISTORYDEBUG) {
00061       cout << "History size of " << HISTORY_SIZE
00062            << " float size of " << HISTORY_FL_SIZE
00063            << " with state size: " << fmin.size()
00064            << " and numact: " << numactions << endl;
00065     }
00066     for (int i = 0; i < HISTORY_FL_SIZE; i++){
00067       saHistory.push_back(0.0);
00068     }
00069   }
00070 
00071   //  initStates();
00072 
00073 }
00074 
00075 ETUCT::~ETUCT() {
00076   //cout << "planner delete" << endl;
00077   // clear all state info
00078 
00079   for (std::map<state_t, state_info>::iterator i = statedata.begin();
00080        i != statedata.end(); i++){
00081 
00082     // get state's info
00083     //cout << "  planner got info" << endl;
00084     state_info* info = &((*i).second);
00085 
00086     deleteInfo(info);
00087   }
00088 
00089   featmax.clear();
00090   featmin.clear();
00091 
00092   statespace.clear();
00093   statedata.clear();
00094   //cout << "planner done" << endl;
00095 }
00096 
00097 void ETUCT::setModel(MDPModel* m){
00098 
00099   model = m;
00100 
00101 }
00102 
00103 // canonicalize all the states so we already have them in our statespace
00104 void ETUCT::initStates(){
00105   cout << "init states" << endl;
00106   std::vector<float> s(featmin.size());
00107 
00108   fillInState(s,0);
00109 }
00110 
00111 void ETUCT::fillInState(std::vector<float>s, int depth){
00112 
00113   // if depth == size, canonicalize and return
00114   if (depth == (int)featmin.size()){
00115     canonicalize(s);
00116     return;
00117   }
00118 
00119   // go through all features at depth
00120   for (float i = featmin[depth]; i < featmax[depth]+1; i++){
00121     s[depth] = i;
00122     fillInState(s, depth+1);
00123   }
00124 }
00125 
00126 
00128 // Functional functions :) //
00130 
00131 
00132 void ETUCT::initNewState(state_t s){
00133   //if (PLANNERDEBUG) cout << "initNewState(s = " << s
00134   //     << ") size = " << s->size() << endl;
00135 
00136   // create state info and add to hash map
00137   state_info* info = &(statedata[s]);
00138   initStateInfo(s, info);
00139 
00140   // dont init any model info
00141   // we'll get it during search if we have to
00142 
00143 }
00144 
00145 bool ETUCT::updateModelWithExperience(const std::vector<float> &laststate,
00146                                       int lastact,
00147                                       const std::vector<float> &currstate,
00148                                       float reward, bool term){
00149   //  if (PLANNERDEBUG) cout << "updateModelWithExperience(last = " << &laststate
00150   //     << ", curr = " << &currstate
00151   //        << ", lastact = " << lastact
00152   //     << ", r = " << reward
00153   //     << ", term = " << term
00154   //     << ")" << endl;
00155 
00156   if (!timingType)
00157     planTime = getSeconds();
00158 
00159   // canonicalize these things
00160   state_t last = canonicalize(laststate);
00161 
00162   prevstate = last;
00163   prevact = lastact;
00164 
00165   // get state info
00166   previnfo = &(statedata[last]);
00167 
00168   // init model?
00169   if (model == NULL){
00170     cout << "ERROR IN MODEL OR MODEL SIZE" << endl;
00171     exit(-1);
00172   }
00173 
00174   if (MODELDEBUG){
00175     cout << "Update with exp from state: ";
00176     for (unsigned i = 0; i < last->size(); i++){
00177       cout << (laststate)[i] << ", ";
00178     }
00179     cout << " action: " << lastact;
00180     cout << " to state: ";
00181     for (unsigned i = 0; i < currstate.size(); i++){
00182       cout << currstate[i] << ", ";
00183     }
00184     cout << " and reward: " << reward << endl;
00185   }
00186 
00187   experience e;
00188   e.s = laststate;
00189   e.next = currstate;
00190   e.act = lastact;
00191   e.reward = reward;
00192   e.terminal = term;
00193 
00194   if (HISTORY_SIZE > 0){
00195     if (HISTORYDEBUG) {
00196       cout << "Original state vector (size " << e.s.size() << ": " << e.s[0];
00197       for (unsigned i = 1; i < e.s.size(); i++){
00198         cout << "," << e.s[i];
00199       }
00200       cout << endl;
00201     }
00202     // add history onto e.s
00203     for (int i = 0; i < HISTORY_FL_SIZE; i++){
00204       e.s.push_back(saHistory[i]);
00205     }
00206 
00207     if (HISTORYDEBUG) {
00208       cout << "New state vector (size " << e.s.size() << ": " << e.s[0];
00209       for (unsigned i = 1; i < e.s.size(); i++){
00210         cout << "," << e.s[i];
00211       }
00212       cout << endl;
00213     }
00214 
00215     if (!seedMode){
00216       // push this state and action onto the history vector
00217       /*
00218       for (unsigned i = 0; i < last->size(); i++){
00219         saHistory.push_back((*last)[i]);
00220         saHistory.pop_front();
00221       }
00222       */
00223       
00224       for (int i = 0; i < numactions; i++){
00225         if (i == lastact)
00226           saHistory.push_back(1.0);
00227         else
00228           saHistory.push_back(0.0);
00229         saHistory.pop_front();
00230       }
00231       
00232       if (HISTORYDEBUG) {
00233         cout << "New history vector (size " << saHistory.size() << ": " << saHistory[0];
00234         for (unsigned i = 1; i < saHistory.size(); i++){
00235           cout << "," << saHistory[i];
00236         }
00237         cout << endl;
00238       }
00239     }
00240   }
00241 
00242   bool modelChanged = model->updateWithExperience(e);
00243 
00244   if (timingType)
00245     planTime = getSeconds();
00246 
00247   return modelChanged;
00248 
00249 }
00250 
00251 void ETUCT::updateStateActionFromModel(state_t s, int a, state_info* info){
00252 
00253   if (HISTORY_SIZE == 0){
00254 
00255     std::deque<float> history(1,0.0);
00256     StateActionInfo* newModel = NULL;
00257     newModel = &(info->historyModel[a][history]);
00258 
00259     updateStateActionHistoryFromModel(*s, a, newModel);
00260 
00261   }
00262 
00263   else {
00264 
00265     // fill in for all histories???
00266     for (std::map< std::deque<float>, StateActionInfo>::iterator it = info->historyModel[a].begin(); it != info->historyModel[a].end(); it++){
00267 
00268       std::deque<float> oneHist = (*it).first;
00269       StateActionInfo* newModel = &((*it).second);
00270 
00271       // add history to vector
00272       std::vector<float> modState = *s;
00273       for (int i = 0; i < HISTORY_FL_SIZE; i++){
00274         modState.push_back(oneHist[i]);
00275       }
00276       updateStateActionHistoryFromModel(modState, a, newModel);
00277     }
00278 
00279   }
00280 
00281 }
00282 
00283 void ETUCT::updateStateActionHistoryFromModel(const std::vector<float> modState, int a, StateActionInfo *newModel){
00284 
00285   // update state info
00286   // get state action info for each action
00287   model->getStateActionInfo(modState, a, newModel);
00288   newModel->frameUpdated = nactions;
00289 
00290   //canonNextStates(newModel);
00291 
00292 }
00293 
00294 
00295 
00296 void ETUCT::canonNextStates(StateActionInfo* modelInfo){
00297 
00298   // loop through all next states
00299   for (std::map<std::vector<float>, float>::iterator outIt
00300          = modelInfo->transitionProbs.begin();
00301        outIt != modelInfo->transitionProbs.end(); outIt++){
00302 
00303     std::vector<float> nextstate = (*outIt).first;
00304 
00305     // check that it is valid, otherwise replace with current
00306     bool badState = false;
00307     for (unsigned j = 0; j < nextstate.size(); j++){
00308       if (nextstate[j] < (featmin[j]-EPSILON)
00309           || nextstate[j] > (featmax[j]+EPSILON)){
00310         //cout << "next state out of range " << nextstate[j] << endl;
00311         badState = true;
00312         break;
00313       }
00314     }
00315 
00316     if (!badState){
00317       canonicalize(nextstate);
00318     }
00319   }
00320 }
00321 
00322 
00323 
00324 
00325 int ETUCT::getBestAction(const std::vector<float> &state){
00326   //  if (PLANNERDEBUG) cout << "getBestAction(s = " << &state << ")" << endl;
00327 
00328   //  resetUCTCounts();
00329 
00330   state_t s = canonicalize(state);
00331 
00332   int i = 0;
00333   for (i = 0; i < MAX_ITER; i++){
00334 
00335     std::deque<float> searchHistory = saHistory;
00336     uctSearch(state, s, 0, searchHistory);
00337 
00338     // break after some max time
00339     if ((getSeconds() - planTime) > MAX_TIME){ // && i > 500){
00340       break;
00341     }
00342 
00343   }
00344   double currTime = getSeconds();
00345   if (false || UCTDEBUG){
00346     cout << "Search complete after " << (currTime-planTime) << " seconds and "
00347          << i << " iterations." << endl;
00348   }
00349 
00350   // get state info
00351   state_info* info = &(statedata[s]);
00352 
00353   // Get Q values
00354   std::vector<float> &Q = info->Q;
00355 
00356   // Choose an action
00357   const std::vector<float>::iterator a =
00358     random_max_element(Q.begin(), Q.end()); // Choose maximum
00359 
00360   int act = a - Q.begin();
00361   nactions++;
00362 
00363   if (false){
00364     cout << "State " << (*s)[0];
00365     for (unsigned i = 1; i < s->size(); i++){
00366       cout << "," << (*s)[i];
00367     }
00368     cout << ", Took action " << act << ", "
00369          << "value: " << *a << endl;
00370   }
00371 
00372   // return index of action
00373   return act;
00374 }
00375 
00376 
00377 
00378 
00379 
00380 
00381 void ETUCT::planOnNewModel(){
00382 
00383   // reset visit counts/q values
00384   resetUCTCounts();
00385 
00386   // for rmax, only s-a's prediction has changed
00387   if (modelType == RMAX){
00388     updateStateActionFromModel(prevstate, prevact, previnfo);
00389   }
00390 
00391   // for other model types, it all could change, clear all cached model predictions
00392   else {
00393 
00394     // still update flagged s-a's
00395     // then less stuff to query while planning
00396     for (std::set<std::vector<float> >::iterator i = statespace.begin();
00397          i != statespace.end(); i++){
00398       state_t s = canonicalize(*i);
00399       state_info* info = &(statedata[s]);
00400       if (info->needsUpdate){
00401         for (int j = 0; j < numactions; j++){
00402           updateStateActionFromModel(s, j, info);
00403         }
00404         info->needsUpdate = false;
00405       }
00406     }
00407     lastUpdate = nactions;
00408   }
00409 
00410 }
00411 
00412 
00413 void ETUCT::resetUCTCounts(){
00414   // if (PLANNERDEBUG) cout << "Reset UCT Counts" << endl;
00415   const int MIN_VISITS = 10;
00416 
00417   // loop through here
00418   for (std::set<std::vector<float> >::iterator i = statespace.begin();
00419        i != statespace.end(); i++){
00420     state_t s = canonicalize(*i);
00421 
00422     state_info* info = &(statedata[s]);
00423 
00424     if (info->uctVisits > (MIN_VISITS * numactions))
00425       info->uctVisits = MIN_VISITS * numactions;
00426 
00427     for (int j = 0; j < numactions; j++){
00428       if (info->uctActions[j] > MIN_VISITS)
00429         info->uctActions[j] = MIN_VISITS;
00430     }
00431 
00432   }
00433 
00434 }
00435 
00436 
00437 
00438 
00440 // Helper Functions       //
00442 
00443 ETUCT::state_t ETUCT::canonicalize(const std::vector<float> &s) {
00444   //if (PLANNERDEBUG) cout << "canonicalize(s = " << s[0] << ", "
00445   //                     << s[1] << ")" << endl;
00446 
00447   // discretize it
00448   std::vector<float> s2;
00449   if (statesPerDim[0] > 0){
00450     s2 = discretizeState(s);
00451   } else {
00452     s2 = s;
00453   }
00454 
00455   // get state_t for pointer if its in statespace
00456   const std::pair<std::set<std::vector<float> >::iterator, bool> result =
00457     statespace.insert(s2);
00458   state_t retval = &*result.first; // Dereference iterator then get pointer
00459 
00460   //if (PLANNERDEBUG) cout << " returns " << retval
00461   //       << " New: " << result.second << endl;
00462 
00463   // if not, init this new state
00464   if (result.second) { // s is new, so initialize Q(s,a) for all a
00465     initNewState(retval);
00466     if (PLANNERDEBUG) {
00467       cout << " New state initialized "
00468            << " orig:(" << s[0] << "," << s[1] << ")"
00469            << " disc:(" << s2[0] << "," << s2[1] << ")" << endl;
00470     }
00471   }
00472 
00473 
00474   return retval;
00475 }
00476 
00477 
00478 // init state info
00479 void ETUCT::initStateInfo(state_t s, state_info* info){
00480   //if (PLANNERDEBUG) cout << "initStateInfo()";
00481 
00482   info->id = nstates++;
00483   if (PLANNERDEBUG){
00484     cout << " id = " << info->id;
00485     cout << ", (" << (*s)[0] << "," << (*s)[1] << ")" << endl;
00486   }
00487 
00488   info->historyModel = new std::map< std::deque<float>, StateActionInfo>[numactions];
00489 
00490   // model q values, visit counts
00491   info->Q.resize(numactions, 0);
00492   info->uctActions.resize(numactions, 1);
00493   info->uctVisits = 1;
00494   info->visited = 0; //false;
00495   info->needsUpdate = true;
00496 
00497   for (int i = 0; i < numactions; i++){
00498     info->Q[i] = rng.uniform(0,0.01);
00499   }
00500 
00501   //if (PLANNERDEBUG) cout << "done with initStateInfo()" << endl;
00502 
00503 }
00504 
00505 
00506 void ETUCT::printStates(){
00507 
00508   for (std::set< std::vector<float> >::iterator i = statespace.begin();
00509        i != statespace.end(); i++){
00510 
00511     state_t s = canonicalize(*i);
00512 
00513     state_info* info = &(statedata[s]);
00514 
00515     cout << "State " << info->id << ": ";
00516     for (unsigned j = 0; j < s->size(); j++){
00517       cout << (*s)[j] << ", ";
00518     }
00519     cout << endl;
00520 
00521     for (int act = 0; act < numactions; act++){
00522       cout << " Q: " << info->Q[act] << endl;
00523     }
00524 
00525   }
00526 }
00527 
00528 
00529 void ETUCT::deleteInfo(state_info* info){
00530 
00531   delete [] info->historyModel;
00532 
00533 }
00534 
00535 
00536 
00537 double ETUCT::getSeconds(){
00538   struct timezone tz;
00539   timeval timeT;
00540   gettimeofday(&timeT, &tz);
00541   return  timeT.tv_sec + (timeT.tv_usec / 1000000.0);
00542 }
00543 
00544 
00545 float ETUCT::uctSearch(const std::vector<float> &actS, state_t discS, int depth,std::deque<float> searchHistory){
00546   if (UCTDEBUG){
00547     cout << " uctSearch state ";
00548     for (unsigned i = 0; i < actS.size(); i++){
00549       cout << actS[i] << ", ";
00550     }
00551     cout << " at depth " << depth << endl;
00552   }
00553 
00554   state_info* info = &(statedata[discS]);
00555 
00556   // if max depth
00557   // iterative deepening (probability inversely proportional to visits)
00558   //float terminateProb = 1.0/(2.0+(float)info->uctVisits);
00559 
00560   // already visited, stop here
00561   if (depth > MAX_DEPTH){
00562     // return max q value here
00563     std::vector<float>::iterator maxAct =
00564       std::max_element(info->Q.begin(),
00565                        info->Q.end());
00566     float maxval = *maxAct;
00567 
00568     if (UCTDEBUG)
00569       cout << "Terminated after depth: " << depth
00570         //   << " prob: " << terminateProb
00571            << " Q: " << maxval
00572            << " visited: " << info->visited << endl;
00573 
00574     return maxval;
00575   }
00576 
00577   // select action
00578   int action = selectUCTAction(info);
00579 
00580   // simulate action to get next state and reward
00581   // depending on exploration, may also terminate us
00582   float reward = 0;
00583   bool term = false;
00584 
00585   float learnRate;
00586   //float learnRate = 0.001;
00587   //float learnRate = 1.0 / info->uctActions[action];
00588   //    learnRate = 10.0 / (info->uctActions[action] + 100.0);
00589   learnRate = 10.0 / (info->uctActions[action] + 10.0);
00590   //if (learnRate < 0.001 && MAX_TIME < 0.5)
00591   //learnRate = 0.001;
00592   //learnRate = 0.05;
00593   //learnRate = 1.0;
00594 
00595   // tell model learning thread to update this state since we've visited it
00596   info->needsUpdate = true;
00597 
00598   // simulate next state, reward, terminal
00599   std::vector<float> actualNext = simulateNextState(actS, discS, info, searchHistory, action, &reward, &term);
00600 
00601   // simulate reward from this action
00602   if (term){
00603     // this one terminated
00604     if (UCTDEBUG) cout << "   Terminated on exploration condition" << endl;
00605     info->Q[action] += learnRate * (reward - info->Q[action]);
00606     info->uctVisits++;
00607     info->uctActions[action]++;
00608     if (UCTDEBUG)
00609       cout << " Depth: " << depth << " Selected action " << action
00610            << " r: " << reward
00611            << " StateVisits: " << info->uctVisits
00612            << " ActionVisits: " << info->uctActions[action] << endl;
00613 
00614     return reward;
00615   }
00616 
00617   // get discretized version of next
00618   state_t discNext = canonicalize(actualNext);
00619 
00620   if (UCTDEBUG)
00621     cout << " Depth: " << depth << " Selected action " << action
00622          << " r: " << reward  << endl;
00623 
00624   info->visited++; // = true;
00625 
00626   if (HISTORY_SIZE > 0){
00627     // update history vector for this state
00628     /*
00629     for (unsigned i = 0; i < (*discS).size(); i++){
00630       searchHistory.push_back((*discS)[i]);
00631       searchHistory.pop_front();
00632     }
00633     */
00634     for (int i = 0; i < numactions; i++){
00635       if (i == action)
00636         searchHistory.push_back(1.0);
00637       else
00638         searchHistory.push_back(0.0);
00639       searchHistory.pop_front();
00640     }
00641     
00642     if (HISTORYDEBUG) {
00643       cout << "New planning history vector (size " << searchHistory.size() << ": " << searchHistory[0];
00644       for (unsigned i = 1; i < searchHistory.size(); i++){
00645         cout << "," << searchHistory[i];
00646       }
00647       cout << endl;
00648     }
00649   }
00650 
00651 
00652   // new q value
00653   float newQ = reward + gamma * uctSearch(actualNext, discNext, depth+1, searchHistory);
00654 
00655   if (info->visited == 1){
00656 
00657     // update q and visit counts
00658     info->Q[action] += learnRate * (newQ - info->Q[action]);
00659     info->uctVisits++;
00660     info->uctActions[action]++;
00661 
00662     if (UCTDEBUG)
00663       cout << " Depth: " << depth << " newQ: " << newQ
00664            << " StateVisits: " << info->uctVisits
00665            << " ActionVisits: " << info->uctActions[action] << endl;
00666 
00667     if (lambda < 1.0){
00668 
00669       // new idea, return max of Q or new q
00670       std::vector<float>::iterator maxAct =
00671         std::max_element(info->Q.begin(),
00672                          info->Q.end());
00673       float maxval = *maxAct;
00674 
00675       if (UCTDEBUG)
00676         cout << " Replacing newQ: " << newQ;
00677 
00678       // replace with w avg of maxq and new val
00679       newQ = (lambda * newQ) + ((1.0-lambda) * maxval);
00680 
00681       if (UCTDEBUG)
00682         cout << " with wAvg: " << newQ << endl;
00683     }
00684 
00685   }
00686 
00687   info->visited--;
00688 
00689   // return q
00690   return newQ;
00691 
00692 }
00693 
00694 
00695 int ETUCT::selectUCTAction(state_info* info){
00696   //  if (UCTDEBUG) cout << "  selectUCTAction" << endl;
00697 
00698   std::vector<float> &Q = info->Q;
00699 
00700   // loop through
00701   float rewardBound = rrange;
00702   if (rewardBound < 1.0)
00703     rewardBound = 1.0;
00704   rewardBound /= (1.0 - gamma);
00705   if (UCTDEBUG) cout << "Reward bound: " << rewardBound << endl;
00706 
00707   std::vector<float> uctQ(numactions, 0.0);
00708 
00709   for (int i = 0; i < numactions; i++){
00710 
00711     // this actions value is Q + rMax * 2 sqrt (log N(s) / N(s,a))
00712     uctQ[i] = Q[i] +
00713       rewardBound * 2.0 * sqrt(log((float)info->uctVisits) /
00714                                (float)info->uctActions[i]);
00715 
00716     if (UCTDEBUG)
00717       cout << "  Action: " << i << " Q: " << Q[i]
00718            << " visits: " << info->uctActions[i]
00719            << " value: " << uctQ[i] << endl;
00720   }
00721 
00722   // max element of uctQ
00723   std::vector<float>::iterator maxAct =
00724     max_element(uctQ.begin(), uctQ.end());
00725   float maxval = *maxAct;
00726   int act = maxAct - uctQ.begin();
00727 
00728   if (UCTDEBUG)
00729     cout << "  Selected " << act << " val: " << maxval << endl;
00730 
00731   return act;
00732 
00733 }
00734 
00735 
00736 
00737 std::vector<float> ETUCT::simulateNextState(const std::vector<float> &actualState, state_t discState, state_info* info, const std::deque<float> &history, int action, float* reward, bool* term){
00738 
00739   StateActionInfo* modelInfo = &(info->historyModel[action][history]);
00740   bool upToDate = modelInfo->frameUpdated >= lastUpdate;
00741 
00742   if (!upToDate){
00743     // must put in appropriate history
00744     if (HISTORY_SIZE > 0){
00745       std::vector<float> modState = *discState;
00746       for (int i = 0; i < HISTORY_FL_SIZE; i++){
00747         modState.push_back(history[i]);
00748       }
00749       updateStateActionHistoryFromModel(modState, action, modelInfo);
00750     } else {
00751       updateStateActionHistoryFromModel(*discState, action, modelInfo);
00752     }
00753   }
00754 
00755 
00756   *reward = modelInfo->reward;
00757   *term = (rng.uniform() < modelInfo->termProb);
00758 
00759   if (*term){
00760     return actualState;
00761   }
00762 
00763   float randProb = rng.uniform();
00764 
00765   float probSum = 0.0;
00766   std::vector<float> nextstate;
00767 
00768   if (REALSTATEDEBUG) cout << "randProb: " << randProb << " numNext: " << modelInfo->transitionProbs.size() << endl;
00769 
00770   if (modelInfo->transitionProbs.size() == 0)
00771     nextstate = actualState;
00772 
00773   for (std::map<std::vector<float>, float>::iterator outIt
00774          = modelInfo->transitionProbs.begin();
00775        outIt != modelInfo->transitionProbs.end(); outIt++){
00776 
00777     float prob = (*outIt).second;
00778     probSum += prob;
00779     if (REALSTATEDEBUG) cout << randProb << ", " << probSum << ", " << prob << endl;
00780 
00781     if (randProb <= probSum){
00782       nextstate = (*outIt).first;
00783       if (REALSTATEDEBUG) cout << "selected state " << randProb << ", " << probSum << ", " << prob << endl;
00784       break;
00785     }
00786   }
00787 
00788   if (trackActual){
00789 
00790 
00791     // find the relative change from discrete center
00792     std::vector<float> relChange = subVec(nextstate, *discState);
00793 
00794     // add that on to actual current state value
00795     nextstate = addVec(actualState, relChange);
00796 
00797 
00798   }
00799 
00800   // check that next state is valid
00801   for (unsigned j = 0; j < nextstate.size(); j++){
00802     if (nextstate[j] < (featmin[j]-EPSILON)
00803         || nextstate[j] > (featmax[j]+EPSILON)){
00804       return actualState;
00805     }
00806   }
00807 
00808   if (UCTDEBUG) cout << "predicted next state: " << nextstate[0] << ", " << nextstate[1] << endl;
00809 
00810   // return new actual state
00811   return nextstate;
00812 
00813 }
00814 
00815 
00816 void ETUCT::savePolicy(const char* filename){
00817 
00818   ofstream policyFile(filename, ios::out | ios::binary | ios::trunc);
00819 
00820   // first part, save the vector size
00821   int fsize = featmin.size();
00822   policyFile.write((char*)&fsize, sizeof(int));
00823 
00824   // save numactions
00825   policyFile.write((char*)&numactions, sizeof(int));
00826 
00827   // go through all states, and save Q values
00828   for (std::set< std::vector<float> >::iterator i = statespace.begin();
00829        i != statespace.end(); i++){
00830 
00831     state_t s = canonicalize(*i);
00832     state_info* info = &(statedata[s]);
00833 
00834     // save state
00835     policyFile.write((char*)&((*i)[0]), sizeof(float)*fsize);
00836 
00837     // save q-values
00838     policyFile.write((char*)&(info->Q[0]), sizeof(float)*numactions);
00839 
00840   }
00841 
00842   policyFile.close();
00843 }
00844 
00845 void ETUCT::logValues(ofstream *of, int xmin, int xmax, int ymin, int ymax){
00846   std::vector<float> state(2, 0.0);
00847   for (int i = xmin ; i < xmax; i++){
00848     for (int j = ymin; j < ymax; j++){
00849       state[0] = j;
00850       state[1] = i;
00851       state_t s = canonicalize(state);
00852       state_info* info = &(statedata[s]);
00853       std::vector<float> &Q_s = info->Q;
00854       const std::vector<float>::iterator max =
00855         random_max_element(Q_s.begin(), Q_s.end());
00856       *of << (*max) << ",";
00857     }
00858   }
00859 }
00860 
00861 
00862 // should do it such that an already discretized state stays the same
00863 // mainly the numerical value of each bin should be the average of that bin
00864 std::vector<float> ETUCT::discretizeState(const std::vector<float> &s){
00865   std::vector<float> ds(s.size());
00866 
00867   for (unsigned i = 0; i < s.size(); i++){
00868 
00869     // since i'm sometimes doing this for discrete domains
00870     // want to center bins on 0, not edge on 0
00871     //cout << "feat " << i << " range: " << featmax[i] << " " << featmin[i] << " " << (featmax[i]-featmin[i]) << " n: " << (float)statesPerDim;
00872 
00873     float factor = (featmax[i] - featmin[i]) / (float)statesPerDim[i];
00874     int bin = 0;
00875     if (s[i] > 0){
00876       bin = (int)((s[i]+factor/2) / factor);
00877     } else {
00878       bin = (int)((s[i]-factor/2) / factor);
00879     }
00880 
00881     ds[i] = factor*bin;
00882     //cout << "P factor: " << factor << " bin: " << bin;
00883     //cout << " Original: " << s[i] << " Discrete: " << ds[i] << endl;
00884   }
00885 
00886   return ds;
00887 }
00888 
00889 std::vector<float> ETUCT::addVec(const std::vector<float> &a, const std::vector<float> &b){
00890   if (a.size() != b.size())
00891     cout << "ERROR: add vector sizes wrong " << a.size() << ", " << b.size() << endl;
00892 
00893   std::vector<float> c(a.size(), 0.0);
00894   for (unsigned i = 0; i < a.size(); i++){
00895     c[i] = a[i] + b[i];
00896   }
00897 
00898   return c;
00899 }
00900 
00901 std::vector<float> ETUCT::subVec(const std::vector<float> &a, const std::vector<float> &b){
00902   if (a.size() != b.size())
00903     cout << "ERROR: sub vector sizes wrong " << a.size() << ", " << b.size() << endl;
00904 
00905   std::vector<float> c(a.size(), 0.0);
00906   for (unsigned i = 0; i < a.size(); i++){
00907     c[i] = a[i] - b[i];
00908   }
00909 
00910   return c;
00911 }
00912 
00913 
00914 void ETUCT::setFirst(){
00915   if (HISTORY_SIZE == 0) return;
00916 
00917   if (HISTORYDEBUG) cout << "first action, set sahistory to 0s" << endl;
00918 
00919   // first action, reset history vector
00920   saHistory.resize(saHistory.size(), 0.0);
00921 }
00922 
00923 void ETUCT::setSeeding(bool seeding){
00924 
00925   if (HISTORYDEBUG) cout << "set seed mode to " << seeding << endl;
00926   seedMode = seeding;
00927 
00928 }