rl_agent: RMaxModel.cc Source File

Go to the documentation of this file.
00001 
00006 #include "RMaxModel.hh"
00007 
00008 
00009 
00010 
00011 RMaxModel::RMaxModel(int m, int nact, Random rng):
00012   M(m), nact(nact), rng(rng)
00013 {
00014 
00015   nstates = 0;
00016   RMAX_DEBUG = false; //true;
00017   //initMDPModel(nfactors);  
00018 }
00019 
00020 RMaxModel::RMaxModel(const RMaxModel &rm):
00021 M(rm.M), nact(rm.nact), rng(rm.rng)
00022 {
00023 
00024   nstates = rm.nstates;
00025   RMAX_DEBUG = rm.RMAX_DEBUG;
00026 
00027   statespace = rm.statespace;
00028 
00029   // copy info into map for each of these
00030   // because map is by address, must get correct address for each
00031   for (std::map<state_t, state_info>::const_iterator i = rm.statedata.begin();
00032        i != rm.statedata.end(); i++){
00033 
00034     state_t s = canonicalize(*((*i).first));
00035     statedata[s] = (*i).second;
00036 
00037   }
00038 
00039 }
00040 
00041 RMaxModel* RMaxModel::getCopy(){
00042   RMaxModel* copy = new RMaxModel(*this);
00043   return copy;
00044 }
00045 
00046 RMaxModel::~RMaxModel() {}
00047 
00048 
00049 bool RMaxModel::updateWithExperiences(std::vector<experience> &instances){
00050 
00051   bool changed = false;
00052   
00053   for (unsigned i = 0; i < instances.size(); i++){
00054     bool singleChange = updateWithExperience(instances[i]);
00055     changed = changed || singleChange;
00056   }
00057   return changed;
00058 }
00059 
00060 
00061 // update all the counts, check if model has changed
00062 // stop counting at M
00063 bool RMaxModel::updateWithExperience(experience &e){
00064   if (RMAX_DEBUG) cout << "updateWithExperience " << &(e.s) << ", " << e.act 
00065                        << ", " << &(e.next) << ", " << e.reward << endl;
00066 
00067   // get state info for last state
00068   state_t l = canonicalize(e.s);
00069   state_info* info = &(statedata[l]);
00070 
00071   bool modelChanged = false;
00072 
00073   // stop at M
00074   //if (info->visits[e.act] == M)
00075   if (info->known[e.act])
00076     return false;
00077 
00078   // update visit count for action just executed
00079   info->visits[e.act]++;
00080 
00081   // update termination count
00082   if (e.terminal) info->terminations[e.act]++;
00083 
00084   // update reward sum for this action
00085   info->Rsum[e.act] += e.reward;
00086 
00087   // update transition count for outcome that occured
00088   std::vector<int> &transCounts = info->outCounts[e.next];
00089 
00090   // first, check that we have resized this counter
00091   checkTransitionCountSize(&transCounts);
00092 
00093   // only update state transition counts for non-terminal transitions
00094   if (!e.terminal){
00095     // then update transition count for this action/outcome
00096     transCounts[e.act]++;
00097   }    
00098 
00099   // check if count becomes known 
00100   if (!info->known[e.act] && info->visits[e.act] >= M){
00101     info->known[e.act] = true;
00102     modelChanged = true;
00103   }
00104 
00105   if (RMAX_DEBUG) cout << "s" << info->id << " act: " << e.act 
00106                                << " transCounts[act] = " << transCounts[e.act] 
00107                                << " visits[act] = " << info->visits[e.act] << endl;
00108   
00109   // anything that got past the 'return false' above is a change in conf or predictions
00110   return true; //modelChanged;
00111 
00112 }
00113 
00114 
00115 // calculate state info such as transition probs, known/unknown, reward prediction
00116 float RMaxModel::getStateActionInfo(const std::vector<float> &state, int act, StateActionInfo* retval){
00117   if (RMAX_DEBUG) cout << "getStateActionInfo, " << &state <<  ", " << act << endl;
00118 
00119 
00120   retval->transitionProbs.clear();
00121 
00122   // get state-action info for this state
00123   state_t l = canonicalize(state);
00124   state_info* info = &(statedata[l]);
00125 
00126   
00127   // see if it has any visits (could still be unknown)
00128   if (info->visits[act] == 0){
00129     if (RMAX_DEBUG) cout << "This outcome is unknown" << endl;
00130     retval->reward = -0.001;
00131 
00132     // add to transition map
00133     retval->transitionProbs[state] = 1.0;
00134     retval->known = false;
00135     retval->termProb = 0.0;
00136     return 0;
00137   }
00138   
00139   
00140   // fill in transition probs
00141   for (std::map<std::vector<float>, std::vector<int> >::iterator it = info->outCounts.begin(); 
00142        it != info->outCounts.end(); it++){
00143 
00144     // get key from iterator
00145     std::vector<float> next = (*it).first;
00146     int count = ((*it).second)[act];
00147 
00148     // add to transition map
00149     if (count > 0.0){
00150       retval->transitionProbs[next] = (float)count / (float)(info->visits[act] - info->terminations[act]);
00151       if (RMAX_DEBUG) cout << "Outcome " << &next << " has prob " << retval->transitionProbs[next]
00152                            << " from count of " << count << " on " 
00153                            << info->visits[act] << " visits." << endl;
00154     }
00155   }
00156   
00157 
00158   // add in avg rewrad
00159   retval->reward = (float)info->Rsum[act] / (float)info->visits[act];
00160   if (RMAX_DEBUG) cout << "Avg Reward of  " << retval->reward << " from reward sum of " 
00161                        << info->Rsum[act] 
00162                        << " on " << info->visits[act] << " visits." << endl;
00163 
00164   // termination probability
00165   retval->termProb = (float)info->terminations[act] / (float)info->visits[act];
00166   if (RMAX_DEBUG) cout << "termProb: " << retval->termProb << endl;
00167   if (retval->termProb < 0 || retval->termProb > 1){
00168     cout << "Problem with termination probability: " << retval->termProb << endl;
00169   }
00170 
00171 
00172   retval->known = info->known[act];
00173   // conf as a pct of float m (so 0.5 is exactly M)
00174   float conf = (float)info->visits[act]/ (2.0 * (float)M);
00175 
00176   return conf;
00177 
00178 }
00179 
00180 
00181 
00182 
00183 
00184 RMaxModel::state_t RMaxModel::canonicalize(const std::vector<float> &s) {
00185   if (RMAX_DEBUG) cout << "canonicalize, s = " << &s << endl;
00186 
00187   // get state_t for pointer if its in statespace
00188   const std::pair<std::set<std::vector<float> >::iterator, bool> result =
00189     statespace.insert(s);
00190   state_t retval = &*result.first; // Dereference iterator then get pointer 
00191 
00192   if (RMAX_DEBUG) cout << " returns " << retval << endl;
00193 
00194   // if not, init this new state
00195   if (result.second) { // s is new, so initialize Q(s,a) for all a
00196     initNewState(retval);
00197   }
00198 
00199   return retval; 
00200 }
00201 
00202 void RMaxModel::initNewState(state_t s){
00203   if (RMAX_DEBUG) cout << "initNewState(s = " << s 
00204                        << ")" << endl;
00205   
00206   // create state info and add to hash map
00207   state_info* info = &(statedata[s]);
00208   initStateInfo(info);
00209 
00210 }
00211 
00212 
00213 // init state info
00214 void RMaxModel::initStateInfo(state_info* info){
00215   if (RMAX_DEBUG) cout << "initStateInfo()";
00216 
00217   info->id = nstates++;
00218   if (RMAX_DEBUG) cout << " id = " << info->id << endl;
00219 
00220   // model data (q values, state-action counts, transition, etc)
00221   info->visits.resize(nact, 0);
00222   info->Rsum.resize(nact, 0);
00223   info->known.resize(nact, false);
00224   info->terminations.resize(nact, 0);
00225 
00226 }
00227 
00228 
00229 void RMaxModel::checkTransitionCountSize(std::vector<int>* transCounts){
00230   if (RMAX_DEBUG) cout << "checkTransitionCountSize(transCounts) " 
00231                     << "size: " << transCounts->size() << endl;
00232 
00233   // resize to numactions if not initialized for this outcome yet
00234   if (transCounts->size() == 0)
00235     transCounts->resize(nact, 0);
00236 
00237 }