rl_agent: ExplorationModel.cc Source File

Go to the documentation of this file.
00001 
00009 #include "ExplorationModel.hh"
00010 
00011 
00012 
00013 
00014 ExplorationModel::ExplorationModel(MDPModel* innermodel, int modelType, int exploreType,
00015                                    int predType, int nModels,
00016                                    float m, int numactions,
00017                                    float rmax, float qmax, float rrange,
00018                                    int nfactors, float v, float n,
00019                                    const std::vector<float> &fmax,
00020                                    const std::vector<float> &fmin, Random rng):
00021   modelType(modelType), exploreType(exploreType), predType(predType),
00022   nModels(nModels),
00023   M(m), numactions(numactions), rmax(rmax), qmax(qmax), rrange(rrange),
00024   nfactors(nfactors), v(v), n(n), rng(rng)
00025 {
00026 
00027   model = innermodel;
00028 
00029   MODEL_DEBUG = false; //true;
00030 
00031   cout << "Exploration Model " << exploreType << ", v: " << v << ", n: " << n << endl;
00032 
00033   featmax = fmax;
00034   featmin = fmin;
00035 
00036 }
00037 
00038 ExplorationModel::ExplorationModel(const ExplorationModel &em):
00039 modelType(em.modelType), exploreType(em.exploreType), predType(em.predType),
00040   nModels(em.nModels),
00041   M(em.M), numactions(em.numactions), rmax(em.rmax), qmax(em.qmax), rrange(em.rrange),
00042 nfactors(em.nfactors), v(em.v), n(em.n), rng(em.rng)
00043 {
00044   model = em.model->getCopy();
00045   MODEL_DEBUG = em.MODEL_DEBUG;
00046   featmax = em.featmax;
00047   featmin = em.featmin;
00048   statespace = em.statespace;
00049 }
00050 
00051 ExplorationModel* ExplorationModel::getCopy(){
00052   ExplorationModel* copy = new ExplorationModel(*this);
00053   return copy;
00054 }
00055 
00056 
00057 ExplorationModel::~ExplorationModel() {
00058   delete model;
00059 }
00060 
00061 
00062 
00063 bool ExplorationModel::updateWithExperiences(std::vector<experience> &instances){
00064   bool changed = model->updateWithExperiences(instances);
00065   bool visitChange = false;
00066 
00067   // keep track of which states we've been to for this mode
00068   for (unsigned i = 0; i < instances.size(); i++){
00069     if (exploreType == UNVISITED_BONUS){
00070       bool retval = addStateToSet(instances[i].s);
00071       visitChange = visitChange || retval;
00072     }
00073 
00074     if (exploreType == UNVISITED_ACT_BONUS || exploreType == DIFF_AND_VISIT_BONUS || exploreType == NOVEL_STATE_BONUS || exploreType == DIFF_AND_NOVEL_BONUS){
00075       std::vector<float> last2 = instances[i].s;
00076       last2.push_back(instances[i].act);
00077       bool retval = addStateToSet(last2);
00078       visitChange = visitChange || retval;
00079     }
00080   }
00081 
00082   return (changed || visitChange);
00083 }
00084 
00085 
00086 // update all the counts, check if model has changed
00087 // stop counting at M
00088 bool ExplorationModel::updateWithExperience(experience &e){
00089   //if (MODEL_DEBUG) cout << "updateWithExperience " << &last << ", " << act
00090   //        << ", " << &curr << ", " << reward << endl;
00091 
00092   bool changed = model->updateWithExperience(e);
00093   bool visitChange = false;
00094 
00095   // keep track of which states we've been to for this mode
00096   if (exploreType == UNVISITED_BONUS){
00097     bool retval = addStateToSet(e.s);
00098     visitChange = visitChange || retval;
00099   }
00100 
00101   if (exploreType == UNVISITED_ACT_BONUS || exploreType == DIFF_AND_VISIT_BONUS || exploreType == NOVEL_STATE_BONUS || exploreType == DIFF_AND_NOVEL_BONUS){
00102     std::vector<float> last2 = e.s;
00103     last2.push_back(e.act);
00104     bool retval = addStateToSet(last2);
00105     visitChange = visitChange || retval;
00106   }
00107 
00108   return (changed || visitChange);
00109 }
00110 
00111 
00112 // calculate state info such as transition probs, known/unknown, reward prediction
00113 float ExplorationModel::getStateActionInfo(const std::vector<float> &state, int act, StateActionInfo* retval){
00114   //if (MODEL_DEBUG) cout << "getStateActionInfo, " << &state <<  ", " << act << endl;
00115 
00116   retval->transitionProbs.clear();
00117 
00118   float conf = model->getStateActionInfo(state, act, retval);
00119 
00120 
00121   //cout << "state: " << state[0] << " act: " << act;
00122 
00123   if (MODEL_DEBUG)// || (conf > 0.0 && conf < 1.0))
00124     cout << "reward: " << retval->reward << " conf: " << conf << endl;
00125 
00126   // check exploration bonuses
00127 
00128   // use qmax if state is unknown
00129   if (exploreType == EXPLORE_UNKNOWN){
00130     if (!retval->known){
00131       if (MODEL_DEBUG){
00132         cout << "State-Action Unknown in model: conf: " << conf << " ";
00133         for (unsigned si = 0; si < state.size(); si++){
00134           cout << (state)[si] << ",";
00135         }
00136         cout << " Action: " << act << endl;
00137       }
00138       retval->reward = qmax;
00139       retval->termProb = 1.0;
00140       if (MODEL_DEBUG || MODEL_DEBUG)
00141         cout << "   State-Action Unknown in model, using qmax "
00142              << qmax << endl;
00143     }
00144   }
00145 
00146   // small bonus for unvisited states
00147   if (exploreType == UNVISITED_BONUS){
00148     if (!checkForState(state)){
00149       // modify reward with a bonus of n
00150       float newQ =retval->reward + n;
00151       if (MODEL_DEBUG){
00152         cout << "   State unvisited bonus, orig R: "
00153              << retval->reward
00154              << " adding n: " << n
00155              << " new value : " << newQ
00156              << endl;
00157       }
00158       retval->reward = newQ;
00159     }
00160   }
00161 
00162   // small bonus for unvisited state-actions
00163   if (exploreType == UNVISITED_ACT_BONUS || exploreType == DIFF_AND_VISIT_BONUS){
00164     std::vector<float> state2 = state;
00165     state2.push_back(act);
00166     if (!checkForState(state2)){
00167       // modify reward with a bonus of n
00168       float newQ =retval->reward + n;
00169       if (MODEL_DEBUG){
00170         cout << "   State-Action unvisited bonus, orig R: "
00171              << retval->reward
00172              << " adding n: " << n
00173              << " new value : " << newQ
00174              << endl;
00175       }
00176       retval->reward = newQ;
00177     }
00178   }
00179 
00180   // small bonus for states far from visited states with same action
00181   if (exploreType == NOVEL_STATE_BONUS || exploreType == DIFF_AND_NOVEL_BONUS){
00182     std::vector<float> state2 = state;
00183     state2.push_back(act);
00184     float featDist = getFeatDistToVisitedSA(state2);
00185     if (featDist > 0){
00186       // modify reward with proportional bonus of n
00187       float bonus = featDist * n;
00188       if (MODEL_DEBUG){
00189         cout << "   State-Action novel state bonus, dist: " << featDist
00190              << " n: " << n << ", bonus, " << bonus << endl;
00191       }
00192       retval->reward += bonus;
00193     }
00194   }
00195 
00196   // use some % of v if we're doing continuous terminal bonus
00197   if (exploreType == CONTINUOUS_BONUS){
00198     if (conf < 1.0){
00199       // percent of conf
00200       float bonus = (1.0-conf)*v;
00201       if (MODEL_DEBUG){
00202         cout << "   State-Action continuous bonus conf: "
00203              << conf
00204              << ", using v*(1-conf): "
00205              << bonus << endl;
00206       }
00207       retval->reward = bonus;
00208       retval->termProb = 1.0;
00209     }
00210   }
00211 
00212   // use some % of v if we're doing continuous bonus
00213   if (exploreType == CONTINUOUS_BONUS_R || exploreType == DIFF_AND_VISIT_BONUS || exploreType == DIFF_AND_NOVEL_BONUS){
00214     if (conf < 1.0){
00215       // percent of conf
00216       float bonus = (1.0-conf)*v;
00217       retval->reward += bonus;
00218       if (MODEL_DEBUG){
00219         cout << "   State-Action continuous bonus conf: "
00220              << conf
00221              << ", using v*(1-conf): "
00222              << bonus << endl;
00223       }
00224     }
00225   }
00226 
00227   // use qmax if we're doing threshold terminal bonus and conf under threshold
00228   if (exploreType == THRESHOLD_BONUS){
00229     if (conf < 0.5){
00230       float bonus = v;
00231       if (MODEL_DEBUG){
00232         cout << "   State-Action conf< thresh: "
00233              << conf
00234              << " M: " << M
00235              << ", using v "
00236              << v << endl;
00237       }
00238       retval->reward = bonus;
00239       retval->termProb = 1.0;
00240     }
00241   }
00242 
00243   // use rmax for additional thresh bonus and conf under thresh
00244   if (exploreType == THRESHOLD_BONUS_R){
00245     if (conf < 0.9){
00246       float bonus = v;
00247       retval->reward += bonus;
00248       if (MODEL_DEBUG){
00249         cout << "   State-Action conf< thresh: "
00250              << conf
00251              << " M: " << M
00252              << ", using v "
00253              << v << endl;
00254       }
00255     }
00256   }
00257 
00258   // visits conf
00259   if (exploreType == VISITS_CONF){
00260     if (conf < 0.5){
00261       float bonus = qmax;
00262       retval->reward += bonus;
00263       if (MODEL_DEBUG){
00264         cout << "   State-Action conf< thresh or 0 visits: "
00265              << conf
00266              << " M: " << M
00267              << ", using qmax "
00268              << qmax << endl;
00269       }
00270       retval->reward = bonus;
00271       retval->termProb = 1.0;
00272     }
00273   }
00274 
00275 
00276   if (MODEL_DEBUG)
00277     cout << "   Conf: " << conf << "   Avg reward: " << retval->reward << endl;
00278   if (isnan(retval->reward))
00279     cout << "ERROR: Model returned reward of NaN" << endl;
00280 
00281   return true;
00282 
00283 }
00284 
00285 // add state to set (if its not already in it)
00286 bool ExplorationModel::addStateToSet(const std::vector<float> &s){
00287   std::pair<std::set<std::vector<float> >::iterator, bool> retval;
00288   retval = statespace.insert(s);
00289   return retval.second;
00290 }
00291 
00292 
00293 // check if state is in set (so we know if we've visited it)
00294 bool ExplorationModel::checkForState(const std::vector<float> &s){
00295   return (statespace.count(s) == 1);
00296 }
00297 
00298 // get distance in feature space from this state to one we've visited
00299 float ExplorationModel::getFeatDistToVisitedSA(const std::vector<float> &s){
00300 
00301   // if we've visited this exact s,a then dist is 0
00302   if (checkForState(s)){
00303     return 0;
00304   }
00305 
00306   // otherwise go through all states and find minimum distance
00307   float maxDist = 0;
00308   unsigned nfeats = s.size()-1;
00309   std::vector<float> featRange(nfeats, 0);
00310   for (unsigned i = 0; i < nfeats; i++){
00311     featRange[i] = featmax[i] - featmin[i];
00312     maxDist += 1.0;//featmax[i] - featmin[i];
00313 
00314     //cout << "feat " << i << " diff: " << (featmax[i] - featmin[i]) << " max: " << maxDist << endl;
00315   }
00316 
00317   float minDist = maxDist;//nfeats;
00318   unsigned actionIndex = nfeats;
00319 
00320   for (std::set<std::vector<float> >::iterator i = statespace.begin(); i != statespace.end(); i++){
00321     // ignore if not the same action
00322     if (s[actionIndex] != (*i)[actionIndex]) continue;
00323 
00324     // otherwise, sum all features that are different
00325     float count = 0;
00326     for (unsigned j = 0; j < nfeats; j++){
00327       // distance based on magnitude of feature difference
00328       // normalize by feature range
00329       count += fabs(s[j] - (*i)[j]) / featRange[j];
00330     }
00331     if (count < minDist) minDist = count;
00332 
00333   }
00334 
00335   return (float)minDist/(float)nfeats;
00336 
00337 }
00338 
00339