00001
00009 #include "ExplorationModel.hh"
00010
00011
00012
00013
00014 ExplorationModel::ExplorationModel(MDPModel* innermodel, int modelType, int exploreType,
00015 int predType, int nModels,
00016 float m, int numactions,
00017 float rmax, float qmax, float rrange,
00018 int nfactors, float v, float n,
00019 const std::vector<float> &fmax,
00020 const std::vector<float> &fmin, Random rng):
00021 modelType(modelType), exploreType(exploreType), predType(predType),
00022 nModels(nModels),
00023 M(m), numactions(numactions), rmax(rmax), qmax(qmax), rrange(rrange),
00024 nfactors(nfactors), v(v), n(n), rng(rng)
00025 {
00026
00027 model = innermodel;
00028
00029 MODEL_DEBUG = false;
00030
00031 cout << "Exploration Model " << exploreType << ", v: " << v << ", n: " << n << endl;
00032
00033 featmax = fmax;
00034 featmin = fmin;
00035
00036 }
00037
00038 ExplorationModel::ExplorationModel(const ExplorationModel &em):
00039 modelType(em.modelType), exploreType(em.exploreType), predType(em.predType),
00040 nModels(em.nModels),
00041 M(em.M), numactions(em.numactions), rmax(em.rmax), qmax(em.qmax), rrange(em.rrange),
00042 nfactors(em.nfactors), v(em.v), n(em.n), rng(em.rng)
00043 {
00044 model = em.model->getCopy();
00045 MODEL_DEBUG = em.MODEL_DEBUG;
00046 featmax = em.featmax;
00047 featmin = em.featmin;
00048 statespace = em.statespace;
00049 }
00050
00051 ExplorationModel* ExplorationModel::getCopy(){
00052 ExplorationModel* copy = new ExplorationModel(*this);
00053 return copy;
00054 }
00055
00056
00057 ExplorationModel::~ExplorationModel() {
00058 delete model;
00059 }
00060
00061
00062
00063 bool ExplorationModel::updateWithExperiences(std::vector<experience> &instances){
00064 bool changed = model->updateWithExperiences(instances);
00065 bool visitChange = false;
00066
00067
00068 for (unsigned i = 0; i < instances.size(); i++){
00069 if (exploreType == UNVISITED_BONUS){
00070 bool retval = addStateToSet(instances[i].s);
00071 visitChange = visitChange || retval;
00072 }
00073
00074 if (exploreType == UNVISITED_ACT_BONUS || exploreType == DIFF_AND_VISIT_BONUS || exploreType == NOVEL_STATE_BONUS || exploreType == DIFF_AND_NOVEL_BONUS){
00075 std::vector<float> last2 = instances[i].s;
00076 last2.push_back(instances[i].act);
00077 bool retval = addStateToSet(last2);
00078 visitChange = visitChange || retval;
00079 }
00080 }
00081
00082 return (changed || visitChange);
00083 }
00084
00085
00086
00087
00088 bool ExplorationModel::updateWithExperience(experience &e){
00089
00090
00091
00092 bool changed = model->updateWithExperience(e);
00093 bool visitChange = false;
00094
00095
00096 if (exploreType == UNVISITED_BONUS){
00097 bool retval = addStateToSet(e.s);
00098 visitChange = visitChange || retval;
00099 }
00100
00101 if (exploreType == UNVISITED_ACT_BONUS || exploreType == DIFF_AND_VISIT_BONUS || exploreType == NOVEL_STATE_BONUS || exploreType == DIFF_AND_NOVEL_BONUS){
00102 std::vector<float> last2 = e.s;
00103 last2.push_back(e.act);
00104 bool retval = addStateToSet(last2);
00105 visitChange = visitChange || retval;
00106 }
00107
00108 return (changed || visitChange);
00109 }
00110
00111
00112
00113 float ExplorationModel::getStateActionInfo(const std::vector<float> &state, int act, StateActionInfo* retval){
00114
00115
00116 retval->transitionProbs.clear();
00117
00118 float conf = model->getStateActionInfo(state, act, retval);
00119
00120
00121
00122
00123 if (MODEL_DEBUG)
00124 cout << "reward: " << retval->reward << " conf: " << conf << endl;
00125
00126
00127
00128
00129 if (exploreType == EXPLORE_UNKNOWN){
00130 if (!retval->known){
00131 if (MODEL_DEBUG){
00132 cout << "State-Action Unknown in model: conf: " << conf << " ";
00133 for (unsigned si = 0; si < state.size(); si++){
00134 cout << (state)[si] << ",";
00135 }
00136 cout << " Action: " << act << endl;
00137 }
00138 retval->reward = qmax;
00139 retval->termProb = 1.0;
00140 if (MODEL_DEBUG || MODEL_DEBUG)
00141 cout << " State-Action Unknown in model, using qmax "
00142 << qmax << endl;
00143 }
00144 }
00145
00146
00147 if (exploreType == UNVISITED_BONUS){
00148 if (!checkForState(state)){
00149
00150 float newQ =retval->reward + n;
00151 if (MODEL_DEBUG){
00152 cout << " State unvisited bonus, orig R: "
00153 << retval->reward
00154 << " adding n: " << n
00155 << " new value : " << newQ
00156 << endl;
00157 }
00158 retval->reward = newQ;
00159 }
00160 }
00161
00162
00163 if (exploreType == UNVISITED_ACT_BONUS || exploreType == DIFF_AND_VISIT_BONUS){
00164 std::vector<float> state2 = state;
00165 state2.push_back(act);
00166 if (!checkForState(state2)){
00167
00168 float newQ =retval->reward + n;
00169 if (MODEL_DEBUG){
00170 cout << " State-Action unvisited bonus, orig R: "
00171 << retval->reward
00172 << " adding n: " << n
00173 << " new value : " << newQ
00174 << endl;
00175 }
00176 retval->reward = newQ;
00177 }
00178 }
00179
00180
00181 if (exploreType == NOVEL_STATE_BONUS || exploreType == DIFF_AND_NOVEL_BONUS){
00182 std::vector<float> state2 = state;
00183 state2.push_back(act);
00184 float featDist = getFeatDistToVisitedSA(state2);
00185 if (featDist > 0){
00186
00187 float bonus = featDist * n;
00188 if (MODEL_DEBUG){
00189 cout << " State-Action novel state bonus, dist: " << featDist
00190 << " n: " << n << ", bonus, " << bonus << endl;
00191 }
00192 retval->reward += bonus;
00193 }
00194 }
00195
00196
00197 if (exploreType == CONTINUOUS_BONUS){
00198 if (conf < 1.0){
00199
00200 float bonus = (1.0-conf)*v;
00201 if (MODEL_DEBUG){
00202 cout << " State-Action continuous bonus conf: "
00203 << conf
00204 << ", using v*(1-conf): "
00205 << bonus << endl;
00206 }
00207 retval->reward = bonus;
00208 retval->termProb = 1.0;
00209 }
00210 }
00211
00212
00213 if (exploreType == CONTINUOUS_BONUS_R || exploreType == DIFF_AND_VISIT_BONUS || exploreType == DIFF_AND_NOVEL_BONUS){
00214 if (conf < 1.0){
00215
00216 float bonus = (1.0-conf)*v;
00217 retval->reward += bonus;
00218 if (MODEL_DEBUG){
00219 cout << " State-Action continuous bonus conf: "
00220 << conf
00221 << ", using v*(1-conf): "
00222 << bonus << endl;
00223 }
00224 }
00225 }
00226
00227
00228 if (exploreType == THRESHOLD_BONUS){
00229 if (conf < 0.5){
00230 float bonus = v;
00231 if (MODEL_DEBUG){
00232 cout << " State-Action conf< thresh: "
00233 << conf
00234 << " M: " << M
00235 << ", using v "
00236 << v << endl;
00237 }
00238 retval->reward = bonus;
00239 retval->termProb = 1.0;
00240 }
00241 }
00242
00243
00244 if (exploreType == THRESHOLD_BONUS_R){
00245 if (conf < 0.9){
00246 float bonus = v;
00247 retval->reward += bonus;
00248 if (MODEL_DEBUG){
00249 cout << " State-Action conf< thresh: "
00250 << conf
00251 << " M: " << M
00252 << ", using v "
00253 << v << endl;
00254 }
00255 }
00256 }
00257
00258
00259 if (exploreType == VISITS_CONF){
00260 if (conf < 0.5){
00261 float bonus = qmax;
00262 retval->reward += bonus;
00263 if (MODEL_DEBUG){
00264 cout << " State-Action conf< thresh or 0 visits: "
00265 << conf
00266 << " M: " << M
00267 << ", using qmax "
00268 << qmax << endl;
00269 }
00270 retval->reward = bonus;
00271 retval->termProb = 1.0;
00272 }
00273 }
00274
00275
00276 if (MODEL_DEBUG)
00277 cout << " Conf: " << conf << " Avg reward: " << retval->reward << endl;
00278 if (isnan(retval->reward))
00279 cout << "ERROR: Model returned reward of NaN" << endl;
00280
00281 return true;
00282
00283 }
00284
00285
00286 bool ExplorationModel::addStateToSet(const std::vector<float> &s){
00287 std::pair<std::set<std::vector<float> >::iterator, bool> retval;
00288 retval = statespace.insert(s);
00289 return retval.second;
00290 }
00291
00292
00293
00294 bool ExplorationModel::checkForState(const std::vector<float> &s){
00295 return (statespace.count(s) == 1);
00296 }
00297
00298
00299 float ExplorationModel::getFeatDistToVisitedSA(const std::vector<float> &s){
00300
00301
00302 if (checkForState(s)){
00303 return 0;
00304 }
00305
00306
00307 float maxDist = 0;
00308 unsigned nfeats = s.size()-1;
00309 std::vector<float> featRange(nfeats, 0);
00310 for (unsigned i = 0; i < nfeats; i++){
00311 featRange[i] = featmax[i] - featmin[i];
00312 maxDist += 1.0;
00313
00314
00315 }
00316
00317 float minDist = maxDist;
00318 unsigned actionIndex = nfeats;
00319
00320 for (std::set<std::vector<float> >::iterator i = statespace.begin(); i != statespace.end(); i++){
00321
00322 if (s[actionIndex] != (*i)[actionIndex]) continue;
00323
00324
00325 float count = 0;
00326 for (unsigned j = 0; j < nfeats; j++){
00327
00328
00329 count += fabs(s[j] - (*i)[j]) / featRange[j];
00330 }
00331 if (count < minDist) minDist = count;
00332
00333 }
00334
00335 return (float)minDist/(float)nfeats;
00336
00337 }
00338
00339