Go to the documentation of this file.00001
00006 #include "RMaxModel.hh"
00007
00008
00009
00010
00011 RMaxModel::RMaxModel(int m, int nact, Random rng):
00012 M(m), nact(nact), rng(rng)
00013 {
00014
00015 nstates = 0;
00016 RMAX_DEBUG = false;
00017
00018 }
00019
00020 RMaxModel::RMaxModel(const RMaxModel &rm):
00021 M(rm.M), nact(rm.nact), rng(rm.rng)
00022 {
00023
00024 nstates = rm.nstates;
00025 RMAX_DEBUG = rm.RMAX_DEBUG;
00026
00027 statespace = rm.statespace;
00028
00029
00030
00031 for (std::map<state_t, state_info>::const_iterator i = rm.statedata.begin();
00032 i != rm.statedata.end(); i++){
00033
00034 state_t s = canonicalize(*((*i).first));
00035 statedata[s] = (*i).second;
00036
00037 }
00038
00039 }
00040
00041 RMaxModel* RMaxModel::getCopy(){
00042 RMaxModel* copy = new RMaxModel(*this);
00043 return copy;
00044 }
00045
00046 RMaxModel::~RMaxModel() {}
00047
00048
00049 bool RMaxModel::updateWithExperiences(std::vector<experience> &instances){
00050
00051 bool changed = false;
00052
00053 for (unsigned i = 0; i < instances.size(); i++){
00054 bool singleChange = updateWithExperience(instances[i]);
00055 changed = changed || singleChange;
00056 }
00057 return changed;
00058 }
00059
00060
00061
00062
00063 bool RMaxModel::updateWithExperience(experience &e){
00064 if (RMAX_DEBUG) cout << "updateWithExperience " << &(e.s) << ", " << e.act
00065 << ", " << &(e.next) << ", " << e.reward << endl;
00066
00067
00068 state_t l = canonicalize(e.s);
00069 state_info* info = &(statedata[l]);
00070
00071 bool modelChanged = false;
00072
00073
00074
00075 if (info->known[e.act])
00076 return false;
00077
00078
00079 info->visits[e.act]++;
00080
00081
00082 if (e.terminal) info->terminations[e.act]++;
00083
00084
00085 info->Rsum[e.act] += e.reward;
00086
00087
00088 std::vector<int> &transCounts = info->outCounts[e.next];
00089
00090
00091 checkTransitionCountSize(&transCounts);
00092
00093
00094 if (!e.terminal){
00095
00096 transCounts[e.act]++;
00097 }
00098
00099
00100 if (!info->known[e.act] && info->visits[e.act] >= M){
00101 info->known[e.act] = true;
00102 modelChanged = true;
00103 }
00104
00105 if (RMAX_DEBUG) cout << "s" << info->id << " act: " << e.act
00106 << " transCounts[act] = " << transCounts[e.act]
00107 << " visits[act] = " << info->visits[e.act] << endl;
00108
00109
00110 return true;
00111
00112 }
00113
00114
00115
00116 float RMaxModel::getStateActionInfo(const std::vector<float> &state, int act, StateActionInfo* retval){
00117 if (RMAX_DEBUG) cout << "getStateActionInfo, " << &state << ", " << act << endl;
00118
00119
00120 retval->transitionProbs.clear();
00121
00122
00123 state_t l = canonicalize(state);
00124 state_info* info = &(statedata[l]);
00125
00126
00127
00128 if (info->visits[act] == 0){
00129 if (RMAX_DEBUG) cout << "This outcome is unknown" << endl;
00130 retval->reward = -0.001;
00131
00132
00133 retval->transitionProbs[state] = 1.0;
00134 retval->known = false;
00135 retval->termProb = 0.0;
00136 return 0;
00137 }
00138
00139
00140
00141 for (std::map<std::vector<float>, std::vector<int> >::iterator it = info->outCounts.begin();
00142 it != info->outCounts.end(); it++){
00143
00144
00145 std::vector<float> next = (*it).first;
00146 int count = ((*it).second)[act];
00147
00148
00149 if (count > 0.0){
00150 retval->transitionProbs[next] = (float)count / (float)(info->visits[act] - info->terminations[act]);
00151 if (RMAX_DEBUG) cout << "Outcome " << &next << " has prob " << retval->transitionProbs[next]
00152 << " from count of " << count << " on "
00153 << info->visits[act] << " visits." << endl;
00154 }
00155 }
00156
00157
00158
00159 retval->reward = (float)info->Rsum[act] / (float)info->visits[act];
00160 if (RMAX_DEBUG) cout << "Avg Reward of " << retval->reward << " from reward sum of "
00161 << info->Rsum[act]
00162 << " on " << info->visits[act] << " visits." << endl;
00163
00164
00165 retval->termProb = (float)info->terminations[act] / (float)info->visits[act];
00166 if (RMAX_DEBUG) cout << "termProb: " << retval->termProb << endl;
00167 if (retval->termProb < 0 || retval->termProb > 1){
00168 cout << "Problem with termination probability: " << retval->termProb << endl;
00169 }
00170
00171
00172 retval->known = info->known[act];
00173
00174 float conf = (float)info->visits[act]/ (2.0 * (float)M);
00175
00176 return conf;
00177
00178 }
00179
00180
00181
00182
00183
00184 RMaxModel::state_t RMaxModel::canonicalize(const std::vector<float> &s) {
00185 if (RMAX_DEBUG) cout << "canonicalize, s = " << &s << endl;
00186
00187
00188 const std::pair<std::set<std::vector<float> >::iterator, bool> result =
00189 statespace.insert(s);
00190 state_t retval = &*result.first;
00191
00192 if (RMAX_DEBUG) cout << " returns " << retval << endl;
00193
00194
00195 if (result.second) {
00196 initNewState(retval);
00197 }
00198
00199 return retval;
00200 }
00201
00202 void RMaxModel::initNewState(state_t s){
00203 if (RMAX_DEBUG) cout << "initNewState(s = " << s
00204 << ")" << endl;
00205
00206
00207 state_info* info = &(statedata[s]);
00208 initStateInfo(info);
00209
00210 }
00211
00212
00213
00214 void RMaxModel::initStateInfo(state_info* info){
00215 if (RMAX_DEBUG) cout << "initStateInfo()";
00216
00217 info->id = nstates++;
00218 if (RMAX_DEBUG) cout << " id = " << info->id << endl;
00219
00220
00221 info->visits.resize(nact, 0);
00222 info->Rsum.resize(nact, 0);
00223 info->known.resize(nact, false);
00224 info->terminations.resize(nact, 0);
00225
00226 }
00227
00228
00229 void RMaxModel::checkTransitionCountSize(std::vector<int>* transCounts){
00230 if (RMAX_DEBUG) cout << "checkTransitionCountSize(transCounts) "
00231 << "size: " << transCounts->size() << endl;
00232
00233
00234 if (transCounts->size() == 0)
00235 transCounts->resize(nact, 0);
00236
00237 }