// FactoredModel.cc
// Go to the documentation of this file.
00001 
00007 #include "FactoredModel.hh"
00008 
00009 
00010 FactoredModel::FactoredModel(int id, int numactions, int M, int modelType,
00011                  int predType, int nModels, float treeThreshold,
00012                  const std::vector<float> &featRange, float rRange,
00013                  bool needConf, bool dep, bool relTrans, float featPct, 
00014                  bool stoch, bool episodic, Random rng):
00015   rewardModel(NULL), terminalModel(NULL), 
00016   id(id), nact(numactions), M(M), modelType(modelType),
00017   predType(predType), nModels(nModels),
00018   treeBuildType(BUILD_ON_ERROR), // build tree after prediction error
00019   treeThresh(treeThreshold), featRange(featRange), rRange(rRange),
00020   needConf(needConf), dep(dep), relTrans(relTrans), FEAT_PCT(featPct), 
00021   stoch(stoch), episodic(episodic), rng(rng)
00022 {
00023 
00024   //cout << "MDP Tree explore type: " << predType << endl;
00025   MODEL_DEBUG = false; //true;
00026   COPYDEBUG = false;
00027 
00028   // percent of experiences to use for each model
00029   EXP_PCT = 0.55;//6; //0.4;
00030 
00031   // just to ensure the diff models are on different random values
00032   for (int i = 0; i < id; i++){
00033     rng.uniform(0, 1);
00034   }
00035 
00036 }
00037 
00038 
00039 FactoredModel::FactoredModel(const FactoredModel & m):
00040   rewardModel(NULL), terminalModel(NULL), 
00041   id(m.id), nact(m.nact), M(m.M), modelType(m.modelType),
00042   predType(m.predType), nModels(m.nModels),
00043   treeBuildType(m.treeBuildType),
00044   treeThresh(m.treeThresh), featRange(m.featRange), rRange(m.rRange),
00045   needConf(m.needConf), dep(m.dep), relTrans(m.relTrans), FEAT_PCT(m.FEAT_PCT),
00046   stoch(m.stoch), episodic(m.episodic), rng(m.rng)
00047 {
00048   COPYDEBUG = m.COPYDEBUG;
00049 
00050   if (COPYDEBUG) cout << "MDP Tree copy constructor" << endl;
00051   MODEL_DEBUG = m.MODEL_DEBUG;
00052   EXP_PCT = m.EXP_PCT;
00053   nfactors = m.nfactors;
00054 
00055 
00056   if (m.outputModels.size() > 0){
00057     if (COPYDEBUG) cout << " FactoredModel copy trees" << endl;
00058     rewardModel = m.rewardModel->getCopy();
00059     if (m.terminalModel != NULL) terminalModel = m.terminalModel->getCopy();
00060     if (COPYDEBUG) cout << " copy output trees" << endl;
00061     outputModels.resize(m.outputModels.size());
00062     for (unsigned i = 0; i < m.outputModels.size(); i++){
00063       outputModels[i] = m.outputModels[i]->getCopy();
00064     }
00065     if (COPYDEBUG) cout << " FactoredModel trees copied" << endl;
00066   }
00067   if (COPYDEBUG) cout << "FactoredModel copy complete " << endl;
00068 }
00069 
00070 FactoredModel* FactoredModel::getCopy(){
00071 
00072   FactoredModel* copy = new FactoredModel(*this);
00073   return copy;
00074 
00075 }
00076 
00077 
00078 FactoredModel::~FactoredModel() {
00079   if (rewardModel != NULL) delete rewardModel;
00080   if (terminalModel != NULL) delete terminalModel;
00081   for (unsigned i = 0; i < outputModels.size(); i++){
00082     delete outputModels[i];
00083   }
00084   outputModels.clear();
00085 }
00086 
00087 
00088 
00089 // init the trees
00090 bool FactoredModel::initMDPModel(int nfactors){
00091   if (MODEL_DEBUG) cout << "Init trees for each state factor and reward" << endl;
00092 
00093   outputModels.resize(nfactors);
00094 
00095   bool simpleRegress = false;
00096   if (modelType == M5SINGLE || modelType == M5ALLSINGLE || modelType == LSTSINGLE)
00097     simpleRegress = true;
00098 
00099   // institute a model for each state factor, depending on model type
00100   for (int i = 0; i < nfactors; i++){
00101     if (modelType == C45TREE && nModels == 1){
00102       outputModels[i] = new C45Tree((id * (nfactors+1)) + i, treeBuildType, 5, M, 0, rng);
00103       if (i == 0){
00104         rewardModel = new C45Tree((id*(nfactors+1))+nfactors, treeBuildType,5, M, 0, rng);
00105         if (episodic) terminalModel = new C45Tree((id*(nfactors+1))+nfactors+1, treeBuildType,5, M, 0, rng);
00106       }
00107     }
00108     else if ((modelType == M5MULTI || modelType == M5SINGLE) && nModels == 1){
00109       outputModels[i] = new M5Tree((id * (nfactors+1)) + i, treeBuildType, 5, M, 0, simpleRegress, false, treeThresh *featRange[i], rng);
00110       if (i == 0){
00111         rewardModel = new M5Tree((id * (nfactors+1)) + nfactors, treeBuildType, 5, M, 0, simpleRegress, false, treeThresh *rRange, rng);
00112         if (episodic) terminalModel = new M5Tree((id * (nfactors+1)) + 1+nfactors, treeBuildType, 5, M, 0, simpleRegress, false, treeThresh, rng);
00113       }
00114     }
00115     else if ((modelType == M5ALLMULTI || modelType == M5ALLSINGLE) && nModels == 1){
00116       outputModels[i] = new M5Tree((id * (nfactors+1)) + i, treeBuildType, 5, M, 0, simpleRegress, true, treeThresh *featRange[i], rng);
00117       if (i == 0){
00118         rewardModel = new M5Tree((id * (nfactors+1)) + nfactors, treeBuildType, 5, M, 0, simpleRegress, true, treeThresh *rRange, rng);
00119         if (episodic) terminalModel = new M5Tree((id * (nfactors+1)) + 1+nfactors, treeBuildType, 5, M, 0, simpleRegress, true, treeThresh, rng);
00120       }
00121     }
00122     else if ((modelType == LSTMULTI || modelType == LSTSINGLE) && nModels == 1){
00123       outputModels[i] = new LinearSplitsTree((id * (nfactors+1)) + i, treeBuildType, 5, M, 0, simpleRegress, treeThresh *featRange[i], rng);
00124       if (i == 0){
00125         rewardModel = new LinearSplitsTree((id * (nfactors+1)) + nfactors, treeBuildType, 5, M, 0, simpleRegress, treeThresh *rRange, rng);
00126         if (episodic) terminalModel = new LinearSplitsTree((id * (nfactors+1)) + 1+nfactors, treeBuildType, 5, M, 0, simpleRegress, treeThresh, rng);
00127       }
00128     }
00129     else if (modelType == STUMP && nModels == 1){
00130       outputModels[i] = new Stump((id * (nfactors+1)) + i, 1, 5, M, 0, rng);
00131       if (i == 0){
00132         rewardModel = new Stump((id * (nfactors+1)) + nfactors, 1, 5, M, 0, rng);
00133         if (episodic) terminalModel = new Stump((id * (nfactors+1)) +1+ nfactors, 1, 5, M, 0, rng);
00134       }
00135     }
00136     else if (predType == SEPARATE && nModels > 1){
00137       outputModels[i] = new SepPlanExplore((id * (nfactors+1)) + i,
00138                                           modelType, predType,
00139                                           nModels, treeBuildType, 5,
00140                                           FEAT_PCT,
00141                                           EXP_PCT,
00142                                            treeThresh *featRange[i], stoch, featRange[i], rng);
00143       if (i == 0){
00144         rewardModel = new SepPlanExplore((id * (nfactors+1)) + nfactors,
00145                                         modelType, predType,
00146                                         nModels, treeBuildType, 5,
00147                                         FEAT_PCT, // remove this pct of feats
00148                                          EXP_PCT, treeThresh *rRange, stoch, rRange, rng);
00149         if (episodic){
00150           terminalModel = new SepPlanExplore((id * (nfactors+1)) +1+ nfactors,
00151                                        modelType, predType,
00152                                        nModels, treeBuildType, 5,
00153                                        FEAT_PCT, // remove this pct of feats
00154                                        EXP_PCT, treeThresh, stoch, 1.0, rng);
00155         }
00156       }
00157     }
00158     else if (nModels > 1 || modelType == ALLM5TYPES){
00159       outputModels[i] = new MultipleClassifiers((id * (nfactors+1)) + i,
00160                                                modelType, predType,
00161                                                nModels, treeBuildType, 5,
00162                                                FEAT_PCT,
00163                                                EXP_PCT,
00164                                                 treeThresh *featRange[i], stoch, featRange[i], rng);
00165       if (i == 0){
00166         rewardModel = new MultipleClassifiers((id * (nfactors+1)) + nfactors,
00167                                              modelType, predType,
00168                                              nModels, treeBuildType, 5,
00169                                              FEAT_PCT, // remove this pct of feats
00170                                               EXP_PCT, treeThresh *rRange, stoch, rRange, rng);
00171         if (episodic){
00172           terminalModel = new MultipleClassifiers((id * (nfactors+1)) +1+ nfactors,
00173                                             modelType, predType,
00174                                             nModels, treeBuildType, 5,
00175                                             FEAT_PCT, // remove this pct of feats
00176                                             EXP_PCT, treeThresh, stoch, 1.0, rng);
00177         }
00178       }
00179     } else {
00180       cout << "Invalid model type for MDP TREE" << endl;
00181       exit(-1);
00182     }
00183 
00184   }
00185 
00186   return true;
00187 
00188 }
00189 
00190 
00191 // update all trees with multiple experiences
00192 bool FactoredModel::updateWithExperiences(std::vector<experience> &instances){
00193   if (MODEL_DEBUG) cout << "FactoredModel updateWithExperiences : " << instances.size() << endl;
00194 
00195   bool changed = false;
00196   if (outputModels.size() == 0){
00197     nfactors = instances[0].next.size();
00198     initMDPModel(instances[0].next.size());
00199   }
00200 
00201   // make sure size is right
00202   if (outputModels.size() != instances[0].next.size()){
00203     if (MODEL_DEBUG)
00204       cout << "ERROR: size mismatch between input vector and # trees "
00205            << outputModels.size() << ", " << instances[0].next.size() << endl;
00206     return false;
00207     exit(-1);
00208   }
00209 
00210   // separate these experience instances into classPairs
00211   std::vector<std::vector<classPair> > stateData(outputModels.size());
00212   std::vector<classPair> rewardData(instances.size());
00213   std::vector<classPair> termData(instances.size());
00214 
00215   // count non-terminal experiences
00216   int nonTerm = 0;
00217   for (unsigned i = 0; i < instances.size(); i++){
00218     if (!instances[i].terminal)
00219       nonTerm++;
00220   }
00221   for (unsigned i = 0; i < outputModels.size(); i++){
00222     stateData[i].resize(nonTerm);
00223   }
00224   int nonTermIndex = 0;
00225 
00226   for (unsigned i = 0; i < instances.size(); i++){
00227     experience e = instances[i];
00228 
00229     std::vector<float> inputs(e.s.size() + nact);
00230 
00231     for (unsigned k = 0; k < e.s.size(); k++){
00232       inputs[k] = e.s[k];
00233     }
00234     // convert to binary vector of length nact
00235     for (int k = 0; k < nact; k++){
00236       if (e.act == k)
00237         inputs[e.s.size()+k] = 1;
00238       else
00239         inputs[e.s.size()+k] = 0;
00240     }
00241 
00242     // convert to rel
00243     if (relTrans)
00244       e.next = subVec(e.next, e.s);
00245 
00246     // reward and terminal models
00247     classPair cp;
00248     cp.in = inputs;
00249     cp.out = e.reward;
00250     rewardData[i] = cp;
00251 
00252     cp.out = e.terminal;
00253     termData[i] = cp;
00254 
00255     // add to each vector
00256     if (!e.terminal){
00257       for (unsigned j = 0; j < outputModels.size(); j++){
00258         classPair cp;
00259         cp.in = inputs;
00260 
00261         // split the outcome and rewards up
00262         // into each vector
00263         cp.out = e.next[j];
00264         stateData[j][nonTermIndex] = cp;
00265 
00266         // for dep trees, add this models target to next model's input
00267         if (dep){
00268           inputs.push_back(e.next[j]);
00269         }
00270       }
00271       nonTermIndex++;
00272     }
00273 
00274   }
00275 
00276   // build trees on all data
00277   for (unsigned k = 0; k < stateData.size(); k++){
00278     if (stateData[k].size() > 0){
00279       bool singleChange = outputModels[k]->trainInstances(stateData[k]);
00280       changed = changed || singleChange;
00281     }
00282   }
00283 
00284   bool singleChange = rewardModel->trainInstances(rewardData);
00285   changed = changed || singleChange;
00286 
00287   if (episodic){
00288     singleChange = terminalModel->trainInstances(termData);
00289     changed = changed || singleChange;
00290   }
00291 
00292   return changed;
00293 }
00294 
00295 
00296 // update all the trees, check if model has changed
00297 bool FactoredModel::updateWithExperience(experience &e){
00298   if (MODEL_DEBUG) cout << "updateWithExperience " << &(e.s) << ", " << e.act
00299                        << ", " << &(e.next) << ", " << e.reward << endl;
00300 
00301   if (MODEL_DEBUG){
00302     cout << "From: ";
00303     for (unsigned i = 0; i < e.s.size(); i++){
00304       cout << e.s[i] << ", ";
00305     }
00306     cout << "Action: " << e.act << endl;;
00307     cout << "To: ";
00308     for (unsigned i = 0; i < e.next.size(); i++){
00309       cout << e.next[i] << ", ";
00310     }
00311     cout << "Reward: " << e.reward 
00312          << " term: " << e.terminal << endl;
00313   }
00314 
00315   bool changed = false;
00316 
00317   if (outputModels.size() == 0){
00318     nfactors = e.next.size();
00319     initMDPModel(e.next.size());
00320   }
00321 
00322   // make sure size is right
00323   if (outputModels.size() != e.next.size()){
00324     if (MODEL_DEBUG) cout << "ERROR: size mismatch between input vector and # trees "
00325                          << outputModels.size() << ", " << e.next.size() << endl;
00326     return false;
00327     exit(-1);
00328   }
00329 
00330   std::vector<float> inputs(e.s.size() + nact);
00331   for (unsigned i = 0; i < e.s.size(); i++){
00332     inputs[i] = e.s[i];
00333   }
00334   // convert to binary vector of length nact
00335   for (int k = 0; k < nact; k++){
00336     if (e.act == k)
00337       inputs[e.s.size()+k] = 1;
00338     else
00339       inputs[e.s.size()+k] = 0;
00340   }
00341 
00342   // convert to rel
00343   if (relTrans)
00344     e.next = subVec(e.next, e.s);
00345 
00346   // split the outcome and rewards up
00347   // and train the trees
00348   classPair cp;
00349   cp.in = inputs;
00350 
00351   // reward model
00352   cp.out = e.reward;
00353   bool singleChange = rewardModel->trainInstance(cp);
00354   changed = changed || singleChange;
00355 
00356   // termination model
00357   if (episodic){
00358     cp.out = e.terminal;
00359     singleChange = terminalModel->trainInstance(cp);
00360     changed = changed || singleChange;
00361   }
00362 
00363   // if not a terminal transition
00364   if (!e.terminal){
00365     for (unsigned i = 0; i < e.next.size(); i++){
00366       cp.in = inputs;
00367       cp.out = e.next[i];
00368 
00369       bool singleChange = outputModels[i]->trainInstance(cp);
00370       changed = changed || singleChange;
00371 
00372       // add this model's target to input for next model
00373       if (dep){
00374         inputs.push_back(e.next[i]);
00375       }
00376     }
00377   }
00378 
00379   if (MODEL_DEBUG) cout << "Model updated, changed: " << changed << endl;
00380   return changed;
00381 
00382 }
00383 
00384 
00385 float FactoredModel::getSingleSAInfo(const std::vector<float> &state, int act, StateActionInfo* retval){
00386 
00387   retval->transitionProbs.clear();
00388 
00389   if (outputModels.size() == 0){
00390     retval->reward = -0.001;
00391 
00392     // add to transition map
00393     retval->transitionProbs[state] = 1.0;
00394     retval->known = false;
00395     retval->termProb = 0.0;
00396     return 0;
00397   }
00398 
00399   // input we want predictions for
00400   std::vector<float> inputs(state.size() + nact);
00401   for (unsigned i = 0; i < state.size(); i++){
00402     inputs[i] = state[i];
00403   }
00404   // convert to binary vector of length nact
00405   for (int k = 0; k < nact; k++){
00406     if (act == k)
00407       inputs[state.size()+k] = 1;
00408     else
00409       inputs[state.size()+k] = 0;
00410   }
00411 
00412   // just pick one sample from each feature prediction
00413   std::vector<float>output(nfactors);
00414   for (int i = 0; i < nfactors; i++){
00415 
00416     // get prediction
00417     std::map<float, float> outputPreds;
00418     outputModels[i]->testInstance(inputs, &outputPreds);
00419 
00420     // sample a value
00421     float randProb = rng.uniform();
00422     float probSum = 0;
00423     for (std::map<float, float>::iterator it1 = outputPreds.begin(); it1 != outputPreds.end(); it1++){
00424 
00425       // get prob
00426       probSum += (*it1).second;
00427 
00428       if (randProb <= probSum){
00429         output[i] = (*it1).first;
00430         break;
00431       }
00432     }
00433   }
00434 
00435   retval->transitionProbs[output] = 1.0;
00436 
00437   // calculate reward and terminal probabilities
00438   // calculate expected reward
00439   float rewardSum = 0.0;
00440   // each value
00441   std::map<float, float> rewardPreds;
00442   rewardModel->testInstance(inputs, &rewardPreds);
00443 
00444   float totalVisits = 0.0;
00445   for (std::map<float, float>::iterator it = rewardPreds.begin(); it != rewardPreds.end(); it++){
00446     // get key from iterator
00447     float val = (*it).first;
00448     float prob = (*it).second;
00449     totalVisits += prob;
00450     if (MODEL_DEBUG) cout << "Reward value " << val << " had prob of " << prob << endl;
00451     rewardSum += (prob * val);
00452   }
00453 
00454   retval->reward = rewardSum / totalVisits;
00455   if (MODEL_DEBUG) cout << "Average reward was " << retval->reward << endl;
00456 
00457   if (isnan(retval->reward))
00458     cout << "FactoredModel setting model reward to NaN" << endl;
00459 
00460 
00461   // get termination prob
00462   std::map<float, float> termProbs;
00463   if (!episodic){
00464     termProbs[0.0] = 1.0;
00465   } else {
00466     terminalModel->testInstance(inputs, &termProbs);
00467   }
00468   // this needs to be a weighted sum.
00469   // discrete trees will give some probabilty of termination (outcome 1)
00470   // where continuous ones will give some value between 0 and 1
00471   float termSum = 0;
00472   float probSum = 0;
00473   for (std::map<float, float>::iterator it = termProbs.begin(); it != termProbs.end(); it++){
00474     // get key from iterator
00475     float val = (*it).first;
00476     if (val > 1.0) val = 1.0;
00477     if (val < 0.0) val = 0.0;
00478     float prob = (*it).second;
00479     if (MODEL_DEBUG) cout << "Term value " << val << " had prob of " << prob << endl;
00480     termSum += (prob * val);
00481     probSum += prob;
00482   }
00483 
00484   retval->termProb = termSum / probSum;
00485   if (retval->termProb < 0 || retval->termProb > 1){
00486     cout << "Invalid termination probability!!! " << retval->termProb << endl;
00487   }
00488   if (MODEL_DEBUG) cout << "Termination prob is " << retval->termProb << endl;
00489 
00490   return 1.0;
00491 
00492 }
00493 
00494 
00495 // fill in StateActionInfo struct and return it
00496 float FactoredModel::getStateActionInfo(const std::vector<float> &state, int act, StateActionInfo* retval){
00497   if (MODEL_DEBUG) cout << "getStateActionInfo, " << &state <<  ", " << act << endl;
00498 
00499 
00500 
00501   if (MODEL_DEBUG){
00502     for (unsigned i = 0; i < state.size(); i++){
00503       cout << state[i] << ", ";
00504     }
00505     //    cout << endl;
00506     cout << "a: " << act << " has " << retval->transitionProbs.size() << " outcomes already" << endl;
00507   }
00508 
00509   retval->transitionProbs.clear();
00510 
00511   if (outputModels.size() == 0){
00512     retval->reward = -0.001;
00513 
00514     // add to transition map
00515     retval->transitionProbs[state] = 1.0;
00516     retval->known = false;
00517     retval->termProb = 0.0;
00518     return 0;
00519   }
00520 
00521   // input we want predictions for
00522   std::vector<float> inputs(state.size() + nact);
00523   for (unsigned i = 0; i < state.size(); i++){
00524     inputs[i] = state[i];
00525   }
00526   // convert to binary vector of length nact
00527   for (int k = 0; k < nact; k++){
00528     if (act == k)
00529       inputs[state.size()+k] = 1;
00530     else
00531       inputs[state.size()+k] = 0;
00532   }
00533 
00534 
00535   // get the separate predictions for each outcome variable from the respective trees
00536   // combine together for outcome predictions
00537 
00538   // combine together and put into StateActionInfo struct
00539   retval->known = true;
00540   float confSum = 0.0;
00541 
00542   if (nModels == 1 &&
00543       (modelType == M5MULTI || modelType == M5SINGLE ||
00544        modelType == M5ALLMULTI || modelType == M5ALLSINGLE ||
00545        modelType == LSTMULTI || modelType == LSTSINGLE ||
00546        modelType == ALLM5TYPES)){
00547     //cout << "mdptree, combine deterministic outputs for each feature" << endl;
00549     // alternate version -> assuming one model that gives one prediction
00551     std::vector<float> MLnext(nfactors);
00552     std::vector<float> inputCopy = inputs;
00553     for (int i = 0; i < nfactors; i++){
00554       // get single outcome for this factor
00555       std::map<float, float> outputPreds;
00556       outputModels[i]->testInstance(inputCopy, &outputPreds);
00557       if (needConf && dep) confSum += outputModels[i]->getConf(inputCopy);
00558       float val = outputPreds.begin()->first;
00559       if (relTrans) val = val + inputs[i];
00560       MLnext[i] = val;
00561       if (dep){
00562         inputCopy.push_back(val);
00563       }
00564     }
00565     //add this one
00566     retval->transitionProbs[MLnext] = 1.0;
00567     if (MODEL_DEBUG){
00568       cout << "Final prob of outcome: ";
00569       for (int i = 0; i < nfactors; i++){
00570         cout << MLnext[i] << ", ";
00571       }
00572       cout << " is " << 1.0 << endl;
00573     }
00575   }
00576 
00577   else {
00578     //cout << "mdp tree, combine stochastic predictions for each feature" << endl;
00580     // Full version: assume possibly stochastic prediction for each model
00582     // grab predicted transition probs just once
00583     std::vector< std::map<float,float> > predictions(nfactors);
00584     if (!dep){
00585       for (int i = 0; i < nfactors; i++){
00586         outputModels[i]->testInstance(inputs, &(predictions[i]));
00587       }
00588     }
00589 
00591     // get probability of each transition
00592     float* probs = new float[nfactors];
00593     std::vector<float> next(nfactors, 0);
00594     addFactorProb(probs, &next, inputs, retval, 0, predictions, &confSum);
00595     delete[] probs;
00596 
00598   }
00599 
00600   // calculate expected reward
00601   float rewardSum = 0.0;
00602   // each value
00603   std::map<float, float> rewardPreds;
00604   rewardModel->testInstance(inputs, &rewardPreds);
00605 
00606   if (rewardPreds.size() == 0){
00607     //cout << "FactoredModel setting state known false" << endl;
00608     retval->known = false;
00609     return 0;
00610   }
00611 
00612   float totalVisits = 0.0;
00613   for (std::map<float, float>::iterator it = rewardPreds.begin(); it != rewardPreds.end(); it++){
00614     // get key from iterator
00615     float val = (*it).first;
00616     float prob = (*it).second;
00617     totalVisits += prob;
00618     if (MODEL_DEBUG) cout << "Reward value " << val << " had prob of " << prob << endl;
00619     rewardSum += (prob * val);
00620 
00621   }
00622 
00623   retval->reward = rewardSum / totalVisits;
00624   if (MODEL_DEBUG) cout << "Average reward was " << retval->reward << endl;
00625 
00626   if (isnan(retval->reward))
00627     cout << "FactoredModel setting model reward to NaN" << endl;
00628 
00629 
00630   // get termination prob
00631   std::map<float, float> termProbs;
00632   if (!episodic){
00633     termProbs[0.0] = 1.0;
00634   } else {
00635     terminalModel->testInstance(inputs, &termProbs);
00636   }
00637   // this needs to be a weighted sum.
00638   // discrete trees will give some probabilty of termination (outcome 1)
00639   // where continuous ones will give some value between 0 and 1
00640   float termSum = 0;
00641   float probSum = 0;
00642   for (std::map<float, float>::iterator it = termProbs.begin(); it != termProbs.end(); it++){
00643     // get key from iterator
00644     float val = (*it).first;
00645     if (val > 1.0) val = 1.0;
00646     if (val < 0.0) val = 0.0;
00647     float prob = (*it).second;
00648     if (MODEL_DEBUG) cout << "Term value " << val << " had prob of " << prob << endl;
00649     termSum += (prob * val);
00650     probSum += prob;
00651   }
00652 
00653   retval->termProb = termSum / probSum;
00654   if (retval->termProb < 0 || retval->termProb > 1){
00655     cout << "Invalid termination probability!!! " << retval->termProb << endl;
00656   }
00657   if (MODEL_DEBUG) cout << "Termination prob is " << retval->termProb << endl;
00658 
00659   // if we need confidence measure
00660   if (needConf){
00661     // conf is avg of each variable's model's confidence
00662     float rConf = rewardModel->getConf(inputs);
00663     float tConf = 1.0;
00664     if (episodic)
00665       tConf = terminalModel->getConf(inputs);
00666 
00667     //cout << "conf is " << confSum << ", r: " << rConf << ", " << tConf << endl;
00668 
00669     confSum += rConf + tConf;
00670 
00671     if (!dep){
00672       for (int i = 0; i < nfactors; i++){
00673         float featConf = outputModels[i]->getConf(inputs);
00674         confSum += featConf;
00675         //cout << "indep, conf for " << i << ": " << featConf << endl;
00676       }
00677     }
00678     confSum /= (float)(state.size() + 2.0);
00679   } else {
00680     confSum = 1.0;
00681   }
00682 
00683   if (MODEL_DEBUG) cout << "avg conf returned " << confSum << endl;
00684 
00685   //cout << "now has " << retval->transitionProbs.size() << " outcomes" << endl;
00686 
00687   // return filled-in struct
00688   retval->known = true;
00689   return confSum;
00690 
00691 }
00692 
00693 
00694 
00695 // gets the values/probs for index and adds them to the appropriate spot in the array
00696 void FactoredModel::addFactorProb(float* probs, std::vector<float>* next, std::vector<float> x, StateActionInfo* retval, int index, std::vector< std::map<float,float> > predictions, float* confSum){
00697 
00698   // get values, probs etc for this index
00699   std::map<float, float> outputPreds = predictions[index];
00700 
00701   // get prediction each time for dep
00702   if (dep){
00703     outputModels[index]->testInstance(x, &outputPreds);
00704   }
00705 
00706   // sum up confidences
00707   if (dep && needConf){
00708     float conf = outputModels[index]->getConf(x);
00709     if (index > 0)
00710       (*confSum) += conf * probs[index-1];
00711     else
00712       (*confSum) += conf;
00713   }
00714 
00715   for (std::map<float, float>::iterator it1 = outputPreds.begin(); it1 != outputPreds.end(); it1++){
00716     // get key from iterator
00717     float val = (*it1).first;
00718 
00719     if (MODEL_DEBUG) cout << "Prob of outcome " << val << " on factor " << index << " is " << (*it1).second << endl;
00720 
00721     // ignore it if it has prob 0
00722     if ((*it1).second == 0){
00723       if (MODEL_DEBUG) cout << "Prob 0, ignore" << endl;
00724       continue;
00725     }
00726 
00727     if (dep){
00728       x.push_back(val);
00729     }
00730 
00731     if (relTrans)
00732       val = val + x[index];
00733 
00734     (*next)[index] = val;
00735     if (index == 0)
00736       probs[index] = (*it1).second;
00737     else
00738       probs[index] = probs[index-1] * (*it1).second;
00739 
00740     // if last one, lets set it in our transition prob map
00741     if (index == nfactors - 1 && probs[index] > 0.0){
00742 
00743       if (MODEL_DEBUG){
00744         cout << "Final prob of outcome: ";
00745         for (int i = 0; i < nfactors; i++){
00746           cout << (*next)[i] << ", ";
00747         }
00748         cout << " is " << probs[index] << endl;
00749         cout << " was " << retval->transitionProbs[*next] << endl;
00750         cout << " now " << (retval->transitionProbs[*next]+probs[index]) << endl;
00751       }
00752 
00753       retval->transitionProbs[*next] += probs[index];
00754       continue;
00755     }
00756 
00757     // next factors
00758     addFactorProb(probs, next, x, retval, index+1, predictions, confSum);
00759 
00760   }
00761 }
00762 
00763 std::vector<float> FactoredModel::addVec(const std::vector<float> &a, const std::vector<float> &b){
00764   //if (a.size() != b.size())
00765   // cout << "ERROR: vector sizes wrong" << endl;
00766 
00767 
00768   int smaller = a.size();
00769   if (b.size() < a.size())
00770     smaller = b.size();
00771 
00772   std::vector<float> c(smaller, 0.0);
00773   for (int i = 0; i < smaller; i++){
00774     c[i] = a[i] + b[i];
00775   }
00776 
00777   return c;
00778 }
00779 
00780 std::vector<float> FactoredModel::subVec(const std::vector<float> &a, const std::vector<float> &b){
00781   //if (a.size() != b.size())
00782   // cout << "ERROR: vector sizes wrong" << endl;
00783 
00784   int smaller = a.size();
00785   if (b.size() < a.size())
00786     smaller = b.size();
00787 
00788   std::vector<float> c(smaller, 0.0);
00789   for (int i = 0; i < smaller; i++){
00790     c[i] = a[i] - b[i];
00791   }
00792 
00793   return c;
00794 }
00795 


// rl_agent
// Author(s): Todd Hester
// autogenerated on Thu Jun 6 2019 22:00:13