00001
00002
00003
00004
00005
00006 #include "FactoredPomdp.h"
00007
00008 using namespace momdp;
00009
// Plain aggregate pairing two integer end-state values (X and Y) with a
// probability for each of them.
// NOTE(review): not referenced in the visible part of this file - confirm it
// is still used before relying on its exact meaning.
typedef struct EndState {
    int endStateX;   // end-state value for the X component
    int endStateY;   // end-state value for the Y component
    double probX;    // probability associated with endStateX
    double probY;    // probability associated with endStateY
} EndState;
00016
// Plain aggregate pairing an integer index with a probability value.
// NOTE(review): not referenced in the visible part of this file - confirm it
// is still used before relying on its exact meaning.
typedef struct IndexProbTuple {
    int index;     // index the probability refers to
    double prob;   // probability value
} IndexProbTuple;
00021
00022 FactoredPomdp::FactoredPomdp()
00023 {
00024 preprocessBeliefTablesDone = false;
00025 }
00026
00027 FactoredPomdp::FactoredPomdp(string f){
00028 preprocessBeliefTablesDone = false;
00029 filename = f;
00030 }
00031
00032 FactoredPomdp::~FactoredPomdp() {
00033 DEBUG_LOG (cout << "destroying FactoredPomdp" << endl;);
00034
00035
00036 DEBUG_LOG (cout << "FactoredPomdp destroyed" << endl; );
00037 }
00038
00040
00041
00042 void FactoredPomdp::Tokenize(const string& str, vector<string>& tokens, const string& delimiters)
00043 {
00044
00045
00046 string::size_type lastPos = str.find_first_not_of(delimiters, 0);
00047
00048 string::size_type pos = str.find_first_of(delimiters, lastPos);
00049
00050 while (string::npos != pos || string::npos != lastPos)
00051 {
00052
00053 tokens.push_back(str.substr(lastPos, pos - lastPos));
00054
00055 lastPos = str.find_first_not_of(delimiters, pos);
00056
00057 pos = str.find_first_of(delimiters, lastPos);
00058 }
00059 }
00060
00062
00065
00066
00067 State FactoredPomdp::createState(TiXmlElement* varChild)
00068 {
00069
00070 string vnamePrev = varChild->Attribute("vnamePrev");
00071 string vnameCurr = varChild->Attribute("vnameCurr");
00072 const char* fullyObserved;
00073
00074 if (varChild->Attribute("fullyObs"))
00075 fullyObserved = varChild->Attribute("fullyObs");
00076 else
00077 fullyObserved = "false";
00078
00079
00080
00081 string valueEnumList = "";
00082 const char* valueConciseNum = NULL;
00083
00084 State newState;
00085 newState.setVNamePrev(vnamePrev);
00086 newState.setVNameCurr(vnameCurr);
00087 if (strcmp(fullyObserved,"true")==0)
00088 newState.setObserved(true);
00089 else
00090 newState.setObserved(false);
00091 vector<string> tokens;
00092
00093 if (varChild->FirstChild("ValueEnum")) {
00094 valueEnumList = varChild->FirstChild("ValueEnum")->ToElement()->GetText();
00095 Tokenize(valueEnumList, tokens);
00096 } else {
00097 valueConciseNum = varChild->FirstChild("NumValues")->ToElement()->GetText();
00098 int numOfStates = atoi(valueConciseNum);
00099
00100 for (int i = 0; i < numOfStates; i++) {
00101 char buffer[10];
00102 sprintf(buffer, "s%d", i);
00103 tokens.push_back(buffer);
00104 }
00105 }
00106 newState.setValueEnum(tokens);
00107
00108 return newState;
00109 }
00110
00111 ObsAct FactoredPomdp::createObservation(TiXmlElement* varChild) {
00112
00113 ObsAct obs;
00114
00115 string vname = varChild->Attribute("vname");
00116 string valueEnumList = "";
00117 const char* valueConciseNum = NULL;
00118
00119 obs.setVName(vname);
00120 vector<string> tokens;
00121 if (varChild->FirstChild("ValueEnum")) {
00122 valueEnumList
00123 = varChild->FirstChild("ValueEnum")->ToElement()->GetText();
00124 Tokenize(valueEnumList, tokens);
00125 } else {
00126 valueConciseNum
00127 = varChild->FirstChild("NumValues")->ToElement()->GetText();
00128 int numOfObs = atoi(valueConciseNum);
00129 for (int i = 0; i < numOfObs; i++) {
00130 char buffer[10];
00131 sprintf(buffer, "o%d", i);
00132 tokens.push_back(buffer);
00133 }
00134 }
00135
00136 obs.setValueEnum(tokens);
00137
00138 return obs;
00139 }
00140
00141 ObsAct FactoredPomdp::createAction(TiXmlElement* varChild) {
00142
00143 ObsAct act;
00144
00145 string vname = varChild->Attribute("vname");
00146
00147 act.setVName(vname);
00148 vector<string> tokens;
00149 string valueEnumList = "";
00150 const char* valueConciseNum = NULL;
00151 if (varChild->FirstChild("ValueEnum")) {
00152 valueEnumList
00153 = varChild->FirstChild("ValueEnum")->ToElement()->GetText();
00154 Tokenize(valueEnumList, tokens);
00155 } else {
00156 valueConciseNum
00157 = varChild->FirstChild("NumValues")->ToElement()->GetText();
00158 int numOfAct = atoi(valueConciseNum);
00159 for (int i = 0; i < numOfAct; i++) {
00160 char buffer[10];
00161 sprintf(buffer, "a%d", i);
00162 tokens.push_back(buffer);
00163 }
00164 }
00165
00166 act.setValueEnum(tokens);
00167
00168 return act;
00169 }
00170
00171
00173
00174
00177
00178
00179
00180 const bool FactoredPomdp::checkIdentityIsValid(vector<string> tokens) const {
00181 bool valid = true;
00182 int numDash = 0;
00183 for (unsigned int i=0; i < tokens.size()-1; i++) {
00184 if (strcmp((tokens.at(i).c_str()),"-") == 0)
00185 numDash++;
00186 }
00187 if (numDash != 1) valid = false;
00188 if (strcmp((tokens.at(tokens.size()-1).c_str()),"-") != 0)
00189 valid = false;
00190 return valid;
00191 }
00192
00193 const bool FactoredPomdp::checkStateNameExists(string stateName) const {
00194 for (unsigned int i=0; i < stateList.size(); i++) {
00195 if (stateList[i].getVNamePrev() == stateName || stateList[i].getVNameCurr() == stateName)
00196 return true;
00197 }
00198 return false;
00199 }
00200
00201 const bool FactoredPomdp::checkObsNameExists(string obsName) const{
00202 for (unsigned int i=0; i < observationList.size(); i++) {
00203 if (observationList[i].getVName() == obsName)
00204 return true;
00205 }
00206 return false;
00207 }
00208
00209 const bool FactoredPomdp::checkActionNameExists(string actionName) const{
00210 for (unsigned int i=0; i < actionList.size(); i++) {
00211 if (actionList[i].getVName() == actionName)
00212 return true;
00213 }
00214 return false;
00215 }
00216
00217 const bool FactoredPomdp::checkRewardNameExists(string rewardName) const{
00218 for (unsigned int i=0; i < rewardList.size(); i++) {
00219 if (rewardList[i].getVName() == rewardName)
00220 return true;
00221 }
00222 return false;
00223 }
00224
00225 const bool FactoredPomdp::checkTerminalNameExists(string terminalName) const{
00226 for (unsigned int i=0; i < terminalStateRewardList.size(); i++) {
00227 if (terminalStateRewardList[i].getVName() == terminalName)
00228 return true;
00229 }
00230 return false;
00231 }
00232
00233 const bool FactoredPomdp::checkParentNameExists(string parentName) const{
00234 if (checkStateNameExists(parentName) || (checkActionNameExists(parentName) || (checkObsNameExists(parentName))))
00235 return true;
00236 else
00237 return false;
00238 }
00239
00240 const bool FactoredPomdp::checkInstanceMatchesParent(string instanceName, string parent) {
00241 StateObsAct* soa = mymap[parent];
00242 if (soa->containsInstance(instanceName)) return true;
00243 else return false;
00244 }
00245
00246
00247 const State& FactoredPomdp::findState(string varName) {
00248 for (unsigned int i=0; i < stateList.size(); i++) {
00249 if (stateList[i].getVNamePrev() == varName || stateList[i].getVNameCurr() == varName)
00250 return stateList[i];
00251 }
00252 cerr << "State named \" " << varName << "\"cannot be found" << endl;
00253 exit(XML_INPUT_ERROR);
00254 }
00255
00256 void FactoredPomdp::printXMLErrorHeader(TiXmlBase* base){
00257 cerr << "ERROR" <<endl;
00258 cerr << " " << filename <<":Line " << base->Row() <<":" << endl << " ";
00259 }
00260
00261 void FactoredPomdp::printXMLWarningHeader(TiXmlBase* base){
00262 cerr << "WARNING" <<endl;
00263 cerr << " " << filename <<":Line " << base->Row() <<":"<< endl << " ";
00264 }
00265
00266 Function FactoredPomdp::createFunction(TiXmlElement* pFunction, int whichFunction)
00267 {
00268
00269 const char* pvtable;
00270 const char* pvname;
00271 if (whichFunction == REWARDFUNCTION || whichFunction == TERMINALFUNCTION) {
00272 pvtable = "ValueTable";
00273 } else
00274 pvtable = "ProbTable";
00275
00276
00277 Function nFunction;
00278
00279 string vnameCurr = pFunction->FirstChild("Var")->ToElement()->GetText();
00280
00281
00282 switch (whichFunction)
00283 {
00284
00285 case BELIEFFUNCTION:
00286 case STATEFUNCTION:
00287 if (!(checkStateNameExists(vnameCurr))) {
00288 printXMLErrorHeader(pFunction->FirstChild("Var"));
00289 cerr << "In State Function: " << vnameCurr << " has not been declared. \n Check your pomdpX file for missing declaration within <Variable> tag." << endl;
00290 exit(XML_INPUT_ERROR);
00291 }
00292 break;
00293
00294 case OBSERVFUNCTION:
00295 if (!(checkObsNameExists(vnameCurr))) {
00296 printXMLErrorHeader(pFunction->FirstChild("Var"));
00297 cerr << "In Observation Function: " << vnameCurr << " has not been declared. \n Check your pomdpX file for missing declaration within <Variable> tag." << endl;
00298 exit(XML_INPUT_ERROR);
00299 }
00300 break;
00301 case REWARDFUNCTION:
00302
00303 if (!(checkRewardNameExists(vnameCurr))) {
00304 printXMLErrorHeader(pFunction->FirstChild("Var"));
00305 cerr << "In Reward Function: " << vnameCurr << " has not been declared. \n Check your pomdpX file for missing declaration within <Variable> tag." << endl;
00306 exit(XML_INPUT_ERROR);
00307 }
00308 break;
00309
00310 case TERMINALFUNCTION:
00311
00312 if (!(checkTerminalNameExists(vnameCurr))) {
00313 printXMLErrorHeader(pFunction->FirstChild("Var"));
00314 cerr << "In Terminal State Reward Function: " << vnameCurr << " has not been declared. \n Check your pomdpX file for missing declaration within <Variable> tag." << endl;
00315 exit(XML_INPUT_ERROR);
00316 }
00317 break;
00318 }
00319
00320 nFunction.setVNameCurr(vnameCurr);
00321
00322
00323 vector<string> parentTokens;
00324 Tokenize(pFunction->FirstChild("Parent")->ToElement()->GetText(), parentTokens);
00325
00326 for (int i=0; i < parentTokens.size(); i++) {
00327 if (!(checkParentNameExists(parentTokens[i]))) {
00328 if (!((parentTokens[i] == "null") && (whichFunction == BELIEFFUNCTION))) {
00329 printXMLErrorHeader(pFunction->FirstChild("Parent"));
00330 cerr << "In Parents, " << parentTokens[i] <<" has not been declared.\n Check your pomdpX file for missing declaration or invalid parent name" << endl;
00331 exit(XML_INPUT_ERROR);
00332 }
00333 }
00334 }
00335
00336 nFunction.setParents(parentTokens);
00337
00338 nFunction.initSparseTables(&mymap);
00339
00340
00341
00342 TiXmlElement *pInstance = pFunction->FirstChild("Parameter")->ToElement();
00343
00344 string paramType = "";
00345 if (pInstance->Attribute("type") == NULL) paramType = "TBL";
00346 else paramType = pInstance->Attribute("type");
00347 if (paramType == "DD") {
00348 cerr<<"Sorry the parameter type=\"DD\" is currently not implemented."<<endl;
00349 cerr<<"Please use type=\"TBL\" instead"<<endl;
00350 exit(XML_INPUT_ERROR);
00351 }
00352
00353 TiXmlElement *pNextSibling = pInstance->FirstChildElement("Entry");
00354
00355 while (pNextSibling != 0) {
00356 string instance = pNextSibling->FirstChildElement("Instance")->GetText();
00357 TiXmlElement *pInstance = pNextSibling->FirstChildElement("Instance");
00358 vector<string> insttokens;
00359 Tokenize(instance, insttokens);
00360
00361
00362
00363
00364
00365 switch (whichFunction)
00366 {
00367 case OBSERVFUNCTION:
00368 for (unsigned int i=0; i < parentTokens.size(); i++) {
00369 if (!(checkActionNameExists(parentTokens[i]))) {
00370 if (isPreviousTimeSlice(parentTokens[i])) {
00371 printXMLErrorHeader(pFunction->FirstChild("Parent"));
00372 cerr << "In Observation Function, all the parents must be of the next time slice." << endl;
00373 exit(XML_INPUT_ERROR);
00374 }
00375 }
00376 }
00377 case STATEFUNCTION:
00378
00379 if (parentTokens.size() + 1 != insttokens.size()) {
00380 printXMLErrorHeader(pInstance);
00381 cerr<<"In Function " << vnameCurr << endl;
00382 cerr<<" instance " << instance << " does not match the parents" << endl;
00383 cerr<<" Check if number of entries in the <Instance> tag is correct" << endl;
00384 exit(XML_INPUT_ERROR);
00385 }
00386
00387
00388 for (unsigned int i=0; i<parentTokens.size(); i++) {
00389 if (insttokens[i] != "*" && insttokens[i] != "-") {
00390 if (!checkInstanceMatchesParent(insttokens[i],parentTokens[i])) {
00391 printXMLErrorHeader(pInstance);
00392 cerr<<"In Function: " << vnameCurr << endl;
00393 cerr << " in <Instance> ";
00394 for (unsigned int j=0; j < insttokens.size(); j++)
00395 cerr << insttokens[j] << " ";
00396 cerr<< "\n "<< insttokens[i] << " does not exist "<< endl;
00397 exit(XML_INPUT_ERROR);
00398 }
00399 }
00400 }
00401
00402
00403 if (insttokens[insttokens.size()-1] != "*" && insttokens[insttokens.size()-1] != "-")
00404 {
00405 if (!checkInstanceMatchesParent(insttokens[insttokens.size() -1],vnameCurr))
00406 {
00407 printXMLErrorHeader(pInstance);
00408 cerr<<"In Function: " << vnameCurr << endl;
00409 cerr << " in <Instance> ";
00410 for (unsigned int j=0; j < insttokens.size(); j++)
00411 cerr << insttokens[j] << " ";
00412 cerr << "\n " << insttokens[insttokens.size()-1] << " does not exist "<< endl;
00413 exit(XML_INPUT_ERROR);
00414 }
00415 }
00416
00417 break;
00418
00419 case TERMINALFUNCTION:
00420 case REWARDFUNCTION:
00421 if (parentTokens.size() != insttokens.size()) {
00422 printXMLErrorHeader(pInstance);
00423 cerr<<"In Function: " << vnameCurr << endl;
00424 cerr<<" " << instance << " does not match the parents" << endl;
00425 cerr<<" Check if number of entries in the <Instance> tag is correct" << endl;
00426 exit(XML_INPUT_ERROR);
00427 }
00428
00429 for (unsigned int i=0; i<parentTokens.size(); i++) {
00430 if (insttokens[i] != "*" && insttokens[i] != "-") {
00431 if (!checkInstanceMatchesParent(insttokens[i],parentTokens[i])) {
00432 printXMLErrorHeader(pInstance);
00433 cerr<<"In Function: " << vnameCurr << endl;
00434 cerr << " in <Instance> ";
00435 for (unsigned int j=0; j < insttokens.size(); j++)
00436 cerr << insttokens[j] << " ";
00437 cerr<< "\n "<< insttokens[i] << " does not exist "<< endl;
00438 exit(XML_INPUT_ERROR);
00439 }
00440 }
00441 }
00442 break;
00443
00444
00445 case BELIEFFUNCTION:
00446 if (parentTokens.size() == 1 && parentTokens[0] == "null") {
00447
00448 if (parentTokens.size() != insttokens.size()) {
00449 printXMLErrorHeader(pInstance);
00450 cerr<<"In Belief Function " << vnameCurr << endl;
00451 cerr<<" Instance " << instance << " does not match the parents" << endl;
00452 cerr<<" Check if the <Instance> input is correct" << endl;
00453 exit(XML_INPUT_ERROR);
00454 }
00455
00456
00457
00458
00459
00460
00461
00462
00463 }
00464
00465 break;
00466
00467
00468 }
00469
00470 TiXmlElement *tableElement = pNextSibling->FirstChildElement(pvtable);
00471 if (tableElement == NULL) {
00472 cerr << "ERROR\n Cannot find tag " << pvtable << endl;
00473 exit(XML_INPUT_ERROR);
00474 }
00475 string table = tableElement->GetText();
00476 vector<double> probttokensdouble;
00477
00478 std::transform(table.begin(), table.end(),table.begin(), ::tolower);
00479
00480
00481 if (strcmp(table.c_str(),"uniform") == 0) {
00482 if (whichFunction == BELIEFFUNCTION) {
00483 if (parentTokens[0] == "null" && insttokens[0] == "-") {
00484 const State& s = findState(vnameCurr);
00485 int valueEnum = s.getValueEnum().size();
00486
00487 for (int i = 0; i < valueEnum; i++) {
00488 probttokensdouble.push_back(1.0 / valueEnum);
00489 }
00490 }else {
00491 printXMLErrorHeader(pNextSibling->FirstChild(pvtable));
00492 cerr << "Parents must be null and instance must be - for \"uniform\"" << endl;
00493 exit(XML_INPUT_ERROR);
00494 }
00495 }else {
00496 printXMLErrorHeader(pNextSibling->FirstChild(pvtable));
00497 cerr << "uniform keyword can only be used for initial belief" << endl;
00498 exit(XML_INPUT_ERROR);
00499 }
00500 } else if (strcmp(table.c_str(),"identity") == 0) {
00501
00502 if (checkIdentityIsValid(insttokens)) {
00503
00504 int indexDash = -99;
00505 for (unsigned int i=0; i < insttokens.size()-1; i++)
00506 if (insttokens[i] == "-") indexDash = i;
00507
00508 if (indexDash == -99) assert(false);
00509
00510 const State& lastParent = findState(parentTokens[indexDash]);
00511 const State& self = findState(vnameCurr);
00512
00513 if (lastParent.getValueEnum().size() == self.getValueEnum().size()) {
00514
00515 for (unsigned int i=0; i < self.getValueEnum().size(); i++) {
00516 for (unsigned int j=0; j < self.getValueEnum().size(); j++) {
00517 if (i==j)
00518 probttokensdouble.push_back(1);
00519 else
00520 probttokensdouble.push_back(0);
00521 }
00522 }
00523 }else {
00524 printXMLErrorHeader(pNextSibling->FirstChild(pvtable));
00525 cerr<<"In Function " << vnameCurr << endl;
00526 cerr << " The last Parent token should have the same number of enumerated states as the Var itself if 'identity' keyword is used" << endl;
00527 exit(XML_INPUT_ERROR);
00528 }
00529
00530 }else {
00531 printXMLErrorHeader(pNextSibling->FirstChild(pvtable));
00532 cerr <<"In Function " << vnameCurr << endl;
00533 cerr << " Error in using \"identity\" in Prob Table. Check whether <Instance> contains two '-' as the last variables" << endl;
00534 exit(XML_INPUT_ERROR);
00535 }
00536 }else {
00537
00538
00539 vector<string> probttokens;
00540 Tokenize(table, probttokens);
00541
00542
00543
00544 for (unsigned int i=0; i < probttokens.size(); i++) {
00545 if (probttokens[i] == "identity" || (probttokens[i] == "uniform")) {
00546 printXMLErrorHeader(pNextSibling->FirstChild(pvtable));
00547 cerr << "The keyword \"" << probttokens[i] << "\" must be used without other arguments\n";
00548 exit(XML_INPUT_ERROR);
00549 }
00550 }
00551
00552 int numEntries = 1;
00553 int numTokens;
00554 if (whichFunction == REWARDFUNCTION || whichFunction == TERMINALFUNCTION)
00555 numTokens = insttokens.size();
00556 else
00557 numTokens = insttokens.size() -1;
00558
00559 for (unsigned int j=0 ; j < numTokens; j++) {
00560 if (insttokens[j] == "-") {
00561 StateObsAct* soa = mymap[parentTokens[j]];
00562 numEntries *= soa->getValueEnum().size();
00563 }
00564 }
00565
00566 if (whichFunction != REWARDFUNCTION && whichFunction != TERMINALFUNCTION) {
00567 if (insttokens[insttokens.size()-1] == "-") {
00568 StateObsAct* soa = mymap[vnameCurr];
00569 numEntries *= soa->getValueEnum().size();
00570 }
00571 }
00572
00573
00574 if (numEntries != probttokens.size()) {
00575 printXMLErrorHeader(pNextSibling->FirstChild(pvtable));
00576 cerr << "In <Instance>";
00577 for (unsigned int k=0; k < insttokens.size(); k++) {
00578 cerr << insttokens[k] << " ";
00579 }
00580 cerr << "</Instance>\n <ProbTable>";
00581 for (unsigned int k=0; k < probttokens.size() ; k++) {
00582 cerr << probttokens[k] << " ";
00583 }
00584 cerr << "</ProbTable>\n ";
00585 cerr << "The ProbTable does not contain the correct number of entries."<< endl;
00586 exit(XML_INPUT_ERROR);
00587 }
00588
00589
00590 for (unsigned int i = 0; i < probttokens.size(); i++) {
00591 probttokensdouble.push_back(atof(probttokens[i].c_str()));
00592 }
00593
00594 }
00595
00596 if (whichFunction == REWARDFUNCTION || whichFunction == TERMINALFUNCTION)
00597 insttokens.push_back(REWVALUEENUM);
00598
00599
00600 nFunction.fillTables(mymap, insttokens, probttokensdouble);
00601
00602
00603 pNextSibling = pNextSibling->NextSiblingElement();
00604 }
00605
00606 return nFunction;
00607 }
00608
00609
00612
00615
00616
00617
00619
00620 unsigned int FactoredPomdp::start() {
00621 ofstream debugfile;
00622 if (DEBUGREADXMLINPUT)
00623 {
00624 debugfile.open("debug_ReadXMLInput.txt");
00625 }
00626
00627
00628
00629
00630
00631
00632
00633
00634
00635
00636
00637
00638
00639
00641
00642 TiXmlDocument doc(filename.c_str());
00643
00644 TiXmlHandle hDoc(&doc);
00645 bool loadOkay = doc.LoadFile();
00646
00647 if (!loadOkay)
00648 {
00649 cerr << "ERROR\n Could not load pomdpX file"<<endl ;
00650 cerr << " Line"<< doc.ErrorRow() <<":"<< doc.ErrorDesc() << endl;
00651 cerr << "Check pomdpX file with pomdpX's XML schema using a XML validator." << endl;
00652 exit(1);
00653 }
00654 if (DEBUGREADXMLINPUT)
00655 debugfile << "** Docs read from " << filename.c_str() << endl;
00656
00657
00658 TiXmlElement *pElem = hDoc.FirstChildElement().ToElement();
00659 string msg = pElem->Value();
00660 cout << "input file : " << filename << endl;
00661
00662
00663 TiXmlElement *pElemDesc = hDoc.FirstChild("pomdpx").FirstChild("Description").ToElement();
00664 string description;
00665 if(pElemDesc)
00666 description = pElemDesc->GetText();
00667 if (DEBUGREADXMLINPUT)
00668 debugfile << "description: " << description << endl;
00669
00670
00671 TiXmlElement *pElemDisc =
00672 hDoc.FirstChild("pomdpx").FirstChild("Discount").ToElement();
00673 if(!pElemDisc){
00674 cerr << "ERROR\n Cannot find Discount tag" << endl;
00675 exit(1);
00676 }
00677 string disstr = pElemDisc->GetText();
00678 discount = atof(disstr.c_str());
00679 if (DEBUGREADXMLINPUT)
00680 debugfile << "discount: " << discount << endl;
00681
00682
00683 TiXmlElement *pElemVariable = hDoc.FirstChild("pomdpx").FirstChild("Variable").ToElement();
00684 if(!pElemVariable){
00685 cerr << "ERROR\n Cannot find Variable tag" << endl;
00686 exit(1);
00687 }
00688
00689 TiXmlElement *pNextSibling = pElemVariable->FirstChild()->ToElement();
00690 while (pNextSibling != 0) {
00691
00692 const char* tmp = pNextSibling->Value();
00693 string s = tmp;
00694 if (s == "StateVar") {
00695 State nState = createState(pNextSibling);
00696 stateList.push_back(nState);
00697
00698 } else if (s == "ObsVar") {
00699 ObsAct obs = createObservation(pNextSibling);
00700 observationList.push_back(obs);
00701
00702 } else if (s == "ActionVar") {
00703
00704 ObsAct act = createAction(pNextSibling);
00705 actionList.push_back(act);
00706 } else if (s == "RewardVar") {
00707
00708 string vname=pNextSibling->Attribute("vname");
00709 ObsAct rew ;
00710 rew.setVName(vname);
00711
00712 vector<string> rewardEnum;
00713 rewardEnum.push_back(REWVALUEENUM);
00714 rew.setValueEnum(rewardEnum);
00715 rewardList.push_back(rew);
00716
00717 } else if (s == "TerminalStateRewardVar") {
00718
00719 string vname=pNextSibling->Attribute("vname");
00720 ObsAct rew ;
00721 rew.setVName(vname);
00722
00723 vector<string> rewardEnum;
00724 rewardEnum.push_back(REWVALUEENUM);
00725 rew.setValueEnum(rewardEnum);
00726 terminalStateRewardList.push_back(rew);
00727
00728 }else {
00729 printXMLErrorHeader(pNextSibling);
00730 cerr << " Unknown XML tag: " << s << " encountered" << endl;
00731 exit(XML_INPUT_ERROR);
00732 }
00733
00734 pNextSibling = pNextSibling->NextSiblingElement();
00735
00736 }
00737
00740 sortStateList();
00741
00743
00744 if (DEBUGREADXMLINPUT)
00745 debugfile << "State List Size: " << stateList.size() << endl;
00746
00747 for (unsigned int i = 0; i < stateList.size(); i++) {
00748 if (DEBUGREADXMLINPUT) {
00749 debugfile << "State " << i << ": " << endl;
00750 stateList[i].write(debugfile);
00751 debugfile << endl;
00752 }
00753
00754 if (mymap.find(stateList[i].getVNamePrev()) == mymap.end()) {
00755 mymap[stateList[i].getVNamePrev()] = &stateList[i];
00756 } else {
00757 cerr << "ERROR\n There seems to be repeated variable names for: "
00758 << stateList[i].getVNamePrev() << endl;
00759 cerr << " Check pomdpX file for repeated variable names." << endl;
00760 exit(XML_INPUT_ERROR);
00761 }
00762 if (mymap.find(stateList[i].getVNameCurr()) == mymap.end()) {
00763 mymap[stateList[i].getVNameCurr()] = &stateList[i];
00764 } else {
00765 cerr << "ERROR\n There seems to be repeated variable names for: "
00766 << stateList[i].getVNameCurr() << endl;
00767 cerr << " Check pomdpX file for repeated variable names." << endl;
00768 exit(XML_INPUT_ERROR);
00769 }
00770 }
00771
00772 if (DEBUGREADXMLINPUT)
00773 debugfile << "Observation List Size: " << observationList.size()
00774 << endl;
00775 for (unsigned int i = 0; i < observationList.size(); i++) {
00776 if (DEBUGREADXMLINPUT) {
00777 debugfile << "Observation " << i << ": " << endl;
00778 observationList[i].write(debugfile);
00779 debugfile << endl;
00780 }
00781 if (mymap.find(observationList[i].getVName()) == mymap.end()) {
00782 mymap[observationList[i].getVName()] = &observationList[i];
00783 } else {
00784 cerr << "ERROR\n There seems to be repeated variable names for: "
00785 << observationList[i].getVName() << endl;
00786 cerr << " Check pomdpX file for repeated variable names." << endl;
00787
00788 exit(XML_INPUT_ERROR);
00789 }
00790 }
00791
00792 if (DEBUGREADXMLINPUT)
00793 debugfile << "Action List Size: " << actionList.size() << endl;
00794 for (unsigned int i = 0; i < actionList.size(); i++) {
00795 if (DEBUGREADXMLINPUT) {
00796 debugfile << "Action " << i << ": " << endl;
00797 actionList[i].write(debugfile);
00798 debugfile << endl;
00799 }
00800 if (mymap.find(actionList[i].getVName()) == mymap.end()) {
00801 mymap[actionList[i].getVName()] = &actionList[i];
00802 } else {
00803 cerr << "ERROR\n There seems to be repeated variable names for: "
00804 << actionList[i].getVName() << endl;
00805 cerr << " Check pomdpX file for repeated variable names." << endl;
00806 exit(XML_INPUT_ERROR);
00807 }
00808 }
00809
00810 if (DEBUGREADXMLINPUT)
00811 debugfile << "Reward List Size: " << rewardList.size() << endl;
00812
00813 for (unsigned int i = 0; i < rewardList.size(); i++) {
00814 if (DEBUGREADXMLINPUT) {
00815 debugfile << "Reward " << i << ": " << endl;
00816 rewardList[i].write(debugfile);
00817 debugfile << endl;
00818 }
00819 if (mymap.find(rewardList[i].getVName()) == mymap.end()) {
00820 mymap[rewardList[i].getVName()] = &rewardList[i];
00821 } else {
00822 cerr << "ERROR\n There seems to be repeated variable names for: "
00823 << rewardList[i].getVName() << endl;
00824 cerr << " Check pomdpX file for repeated variable names." << endl;
00825 exit(XML_INPUT_ERROR);
00826 }
00827 }
00828
00829
00830 if (DEBUGREADXMLINPUT)
00831 debugfile << "Terminal State Reward List Size: " << terminalStateRewardList.size() << endl;
00832
00833 for (unsigned int i = 0; i < terminalStateRewardList.size(); i++) {
00834 if (DEBUGREADXMLINPUT) {
00835 debugfile << "Terminal State Reward " << i << ": " << endl;
00836 terminalStateRewardList[i].write(debugfile);
00837 debugfile << endl;
00838 }
00839 if (mymap.find(terminalStateRewardList[i].getVName()) == mymap.end()) {
00840 mymap[terminalStateRewardList[i].getVName()] = &terminalStateRewardList[i];
00841 } else {
00842 cerr << "ERROR\n There seems to be repeated variable names for: "
00843 << terminalStateRewardList[i].getVName() << endl;
00844 cerr << " Check pomdpX file for repeated variable names." << endl;
00845 exit(XML_INPUT_ERROR);
00846 }
00847 }
00848
00849
00850
00851
00853
00854
00855 TiXmlElement *pInitialStateBelief = hDoc.FirstChild("pomdpx").FirstChild(
00856 "InitialStateBelief").FirstChild("CondProb").ToElement();
00857 while (pInitialStateBelief != 0) {
00858
00859 Function nBeliefFunction = createFunction(pInitialStateBelief,BELIEFFUNCTION);
00860
00861 beliefFunctionList.push_back(nBeliefFunction);
00862
00863 string info;
00864 if (checkFunctionProbabilities(&nBeliefFunction, pInitialStateBelief, "Belief Function"))
00865 {
00866 exit(XML_INPUT_ERROR);
00867 }
00868
00869 pInitialStateBelief = pInitialStateBelief->NextSiblingElement();
00870 }
00871
00872
00873 if (beliefFunctionList.size() != stateList.size()) {
00874 cerr << "ERROR\n The number of states do not correspond to the number of belief functions" << endl;
00875 exit(XML_INPUT_ERROR);
00876 }
00877
00878
00879
00880
00881 if (DEBUGREADXMLINPUT)
00882 debugfile << "\nState Functions: " << endl;
00883
00884
00885 TiXmlElement *pStructFunction = hDoc.FirstChild("pomdpx").FirstChild("StateTransitionFunction").ToElement();
00886 if(pStructFunction ==NULL){
00887 cerr << "ERROR\n Cannot find StateTransitionFunction tag"<< endl;
00888 exit(XML_INPUT_ERROR);
00889 }
00890 TiXmlElement *pSF = pStructFunction->FirstChildElement();
00891
00892 while (pSF != 0) {
00893
00894 Function nStateFunction = createFunction(pSF,STATEFUNCTION);
00895
00896
00897 string info;
00898 if (!(nStateFunction.checkNoMissingEntries(mymap, info))) {
00899 printXMLErrorHeader(pSF);
00900 cerr << "In State Function " << nStateFunction.getVNameCurr() << " : there are missing transitions" << endl;
00901 cerr << info << endl;
00902 exit(XML_INPUT_ERROR);
00903 }
00904 nStateFunction.sparseT->convertForUse();
00905
00906 checkFunctionProbabilities(&nStateFunction, pSF, "State Function");
00907
00908
00909 if (!(validateModel(nStateFunction, info))) {
00910 printXMLErrorHeader(pSF);
00911 cerr << info << " The input file does not match our DBN model" << endl;
00912 exit(XML_INPUT_ERROR);
00913 }
00914
00915 stateFunctionList.push_back(nStateFunction);
00916 if (DEBUGREADXMLINPUT)
00917 nStateFunction.write(debugfile);
00918
00919 pSF = pSF->NextSiblingElement();
00920
00921
00922 }
00923
00924 if (stateFunctionList.size() != stateList.size()) {
00925 printXMLErrorHeader(pStructFunction);
00926 cerr << "The number of states do not correspond to the number of state transition functions" << endl;
00927 exit(XML_INPUT_ERROR);
00928 }
00929
00930 if (DEBUGREADXMLINPUT)
00931 debugfile << "\nObservation Functions: " << endl;
00932
00933 if (observationList.size() > 0) {
00934 TiXmlElement *pObsFunction = hDoc.FirstChild("pomdpx").FirstChild("ObsFunction").ToElement();
00935 if(pObsFunction ==NULL){
00936 cerr << "ERROR\n Cannot find ObsFunction tag"<< endl;
00937 exit(XML_INPUT_ERROR);
00938 }
00939 TiXmlElement *pOF = pObsFunction->FirstChild()->ToElement();
00940
00941 while (pOF != 0) {
00942
00943 Function nFunction = createFunction(pOF, OBSERVFUNCTION);
00944
00945
00946
00947 string info;
00948 if (!(nFunction.checkNoMissingEntries(mymap, info))) {
00949 printXMLErrorHeader(pOF);
00950 cerr << "In Observation Function " << nFunction.getVNameCurr() << " : there are missing transitions" << endl;
00951 cerr << info << endl;
00952 exit(XML_INPUT_ERROR);
00953 }
00954
00955 nFunction.sparseT->convertForUse();
00956 checkFunctionProbabilities(&nFunction, pOF, "Observation Function");
00957
00958 observFunctionList.push_back(nFunction);
00959 if (DEBUGREADXMLINPUT)
00960 nFunction.write(debugfile);
00961
00962 pOF = pOF->NextSiblingElement();
00963
00964 }
00965
00966 if (observFunctionList.size() != observationList.size()) {
00967 printXMLErrorHeader(pObsFunction);
00968 cerr << "The number of observation variables do not correspond to the number of observations functions" << endl;
00969 exit(XML_INPUT_ERROR);
00970 }
00971
00972 }
00973
00974 TiXmlElement *pRewardFunction = hDoc.FirstChild("pomdpx").FirstChild("RewardFunction").ToElement();
00975 if(pRewardFunction ==NULL){
00976 cerr << "ERROR\n Cannot find RewardFunction tag"<< endl;
00977 exit(XML_INPUT_ERROR);
00978 }
00979
00980 TiXmlElement *pRF = pRewardFunction->FirstChild()->ToElement();
00981
00982 if (DEBUGREADXMLINPUT)
00983 debugfile << "\nReward Functions: " << endl;
00984
00985 while (pRF != 0) {
00986
00987 Function rewardFunction = createFunction(pRF, REWARDFUNCTION);
00988
00989
00990
00991
00992
00993
00994
00995
00996 rewardFunction.sparseT->convertForUse();
00997 rewardFunctionList.push_back(rewardFunction);
00998 if (DEBUGREADXMLINPUT)
00999 rewardFunction.write(debugfile);
01000
01001 pRF = pRF->NextSiblingElement();
01002
01003 }
01004
01005 if (rewardFunctionList.size() != rewardList.size()) {
01006 printXMLErrorHeader(pRewardFunction);
01007 cerr << "The number of reward variables do not correspond to the number of reward functions" << endl;
01008 exit(XML_INPUT_ERROR);
01009 }
01010
01011
01012
01013 if (DEBUGREADXMLINPUT)
01014 debugfile << "\nTerminal State Reward Functions: " << endl;
01015
01016 TiXmlElement *pTerminalStateRewardFunction;
01017 if (terminalStateRewardList.size() > 0) {
01018 pTerminalStateRewardFunction = hDoc.FirstChild("pomdpx").FirstChild("TerminalStateRewardFunction").ToElement();
01019
01020 TiXmlElement *pTSRF = pTerminalStateRewardFunction->FirstChild()->ToElement();
01021
01022 if (DEBUGREADXMLINPUT)
01023 debugfile << "\nTerminal State Reward Functions: " << endl;
01024
01025 while (pTSRF != 0) {
01026
01027 Function terminalStateRewardFunction = createFunction(pTSRF, TERMINALFUNCTION);
01028
01029 terminalStateRewardFunction.sparseT->sortEntries();
01030 terminalStateRewardFunction.sparseT->removeRedundant();
01031
01032 terminalStateRewardFunctionList.push_back(terminalStateRewardFunction);
01033 if (DEBUGREADXMLINPUT)
01034 terminalStateRewardFunction.write(debugfile);
01035
01036 pTSRF = pTSRF->NextSiblingElement();
01037
01038 }
01039 }
01040
01041 if (terminalStateRewardFunctionList.size() != terminalStateRewardList.size()) {
01042 cerr << "The number of terminal state reward variables do not correspond to the number of terminal state reward functions" << endl;
01043 exit(XML_INPUT_ERROR);
01044 }
01045
01046
01047
01048
01049
01050 for (unsigned int i = 0; i < stateFunctionList.size(); i++) {
01051 mapFunc[stateFunctionList[i].getVNameCurr()] = &stateFunctionList[i];
01052 }
01053
01054 for (unsigned int i = 0; i < observFunctionList.size(); i++) {
01055 mapFunc[observFunctionList[i].getVNameCurr()] = &observFunctionList[i];
01056 }
01057
01058 for (unsigned int i = 0; i < rewardFunctionList.size(); i++) {
01059 mapFunc[rewardFunctionList[i].getVNameCurr()] = &rewardFunctionList[i];
01060 }
01061
01062 for (unsigned int i = 0; i < terminalStateRewardFunctionList.size(); i++) {
01063 mapFunc[terminalStateRewardFunctionList[i].getVNameCurr()] = &terminalStateRewardFunctionList[i];
01064 }
01065
01066 debugfile.close();
01069
01070 return checkProblemType();
01071
01072 }
01073
01074 bool FactoredPomdp::checkFunctionProbabilities(Function* f, TiXmlElement* xmlNode, string whichFunction){
01075 vector<vector<int> > commonIndices;
01076 vector<double> probs;
01077 if (f->sparseT->errorInProbabilities(commonIndices, probs))
01078 {
01079
01080 printXMLWarningHeader(xmlNode);
01081 cerr << "In "<< whichFunction <<" Tables " << f->getVNameCurr() << endl;
01082 for(int k=0;k<commonIndices.size();k++){
01083 vector<int> commonIndex= commonIndices[k];
01084 double prob = probs[k];
01085 cerr << " In instance ";
01086 for(int i=0;i<commonIndex.size();i++){
01087 if(f->getParents()[i]!="null"){
01088 StateObsAct* soa = mymap[f->getParents()[i]];
01089 cerr << soa->getValueEnum()[commonIndex[i]] << " ";
01090 }else{
01091 cerr << "null" << endl;
01092 }
01093 }
01094 cerr << endl;
01095 cerr << " Probabilities sum up to " << prob <<". It should sum up to 1" << endl;
01096 }
01097 return true;
01098 }
01099 else
01100 return false;
01101 }
01102
01103 const int FactoredPomdp::checkProblemType() {
01104
01105
01106 int numObserved = 0;
01107 int numUnobserved = 0;
01108 for (unsigned int i = 0; i < stateList.size(); i++) {
01109 if (stateList[i].getObserved())
01110 numObserved++;
01111 else
01112 numUnobserved++;
01113 }
01114
01115 if (numUnobserved > 0) {
01116 if (numObserved == 0)
01117 return FULLY_UNOBSERVED;
01118 else {
01119 for (unsigned int i=0; i < stateFunctionList.size(); i++) {
01120
01121 for(unsigned int j=0; j < stateFunctionList[i].sparseT->cIheader.size(); j++) {
01122
01123 if (!(checkActionNameExists(stateFunctionList[i].sparseT->cIheader[j]))) {
01124 if (isCurrentTimeSlice(stateFunctionList[i].sparseT->cIheader[j])) {
01125 cout << "MIXED REPARM since state function list has parents in current time slice" << endl;
01126 return MIXED_REPARAM;
01127 }
01128 }
01129 }
01130 }
01131
01132
01133 for (unsigned int i=0; i < beliefFunctionList.size(); i++) {
01134 for (unsigned int j=0; j < beliefFunctionList[i].getParents().size(); j++) {
01135 if (beliefFunctionList[i].getParents()[j] != "null") {
01136 cout << "MIXED REPARAM because belief function has parents that are not null" << endl;
01137 return MIXED_REPARAM;
01138 }
01139 }
01140 }
01141
01142
01143 return MIXED;
01144 }
01145 } else {
01146 if (observationList.size() > 0) {
01147 cerr << "WARNING\n This problem is an MDP (all state variables are observed) but also contains observation variables. The observation variables will be ignored. "<< endl;
01148 }
01149 return FULLY_OBSERVED;
01150 }
01151
01152 }
01153
01154 void FactoredPomdp::sortStateList() {
01155 vector<State> newStateList;
01156
01157
01158 for (unsigned int i = 0; i < stateList.size(); i++) {
01159 if (stateList[i].getObserved())
01160 newStateList.push_back(stateList[i]);
01161 }
01162
01163 for (unsigned int i = 0; i < stateList.size(); i++) {
01164 if (!(stateList[i].getObserved()))
01165 newStateList.push_back(stateList[i]);
01166 }
01167
01168 stateList = newStateList;
01169 }
01170
01171 bool FactoredPomdp::validateModel(Function sf, string& info) {
01172 stringstream ssinfo;
01173
01174 if (isPreviousTimeSlice(sf.getVNameCurr())) {
01175 ssinfo << "For State Transition Function "<< sf.getVNameCurr()<<", <Var> </Var> should not contain variables from the previous time slice" << endl;
01176 info = ssinfo.str();
01177 return false;
01178 }
01179
01180 const State& sfState = findState(sf.getVNameCurr());
01181 if (sfState.getObserved()) {
01182 vector<string> parents = sf.getParents();
01183 for (unsigned int i=0; i < parents.size(); i++) {
01184 if (!(checkActionNameExists(parents[i])) && isCurrentTimeSlice(parents[i])) {
01185 ssinfo << "If variable " << sf.getVNameCurr() << " is observed, it cannot have any parents in the SAME time slice: " << parents[i] << endl;
01186 info = ssinfo.str();
01187 return false;
01188 }
01189 }
01190
01191 }else {
01192
01193 vector<string> parents = sf.getParents();
01194 for (unsigned int i=0; i < parents.size(); i++) {
01195 if (!(checkActionNameExists(parents[i])) && isCurrentTimeSlice(parents[i])) {
01196 const State& tempS = findState(parents[i]);
01197 if (!(tempS.getObserved())) {
01198 ssinfo << "If variable " << sf.getVNameCurr() << " is not observed, it cannot have any UNOBSERVED parents in its same time slice: " << parents[i] << endl;
01199 info = ssinfo.str();
01200 return false;
01201 }
01202 }
01203 }
01204 }
01205
01206 return true;
01207
01208 }
01209
01210 const bool FactoredPomdp::checkRewardFunctionHasOnlyPreviousTimeSliceAndAction() const{
01211
01212 for (unsigned int i=0; i < rewardFunctionList.size(); i++) {
01213 vector<string> parents = rewardFunctionList[i].getParents();
01214 for (unsigned int j=0; j<parents.size(); j++) {
01215 if ((checkStateNameExists(parents[j]))) {
01216 if (isCurrentTimeSlice(parents[j])) return false;
01217 }
01218 if ((checkObsNameExists(parents[j]))) return false;
01219 }
01220 }
01221 return true;
01222 }
01223
01224
01225 const set<string> FactoredPomdp::getRewardFunctionCurrentTimeSliceVars(Function* rewardFunction) {
01226 set<string> variables;
01227 vector<string> obsVariable;
01228 vector<string> parents = rewardFunction->getParents();
01229 for (unsigned int j=0; j<parents.size(); j++) {
01230 if ((checkStateNameExists(parents[j]))) {
01231 if (isCurrentTimeSlice(parents[j])){
01232 variables.insert(parents[j]);
01233 }
01234 }
01235 if ((checkObsNameExists(parents[j]))){
01236 obsVariable.push_back(parents[j]);
01237 }
01238 }
01239
01240 for(vector<string>::iterator it=obsVariable.begin();it!=obsVariable.end();it++){
01241 Function obsFunction = *mapFunc[*it];
01242 vector<string> parents = obsFunction.getParents();
01243 for (unsigned int j=0; j<parents.size(); j++) {
01244 if ((checkStateNameExists(parents[j]))) {
01245 if (isCurrentTimeSlice(parents[j])){
01246 variables.insert(parents[j]);
01247 }
01248 }
01249 }
01250 variables.insert(*it);
01251 }
01252 return variables;
01253 }
01254
01255 const bool FactoredPomdp::isPreviousTimeSlice(string name) const{
01256 assert(stateList.size() > 0);
01257 for (unsigned int i = 0 ; i < stateList.size(); i++) {
01258 if (stateList[i].getVNamePrev() == name)
01259 return true;
01260 }
01261 for (unsigned int i = 0 ; i < stateList.size(); i++) {
01262 if (stateList[i].getVNameCurr() == name)
01263 return false;
01264 }
01265 cerr << "Not a State variable" << endl;
01266 assert(false);
01267 }
01268
01269 const bool FactoredPomdp::isCurrentTimeSlice(string name) const{
01270 assert(stateList.size() > 0);
01271 for (unsigned int i = 0 ; i < stateList.size(); i++) {
01272 if (stateList[i].getVNameCurr() == name)
01273 return true;
01274 }
01275 for (unsigned int i = 0 ; i < stateList.size(); i++) {
01276 if (stateList[i].getVNamePrev() == name)
01277 return false;
01278 }
01279 cerr << "Not a State variable" << endl;
01280 assert(false);
01281 }
01282
01283
01284 void FactoredPomdp::convertFast()
01285 {
01286 DEBUG_LOG (cout << "convert fast" << endl; );
01287
01288 convertFastStateTrans();
01289
01290 if (observationList.size() == 0)
01291 {
01292 convertFastNoObservationsVariables();
01293 }
01294 else
01295 {
01296 convertFastObsTrans();
01297 }
01298 convertFastRewardTrans();
01299 convertFastBelief();
01300 convertFastVariables();
01301
01302 DEBUG_LOG (cout << "convert fast subfunctions done" << endl; );
01303
01304 }
01305
01306
01307 void FactoredPomdp::mapFastStatesToValue() {
01308
01314
01315 int increment = 1;
01316 for (int i = (int) stateList.size() - 1; i >= 0; i--) {
01317 positionStringIndexMap[stateList[i].getVNamePrev()] = increment;
01318 positionStringIndexMap[stateList[i].getVNameCurr()] = increment;
01319 increment *= stateList[i].getValueEnum().size();
01320 }
01321
01322 numMergedStates = increment;
01323 }
01324
01325 void FactoredPomdp::mapFastIndexesToValues(SharedPointer<SparseTable> st) {
01326
01327
01328
01329
01330
01331
01332
01333 for (unsigned int j = 0; j < st->cIheader.size(); j++) {
01334 if (!(checkActionNameExists(st->cIheader[j])))
01335 fastPositionCIIndexMap[j] = positionStringIndexMap[st->cIheader[j]];
01336 else
01337 fastPositionCIIndexMap[j] = actionStringIndexMap[st->cIheader[j]];
01338 }
01339
01340 for (unsigned int j = 0; j < st->uIheader.size(); j++)
01341 fastPositionUIIndexMap[j]
01342 = positionStringIndexMap[st->uIheader[j]];
01343
01344
01346 }
01347
01348
01349 SharedPointer<SparseTable> FactoredPomdp::reduceUnmatchedCIWithUI(SharedPointer<SparseTable> st, ofstream& debugfile, bool printDebugFile) {
01350
01351 if (printDebugFile) {
01352 debugfile << "before re-param" << endl;
01353 st->write(debugfile);
01354 debugfile << endl;
01355 }
01356
01357
01358 for (unsigned int uIIndex=0; uIIndex < st->uIheader.size(); uIIndex++) {
01359 for (unsigned int cIIndex=0; cIIndex < st->cIheader.size(); cIIndex++) {
01360 if (st->uIheader[uIIndex] == st->cIheader[cIIndex]) {
01361 st=st->removeUnmatchedCI(cIIndex, uIIndex);
01362 }
01363 }
01364 }
01365
01366 if (printDebugFile) {
01367 debugfile << "after re-param" << endl;
01368 st->write(debugfile);
01369 debugfile << endl;
01370 }
01371
01372 return st;
01373
01374
01375 }
01376
01377 void FactoredPomdp::resortFastStateTables(ofstream& debugfile, bool printDebugFile) {
01378
01379 if (printDebugFile) {
01380 debugfile << "finalStateTable before fast re-sorting" << endl;
01381 finalStateTable->write(debugfile);
01382 debugfile << endl;
01383 }
01384
01385
01386 unsigned int pos;
01387 for (unsigned int i=0; i < actionList.size(); i++) {
01388 pos = finalStateTable->findPosition(actionList[i].getVName());
01389 finalStateTable->swapCIHeaders(i,pos);
01390 finalStateTable->swapSparseColumns(i,pos);
01391 }
01392
01393 for (unsigned int i=0; i < stateList.size(); i++) {
01394 pos = finalStateTable->findPosition(stateList[i].getVNamePrev());
01395 finalStateTable->swapCIHeaders(i+actionList.size(),pos);
01396 finalStateTable->swapSparseColumns(i+actionList.size(),pos);
01397 }
01398
01399 finalStateTable->sortEntries();
01400
01401 if (printDebugFile) {
01402 debugfile << "finalStateTable after fast re-sorting" << endl;
01403 finalStateTable->write(debugfile);
01404 debugfile << endl;
01405 }
01406
01407
01408 }
01409
01410 void FactoredPomdp::convertFastStateTrans() {
01411
01412
01413
01414 ofstream debugfile;
01415 if (DEBUGFASTCONVERSIONSTATE) {
01416 debugfile.open("debug_FactoredPomdp_convertFastStateTrans.txt");
01417 }
01418
01419 finalStateTable = mergeTables(&stateFunctionList, STATEFUNCTION, debugfile, false);
01420 finalStateTable = reduceUnmatchedCIWithUI(finalStateTable, debugfile, false);
01421 resortFastStateTables(debugfile, false);
01422 defineCanonicalNames();
01423
01424 mapActionsToValue();
01425 mapFastStatesToValue();
01426 mapFastIndexesToValues(finalStateTable);
01427
01429
01431
01432
01433 vector<PreSparseMatrix> cOstTrPre;
01434 for (unsigned int i = 0; i < numActions; i++) {
01435 PreSparseMatrix cm(numMergedStates, numMergedStates);
01436 cOstTrPre.push_back(cm);
01437 }
01438
01439 vector<PreSparseMatrix> cOstPre;
01440 for (unsigned int i = 0; i < numActions; i++) {
01441 PreSparseMatrix cm(numMergedStates, numMergedStates);
01442 cOstPre.push_back(cm);
01443 }
01444
01445 int action;
01446 int startState;
01447 int endState;
01448 double prob;
01449
01450
01451 unsigned int *terminalStates = new unsigned int[numMergedStates];
01452
01453 for (int i = 0; i < numMergedStates; i++)
01454 terminalStates[i] = 0;
01455
01456
01457 SparseEntry se;
01458 while(finalStateTable->getNext(se)){
01459 vector<int> commonIndex = finalStateTable->getIterPosition();
01460 action = startState = endState = 0;
01461 prob = 1;
01462
01463 for (unsigned int j = 0; j < commonIndex.size(); j++) {
01464 if (!(checkActionNameExists(finalStateTable->cIheader[j])))
01465 startState += commonIndex[j] * fastPositionCIIndexMap[j];
01466 else
01467 action += commonIndex[j] * fastPositionCIIndexMap[j];
01468 }
01469 for (unsigned int j = 0; j < se.uniqueIndex.size(); j++) {
01470 endState += se.uniqueIndex[j].index * fastPositionUIIndexMap[j];
01471 prob *= se.uniqueIndex[j].value;
01472 }
01473 cOstTrPre[action].addEntries(endState, startState, prob);
01474
01475
01476 if ((endState == startState) && (fabs(prob - 1) < 0.000001))
01477 terminalStates[endState]++;
01478
01479
01480 cOstPre[action].addEntries(startState, endState,prob);
01481 }
01482
01483
01484 vector<SharedPointer<SparseMatrix> > cOst;
01485 for (unsigned int i = 0; i < cOstPre.size(); i++)
01486 {
01487 cOst.push_back(cOstPre[i].convertSparseMatrix());
01488 if (DEBUGFASTCONVERSIONSTATE) {
01489 debugfile << "compressed old state transition matrix" << i << endl;
01490 cOst[i]->write(debugfile);
01491 debugfile << endl;
01492 }
01493 }
01494
01495 vector<SharedPointer<SparseMatrix> > cOstTr;
01496 for (unsigned int i = 0; i < cOstTrPre.size(); i++) {
01497 cOstTr.push_back(cOstTrPre[i].convertSparseMatrix());
01498 if (DEBUGFASTCONVERSIONSTATE) {
01499 debugfile << "compressed old state transition transposed matrix"
01500 << i << endl;
01501 cOstTr[i]->write(debugfile);
01502 debugfile << endl;
01503 }
01504 }
01505
01506 layer.pomdpT = cOst;
01507 layer.pomdpTtr = cOstTr;
01508
01509
01510 vector<int> termStates;
01511 for (int i = 0; i < numMergedStates; i++) {
01512 if (terminalStates[i] == numActions)
01513 termStates.push_back(1);
01514 else
01515 termStates.push_back(0);
01516 }
01517
01518 layer.pomdpIsPOMDPTerminalState = termStates;
01519 if (DEBUGFASTCONVERSIONSTATE) {
01520 debugfile << "terminal states" << endl;
01521 for (unsigned int i = 0; i < termStates.size(); i++) {
01522 debugfile << termStates[i] << " ";
01523 }
01524 }
01525
01526 debugfile.close();
01527 delete [] terminalStates;
01528
01529 }
01530
01532
01533
01534 SharedPointer<SparseTable> FactoredPomdp::preprocessRewardTable()
01535 {
01536 ofstream debugfile;
01537 if (DEBUGFACTOREDCONVERSIONREWARD) {
01538 debugfile.open("debug_FactoredPomdp_preprocessRewardTable.txt");
01539 }
01540
01541 vector<SharedPointer<SparseTable> > rewardTables;
01542
01543
01544 for (unsigned int i=0; i < rewardFunctionList.size(); i++) {
01545 debugfile << "reward function " << i <<endl;
01546
01547
01548
01549 vector<Function> functionsDepend;
01550 set<string> curTimeVars = getRewardFunctionCurrentTimeSliceVars(&rewardFunctionList[i] );
01551 for(set<string>::iterator it=curTimeVars.begin();it!=curTimeVars.end();it++)
01552 {
01553 functionsDepend.push_back(*mapFunc[*it]);
01554 }
01555 functionsDepend.push_back(rewardFunctionList[i]);
01556
01557
01558 SharedPointer<SparseTable> rewardTable = mergeTables(&functionsDepend, BELIEFFUNCTION, debugfile, DEBUGFACTOREDCONVERSIONREWARD);
01559 rewardTable->write(debugfile);
01560
01561 debugfile << "after reducing common indexes that are not matched with unique indexes" << endl;
01562 rewardTable = reduceUnmatchedCIWithUI(rewardTable, debugfile, DEBUGFACTOREDCONVERSIONREWARD);
01563 rewardTable->write(debugfile);
01564
01565 debugfile << "after removing redundant unique indexes fron reward table" << endl;
01566 rewardTable = removeRedundantUIsFromReward(rewardTable);
01567 rewardTable->write(debugfile);
01568
01569 rewardTables.push_back(rewardTable);
01570 }
01571
01572 SharedPointer<SparseTable> result = mergeSparseTables(rewardTables, REWARDFUNCTION, debugfile, DEBUGFACTOREDCONVERSIONREWARD);
01573 result->write(debugfile);
01574 return result;
01575 }
01576
01577
01578 void FactoredPomdp::convertFastObsTrans() {
01579
01580 ofstream debugfile;
01581 if (DEBUGFASTCONVERSIONOBS) {
01582 debugfile.open("debug_FactoredPomdp_convertFastObsTrans.txt");
01583 }
01584 mapObservationsToValue();
01585
01586
01587 vector<PreSparseMatrix> cOobsTrPre;
01588 for (unsigned int i = 0; i < numActions; i++) {
01589 PreSparseMatrix cm(numObservations, numMergedStates);
01590 cOobsTrPre.push_back(cm);
01591 }
01592
01593 vector<PreSparseMatrix> cOobsPre;
01594 for (unsigned int i = 0; i < numActions; i++) {
01595 PreSparseMatrix cm(numMergedStates, numObservations);
01596 cOobsPre.push_back(cm);
01597 }
01598
01599 int action=0;
01600 int endState=0;
01601 map<string, int> variableValues = getStartActionSVarValues();
01602 do{
01603 vector<IndexProbTuple> observations;
01604
01605 for(vector<Function>::iterator obsFunc=observFunctionList.begin();obsFunc!=observFunctionList.end();obsFunc++){
01606
01607
01608 SharedPointer<SparseTable> obsTable = obsFunc->sparseT;
01609
01610 vector<int> commonIndex;
01611 for(vector<string>::iterator cI=obsTable->cIheader.begin();cI!=obsTable->cIheader.end();cI++){
01612 commonIndex.push_back(variableValues[*cI]);
01613
01614 }
01615 vector<SparseEntry> entries = obsTable->getSparseEntries(commonIndex);
01616
01617
01618 if(observations.empty()){
01619 IndexProbTuple pt;
01620 pt.index = 0;
01621 pt.prob = 1;
01622 observations.push_back(pt);
01623 }
01624 vector<IndexProbTuple> temp = observations;
01625 int lastSize = temp.size();
01626
01627
01628
01629 for(int i=1;i<entries.size();i++){
01630 observations.insert(observations.end(), temp.begin(), temp.end());
01631 }
01632
01633
01634 for(int i=0;i<entries.size();i++){
01635 SparseEntry se = entries[i];
01636 double probMul=1;
01637 int obsInc=0;
01638
01639
01640 for (unsigned int j = 0; j < se.uniqueIndex.size(); j++) {
01641 StateObsAct* soa = mymap[obsTable->uIheader[j]];
01642 ObsAct* s = static_cast<ObsAct*> (soa);
01643 obsInc += se.uniqueIndex[j].index * observationStringIndexMap[obsTable->uIheader[j]];
01644 probMul *= se.uniqueIndex[j].value;
01645 }
01646
01647
01648 for(int k=i*lastSize;k<(i+1)*lastSize;k++){
01649 observations[k].index += obsInc;
01650 observations[k].prob *= probMul;
01651 }
01652 }
01653 }
01654 for(vector<IndexProbTuple>::iterator pt=observations.begin();pt!=observations.end();pt++){
01655 cOobsTrPre[action].addEntries(pt->index, endState, pt->prob);
01656 cOobsPre[action].addEntries(endState, pt->index, pt->prob);
01657 }
01658 }while(getNextActionSVarValues(variableValues, action, endState));
01659
01660
01661 vector<SharedPointer<SparseMatrix> > cOobs;
01662 for (unsigned int i = 0; i < cOobsPre.size(); i++) {
01663 cOobs.push_back(cOobsPre[i].convertSparseMatrix());
01664 if (DEBUGFASTCONVERSIONOBS) {
01665 debugfile << "compressed old observation transition matrix" << i
01666 << endl;
01667 cOobs[i]->write(debugfile);
01668 debugfile << endl;
01669 }
01670 }
01671
01672 vector<SharedPointer<SparseMatrix> > cOobsTr;
01673 for (unsigned int i = 0; i < cOobsTrPre.size(); i++) {
01674 cOobsTr.push_back(cOobsTrPre[i].convertSparseMatrix());
01675 if (DEBUGFASTCONVERSIONOBS) {
01676 debugfile
01677 << "compressed old observation transition transposed matrix"
01678 << i << endl;
01679 cOobsTr[i]->write(debugfile);
01680 debugfile << endl;
01681 }
01682 }
01683
01684 layer.pomdpO = cOobs;
01685 layer.pomdpOtr = cOobsTr;
01686 debugfile.close();
01687 }
01688
01689
01690 void FactoredPomdp::convertFastNoObservationsVariables() {
01691
01692 numObservations = 1;
01693
01694 ofstream debugfile;
01695 if (DEBUGFASTCONVERSIONOBS) {
01696 debugfile.open("debug_FactoredPomdp_convertFastObsTrans.txt");
01697 }
01698 debugfile << "convertFastNoObservationsVariables()" << endl;
01699
01700
01701 vector<PreSparseMatrix> cOobsTrPre;
01702 for (unsigned int i = 0; i < numActions; i++) {
01703 PreSparseMatrix cm(1, numMergedStates);
01704 cOobsTrPre.push_back(cm);
01705 }
01706
01707 vector<PreSparseMatrix> cOobsPre;
01708
01709 for (unsigned int i = 0; i < numActions; i++) {
01710 PreSparseMatrix cm(numMergedStates, 1);
01711 cOobsPre.push_back(cm);
01712 }
01713
01714
01715 for (unsigned int i = 0 ; i < numActions; i++) {
01716 for (unsigned int j = 0 ; j < numMergedStates; j++) {
01717 cOobsTrPre[i].addEntries(0, j, 1.0);
01718 cOobsPre[i].addEntries(j, 0, 1.0);
01719 }
01720 }
01721
01722 vector<SharedPointer<SparseMatrix> > cOobs;
01723 for (unsigned int i = 0; i < cOobsPre.size(); i++) {
01724 cOobs.push_back(cOobsPre[i].convertSparseMatrix());
01725 if (DEBUGFASTCONVERSIONOBS) {
01726 debugfile << "compressed old observation transition matrix" << i
01727 << endl;
01728 cOobs[i]->write(debugfile);
01729 debugfile << endl;
01730 }
01731 }
01732
01733 vector<SharedPointer<SparseMatrix> > cOobsTr;
01734 for (unsigned int i = 0; i < cOobsTrPre.size(); i++) {
01735 cOobsTr.push_back(cOobsTrPre[i].convertSparseMatrix());
01736 if (DEBUGFASTCONVERSIONOBS) {
01737 debugfile
01738 << "compressed old observation transition transposed matrix"
01739 << i << endl;
01740 cOobsTr[i]->write(debugfile);
01741 debugfile << endl;
01742 }
01743 }
01744
01745 layer.pomdpO = cOobs;
01746 layer.pomdpOtr = cOobsTr;
01747 debugfile.close();
01748 }
01749
01750
01751 void FactoredPomdp::convertFastRewardTrans() {
01752
01753 ofstream debugfile;
01754 if (DEBUGFASTCONVERSIONREWARD) {
01755 debugfile.open("debug_FactoredPomdp_convertFastRewardTrans.txt");
01756 }
01757
01758 if (!checkRewardFunctionHasOnlyPreviousTimeSliceAndAction()) {
01759 preprocessRewardFunction();
01760 }
01761
01762
01763 PreSparseMatrix cOrewardPre(numMergedStates, numActions);
01764
01765 int action = 0;
01766 int startState = 0;
01767 double reward = 0;
01768
01769 map<string, int> variableValues = getStartActionSVarValues();
01770 do{
01771
01772 double reward = 0;
01773 for(vector<Function>::iterator rewardFunc=rewardFunctionList.begin();rewardFunc!=rewardFunctionList.end();rewardFunc++){
01774
01775
01776 SharedPointer<SparseTable> rewardTable = rewardFunc->sparseT;
01777
01778 vector<int> commonIndex;
01779 for(vector<string>::iterator cI=rewardTable->cIheader.begin();cI!=rewardTable->cIheader.end();cI++){
01780 commonIndex.push_back(variableValues[*cI]);
01781 }
01782 vector<SparseEntry>& entries = rewardTable->getSparseEntries(commonIndex);
01783
01784 for(int i=0;i<entries.size();i++){
01785 SparseEntry se = entries[i];
01786
01787
01788 for (unsigned int j = 0; j < se.uniqueIndex.size(); j++) {
01789 reward += se.uniqueIndex[j].value;
01790 }
01791 }
01792 }
01793 if (reward != 0) {
01794 cOrewardPre.addEntries(startState, action, reward);
01795 }
01796 }while(getNextActionSVarValues(variableValues,action,startState));
01797
01798 layer.pomdpR = cOrewardPre.convertSparseMatrix();
01799 if (DEBUGFASTCONVERSIONREWARD) {
01800 debugfile << "compressed old reward matrix" << endl;
01801 layer.pomdpR->write(debugfile);
01802 debugfile << endl;
01803 }
01804
01805 debugfile.close();
01806 }
01807
01808 void FactoredPomdp::convertFastBelief()
01809 {
01810 ofstream debugfile;
01811 if (DEBUGFASTCONVERSIONBELIEF)
01812 debugfile.open("debug_FactoredPomdp_convertFastBelief.txt");
01813 if (DEBUGFASTCONVERSIONBELIEF)
01814 debugfile << "Converting Fast Belief State" << endl;
01815
01816
01817
01818 preprocessBeliefTables(debugfile, DEBUGFASTCONVERSIONBELIEF);
01819 mergeBeliefTables(debugfile, DEBUGFASTCONVERSIONBELIEF);
01820
01821 finalBeliefTable->sortEntries();
01822
01823 mapFastIndexesToValues(finalBeliefTable);
01824
01825 SparseVector cv(numMergedStates);
01826 vector<double> vec_cv(numMergedStates,0.0);
01827
01828 int startState, endState;
01829 double prob;
01830
01831 SparseEntry se;
01832 while(finalBeliefTable->getNext(se))
01833 {
01834 startState = endState = 0;
01835 prob = 1.0;
01836 vector<int> commonIndex = finalBeliefTable->getIterPosition();
01837
01838 for (unsigned int j = 0; j < commonIndex.size(); j++)
01839 {
01840 startState += commonIndex[j] * fastPositionCIIndexMap[j];
01841 }
01842
01843 for (unsigned int j = 0; j < se.uniqueIndex.size(); j++)
01844 {
01845 endState += se.uniqueIndex[j].index * fastPositionUIIndexMap[j];
01846 prob *= se.uniqueIndex[j].value;
01847 }
01848
01849 if (startState != endState)
01850 {
01851 cerr << "for initial belief state, the entries should have common indexes correspoding to unique indexes" << endl;
01852 assert(false);
01853 }
01854
01855 vec_cv[startState] = prob;
01856
01857 }
01858 finalBeliefTable = NULL;
01859 for (unsigned int i=0 ; i < vec_cv.size(); i++) {
01860 if (fabs(vec_cv[i]) > 0.000001) {
01861 cv.push_back(i, vec_cv[i]);
01862 }
01863 }
01864 layer.pomdpInitialBelief = cv;
01865
01866 if (DEBUGFASTCONVERSIONBELIEF)
01867 layer.pomdpInitialBelief.write(debugfile);
01868
01869 debugfile << endl;
01870 debugfile.close();
01871
01872 }
01873
01874 void FactoredPomdp::convertFastVariables() {
01875
01876 ofstream debugfile;
01877 if (DEBUGFASTCONVERSIONOTHERS)
01878 debugfile.open("debug_FactoredPomdp_convertFastOthers.txt");
01879
01880 layer.pomdpDiscount = discount;
01881 if (DEBUGFASTCONVERSIONOTHERS)
01882 debugfile << "\ndiscount: " << layer.pomdpDiscount << endl;
01883
01884 layer.pomdpNumActions = numActions;
01885 if (DEBUGFASTCONVERSIONOTHERS)
01886 debugfile << "num actions: " << layer.pomdpNumActions << endl;
01887 layer.pomdpNumObservations = numObservations;
01888 if (DEBUGFASTCONVERSIONOTHERS)
01889 debugfile << "num observations: " << layer.pomdpNumObservations << endl;
01890 layer.pomdpNumStates = layer.pomdpInitialBelief.size();
01891
01892 if (DEBUGFASTCONVERSIONOTHERS)
01893 debugfile << "num states: " << layer.pomdpNumStates << endl;
01894 if (DEBUGFASTCONVERSIONOTHERS)
01895 debugfile << "num states dimensions: " << layer.pomdpNumStates
01896 << endl;
01897
01898 debugfile.close();
01899 }
01900
01901
01904 void FactoredPomdp::convertFactored() {
01905
01906 DEBUG_LOG( cout << "convert factored" << endl; );
01907 convertFactoredStateTrans();
01908 if (observationList.size() == 0)
01909 convertFactoredNoObservationsVariables();
01910 else
01911 convertFactoredObsTrans();
01912 convertFactoredRewardTrans();
01913
01914 if (terminalStateRewardList.size() >0)
01915 convertFactoredTerminalStateReward();
01916
01917 convertFactoredBelief();
01918
01919 convertFactoredVariables();
01920
01921 DEBUG_LOG( cout << "convert factored subfunctions done" << endl; );
01922
01923 }
01924
01925 void FactoredPomdp::convertFactoredReparam() {
01926
01927
01928 cout << "convert factored reparam" << endl;
01929 convertFactoredStateReparamTrans();
01930
01931 if (observationList.size() == 0)
01932 convertFactoredNoObservationsVariables();
01933 else
01934 convertFactoredObsTrans();
01935
01936 convertFactoredRewardTrans();
01937 convertFactoredBeliefReparam();
01938 convertFactoredVariables();
01939
01940
01941
01942
01943
01944
01945
01946 }
01947 SharedPointer<SparseTable> FactoredPomdp::mergeSparseTables(vector<SharedPointer<SparseTable> > stList, int whichFunction, ofstream& debugfile, bool printDebugFile) {
01948
01949 for (unsigned int i = 0; i < stList.size(); i++) {
01950
01951 if (printDebugFile) {
01952 debugfile << "function" << i << endl;
01953 stList[i]->write(debugfile);
01954 debugfile << endl;
01955 }
01956 }
01957
01958 SharedPointer<SparseTable> resultTable = stList[0];
01959 for (unsigned int i = 1; i < stList.size(); i++)
01960 {
01961 resultTable = SparseTable::join(*resultTable,*stList[i], whichFunction);
01962
01963 if (printDebugFile)
01964 {
01965 debugfile << "Intermediate table " << i << endl;
01966 resultTable->write(debugfile);
01967 debugfile << endl;
01968 }
01969 }
01970 return resultTable;
01971
01972 }
01973
01974 SharedPointer<SparseTable> FactoredPomdp::mergeTables(vector<Function>* functionList, int whichFunction, ofstream& debugfile, bool printDebugFile) {
01975
01976 for (unsigned int i = 0; i < functionList->size(); i++) {
01977
01978 if (printDebugFile) {
01979 debugfile << "function" << i << endl;
01980 (*functionList)[i].sparseT->write(debugfile);
01981
01982 debugfile << endl;
01983 }
01984 }
01985
01986 (*functionList)[0].sparseT->sortEntries();
01987
01988
01989
01990 SharedPointer<SparseTable> resultTable = (*functionList)[0].sparseT;
01991
01992 resultTable->sortEntries();
01993 for (unsigned int i = 1; i < functionList->size(); i++)
01994 {
01995 resultTable = SparseTable::join(*resultTable,*(*functionList)[i].sparseT, whichFunction);
01996
01997 if (printDebugFile)
01998 {
01999 debugfile << "Intermediate table " << i << endl;
02000 resultTable->write(debugfile);
02001
02002 debugfile << endl;
02003 }
02004 }
02005 return resultTable;
02006
02007 }
02008
02009
02010 void FactoredPomdp::expandFactoredStateTable(SharedPointer<SparseTable> sf) {
02011
02012 vector<string> cIheader = sf->cIheader;
02013 int oldNumCI = sf->cIheader.size();
02014 vector<int> numCIValues = sf->numCIValues;
02015 for (unsigned int i = 0; i < stateList.size() ; i++) {
02016
02017 if ((!(sf->containsCI(stateList[i].getVNameCurr()))) && (stateList[i].getObserved())) {
02018 cIheader.push_back(stateList[i].getVNameCurr());
02019
02020 StateObsAct* soa = mymap[stateList[i].getVNameCurr()];
02021 State* s = static_cast<State*> (soa);
02022 numCIValues.push_back(s->getValueEnum().size());
02023
02024 }
02025 }
02026
02027 SharedPointer<SparseTable> expandedTable (new SparseTable(cIheader, sf->uIheader, numCIValues, sf->numUIValues));
02028 vector<int> newCI = expandedTable->getIterBegin();
02029 do{
02030
02031 vector<int> oldCI;
02032 for(int k=0;k<oldNumCI;k++){
02033 oldCI.push_back(newCI[k]);
02034 }
02035
02036 vector<SparseEntry>& curRow = sf->getSparseEntries(oldCI);
02037 for(int j=0;j<curRow.size();j++){
02038 expandedTable->add(newCI, curRow[j]);
02039 }
02040 }while(expandedTable->getNextCI(newCI));
02041
02042 sf = expandedTable;
02043 }
02044
02045 void FactoredPomdp::resortFactoredStateTables(ofstream& debugfile, bool printDebugFile, const int MIXEDTYPE) {
02046
02047 if (printDebugFile) {
02048 debugfile << "finalStateTable before factored re-sorting" << endl;
02049 finalStateTable->write(debugfile);
02050 debugfile << endl;
02051 }
02052
02053
02054 unsigned int pos;
02055 for (unsigned int i=0; i < actionList.size(); i++) {
02056 pos = finalStateTable->findPosition(actionList[i].getVName());
02057 finalStateTable->swapCIHeaders(i,pos);
02058 finalStateTable->swapSparseColumns(i,pos);
02059 }
02060
02061
02062 for (unsigned int i=0; i < stateList.size(); i++) {
02063 pos = finalStateTable->findPosition(stateList[i].getVNamePrev());
02064 finalStateTable->swapCIHeaders(i+actionList.size(),pos);
02065 finalStateTable->swapSparseColumns(i+actionList.size(),pos);
02066 }
02067
02068 if (MIXEDTYPE == MIXED_REPARAM) {
02069
02070 for (unsigned int i=0; i < stateList.size(); i++) {
02071 if (stateList[i].getObserved()) {
02072 pos = finalStateTable->findPosition(stateList[i].getVNameCurr());
02073 finalStateTable->swapCIHeaders(i+actionList.size()+stateList.size(),pos);
02074 finalStateTable->swapSparseColumns(i+actionList.size()+stateList.size(),pos);
02075 }
02076 }
02077 }
02078
02079 finalStateTable->sortEntries();
02080
02081 if(printDebugFile) {
02082 debugfile << "finalStateTable after factored re-sorting" << endl;
02083 finalStateTable->write(debugfile);
02084 debugfile << endl;
02085 }
02086
02087
02088 }
02089
02090
02091
02092 const void FactoredPomdp::defineCanonicalNames() {
02093
02094
02095
02096
02097 canonicalNamePrev.clear();
02098 canonicalNameCurr.clear();
02099 canonicalNameForTerminal.clear();
02100
02101
02102
02103 for (unsigned int j = 0; j < finalStateTable->cIheader.size(); j++) {
02104
02105 if (checkActionNameExists(finalStateTable->cIheader[j]))
02106 canonicalNamePrev.push_back(finalStateTable->cIheader[j]);
02107 else if (isPreviousTimeSlice(finalStateTable->cIheader[j]))
02108 canonicalNamePrev.push_back(finalStateTable->cIheader[j]);
02109 }
02110
02111 for (unsigned int j = 0; j < finalStateTable->cIheader.size(); j++) {
02112 if (checkActionNameExists(finalStateTable->cIheader[j])) {
02113 canonicalNameCurr.push_back(finalStateTable->cIheader[j]);
02114 }else{
02115 if (isPreviousTimeSlice(finalStateTable->cIheader[j])) {
02116 const State& s = findState(finalStateTable->cIheader[j]);
02117 canonicalNameCurr.push_back(s.getVNameCurr());
02118 }
02119 }
02120 }
02121
02122 for (unsigned int j = 0; j < finalStateTable->cIheader.size(); j++) {
02123 if (!(checkActionNameExists(finalStateTable->cIheader[j])))
02124 if (isPreviousTimeSlice(finalStateTable->cIheader[j]))
02125 canonicalNameForTerminal.push_back(finalStateTable->cIheader[j]);
02126 }
02127
02128
02129 }
02130
02131 void FactoredPomdp::mapActionsToValue() {
02132
02133 int increment = 1;
02134 for (int i = (int) actionList.size() - 1; i >= 0; i--) {
02135 actionStringIndexMap[actionList[i].getVName()] = increment;
02136 increment *= actionList[i].getValueEnum().size();
02137 }
02138 numActions = increment;
02139
02140 }
02141
02142 void FactoredPomdp::mapObservationsToValue() {
02143
02144 int increment = 1;
02145 for (int i = (int) observationList.size() - 1; i >= 0; i--) {
02146 observationStringIndexMap[observationList[i].getVName()] = increment;
02147 increment *= observationList[i].getValueEnum().size();
02148 }
02149 numObservations = increment;
02150 }
02151
02152
02153 void FactoredPomdp::mapObservationsUIsToValue(SharedPointer<SparseTable> st) {
02154
02155 observationUIIndexMap.clear();
02156
02157 for (unsigned int j = 0; j < st->uIheader.size(); j++) {
02158
02159 StateObsAct* soa = mymap[st->uIheader[j]];
02160 observationUIIndexMap[j] = observationStringIndexMap[st->uIheader[j]];
02161
02162 }
02163
02164 }
02165
02166 void FactoredPomdp::mapFactoredStatesToValue() {
02167
02171
02172 int incrementX = 1;
02173 int incrementY = 1;
02174
02175 for (int i = (int) stateList.size() - 1; i >= 0; i--) {
02176 const State& s = stateList[i];
02177 if (s.getObserved()) {
02178 positionXStringIndexMap[stateList[i].getVNamePrev()] = incrementX;
02179 positionXStringIndexMap[stateList[i].getVNameCurr()] = incrementX;
02180 incrementX *= stateList[i].getValueEnum().size();
02181 } else {
02182 positionYStringIndexMap[stateList[i].getVNamePrev()] = incrementY;
02183 positionYStringIndexMap[stateList[i].getVNameCurr()] = incrementY;
02184 incrementY *= stateList[i].getValueEnum().size();
02185 }
02186 }
02187
02188 numMergedStatesX = incrementX;
02189 numMergedStatesY = incrementY;
02190 }
02191
02192 void FactoredPomdp::mapFactoredCIsToValue(SharedPointer<SparseTable> st) {
02193
02194 factoredPositionCIIndexMap.clear();
02195
02196
02197
02198 for (unsigned int j = 0; j < st->cIheader.size(); j++) {
02199 if (!(checkActionNameExists(st->cIheader[j]))) {
02200 StateObsAct* soa = mymap[st->cIheader[j]];
02201 State* s = static_cast<State*> (soa);
02202 if (s->getObserved()) {
02203 factoredPositionCIIndexMap[j]
02204 = positionXStringIndexMap[st->cIheader[j]];
02205 } else {
02206 factoredPositionCIIndexMap[j]
02207 = positionYStringIndexMap[st->cIheader[j]];
02208 }
02209 }else{
02210 factoredPositionCIIndexMap[j] = actionStringIndexMap[st->cIheader[j]];
02211 }
02212 }
02213 }
02214
02215 void FactoredPomdp::mapFactoredStateUIsToValue(SharedPointer<SparseTable> st) {
02216
02217 factoredPositionUIIndexMap.clear();
02218
02219 for (unsigned int j = 0; j < st->uIheader.size(); j++) {
02220
02221 StateObsAct* soa = mymap[st->uIheader[j]];
02222 State* s = static_cast<State*> (soa);
02223 if (s->getObserved())
02224 factoredPositionUIIndexMap[j]
02225 = positionXStringIndexMap[st->uIheader[j]];
02226 else
02227 factoredPositionUIIndexMap[j]
02228 = positionYStringIndexMap[st->uIheader[j]];
02229
02230 }
02231
02232 }
02233
02234
02235 void FactoredPomdp::mapFactoredBeliefIndexesToValue(SharedPointer<SparseTable> st) {
02236
02237 factoredPositionCIIndexMap.clear();
02238 factoredPositionUIIndexMap.clear();
02239
02240
02241 for (unsigned int j = 0; j < st->cIheader.size(); j++) {
02242
02243 StateObsAct* soa = mymap[st->cIheader[j]];
02244 State* s = static_cast<State*> (soa);
02245 if (s->getObserved()) {
02246 factoredPositionCIIndexMap[j]
02247 = positionXStringIndexMap[st->cIheader[j]];
02248 } else {
02249 factoredPositionCIIndexMap[j]
02250 = positionYStringIndexMap[st->cIheader[j]];
02251 }
02252
02253 }
02254
02255 for (unsigned int j = 0; j < st->uIheader.size(); j++) {
02256
02257 StateObsAct* soa = mymap[st->uIheader[j]];
02258 State* s = static_cast<State*> (soa);
02259 if (s->getObserved())
02260 factoredPositionUIIndexMap[j]
02261 = positionXStringIndexMap[st->uIheader[j]];
02262 else
02263 factoredPositionUIIndexMap[j]
02264 = positionYStringIndexMap[st->uIheader[j]];
02265 }
02266
02267 }
02268
02269 void FactoredPomdp::convertFactoredStateReparamTrans() {
02270
02271 ofstream debugfile;
02272 if (DEBUGFACTOREDCONVERSIONSTATE) {
02273 debugfile.open("debug_FactoredPomdp_convertFactoredState.txt");
02274
02275 }
02276
02277 mapActionsToValue();
02278 mapFactoredStatesToValue();
02279
02280
02281
02282
02283 vvPreSparseMatrix cOstXTrPre = createVvPreSparseMatrix(numActions, numMergedStatesX, numMergedStatesX, numMergedStatesY);
02284
02285 vvPreSparseMatrix cOstXPre = createVvPreSparseMatrix(numActions, numMergedStatesX, numMergedStatesY, numMergedStatesX);
02286
02287
02288 vector<vvPreSparseMatrix> cOstYTrPre;
02289 for (unsigned int i = 0; i < numActions; i++) {
02290 vvPreSparseMatrix tempX = createVvPreSparseMatrix(numMergedStatesX, numMergedStatesX, numMergedStatesY, numMergedStatesY);
02291 cOstYTrPre.push_back(tempX);
02292 }
02293
02294
02295 vector<vvPreSparseMatrix > cOstYPre;
02296 for (unsigned int i = 0; i < numActions; i++) {
02297 vvPreSparseMatrix tempX = createVvPreSparseMatrix(numMergedStatesX, numMergedStatesX, numMergedStatesY, numMergedStatesY);
02298 cOstYPre.push_back(tempX);
02299 }
02300
02301 int action;
02302 int startStateX;
02303 int startStateY;
02304 int startStateX_prime;
02305
02306
02307 unsigned int *terminalStatesX = new unsigned int[numMergedStatesX];
02308 for (int i = 0; i < numMergedStatesX; i++)
02309 terminalStatesX[i] = 0;
02310
02311 unsigned int *terminalStatesY = new unsigned int[numMergedStatesY];
02312 for (int i = 0; i < numMergedStatesY; i++)
02313 terminalStatesY[i] = 0;
02314
02315 map<string, int> variableValues = getStartActionXYVarValues();
02316 action = startStateX = startStateX_prime = startStateY = 0;
02317 do{
02318
02319 vector<EndState> endStates;
02320 for(vector<Function>::iterator stateFunc=stateFunctionList.begin();stateFunc!=stateFunctionList.end();stateFunc++){
02321
02322
02323 SharedPointer<SparseTable> stateTable = stateFunc->sparseT;
02324
02325 vector<int> commonIndex;
02326 for(vector<string>::iterator cI=stateTable->cIheader.begin();cI!=stateTable->cIheader.end();cI++){
02327 commonIndex.push_back(variableValues[*cI]);
02328
02329 }
02330
02331 vector<SparseEntry> entries = stateTable->getSparseEntries(commonIndex);
02332
02333
02334 if(endStates.empty()){
02335 EndState es;
02336 es.endStateX = 0;
02337 es.endStateY = 0;
02338 es.probX = 1;
02339 es.probY = 1;
02340 endStates.push_back(es);
02341 }
02342 vector<EndState> temp = endStates;
02343 int lastSize = temp.size();
02344
02345
02346
02347 for(int i=1;i<entries.size();i++){
02348 endStates.insert(endStates.end(), temp.begin(), temp.end());
02349 }
02350
02351
02352 for(int i=0;i<entries.size();i++){
02353 SparseEntry se = entries[i];
02354 double probXmul, probYmul;
02355 int endStateXinc, endStateYinc;
02356 probXmul= probYmul= 1;
02357 endStateXinc = endStateYinc = 0;
02358
02359
02360 for (unsigned int j = 0; j < se.uniqueIndex.size(); j++) {
02361
02362 StateObsAct* soa = mymap[stateTable->uIheader[j]];
02363 State* s = static_cast<State*> (soa);
02364
02365 if (s->getObserved()) {
02366 endStateXinc += se.uniqueIndex[j].index * positionXStringIndexMap[stateTable->uIheader[j]];
02367 probXmul *= se.uniqueIndex[j].value;
02368 } else {
02369 endStateYinc += se.uniqueIndex[j].index * positionYStringIndexMap[stateTable->uIheader[j]];
02370 probYmul *= se.uniqueIndex[j].value;
02371 }
02372 }
02373
02374
02375
02376 for(int k=i*lastSize;k<(i+1)*lastSize;k++){
02377 endStates[k].endStateX += endStateXinc;
02378 endStates[k].endStateY += endStateYinc;
02379 endStates[k].probX *= probXmul;
02380 endStates[k].probY *= probYmul;
02381 }
02382 }
02383
02384 }
02385
02386 for(vector<EndState>::iterator es=endStates.begin();es!=endStates.end();es++){
02387
02388 cOstXTrPre[action][startStateX].addEntries(es->endStateX, startStateY, es->probX);
02389
02390 cOstXPre[action][startStateX].addEntries(startStateY, es->endStateX, es->probX);
02391
02392 cOstYTrPre[action][startStateX][startStateX_prime].addEntries(es->endStateY, startStateY, es->probY);
02393
02394 cOstYPre[action][startStateX][startStateX_prime].addEntries(startStateY, es->endStateY, es->probY);
02395
02396
02397 if ((es->endStateX == startStateX) && (fabs(es->probX - 1) < 0.000001))
02398 terminalStatesX[es->endStateX]++;
02399 if ((es->endStateY == startStateY) && (fabs(es->probY - 1) < 0.000001))
02400 terminalStatesY[es->endStateY]++;
02401
02402 }
02403 }while(getNextActionXXpYVarValues(variableValues,action,startStateX,startStateX_prime, startStateY));
02404
02405
02406 vvSparseMatrix cOstX = helperPreSparseMatrixToSparseMatrix(cOstXPre);
02407
02408 if (DEBUGFACTOREDCONVERSIONSTATE) {
02409 for (unsigned int i = 0; i < cOstX.size(); i++) {
02410 for (unsigned int j = 0; j < cOstX[0].size(); j++) {
02411 debugfile << "cOstX ( TX[a][x](y,x') ), action: " << i
02412 << " x: " << j << endl;
02413 cOstX[i][j]->write(debugfile);
02414 debugfile << endl;
02415 }
02416 }
02417 }
02418
02419
02420 vvSparseMatrix cOstXTr = helperPreSparseMatrixToSparseMatrix(cOstXTrPre);
02421
02422 if (DEBUGFACTOREDCONVERSIONSTATE) {
02423 for (unsigned int i = 0; i < cOstXTr.size(); i++) {
02424 for (unsigned int j = 0; j < cOstXTr[0].size(); j++) {
02425 debugfile << "cOstXTr ( TXtr[a][x](x',y) ), action: " << i
02426 << "x: " << j << endl;
02427 cOstXTr[i][j]->write(debugfile);
02428 debugfile << endl;
02429 }
02430 }
02431 }
02432
02433
02434 vector<vvSparseMatrix > cOstY = helperPreSparseMatrixToSparseMatrix(cOstYPre);
02435
02436 if (DEBUGFACTOREDCONVERSIONSTATE) {
02437 for (unsigned int i = 0; i < cOstY.size(); i++) {
02438 for (unsigned int j = 0; j < cOstY[0].size(); j++) {
02439 for (unsigned int k = 0; k < cOstY[0][0].size(); k++) {
02440 debugfile << "cOstY ( TY[a][x][x'](y,y') ), action: " << i << "x: "
02441 << j << "x': " << k << endl;
02442 cOstY[i][j][k]->write(debugfile);
02443 debugfile << endl;
02444 }
02445 }
02446 }
02447 }
02448
02449
02450
02451 vector<vvSparseMatrix > cOstYTr = helperPreSparseMatrixToSparseMatrix(cOstYTrPre);
02452
02453 if (DEBUGFACTOREDCONVERSIONSTATE) {
02454 for (unsigned int i = 0; i < cOstYTr.size(); i++) {
02455 for (unsigned int j = 0; j < cOstYTr[0].size(); j++) {
02456 for (unsigned int k = 0 ; k < cOstYTr[0][0].size(); k++) {
02457 debugfile << "cOstYTr ( TYtr[a][x][x'](y',y) ), action: " << i << "x: "
02458 << j << "x': " << k << endl;
02459 cOstYTr[i][j][k]->write(debugfile);
02460 debugfile << endl;
02461 }
02462 }
02463 }
02464 }
02465
02466
02467
02468 vector<int> termStatesX;
02469 for (int i = 0; i < numMergedStatesX; i++) {
02470
02471 if (terminalStatesX[i] == numActions * numMergedStatesY * numMergedStatesX)
02472 termStatesX.push_back(1);
02473 else
02474 termStatesX.push_back(0);
02475 }
02476
02477 vector<int> termStatesY;
02478 for (int i = 0; i < numMergedStatesY; i++) {
02479 if (terminalStatesY[i] == numActions * numMergedStatesX * numMergedStatesX)
02480 termStatesY.push_back(1);
02481 else
02482 termStatesY.push_back(0);
02483 }
02484
02485 if (DEBUGFACTOREDCONVERSIONSTATE) {
02486 debugfile << "terminal states X" << endl;
02487 for (unsigned int i = 0; i < termStatesX.size(); i++) {
02488 debugfile << termStatesX[i] << " ";
02489 }
02490
02491 debugfile << "\nterminal states Y" << endl;
02492 for (unsigned int i = 0; i < termStatesY.size(); i++) {
02493 debugfile << termStatesY[i] << " ";
02494 }
02495
02496 }
02497
02498 layer.TX = cOstX;
02499 layer.TXtr = cOstXTr;
02500 layer.TY_reparam = cOstY;
02501 layer.TYtr_reparam = cOstYTr;
02502
02503 layer.isPOMDPTerminalState.push_back(termStatesX);
02504 layer.isPOMDPTerminalState.push_back(termStatesY);
02505
02506 debugfile.close();
02507
02508
02509
02510 }
02511
02512
02513
02514 vvPreSparseMatrix FactoredPomdp::createVvPreSparseMatrix(int a, int b, int c, int d){
02515 vvPreSparseMatrix M;
02516 for (unsigned int i = 0; i <a; i++) {
02517 vector<PreSparseMatrix> temp;
02518 for (int j = 0; j < b; j++) {
02519 PreSparseMatrix cm(c, d);
02520 temp.push_back(cm);
02521 }
02522 M.push_back(temp);
02523 }
02524 return M;
02525 }
02526
02527 void FactoredPomdp::printSparseMatrix(string title, vvSparseMatrix M, ofstream& debugfile){
02528 if (DEBUGFACTOREDCONVERSIONSTATE) {
02529 for (unsigned int i = 0; i < M.size(); i++) {
02530 for (unsigned int j = 0; j < M[0].size(); j++) {
02531 debugfile << title << i
02532 << "x: " << j << endl;
02533 M[i][j]->write(debugfile);
02534 debugfile << endl;
02535 }
02536 }
02537 }
02538 }
02539
02540
02541 void FactoredPomdp::convertFactoredStateTrans()
02542 {
02543 ofstream debugfile;
02544 if (DEBUGFACTOREDCONVERSIONSTATE) {
02545 debugfile.open("debug_FactoredPomdp_convertFactoredState.txt");
02546
02547 }
02548 mapActionsToValue();
02549 mapFactoredStatesToValue();
02550
02551
02552
02553
02554 vvPreSparseMatrix cOstXTrPre = createVvPreSparseMatrix(numActions, numMergedStatesX, numMergedStatesX, numMergedStatesY);
02555
02556 vvPreSparseMatrix cOstXPre = createVvPreSparseMatrix(numActions, numMergedStatesX, numMergedStatesY, numMergedStatesX);
02557
02558 vvPreSparseMatrix cOstYTrPre = createVvPreSparseMatrix(numActions, numMergedStatesX, numMergedStatesY, numMergedStatesY);
02559
02560 vvPreSparseMatrix cOstYPre = createVvPreSparseMatrix(numActions, numMergedStatesX, numMergedStatesY, numMergedStatesY) ;
02561
02562 int action, startStateX, startStateY;
02563
02564
02565 unsigned int *terminalStatesX = new unsigned int[numMergedStatesX];
02566 for (int i = 0; i < numMergedStatesX; i++)
02567 terminalStatesX[i] = 0;
02568
02569 unsigned int *terminalStatesY = new unsigned int[numMergedStatesY];
02570 for (int i = 0; i < numMergedStatesY; i++)
02571 terminalStatesY[i] = 0;
02572
02573
02574
02575 map<string, int> variableValues = getStartActionXYVarValues();
02576 action = startStateX = startStateY = 0;
02577 do{
02578
02579 vector<EndState> endStates;
02580 for(vector<Function>::iterator stateFunc=stateFunctionList.begin();stateFunc!=stateFunctionList.end();stateFunc++){
02581
02582
02583 SharedPointer<SparseTable> stateTable = stateFunc->sparseT;
02584
02585 vector<int> commonIndex;
02586 for(vector<string>::iterator cI=stateTable->cIheader.begin();cI!=stateTable->cIheader.end();cI++){
02587 commonIndex.push_back(variableValues[*cI]);
02588
02589 }
02590
02591 vector<SparseEntry> entries = stateTable->getSparseEntries(commonIndex);
02592
02593
02594 if(endStates.empty()){
02595 EndState es;
02596 es.endStateX = 0;
02597 es.endStateY = 0;
02598 es.probX = 1;
02599 es.probY = 1;
02600 endStates.push_back(es);
02601 }
02602 vector<EndState> temp = endStates;
02603 int lastSize = temp.size();
02604
02605
02606
02607 for(int i=1;i<entries.size();i++){
02608 endStates.insert(endStates.end(), temp.begin(), temp.end());
02609 }
02610
02611
02612 for(int i=0;i<entries.size();i++){
02613 SparseEntry se = entries[i];
02614 double probXmul, probYmul;
02615 int endStateXinc, endStateYinc;
02616 probXmul= probYmul= 1;
02617 endStateXinc = endStateYinc = 0;
02618
02619
02620 for (unsigned int j = 0; j < se.uniqueIndex.size(); j++) {
02621
02622 StateObsAct* soa = mymap[stateTable->uIheader[j]];
02623 State* s = static_cast<State*> (soa);
02624
02625 if (s->getObserved()) {
02626 endStateXinc += se.uniqueIndex[j].index * positionXStringIndexMap[stateTable->uIheader[j]];
02627 probXmul *= se.uniqueIndex[j].value;
02628 } else {
02629 endStateYinc += se.uniqueIndex[j].index * positionYStringIndexMap[stateTable->uIheader[j]];
02630 probYmul *= se.uniqueIndex[j].value;
02631 }
02632 }
02633
02634
02635
02636 for(int k=i*lastSize;k<(i+1)*lastSize;k++){
02637 endStates[k].endStateX += endStateXinc;
02638 endStates[k].endStateY += endStateYinc;
02639 endStates[k].probX *= probXmul;
02640 endStates[k].probY *= probYmul;
02641 }
02642 }
02643
02644 }
02645
02646 for(vector<EndState>::iterator es=endStates.begin();es!=endStates.end();es++){
02647
02648 cOstXTrPre[action][startStateX].addEntries(es->endStateX, startStateY, es->probX);
02649
02650 cOstXPre[action][startStateX].addEntries(startStateY, es->endStateX, es->probX);
02651
02652 cOstYTrPre[action][startStateX].addEntries(es->endStateY, startStateY, es->probY);
02653
02654 cOstYPre[action][startStateX].addEntries(startStateY, es->endStateY, es->probY);
02655
02656
02657 if ((es->endStateX == startStateX) && (fabs(es->probX - 1) < 0.000001))
02658 terminalStatesX[es->endStateX]++;
02659 if ((es->endStateY == startStateY) && (fabs(es->probY - 1) < 0.000001))
02660 terminalStatesY[es->endStateY]++;
02661
02662 }
02663 }while(getNextActionXYVarValues(variableValues,action,startStateX,startStateY));
02664
02665
02666
02667 vvSparseMatrix cOstX = helperPreSparseMatrixToSparseMatrix(cOstXPre);
02668
02669 if (DEBUGFACTOREDCONVERSIONSTATE) {
02670 for (unsigned int i = 0; i < cOstX.size(); i++) {
02671 for (unsigned int j = 0; j < cOstX[0].size(); j++) {
02672 debugfile << "cOstX ( TX[a][x](y,x') ), action: " << i
02673 << " x: " << j << endl;
02674 cOstX[i][j]->write(debugfile);
02675 debugfile << endl;
02676 }
02677 }
02678 }
02679
02680
02681 vvSparseMatrix cOstXTr = helperPreSparseMatrixToSparseMatrix(cOstXTrPre);
02682 printSparseMatrix("cOstXTr ( TXtr[a][x](x',y) ), action: ", cOstXTr, debugfile);
02683
02684
02685 vvSparseMatrix cOstY = helperPreSparseMatrixToSparseMatrix(cOstYPre);
02686 printSparseMatrix("cOstY ( TY[a][x](y,y') ), action: ", cOstY, debugfile);
02687
02688
02689
02690 vvSparseMatrix cOstYTr = helperPreSparseMatrixToSparseMatrix(cOstYTrPre);
02691 printSparseMatrix("cOstYTr ( TYtr[a][x](y',y) ), action: ", cOstYTr, debugfile);
02692
02693
02694 vector<int> termStatesX;
02695 for (int i = 0; i < numMergedStatesX; i++) {
02696
02697 if (terminalStatesX[i] == numActions * numMergedStatesY)
02698 termStatesX.push_back(1);
02699 else
02700 termStatesX.push_back(0);
02701 }
02702
02703 vector<int> termStatesY;
02704 for (int i = 0; i < numMergedStatesY; i++) {
02705 if (terminalStatesY[i] == numActions * numMergedStatesX)
02706 termStatesY.push_back(1);
02707 else
02708 termStatesY.push_back(0);
02709 }
02710
02711 if (DEBUGFACTOREDCONVERSIONSTATE) {
02712 debugfile << "terminal states X" << endl;
02713 for (unsigned int i = 0; i < termStatesX.size(); i++) {
02714 debugfile << termStatesX[i] << " ";
02715 }
02716
02717 debugfile << "\nterminal states Y" << endl;
02718 for (unsigned int i = 0; i < termStatesY.size(); i++) {
02719 debugfile << termStatesY[i] << " ";
02720 }
02721
02722 }
02723
02724 layer.TX = cOstX;
02725 layer.TXtr = cOstXTr;
02726 layer.TY = cOstY;
02727 layer.TYtr = cOstYTr;
02728
02729 layer.isPOMDPTerminalState.push_back(termStatesX);
02730 layer.isPOMDPTerminalState.push_back(termStatesY);
02731
02732 debugfile.close();
02733 delete [] terminalStatesX;
02734 delete [] terminalStatesY;
02735
02736 }
02737
02738
02739 void FactoredPomdp::convertFactoredObsTrans()
02740 {
02741
02742 ofstream debugfile;
02743 if (DEBUGFACTOREDCONVERSIONOBS) {
02744 debugfile.open("debug_FactoredPomdp_convertFactoredObs.txt");
02745 }
02746
02747 mapObservationsToValue();
02748
02749
02750
02751 vvPreSparseMatrix cOobsTrPre = createVvPreSparseMatrix(numActions, numMergedStatesX, numObservations, numMergedStatesY);
02752
02753 vvPreSparseMatrix cOobsPre = createVvPreSparseMatrix(numActions, numMergedStatesX, numMergedStatesY, numObservations);
02754
02755
02756 int action;
02757 int endStateX;
02758 int endStateY;
02759 int observ;
02760
02761 map<string, int> variableValues = getStartActionXYVarValues();
02762 action = endStateX = endStateY = 0;
02763 do{
02764
02765 vector<IndexProbTuple> observations;
02766 for(vector<Function>::iterator obsFunc=observFunctionList.begin();obsFunc!=observFunctionList.end();obsFunc++){
02767
02768
02769 SharedPointer<SparseTable> obsTable = obsFunc->sparseT;
02770
02771 vector<int> commonIndex;
02772 for(vector<string>::iterator cI=obsTable->cIheader.begin();cI!=obsTable->cIheader.end();cI++){
02773 commonIndex.push_back(variableValues[*cI]);
02774
02775 }
02776 vector<SparseEntry> entries = obsTable->getSparseEntries(commonIndex);
02777
02778
02779 if(observations.empty()){
02780 IndexProbTuple pt;
02781 pt.index = 0;
02782 pt.prob = 1;
02783 observations.push_back(pt);
02784 }
02785 vector<IndexProbTuple> temp = observations;
02786 int lastSize = temp.size();
02787
02788
02789
02790 for(int i=1;i<entries.size();i++){
02791 observations.insert(observations.end(), temp.begin(), temp.end());
02792 }
02793
02794
02795 for(int i=0;i<entries.size();i++){
02796 SparseEntry se = entries[i];
02797 double probMul=1;
02798 int obsInc=0;
02799
02800
02801 for (unsigned int j = 0; j < se.uniqueIndex.size(); j++) {
02802 StateObsAct* soa = mymap[obsTable->uIheader[j]];
02803 ObsAct* s = static_cast<ObsAct*> (soa);
02804 obsInc += se.uniqueIndex[j].index * observationStringIndexMap[obsTable->uIheader[j]];
02805 probMul *= se.uniqueIndex[j].value;
02806 }
02807
02808
02809 for(int k=i*lastSize;k<(i+1)*lastSize;k++){
02810 observations[k].index += obsInc;
02811 observations[k].prob *= probMul;
02812 }
02813 }
02814 }
02815 for(vector<IndexProbTuple>::iterator pt=observations.begin();pt!=observations.end();pt++){
02816
02817 cOobsPre[action][endStateX].addEntries(endStateY, pt->index, pt->prob);
02818
02819 cOobsTrPre[action][endStateX].addEntries(pt->index, endStateY, pt->prob);
02820 }
02821
02822 }while(getNextActionXYVarValues(variableValues,action,endStateX,endStateY));
02823
02824
02825 vvSparseMatrix cOobs = helperPreSparseMatrixToSparseMatrix(cOobsPre);
02826
02827 if (DEBUGFACTOREDCONVERSIONOBS) {
02828 for (unsigned int i = 0; i < cOobs.size(); i++) {
02829 for (unsigned int j = 0; j < cOobs[0].size(); j++) {
02830 debugfile << "cOobs ( O[a][x'](y',o) ), action: " << i
02831 << " x' : " << j << endl;
02832 cOobs[i][j]->write(debugfile);
02833 debugfile << endl;
02834 }
02835 }
02836 }
02837
02838
02839 vvSparseMatrix cOobsTr = helperPreSparseMatrixToSparseMatrix(cOobsTrPre);
02840
02841 if (DEBUGFACTOREDCONVERSIONOBS) {
02842 for (unsigned int i = 0; i < cOobsTr.size(); i++) {
02843 for (unsigned int j = 0; j < cOobsTr[0].size(); j++) {
02844 debugfile << "cOobsTr ( Otr[a][x'](o,y') ), action: " << i
02845 << " x' : " << j << endl;
02846 cOobsTr[i][j]->write(debugfile);
02847 debugfile << endl;
02848 }
02849 }
02850 }
02851
02852 layer.O = cOobs;
02853 layer.Otr = cOobsTr;
02854 debugfile.close();
02855 }
02856
02857
02858 void FactoredPomdp::convertFactoredNoObservationsVariables() {
02859
02860 numObservations = 1;
02861
02862 ofstream debugfile;
02863 if (DEBUGFACTOREDCONVERSIONOBS) {
02864 debugfile.open("debug_FactoredPomdp_convertFactoredObs.txt");
02865 }
02866 debugfile << "convertFactoredNoObservationsVariables()" << endl;
02867
02868
02869
02870 vvPreSparseMatrix cOobsTrPre = createVvPreSparseMatrix(numActions, numMergedStatesX, 1, numMergedStatesY);
02871
02872 vvPreSparseMatrix cOobsPre = createVvPreSparseMatrix(numActions, numMergedStatesX, numMergedStatesY, 1);
02873
02874
02875
02876 for (unsigned int i = 0; i < numActions; i++) {
02877 for (unsigned int j = 0 ; j < numMergedStatesX; j++) {
02878 for (unsigned int k = 0; k < numMergedStatesY; k++) {
02879
02880 cOobsPre[i][j].addEntries(k, 0, 1.0);
02881
02882 cOobsTrPre[i][j].addEntries(0, k, 1.0);
02883 }
02884 }
02885 }
02886
02887
02888 vvSparseMatrix cOobs = helperPreSparseMatrixToSparseMatrix(cOobsPre);
02889
02890 if (DEBUGFACTOREDCONVERSIONOBS) {
02891 for (unsigned int i = 0; i < cOobs.size(); i++) {
02892 for (unsigned int j = 0; j < cOobs[0].size(); j++) {
02893 debugfile << "cOobs ( O[a][x'](y',o) ), action: " << i
02894 << " x' : " << j << endl;
02895 cOobs[i][j]->write(debugfile);
02896 debugfile << endl;
02897 }
02898 }
02899 }
02900
02901
02902 vvSparseMatrix cOobsTr = helperPreSparseMatrixToSparseMatrix(cOobsTrPre);
02903
02904 if (DEBUGFACTOREDCONVERSIONOBS) {
02905 for (unsigned int i = 0; i < cOobsTr.size(); i++) {
02906 for (unsigned int j = 0; j < cOobsTr[0].size(); j++) {
02907 debugfile << "cOobsTr ( Otr[a][x'](o,y') ), action: " << i
02908 << " x' : " << j << endl;
02909 cOobsTr[i][j]->write(debugfile);
02910 debugfile << endl;
02911 }
02912 }
02913 }
02914
02915 layer.O = cOobs;
02916 layer.Otr = cOobsTr;
02917 debugfile.close();
02918
02919 }
02920
02921
02922
02923 vvSparseMatrix FactoredPomdp::helperPreSparseMatrixToSparseMatrix(vvPreSparseMatrix precm)
02924 {
02925 vvSparseMatrix results;
02926 for (unsigned int i = 0; i < precm.size(); i++)
02927 {
02928 vector<SharedPointer<SparseMatrix> > temp;
02929 for (unsigned int j = 0; j < precm[i].size(); j++)
02930 {
02931 temp.push_back(precm[i][j].convertSparseMatrix());
02932 }
02933 results.push_back(temp);
02934 }
02935 return results;
02936 }
02937
02938 vector<vvSparseMatrix > FactoredPomdp::helperPreSparseMatrixToSparseMatrix(vector<vvPreSparseMatrix >precm)
02939 {
02940 vector<vvSparseMatrix > results;
02941 for (unsigned int i = 0; i < precm.size(); i++) {
02942 vvSparseMatrix temp_vec;
02943 for (unsigned int j = 0; j < precm[0].size(); j++) {
02944 vector<SharedPointer<SparseMatrix> > temp;
02945 for(unsigned int k=0 ; k < precm[0][0].size(); k++) {
02946 temp.push_back(precm[i][j][k].convertSparseMatrix());
02947 }
02948 temp_vec.push_back(temp);
02949 }
02950 results.push_back(temp_vec);
02951 }
02952 return results;
02953 }
02954
02955 void FactoredPomdp::convertFactoredRewardTrans() {
02956
02957 ofstream debugfile;
02958 if (DEBUGFACTOREDCONVERSIONREWARD) {
02959 debugfile.open("debug_FactoredPomdp_convertFactoredReward.txt");
02960 }
02961
02962 if (!checkRewardFunctionHasOnlyPreviousTimeSliceAndAction()) {
02963 preprocessRewardFunction();
02964 }
02965
02966
02967
02968 vector<PreSparseMatrix> cOrewardPre;
02969 for (int i = 0; i < numMergedStatesX; i++) {
02970 PreSparseMatrix cm(numMergedStatesY, numActions);
02971 cOrewardPre.push_back(cm);
02972 }
02973
02974 int action;
02975 int startStateX;
02976 int startStateY;
02977 double reward;
02978 map<string, int> variableValues = getStartActionXYVarValues();
02979 action = startStateX = startStateY = 0;
02980 do{
02981
02982 double reward = 0;
02983 for(vector<Function>::iterator rewardFunc=rewardFunctionList.begin();rewardFunc!=rewardFunctionList.end();rewardFunc++){
02984
02985 SharedPointer<SparseTable> rewardTable = rewardFunc->sparseT;
02986
02987 vector<int> commonIndex;
02988 for(vector<string>::iterator cI=rewardTable->cIheader.begin();cI!=rewardTable->cIheader.end();cI++){
02989 commonIndex.push_back(variableValues[*cI]);
02990 }
02991 vector<SparseEntry>& entries = rewardTable->getSparseEntries(commonIndex);
02992
02993 for(int i=0;i<entries.size();i++){
02994 SparseEntry se = entries[i];
02995
02996
02997 for (unsigned int j = 0; j < se.uniqueIndex.size(); j++) {
02998 reward += se.uniqueIndex[j].value;
02999 }
03000 }
03001 }
03002 if (reward != 0) {
03003 cOrewardPre[startStateX].addEntries(startStateY,action, reward);
03004 }
03005 }while(getNextActionXYVarValues(variableValues,action,startStateX,startStateY));
03006
03007 vector<SharedPointer<SparseMatrix> > cOreward;
03008 for (unsigned int i = 0; i < cOrewardPre.size(); i++) {
03009 cOreward.push_back(cOrewardPre[i].convertSparseMatrix());
03010 if (DEBUGFACTOREDCONVERSIONREWARD) {
03011 debugfile << "R[x] (y,a): startx " << i << endl;
03012 cOreward[i]->write(debugfile);
03013 debugfile << endl;
03014 }
03015 }
03016 layer.R = cOreward;
03017 debugfile.close();
03018 }
03019
03020 void FactoredPomdp::convertFactoredTerminalStateReward()
03021 {
03022 ofstream debugfile;
03023 if (DEBUGFACTOREDCONVERSIONTERMINAL) {
03024 debugfile.open("debug_FactoredPomdp_convertFactoredTerminalStateReward.txt");
03025 }
03026 PreSparseMatrix terminalPre(numMergedStatesX, numMergedStatesY);
03027
03028 int startStateX;
03029 int startStateY;
03030 startStateX = startStateY = 0;
03031 double reward;
03032 map<string, int> variableValues = getStartXYVarValues();
03033 SparseEntry se;
03034 do{
03035 bool exist = false;
03036
03037 double reward = 0;
03038 for(vector<Function>::iterator tRewardFunc=terminalStateRewardFunctionList.begin();tRewardFunc!=terminalStateRewardFunctionList.end();tRewardFunc++){
03039
03040
03041 SharedPointer<SparseTable> tRewardTable = tRewardFunc->sparseT;
03042
03043 vector<int> commonIndex;
03044 for(vector<string>::iterator cI=tRewardTable->cIheader.begin();cI!=tRewardTable->cIheader.end();cI++){
03045 commonIndex.push_back(variableValues[*cI]);
03046 }
03047 vector<SparseEntry>& entries = tRewardTable->getSparseEntries(commonIndex);
03048
03049 for(int i=0;i<entries.size();i++){
03050 SparseEntry se = entries[i];
03051 exist = true;
03052
03053 for (unsigned int j = 0; j < se.uniqueIndex.size(); j++) {
03054 reward += se.uniqueIndex[j].value;
03055 }
03056 }
03057 }
03058
03059 if(exist){
03060 terminalPre.addEntries(startStateX,startStateY, reward);
03061 }
03062 }while(getNextXYVarValues(variableValues, startStateX, startStateY));
03063
03064 SharedPointer<SparseMatrix> terminalReward;
03065
03066 terminalReward = terminalPre.convertSparseMatrix();
03067 if (DEBUGFACTOREDCONVERSIONTERMINAL) {
03068 debugfile << "R(x, y)" << endl;
03069 terminalReward->write(debugfile);
03070 debugfile << endl;
03071 }
03072
03073 layer.terminalStateReward = terminalReward;
03074 debugfile.close();
03075
03076 }
03077
03078 void FactoredPomdp::convertFactoredBeliefCommon(ofstream& debugfile, bool printDebugFile) {
03079
03080 preprocessBeliefTables(debugfile, DEBUGFACTOREDCONVERSIONSTATE);
03081 mergeBeliefTables(debugfile, DEBUGFACTOREDCONVERSIONSTATE);
03082
03083
03084 mapFactoredBeliefIndexesToValue(finalBeliefTable);
03085
03086 }
03087
03088 void FactoredPomdp::convertFactoredBelief() {
03089
03090 ofstream debugfile;
03091 if (DEBUGFACTOREDCONVERSIONBELIEF)
03092 debugfile.open("debug_FactoredPomdp_convertFactoredBelief.txt");
03093 if (DEBUGFACTOREDCONVERSIONBELIEF)
03094 debugfile << "Converting Factored Belief State" << endl;
03095
03096 convertFactoredBeliefCommon(debugfile, DEBUGFACTOREDCONVERSIONBELIEF);
03097
03098 SparseVector cvX(numMergedStatesX);
03099 vector<double> vec_cvX(numMergedStatesX, 0.0);
03100 SparseVector cvY(numMergedStatesY);
03101 vector<double> vec_cvY(numMergedStatesY, 0.0);
03102
03103 int startX, startY, endX, endY;
03104 double probX, probY;
03105
03106 SparseEntry se;
03107 while(finalBeliefTable->getNext(se)){
03108 startX = startY = endX = endY = 0;
03109 probX = probY = 1.0;
03110
03111 vector<int> commonIndex = finalBeliefTable->getIterPosition();
03112
03113 for (unsigned int j = 0; j < commonIndex.size(); j++) {
03114
03115 StateObsAct* soa = mymap[finalBeliefTable->cIheader[j]];
03116 State* s = static_cast<State*> (soa);
03117
03118 if (s->getObserved())
03119 startX += commonIndex[j] * factoredPositionCIIndexMap[j];
03120 else
03121 startY += commonIndex[j] * factoredPositionCIIndexMap[j];
03122 }
03123
03124
03125 for (unsigned int j = 0; j < se.uniqueIndex.size(); j++) {
03126
03127 StateObsAct* soa = mymap[finalBeliefTable->uIheader[j]];
03128 State* s = static_cast<State*> (soa);
03129
03130 if (s->getObserved()) {
03131 endX += se.uniqueIndex[j].index * factoredPositionUIIndexMap[j];
03132 probX *= se.uniqueIndex[j].value;
03133 } else {
03134 endY += se.uniqueIndex[j].index * factoredPositionUIIndexMap[j];
03135 probY *= se.uniqueIndex[j].value;
03136 }
03137 }
03138
03139 if (startX != endX) {
03140 cerr << "for initial belief state, the entries should have common indexes correspoding to unique indexes" << endl;
03141 assert(false);
03142 }
03143 if (startY != endY) {
03144 cerr << "for initial belief state, the entries should have common indexes correspoding to unique indexes" << endl;
03145 assert(false);
03146 }
03147
03148 vec_cvX[startX] = probX;
03149 vec_cvY[startY] = probY;
03150
03151 }
03152
03153 int numRandomPositions = 0;
03154 for (unsigned int i=0 ; i < vec_cvX.size(); i++) {
03155 if (fabs(vec_cvX[i]) > 0.000001) {
03156 layer.initialStateX = i;
03157 numRandomPositions++;
03158 cvX.push_back(i, vec_cvX[i]);
03159 }
03160 }
03161
03162 for (unsigned int i=0 ; i < vec_cvY.size(); i++) {
03163 if (fabs(vec_cvY[i]) > 0.000001)
03164 cvY.push_back(i, vec_cvY[i]);
03165 }
03166
03167 if (numRandomPositions > 1)
03168 layer.initialStateX = -1;
03169
03170 layer.initialBeliefX = cvX;
03171 layer.initialBeliefY = cvY;
03172
03173
03174 if (DEBUGFACTOREDCONVERSIONBELIEF) {
03175 debugfile << "inital state X: " << layer.initialStateX << endl;
03176 debugfile << "initial belief X " << endl;
03177 layer.initialBeliefX.write(debugfile);
03178 debugfile << "\ninitial belief Y " << endl;
03179 layer.initialBeliefY.write(debugfile);
03180 }
03181 debugfile << endl;
03182 debugfile.close();
03183
03184 }
03185
03186
03187 void FactoredPomdp::convertFactoredBeliefReparam() {
03188
03189 ofstream debugfile;
03190 if (DEBUGFACTOREDCONVERSIONBELIEF)
03191 debugfile.open("debug_FactoredPomdp_convertFactoredBelief.txt");
03192 if (DEBUGFACTOREDCONVERSIONBELIEF)
03193 debugfile << "Converting Factored Belief State REPARAM" << endl;
03194
03195 convertFactoredBeliefCommon(debugfile, DEBUGFACTOREDCONVERSIONBELIEF);
03196
03197 SparseVector cvX(numMergedStatesX);
03198 vector<double> vec_cvX(numMergedStatesX, 0.0);
03199
03200 vector<SparseVector> cvY;
03201 for (unsigned int i=0; i < numMergedStatesX; i++) {
03202 SparseVector cv(numMergedStatesY);
03203 cvY.push_back(cv);
03204 }
03205 vector<vector<double> > vec_cvY;
03206 for (unsigned int i=0; i < numMergedStatesX; i++) {
03207 vector<double> vec_cv(numMergedStatesY,0.0);
03208 vec_cvY.push_back(vec_cv);
03209 }
03210
03211
03212 int startX, startY, endX, endY;
03213 double probX, probY;
03214 SparseEntry se;
03215 while(finalBeliefTable->getNext(se)){
03216 startX = startY = endX = endY = 0;
03217 probX = probY = 1.0;
03218 vector<int> commonIndex = finalBeliefTable->getIterPosition();
03219
03220
03221 for (unsigned int j = 0; j < commonIndex.size(); j++) {
03222
03223 StateObsAct* soa = mymap[finalBeliefTable->cIheader[j]];
03224 State* s = static_cast<State*> (soa);
03225
03226 if (s->getObserved())
03227 startX += commonIndex[j] * factoredPositionCIIndexMap[j];
03228 else
03229 startY += commonIndex[j] * factoredPositionCIIndexMap[j];
03230 }
03231
03232
03233 for (unsigned int j = 0; j < se.uniqueIndex.size(); j++) {
03234
03235 StateObsAct* soa = mymap[finalBeliefTable->uIheader[j]];
03236 State* s = static_cast<State*> (soa);
03237
03238 if (s->getObserved()) {
03239 endX += se.uniqueIndex[j].index * factoredPositionUIIndexMap[j];
03240 probX *= se.uniqueIndex[j].value;
03241 } else {
03242 endY += se.uniqueIndex[j].index * factoredPositionUIIndexMap[j];
03243 probY *= se.uniqueIndex[j].value;
03244 }
03245 }
03246
03247 if (startX != endX) {
03248 cerr << "for initial belief state, the entries should have common indexes correspoding to unique indexes" << endl;
03249 assert(false);
03250 }
03251 if (startY != endY) {
03252 cerr << "for initial belief state, the entries should have common indexes correspoding to unique indexes" << endl;
03253 assert(false);
03254 }
03255
03256 vec_cvX[startX] = probX;
03257 vec_cvY[startX][startY] = probY;
03258 }
03259 finalBeliefTable = NULL;
03260
03261 int numRandomPositions = 0;
03262 for (unsigned int i=0 ; i < vec_cvX.size(); i++) {
03263 if (fabs(vec_cvX[i]) > 0.000001) {
03264 layer.initialStateX = i;
03265 numRandomPositions++;
03266 cvX.push_back(i, vec_cvX[i]);
03267 }
03268 }
03269
03270 for (unsigned int i=0 ; i < vec_cvY.size(); i++) {
03271 for (unsigned int j=0; j < vec_cvY[0].size(); j++) {
03272 if (fabs(vec_cvY[i][j]) > 0.000001)
03273 cvY[i].push_back(j, vec_cvY[i][j]);
03274 }
03275 }
03276
03277 if (numRandomPositions > 1)
03278 layer.initialStateX = -1;
03279
03280 layer.initialBeliefX = cvX;
03281 layer.initialBeliefY_reparam = cvY;
03282
03283 if (DEBUGFACTOREDCONVERSIONBELIEF) {
03284 debugfile << "inital state X: " << layer.initialStateX << endl;
03285 debugfile << "initial belief X " << endl;
03286 layer.initialBeliefX.write(debugfile);
03287 debugfile << "\ninitial belief Y reparam" << endl;
03288 for (unsigned int i=0; i < layer.initialBeliefY_reparam.size(); i++) {
03289 debugfile << "when x is " << i << endl;
03290 layer.initialBeliefY_reparam[i].write(debugfile);
03291 }
03292 }
03293 debugfile << endl;
03294 debugfile.close();
03295
03296 }
03297
03298
03299
03300 void FactoredPomdp::preprocessBeliefTables(ofstream& debugfile, bool printDebugFile)
03301 {
03302 if(preprocessBeliefTablesDone)
03303 {
03304 return;
03305 }
03306 preprocessBeliefTablesDone = true;
03307
03308 for (unsigned int i = 0; i < beliefFunctionList.size(); i++) {
03309
03310 if (printDebugFile) {
03311 debugfile << "belief function before preprocess" << i << endl;
03312 beliefFunctionList[i].write(debugfile);
03313 debugfile << endl;
03314 }
03315 }
03316
03317
03318 processedBeliefFunctionList.clear();
03319 for(unsigned int i=0; i < beliefFunctionList.size(); i++) {
03320 processedBeliefFunctionList.push_back(beliefFunctionList[i]);
03321 }
03322
03323 for (unsigned int i = 0; i < processedBeliefFunctionList.size(); i++) {
03324
03325 vector<string> parents = processedBeliefFunctionList[i].getParents();
03326 SharedPointer<SparseTable> old = processedBeliefFunctionList[i].sparseT;
03327 vector<string> newParent;
03328 vector<int> newNumCIValues;
03329 if (!((parents.size() == 1) && (parents[0] == "null"))) {
03330
03331 newParent = processedBeliefFunctionList[i].getParents();
03332 newNumCIValues = old->numCIValues;
03333 }
03334 newParent.push_back(processedBeliefFunctionList[i].getVNameCurr());
03335 processedBeliefFunctionList[i].setParents(newParent);
03336
03337
03338
03339
03340 newNumCIValues.insert(newNumCIValues.end(), old->numUIValues.begin(), old->numUIValues.end());
03341 processedBeliefFunctionList[i].sparseT = SharedPointer<SparseTable> (new SparseTable(newParent, old->uIheader, newNumCIValues, old->numUIValues));
03342
03343 old->sortEntries();
03344 SparseEntry se;
03345 old->resetIterator();
03346 vector<int> newCI;
03347 while(old->getNext(se)){
03348 newCI = old->getIterPosition();
03349 if( (parents.size() == 1) && (parents[0] == "null") ){
03350 newCI.clear();
03351 }
03352 newCI.push_back(se.uniqueIndex[0].index);
03353 processedBeliefFunctionList[i].sparseT->add(newCI, se);
03354 }
03355 }
03356
03357 for (unsigned int i = 0; i < processedBeliefFunctionList.size(); i++) {
03358
03359 if (printDebugFile) {
03360 debugfile << "belief function after preprocess" << i << endl;
03361 processedBeliefFunctionList[i].write(debugfile);
03362 debugfile << endl;
03363 }
03364 }
03365
03366 }
03367
03368 void FactoredPomdp::mergeBeliefTables(ofstream& debugfile, bool printDebugFile) {
03369
03370 unsigned int pos;
03371
03372 finalBeliefTable = processedBeliefFunctionList[0].sparseT;
03373 SharedPointer<SparseTable> finalBeliefTable2;
03374 for (unsigned int i = 1; i < processedBeliefFunctionList.size(); i++) {
03375
03376 finalBeliefTable = SparseTable::join(*finalBeliefTable,
03377 *processedBeliefFunctionList[i].sparseT, BELIEFFUNCTION);
03378
03379 if (printDebugFile) {
03380 debugfile << "Intermediate finalBeliefTable " << i << endl;
03381 finalBeliefTable->write(debugfile);
03382 debugfile << endl;
03383 }
03384
03385 }
03386
03387 }
03388
03389
03390 void FactoredPomdp::convertFactoredVariables() {
03391
03392 ofstream debugfile;
03393 if (DEBUGFACTOREDCONVERSIONOTHERS)
03394 debugfile.open("debug_FactoredPomdp_convertFactoredOthers.txt");
03395
03396 layer.discount = discount;
03397 if (DEBUGFACTOREDCONVERSIONOTHERS)
03398 debugfile << "\ndiscount: " << layer.discount << endl;
03399
03400 layer.numActions = numActions;
03401 if (DEBUGFACTOREDCONVERSIONOTHERS)
03402 debugfile << "num actions: " << layer.numActions << endl;
03403
03404 layer.numObservations = numObservations;
03405 if (DEBUGFACTOREDCONVERSIONOTHERS)
03406 debugfile << "num observations: " << layer.numObservations << endl;
03407
03408 layer.numStatesObs = 1;
03409 layer.numStatesUnobs = 1;
03410
03411 for (unsigned int i = 0; i < stateList.size(); i++) {
03412 if (stateList[i].getObserved())
03413 layer.numStatesObs *= stateList[i].getValueEnum().size();
03414 else
03415 layer.numStatesUnobs *= stateList[i].getValueEnum().size();
03416 }
03417
03418 if (DEBUGFACTOREDCONVERSIONOTHERS)
03419 debugfile << "num states obs: " << layer.numStatesObs << endl;
03420 if (DEBUGFACTOREDCONVERSIONOTHERS)
03421 debugfile << "num states unobs: " << layer.numStatesUnobs << endl;
03422
03423 debugfile.close();
03424 }
03425
03426
03427 SharedPointer<SparseTable> FactoredPomdp::expandObsRewSparseTable(SharedPointer<SparseTable> st, int whichFunction) {
03428
03429
03430
03431
03432 vector<string> canonicalValues;
03433 if (whichFunction == OBSERVFUNCTION) {
03434 canonicalValues = canonicalNameCurr;
03435 }else if (whichFunction == REWARDFUNCTION) {
03436 canonicalValues = canonicalNamePrev;
03437 }else if (whichFunction == TERMINALFUNCTION) {
03438 canonicalValues = canonicalNameForTerminal;
03439 }else {
03440 assert(false);
03441 }
03442
03443 vector<string> cIheader, uIheader;
03444 vector<int> numUIValues, numCIValues;
03445 cIheader = st->cIheader;
03446 uIheader = st->uIheader;
03447 numUIValues = st->numUIValues;
03448 numCIValues = st->numCIValues;
03449
03450 for (unsigned int i = 0; i < canonicalValues.size() ; i++) {
03451 if (!(st->containsCI(canonicalValues[i]))) {
03452 cIheader.push_back(canonicalValues[i]);
03453 StateObsAct* soa = mymap[canonicalValues[i]];
03454 numCIValues.push_back(soa->getValueEnum().size());
03455
03456 }
03457 }
03458
03459 SharedPointer<SparseTable> newSt (new SparseTable(cIheader, uIheader, numCIValues, numUIValues));
03460
03461 SparseEntry se;
03462
03463
03464
03465 vector<int> newStPos = newSt->getIterBegin();
03466 bool hasNext = true;
03467 while(hasNext){
03468
03469 vector<int> oldStPos;
03470 for(int i=0;i<st->cIheader.size();i++){
03471 oldStPos.push_back(newStPos[i]);
03472 }
03473 vector<SparseEntry> entries = st->getSparseEntries(oldStPos);
03474
03475 for(int i=0;i<entries.size();i++){
03476 newSt->add(newStPos, entries[i]);
03477 }
03478
03479 hasNext = newSt->getNextCI(newStPos);
03480 }
03481
03482 return newSt;
03483 }
03484
03485
03486
03488
03489
03490 void FactoredPomdp::preprocessRewardFunction()
03491 {
03492 ofstream debugfile;
03493 if (DEBUGFACTOREDCONVERSIONREWARD) {
03494 debugfile.open("debug_FactoredPomdp_preprocessRewardTable.txt");
03495 }
03496
03497 vector<SharedPointer<SparseTable> > rewardTables;
03498
03499
03500 for (unsigned int i=0; i < rewardFunctionList.size(); i++) {
03501 debugfile << "reward function " << i <<endl;
03502
03503
03504
03505 vector<Function> functionsDepend;
03506 set<string> curTimeVars = getRewardFunctionCurrentTimeSliceVars(&rewardFunctionList[i] );
03507 for(set<string>::iterator it=curTimeVars.begin();it!=curTimeVars.end();it++)
03508 {
03509 functionsDepend.push_back(*mapFunc[*it]);
03510 }
03511 functionsDepend.push_back(rewardFunctionList[i]);
03512
03513
03514 SharedPointer<SparseTable> rewardTable = mergeTables(&functionsDepend, BELIEFFUNCTION, debugfile, DEBUGFACTOREDCONVERSIONREWARD);
03515 rewardTable->write(debugfile);
03516
03517 debugfile << "after reducing common indexes that are not matched with unique indexes" << endl;
03518 rewardTable = reduceUnmatchedCIWithUI(rewardTable, debugfile, DEBUGFACTOREDCONVERSIONREWARD);
03519 rewardTable->write(debugfile);
03520
03521 debugfile << "after removing redundant unique indexes fron reward table" << endl;
03522 rewardTable = removeRedundantUIsFromReward(rewardTable);
03523 rewardTable->write(debugfile);
03524
03525
03526 rewardFunctionList[i].sparseT = rewardTable;
03527 }
03528 }
03529
03530 SharedPointer<SparseTable> FactoredPomdp::removeRedundantUIsFromReward(SharedPointer<SparseTable> st) {
03531
03532
03533 vector<bool> indexesToReward;
03534 vector<string> newUIHeader;
03535 vector<string> newCIHeader;
03536 vector<int> newNumCIValues;
03537 vector<int> newNumUIValues;
03538 for (unsigned int i=0; i < st->uIheader.size(); i++) {
03539 if (checkRewardNameExists(st->uIheader[i])) {
03540 newUIHeader.push_back(st->uIheader[i]);
03541 newNumUIValues.push_back(st->numUIValues[i]);
03542 indexesToReward.push_back(true);
03543 }else {
03544 indexesToReward.push_back(false);
03545 }
03546 }
03547
03548 SharedPointer<SparseTable> st2(new SparseTable(st->cIheader, newUIHeader, st->numCIValues, newNumUIValues));
03549
03550 double prob;
03551 SparseEntry se;
03552 st->resetIterator();
03553 while(st->getNext(se)){
03554 SparseEntry newSe;
03555 vector<int> ci = st->getIterPosition();
03556 prob = 1.0;
03557 for (unsigned int j=0; j < se.uniqueIndex.size(); j++) {
03558 if (!(indexesToReward[j])) prob *= se.uniqueIndex[j].value;
03559 }
03560
03561 for (unsigned int j=0; j < se.uniqueIndex.size(); j++) {
03562 if ((indexesToReward[j])) {
03563 UniqueIndex uq;
03564 uq.index = 0;
03565 uq.value = se.uniqueIndex[j].value * prob;
03566 newSe.uniqueIndex.push_back(uq);
03567 }
03568 }
03569 st2->add(ci, newSe);
03570 }
03571 return st2;
03572 }
03573
03574
03576
03577
03578
03579 map<string, string> FactoredPomdp::getActionsSymbols(int actionNum) {
03580
03581 cout << "getActionsSymbols" << endl;
03582
03583 map<string, string> result;
03584
03585 int quotient, remainder;
03586 quotient = actionNum;
03587 for (int i = (int) actionList.size() - 1; i >= 0; i--) {
03588
03589 ObsAct act = actionList[i];
03590
03591 remainder = quotient % act.getValueEnum().size();
03592 result[act.getVName()] = act.getValueEnum()[remainder];
03593 quotient = quotient / act.getValueEnum().size();
03594
03595 }
03596 return result;
03597 }
03598
03599
03600 map<string, string> FactoredPomdp::getFactoredObservedStatesSymbols(int stateNum) {
03601
03602 cout << "getFactoredObservedStatesSymbols" << endl;
03603
03604 map<string, string> result;
03605
03606 int quotient, remainder;
03607 quotient = stateNum;
03608 for (int i = (int) stateList.size() - 1; i >= 0; i--) {
03609
03610 State s = stateList[i];
03611 if (s.getObserved()) {
03612
03613 remainder = quotient % s.getValueEnum().size();
03614 result[s.getVNamePrev()] = s.getValueEnum()[remainder];
03615 result[s.getVNameCurr()] = s.getValueEnum()[remainder];
03616 quotient = quotient / s.getValueEnum().size();
03617 }
03618 }
03619 return result;
03620 }
03621
03622
03623 map<string, string> FactoredPomdp::getFactoredUnobservedStatesSymbols(int stateNum) {
03624
03625 cout << "getFactoredUnObservedStatesSymbols" << endl;
03626
03627 map<string, string> result;
03628
03629 int quotient, remainder;
03630 quotient = stateNum;
03631 for (int i = (int) stateList.size() - 1; i >= 0; i--) {
03632
03633 State s = stateList[i];
03634 if (!(s.getObserved())) {
03635
03636 remainder = quotient % s.getValueEnum().size();
03637 result[s.getVNamePrev()] = s.getValueEnum()[remainder];
03638 result[s.getVNameCurr()] = s.getValueEnum()[remainder];
03639 quotient = quotient / s.getValueEnum().size();
03640 }
03641 }
03642 return result;
03643 }
03644
03645 map<string, string> FactoredPomdp::getObservationsSymbols(int observationNum) {
03646
03647 cout << "getObservationsSymbols" << endl;
03648
03649 map<string, string> result;
03650
03651 int quotient, remainder;
03652 quotient = observationNum;
03653 for (int i = (int) observationList.size() - 1; i >= 0; i--) {
03654
03655 ObsAct obs = observationList[i];
03656
03657 remainder = quotient % obs.getValueEnum().size();
03658 result[obs.getVName()] = obs.getValueEnum()[remainder];
03659 quotient = quotient / obs.getValueEnum().size();
03660
03661 }
03662 return result;
03663 }
03664
03665
03666
03667 map<string, int> FactoredPomdp::getStartActionXYVarValues(){
03668 map<string, int> varValues = getStartXYVarValues();
03669 for(int i=0;i<actionList.size();i++){
03670 varValues[actionList[i].getVName()] = 0;
03671 }
03672 return varValues;
03673 }
03674
03675 map<string, int> FactoredPomdp::getStartXYVarValues(){
03676 map<string, int> varValues;
03677 for(int i=0;i<stateList.size();i++){
03678 varValues[stateList[i].getVNamePrev()] = 0;
03679 varValues[stateList[i].getVNameCurr()] = 0;
03680 }
03681 return varValues;
03682 }
03683
03684
03685
03686 map<string, int> FactoredPomdp::getStartActionSVarValues(){
03687 map<string, int> varValues = getStartSVarValues();
03688 for(int i=0;i<actionList.size();i++){
03689 varValues[actionList[i].getVName()] = 0;
03690 }
03691 return varValues;
03692 }
03693
03694 map<string, int> FactoredPomdp::getStartSVarValues(){
03695 map<string, int> varValues;
03696 for(int i=0;i<stateList.size();i++){
03697 varValues[stateList[i].getVNamePrev()] = 0;
03698 varValues[stateList[i].getVNameCurr()] = 0;
03699 }
03700 return varValues;
03701 }
03702
03703
03704 bool FactoredPomdp::getNextActionXYVarValues(map<string, int> &curValues, int &action, int &stateX, int &stateY){
03705
03706 if(getNextXYVarValues(curValues, stateX, stateY)){
03707 return true;
03708 }
03709
03710 for(int i=actionList.size()-1;i>=0;i--){
03711 ObsAct act = actionList[i];
03712 if(curValues[act.getVName()] >= act.getValueEnum().size() -1){
03713
03714 curValues[act.getVName()]=0;
03715 }
03716 else{
03717 curValues[act.getVName()]++;
03718 action++;
03719 return true;
03720 }
03721 }
03722 action=0;
03723
03724 return false;
03725 }
03726
03727
03728 bool FactoredPomdp::getNextActionXXpYVarValues(map<string, int> &curValues, int &action, int &stateX, int &stateXp, int &stateY){
03729
03730 for(int i=stateList.size()-1;i>=0;i--){
03731 State s = stateList[i];
03732 if (!s.getObserved()) {
03733 if(curValues[s.getVNamePrev()] >= s.getValueEnum().size()-1){
03734
03735 curValues[s.getVNamePrev()]=0;
03736 curValues[s.getVNameCurr()]=0;
03737 }
03738 else{
03739 curValues[s.getVNamePrev()]++;
03740 curValues[s.getVNameCurr()]++;
03741 stateY++;
03742 return true;
03743 }
03744 }
03745 }
03746
03747 stateY=0;
03748
03749
03750 for(int i=stateList.size()-1;i>=0;i--){
03751 State s = stateList[i];
03752 if (s.getObserved()) {
03753
03754 if(curValues[s.getVNamePrev()] >= s.getValueEnum().size() -1){
03755
03756 curValues[s.getVNamePrev()]=0;
03757 }
03758 else{
03759 curValues[s.getVNamePrev()]++;
03760 stateX++;
03761 return true;
03762 }
03763 }
03764 }
03765 stateX=0;
03766
03767
03768 for(int i=stateList.size()-1;i>=0;i--){
03769 State s = stateList[i];
03770 if (s.getObserved()) {
03771
03772 if(curValues[s.getVNameCurr()] >= s.getValueEnum().size() -1){
03773
03774 curValues[s.getVNameCurr()]=0;
03775 }
03776 else{
03777 curValues[s.getVNameCurr()]++;
03778 stateXp++;
03779 return true;
03780 }
03781 }
03782 }
03783 stateXp =0;
03784
03785
03786 for(int i=actionList.size()-1;i>=0;i--){
03787 ObsAct act = actionList[i];
03788 if(curValues[act.getVName()] >= act.getValueEnum().size() -1){
03789
03790 curValues[act.getVName()]=0;
03791 }
03792 else{
03793 curValues[act.getVName()]++;
03794 action++;
03795 return true;
03796 }
03797 }
03798 action=0;
03799
03800 return false;
03801 }
03802
03803
03804 bool FactoredPomdp::getNextXYVarValues(map<string, int> &curValues, int &stateX, int &stateY){
03805
03806 for(int i=stateList.size()-1;i>=0;i--){
03807 const State& s = stateList[i];
03808 if (!s.getObserved()) {
03809 if(curValues[s.getVNamePrev()] >= s.getValueEnum().size()-1){
03810
03811 curValues[s.getVNamePrev()]=0;
03812 curValues[s.getVNameCurr()]=0;
03813 }
03814 else{
03815 curValues[s.getVNamePrev()]++;
03816 curValues[s.getVNameCurr()]++;
03817 stateY++;
03818 return true;
03819 }
03820 }
03821 }
03822
03823 stateY=0;
03824
03825
03826 for(int i=stateList.size()-1;i>=0;i--){
03827 const State& s = stateList[i];
03828 if (s.getObserved()) {
03829
03830 if(curValues[s.getVNamePrev()] >= s.getValueEnum().size() -1){
03831
03832 curValues[s.getVNamePrev()]=0;
03833 curValues[s.getVNameCurr()]=0;
03834 }
03835 else{
03836 curValues[s.getVNamePrev()]++;
03837 stateX++;
03838 curValues[s.getVNameCurr()]++;
03839 return true;
03840 }
03841 }
03842 }
03843
03844 stateX=0;
03845 return false;
03846 }
03847
03848
03849 bool FactoredPomdp::getNextActionSVarValues(map<string, int> &curValues, int &action, int &stateNum){
03850
03851 if(getNextSVarValues(curValues, stateNum)){
03852 return true;
03853 }
03854
03855 for(int i=actionList.size()-1;i>=0;i--){
03856 ObsAct act = actionList[i];
03857 if(curValues[act.getVName()] >= act.getValueEnum().size() -1){
03858
03859 curValues[act.getVName()]=0;
03860 }
03861 else{
03862 curValues[act.getVName()]++;
03863 action++;
03864 return true;
03865 }
03866 }
03867 action=0;
03868
03869 return false;
03870 }
03871
03872
03873 bool FactoredPomdp::getNextSVarValues(map<string, int> &curValues,int &stateNum){
03874
03875 for(int i=stateList.size()-1;i>=0;i--){
03876 State s = stateList[i];
03877 if(curValues[s.getVNamePrev()] >= s.getValueEnum().size()-1){
03878
03879 curValues[s.getVNamePrev()]=0;
03880 curValues[s.getVNameCurr()]=0;
03881 }
03882 else{
03883 curValues[s.getVNamePrev()]++;
03884 curValues[s.getVNameCurr()]++;
03885 stateNum++;
03886 return true;
03887 }
03888 }
03889
03890 stateNum=0;
03891 return false;
03892 }
03893