#include <fstream>
#include <iostream>
#include <cassert>
#include <cfloat>
#include <cstdlib>
#include <cstdio>
extern "C"{
#include "xml_parse_lib.h"
}
#include "AlphaVectorPolicy.h"
#include "AlphaPlanePool.h"
#include "AlphaPlanePoolSet.h"

#define MaxStr 2048


using namespace std;
using namespace momdp;

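// Constructs an empty policy for the given MOMDP problem. The alpha-plane
// pool set is allocated and initialized here; the actual alpha vectors are
// loaded later by readFromFile().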
AlphaVectorPolicy::AlphaVectorPolicy(SharedPointer<MOMDP> problem) : valueAction(0)
{
    this->problem = problem;
    alphaPlanePoolSet = new AlphaPlanePoolSet(NULL);
    alphaPlanePoolSet->setProblem(problem);
    alphaPlanePoolSet->initialize();
}

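// Trims leading and trailing spaces and tabs from a string.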
static std::string stripWhiteSpace(const std::string& s)
{
    string::size_type p1, p2;
    p1 = s.find_first_not_of(" \t");
    if (string::npos == p1) {
        return s;
    } else {
        p2 = s.find_last_not_of(" \t") + 1;
        return s.substr(p1, p2 - p1);
    }
}

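// Returns the action attribute of the first alpha vector read from the
// policy file (0 if no policy has been loaded yet).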
int AlphaVectorPolicy::getValueAction() {
    return this->valueAction;
}

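// Loads an alpha-vector policy from an XML policy file. The expected layout
// is a root <Policy type="value"> element containing an <AlphaVector
// vectorLength="..."> element whose children are <Vector> or <SparseVector>
// entries, each tagged with "action" and "obsValue" attributes. Exits the
// program on any format mismatch.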
bool AlphaVectorPolicy::readFromFile(const std::string& inFileName)
{
    char tag[MaxStr], contents[MaxStr], tagname[MaxStr], attrname[MaxStr], value[MaxStr];
    int linum = 0;
    FILE *infile = 0;

    infile = fopen(inFileName.c_str(), "r");
    if(infile == 0)
    {
        cerr << "ERROR: couldn't open " << inFileName << " for reading " << endl;
        exit(EXIT_FAILURE);
    }
    // Read and discard the leading tag (typically the XML declaration) and its attributes.
    xml_parse( infile, tag, contents, MaxStr, &linum );
    xml_grab_tag_name( tag, tagname, MaxStr );
    xml_grab_attrib( tag, attrname, value, MaxStr );
    while(value[0] != '\0'){
        xml_grab_attrib( tag, attrname, value, MaxStr );
    }

    // The root element must be <Policy type="value">.
    xml_parse( infile, tag, contents, MaxStr, &linum );
    xml_grab_tag_name( tag, tagname, MaxStr );
    if(string(tagname) != "Policy"){
        cerr << "ERROR:\n\tExpected Policy tag as root" << endl;
        exit(EXIT_FAILURE);
    }

    xml_grab_attrib( tag, attrname, value, MaxStr );
    while(value[0] != '\0'){
        if(string(attrname) == "type" && string(value) != "value"){
            cerr << "ERROR:\n\tOnly policy of type \"value\" is supported" << endl;
            exit(EXIT_FAILURE);
        }
        xml_grab_attrib( tag, attrname, value, MaxStr );
    }
    // The next element must be <AlphaVector vectorLength="...">.
    xml_parse( infile, tag, contents, MaxStr, &linum );
    xml_grab_tag_name( tag, tagname, MaxStr );

    if(string(tagname) != "AlphaVector"){
        cerr << "ERROR:\n\tExpected AlphaVector tag" << endl;
        exit(EXIT_FAILURE);
    }
    int vectorLength = -1;

    xml_grab_attrib( tag, attrname, value, MaxStr );

    while(value[0] != '\0'){
        if(string(attrname) == "vectorLength")
        {
            vectorLength = atoi(value);
        }
        xml_grab_attrib( tag, attrname, value, MaxStr );
    }

    if(vectorLength == -1){
        cerr << "ERROR:\n\tCannot find integer attribute vectorLength in AlphaVector Tag" << endl;
        exit(EXIT_FAILURE);
    }
    if(vectorLength != (int)problem->YStates->size()){
        cerr << "ERROR:\n\tVector length does not match problem unobserved state size" << endl;
        exit(EXIT_FAILURE);
    }

    AlphaPlane plane;
    double entryVal;
    xml_parse_tag_only( infile, tag, MaxStr, &linum );
    xml_grab_tag_name( tag, tagname, MaxStr);

    // Remember the action of the first vector in the file; getValueAction()
    // reports this value. The action is pulled straight out of the raw tag
    // text by locating the substring action="<number>".
    string str2, str3, str4;
    size_t pos;
    str2 = string(tag);
    pos = str2.find("action");
    if(pos != string::npos){
        str3 = str2.substr(pos + 8);    // skip past: action="
        pos = str3.find("\"");
        str4 = str3.substr(0, pos);
        this->valueAction = atoi(str4.c_str());
    }

    // Each child tag describes one alpha vector, either dense (<Vector>) or
    // sparse (<SparseVector>), tagged with its action and observed-state value.
    while( tag[0] != '\0'){

        plane.alpha->resize(problem->YStates->size());
        bool foundAct = false; bool foundObs = false;
        xml_grab_attrib( tag, attrname, value, MaxStr );
        while(value[0] != '\0'){
            if(string(attrname) == "action"){
                plane.action = atoi(value);
                foundAct = true;
            }
            else if(string(attrname) == "obsValue"){
                plane.sval = atoi(value);
                foundObs = true;
            }
            xml_grab_attrib( tag, attrname, value, MaxStr );
        }

        if(!foundAct){
            cerr << "ERROR:\n\tCannot find integer attribute action in Vector tag" << endl;
            exit(EXIT_FAILURE);
        }
        if(!foundObs){
            cerr << "ERROR:\n\tCannot find integer attribute obsValue in Vector tag" << endl;
            exit(EXIT_FAILURE);
        }

        if((size_t)plane.sval >= alphaPlanePoolSet->set.size())
        {
            cerr << "ERROR:\n\tPolicy file has more observed states than the given model. Are you using the correct policy file?" << endl;
            exit(EXIT_FAILURE);
        }

        if(string(tagname) == "Vector"){
            // Dense vector: whitespace-separated entries, one per unobserved state.
            FOR(i, vectorLength){
                char dvalue[200];
                if(fscanf(infile, "%199s", dvalue) == EOF){
                    cerr << "ERROR:\n\tVector is too short, are you using the correct policy file?" << endl;
                    exit(EXIT_FAILURE);
                }
                plane.alpha->data[i] = atof(dvalue);
            }
            alphaPlanePoolSet->set[plane.sval]->planes.push_back(plane.duplicate());
        }
        else if(string(tagname) == "SparseVector"){
            // Sparse vector: a sequence of <Entry> children, each containing "index value".
            xml_parse( infile, tag, contents, MaxStr, &linum );
            xml_grab_tag_name( tag, tagname, MaxStr);
            while(string(tagname) == "Entry"){
                int index;
                sscanf(contents, "%d %lf", &index, &entryVal);
                plane.alpha->data[index] = entryVal;

                // Advance past the closing tag and read the following tag.
                xml_parse( infile, tag, contents, MaxStr, &linum );
                xml_parse( infile, tag, contents, MaxStr, &linum );
                xml_grab_tag_name( tag, tagname, MaxStr);
            }
            alphaPlanePoolSet->set[plane.sval]->planes.push_back(plane.duplicate());
        }
        // Advance to the next vector tag (or the end of the AlphaVector block).
        xml_parse(infile, tag, contents, MaxStr, &linum );
        xml_parse_tag_only( infile, tag, MaxStr, &linum );
        xml_grab_tag_name( tag, tagname, MaxStr);
    }
    fclose(infile);

    return true;
}

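// Returns the best action for a joint belief (an observed-state value plus a
// belief over unobserved states) by evaluating the stored alpha vectors
// directly, i.e. without a one-step lookahead.
//
// Illustrative sketch only (the caller and file name below are hypothetical,
// not part of this file):
//
//   AlphaVectorPolicy policy(problem);
//   policy.readFromFile("out.policy");   // policy file produced by the solver
//   int a = policy.getBestAction(currentBelief);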
int AlphaVectorPolicy::getBestAction(BeliefWithState& b)
{
    double maxValue;
    return getBestAction(b, maxValue);
}

int AlphaVectorPolicy::getBestAction(BeliefWithState& b, REAL_VALUE& maxValue)
{
    SharedPointer<AlphaPlane> bestAlpha = alphaPlanePoolSet->set[b.sval]->getBestAlphaPlane1(b.bvec);
    maxValue = inner_prod(*bestAlpha->alpha, *b.bvec);
    return bestAlpha->action;
}

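// Returns the best action for a joint belief using a one-step lookahead: for
// each action, the expected immediate reward plus the discounted value of the
// successor beliefs (evaluated with the stored alpha vectors) is computed,
// and the maximizing action is returned.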
int AlphaVectorPolicy::getBestActionLookAhead(BeliefWithState& b)
{
    double maxValue;
    return getBestActionLookAhead(b, maxValue);
}

int AlphaVectorPolicy::getBestActionLookAhead(BeliefWithState& b, REAL_VALUE& maxValue)
{
    double maxActionLB = -DBL_MAX;
    int maxAction = 0;
    obs_prob_vector opv;
    SharedPointer<BeliefWithState> nextb;
    obsState_prob_vector spv;
    SharedPointer<SparseVector> jspv (new SparseVector());

    DEBUG_TRACE(cout << "getBestActionLookAhead" << endl; );

    for(Actions::iterator aIter = problem->actions->begin(); aIter != problem->actions->end(); aIter ++)
    {
        int a = aIter.index();
        DEBUG_TRACE(cout << "a " << a << endl; );

        double sum = 0.0;
        double immediateReward = problem->rewards->getReward(b, a);

        // spv: distribution over the next observed state value.
        if (problem->XStates->size() == 1)
        {
            DEBUG_TRACE(cout << "spv1 " << endl; );
            spv.resize(1);
            spv.push_back(0, 1.0);
        }
        else
        {
            DEBUG_TRACE(cout << "spv2 " << endl; );
            problem->getObsStateProbVector(spv, b, a);
        }

        DEBUG_TRACE( cout << "spv " << endl; );
        DEBUG_TRACE( spv.write(cout) << endl; );

        FOR(Xn, spv.size())
        {
            DEBUG_TRACE(cout << "Xn " << Xn << endl; );
            double sprob = spv(Xn);
            if (sprob > OBS_IS_ZERO_EPS)
            {
                // jspv: joint distribution over next unobserved states given Xn.
                problem->getJointUnobsStateProbVector(*jspv, &b, a, Xn);

                DEBUG_TRACE( cout << "jspv " << endl; );
                DEBUG_TRACE( jspv->write(cout) << endl; );

                // opv: distribution over observations given the action and Xn.
                problem->getObsProbVectorFast(opv, a, Xn, *jspv);

                DEBUG_TRACE( cout << "opv " << endl; );
                DEBUG_TRACE( opv.write(cout) << endl; );

                FOR(o, opv.size())
                {
                    DEBUG_TRACE(cout << "o " << o << endl; );

                    double oprob = opv(o);
                    if (oprob > OBS_IS_ZERO_EPS)
                    {
                        double obsProb = oprob * sprob;

                        nextb = problem->beliefTransition->nextBelief2(NULL, a, o, Xn, jspv);

                        DEBUG_TRACE( cout << "nextb sval" << nextb->sval << endl; );
                        DEBUG_TRACE( nextb->bvec->write(cout) << endl; );

                        // Value of the successor belief under the stored alpha vectors.
                        SharedPointer<AlphaPlane> bestAlpha = alphaPlanePoolSet->getBestAlphaPlane1(*nextb);
                        double childLB = inner_prod(*bestAlpha->alpha, *nextb->bvec);
                        sum += childLB * obsProb;

                        DEBUG_TRACE( cout << "childLB " << childLB << endl; );
                        DEBUG_TRACE( cout << "sum " << sum << endl; );
                    }
                }
            }
        }
        sum *= problem->getDiscount();
        sum += immediateReward;

        DEBUG_TRACE( cout << "sum " << sum << endl; );

        if(sum > maxActionLB)
        {
            maxActionLB = sum;
            maxAction = a;
            DEBUG_TRACE( cout << "maxAction = " << maxAction << endl; );
        }
        assert(maxActionLB != -DBL_MAX);
    }
    maxValue = maxActionLB;
    return maxAction;
}

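// One-step lookahead when the observed state itself is uncertain: belX is a
// distribution over observed-state values and belYs holds one unobserved-state
// belief per observed-state value. For each action, the expected discounted
// successor value (summed over observations and next observed states) plus the
// expected immediate reward is computed, and the maximizing action is returned.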
int AlphaVectorPolicy::getBestActionLookAhead(std::vector<belief_vector>& belYs, DenseVector& belX)
{
    unsigned int bestAction = 0;
    double bestValue = 0, observationValue, actionValue;

    SharedPointer<BeliefWithState> nextStB (new BeliefWithState());
    SharedPointer<BeliefWithState> currStB (new BeliefWithState());

    FOR (a, problem->getNumActions())
    {
        actionValue = 0;

        FOR (o, problem->observations->size())
        {
            observationValue = 0;
            FOR (xn, problem->XStates->size())
            {
                if (!((problem->obsProb->getMatrix(a, xn))->isColumnEmpty(o)))
                {
                    SparseVector jspv_sum;
                    jspv_sum.resize(problem->YStates->size());
                    belief_vector next_belY;

                    bool nonzero_p_xn = false;

                    FOR (xc, problem->XStates->size())
                    {
                        if (!(belX(xc) == 0))
                        {
                            if (!((problem->XTrans->getMatrix(a, xc))->isColumnEmpty(xn)))
                            {
                                DenseVector tmp, tmp1;
                                DenseVector Bc;
                                SparseVector jspv;

                                copy(Bc, belYs[xc]);

                                // Contribution of current observed state xc to the joint
                                // distribution over next unobserved states.
                                emult_column( tmp, *(problem->XTrans->getMatrix(a, xc)), xn, Bc );
                                mult( tmp1, *(problem->YTrans->getMatrixTr(a, xc, xn)), tmp );
                                copy(jspv, tmp1);

                                jspv *= belX(xc);
                                jspv_sum += jspv;

                                if (nonzero_p_xn == false) nonzero_p_xn = true;
                            }
                        }
                    }

                    if (nonzero_p_xn)
                    {
                        // Weight by the observation probabilities; the 1-norm of the
                        // result is the joint probability of (xn, o).
                        emult_column( next_belY, *(problem->obsProb->getMatrix(a, xn)), o, jspv_sum);

                        if (!(next_belY.data.size() == 0)) {

                            double jp_xn_and_o = next_belY.norm_1();

                            // Normalize to obtain the successor belief over unobserved states.
                            next_belY *= (1.0/next_belY.norm_1());

                            nextStB->sval = xn;
                            *nextStB->bvec = next_belY;

                            double childLB = inner_prod(*alphaPlanePoolSet->getBestAlphaPlane1(*nextStB)->alpha, *nextStB->bvec);

                            observationValue += jp_xn_and_o * childLB;
                        }
                    }
                }
            }

            actionValue += observationValue;
        }

        actionValue = problem->getDiscount() * actionValue;

        // Add the expected immediate reward over the current observed-state distribution.
        FOR (xc, problem->XStates->size())
        {
            if (!(belX(xc) == 0))
            {
                currStB->sval = xc;
                *currStB->bvec = belYs[xc];

                actionValue += (belX(xc) * problem->rewards->getReward(*currStB, a));
            }
        }

        if (a == 0)
        {
            bestValue = actionValue;
        }
        else
        {
            if (actionValue > bestValue)
            {
                bestValue = actionValue;
                bestAction = a;
            }
        }
    }
    return bestAction;
}

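// Same one-step lookahead as above, but with a single unobserved-state belief
// belY shared across all observed-state values in belX.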
int AlphaVectorPolicy::getBestActionLookAhead(SharedPointer<belief_vector>& belY, DenseVector& belX)
{
    unsigned int bestAction = 0;
    double bestValue = 0, observationValue, actionValue;

    SharedPointer<BeliefWithState> nextStB (new BeliefWithState());
    SharedPointer<BeliefWithState> currStB (new BeliefWithState());

    FOR (a, problem->getNumActions())
    {
        actionValue = 0;

        FOR (o, problem->observations->size())
        {
            observationValue = 0;
            FOR (xn, problem->XStates->size())
            {
                if (!((problem->obsProb->getMatrix(a, xn))->isColumnEmpty(o)))
                {
                    SparseVector jspv_sum;
                    jspv_sum.resize(problem->YStates->size());
                    belief_vector next_belY;

                    bool nonzero_p_xn = false;

                    FOR (xc, problem->XStates->size())
                    {
                        if (!(belX(xc) == 0))
                        {
                            if (!((problem->XTrans->getMatrix(a, xc))->isColumnEmpty(xn)))
                            {
                                DenseVector tmp, tmp1;
                                DenseVector Bc;
                                SparseVector jspv;

                                // The same belief over unobserved states is used for every
                                // current observed state xc.
                                copy(Bc, *belY);

                                emult_column( tmp, *(problem->XTrans->getMatrix(a, xc)), xn, Bc );
                                mult( tmp1, *(problem->YTrans->getMatrixTr(a, xc, xn)), tmp );
                                copy(jspv, tmp1);

                                jspv *= belX(xc);
                                jspv_sum += jspv;

                                if (nonzero_p_xn == false) nonzero_p_xn = true;
                            }
                        }
                    }

                    if (nonzero_p_xn)
                    {
                        emult_column( next_belY, *(problem->obsProb->getMatrix(a, xn)), o, jspv_sum);

                        if (!(next_belY.data.size() == 0)) {

                            double jp_xn_and_o = next_belY.norm_1();

                            next_belY *= (1.0/next_belY.norm_1());

                            nextStB->sval = xn;
                            *nextStB->bvec = next_belY;

                            double childLB = inner_prod(*alphaPlanePoolSet->getBestAlphaPlane1(*nextStB)->alpha, *nextStB->bvec);

                            observationValue += jp_xn_and_o * childLB;
                        }
                    }
                }
            }

            actionValue += observationValue;
        }

        actionValue = problem->getDiscount() * actionValue;

        FOR (xc, problem->XStates->size())
        {
            if (!(belX(xc) == 0))
            {
                currStB->sval = xc;
                *currStB->bvec = *belY;

                actionValue += (belX(xc) * problem->rewards->getReward(*currStB, a));
            }
        }

        if (a == 0)
        {
            bestValue = actionValue;
        }
        else
        {
            if (actionValue > bestValue)
            {
                bestValue = actionValue;
                bestAction = a;
            }
        }
    }
    return bestAction;
}


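// Alternative one-step lookahead for an uncertain observed state: rather than
// mixing the per-observed-state contributions before the backup, the lookahead
// value is computed separately for each current observed state xc (using the
// generic belief-transition routines) and then weighted by belX(xc).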
int AlphaVectorPolicy::getBestActionLookAhead_alternative(std::vector<belief_vector>& b, DenseVector& belX)
{
    unsigned int bestAction = 0;
    double bestValue = 0, xValue, actionValue;

    obs_prob_vector spv, opv;

    SharedPointer<BeliefWithState> currStB (new BeliefWithState());
    SharedPointer<BeliefWithState> nextStB;

    FOR (a, problem->getNumActions())
    {
        actionValue = 0;
        FOR (xc, problem->XStates->size())
        {
            if (!(belX(xc) == 0))
            {
                xValue = 0;
                currStB->sval = xc;
                *currStB->bvec = b[xc];

                // Distribution over the next observed state value.
                problem->getObsStateProbVector(spv, *currStB, a);

                FOR (xn, spv.size())
                {
                    double sprob = spv(xn);
                    if (sprob > OBS_IS_ZERO_EPS)
                    {
                        problem->getObsProbVector(opv, *currStB, a, xn);

                        FOR(o, opv.size())
                        {
                            double oprob = opv(o);
                            if (oprob > OBS_IS_ZERO_EPS)
                            {
                                nextStB = problem->beliefTransition->nextBelief(currStB, a, o, xn);

                                double childLB = inner_prod(*alphaPlanePoolSet->getBestAlphaPlane1(*nextStB)->alpha, *nextStB->bvec);

                                xValue += oprob * sprob * childLB;
                            }
                        }
                    }
                }

                xValue = problem->rewards->getReward(*currStB, a) + problem->getDiscount() * xValue;
                actionValue += (belX(xc) * xValue);
            }
        }

        if (a == 0)
        {
            bestValue = actionValue;
        }
        else
        {
            if (actionValue > bestValue)
            {
                bestValue = actionValue;
                bestAction = a;
            }
        }
    }
    return bestAction;
}


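// Same alternative lookahead as above, but with a single unobserved-state
// belief b shared across all observed-state values in belX.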
int AlphaVectorPolicy::getBestActionLookAhead_alternative(SharedPointer<belief_vector>& b, DenseVector& belX)
{
    unsigned int bestAction = 0;
    double bestValue = 0, xValue, actionValue;

    obs_prob_vector spv, opv;
    SharedPointer<BeliefWithState> currStB (new BeliefWithState());
    currStB->bvec = b;

    SharedPointer<BeliefWithState> nextStB;

    FOR (a, problem->getNumActions())
    {
        actionValue = 0;
        FOR (xc, problem->XStates->size())
        {
            if (!(belX(xc) == 0))
            {
                xValue = 0;
                currStB->sval = xc;

                problem->getObsStateProbVector(spv, *currStB, a);

                FOR (xn, spv.size())
                {
                    double sprob = spv(xn);
                    if (sprob > OBS_IS_ZERO_EPS)
                    {
                        problem->getObsProbVector(opv, *currStB, a, xn);

                        FOR(o, opv.size())
                        {
                            double oprob = opv(o);
                            if (oprob > OBS_IS_ZERO_EPS)
                            {
                                nextStB = problem->beliefTransition->nextBelief(currStB, a, o, xn);

                                double childLB = inner_prod(*alphaPlanePoolSet->getBestAlphaPlane1(*nextStB)->alpha, *nextStB->bvec);

                                xValue += oprob * sprob * childLB;
                            }
                        }
                    }
                }

                xValue = problem->rewards->getReward(*currStB, a) + problem->getDiscount() * xValue;
                actionValue += (belX(xc) * xValue);
            }
        }
        if (a == 0)
        {
            bestValue = actionValue;
        }
        else
        {
            if (actionValue > bestValue)
            {
                bestValue = actionValue;
                bestAction = a;
            }
        }
    }
    return bestAction;
}


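// Returns the best action for an uncertain observed state by evaluating the
// stored alpha vectors directly (no lookahead): for each observed-state value,
// the best alpha value is weighted by belX, and the action of the overall
// maximum is returned.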
int AlphaVectorPolicy::getBestAction(SharedPointer<belief_vector>& b, DenseVector& belX)
{
    int maxAction = 0;
    double maxValue = -DBL_MAX;

    FOR (xc, problem->XStates->size())
    {
        if (!(belX(xc) == 0))
        {
            SharedPointer<AlphaPlane> bestAlpha = alphaPlanePoolSet->set[xc]->getBestAlphaPlane1(b);
            double xvalue = inner_prod(*bestAlpha->alpha, *b);
            xvalue *= belX(xc);
            if(xvalue > maxValue)
            {
                maxValue = xvalue;
                maxAction = bestAlpha->action;
            }
        }
    }
    return maxAction;
}


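// Same as above, but with one unobserved-state belief per observed-state
// value in belYs.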
int AlphaVectorPolicy::getBestAction(std::vector<belief_vector>& belYs, DenseVector& belX)
{
    int maxAction = 0;
    double maxValue = -DBL_MAX;

    SharedPointer<belief_vector> Bc (new belief_vector());

    FOR (xc, problem->XStates->size())
    {
        if (!(belX(xc) == 0))
        {
            *Bc = belYs[xc];

            SharedPointer<AlphaPlane> bestAlpha = alphaPlanePoolSet->set[xc]->getBestAlphaPlane1(Bc);

            double xvalue = inner_prod(*bestAlpha->alpha, *Bc);

            xvalue *= belX(xc);
            if(xvalue > maxValue)
            {
                maxValue = xvalue;
                maxAction = bestAlpha->action;
            }
        }
    }
    return maxAction;
}