AlphaVectorPolicy.cpp
00001 #include <fstream>
00002 #include <cfloat>
00003 #include <cstdlib>
00004 #include <cstdio>
00005 extern "C"{
00006 #include "xml_parse_lib.h"
00007 }
00008 #include "AlphaVectorPolicy.h"
00009 #include "AlphaPlanePool.h"
00010 #include "AlphaPlanePoolSet.h"
00011 
00012 #define MaxStr 2048
00013 
00014 
00015 using namespace std;
00016 using namespace momdp;
00017 
00018 AlphaVectorPolicy::AlphaVectorPolicy(SharedPointer<MOMDP> problem) : valueAction(0)
00019 {
00020         this->problem = problem;
00022         alphaPlanePoolSet = new AlphaPlanePoolSet(NULL);
00023         alphaPlanePoolSet->setProblem(problem);
00024         alphaPlanePoolSet->initialize();
00025 }
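// Minimal usage sketch (illustrative only, not part of the original source):
// it assumes a MOMDP model and a current belief obtained elsewhere, and a
// policy file name chosen by the caller.
//
//   SharedPointer<MOMDP> problem = /* load MOMDP model elsewhere */;
//   AlphaVectorPolicy policy(problem);
//   policy.readFromFile("example.policy");        // hypothetical file name
//
//   BeliefWithState b;                            // set b.sval and b.bvec before use
//   int a = policy.getBestAction(b);              // direct alpha-vector lookup
//   // or: policy.getBestActionLookAhead(b);      // one-step lookahead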
00026 /**********************************************************************
00027 *READ POLICY
00028 **********************************************************************/
00029 static std::string stripWhiteSpace(const std::string& s)
00030 {
00031         string::size_type p1, p2;
00032         p1 = s.find_first_not_of(" \t");
00033         if (string::npos == p1) {
00034                 return s;
00035         } else {
00036                 p2 = s.find_last_not_of(" \t")+1;
00037                 return s.substr(p1, p2-p1);
00038         }
00039 }
00040 
00041 int AlphaVectorPolicy::getValueAction() {
00042     return this->valueAction;
00043 }
00044 
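// Reads a policy file produced by the solver. Judging from the parsing code
// below, the expected layout is roughly:
//
//   <?xml version="1.0" ...?>
//   <Policy type="value" ...>
//     <AlphaVector vectorLength="|Y|">
//       <Vector action="a" obsValue="x"> v_1 v_2 ... v_|Y| </Vector>
//       <SparseVector action="a" obsValue="x"><Entry>i v_i</Entry>...</SparseVector>
//       ...
//     </AlphaVector>
//   </Policy>
//
// vectorLength must equal the number of unobserved states |Y|, and each
// Vector/SparseVector becomes one AlphaPlane stored in the pool of its
// observed state x (obsValue).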
00045 bool AlphaVectorPolicy::readFromFile(const std::string& inFileName)
00046 {
00047     
00048     char tag[MaxStr], contents[MaxStr], tagname[MaxStr], attrname[MaxStr], value[MaxStr];
00050     int linum=0;
00051     FILE *infile=0;
00052 
00053     infile = fopen(inFileName.c_str(),"r");    
00054     if(infile==0)
00055     {
00056         cerr << "ERROR: couldn't open " << inFileName << " for reading " << endl;
00057         exit(EXIT_FAILURE);
00058     }
00059     xml_parse( infile, tag, contents, MaxStr, &linum );
00060     xml_grab_tag_name( tag, tagname, MaxStr );  /* Get tag name. */
00061     xml_grab_attrib( tag, attrname, value, MaxStr );    /* Get next attribute, if any. */
00062     while(value[0] != '\0'){
00063         xml_grab_attrib( tag, attrname, value, MaxStr );        /* Get next attribute, if any. */
00064     }
00065 
00066     xml_parse( infile, tag, contents, MaxStr, &linum );
00067     xml_grab_tag_name( tag, tagname, MaxStr );  /* Get tag name. */
00068     if(string(tagname)!="Policy"){
00069         cerr << "ERROR:\n\tExpected Policy tag as root" << endl;
00070         exit(EXIT_FAILURE);
00071     }
00072     
00073     xml_grab_attrib( tag, attrname, value, MaxStr );    /* Get any attributes within tag. */
00074     while(value[0] != '\0'){
00075         if(string(attrname)=="type" && string(value)!="value"){
00076             cerr << "ERROR:\n\tOnly policy of type \"value\" is supported" << endl;
00077             exit(EXIT_FAILURE);
00078         }
00079         xml_grab_attrib( tag, attrname, value, MaxStr );        /* Get next attribute, if any. */
00080     }
00081     xml_parse( infile, tag, contents, MaxStr, &linum );
00082     xml_grab_tag_name( tag, tagname, MaxStr );  /* Get tag name. */
00083     
00084     
00085     if(string(tagname)!="AlphaVector"){
00086         cerr << "ERROR:\n\tExpected AlphaVector tag" << endl;
00087         exit(EXIT_FAILURE);
00088     }
00089     int vectorLength = -1;
00090 
00091     xml_grab_attrib( tag, attrname, value, MaxStr );    /* Get any attributes within tag. */
00092 
00093     while(value[0] != '\0'){
00094         if(string(attrname)=="vectorLength")
00095         {
00096             vectorLength = atoi(value);
00097         }
00098         xml_grab_attrib( tag, attrname, value, MaxStr );
00099     }
00100 
00101     if(vectorLength == -1){
00102         cerr << "ERROR:\n\tCannot find integer attribute vectorLength in AlphaVector Tag" << endl;
00103         exit(EXIT_FAILURE);
00104     }
00105     if(vectorLength != problem->YStates->size()){
00106         cerr << "ERROR:\n\tVector length does not match problem unobserved state size" << endl;
00107         exit(EXIT_FAILURE);
00108     }
00109 
00110     AlphaPlane plane;
00111     double entryVal;
00112     xml_parse_tag_only( infile, tag, MaxStr, &linum );          //read the vector tag
00113     xml_grab_tag_name( tag, tagname, MaxStr);
00114 
00115     // Extract the value of the action="..." attribute of the first Vector tag;
00116     // this records the action attribute of the first vector read from the file,
00117     // exposed to callers through getValueAction().
00118     string str2, str3, str4;
00119     size_t pos;
00120     str2 = string(tag);
00121     pos = str2.find("action");
00122     str3 = str2.substr(pos+8);              // skip past 'action="' (8 characters)
00123     pos = str3.find("\"");
00124     str4 = str3.substr(0,pos);              // keep everything up to the closing quote
00125     this->valueAction = atoi(str4.c_str());
00126 
00127 
00128     while( tag[0]!='\0'){
00129         //initialise the alpha plane
00130         plane.alpha->resize(problem->YStates->size());
00131         bool foundAct = false;  bool foundObs = false;
00132         xml_grab_attrib( tag, attrname, value, MaxStr );
00133         while(value[0] != '\0'){
00134             if(string(attrname)=="action"){
00135                 plane.action = atoi(value);
00136                 foundAct = true;
00137             }
00138             else if(string(attrname)=="obsValue"){
00139                 plane.sval = atoi(value);
00140                 foundObs = true;
00141             }
00142             xml_grab_attrib( tag, attrname, value, MaxStr );
00143         }
00144 
00145         if(!foundAct){
00146             cerr << "ERROR:\n\tCannot find integer attribute action in Vector tag" << endl;
00147             assert(false);
00148             exit(EXIT_FAILURE);
00149         }
00150         if(!foundObs){
00151             cerr << "ERROR:\n\tCannot find integer attribute obsValue in Vector tag" << endl;
00152             exit(EXIT_FAILURE);
00153         }
00154 
00155         if(plane.sval >= alphaPlanePoolSet->set.size())
00156         {
00157             cerr << "Policy file has more observed states than the given POMDP model; are you using the correct policy file?" << endl;
00158             exit(EXIT_FAILURE);
00159         }
00160 
00161         //check if vector is dense or sparse
00162         if(string(tagname)=="Vector"){      
00163             FOR(i, vectorLength){
00164                 char  dvalue[200];
00165                 if(fscanf(infile, "%199s", dvalue)==EOF){
00166                     cerr << "ERROR:\n\tVector is too short; are you using the correct policy file?" << endl;
00167                     exit(EXIT_FAILURE);
00168                 }
00169                 plane.alpha->data[i] = atof(dvalue);
00170             }
00171             alphaPlanePoolSet->set[plane.sval]->planes.push_back(plane.duplicate());
00172         }
00173         else if(string(tagname)=="SparseVector"){
00174             xml_parse( infile, tag, contents, MaxStr, &linum );
00175             xml_grab_tag_name( tag, tagname, MaxStr);
00176             while(string(tagname)=="Entry"){
00177                 double value; int index;
00178                 sscanf(contents, "%d %lf", &index, &value);
00179                 plane.alpha->data[index] = value;
00180 
00181                 xml_parse( infile, tag, contents, MaxStr, &linum );
00182                 xml_parse( infile, tag, contents, MaxStr, &linum );
00183                 xml_grab_tag_name( tag, tagname, MaxStr);
00184             }
00185             alphaPlanePoolSet->set[plane.sval]->planes.push_back(plane.duplicate());
00186         }
00187         xml_parse(infile, tag,contents, MaxStr, &linum );
00188         xml_parse_tag_only( infile, tag, MaxStr, &linum );              //read the vector tag
00189         xml_grab_tag_name( tag, tagname, MaxStr);
00190     }
00191     fclose(infile);
00192 
00193     return true;
00194 }
00195 
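// Direct policy lookup (no lookahead): the best alpha-vector for the observed
// state b.sval is found, i.e.
//
//   V(x, b_y) = max_{alpha in Gamma_x} alpha . b_y
//
// and the action attached to that maximizing alpha-vector is returned.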
00196 int AlphaVectorPolicy::getBestAction(BeliefWithState& b)
00197 {
00198         double maxValue;
00199         return getBestAction(b, maxValue);
00200 }
00201 
00202 int AlphaVectorPolicy::getBestAction(BeliefWithState& b, REAL_VALUE& maxValue)
00203 {
00204         SharedPointer<AlphaPlane> bestAlpha = alphaPlanePoolSet->set[b.sval]->getBestAlphaPlane1(b.bvec);
00205         maxValue = inner_prod(*bestAlpha->alpha, *b.bvec);
00206         return bestAlpha->action;
00207 }
00208 
00209 int AlphaVectorPolicy::getBestActionLookAhead(BeliefWithState& b)
00210 {
00211         double maxValue;
00212         return getBestActionLookAhead(b, maxValue);
00213 }
00214 
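// One-step lookahead from a single (x, b_y) belief point. A sketch of what the
// loops below compute, for each action a:
//
//   Q(b,a) = R(b,a) + gamma * sum_{x',o} P(x',o | b,a) * V(x', b^{a,o,x'})
//
// where b^{a,o,x'} is the updated belief and V is the lower bound given by the
// best stored alpha-vector; the action maximizing Q(b,a) is returned.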
00215 int AlphaVectorPolicy::getBestActionLookAhead(BeliefWithState& b, REAL_VALUE& maxValue)
00216 {
00217         double maxActionLB = -DBL_MAX;
00218         int maxAction = 0;
00219         obs_prob_vector  opv; // outcome_prob_vector opv;
00220         SharedPointer<BeliefWithState> nextb;
00221         obsState_prob_vector spv;  // outcome probability for values of observed state
00222         SharedPointer<SparseVector> jspv (new SparseVector());
00223 
00224         DEBUG_TRACE(cout << "getBestActionLookAhead" << endl; );
00225 
00226         for(Actions::iterator aIter = problem->actions->begin(); aIter != problem->actions->end(); aIter ++) 
00227         {
00228                 int a = aIter.index();
00229                 DEBUG_TRACE(cout << "a " << a << endl; );
00230 
00231                 double sum = 0.0;
00232                 double immediateReward = problem->rewards->getReward(b, a);
00233 
00234                 if (problem->XStates->size() == 1) 
00235                 {
00236                         DEBUG_TRACE(cout << "spv1 " << endl; );
00237                         spv.resize(1);
00238                         spv.push_back(0, 1.0);
00239                 } 
00240                 else 
00241                 {
00242                         DEBUG_TRACE(cout << "spv2 " << endl; );
00243                         problem->getObsStateProbVector(spv, b, a); // P(Xn|cn.s,a)
00244                 }
00245 
00246                 DEBUG_TRACE( cout << "spv " << endl; );
00247                 DEBUG_TRACE( spv.write(cout) << endl; );
00248 
00249                 FOR(Xn, spv.size()) 
00250                 {
00251                         DEBUG_TRACE(cout << "Xn " << Xn << endl; );
00252                         double sprob = spv(Xn);
00253                         if (sprob > OBS_IS_ZERO_EPS) 
00254                         {
00255                                 problem->getJointUnobsStateProbVector(*jspv, &b, a, Xn);
00256                                 
00257                                 DEBUG_TRACE( cout << "jspv " << endl; );
00258                                 DEBUG_TRACE( jspv->write(cout) << endl; );
00259 
00260 
00261                                 //problem->getStatenObsProbVectorFast(ospv, a, Xn, jspv);
00262                                 problem->getObsProbVectorFast(opv, a, Xn, *jspv); // only the joint prob is useful for later but we calculate the observation prob P(o|Xn,cn.s,a)
00263                                 //problem->getObsProbVector(opv, cn.s, a, Xn);
00264                                 
00265                                 DEBUG_TRACE( cout << "opv " << endl; );
00266                                 DEBUG_TRACE( opv.write(cout) << endl; );
00267 
00268 
00269 
00270                                 FOR(o, opv.size()) 
00271                                 {
00272                                         DEBUG_TRACE(cout << "o " << o << endl; );
00273 
00274                                         double oprob = opv(o);
00275                                         if (oprob > OBS_IS_ZERO_EPS) 
00276                                         {
00277                                                 double obsProb = oprob * sprob; // P(o,Xn|cn.s,a) = P(Xn|cn.s,a) * P(o|Xn,cn.s,a)
00278                                                 //nextb = problem->getNextBeliefStvalFast(sp, a, o, Xn, jspv);
00279 
00280                                                 nextb = problem->beliefTransition->nextBelief2(NULL, a, o, Xn, jspv);
00281 
00282                                                 DEBUG_TRACE( cout << "nextb sval" << nextb->sval << endl; );
00283                                                 DEBUG_TRACE( nextb->bvec->write(cout) << endl; );
00284 
00285                                                 SharedPointer<AlphaPlane> bestAlpha = alphaPlanePoolSet->getBestAlphaPlane1(*nextb);
00286                                                 double childLB = inner_prod(*bestAlpha->alpha, *nextb->bvec);
00287                                                 sum += childLB * obsProb;
00288 
00289                                                 DEBUG_TRACE( cout << "childLB " << childLB << endl; );
00290                                                 DEBUG_TRACE( cout << "sum " << sum << endl; );
00291                                                 
00292                                                 // nextb deletion will be handled by smart pointers
00293                                                 
00294                                         }
00295                                 }
00296                         }
00297                 } 
00298                 sum *= problem->getDiscount();
00299                 sum += immediateReward;
00300 
00301                 DEBUG_TRACE( cout << "sum " << sum << endl; );
00302 
00303                 if(sum > maxActionLB)
00304                 {
00305                         maxActionLB = sum;
00306                         maxAction = a;
00307                         DEBUG_TRACE( cout << "maxAction = " << maxAction << endl; );
00308                 }
00309                 assert(maxActionLB !=  -DBL_MAX);
00310         }
00311         maxValue = maxActionLB; return maxAction;
00312 }
00313 
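// Lookahead for a factored belief: belX is the distribution over observed
// states x and belYs[x] is the conditional belief b_{y|x}. Following the loops
// below, each action is scored as
//
//   Q(a) = sum_x belX(x) * R(x, b_{y|x}, a)
//          + gamma * sum_{o,x'} P(x',o | belX, belYs, a) * V(x', b'_{y|x'})
//
// and the maximizing action is returned.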
00314 // SYL07282010 - modify function so that it follows RSS09 paper
00315 int AlphaVectorPolicy::getBestActionLookAhead(std::vector<belief_vector>& belYs, DenseVector& belX)
00316 {
00317         unsigned int bestAction  = 0;
00318         double bestValue, observationValue, actionValue;
00319         // observationValue is the summation over x'; actionValue is the summation over o; bestValue tracks the highest value among the actions, a.
00320 
00321         SharedPointer<BeliefWithState> nextStB (new BeliefWithState());
00322         SharedPointer<BeliefWithState> currStB (new BeliefWithState());
00323 
00324         FOR (a, problem->getNumActions())
00325         {
00326                 actionValue = 0;
00327 
00328                 FOR (o, problem->observations->size())
00329                 {
00330                         observationValue = 0;
00331                         FOR (xn, problem->XStates->size())
00332                         {
00333                                 // for a particular x'
00334                                 if (!((problem->obsProb->getMatrix(a, xn))->isColumnEmpty(o))) 
00335                                 {
00336                                         SparseVector jspv_sum;  // sum of P(x',y' | x, b_{y|x}, a)  * b_x(x) over values of x
00337                                         jspv_sum.resize(problem->YStates->size());
00338                                         belief_vector next_belY; // for P(x',y',o | b_x, b_{y|x}, a) and P(y' | x',  b_x, b_{y|x}, a, o)
00339                         
00340                                         bool nonzero_p_xn = false;  // flag to indicate if P(x',y' | b_x, b_{y|x}, a) == 0 for this particular combination of x', o, and a, and for all y
00341                                         // loop over x
00342                                         FOR (xc, problem->XStates->size())
00343                                         {
00344                                                 if (!(belX(xc) == 0)) 
00345                                                 {
00346                                                         // for a particular x
00347                                                         // skip all the calculations to add to jspv_sum if *(momdpProblem->XTrans->getMatrix(a, xc)) is all zero for column xn
00348                                                         if (!((problem->XTrans->getMatrix(a, xc))->isColumnEmpty(xn))) 
00349                                                         {
00350                                                                 DenseVector tmp, tmp1;
00351                                                                 DenseVector Bc; 
00352                                                                 SparseVector jspv;
00353                                                                 
00354                                                                 copy(Bc, belYs[xc]);
00355                 
00356                                                                 emult_column( tmp, *(problem->XTrans->getMatrix(a, xc)), xn, Bc );
00357                                 
00358                                                                 mult( tmp1, *(problem->YTrans->getMatrixTr(a, xc, xn)), tmp );
00359                                                                 copy(jspv, tmp1);  // P(x',y' | x, b_{y|x}, a) for a particular x and x'
00360                                 
00361                                                                 // multiply with belX(xc) and add to sum over x values
00362                                                                 jspv *= belX(xc);
00363                                                                 jspv_sum += jspv;
00364 
00365                                                                 if (nonzero_p_xn == false) nonzero_p_xn = true;
00366                                                         } 
00367                                                 }
00368                                         }
00369 
00370                                         if (nonzero_p_xn)
00371                                         {
00372                                                 emult_column( next_belY, *(problem->obsProb->getMatrix(a, xn)), o,  jspv_sum); // P(x',y',o | b_x, b_{y|x}, a)
00373 
00374                                                 // check that it's not an all-zero vector!!
00375                                                 if (!(next_belY.data.size() == 0)) {
00376         
00377                                                         // the normalization factor is P(x',o | b_x, b_{y|x}, a)
00378                                                         double jp_xn_and_o = next_belY.norm_1(); 
00379         
00380                                                         // normalize to get P(y'| x', b_x, b_{y|x}, a, o)
00381                                                         next_belY *= (1.0/next_belY.norm_1());
00382                 
00383                                                         nextStB->sval = xn;
00384                 
00385                                                         //nextStB->bvec = &next_belY;
00386                                                         *nextStB->bvec = next_belY;
00387         
00388                                                         //get value at (x', P(y'| x', b_x, b_{y|x}, a, o))
00389                                                         double childLB = inner_prod(*alphaPlanePoolSet->getBestAlphaPlane1(*nextStB)->alpha, *nextStB->bvec); 
00390         
00391                                                         //multiply V(x', P(y'| x', b_x, b_{y|x}, a, o)) with P(x',o | b_x, b_{y|x}, a) and add to summation over x'
00392                                                         observationValue += jp_xn_and_o * childLB; 
00393                                                 }
00394                                         }               
00395                                 } 
00396                                 // else don't need to do anything, i.e. add 0 to observationValue
00397                         }
00398                         // add to actionValue
00399                         actionValue += observationValue;
00400                 }
00401                 // after exiting observation loop, multiply observationValue by discount factor and add to R(b,a)
00402                 actionValue = problem->getDiscount() * actionValue;
00403 
00404                 FOR (xc, problem->XStates->size())
00405                 {
00406                         if (!(belX(xc) == 0)) 
00407                         {
00408                                 currStB->sval = xc;
00409 
00410                                 *currStB->bvec = belYs[xc];
00411                                 //currStB->bvec = &belYs[xc];
00412 
00413                                 actionValue += (belX(xc) * problem->rewards->getReward(*currStB, a));
00414                         }
00415                 }
00416 
00417                 // track best value over the actions, a.
00418                 if (a == 0)
00419                 {
00420                         bestValue = actionValue;
00421                 }
00422                 else
00423                 {
00424                         if (actionValue > bestValue) 
00425                         {
00426                                 bestValue = actionValue;
00427                                 bestAction = a;
00428                         }
00429                 }
00430         }
00431         return bestAction;
00432 }
00433 
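// Same lookahead as the function above, except that a single unobserved-state
// belief belY is used as b_{y|x} for every observed state x with belX(x) > 0.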
00434 // SYL07282010 - modify function so that it follows RSS09 paper
00435 int AlphaVectorPolicy::getBestActionLookAhead(SharedPointer<belief_vector>& belY, DenseVector& belX)
00436 {
00437         unsigned int bestAction  = 0;
00438         double bestValue, observationValue, actionValue;
00439         // observationValue is the summation over x'; actionValue is the summation over o; bestValue tracks the highest value among the actions, a.
00440 
00441         SharedPointer<BeliefWithState> nextStB (new BeliefWithState());
00442         SharedPointer<BeliefWithState> currStB (new BeliefWithState());
00443 
00444         FOR (a, problem->getNumActions())
00445         {
00446                 actionValue = 0;
00447 
00448                 FOR (o, problem->observations->size())
00449                 {
00450                         observationValue = 0;
00451                         FOR (xn, problem->XStates->size())
00452                         {
00453                                 // for a particular x'
00454                                 if (!((problem->obsProb->getMatrix(a, xn))->isColumnEmpty(o))) 
00455                                 {
00456                                         SparseVector jspv_sum;  // sum of P(x',y' | x, b_{y|x}, a)  * b_x(x) over values of x
00457                                         jspv_sum.resize(problem->YStates->size());
00458                                         belief_vector next_belY; // for P(x',y',o | b_x, b_{y|x}, a) and P(y' | x',  b_x, b_{y|x}, a, o)
00459                         
00460                                         bool nonzero_p_xn = false;  // flag to indicate if P(x',y' | b_x, b_{y|x}, a) == 0 for this particular combination of x', o, and a, and for all y
00461                                         // loop over x
00462                                         FOR (xc, problem->XStates->size())
00463                                         {
00464                                                 if (!(belX(xc) == 0)) 
00465                                                 {
00466                                                         // for a particular x
00467                                                         // skip all the calculations to add to jspv_sum if *(momdpProblem->XTrans->getMatrix(a, xc)) is all zero for column xn
00468                                                         if (!((problem->XTrans->getMatrix(a, xc))->isColumnEmpty(xn))) 
00469                                                         {
00470                                                                 DenseVector tmp, tmp1;
00471                                                                 DenseVector Bc; 
00472                                                                 SparseVector jspv;
00473 
00474                                                                 copy(Bc, *belY);
00475                                                                 
00476                                                                 emult_column( tmp, *(problem->XTrans->getMatrix(a, xc)), xn, Bc );
00477                                 
00478                                                                 mult( tmp1, *(problem->YTrans->getMatrixTr(a, xc, xn)), tmp );
00479                                                                 copy(jspv, tmp1);  // P(x',y' | x, b_{y|x}, a) for a particular x and x'
00480                                 
00481                                                                 // multiply with belX(xc) and add to sum over x values
00482                                                                 jspv *= belX(xc);
00483                                                                 jspv_sum += jspv;
00484 
00485                                                                 if (nonzero_p_xn == false) nonzero_p_xn = true;
00486                                                         } 
00487                                                 }
00488                                         }
00489 
00490                                         if (nonzero_p_xn)
00491                                         {
00492                                                 emult_column( next_belY, *(problem->obsProb->getMatrix(a, xn)), o,  jspv_sum); // P(x',y',o | b_x, b_{y|x}, a)
00493 
00494                                                 // check that it's not an all-zero vector!!
00495                                                 if (!(next_belY.data.size() == 0)) {
00496         
00497                                                         // the normalization factor is P(x',o | b_x, b_{y|x}, a)
00498                                                         double jp_xn_and_o = next_belY.norm_1(); 
00499         
00500                                                         // normalize to get P(y'| x', b_x, b_{y|x}, a, o)
00501                                                         next_belY *= (1.0/next_belY.norm_1());
00502                 
00503                                                         nextStB->sval = xn;
00504                 
00505                                                         //nextStB->bvec = &next_belY;
00506                                                         *nextStB->bvec = next_belY;
00507         
00508                                                         //get value at (x', P(y'| x', b_x, b_{y|x}, a, o))
00509                                                         double childLB = inner_prod(*alphaPlanePoolSet->getBestAlphaPlane1(*nextStB)->alpha, *nextStB->bvec); 
00510         
00511                                                         //multiply V(x', P(y'| x', b_x, b_{y|x}, a, o)) with P(x',o | b_x, b_{y|x}, a) and add to summation over x'
00512                                                         observationValue += jp_xn_and_o * childLB; 
00513                                                 }
00514                                         }               
00515                                 } 
00516                                 // else don't need to do anything, i.e. add 0 to observationValue
00517                         }
00518                         // add to actionValue
00519                         actionValue += observationValue;
00520                 }
00521                 // after exiting observation loop, multiply observationValue by discount factor and add to R(b,a)
00522                 actionValue = problem->getDiscount() * actionValue;
00523 
00524                 FOR (xc, problem->XStates->size())
00525                 {
00526                         if (!(belX(xc) == 0)) 
00527                         {
00528                                 currStB->sval = xc;
00529 
00530                                 *currStB->bvec = *belY;         
00531                                 
00532                                 actionValue += (belX(xc) * problem->rewards->getReward(*currStB, a));
00533                         }
00534                 }
00535 
00536                 // track best value over the actions, a.
00537                 if (a == 0)
00538                 {
00539                         bestValue = actionValue;
00540                 }
00541                 else
00542                 {
00543                         if (actionValue > bestValue) 
00544                         {
00545                                 bestValue = actionValue;
00546                                 bestAction = a;
00547                         }
00548                 }
00549         }
00550         return bestAction;
00551 }
00552 
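// The two _alternative versions below do the one-step lookahead separately for
// each observed state x and only then weight the results by belX(x):
//
//   Q(a) = sum_x belX(x) * [ R(x, b_{y|x}, a)
//            + gamma * sum_{x',o} P(x',o | x, b_{y|x}, a) * V(x', b') ]
//
// in contrast to the functions above, which mix over x inside the belief
// update before evaluating V.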
00553 // SYL07282010 - replaced with code which follows RSS09 paper
00554 // this function implements: for each action, for each possible (x,b_{y|x}), do one-step-lookahead, then weigh the resultant values by belX(x).
00555 int AlphaVectorPolicy::getBestActionLookAhead_alternative(std::vector<belief_vector>& b, DenseVector& belX)
00556 {
00557         unsigned int bestAction  = 0;
00558         double bestValue, xValue, actionValue;
00559         //obs_prob_vector StatenObs_pv; //(problem->numObservations);
00560         obs_prob_vector spv, opv;
00561 
00562         SharedPointer<BeliefWithState> currStB (new BeliefWithState());
00563         SharedPointer<BeliefWithState> nextStB;
00564         
00565 
00566         FOR (a, problem->getNumActions())
00567         {
00568                 actionValue = 0;
00569                 FOR (xc, problem->XStates->size())
00570                 {
00571 
00572                         if (!(belX(xc) == 0))
00573                         {
00574 
00575                                 xValue = 0;
00576                                 currStB->sval = xc;
00577                                 //currStB->bvec = &b[xc];
00578                                 *currStB->bvec = b[xc];
00579 
00580                                 problem->getObsStateProbVector(spv, *currStB, a);
00581 
00582                                 FOR (xn, spv.size())
00583                                 {
00584                                         double sprob = spv(xn);
00585                                         if (sprob > OBS_IS_ZERO_EPS)
00586                                         {
00587                                                 problem->getObsProbVector(opv, *currStB, a, xn);
00588                                                 //problem->getStatenObsProbVector(StatenObs_pv, currStB, a, xn);
00589 
00590                                                 //#cout << "xn : " << xn << endl;
00591                                                 FOR(o, opv.size())
00592                                                 {
00593 
00594                                                         double oprob = opv(o);
00595                                                         if (oprob > OBS_IS_ZERO_EPS)
00596                                                         { 
00597                                                                 //problem->getNextBeliefStval(nextStB, currStB, a, o, xn);
00598                                                                 nextStB = problem->beliefTransition->nextBelief(currStB, a, o, xn);
00599                                                                 //get value at {xn, next belief}
00600                                                                 double childLB = inner_prod(*alphaPlanePoolSet->getBestAlphaPlane1(*nextStB)->alpha, *nextStB->bvec); 
00601                                                                 //multiply by  StatenObs_pv(o), add to xValue
00602                                                                 //xValue +=  StatenObs_pv(o) * childLB; 
00603                                                                 xValue +=  oprob * sprob * childLB; 
00604                                                                 
00605                                                         }       
00606                                                 }
00607                                         }
00608                                 }
00609 
00610                                 xValue = problem->rewards->getReward(*currStB, a) + problem->getDiscount() * xValue;
00611                                 actionValue += (belX(xc) * xValue); 
00612                         }
00613                 }
00614 
00615                 if (a == 0)
00616                 {
00617                         bestValue = actionValue;
00618                 }
00619                 else
00620                 {
00621                         if (actionValue > bestValue) 
00622                         {
00623                                 bestValue = actionValue;
00624                                 bestAction = a;
00625                         }
00626 
00627                 }
00628 
00629         }       
00630         return bestAction;
00631 }
00632 
00633 // SYL07282010 replaced with code which follows RSS09 paper
00634 // this function implements: for each action, for each possible (x,b_y), do one-step-lookahead, then weigh the resultant values by belX(x). 
00635 int AlphaVectorPolicy::getBestActionLookAhead_alternative(SharedPointer<belief_vector>& b, DenseVector& belX)
00636 {
00637 
00638         unsigned int bestAction  = 0;
00639         double bestValue, xValue, actionValue;
00640         //obs_prob_vector StatenObs_pv; //(problem->numObservations);
00641         obs_prob_vector spv, opv;
00642         SharedPointer<BeliefWithState> currStB (new BeliefWithState());
00643         currStB->bvec = b;
00644 
00645         SharedPointer<BeliefWithState> nextStB;
00646 
00647         FOR (a, problem->getNumActions())
00648         {
00649                 actionValue = 0;
00650                 FOR (xc, problem->XStates->size())
00651                 {
00652 
00653                         if (!(belX(xc) == 0))
00654                         {
00655 
00656                                 xValue = 0;
00657                                 currStB->sval = xc;
00658                                 
00659 
00660                                 problem->getObsStateProbVector(spv, *currStB, a);
00661 
00662                                 FOR (xn, spv.size())
00663                                 {
00664                                         double sprob = spv(xn);
00665                                         if (sprob > OBS_IS_ZERO_EPS)
00666                                         {
00667                                                 problem->getObsProbVector(opv, *currStB, a, xn);
00668                                                 //problem->getStatenObsProbVector(StatenObs_pv, currStB, a, xn);
00669 
00670                                                 //#cout << "xn : " << xn << endl;
00671                                                 FOR(o, opv.size())
00672                                                 {
00673 
00674                                                         double oprob = opv(o);
00675                                                         if (oprob > OBS_IS_ZERO_EPS)
00676                                                         { 
00677                                                                 //problem->getNextBeliefStval(nextStB, currStB, a, o, xn);
00678                                                                 nextStB = problem->beliefTransition->nextBelief(currStB, a, o, xn);
00679                                                                 //get value at {xn, next belief}
00680                                                                 double childLB = inner_prod(*alphaPlanePoolSet->getBestAlphaPlane1(*nextStB)->alpha, *nextStB->bvec); 
00681                                                                 //multiply by  StatenObs_pv(o), add to xValue
00682                                                                 //xValue +=  StatenObs_pv(o) * childLB; 
00683                                                                 xValue +=  oprob * sprob * childLB; 
00684                                                                 
00685                                                         }       
00686                                                 }
00687                                         }
00688                                 }
00689 
00690                                 xValue = problem->rewards->getReward(*currStB, a) + problem->getDiscount() * xValue;
00691                                 actionValue += (belX(xc) * xValue); 
00692                         }
00693                 }
00694                 if (a == 0)
00695                 {
00696                         bestValue = actionValue;
00697                 }
00698                 else
00699                 {
00700                         if (actionValue > bestValue) 
00701                         {
00702                                 bestValue = actionValue;
00703                                 bestAction = a;
00704                         }
00705 
00706                 }
00707 
00708         }       
00709         return bestAction;
00710 }
00711 
00712 
00713 // note that this function implements: for each possible (x,b_y), lookup the action and value, weigh the value by belX(x), then choose the action corresponding to the highest weighed value 
00714 int AlphaVectorPolicy::getBestAction(SharedPointer<belief_vector>& b, DenseVector& belX)
00715 {
00716         int maxAction = 0;
00717         double maxValue =  -DBL_MAX;
00718 
00719         FOR (xc, problem->XStates->size())
00720         {
00721                 if (!(belX(xc) == 0))
00722                 {
00723 
00724                         SharedPointer<AlphaPlane> bestAlpha = alphaPlanePoolSet->set[xc]->getBestAlphaPlane1(b);
00725                         double xvalue = inner_prod(*bestAlpha->alpha, *b);
00726                         xvalue *= belX(xc) ; 
00727                         if(xvalue > maxValue)
00728                         {
00729                                 maxValue = xvalue;
00730                                 maxAction = bestAlpha->action;
00731                         }
00732                 }
00733         }
00734         return maxAction;
00735 }
00736 
00737 // SYL07282010 added
00738 // note that this function implements: for each possible (x,b_{y|x}), lookup the action and value, weigh the value by belX(x), then choose the action corresponding to the highest weighed value 
00739 int AlphaVectorPolicy::getBestAction(std::vector<belief_vector>& belYs, DenseVector& belX)
00740 {
00741         int maxAction = 0;
00742         double maxValue =  -DBL_MAX;
00743 
00744         SharedPointer<belief_vector> Bc (new belief_vector());
00745 
00746         FOR (xc, problem->XStates->size())
00747         {
00748                 if (!(belX(xc) == 0))
00749                 {
00750                         *Bc = belYs[xc];
00751 
00752                         SharedPointer<AlphaPlane> bestAlpha = alphaPlanePoolSet->set[xc]->getBestAlphaPlane1(Bc);
00753                         //SharedPointer<AlphaPlane> bestAlpha = alphaPlanePoolSet->set[xc]->getBestAlphaPlane1(b);
00754                         double xvalue = inner_prod(*bestAlpha->alpha, *Bc);
00755                         //double xvalue = inner_prod(*bestAlpha->alpha, *b);
00756                         xvalue *= belX(xc) ; 
00757                         if(xvalue > maxValue)
00758                         {
00759                                 maxValue = xvalue;
00760                                 maxAction = bestAlpha->action;
00761                         }
00762                 }
00763         }
00764         return maxAction;
00765 }
00766 

