SampleBP.cpp
Go to the documentation of this file.
00001 
00004 /************************************************************************
00005 File: SampleBP.cc
00006 Date: 06/06/2007 
00007 Author: rn
00008 *************************************************************************/
00009 #include "SampleBP.h"
00010 #include "SARSOP.h"
00011 
00012 #include "GlobalResource.h"
00013 
00014 //#define  DEBUGSYL_310708 1
00015 //#define  DEBUGSYL_120808 1
00016 
00017 namespace momdp
00018 {
00019         //constructor
00020         void SampleBP::setup(SharedPointer<MOMDP> _problem, SARSOP* _solver)
00021         {
00022                 Sample::setup(_solver, _problem, &(_solver->beliefCacheSet), _solver->beliefForest);
00023 
00024                 problem = _problem;
00025                 solver = _solver;
00026                 srand( time(NULL) );
00027                 // beliefCache = &(BeliefCache::getCache ()); // removed for factored, now accessed through Bounds class
00028                 
00029 
00030                 numTrials = 0;
00031                 numSubOptimal = 0;              //xan-edit
00032                 numBinProceed = 0;//for Bin Heuristic
00033 
00034                 // this section modified for running parallel trials
00035                 depthArr.resize(problem->initialBeliefX->size());
00036                 trialTargetPrecisionArr.resize(problem->initialBeliefX->size());
00037                 newTrialFlagArr.resize(problem->initialBeliefX->size());
00038                 priorityQueueArr.resize(problem->initialBeliefX->size());
00039                 nextNodeTargetUbArr.resize(problem->initialBeliefX->size());
00040                 nextNodeTargetLbArr.resize(problem->initialBeliefX->size());
00041                 FOR(r, problem->initialBeliefX->size()) 
00042                 {
00043                         double rprob = (*(problem->initialBeliefX))(r);
00044                         if (rprob > OBS_IS_ZERO_EPS) {
00045                                 depthArr[r] = 0;
00046                                 trialTargetPrecisionArr[r] = -1;
00047                                 newTrialFlagArr[r]  = 0; //ADD SYLTAG
00048                         } else {
00049                                 depthArr[r] = -10;
00050                                 trialTargetPrecisionArr[r] = -10;
00051                                 newTrialFlagArr[r]  = -10; //ADD SYLTAG
00052                         }
00053                 }
00054         }
00055         SampleBP::SampleBP ()
00056         {
00057         }
00058 
00059         void SampleBP::setRandomization(bool newFlag)
00060         {
00061                 doRandomization = newFlag;
00062         }
00063         //first level method
00064         //Function: sample
00065         //Functionality: 
00066         //    decide which are the next beliefs to be sampled, and
00067         //    return them as a list, at the same time add them into the
00068         //    reachable structure
00069         //Parameters:
00070         //    row: the current belief's row index in beliefCache
00071         //Returns:
00072         //    the list of belief indices in beliefCache which will do backups next
00073         list < cacherow_stval >SampleBP::sample (cacherow_stval currIndexRow, unsigned int currentRoot)
00074         {
00075                 isRoot = false;
00076                 bm->updateNode(currIndexRow); //for Bin Heuristic  // prevly, bm->updateNode(row);//for Bin Heuristic 
00077                 list < cacherow_stval >sampledBeliefs; // prevly, list < int >sampledBeliefs;
00078                 double excessUncertainty;
00079                 double lbVal, ubVal;
00080 
00081                 
00082                 lbVal = solver->beliefCacheSet[currIndexRow.sval]->getRow(currIndexRow.row)->LB;
00083                 ubVal = solver->beliefCacheSet[currIndexRow.sval]->getRow(currIndexRow.row)->UB;
00084                 BeliefTreeNode & currentNode = *(solver->beliefCacheSet[currIndexRow.sval]->getRow(currIndexRow.row)->REACHABLE);
00085 
00086                 DEBUG_TRACE( cout << "SampleBP::sample ub" << ubVal << " lb " << lbVal << endl; );
00087                 DEBUG_TRACE( cout << "SampleBP::sample currentNode row: " << currentNode.cacheIndex.row << " sval " << currentNode.cacheIndex.sval << endl; );
00088 
00089                 //reset depth and precision parameters when it is a new trial
00090                 if (trialTargetPrecisionArr[currentRoot] == -1)//need for hsvi and fixed precision bp
00091                 //if (trialTargetPrecision == -1)//need for hsvi and fixed precision bp
00092                 {
00093                         //ADD SYLTAG
00094                         bool rootFound = false;
00095                         
00096                         FOR (r, solver->beliefForest->getGlobalRootNumSampleroots()) // solver->beliefForest is a data member of Sample class
00097                         {
00098                                 SampleRootEdge* eR = solver->beliefForest->sampleRootEdges[r];
00099                                 if (NULL != eR) {
00100                                         BeliefTreeNode & sn = *eR->sampleRoot;
00101                                         if (&currentNode == &sn) rootFound  = true;
00102                                 }
00103                         }
00104 
00105                         //MOD SYLTAG
00106                         if (!rootFound)
00107                         //if (&currentNode != root) // prevly if (row != 0)     // root is a data member of Sample class
00108                         {
00109                                 printf ("error: new trial not starting from the root\n");
00110                         }
00111 
00112                         //trialTargetPrecision = (ubVal - lbVal) * problem->getDiscount ();
00113                         //cout << "GlobalResource::getInstance()->solverParams->BP_IMPROVEMENT_CONSTANT " << GlobalResource::getInstance()->solverParams->BP_IMPROVEMENT_CONSTANT << endl;
00114                         
00115                         trialTargetPrecisionArr[currentRoot] = (ubVal - lbVal) *  solver->solverParams->BP_IMPROVEMENT_CONSTANT;//not 
00116                         //trialTargetPrecision = (ubVal - lbVal) * GlobalResource::getInstance()->solverParams->BP_IMPROVEMENT_CONSTANT;//not in use for fixed targetPrecision 
00117                         //empty the priority Queue
00118                         priorityQueueArr[currentRoot].clear ();
00119                         isRoot = true;
00120                         nextNodeTargetUbArr[currentRoot] = lbVal;
00121                 }
00122 
00123                 double finalExcess = ubVal - lbVal - (trialTargetPrecisionArr[currentRoot] * 0.5 * pow (problem->getDiscount (), -depthArr[currentRoot]));
00124 
00125                 DEBUG_TRACE( cout << "SampleBP::sample finalExcess " << finalExcess << endl; );
00126 
00127                 if ( (finalExcess) <= 0 )//if final precision reached
00128                 {
00129                         numTrials++;
00130                         trialTargetPrecisionArr[currentRoot]  = -1;
00131                         depthArr[currentRoot] = 0;
00132                         logOcc = log (1.0);
00133                         newTrialFlagArr[currentRoot] = 1; //ADD SYLTAG
00134 
00135                         return priorityQueueArr[currentRoot];   //at the end of each trial, the entire path is returned
00136                 }
00137                 else
00138                 {
00139                         //initialize checking variables----------------------
00140                         double expectedError = trialTargetPrecisionArr[currentRoot]  * pow (problem->getDiscount (), -depthArr[currentRoot]);
00141                         excessUncertainty = ubVal - lbVal - expectedError;
00142                         double curTargetUb = max (nextNodeTargetUbArr[currentRoot], lbVal + expectedError);
00143 
00144                         BPUpdateResult r;
00145 
00146                         DEBUG_TRACE( cout << "SampleBP::sample expectedError " << expectedError << endl; );
00147                         DEBUG_TRACE( cout << "SampleBP::sample excessUncertainty " << excessUncertainty << endl; );
00148                         
00149                         // choose to do randomization or not
00150                         if(doRandomization)
00151                         {
00152                                 r.maxUBAction = chooseAction(currentNode);
00153                                 //cout << "Random Choose action" << endl;
00154                         }
00155                         else
00156                         {
00157                                 r.maxUBAction = solver->upperBoundSet->set[currIndexRow.sval]->dataTable->get(currIndexRow.row).UB_ACTION;
00158                         }
00159                         
00160                         DEBUG_TRACE( cout << "SampleBP::sample r.maxUBAction " << r.maxUBAction << endl; );
00161 
00162                         r.maxUBVal = ubVal;
00163                         getMaxExcessUncOutcome (currentNode, r, currentRoot);
00164 
00165                         //check whether target upper bound is reached by the newly sampled node
00166                         int i = r.maxUBAction;
00167                         int Xn = r.maxExcessUncStateOutcome;
00168                         int j = r.maxExcessUncOutcome;
00169 
00170                         cacherow_stval newIndexRow = currentNode.getNextState (i,j, Xn).cacheIndex;
00171         
00172                         DEBUG_TRACE( cout << "SampleBP::sample int i = r.maxUBAction; " << r.maxUBAction << endl; );
00173                         DEBUG_TRACE( cout << "SampleBP::sample int Xn = r.maxExcessUncStateOutcome; " << Xn << endl; );
00174                         DEBUG_TRACE( cout << "SampleBP::sample int j = r.maxExcessUncOutcome; " << j << endl; );
00175                         DEBUG_TRACE( cout << "SampleBP::sample newIndexRow row " << newIndexRow.row <<  " sval " << newIndexRow.sval << endl; );
00176 
00177 
00178                         // 26092008 added
00179                         // if child node is exactly the same as parent node, and there is no randomization,
00180                         // end the trial here 
00181                         // HOWEVER note that , since the parent node is a child node of itself, just doing repeated backups COULD
00182                         // improve bounds at the same node!
00183 /*                      if ( (!doRandomization) && (newIndexRow.sval == currIndexRow.sval) && (newIndexRow.row == currIndexRow.row))
00184                         {
00185                                 numTrials++;
00186                                 trialTargetPrecision = -1;      //real value computed later when needed
00187                                 depth = 0;
00188                                 logOcc = log (1.0);
00189                                 newTrialFlag = 1; //ADD SYLTAG
00190 #if DEBUGSYL_260908
00191 cout << "End of one trial due to childnode == parentnode" << endl;
00192 #endif
00193                                 return priorityQueue;   //at the end of each trial, the entire path is returned
00194                         } 
00195 */
00196 
00197                         // int newRow = currentNode.getNextState (i,j).cacheIndex;
00198                         double futureValue = 0;
00199                         const BeliefTreeQEntry & Qa = currentNode.Q[i];
00200                         bool proceed = false;
00201                         //compute expectation ub
00202                         FOR (Xval, Qa.getNumStateOutcomes())
00203                         {
00204                                 const BeliefTreeObsState* QaXval = Qa.stateOutcomes[Xval];
00205                                 //const BeliefTreeObsState & QaXval = Qa.stateOutcomes[Xval];
00206                            if (NULL != QaXval ) {
00207                                 FOR (o, QaXval->getNumOutcomes ())
00208                                 //FOR (o, QaXval.getNumOutcomes ())
00209                                 {
00210                                         BeliefTreeEdge *e = QaXval->outcomes[o];
00211                                         //BeliefTreeEdge *e = QaXval.outcomes[o];
00212                                         if (NULL != e && ((o != j) || (Xval != Xn)   ))
00213                                         {
00214                                                 // 26092008 corrected multiplication factor, prevly: e->obsProb
00215                                                 // 061008 changed calculation for obsProb
00216                                                 futureValue += e->obsProb * solver->beliefCacheSet[e->nextState->cacheIndex.sval]->getRow(e->nextState->cacheIndex.row)->UB;
00217                                                 //futureValue += e->obsProb * QaXval->obsStateProb * bounds->boundsSet[e->nextState->cacheIndex.sval]->beliefCache->getRow(e->nextState->cacheIndex.row)->UB;
00218                                         }
00219                                 }
00220                            }
00221                         }
00222 
00223                         /* FOR (o, Qa.getNumOutcomes ())
00224                         {
00225                                 BeliefTreeEdge *e = Qa.outcomes[o];
00226                                 if (NULL != e && o != j)
00227                                 {
00228                                         futureValue += e->obsProb * beliefCache->getRow(e->nextState->cacheIndex)->UB;
00229                                 }
00230                         } */
00231 
00232                         //end initializing variables----------------------
00233 
00234                         if ( CompareIfLowerBoundImprovesAction(i, j, Xn, currentNode, currentRoot) )//if bin value improves lb
00235                         //if ( CompareIfLowerBoundImprovesAction(i, j, currentNode) )//if bin value improves lb
00236                         {
00237                                 DEBUG_TRACE ( cout << "proceed1" << endl; );
00238                                 numBinProceed++;
00239                                 proceed = true;
00240                         }
00241                         else
00242                         {
00243                                 //if termination condition true, return root and start a new trial
00244                                 if ( (excessUncertainty) <= 0)
00245                                 {
00246                                         DEBUG_TRACE ( cout << "proceed2" << endl; );
00247                                         numTrials++;
00248                                         trialTargetPrecisionArr[currentRoot] = -1;      //real value computed later when needed
00249                                         depthArr[currentRoot] = 0;
00250                                         logOcc = log (1.0);
00251                                         newTrialFlagArr[currentRoot] = 1; //ADD SYLTAG
00252 #if DEBUGSYL_260908
00253 cout << "End of one trial due to ( (excessUncertainty) <= 0)" << endl;
00254 #endif
00255 
00256                                         return priorityQueueArr[currentRoot];   //at the end of each trial, the entire path is returned
00257                                 } 
00258                                 else
00259                                 {
00260                                         if ( (currentNode.Q[i].ubVal == CB_QVAL_UNDEFINED || currentNode.Q[i].ubVal > curTargetUb))
00261                                         {
00262                                                 DEBUG_TRACE ( cout << "proceed3" << endl; );
00263                                                 proceed = true;
00264                                         }
00265                                         else
00266                                         {       //targetUpperBound is not reached for this action
00267                                                 DEBUG_TRACE ( cout << "proceed4" << endl; );
00268                                                 numSubOptimal++;//subOptimal = true;
00269                                         }//end bp check
00270                                 }//end excess uncertainty check
00271                         }//end compare lower bound improves action
00272                         
00273                         
00274                         if ( proceed )
00275                         {
00276                                 DEBUG_TRACE ( cout << "proceeded" << endl; );
00277 #if DEBUGSYL_260908
00278 cout << "In sample(), parent.sval : " << currIndexRow.sval << " .row : " << currIndexRow.row ;
00279 cout << " | action : " << i << " observation : " << j ;
00280 cout << " | child.sval : " << newIndexRow.sval << ".row " << newIndexRow.row << endl;
00281 #endif
00282 
00283                                 //double curUb = bounds->upperBounds->getValue (currentNode.getNextState (i, j).s);
00284                                 double curUb = solver->upperBoundSet->getValue(currentNode.getNextState (i, j, Xn).s);
00285 
00286                                 //double curUb = bounds->upperBoundBVpair->getValue (currentNode.getNextState (i, j).s);
00287                                 solver->beliefCacheSet[newIndexRow.sval]->getRow(newIndexRow.row)->UB = curUb; // beliefCache->getRow(newRow)->UB = curUb;
00288                                 // 061008 changed calculation for obsProb
00289                                 nextNodeTargetUbArr[currentRoot] = ((curTargetUb - currentNode.Q[i].immediateReward) / problem->getDiscount() - futureValue) / (Qa.stateOutcomes[Xn]->outcomes[j]->obsProb) ;
00290                                 //nextNodeTargetUb = ((curTargetUb - currentNode.Q[i].immediateReward) / problem->getDiscount() - futureValue) / (Qa.stateOutcomes[Xn]->outcomes[j]->obsProb * Qa.stateOutcomes[Xn]->obsStateProb) ;
00291                                 // the above takes the place of this
00292                                 //nextNodeTargetUb = ((curTargetUb - currentNode.Q[i].immediateReward) / problem->getDiscount() - futureValue) / Qa.stateOutcomes[Xn]->outcomes[j]->obsProb;
00293 
00294                                 //nextNodeTargetUb = ((curTargetUb - currentNode.Q[i].immediateReward) / problem->getDiscount() - futureValue) / Qa.outcomes[j]->obsProb;
00295                                 assert (-1 != r.maxExcessUncOutcome);
00296                                 sampledBeliefs.push_back (newIndexRow);
00297                                 samplePrepare (sampledBeliefs.back ());
00298 //TODO                          dump(currIndexRow, newIndexRow);
00299                                 priorityQueueArr[currentRoot].push_front (currIndexRow);
00300                                 depthArr[currentRoot]++;
00301 
00302                                 return sampledBeliefs;
00303                         }
00304                         else
00305                         {
00306                                 numTrials++;
00307                                 trialTargetPrecisionArr[currentRoot] = -1;      //real value computed later when needed
00308                                 depthArr[currentRoot] = 0;
00309                                 logOcc = log (1.0);
00310                                 newTrialFlagArr[currentRoot] = 1; //ADD SYLTAG
00311 
00312 #if DEBUGSYL_260908
00313 cout << "End of one trial due to !( (currentNode.Q[i].ubVal == CB_QVAL_UNDEFINED || currentNode.Q[i].ubVal > curTargetUb))" << endl;
00314 #endif
00315 
00316 
00317                                 return priorityQueueArr[currentRoot];   //at the end of each trial, the entire path is returned
00318                         }
00319                 }//end final precision check
00320         }
00321 
00322         //third level method
00323         //Function: getMaxExecessUncOutcome
00324         //Functionality:
00325         //    update the maxExcessUnc and maxExcessUncOutcome for r
00326         //Parameters:
00327         //    cn:     the node at which updates needs to be done
00328         //    r:      the result of which to be updated
00329         //Returns:
00330         //    NA
00331         //Called by: sample()
00332         void SampleBP::getMaxExcessUncOutcome (BeliefTreeNode & cn, BPUpdateResult & r, unsigned int currentRoot) const
00333         {
00334                 r.maxExcessUnc = -99e+20;
00335                 r.maxExcessUncOutcome = -1;
00336                 r.maxExcessUncStateOutcome = -1;
00337                 double width;
00338                 double lbVal, ubVal;
00339                 BeliefTreeQEntry & Qa = cn.Q[r.maxUBAction];
00340                 FOR (x, Qa.getNumStateOutcomes() )
00341                 {
00342                         BeliefTreeObsState* Qax = Qa.stateOutcomes[x];
00343                         //BeliefTreeObsState & Qax = Qa.stateOutcomes[x];
00344                   if(NULL != Qax) {
00345                         FOR (o, Qax->getNumOutcomes ())
00346                         //FOR (o, Qax.getNumOutcomes ())
00347                         {
00348                                 BeliefTreeEdge *e = Qax->outcomes[o];
00349                                 //BeliefTreeEdge *e = Qax.outcomes[o];
00350                                 if (NULL != e)
00351                                 {
00352                                         BeliefTreeNode & sn = *e->nextState;
00353                                         lbVal = solver->beliefCacheSet[sn.cacheIndex.sval]->getRow(sn.cacheIndex.row)->LB;
00354                                         ubVal = solver->beliefCacheSet[sn.cacheIndex.sval]->getRow(sn.cacheIndex.row)->UB;
00355                                         // 26092008 corrected multiplication factor, prevly: e->obsProb
00356                                         // 061008 changed calculation for obsProb
00357                                         width = e->obsProb * (ubVal - lbVal - trialTargetPrecisionArr[currentRoot] * pow (problem->getDiscount (), -(depthArr[currentRoot] + 1)));//targetPrecision VS trialTargetPrecision ??
00358                                         //width =       e->obsProb * Qax->obsStateProb * (ubVal - lbVal - trialTargetPrecision * pow (problem->getDiscount (), -(depth + 1)));//targetPrecision VS trialTargetPrecision ??
00359                                         if (width > r.maxExcessUnc)
00360                                         {
00361                                                 r.maxExcessUnc = width;
00362                                                 r.maxExcessUncOutcome = o;
00363                                                 r.maxExcessUncStateOutcome = x;
00364                                         }
00365                                 }
00366                         }
00367                    }
00368                 }
00369 
00370                 if(doRandomization)
00371                 {
00372                         //randomly select observation
00373 
00374                         int numObservations = problem->observations->size();
00375                         int numStatesObs = problem->XStates->size();
00376 
00377                         int * xstates = (int *)malloc(sizeof(int)*numObservations*numStatesObs);
00378                         int * observations = (int *)malloc(sizeof(int)*numObservations*numStatesObs);
00379                         double * widths = (double *)malloc(sizeof(double)*numObservations*numStatesObs);
00380                         double max_excess = r.maxExcessUnc;
00381                         int index = 0;
00382 
00383                         FOR (x, Qa.getNumStateOutcomes() )
00384                         {
00385                                 BeliefTreeObsState* Qax = Qa.stateOutcomes[x];
00386                                 //BeliefTreeObsState & Qax = Qa.stateOutcomes[x];
00387                            if (NULL != Qax) {
00388                                 FOR (o, Qax->getNumOutcomes()) {
00389                                 //FOR (o, Qax.getNumOutcomes()) {
00390                                         BeliefTreeEdge* e = Qax->outcomes[o];
00391                                         //BeliefTreeEdge* e = Qax.outcomes[o];
00392                                         if (NULL != e) {
00393                                                 BeliefTreeNode& sn = *e->nextState;
00394                                         lbVal = solver->beliefCacheSet[sn.cacheIndex.sval]->getRow(sn.cacheIndex.row)->LB;
00395                                         ubVal = solver->beliefCacheSet[sn.cacheIndex.sval]->getRow(sn.cacheIndex.row)->UB;
00396                                         // 26092008 corrected multiplication factor, prevly: e->obsProb
00397                                         // 061008 changed calculation for obsProb
00398                                         width = e->obsProb * (ubVal - lbVal - trialTargetPrecisionArr[currentRoot] * pow (problem->getDiscount (), -(depthArr[currentRoot] + 1)));//targetPrecision VS trialTargetPrecision ??
00399                                         //width =       e->obsProb * Qax->obsStateProb * (ubVal - lbVal - trialTargetPrecision * pow (problem->getDiscount (), -(depth + 1)));//targetPrecision VS trialTargetPrecision ??
00400                                                 if (width >= (max_excess - 0.5*1e-9) ) {
00401                                                         observations[index] = o;
00402                                                         xstates[index] = x;
00403                                                         widths[index] = width;
00404                                                         index++;
00405                                                 }
00406                                         }
00407                                 }
00408                             }
00409                         }
00410                         int randVal = rand() % index;
00411                         r.maxExcessUnc = widths[randVal];
00412                         r.maxExcessUncOutcome = observations[randVal];
00413                         r.maxExcessUncStateOutcome = xstates[randVal];
00414                         free(observations);
00415                         free(xstates);
00416                         free(widths);
00417 
00418                 }
00419         }//end method: getMaxExcessUncOutcome
00420 
00421         void SampleBP::setBinManager (BinManagerSet * _bm)
00422         {
00423                 bm = _bm;
00424         }
00425 
00426         bool SampleBP::CompareIfLowerBoundImprovesAction(int action, int observation, int xstate, BeliefTreeNode & currentNode,  unsigned int currentRoot)
00427         {
00428                 int maxAction;
00429                 double maxValue = -99e10;
00430                 FOR (a, problem->getNumActions()) {//Find the best action and the corresponding value
00431                         if (currentNode.Q[a].lbVal > maxValue) {
00432                                 maxValue = currentNode.Q[a].lbVal;
00433                                 maxAction = a;
00434                         }
00435                 }
00436 
00437                 double value = 0;
00438                 double sum_o = 0;
00439                 //outcome_prob_vector opv;
00440 
00441                 // basically, we want to cycle through all valid children of Q[action], except the chosen one (action, xstate, observation)
00442                 // and sum up their LB values, multiplied by P(o,x'|x,b,a)
00443                 const BeliefTreeQEntry & Qa = currentNode.Q[action];
00444                 FOR (x, Qa.getNumStateOutcomes())
00445                 {
00446                         const BeliefTreeObsState* Qax = Qa.stateOutcomes[x];
00447                         if (NULL != Qax ) {
00448                                 FOR (o, Qax->getNumOutcomes ())
00449                                 {
00450                                         BeliefTreeEdge *e = Qax->outcomes[o];
00451 
00452                                         if ( NULL != e && ((o != (unsigned int)observation) || (x != xstate)) )
00453                                         {
00454                                                 // 26092008 corrected multiplication factor, prevly: e->obsProb
00455                                                 // 061008 changed calculation for obsProb
00456                                                 sum_o += e->obsProb * solver->beliefCacheSet[e->nextState->cacheIndex.sval]->getRow(e->nextState->cacheIndex.row)->LB;
00457                                                 //sum_o += e->obsProb * Qax->obsStateProb * bounds->boundsSet[e->nextState->cacheIndex.sval]->beliefCache->getRow(e->nextState->cacheIndex.row)->LB;
00458                                         }
00459                                 }
00460                         }
00461                 }
00462 // the above takes the place of this
00463 /*              FOR (x, currentNode.Q[action].getNumStateOutcomes()) {
00464                         problem->getObsProbVector(opv, currentNode.s, action, x);
00465                         // problem->getOutcomeProbVector(opv, currentNode.s, action);
00466                         FOR(o, opv.size())
00467                         {
00468                             if ( ((o != (unsigned int)observation) || (x != xstate)) && opv(o) > OBS_IS_ZERO_EPS )//calculate all except the xstate and observation we are considering
00469                                 {
00470                                   sum_o += opv(o) * bounds->boundsSet[currentNode.getNextState (action, o, x).cacheIndex.sval]->beliefCache->getRow(currentNode.getNextState (action, o, x).cacheIndex.row)->LB;
00471                                 }
00472                         }
00473                 }
00474 */
00475                 // now add the predicted LB of chosen node, multiplied by P(o,x'|x,b,a)
00476                 // for ref: e->obsProb * Qax->obsStateProb  
00477                 // 061008 changed calculation for obsProb
00478                 double oxp = Qa.stateOutcomes[xstate]->outcomes[observation]->obsProb;
00479                 //double oxp = Qa.stateOutcomes[xstate]->outcomes[observation]->obsProb * Qa.stateOutcomes[xstate]->obsStateProb;
00480                 value = sum_o + (oxp * bm->getBinValue( currentNode.getNextState (action, observation, xstate).cacheIndex ));
00481                 // the above takes the place of this
00482                 /* problem->getObsProbVector(opv, currentNode.s, action, xstate);
00483                 value = sum_o + (opv(observation) * bm->getBinValue( currentNode.getNextState (action, observation, xstate).cacheIndex )); */
00484 
00485                 value *= problem->getDiscount();
00486                 value += currentNode.Q[action].immediateReward;
00487                 
00488                 double curTargetLb;
00489                 if (isRoot)
00490                 {
00491                         curTargetLb = maxValue;
00492                 }       
00493                 else
00494                 {
00495                         curTargetLb = max(maxValue, nextNodeTargetLbArr[currentRoot]);
00496                 }
00497 
00498                 nextNodeTargetLbArr[currentRoot] = ((curTargetLb - currentNode.Q[action].immediateReward) / problem->getDiscount() - sum_o) / oxp;
00499                 //nextNodeTargetLb = ((curTargetLb - currentNode.Q[action].immediateReward) / problem->getDiscount() - sum_o) / opv(observation);
00500 
00501                 return (value > curTargetLb);
00502         }
00503 
00504 // doesnt seem to be used? - SYL 23072008
00505         double SampleBP::calculateLowerBoundValue(int action, BeliefTreeNode & currentNode)
00506         {
00507                 double value = 0;
00508                 outcome_prob_vector opv;
00509 
00510                 FOR (x, currentNode.Q[action].getNumStateOutcomes()) {
00511 
00512                         problem->getObsProbVector(opv, *currentNode.s, action, x);
00513                         FOR(o, opv.size())
00514                         {
00515                                 if ( opv(o) > OBS_IS_ZERO_EPS )
00516                                 {       
00517                                         value += opv(o) *  solver->beliefCacheSet[currentNode.getNextState (action, o, x).cacheIndex.sval]->getRow(currentNode.getNextState (action, o, x).cacheIndex.row)->LB;
00518                                 }
00519                         }
00520                 }
00521                 value *= problem->getDiscount();
00522                 value += currentNode.Q[action].immediateReward;
00523                 return value;
00524         }
00525 
00526         int SampleBP::chooseAction(BeliefTreeNode & currentNode)
00527         {
00528                 int * actions = (int *)malloc(sizeof(int)*problem->getNumActions());
00529                 int max_action = solver->upperBoundSet->set[currentNode.cacheIndex.sval]->dataTable->get(currentNode.cacheIndex.row).UB_ACTION;
00530 
00531                 int index = 0;
00532                 int action = max_action;
00533 
00534                 FOR (a, problem->getNumActions()) {
00535                         if ( currentNode.Q[a].ubVal >= (currentNode.Q[max_action].ubVal - 0.5*1e-9) ) {
00536                                 actions[index] = a;
00537                                 index++;
00538                         }
00539                 }
00540                 action = actions[ rand() % index ];
00541                 free(actions);
00542                 return action;
00543         }
00544 };//end namespace momdp


appl
Author(s): petercai
autogenerated on Tue Jan 7 2014 11:02:29