00001
00004
00005
00006
00007
00008
00009 #include "SampleBP.h"
00010 #include "SARSOP.h"
00011
00012 #include "GlobalResource.h"
00013
00014
00015
00016
00017 namespace momdp
00018 {
00019
00020 void SampleBP::setup(SharedPointer<MOMDP> _problem, SARSOP* _solver)
00021 {
00022 Sample::setup(_solver, _problem, &(_solver->beliefCacheSet), _solver->beliefForest);
00023
00024 problem = _problem;
00025 solver = _solver;
00026 srand( time(NULL) );
00027
00028
00029
00030 numTrials = 0;
00031 numSubOptimal = 0;
00032 numBinProceed = 0;
00033
00034
00035 depthArr.resize(problem->initialBeliefX->size());
00036 trialTargetPrecisionArr.resize(problem->initialBeliefX->size());
00037 newTrialFlagArr.resize(problem->initialBeliefX->size());
00038 priorityQueueArr.resize(problem->initialBeliefX->size());
00039 nextNodeTargetUbArr.resize(problem->initialBeliefX->size());
00040 nextNodeTargetLbArr.resize(problem->initialBeliefX->size());
00041 FOR(r, problem->initialBeliefX->size())
00042 {
00043 double rprob = (*(problem->initialBeliefX))(r);
00044 if (rprob > OBS_IS_ZERO_EPS) {
00045 depthArr[r] = 0;
00046 trialTargetPrecisionArr[r] = -1;
00047 newTrialFlagArr[r] = 0;
00048 } else {
00049 depthArr[r] = -10;
00050 trialTargetPrecisionArr[r] = -10;
00051 newTrialFlagArr[r] = -10;
00052 }
00053 }
00054 }
00055 SampleBP::SampleBP ()
00056 {
00057 }
00058
00059 void SampleBP::setRandomization(bool newFlag)
00060 {
00061 doRandomization = newFlag;
00062 }
00063
00064
00065
00066
00067
00068
00069
00070
00071
00072
00073 list < cacherow_stval >SampleBP::sample (cacherow_stval currIndexRow, unsigned int currentRoot)
00074 {
00075 isRoot = false;
00076 bm->updateNode(currIndexRow);
00077 list < cacherow_stval >sampledBeliefs;
00078 double excessUncertainty;
00079 double lbVal, ubVal;
00080
00081
00082 lbVal = solver->beliefCacheSet[currIndexRow.sval]->getRow(currIndexRow.row)->LB;
00083 ubVal = solver->beliefCacheSet[currIndexRow.sval]->getRow(currIndexRow.row)->UB;
00084 BeliefTreeNode & currentNode = *(solver->beliefCacheSet[currIndexRow.sval]->getRow(currIndexRow.row)->REACHABLE);
00085
00086 DEBUG_TRACE( cout << "SampleBP::sample ub" << ubVal << " lb " << lbVal << endl; );
00087 DEBUG_TRACE( cout << "SampleBP::sample currentNode row: " << currentNode.cacheIndex.row << " sval " << currentNode.cacheIndex.sval << endl; );
00088
00089
00090 if (trialTargetPrecisionArr[currentRoot] == -1)
00091
00092 {
00093
00094 bool rootFound = false;
00095
00096 FOR (r, solver->beliefForest->getGlobalRootNumSampleroots())
00097 {
00098 SampleRootEdge* eR = solver->beliefForest->sampleRootEdges[r];
00099 if (NULL != eR) {
00100 BeliefTreeNode & sn = *eR->sampleRoot;
00101 if (¤tNode == &sn) rootFound = true;
00102 }
00103 }
00104
00105
00106 if (!rootFound)
00107
00108 {
00109 printf ("error: new trial not starting from the root\n");
00110 }
00111
00112
00113
00114
00115 trialTargetPrecisionArr[currentRoot] = (ubVal - lbVal) * solver->solverParams->BP_IMPROVEMENT_CONSTANT;
00116
00117
00118 priorityQueueArr[currentRoot].clear ();
00119 isRoot = true;
00120 nextNodeTargetUbArr[currentRoot] = lbVal;
00121 }
00122
00123 double finalExcess = ubVal - lbVal - (trialTargetPrecisionArr[currentRoot] * 0.5 * pow (problem->getDiscount (), -depthArr[currentRoot]));
00124
00125 DEBUG_TRACE( cout << "SampleBP::sample finalExcess " << finalExcess << endl; );
00126
00127 if ( (finalExcess) <= 0 )
00128 {
00129 numTrials++;
00130 trialTargetPrecisionArr[currentRoot] = -1;
00131 depthArr[currentRoot] = 0;
00132 logOcc = log (1.0);
00133 newTrialFlagArr[currentRoot] = 1;
00134
00135 return priorityQueueArr[currentRoot];
00136 }
00137 else
00138 {
00139
00140 double expectedError = trialTargetPrecisionArr[currentRoot] * pow (problem->getDiscount (), -depthArr[currentRoot]);
00141 excessUncertainty = ubVal - lbVal - expectedError;
00142 double curTargetUb = max (nextNodeTargetUbArr[currentRoot], lbVal + expectedError);
00143
00144 BPUpdateResult r;
00145
00146 DEBUG_TRACE( cout << "SampleBP::sample expectedError " << expectedError << endl; );
00147 DEBUG_TRACE( cout << "SampleBP::sample excessUncertainty " << excessUncertainty << endl; );
00148
00149
00150 if(doRandomization)
00151 {
00152 r.maxUBAction = chooseAction(currentNode);
00153
00154 }
00155 else
00156 {
00157 r.maxUBAction = solver->upperBoundSet->set[currIndexRow.sval]->dataTable->get(currIndexRow.row).UB_ACTION;
00158 }
00159
00160 DEBUG_TRACE( cout << "SampleBP::sample r.maxUBAction " << r.maxUBAction << endl; );
00161
00162 r.maxUBVal = ubVal;
00163 getMaxExcessUncOutcome (currentNode, r, currentRoot);
00164
00165
00166 int i = r.maxUBAction;
00167 int Xn = r.maxExcessUncStateOutcome;
00168 int j = r.maxExcessUncOutcome;
00169
00170 cacherow_stval newIndexRow = currentNode.getNextState (i,j, Xn).cacheIndex;
00171
00172 DEBUG_TRACE( cout << "SampleBP::sample int i = r.maxUBAction; " << r.maxUBAction << endl; );
00173 DEBUG_TRACE( cout << "SampleBP::sample int Xn = r.maxExcessUncStateOutcome; " << Xn << endl; );
00174 DEBUG_TRACE( cout << "SampleBP::sample int j = r.maxExcessUncOutcome; " << j << endl; );
00175 DEBUG_TRACE( cout << "SampleBP::sample newIndexRow row " << newIndexRow.row << " sval " << newIndexRow.sval << endl; );
00176
00177
00178
00179
00180
00181
00182
00183
00184
00185
00186
00187
00188
00189
00190
00191
00192
00193
00194
00195
00196
00197
00198 double futureValue = 0;
00199 const BeliefTreeQEntry & Qa = currentNode.Q[i];
00200 bool proceed = false;
00201
00202 FOR (Xval, Qa.getNumStateOutcomes())
00203 {
00204 const BeliefTreeObsState* QaXval = Qa.stateOutcomes[Xval];
00205
00206 if (NULL != QaXval ) {
00207 FOR (o, QaXval->getNumOutcomes ())
00208
00209 {
00210 BeliefTreeEdge *e = QaXval->outcomes[o];
00211
00212 if (NULL != e && ((o != j) || (Xval != Xn) ))
00213 {
00214
00215
00216 futureValue += e->obsProb * solver->beliefCacheSet[e->nextState->cacheIndex.sval]->getRow(e->nextState->cacheIndex.row)->UB;
00217
00218 }
00219 }
00220 }
00221 }
00222
00223
00224
00225
00226
00227
00228
00229
00230
00231
00232
00233
00234 if ( CompareIfLowerBoundImprovesAction(i, j, Xn, currentNode, currentRoot) )
00235
00236 {
00237 DEBUG_TRACE ( cout << "proceed1" << endl; );
00238 numBinProceed++;
00239 proceed = true;
00240 }
00241 else
00242 {
00243
00244 if ( (excessUncertainty) <= 0)
00245 {
00246 DEBUG_TRACE ( cout << "proceed2" << endl; );
00247 numTrials++;
00248 trialTargetPrecisionArr[currentRoot] = -1;
00249 depthArr[currentRoot] = 0;
00250 logOcc = log (1.0);
00251 newTrialFlagArr[currentRoot] = 1;
00252 #if DEBUGSYL_260908
00253 cout << "End of one trial due to ( (excessUncertainty) <= 0)" << endl;
00254 #endif
00255
00256 return priorityQueueArr[currentRoot];
00257 }
00258 else
00259 {
00260 if ( (currentNode.Q[i].ubVal == CB_QVAL_UNDEFINED || currentNode.Q[i].ubVal > curTargetUb))
00261 {
00262 DEBUG_TRACE ( cout << "proceed3" << endl; );
00263 proceed = true;
00264 }
00265 else
00266 {
00267 DEBUG_TRACE ( cout << "proceed4" << endl; );
00268 numSubOptimal++;
00269 }
00270 }
00271 }
00272
00273
00274 if ( proceed )
00275 {
00276 DEBUG_TRACE ( cout << "proceeded" << endl; );
00277 #if DEBUGSYL_260908
00278 cout << "In sample(), parent.sval : " << currIndexRow.sval << " .row : " << currIndexRow.row ;
00279 cout << " | action : " << i << " observation : " << j ;
00280 cout << " | child.sval : " << newIndexRow.sval << ".row " << newIndexRow.row << endl;
00281 #endif
00282
00283
00284 double curUb = solver->upperBoundSet->getValue(currentNode.getNextState (i, j, Xn).s);
00285
00286
00287 solver->beliefCacheSet[newIndexRow.sval]->getRow(newIndexRow.row)->UB = curUb;
00288
00289 nextNodeTargetUbArr[currentRoot] = ((curTargetUb - currentNode.Q[i].immediateReward) / problem->getDiscount() - futureValue) / (Qa.stateOutcomes[Xn]->outcomes[j]->obsProb) ;
00290
00291
00292
00293
00294
00295 assert (-1 != r.maxExcessUncOutcome);
00296 sampledBeliefs.push_back (newIndexRow);
00297 samplePrepare (sampledBeliefs.back ());
00298
00299 priorityQueueArr[currentRoot].push_front (currIndexRow);
00300 depthArr[currentRoot]++;
00301
00302 return sampledBeliefs;
00303 }
00304 else
00305 {
00306 numTrials++;
00307 trialTargetPrecisionArr[currentRoot] = -1;
00308 depthArr[currentRoot] = 0;
00309 logOcc = log (1.0);
00310 newTrialFlagArr[currentRoot] = 1;
00311
00312 #if DEBUGSYL_260908
00313 cout << "End of one trial due to !( (currentNode.Q[i].ubVal == CB_QVAL_UNDEFINED || currentNode.Q[i].ubVal > curTargetUb))" << endl;
00314 #endif
00315
00316
00317 return priorityQueueArr[currentRoot];
00318 }
00319 }
00320 }
00321
00322
00323
00324
00325
00326
00327
00328
00329
00330
00331
00332 void SampleBP::getMaxExcessUncOutcome (BeliefTreeNode & cn, BPUpdateResult & r, unsigned int currentRoot) const
00333 {
00334 r.maxExcessUnc = -99e+20;
00335 r.maxExcessUncOutcome = -1;
00336 r.maxExcessUncStateOutcome = -1;
00337 double width;
00338 double lbVal, ubVal;
00339 BeliefTreeQEntry & Qa = cn.Q[r.maxUBAction];
00340 FOR (x, Qa.getNumStateOutcomes() )
00341 {
00342 BeliefTreeObsState* Qax = Qa.stateOutcomes[x];
00343
00344 if(NULL != Qax) {
00345 FOR (o, Qax->getNumOutcomes ())
00346
00347 {
00348 BeliefTreeEdge *e = Qax->outcomes[o];
00349
00350 if (NULL != e)
00351 {
00352 BeliefTreeNode & sn = *e->nextState;
00353 lbVal = solver->beliefCacheSet[sn.cacheIndex.sval]->getRow(sn.cacheIndex.row)->LB;
00354 ubVal = solver->beliefCacheSet[sn.cacheIndex.sval]->getRow(sn.cacheIndex.row)->UB;
00355
00356
00357 width = e->obsProb * (ubVal - lbVal - trialTargetPrecisionArr[currentRoot] * pow (problem->getDiscount (), -(depthArr[currentRoot] + 1)));
00358
00359 if (width > r.maxExcessUnc)
00360 {
00361 r.maxExcessUnc = width;
00362 r.maxExcessUncOutcome = o;
00363 r.maxExcessUncStateOutcome = x;
00364 }
00365 }
00366 }
00367 }
00368 }
00369
00370 if(doRandomization)
00371 {
00372
00373
00374 int numObservations = problem->observations->size();
00375 int numStatesObs = problem->XStates->size();
00376
00377 int * xstates = (int *)malloc(sizeof(int)*numObservations*numStatesObs);
00378 int * observations = (int *)malloc(sizeof(int)*numObservations*numStatesObs);
00379 double * widths = (double *)malloc(sizeof(double)*numObservations*numStatesObs);
00380 double max_excess = r.maxExcessUnc;
00381 int index = 0;
00382
00383 FOR (x, Qa.getNumStateOutcomes() )
00384 {
00385 BeliefTreeObsState* Qax = Qa.stateOutcomes[x];
00386
00387 if (NULL != Qax) {
00388 FOR (o, Qax->getNumOutcomes()) {
00389
00390 BeliefTreeEdge* e = Qax->outcomes[o];
00391
00392 if (NULL != e) {
00393 BeliefTreeNode& sn = *e->nextState;
00394 lbVal = solver->beliefCacheSet[sn.cacheIndex.sval]->getRow(sn.cacheIndex.row)->LB;
00395 ubVal = solver->beliefCacheSet[sn.cacheIndex.sval]->getRow(sn.cacheIndex.row)->UB;
00396
00397
00398 width = e->obsProb * (ubVal - lbVal - trialTargetPrecisionArr[currentRoot] * pow (problem->getDiscount (), -(depthArr[currentRoot] + 1)));
00399
00400 if (width >= (max_excess - 0.5*1e-9) ) {
00401 observations[index] = o;
00402 xstates[index] = x;
00403 widths[index] = width;
00404 index++;
00405 }
00406 }
00407 }
00408 }
00409 }
00410 int randVal = rand() % index;
00411 r.maxExcessUnc = widths[randVal];
00412 r.maxExcessUncOutcome = observations[randVal];
00413 r.maxExcessUncStateOutcome = xstates[randVal];
00414 free(observations);
00415 free(xstates);
00416 free(widths);
00417
00418 }
00419 }
00420
00421 void SampleBP::setBinManager (BinManagerSet * _bm)
00422 {
00423 bm = _bm;
00424 }
00425
00426 bool SampleBP::CompareIfLowerBoundImprovesAction(int action, int observation, int xstate, BeliefTreeNode & currentNode, unsigned int currentRoot)
00427 {
00428 int maxAction;
00429 double maxValue = -99e10;
00430 FOR (a, problem->getNumActions()) {
00431 if (currentNode.Q[a].lbVal > maxValue) {
00432 maxValue = currentNode.Q[a].lbVal;
00433 maxAction = a;
00434 }
00435 }
00436
00437 double value = 0;
00438 double sum_o = 0;
00439
00440
00441
00442
00443 const BeliefTreeQEntry & Qa = currentNode.Q[action];
00444 FOR (x, Qa.getNumStateOutcomes())
00445 {
00446 const BeliefTreeObsState* Qax = Qa.stateOutcomes[x];
00447 if (NULL != Qax ) {
00448 FOR (o, Qax->getNumOutcomes ())
00449 {
00450 BeliefTreeEdge *e = Qax->outcomes[o];
00451
00452 if ( NULL != e && ((o != (unsigned int)observation) || (x != xstate)) )
00453 {
00454
00455
00456 sum_o += e->obsProb * solver->beliefCacheSet[e->nextState->cacheIndex.sval]->getRow(e->nextState->cacheIndex.row)->LB;
00457
00458 }
00459 }
00460 }
00461 }
00462
00463
00464
00465
00466
00467
00468
00469
00470
00471
00472
00473
00474
00475
00476
00477
00478 double oxp = Qa.stateOutcomes[xstate]->outcomes[observation]->obsProb;
00479
00480 value = sum_o + (oxp * bm->getBinValue( currentNode.getNextState (action, observation, xstate).cacheIndex ));
00481
00482
00483
00484
00485 value *= problem->getDiscount();
00486 value += currentNode.Q[action].immediateReward;
00487
00488 double curTargetLb;
00489 if (isRoot)
00490 {
00491 curTargetLb = maxValue;
00492 }
00493 else
00494 {
00495 curTargetLb = max(maxValue, nextNodeTargetLbArr[currentRoot]);
00496 }
00497
00498 nextNodeTargetLbArr[currentRoot] = ((curTargetLb - currentNode.Q[action].immediateReward) / problem->getDiscount() - sum_o) / oxp;
00499
00500
00501 return (value > curTargetLb);
00502 }
00503
00504
00505 double SampleBP::calculateLowerBoundValue(int action, BeliefTreeNode & currentNode)
00506 {
00507 double value = 0;
00508 outcome_prob_vector opv;
00509
00510 FOR (x, currentNode.Q[action].getNumStateOutcomes()) {
00511
00512 problem->getObsProbVector(opv, *currentNode.s, action, x);
00513 FOR(o, opv.size())
00514 {
00515 if ( opv(o) > OBS_IS_ZERO_EPS )
00516 {
00517 value += opv(o) * solver->beliefCacheSet[currentNode.getNextState (action, o, x).cacheIndex.sval]->getRow(currentNode.getNextState (action, o, x).cacheIndex.row)->LB;
00518 }
00519 }
00520 }
00521 value *= problem->getDiscount();
00522 value += currentNode.Q[action].immediateReward;
00523 return value;
00524 }
00525
00526 int SampleBP::chooseAction(BeliefTreeNode & currentNode)
00527 {
00528 int * actions = (int *)malloc(sizeof(int)*problem->getNumActions());
00529 int max_action = solver->upperBoundSet->set[currentNode.cacheIndex.sval]->dataTable->get(currentNode.cacheIndex.row).UB_ACTION;
00530
00531 int index = 0;
00532 int action = max_action;
00533
00534 FOR (a, problem->getNumActions()) {
00535 if ( currentNode.Q[a].ubVal >= (currentNode.Q[max_action].ubVal - 0.5*1e-9) ) {
00536 actions[index] = a;
00537 index++;
00538 }
00539 }
00540 action = actions[ rand() % index ];
00541 free(actions);
00542 return action;
00543 }
00544 };