00001
00008 #include <stdlib.h>
00009 #ifdef _MSC_VER
00010 #else
00011 #include <unistd.h>
00012 #endif
00013
00014 #include <stdio.h>
00015 #include <assert.h>
00016
00017 #include <iostream>
00018 #include <fstream>
00019
00020 #include "MOMDP.h"
00021 #include "FastInfUBInitializer.h"
00022 #include "FullObsUBInitializer.h"
00023 #include "BeliefValuePairPoolSet.h"
00024
00025 using namespace std;
00026
00027
00028
00029
00030
00031 namespace momdp {
00032
00033 FastInfUBInitializer::FastInfUBInitializer( SharedPointer<MOMDP> problem, BeliefValuePairPoolSet* _bound)
00034 {
00035 pomdp = ( SharedPointer<MOMDP>) problem;
00036 bound = _bound;
00037
00038
00039
00040
00041
00042 }
00043
00044 FastInfUBInitializer::FastInfUBInitializer(SharedPointer<MOMDP> problem)
00045 {
00046 pomdp = ( SharedPointer<MOMDP>) problem;
00047
00048
00049
00050
00051
00052 }
00053
00054 void FastInfUBInitializer::initialize(double targetPrecision)
00055 {
00056 if(pomdp->XStates->size() != 1 && pomdp->hasPOMDPMatrices())
00057 {
00058
00059 initFIB_unfac(targetPrecision, false);
00060 }
00061 else
00062 {
00063 initFIB(targetPrecision, false);
00064 }
00065
00066
00067
00068
00069
00070 }
00071
00072 void FastInfUBInitializer::getFIBsolution(double targetPrecision)
00073 {
00074 if(pomdp->XStates->size() != 1 && pomdp->hasPOMDPMatrices())
00075 {
00076
00077 initFIB_unfac(targetPrecision, true);
00078 }
00079 else
00080 {
00081 initFIB(targetPrecision, true);
00082 }
00083
00084
00085
00086
00087
00088 }
00089
00090
00091 void FastInfUBInitializer::initMDP_unfac(double targetPrecision)
00092 {
00093
00094 FullObsUBInitializer m;
00095 m.valueIteration_unfac(pomdp, targetPrecision);
00096 DenseVector calpha;
00097 copy(calpha, m.alpha);
00098
00099 alphas.clear();
00100 alphas.push_back(calpha);
00101
00102
00103
00104
00105
00106
00107
00108
00109
00110 }
00111
00112 void FastInfUBInitializer::initFIB_unfac(double targetPrecision, bool getFIBvectors)
00113 {
00114
00115 std::vector< DenseVector > al(pomdp->actions->size());
00116 std::vector< DenseVector > nextAl(pomdp->actions->size());
00117 DenseVector tmp, beta_aoi, beta_ao, diff;
00118 double maxResidual;
00119 alpha_vector backup;
00120
00121 initMDP_unfac(MDP_RESIDUAL);
00122
00123
00124
00125
00126
00127 FullObsUBInitializer m;
00128 m.pomdp = pomdp;
00129 alpha_vector& alpha = alphas[0];
00130 copy(m.alpha, alpha);
00131
00132 FOR (a, pomdp->actions->size())
00133 {
00134
00135
00136 al[a].resize(pomdp->XStates->size() * pomdp->YStates->size());
00137 nextAl[a].resize(pomdp->XStates->size() * pomdp->YStates->size());
00138 m.nextAlphaAction_unfac(al[a], a);
00139 }
00140
00141
00142 do {
00143 FOR (a, pomdp->actions->size()) {
00144 DenseVector& beta_a = nextAl[a];
00145
00146 set_to_zero( beta_a );
00147
00148
00149 FOR (o, pomdp->observations->size()) {
00150 FOR (i, pomdp->actions->size()) {
00151 emult_column( tmp, *(*(pomdp->pomdpO))[a], o, al[i] );
00152
00153 mult( beta_aoi, tmp, *(*(pomdp->pomdpTtr))[a] );
00154
00155 if (0 == i) {
00156 beta_ao = beta_aoi;
00157 } else {
00158 max_assign( beta_ao, beta_aoi );
00159 }
00160 }
00161 beta_a += beta_ao;
00162 }
00163
00164 beta_a *= pomdp->discount;
00165 copy_from_column( tmp, (*(pomdp->pomdpR)), a );
00166
00167 beta_a += tmp;
00168 }
00169
00170 maxResidual = 0;
00171 FOR (a, pomdp->actions->size()) {
00172 diff = nextAl[a];
00173 diff -= al[a];
00174 maxResidual = std::max( maxResidual, diff.norm_inf() );
00175
00176 al[a] = nextAl[a];
00177 }
00178
00179 } while ( maxResidual > targetPrecision );
00180
00181 if (!getFIBvectors)
00182 { DenseVector dalpha;
00183 FOR (a, pomdp->actions->size())
00184 {
00185 if (0 == a)
00186 {
00187 dalpha = al[a];
00188 }
00189 else
00190 {
00191 max_assign(dalpha, al[a]);
00192 }
00193 }
00194
00195
00196
00197
00198 FOR (i, pomdp->XStates->size()*pomdp->YStates->size()) {
00199
00200
00201 unsigned int x = (unsigned int) i/(pomdp->YStates->size());
00202 unsigned int y = i % (pomdp->YStates->size());
00203
00204 if (pomdp->isPOMDPTerminalState[x][y]) {
00205
00206 dalpha(i) = 0.0;
00207 }
00208 }
00209
00210
00211
00212
00213
00214
00215
00216
00217
00218
00219
00220
00221
00222 std::vector<DenseVector> dalphas(pomdp->XStates->size());
00223
00224 FOR (s, pomdp->XStates->size() * pomdp->YStates->size()) {
00225
00226 unsigned int x = (unsigned int) s/(pomdp->YStates->size());
00227 unsigned int y = s % (pomdp->YStates->size());
00228
00229 if (y==0) dalphas[x].resize(pomdp->YStates->size());
00230
00231 dalphas[x](y) = dalpha(s);
00232 }
00233
00234
00235 FOR (state_idx, pomdp->XStates->size())
00236 {
00237 bound->set[state_idx]->points.clear();
00238 copy(bound->set[state_idx]->cornerPoints,dalphas[state_idx]);
00239
00240
00241
00242
00243
00244 }
00245 } else {
00246 actionAlphaByState.resize(pomdp->actions->size());
00247
00248 FOR (a, pomdp->actions->size())
00249 {
00250 FOR (i, pomdp->XStates->size()*pomdp->YStates->size()) {
00251
00252
00253 unsigned int x = (unsigned int) i/(pomdp->YStates->size());
00254 unsigned int y = i % (pomdp->YStates->size());
00255
00256 if (pomdp->isPOMDPTerminalState[x][y]) {
00257
00258 al[a](i) = 0.0;
00259 }
00260
00261 if (x==0) actionAlphaByState[a].resize(pomdp->XStates->size());
00262 if (y==0) actionAlphaByState[a][x].resize(pomdp->YStates->size());
00263 actionAlphaByState[a][x](y) = al[a](i);
00264 }
00265 }
00266
00267 }
00268 }
00269
00270 void FastInfUBInitializer::initMDP(double targetPrecision, FullObsUBInitializer& m)
00271
00272 {
00273
00274
00275 m.valueIteration(pomdp, targetPrecision);
00276
00277
00278
00279
00280
00281
00282
00283
00284
00285
00286
00287
00288
00289
00290
00291
00292
00293
00294
00295
00296
00297
00298
00299
00300
00301
00302
00303
00304
00305
00306
00307
00308
00309
00310
00311
00312
00313 }
00314
00315 void FastInfUBInitializer::initFIB(double targetPrecision, bool getFIBvectors)
00316 {
00317
00318
00319
00320
00321 std::vector< std::vector<DenseVector> > al(pomdp->actions->size());
00322 std::vector< std::vector<DenseVector> > nextAl(pomdp->actions->size());
00323
00324 DenseVector tmp, tmp2, diff, beta_ao;
00325 DenseVector beta_aoXn_unweighted, beta_aoXn;
00326 double maxResidual;
00327
00328
00329 FullObsUBInitializer M;
00330
00331 initMDP(MDP_RESIDUAL, M);
00332
00333
00334
00335
00336
00337
00338
00339
00340
00341
00342
00343
00344
00345
00346
00347
00348
00349
00350
00351
00352
00353
00354
00355
00356
00357 FOR (a, pomdp->actions->size()) {
00358 al[a].resize(pomdp->XStates->size());
00359 nextAl[a].resize(pomdp->XStates->size());
00360
00361 M.nextAlphaAction(al[a], a);
00362 }
00363
00364 bool valid_o_and_Xn;
00365
00366 do {
00367
00368 maxResidual = 0;
00369
00370 FOR (a, pomdp->actions->size())
00371 {
00372 std::vector< DenseVector >& beta_a = nextAl[a];
00373
00374 FOR (Xc, pomdp->XStates->size())
00375 {
00376 beta_a[Xc].resize(pomdp->YStates->size());
00377
00378 FOR (o, pomdp->observations->size())
00379 {
00380 beta_ao.resize(pomdp->YStates->size());
00381 valid_o_and_Xn = false;
00382
00383
00384 const vector<int>& possibleXns = pomdp->XTrans->getMatrix(a, Xc)->nonEmptyColumns();
00385 FOREACH (int, XnIt, possibleXns)
00386 {
00387 int Xn = *XnIt;
00388 if (!(pomdp->obsProb->getMatrix(a, Xn)->isColumnEmpty(o)))
00389 {
00390 valid_o_and_Xn = true;
00391
00392 FOR (i, pomdp->actions->size())
00393 {
00394 emult_column( tmp, *pomdp->obsProb->getMatrix(a, Xn), o, al[i][Xn] );
00395 mult( tmp2, *pomdp->YTrans->getMatrix(a, Xc, Xn), tmp);
00396
00397
00398
00399 if (0 == i) {
00400 beta_aoXn_unweighted = tmp2;
00401 } else {
00402 max_assign( beta_aoXn_unweighted, tmp2 );
00403 }
00404
00405 }
00406
00407 emult_column(beta_aoXn, *pomdp->XTrans->getMatrix(a, Xc), Xn, beta_aoXn_unweighted);
00408 beta_ao += beta_aoXn;
00409 }
00410 }
00411
00412 if (valid_o_and_Xn)
00413
00414 beta_a[Xc] += beta_ao;
00415
00416 }
00417
00418 beta_a[Xc] *= pomdp->discount;
00419
00420 copy_from_column( tmp, *pomdp->rewards->getMatrix(Xc), a );
00421 beta_a[Xc] += tmp;
00422
00423
00424 diff = beta_a[Xc];
00425 diff -= al[a][Xc];
00426 maxResidual = std::max( maxResidual, diff.norm_inf() );
00427 }
00428 }
00429
00430
00431 al = nextAl;
00432
00433
00434
00435
00436
00437
00438
00439
00440 } while ( maxResidual > targetPrecision );
00441
00442
00443
00444
00445
00446
00447
00448
00449
00450 #if DEBUGSYL_290908
00451 cout << "targetPrecision : " << targetPrecision << endl;
00452 cout << "maxResidual : " << maxResidual << " iterationCount : " << iterationCount << endl;
00453 #endif
00454
00455 #if DEBUGSYL_160908
00456 cout << "targetPrecision : " << targetPrecision << endl;
00457 cout << "maxResidual : " << maxResidual << " iterationCount : " << iterationCount << endl;
00458 cout << "After iteration, al " << endl;
00459 FOR (a, pomdp->actions->size()) {
00460 cout << "a : " << a << endl;
00461
00462 for (unsigned int stateidx=0; stateidx < alphasByState.size(); stateidx++)
00463 {
00464 cout << "stateidx : " << stateidx << endl;
00465 al[a][stateidx].write(std::cout);
00466 cout << endl;
00467 }
00468 }
00469 #endif
00470
00471 if (!getFIBvectors) {
00472
00473 DenseVector dalpha;
00474
00475 FOR (state_idx, pomdp->XStates->size()) {
00476
00477 FOR (a, pomdp->actions->size()) {
00478 if (0 == a) {
00479 dalpha = al[a][state_idx];
00480 } else {
00481 max_assign(dalpha, al[a][state_idx]);
00482 }
00483 }
00484
00485
00486
00487
00488
00489
00490 FOR (i, pomdp->YStates->size()) {
00491 if (pomdp->isPOMDPTerminalState[state_idx][i]) {
00492 dalpha(i) = 0.0;
00493 }
00494 }
00495
00496
00497 bound->set[state_idx]->points.clear();
00498 copy(bound->set[state_idx]->cornerPoints,dalpha);
00499
00500 }
00501
00502 } else {
00503
00504 FOR (state_idx, pomdp->XStates->size()) {
00505
00506 FOR (a, pomdp->actions->size()) {
00507
00508
00509 FOR (i, pomdp->YStates->size()) {
00510 if (pomdp->isPOMDPTerminalState[state_idx][i]) {
00511 al[a][state_idx](i) = 0.0;
00512 }
00513 }
00514 }
00515
00516 }
00517
00518 actionAlphaByState = al;
00519
00520 }
00521 }
00522
00523 };
00524
00525