00001
00008 #include <stdio.h>
00009 #include <stdlib.h>
00010 #include <cfloat>
00011
00012 #ifdef _MSC_VER
00013 #include "getopt.h"
00014 #define NOMINMAX
00015 #include <windows.h>
00016
00017 #else
00018 #include <getopt.h>
00019 #include <sys/time.h>
00020 #endif
00021 #include <signal.h>
00022 #include <iostream>
00023 #include <fstream>
00024 #include <string>
00025 #include <sstream>
00026 #include <assert.h>
00027
00028 #include "CPTimer.h"
00029
00030 #include "GlobalResource.h"
00031
00032
00033
00034 #include "solverUtils.h"
00035 #include "Parser.h"
00036 #include "POMDP.h"
00037 #include "ParserSelector.h"
00038 #include "MOMDP.h"
00039 #include "SARSOP.h"
00040 #include "BackupAlphaPlaneMOMDP.h"
00041 #include "BackupBeliefValuePairMOMDP.h"
00042
00043
00044
00045 #include "FullObsUBInitializer.h"
00046 #include "FastInfUBInitializer.h"
00047
00048 #include <string.h>
00049
00050 using namespace std;
00051 using namespace momdp;
00052
// Declaration of the GlobalMemLimit symbol with C linkage; it is only
// declared in this file, not defined.
#ifdef __cplusplus
extern "C" {
#endif
extern unsigned long GlobalMemLimit;
#ifdef __cplusplus
}
#endif
00059
// Plain holder for two output-related settings.
// Both members are initialised to -1 by the constructor.
// NOTE(review): -1 presumably means "not set" -- confirm against the code
// that consumes these values.
struct OutputParams {
    double timeoutSeconds;
    double interval;
    OutputParams(void);
};

// Sets both members to -1.
OutputParams::OutputParams(void)
    : timeoutSeconds(-1),
      interval(-1)
{
}
00070
00071
00072 #ifdef _MSC_VER
00073 BOOL CtrlHandler( DWORD fdwCtrlType )
00074 {
00075 switch( fdwCtrlType )
00076 {
00077
00078 case CTRL_C_EVENT:
00079 case CTRL_CLOSE_EVENT:
00080 case CTRL_BREAK_EVENT:
00081 case CTRL_SHUTDOWN_EVENT:
00082 case CTRL_LOGOFF_EVENT:
00083 if(GlobalResource::getInstance()->solving)
00084 {
00085 GlobalResource::getInstance()->userTerminatedG = true;
00086 }
00087 else
00088 {
00089 exit(1);
00090 }
00091 printf("*** Received SIGINT. User pressed control-C. ***\n");
00092 printf("\nTerminating ...\n");
00093 fflush(stdout);
00094 GlobalResource::getInstance()->userTerminatedG = true;
00095 return( TRUE );
00096
00097 default:
00098 return FALSE;
00099 }
00100 }
00101
00102 void registerCtrlHanler()
00103 {
00104 if( SetConsoleCtrlHandler( (PHANDLER_ROUTINE) CtrlHandler, TRUE ) )
00105 {
00106
00107 }
00108 else
00109 {
00110
00111 printf( "\nERROR: Could not set control handler");
00112 }
00113 }
00114
00115 #else
00116
00117 void sigIntHandler(int sig) {
00118
00119 if(GlobalResource::getInstance()->solving)
00120 {
00121 GlobalResource::getInstance()->userTerminatedG = true;
00122 }
00123 else
00124 {
00125 exit(1);
00126 }
00127
00128
00129 printf("*** Received SIGINT. User pressed control-C. ***\n");
00130 printf("\nTerminating ...\n");
00131 fflush(stdout);
00132 }
00133
// Installs `handler` for signal `sig` using sigaction().
//
// The action is zero-initialised and uses SA_RESTART as its only flag. If
// sigaction() fails, an error is written to stderr and the process exits
// with EXIT_FAILURE.
void setSignalHandler(int sig, void (*handler)(int))
{
    struct sigaction action;
    memset(&action, 0, sizeof(action));
    action.sa_flags = SA_RESTART;
    action.sa_handler = handler;

    const int rc = sigaction(sig, &action, NULL);
    if (rc == -1) {
        std::cerr << "ERROR: unable to set handler for signal "
                  << sig << std::endl;
        exit(EXIT_FAILURE);
    }
}
00148 #endif
00149
// Prints the command-line help text to stderr and terminates the process
// with exit(-1). This function never returns.
//
// cmdName: program name substituted into the synopsis and example lines.
void usage(const char* cmdName)
{
    std::cerr <<
        "Usage: " << cmdName << " POMDPModelFileName [--fast] [--precision targetPrecision] [--randomization]\n"
        " [--timeout timeLimit] [--memory memoryLimit] [--output policyFileName]\n"
        " [--policy-interval timeInterval]\n"
        " or " << cmdName << " --help (or -h) Print this help\n"
        " or " << cmdName << " --version Print version information\n"
        "\n"
        "Solver options:\n"
        " -f or --fast Use fast (but very picky) alternate parser for .pomdp files.\n"
        " -p or --precision targetPrecision\n"
        " Set targetPrecision as the target precision in solution \n"
        " quality; run ends when target precision is reached. The target\n"
        " precision is 1e-3 by default.\n"
        " --randomization Turn on randomization for the sampling algorithm.\n"
        " Randomization is off by default.\n"
        " --timeout timeLimit Use timeLimit as the timeout in seconds. If running time\n"
        " exceeds the specified value, the solver writes out a policy and\n"
        " terminates. There is no time limit by default.\n"
        " --memory memoryLimit Use memoryLimit as the memory limit in MB. No memory limit\n"
        " by default. If memory usage exceeds the specified value,\n"
        " ofsol writes out a policy and terminates. Set the value to be\n"
        " less than physical memory to avoid swapping.\n"
        " --trial-improvement-factor improvementConstant\n"
        " Use improvementConstant as the trial improvement factor in the\n"
        " sampling algorithm. At the default of 0.5, a trial terminates at\n"
        " a belief when the gap between its upper and lower bound is 0.5 of\n"
        " the current precision at the initial belief.\n"
        "\n"
        "Policy output options:\n"
        " -o or --output policyFileName\n"
        " Use policyFileName as the name of policy output file. The\n"
        " file name is 'out.policy' by default.\n"
        " --policy-interval timeInterval\n"
        " Use timeInterval as the time interval between two consecutive\n"
        " write-out of policy files. If this is not specified, the solver\n"
        " only writes out a policy file upon termination.\n"
        "\n"
        "Examples:\n"
        " " << cmdName << " Hallway.pomdp\n"
        " " << cmdName << " --timeout 100 --output hallway.policy Hallway.pomdp\n"
        "\n"
        ;

    exit(-1);
}
00227
00228
00229 int QMDPSolution(SharedPointer<MOMDP> problem, SolverParams* p)
00230 {
00231 cout << "Generate QMDP Policy" << endl;
00232 double targetPrecision = MDP_RESIDUAL;
00233
00234
00235 FullObsUBInitializer m;
00236 if(problem->XStates->size() != 1 && problem->hasPOMDPMatrices())
00237 {
00238 DEBUG_LOG(cout << "Calling FullObsUBInitialize::QMDPSolution_unfac()" << endl;);
00239
00240
00241
00242 m.QMDPSolution_unfac(problem, targetPrecision);
00243 int numActions = problem->actions->size();
00244 int numXstates = problem->XStates->size();
00245 int numYstates = problem->YStates->size();
00246 m.actionAlphaByState.resize(numActions);
00247 FOR(a, numActions)
00248 {
00249 m.actionAlphaByState[a].resize(numXstates);
00250 FOR (state_idx, numXstates)
00251 {
00252 m.actionAlphaByState[a][state_idx].resize(problem->getBeliefSize());
00253 }
00254
00255 }
00256
00257 FOR(a, numActions)
00258 {
00259 m.UnfacPostProcessing(m.actionAlphas[a], m.actionAlphaByState[a]);
00260 }
00261 }
00262 else
00263 {
00264 DEBUG_LOG(cout << "Calling FullObsUBInitialize::QMDPSolution()" << endl;);
00265
00266 m.QMDPSolution(problem, targetPrecision);
00267 FOR(a, problem->actions->size())
00268 {
00269 m.FacPostProcessing(m.actionAlphaByState[a]);
00270 }
00271 }
00272
00273 AlphaPlanePoolSet alphaPlanePoolSet(NULL);
00274 alphaPlanePoolSet.setProblem(problem);
00275 alphaPlanePoolSet.setSolver(NULL);
00276 alphaPlanePoolSet.initialize();
00277
00278
00279 FOR(a, problem->actions->size())
00280 {
00281 for(int stateidx = 0; stateidx < alphaPlanePoolSet.set.size() ; stateidx ++)
00282 {
00283 SharedPointer<AlphaPlane> plane (new AlphaPlane());
00284 copy(*plane->alpha, m.actionAlphaByState[a][stateidx]);
00285 plane->action = a;
00286 plane->sval = stateidx;
00287
00288 alphaPlanePoolSet.set[stateidx]->addAlphaPlane(plane);
00289 }
00290 }
00291 string outFileName (p->outPolicyFileName);
00292 alphaPlanePoolSet.writeToFile(outFileName, p->problemName);
00293 return 0;
00294 }
00295
00296 int FIBSolution(SharedPointer<MOMDP> problem, SolverParams* p)
00297 {
00298 cout << "Generate FIB Policy" << endl;
00299 double targetPrecision = MDP_RESIDUAL;
00300
00301
00302 FastInfUBInitializer f(problem);
00303 DEBUG_LOG(cout << "Calling FastInfUBInitializer::getFIBsolution()" << endl;); f.getFIBsolution(targetPrecision);
00304
00305 AlphaPlanePoolSet alphaPlanePoolSet(NULL);
00306 alphaPlanePoolSet.setProblem(problem);
00307 alphaPlanePoolSet.setSolver(NULL);
00308 alphaPlanePoolSet.initialize();
00309
00310
00311 FOR(a, problem->actions->size())
00312 {
00313 for(int stateidx = 0; stateidx < alphaPlanePoolSet.set.size() ; stateidx ++)
00314 {
00315 SharedPointer<AlphaPlane> plane (new AlphaPlane());
00316 copy(*plane->alpha, f.actionAlphaByState[a][stateidx]);
00317 plane->action = a;
00318 plane->sval = stateidx;
00319
00320 alphaPlanePoolSet.set[stateidx]->addAlphaPlane(plane);
00321 }
00322 }
00323 string outFileName (p->outPolicyFileName);
00324 alphaPlanePoolSet.writeToFile(outFileName, p->problemName);
00325 return 0;
00326 }
00327
00328 int MDPSolution(SharedPointer<MOMDP> problem, SolverParams* p)
00329 {
00330 cout << "Generate MDP Policy" << endl;
00331 double targetPrecision = MDP_RESIDUAL;
00332
00333
00334 FullObsUBInitializer m;
00335 if(problem->XStates->size() != 1 && problem->hasPOMDPMatrices())
00336 {
00337
00338
00339
00340 m.alphaByState.resize(problem->XStates->size());
00341 DEBUG_LOG(cout << "Calling FullObsUBInitialize::valueIteration_unfac()" << endl;);
00342 m.valueIteration_unfac(problem, targetPrecision);
00343 m.UnfacPostProcessing(m.alpha, m.alphaByState);
00344 }
00345 else
00346 {
00347
00348 DEBUG_LOG(cout << "Calling FullObsUBInitialize::valueIteration()" << endl;);
00349 m.valueIteration(problem, targetPrecision);
00350 m.FacPostProcessing(m.alphaByState);
00351 }
00352
00353 AlphaPlanePoolSet alphaPlanePoolSet(NULL);
00354 alphaPlanePoolSet.setProblem(problem);
00355 alphaPlanePoolSet.setSolver(NULL);
00356 alphaPlanePoolSet.initialize();
00357
00358
00359
00360
00361 if(problem->YStates->size() == 1)
00362 {
00363 for(int stateidx = 0; stateidx < alphaPlanePoolSet.set.size() ; stateidx ++)
00364 {
00365 SharedPointer<AlphaPlane> plane (new AlphaPlane());
00366 int maxAction = 0;
00367 double maxActionLB = -DBL_MAX;
00368
00369
00370 SharedPointer<BeliefWithState> b = SharedPointer<BeliefWithState>(new BeliefWithState);
00371 b->bvec = new SparseVector(); b->bvec->resize(1);
00372 b->bvec->push_back(0,1.0); b->sval=stateidx;
00373
00374 obsState_prob_vector spv;
00375 for(Actions::iterator aIter = problem->actions->begin(); aIter != problem->actions->end(); aIter ++)
00376 {
00377 int a = aIter.index();
00378
00379 double sum = 0.0;
00380 double immediateReward = problem->rewards->getReward(*b, a);
00381 problem->getObsStateProbVector(spv, *b, a);
00382
00383 FOR(Xn, spv.size())
00384 {
00385 double sprob = spv(Xn);
00386 if (sprob > OBS_IS_ZERO_EPS)
00387 {
00388 double childLB = m.alphaByState[Xn](0);
00389 sum += childLB * sprob;
00390 }
00391 }
00392 sum *= problem->getDiscount();
00393 sum += immediateReward;
00394
00395 if(sum > maxActionLB)
00396 {
00397 maxActionLB = sum;
00398 maxAction = a;
00399 }
00400 assert(maxActionLB != -DBL_MAX);
00401 }
00402
00403 copy(*plane->alpha, m.alphaByState[stateidx]);
00404 plane->action = maxAction;
00405 plane->sval = stateidx;
00406
00407 alphaPlanePoolSet.set[stateidx]->addAlphaPlane(plane);
00408 }
00409 }
00410 else{
00411 for(int stateidx = 0; stateidx < alphaPlanePoolSet.set.size() ; stateidx ++)
00412 {
00413 SharedPointer<AlphaPlane> plane (new AlphaPlane());
00414 copy(*plane->alpha, m.alphaByState[stateidx]);
00415 plane->action = -1;
00416 plane->sval = stateidx;
00417
00418 alphaPlanePoolSet.set[stateidx]->addAlphaPlane(plane);
00419 }
00420 }
00421
00422 string outFileName (p->outPolicyFileName);
00423 alphaPlanePoolSet.writeToFile(outFileName, p->problemName);
00424 return 0;
00425 }
00426 int main(int argc, char **argv)
00427 {
00428
00429
00430 {
00431 SolverParams* p = &GlobalResource::getInstance()->solverParams;
00432
00433 bool parseCorrect = SolverParams::parseCommandLineOption(argc, argv, *p);
00434 if(!parseCorrect)
00435 {
00436 usage(p->cmdName);
00437 exit(EXIT_FAILURE);
00438 }
00439
00440
00441 OutputParams op;
00442 if(GlobalResource::getInstance()->benchmarkMode)
00443 {
00444 if(GlobalResource::getInstance()->simNum == 0|| GlobalResource::getInstance()->simLen == 0)
00445 {
00446 cout << "Benchmark Length and/or Number not set, please set them using option --simLen and --simNum" << endl;
00447 exit(-1);
00448 }
00449 }
00450
00451
00452 GlobalResource::getInstance()->init();
00453 string baseName = GlobalResource::getInstance()->parseBaseNameWithoutPath(p->problemName);
00454 GlobalResource::getInstance()->setBaseName(baseName);
00455
00456
00457
00458
00459
00460
00461
00462 #ifdef _MSC_VER
00463 registerCtrlHanler();
00464 #else
00465 setSignalHandler(SIGINT, &sigIntHandler);
00466 #endif
00467
00468 printf("\nLoading the model ...\n ");
00469
00470
00471
00472 GlobalResource::getInstance()->PBSolverPrePOMDPLoad();
00473 SharedPointer<MOMDP> problem (NULL);
00474 if(p->hardcodedProblem.length() ==0 )
00475 {
00476 problem = ParserSelector::loadProblem(p->problemName, *p);
00477 }
00478 else
00479 {
00480 cout << "Unknown hard coded problem type : " << p->hardcodedProblem << endl;
00481 exit(0);
00482 }
00483
00484 double pomdpLoadTime = GlobalResource::getInstance()->PBSolverPostPOMDPLoad();
00485 printf(" loading time : %.2fs \n", pomdpLoadTime);
00486 GlobalResource::getInstance()->problem = problem;
00487
00488
00489 if(p->MDPSolution == true)
00490 {
00491 MDPSolution(problem, p);
00492 return 0;
00493 }
00494
00495 if(p->QMDPSolution == true)
00496 {
00497 QMDPSolution(problem, p);
00498 return 0;
00499 }
00500
00501 if(p->FIBSolution == true)
00502 {
00503 FIBSolution(problem, p);
00504 return 0;
00505 }
00506
00507 if(GlobalResource::getInstance()->benchmarkMode)
00508 {
00509 srand(GlobalResource::getInstance()->randSeed);
00510 GlobalResource::getInstance()->expRewardRecord.resize(GlobalResource::getInstance()->simNum);
00511 }
00512
00513 PointBasedAlgorithm* solver;
00514
00515 switch (p->strategy)
00516 {
00517 case S_SARSOP:
00518 {
00519 SARSOP* sarsopSolver = NULL;
00520 BackupAlphaPlaneMOMDP* lbBackup = new BackupAlphaPlaneMOMDP();
00521 BackupBeliefValuePairMOMDP* ubBackup = new BackupBeliefValuePairMOMDP();
00522
00523 sarsopSolver = new SARSOP(problem, p);
00524
00525 lbBackup->problem = problem;
00526 sarsopSolver->lowerBoundBackup = lbBackup;
00527
00528 ((BackupAlphaPlaneMOMDP* )(sarsopSolver->lowerBoundBackup))->solver = sarsopSolver;
00529
00530 ubBackup->problem = problem;
00531 sarsopSolver->upperBoundBackup = ubBackup;
00532 solver = sarsopSolver;
00533 }
00534 break;
00535
00536
00537
00538
00539
00540
00541
00542
00543
00544
00545
00546
00547
00548
00549
00550
00551
00552
00553
00554
00555 default:
00556 assert(0);
00557 };
00558
00559
00560 solver->solve(problem);
00561
00562 cout << endl;
00563
00564 }
00565
00566
00567
00568
00569
00570
00571
00572
00573
00574
00575
00576
00577
00578
00579
00580
00581
00582
00583
00584
00585 return 0;
00586
00587
00588
00589 }
00590
00591
00592
00593
00594
00595
00596
00597
00598
00599
00600
00601
00602
00603
00604
00605
00606
00607
00608
00609
00610
00611
00612
00613
00614
00615
00616
00617
00618
00619
00620
00621
00622
00623
00624
00625
00626
00627
00628
00629
00630
00631
00632