appl: solver.cpp Source File

Go to the documentation of this file.
00001 
#include <stdio.h>
#include <stdlib.h>
#include <cfloat>

#ifdef _MSC_VER
#include "getopt.h"
#define NOMINMAX 
#include <windows.h> 

#else
#include <errno.h>
#include <getopt.h>
#include <sys/time.h>
#include <unistd.h>
#endif
#include <signal.h>
#include <iostream>
#include <fstream>
#include <string>
#include <sstream>
#include <assert.h>

#include "CPTimer.h"

#include "GlobalResource.h"
//#include "ActionSelector.h"
//#include "PolicyFollower.h"

#include "solverUtils.h"
#include "Parser.h"
#include "POMDP.h"
#include "ParserSelector.h"
#include "MOMDP.h"
#include "SARSOP.h"
#include "BackupAlphaPlaneMOMDP.h"
#include "BackupBeliefValuePairMOMDP.h"

//#include "FSVI.h"
//#include "GES.h"
#include "FullObsUBInitializer.h"
#include "FastInfUBInitializer.h"

#include <string.h>
00049 
00050 using namespace std;
00051 using namespace momdp;
00052 
// Declaration of GlobalMemLimit with C linkage; the definition is not in this
// file. NOTE(review): presumably the memory cap set through --memory - confirm
// against the translation unit that defines it.
// Both braces live inside the __cplusplus guard so the guard stays balanced.
#ifdef __cplusplus
extern "C" {
#endif
extern unsigned long GlobalMemLimit;
#ifdef __cplusplus
}
#endif
00059 
// Output-related settings. Both fields start at -1
// (NOTE(review): -1 presumably means "not set" - confirm with the users of this struct).
struct OutputParams {
        double timeoutSeconds;
        double interval;
        OutputParams(void);
};

// Initialise every field to -1.
OutputParams::OutputParams(void)
        : timeoutSeconds(-1),
          interval(-1)
{
}
00070 
00071 
00072 #ifdef _MSC_VER
00073 BOOL CtrlHandler( DWORD fdwCtrlType ) 
00074 { 
00075         switch( fdwCtrlType ) 
00076         { 
00077                 // Handle the interrupt signal. 
00078         case CTRL_C_EVENT: 
00079         case CTRL_CLOSE_EVENT: 
00080         case CTRL_BREAK_EVENT: 
00081         case CTRL_SHUTDOWN_EVENT: 
00082         case CTRL_LOGOFF_EVENT:
00083                 if(GlobalResource::getInstance()->solving)
00084                 {
00085                         GlobalResource::getInstance()->userTerminatedG = true;
00086                 }
00087                 else
00088                 {
00089                         exit(1);
00090                 }
00091                 printf("*** Received SIGINT. User pressed control-C. ***\n");
00092                 printf("\nTerminating ...\n");
00093                 fflush(stdout);
00094                 GlobalResource::getInstance()->userTerminatedG = true;
00095                 return( TRUE );
00096 
00097         default: 
00098                 return FALSE; 
00099         } 
00100 } 
00101 
00102 void registerCtrlHanler()
00103 {
00104         if( SetConsoleCtrlHandler( (PHANDLER_ROUTINE) CtrlHandler, TRUE ) ) 
00105         { 
00106                 // Success
00107         } 
00108         else 
00109         {
00110                 // Failed to register... but continue anyway
00111                 printf( "\nERROR: Could not set control handler"); 
00112         }
00113 }
00114 
00115 #else
00116 
00117 void sigIntHandler(int sig) {
00118 
00119         if(GlobalResource::getInstance()->solving)
00120         {
00121                 GlobalResource::getInstance()->userTerminatedG = true;
00122         }
00123         else
00124         {
00125                 exit(1);
00126         }
00127 
00128 
00129         printf("*** Received SIGINT. User pressed control-C. ***\n");
00130         printf("\nTerminating ...\n");
00131         fflush(stdout);
00132 }
00133 
/**
 * Installs `handler` for signal `sig` through sigaction(), with SA_RESTART set.
 * On failure an error is printed to stderr and the process exits with
 * EXIT_FAILURE.
 *
 * @param sig     signal number to hook
 * @param handler function invoked when the signal is delivered
 */
void setSignalHandler(int sig, void (*handler)(int))
{
        struct sigaction action;
        memset(&action, 0, sizeof(action));
        action.sa_flags = SA_RESTART;
        action.sa_handler = handler;

        const int rc = sigaction(sig, &action, NULL);
        if (rc != -1) {
                return;
        }

        cerr << "ERROR: unable to set handler for signal " << sig << endl;
        exit(EXIT_FAILURE);
}
00148 #endif
00149 
/**
 * Prints the command-line help to stderr and terminates the process.
 *
 * @param cmdName program name inserted into the synopsis and the examples
 *
 * Does not return: always ends with exit(-1).
 */
void usage(const char* cmdName)
{
        std::cerr <<
                "Usage: " << cmdName << " POMDPModelFileName [--fast] [--precision targetPrecision] [--randomization]\n"
                "       [--timeout timeLimit] [--memory memoryLimit] [--output policyFileName]\n"
                "       [--policy-interval timeInterval]\n"
                "    or " << cmdName << " --help (or -h) Print this help\n"
                "    or " << cmdName << " --version              Print version information\n"
                "\n"
                "Solver options:\n"
                "  -f or --fast         Use fast (but very picky) alternate parser for .pomdp files.\n"
                "  -p or --precision targetPrecision\n"
                "                       Set targetPrecision as the target precision in solution \n"
                "                       quality; run ends when target precision is reached. The target\n"
                "                       precision is 1e-3 by default.\n"
                "  --randomization      Turn on randomization for the sampling algorithm.\n"
                "                       Randomization is off by default.\n"
                "  --timeout timeLimit  Use timeLimit as the timeout in seconds.  If running time\n"
                "                       exceeds the specified value, the solver writes out a policy and\n"
                "                       terminates. There is no time limit by default.\n"
                "  --memory memoryLimit Use memoryLimit as the memory limit in MB. No memory limit\n"
                "                       by default.  If memory usage exceeds the specified value,\n"
                "                       the solver writes out a policy and terminates. Set the value\n"
                "                       to be less than physical memory to avoid swapping.\n"
                "  --trial-improvement-factor improvementConstant\n"
                "                       Use improvementConstant as the trial improvement factor in the\n"
                "                       sampling algorithm. At the default of 0.5, a trial terminates at\n"
                "                       a belief when the gap between its upper and lower bound is 0.5 of\n"
                "                       the current precision at the initial belief.\n"
                "\n"
                "Policy output options:\n"
                "  -o or --output policyFileName\n"
                "                       Use policyFileName as the name of policy output file. The\n"
                "                       file name is 'out.policy' by default.\n"
                "  --policy-interval timeInterval\n"
                "                       Use timeInterval as the time interval between two consecutive\n"
                "                       write-out of policy files. If this is not specified, the solver\n"
                "                       only writes out a policy file upon termination.\n"
                "\n"
                "Examples:\n"
                "  " << cmdName << " Hallway.pomdp\n"
                "  " << cmdName << " --timeout 100 --output hallway.policy Hallway.pomdp\n"
                "\n"
                ;

        exit(-1);
}
00227 
00228 
00229 int QMDPSolution(SharedPointer<MOMDP> problem, SolverParams* p)
00230 {
00231         cout << "Generate QMDP Policy" << endl;
00232         double targetPrecision = MDP_RESIDUAL;
00233         // no need to invoke POMDP solver
00234         // solve MDP
00235         FullObsUBInitializer m;
00236         if(problem->XStates->size() != 1 && problem->hasPOMDPMatrices())
00237         {
00238                 DEBUG_LOG(cout << "Calling FullObsUBInitialize::QMDPSolution_unfac()" << endl;);
00239                 // un-factored 
00240                 // only does this if convert fast is called to produce pomdp version of the matrices
00241                 // need pomdp matrix
00242                 m.QMDPSolution_unfac(problem, targetPrecision); // SYL030909 prevly: m.QValueIteration_unfac(problem, targetPrecision);
00243                 int numActions  = problem->actions->size();
00244                 int numXstates = problem->XStates->size();
00245                 int numYstates = problem->YStates->size();
00246                 m.actionAlphaByState.resize(numActions);
00247                 FOR(a, numActions)
00248                 {
00249                         m.actionAlphaByState[a].resize(numXstates);
00250                         FOR (state_idx, numXstates) 
00251                         {
00252                                 m.actionAlphaByState[a][state_idx].resize(problem->getBeliefSize());
00253                         }
00254 
00255                 }
00256 
00257                 FOR(a, numActions)
00258                 {
00259                         m.UnfacPostProcessing(m.actionAlphas[a], m.actionAlphaByState[a]);
00260                 }
00261         }
00262         else
00263         {
00264                 DEBUG_LOG(cout << "Calling FullObsUBInitialize::QMDPSolution()" << endl;);
00265                 // factored
00266                 m.QMDPSolution(problem, targetPrecision); // SYL030909 prevly: m.QValueIteration(problem, targetPrecision);
00267                 FOR(a, problem->actions->size())
00268                 {
00269                         m.FacPostProcessing(m.actionAlphaByState[a]);
00270                 }
00271         }
00272 
00273         AlphaPlanePoolSet alphaPlanePoolSet(NULL);
00274         alphaPlanePoolSet.setProblem(problem);
00275         alphaPlanePoolSet.setSolver(NULL);
00276         alphaPlanePoolSet.initialize();
00277         //addAlphaPlane(alphaPlane);
00278         
00279         FOR(a, problem->actions->size())
00280         {
00281                 for(int stateidx = 0; stateidx < alphaPlanePoolSet.set.size() ; stateidx ++)
00282                 {
00283                         SharedPointer<AlphaPlane> plane (new AlphaPlane());
00284                         copy(*plane->alpha, m.actionAlphaByState[a][stateidx]);
00285                         plane->action = a;
00286                         plane->sval = stateidx;
00287 
00288                         alphaPlanePoolSet.set[stateidx]->addAlphaPlane(plane);
00289                 }
00290         }
00291         string outFileName (p->outPolicyFileName);
00292         alphaPlanePoolSet.writeToFile(outFileName, p->problemName);
00293         return 0;       
00294 }
00295 
00296 int FIBSolution(SharedPointer<MOMDP> problem, SolverParams* p)
00297 {
00298         cout << "Generate FIB Policy" << endl;
00299         double targetPrecision = MDP_RESIDUAL;
00300         // no need to invoke POMDP solver
00301 
00302         FastInfUBInitializer f(problem);
00303         DEBUG_LOG(cout << "Calling FastInfUBInitializer::getFIBsolution()" << endl;);           f.getFIBsolution(targetPrecision);
00304 
00305         AlphaPlanePoolSet alphaPlanePoolSet(NULL);
00306         alphaPlanePoolSet.setProblem(problem);
00307         alphaPlanePoolSet.setSolver(NULL);
00308         alphaPlanePoolSet.initialize();
00309         //addAlphaPlane(alphaPlane);
00310         
00311         FOR(a, problem->actions->size())
00312         {
00313                 for(int stateidx = 0; stateidx < alphaPlanePoolSet.set.size() ; stateidx ++)
00314                 {
00315                         SharedPointer<AlphaPlane> plane (new AlphaPlane());
00316                         copy(*plane->alpha, f.actionAlphaByState[a][stateidx]);
00317                         plane->action = a;
00318                         plane->sval = stateidx;
00319 
00320                         alphaPlanePoolSet.set[stateidx]->addAlphaPlane(plane);
00321                 }
00322         }
00323         string outFileName (p->outPolicyFileName);
00324         alphaPlanePoolSet.writeToFile(outFileName, p->problemName); 
00325         return 0;       
00326 }
00327 
00328 int MDPSolution(SharedPointer<MOMDP> problem, SolverParams* p)
00329 {
00330     cout << "Generate MDP Policy" << endl;
00331     double targetPrecision = MDP_RESIDUAL;
00332     // no need to invoke POMDP solver
00333     // solve MDP
00334     FullObsUBInitializer m;
00335     if(problem->XStates->size() != 1 && problem->hasPOMDPMatrices())
00336     {
00337         // un-factored 
00338         // only does this if convert fast is called to produce pomdp version of the matrices
00339         // need pomdp matrix
00340         m.alphaByState.resize(problem->XStates->size());
00341         DEBUG_LOG(cout << "Calling FullObsUBInitialize::valueIteration_unfac()" << endl;);
00342         m.valueIteration_unfac(problem, targetPrecision);
00343         m.UnfacPostProcessing(m.alpha, m.alphaByState);
00344     }
00345     else
00346     {
00347         // factored
00348         DEBUG_LOG(cout << "Calling FullObsUBInitialize::valueIteration()" << endl;);
00349         m.valueIteration(problem, targetPrecision);
00350         m.FacPostProcessing(m.alphaByState);
00351     }
00352 
00353     AlphaPlanePoolSet alphaPlanePoolSet(NULL);
00354     alphaPlanePoolSet.setProblem(problem);
00355     alphaPlanePoolSet.setSolver(NULL);
00356     alphaPlanePoolSet.initialize();
00357     //addAlphaPlane(alphaPlane);
00358 
00359     
00360     //do one step lookahead if problem is pure MDP
00361     if(problem->YStates->size() == 1)
00362     {
00363         for(int stateidx = 0; stateidx < alphaPlanePoolSet.set.size() ; stateidx ++)
00364         {
00365             SharedPointer<AlphaPlane> plane (new AlphaPlane());
00366             int maxAction = 0;
00367             double maxActionLB = -DBL_MAX;
00368 
00369             //search for the best action for this state
00370             SharedPointer<BeliefWithState> b = SharedPointer<BeliefWithState>(new BeliefWithState); 
00371             b->bvec = new SparseVector(); b->bvec->resize(1);
00372             b->bvec->push_back(0,1.0); b->sval=stateidx;
00373             //initialise the MDP belief to current state
00374             obsState_prob_vector spv;  // outcome probability for values of observed state
00375             for(Actions::iterator aIter = problem->actions->begin(); aIter != problem->actions->end(); aIter ++) 
00376             {
00377                 int a = aIter.index();
00378 
00379                 double sum = 0.0;
00380                 double immediateReward = problem->rewards->getReward(*b, a);
00381                 problem->getObsStateProbVector(spv, *b, a);
00382 
00383                 FOR(Xn, spv.size()) 
00384                 {
00385                     double sprob = spv(Xn);
00386                     if (sprob > OBS_IS_ZERO_EPS) 
00387                     {
00388                         double childLB =  m.alphaByState[Xn](0);
00389                         sum += childLB * sprob;
00390                     }
00391                 }
00392                 sum *= problem->getDiscount();
00393                 sum += immediateReward;
00394 
00395                 if(sum > maxActionLB)
00396                 {
00397                     maxActionLB = sum;
00398                     maxAction = a;
00399                 }
00400                 assert(maxActionLB !=  -DBL_MAX);
00401             }
00402 
00403             copy(*plane->alpha, m.alphaByState[stateidx]);
00404             plane->action = maxAction;
00405             plane->sval = stateidx;
00406 
00407             alphaPlanePoolSet.set[stateidx]->addAlphaPlane(plane);
00408         }
00409     }
00410     else{
00411         for(int stateidx = 0; stateidx < alphaPlanePoolSet.set.size() ; stateidx ++)
00412         {
00413                 SharedPointer<AlphaPlane> plane (new AlphaPlane());
00414                 copy(*plane->alpha, m.alphaByState[stateidx]);
00415                 plane->action = -1;
00416                 plane->sval = stateidx;
00417 
00418                 alphaPlanePoolSet.set[stateidx]->addAlphaPlane(plane);
00419         }
00420     }
00421 
00422     string outFileName (p->outPolicyFileName);
00423     alphaPlanePoolSet.writeToFile(outFileName, p->problemName);
00424     return 0;   
00425 }
00426 int main(int argc, char **argv) 
00427 {
00428 
00429         //try
00430         {
00431                 SolverParams* p = &GlobalResource::getInstance()->solverParams;
00432 
00433                 bool parseCorrect = SolverParams::parseCommandLineOption(argc, argv, *p);
00434                 if(!parseCorrect)
00435                 {
00436                         usage(p->cmdName);
00437                         exit(EXIT_FAILURE);
00438                 }
00439 
00440 
00441                 OutputParams op;
00442                 if(GlobalResource::getInstance()->benchmarkMode)
00443                 {
00444                         if(GlobalResource::getInstance()->simNum == 0|| GlobalResource::getInstance()->simLen == 0)
00445                         {
00446                                 cout << "Benchmark Length and/or Number not set, please set them using option --simLen and --simNum" << endl;
00447                                 exit(-1);
00448                         }
00449                 }
00450 
00451 
00452                 GlobalResource::getInstance()->init();
00453                 string baseName = GlobalResource::getInstance()->parseBaseNameWithoutPath(p->problemName);
00454                 GlobalResource::getInstance()->setBaseName(baseName);
00455 
00456                 //*************************
00457                 //TODO: parse the problem
00458                 //      long int clk_tck = sysconf(_SC_CLK_TCK);
00459                 //      struct tms now1, now2;
00460                 //      float utime, stime;
00461 
00462 #ifdef _MSC_VER
00463                 registerCtrlHanler();
00464 #else
00465                 setSignalHandler(SIGINT, &sigIntHandler);
00466 #endif
00467 
00468                 printf("\nLoading the model ...\n  ");
00469 
00470                 //Parser* parser = new Parser();  
00471 
00472                 GlobalResource::getInstance()->PBSolverPrePOMDPLoad();
00473                 SharedPointer<MOMDP> problem (NULL);
00474                 if(p->hardcodedProblem.length() ==0 )
00475                 {
00476                         problem = ParserSelector::loadProblem(p->problemName, *p);
00477                 }
00478                 else
00479                 {
00480             cout << "Unknown hard coded problem type : " << p->hardcodedProblem << endl;
00481             exit(0);
00482                 }
00483 
00484                 double pomdpLoadTime = GlobalResource::getInstance()->PBSolverPostPOMDPLoad();
00485                 printf("  loading time : %.2fs \n", pomdpLoadTime);
00486                 GlobalResource::getInstance()->problem = problem;
00487 
00488                 //Getting a MDP solutions
00489                 if(p->MDPSolution == true)
00490                 {
00491                         MDPSolution(problem, p);
00492                         return 0;
00493                 }
00494 
00495                 if(p->QMDPSolution == true)
00496                 {
00497                         QMDPSolution(problem, p);
00498                         return 0;
00499                 }
00500 
00501                 if(p->FIBSolution == true)
00502                 {
00503                         FIBSolution(problem, p);
00504                         return 0;
00505                 }
00506 
00507                 if(GlobalResource::getInstance()->benchmarkMode)
00508                 {
00509                         srand(GlobalResource::getInstance()->randSeed);
00510                         GlobalResource::getInstance()->expRewardRecord.resize(GlobalResource::getInstance()->simNum);
00511                 }
00512                 //decide which solver to create
00513                 PointBasedAlgorithm* solver;
00514 
00515                 switch (p->strategy)
00516                 {
00517                 case S_SARSOP:
00518                         {
00519                                 SARSOP* sarsopSolver = NULL;
00520                                 BackupAlphaPlaneMOMDP* lbBackup = new BackupAlphaPlaneMOMDP();
00521                                 BackupBeliefValuePairMOMDP* ubBackup = new BackupBeliefValuePairMOMDP();
00522 
00523                                 sarsopSolver = new SARSOP(problem, p);
00524 
00525                                 lbBackup->problem = problem;
00526                                 sarsopSolver->lowerBoundBackup = lbBackup;
00527 
00528                                 ((BackupAlphaPlaneMOMDP* )(sarsopSolver->lowerBoundBackup))->solver = sarsopSolver;
00529 
00530                                 ubBackup->problem = problem;
00531                                 sarsopSolver->upperBoundBackup = ubBackup;
00532                                 solver = sarsopSolver;
00533                         }
00534                         break;
00535 
00536                         //case S_FSVI:
00537                         //      solver = new FSVI(problem, p);
00538                         //      break;
00539 
00540                         //case S_GES:
00541                         //      if(GlobalResource::getInstance()->migsPathFile != NULL)
00542                         //      {
00543                         //              if(GlobalResource::getInstance()->migsPathFileNum < 0 )
00544                         //              {
00545                         //                      GlobalResource::getInstance()->migsPathFileNum = 10;
00546                         //              }
00547                         //              solver = new GES(problem, p, true);
00548                         //      }
00549                         //      else
00550                         //      {
00551                         //              solver = new GES(problem, p);
00552                         //      }
00553                         //      break;
00554 
00555                 default:
00556                         assert(0);// should never reach this point
00557                 };
00558 
00559                 //solve the problem
00560                 solver->solve(problem);
00561 
00562                 cout << endl;
00563 
00564         }
00565 
00566         // Commented out during merge 02102009
00567         /*catch(bad_alloc &e)
00568         {
00569                 if(GlobalResource::getInstance()->solverParams.memoryLimit == 0)
00570                 {
00571                         cout << "Memory allocation failed. Exit." << endl;
00572                 }
00573                 else
00574                 {
00575                         cout << "Memory limit reached. Please try increase memory limit" << endl;
00576                 }
00577 
00578         }
00579         catch(exception &e)
00580         {
00581                 cout << "Exception: " << e.what() << endl ;
00582         }*/
00583 
00584 
00585         return 0;
00586 
00587 
00588 
00589 }
00590 
00591 
00592 /***************************************************************************
00593 * REVISION HISTORY:
00594 *
00595 ***************************************************************************/
00596 
00597 
00598 //
00599 //
00600 //
00601 //#include <string>
00602 //using namespace std;
00603 //
00604 //
00605 //#include "Belief.h"
00606 //#include "SARSOP.h"
00607 //#include "MOMDP.h"
00608 //#include "MOMDPLite.h"
00609 //#include "PointBasedAlgorithm.h"
00610 //#include "BackupBeliefValuePairMOMDP.h"
00611 //#include "BackupAlphaPlaneMOMDP.h"
00612 //
00613 //
00614 //int main(int argc, char** argv)
00615 //{
00616 //      SARSOP *solver = new SARSOP();
00617 //      string problemName = "something.pomdpx";
00618 //
00619 //      // This section should be the only difference between MOMDP and MOMDP Lite version
00620 //      SharedPointer<MOMDP> problem = MOMDP::LoadProbem(problemName);
00621 //      solver->lowerBoundBackup = new BackupAlphaPlaneMOMDP();
00622 //      solver->upperBoundBackup = new BackupBeliefValuePairMOMDP();
00623 //
00624 //      // MOMDPLite Version
00625 //      //SharedPointer<MOMDP> problem = MOMDPLite::LoadProbem();
00626 //      //solver->lowerBoundBackup = new BackupAlphaPlaneMOMDPLite();
00627 //      //solver->upperBoundBackup = new BackupBeliefValuePairMOMDPLite();
00628 //
00629 //      solver->solve(problem);
00630 //
00631 //
00632 //}