00001 #include "MOMDP.h"
00002 #include "ParserSelector.h"
00003 #include "AlphaVectorPolicy.h"
00004 #include "SimulationRewardCollector.h"
00005 #include "BeliefForest.h"
00006 #include "Sample.h"
00007 #include "BeliefCache.h"
00008 #include "EvaluationEngine.h"
00009 #include "SimulationEngine.h"
00010 #include "EvaluatorSampleEngine.h"
00011 #include "EvaluatorBeliefTreeNodeTuple.h"
00012 
00013 #include "GlobalResource.h"
00014 
00015 #include <string>
00016 #include <stdlib.h>
00017 #include <sstream>
00018 #include <fstream>
00019 #include <ctime>
00020 
00021 #include "CPTimer.h"
00022 
00023 #ifdef _MSC_VER
00024 #else
00025 
00026 #include <sys/param.h>
00027 #include <sys/types.h>
00028 #include <sys/times.h>
00029 
00030 #endif
00031 
00032 using namespace std;
00033 using namespace momdp;
00034 
00035 
/**
 * Writes the evaluator's command-line help text to standard output.
 *
 * The text consists of a synopsis, the evaluator options, the output
 * options and one example invocation. Nothing is flushed explicitly.
 *
 * @param cmdName  program name substituted into the synopsis and example lines.
 */
void print_usage(const char* cmdName)
{
     std::ostream& out = std::cout;

     // Synopsis: the three accepted invocation forms.
     out << "Usage: " << cmdName
         << " POMDPModelFileName --policy-file policyFileName --simLen numberSteps \n"
            "     --simNum numberSimulations [--fast] [--srand randomSeed] [--memory memoryLimit]\n"
            "     [--output-file outputFileName]\n";
     out << "    or " << cmdName << " --help (or -h)  Print this help\n";
     out << "    or " << cmdName << " --version      Print version information\n";
     out << "\n";

     // Options that control the evaluation itself.
     out << "Evaluator options:\n"
            "  --policy-file policyFileName       Use policyFileName as the policy file name (compulsory).\n"
            "  --simLen numberSteps               Use numberSteps as the number of steps for each\n"
            "                             simulation run (compulsory).\n"
            "  --simNum numberSimulations Use numberSimulations as the number of simulation runs\n"
            "                             (compulsory).\n"
            "  -f or --fast                       Use fast (but very picky) alternate parser for .pomdp files.\n"
            "  --srand randomSeed         Set randomSeed as the random seed for simulation.\n"
            "                             It is the current time by default.\n"
            "  --memory memoryLimit               Use memoryLimit as the memory limit in MB. No memory\n"
            "                             limit by default. If memory usage exceeds the specified\n"
            "                             value, the evaluator will switch back to a more memory\n"
            "                             conservative (and slow) method.\n";
     out << "\n";

     // Options that control where the evaluation trace is written.
     out << "Output options:\n"
            "  --output-file outputFileName       Use outputFileName as the name for the output file\n"
            "                             that contains the evaluation trace.\n";

     // Example invocation.
     out << "Example:\n";
     out << "  " << cmdName << " --simLen 100 --simNum 100 --policy-file out.policy Hallway.pomdp\n";
}
00082 
00083 
00084 void generateSimLog(SolverParams& p, double& globalExpRew, double& confInterval)
00085 {
00086      int length;
00087      char str1[102];
00088      string str_comb;
00089 
00090      int startpos = 0;
00091      int i;
00092      for (i = p.problemName.length() - 1; i >= 0; i--) {
00093           if (p.problemName[i] == '/') {
00094                startpos = i + 1;
00095                break;
00096           }
00097      }
00098 
00099      str_comb.append(p.problemName.begin() + startpos, p.problemName.end());
00100 
00101      str_comb.append("SimLog");
00102      cout << str_comb << endl;
00103 
00104      length = str_comb.copy(str1, 100);
00105      str1[length] = '\0';
00106 
00107      FILE *fp = fopen(str1, "a");
00108 
00109      
00110      if (fp == NULL) 
00111      {
00112           cerr << "cant open sim logfile\n";
00113           exit(1);
00114      }
00115 
00116      fprintf(fp, "%f ", globalExpRew);
00117      fprintf(fp, "%f ", globalExpRew - confInterval);
00118      fprintf(fp, "%f ", globalExpRew + confInterval);
00119      fprintf(fp, "\n");
00120      fclose(fp);
00121 
00122 
00123 }
00124 
00125 
00126 int main(int argc, char **argv) 
00127 {
00128      try
00129      {
00130           SolverParams* p =&GlobalResource::getInstance()->solverParams;
00131           bool parseCorrect = SolverParams::parseCommandLineOption(argc, argv, *p);
00132           if(!parseCorrect)
00133           {
00134                print_usage(p->cmdName);
00135                exit(EXIT_FAILURE);
00136           }
00137 
00138           
00139           if (p->policyFile == "" || p->simLen == -1 || p->simNum == -1) 
00140           {
00141                print_usage(p->cmdName);
00142                return 0;
00143           }
00144 
00145 
00146           bool enableFiling = false;
00147 
00148           if (p->outputFile.length() == 0) 
00149           {
00150                enableFiling = false;
00151           } 
00152           else 
00153           {
00154                enableFiling = true;
00155           }
00156 
00157           cout << "\nLoading the model ..." << endl << "  ";
00158           SharedPointer<MOMDP> problem = ParserSelector::loadProblem(p->problemName, *p);
00159 
00160           if (problem->initialBeliefStval->sval == -1) 
00161           { 
00162                cerr << "\nPlease use the simulator. Random initial value for the fully observable state variable is not supported in the evaluator.\n";
00163                exit(1);
00164            }
00165 
00166           cout << "\nLoading the policy ..." << endl;
00167           cout << "  input file   : " << p->policyFile << endl;
00168           SharedPointer<AlphaVectorPolicy> policy = new AlphaVectorPolicy(problem);
00169           bool policyRead = policy->readFromFile(p->policyFile);
00170           if(!policyRead)
00171           {
00172                return 0;
00173           }
00174 
00175           cout << "\nSimulating ..." << endl;
00176           if(p->useLookahead)
00177           {
00178             cout << "  action selection :  one-step look ahead" << endl;
00179           }
00180           else
00181           {
00182           }
00183 
00184           SimulationRewardCollector rewardCollector;
00185           rewardCollector.setup(*p);
00186 
00187 
00188           vector <BeliefCache *> beliefCacheSet;
00189           int xStateNum = problem->XStates->size();
00190           beliefCacheSet.resize(xStateNum);
00191 
00192           for(States::iterator iter = problem->XStates->begin(); iter != problem->XStates->end(); iter ++ )
00193           {
00194                beliefCacheSet[iter.index()] = new BeliefCache();
00195           }
00196 
00197           BeliefForest* beliefForest = new BeliefForest();
00198           EvaluatorSampleEngine* sample = new EvaluatorSampleEngine();
00199 
00200           sample->setup(NULL, problem, &beliefCacheSet, beliefForest);
00201           beliefForest->setup(problem, sample, &beliefCacheSet);
00202           beliefForest->globalRootPrepare();
00203 
00204 
00205           ofstream * foutStream = NULL;
00206           srand(p->seed);
00207           
00208 
00209           
00210           
00211           SharedPointer<BeliefWithState> startBeliefStval(new BeliefWithState());
00212 
00213           copy(*startBeliefStval->bvec, *problem->initialBeliefStval->bvec);
00214           startBeliefStval->sval = problem->initialBeliefStval->sval;
00215 
00216           belief_vector startBel;
00217           copy(startBel, *startBeliefStval->bvec);
00218 
00219           
00220           if (startBel.filled() == 0) 
00221           {
00222                throw runtime_error("startBel.filled() == 0 !?");
00223                int numStates = problem->getBeliefSize();
00224                startBel.resize(numStates);
00225                for (int i = 0; i < numStates; i++) 
00226                {
00227                     startBel.push_back(i, ((double) 1) / (double(numStates)));
00228                }
00229                copy(*startBeliefStval->bvec, startBel);
00230           }
00231 
00232           
00233           belief_vector startBeliefX;
00234           
00235           if (startBeliefStval->sval == -1) 
00236           { 
00237                
00238                copy(startBeliefX, *problem->initialBeliefX);
00239           } 
00240           else 
00241           { 
00242                startBeliefX.resize(problem->XStates->size());
00243                startBeliefX.push_back(startBeliefStval->sval, 1.0);
00244           }
00245 
00246           
00247 
00248           bool hasMemory = true;
00249           if (enableFiling) 
00250           {
00251                foutStream = new ofstream(p->outputFile.c_str());
00252           }
00253 
00254           for (int currSim = 0; currSim < p->simNum; currSim++) 
00255           {
00256                double reward = 0, expReward = 0;
00257 
00258                if(hasMemory)
00259                {
00260                     try
00261                     {
00262                          EvaluationEngine engine;
00263                          engine.setup(problem, policy, beliefForest, &beliefCacheSet, sample, p);
00264                          int firstAction = engine.runFor(p->simLen, *startBeliefStval, startBeliefX, foutStream, reward, expReward);
00265                          if(firstAction < 0)
00266                          {
00267                               
00268                               return 0;
00269                          }
00270                     }
00271                     catch(exception &e)
00272                     {
00273                          cout << "Memory limit reached, switch from evaluation to simulation and continue..." << endl;
00274                          hasMemory = false;
00275                          
00276                          GlobalResource::getInstance()->solverParams.memoryLimit = 0;
00277                          delete beliefForest;
00278                     }
00279                }
00280 
00281                if(!hasMemory)
00282                {
00283                     SimulationEngine engine;
00284                     engine.setup(problem, policy, p);
00285                     int firstAction = engine.runFor(p->simLen, foutStream, reward, expReward);
00286 
00287                     if(firstAction < 0)
00288                     {
00289                          
00290                          return 0;
00291                     }
00292                }
00293 
00294                rewardCollector.addEntry(currSim, reward, expReward);
00295                rewardCollector.printReward(currSim);
00296 
00297 
00298           }
00299 
00300           if (enableFiling)
00301           {
00302                foutStream->close();
00303           }
00304 
00305 
00306           rewardCollector.printFinalReward();
00307           DEBUG_LOG( generateSimLog(*p, rewardCollector.globalExpRew, rewardCollector.confInterval); );
00308      }
00309      catch(bad_alloc &e)
00310      {
00311           if(GlobalResource::getInstance()->solverParams.memoryLimit == 0)
00312           {
00313                cout << "Memory allocation failed. Exit." << endl;
00314           }
00315           else
00316           {
00317                cout << "Memory limit reached. Please try increase memory limit" << endl;
00318           }
00319 
00320      }
00321      catch(exception &e)
00322      {
00323           cout << "Exception: " << e.what() << endl ;
00324      }
00325 
00326      return 0;
00327 }
00328