00001 #include "MOMDP.h"
00002 #include "ParserSelector.h"
00003 #include "AlphaVectorPolicy.h"
00004 #include "SimulationRewardCollector.h"
00005 #include "BeliefForest.h"
00006 #include "Sample.h"
00007 #include "BeliefCache.h"
00008 #include "EvaluationEngine.h"
00009 #include "SimulationEngine.h"
00010 #include "EvaluatorSampleEngine.h"
00011 #include "EvaluatorBeliefTreeNodeTuple.h"
00012
00013 #include "GlobalResource.h"
00014
00015 #include <string>
00016 #include <stdlib.h>
00017 #include <sstream>
00018 #include <fstream>
00019 #include <ctime>
00020
00021 #include "CPTimer.h"
00022
00023 #ifdef _MSC_VER
00024 #else
00025
00026 #include <sys/param.h>
00027 #include <sys/types.h>
00028 #include <sys/times.h>
00029
00030 #endif
00031
00032 using namespace std;
00033 using namespace momdp;
00034
00035
/**
 * Print the evaluator's command-line help text to standard output.
 *
 * @param cmdName  Program name to show in the usage synopsis and in the
 *                 example command line at the end of the help text.
 */
void print_usage(const char* cmdName)
{
    // Synopsis: the three accepted invocation forms.
    std::cout << "Usage: " << cmdName
              << " POMDPModelFileName --policy-file policyFileName --simLen numberSteps \n"
                 " --simNum numberSimulations [--fast] [--srand randomSeed] [--memory memoryLimit]\n"
                 " [--output-file outputFileName]\n";
    std::cout << " or " << cmdName << " --help (or -h) Print this help\n";
    std::cout << " or " << cmdName << " --version Print version information\n";

    // Options that control the evaluation itself.
    std::cout << "\n"
                 "Evaluator options:\n"
                 " --policy-file policyFileName Use policyFileName as the policy file name (compulsory).\n"
                 " --simLen numberSteps Use numberSteps as the number of steps for each\n"
                 " simulation run (compulsory).\n"
                 " --simNum numberSimulations Use numberSimulations as the number of simulation runs\n"
                 " (compulsory).\n"
                 " -f or --fast Use fast (but very picky) alternate parser for .pomdp files.\n"
                 " --srand randomSeed Set randomSeed as the random seed for simulation.\n"
                 " It is the current time by default.\n"
                 " --memory memoryLimit Use memoryLimit as the memory limit in MB. No memory\n"
                 " limit by default. If memory usage exceeds the specified\n"
                 " value, the evaluator will switch back to a more memory\n"
                 " conservative (and slow) method.\n";

    // Options that control where the evaluation trace goes.
    std::cout << "\n"
                 "Output options:\n"
                 " --output-file outputFileName Use outputFileName as the name for the output file\n"
                 " that contains the evaluation trace.\n";

    // Worked example using the caller-supplied program name.
    std::cout << "Example:\n"
                 " " << cmdName
              << " --simLen 100 --simNum 100 --policy-file out.policy Hallway.pomdp\n";
}
00082
00083
00084 void generateSimLog(SolverParams& p, double& globalExpRew, double& confInterval)
00085 {
00086 int length;
00087 char str1[102];
00088 string str_comb;
00089
00090 int startpos = 0;
00091 int i;
00092 for (i = p.problemName.length() - 1; i >= 0; i--) {
00093 if (p.problemName[i] == '/') {
00094 startpos = i + 1;
00095 break;
00096 }
00097 }
00098
00099 str_comb.append(p.problemName.begin() + startpos, p.problemName.end());
00100
00101 str_comb.append("SimLog");
00102 cout << str_comb << endl;
00103
00104 length = str_comb.copy(str1, 100);
00105 str1[length] = '\0';
00106
00107 FILE *fp = fopen(str1, "a");
00108
00109
00110 if (fp == NULL)
00111 {
00112 cerr << "cant open sim logfile\n";
00113 exit(1);
00114 }
00115
00116 fprintf(fp, "%f ", globalExpRew);
00117 fprintf(fp, "%f ", globalExpRew - confInterval);
00118 fprintf(fp, "%f ", globalExpRew + confInterval);
00119 fprintf(fp, "\n");
00120 fclose(fp);
00121
00122
00123 }
00124
00125
00126 int main(int argc, char **argv)
00127 {
00128 try
00129 {
00130 SolverParams* p =&GlobalResource::getInstance()->solverParams;
00131 bool parseCorrect = SolverParams::parseCommandLineOption(argc, argv, *p);
00132 if(!parseCorrect)
00133 {
00134 print_usage(p->cmdName);
00135 exit(EXIT_FAILURE);
00136 }
00137
00138
00139 if (p->policyFile == "" || p->simLen == -1 || p->simNum == -1)
00140 {
00141 print_usage(p->cmdName);
00142 return 0;
00143 }
00144
00145
00146 bool enableFiling = false;
00147
00148 if (p->outputFile.length() == 0)
00149 {
00150 enableFiling = false;
00151 }
00152 else
00153 {
00154 enableFiling = true;
00155 }
00156
00157 cout << "\nLoading the model ..." << endl << " ";
00158 SharedPointer<MOMDP> problem = ParserSelector::loadProblem(p->problemName, *p);
00159
00160 if (problem->initialBeliefStval->sval == -1)
00161 {
00162 cerr << "\nPlease use the simulator. Random initial value for the fully observable state variable is not supported in the evaluator.\n";
00163 exit(1);
00164 }
00165
00166 cout << "\nLoading the policy ..." << endl;
00167 cout << " input file : " << p->policyFile << endl;
00168 SharedPointer<AlphaVectorPolicy> policy = new AlphaVectorPolicy(problem);
00169 bool policyRead = policy->readFromFile(p->policyFile);
00170 if(!policyRead)
00171 {
00172 return 0;
00173 }
00174
00175 cout << "\nSimulating ..." << endl;
00176 if(p->useLookahead)
00177 {
00178 cout << " action selection : one-step look ahead" << endl;
00179 }
00180 else
00181 {
00182 }
00183
00184 SimulationRewardCollector rewardCollector;
00185 rewardCollector.setup(*p);
00186
00187
00188 vector <BeliefCache *> beliefCacheSet;
00189 int xStateNum = problem->XStates->size();
00190 beliefCacheSet.resize(xStateNum);
00191
00192 for(States::iterator iter = problem->XStates->begin(); iter != problem->XStates->end(); iter ++ )
00193 {
00194 beliefCacheSet[iter.index()] = new BeliefCache();
00195 }
00196
00197 BeliefForest* beliefForest = new BeliefForest();
00198 EvaluatorSampleEngine* sample = new EvaluatorSampleEngine();
00199
00200 sample->setup(NULL, problem, &beliefCacheSet, beliefForest);
00201 beliefForest->setup(problem, sample, &beliefCacheSet);
00202 beliefForest->globalRootPrepare();
00203
00204
00205 ofstream * foutStream = NULL;
00206 srand(p->seed);
00207
00208
00209
00210
00211 SharedPointer<BeliefWithState> startBeliefStval(new BeliefWithState());
00212
00213 copy(*startBeliefStval->bvec, *problem->initialBeliefStval->bvec);
00214 startBeliefStval->sval = problem->initialBeliefStval->sval;
00215
00216 belief_vector startBel;
00217 copy(startBel, *startBeliefStval->bvec);
00218
00219
00220 if (startBel.filled() == 0)
00221 {
00222 throw runtime_error("startBel.filled() == 0 !?");
00223 int numStates = problem->getBeliefSize();
00224 startBel.resize(numStates);
00225 for (int i = 0; i < numStates; i++)
00226 {
00227 startBel.push_back(i, ((double) 1) / (double(numStates)));
00228 }
00229 copy(*startBeliefStval->bvec, startBel);
00230 }
00231
00232
00233 belief_vector startBeliefX;
00234
00235 if (startBeliefStval->sval == -1)
00236 {
00237
00238 copy(startBeliefX, *problem->initialBeliefX);
00239 }
00240 else
00241 {
00242 startBeliefX.resize(problem->XStates->size());
00243 startBeliefX.push_back(startBeliefStval->sval, 1.0);
00244 }
00245
00246
00247
00248 bool hasMemory = true;
00249 if (enableFiling)
00250 {
00251 foutStream = new ofstream(p->outputFile.c_str());
00252 }
00253
00254 for (int currSim = 0; currSim < p->simNum; currSim++)
00255 {
00256 double reward = 0, expReward = 0;
00257
00258 if(hasMemory)
00259 {
00260 try
00261 {
00262 EvaluationEngine engine;
00263 engine.setup(problem, policy, beliefForest, &beliefCacheSet, sample, p);
00264 int firstAction = engine.runFor(p->simLen, *startBeliefStval, startBeliefX, foutStream, reward, expReward);
00265 if(firstAction < 0)
00266 {
00267
00268 return 0;
00269 }
00270 }
00271 catch(exception &e)
00272 {
00273 cout << "Memory limit reached, switch from evaluation to simulation and continue..." << endl;
00274 hasMemory = false;
00275
00276 GlobalResource::getInstance()->solverParams.memoryLimit = 0;
00277 delete beliefForest;
00278 }
00279 }
00280
00281 if(!hasMemory)
00282 {
00283 SimulationEngine engine;
00284 engine.setup(problem, policy, p);
00285 int firstAction = engine.runFor(p->simLen, foutStream, reward, expReward);
00286
00287 if(firstAction < 0)
00288 {
00289
00290 return 0;
00291 }
00292 }
00293
00294 rewardCollector.addEntry(currSim, reward, expReward);
00295 rewardCollector.printReward(currSim);
00296
00297
00298 }
00299
00300 if (enableFiling)
00301 {
00302 foutStream->close();
00303 }
00304
00305
00306 rewardCollector.printFinalReward();
00307 DEBUG_LOG( generateSimLog(*p, rewardCollector.globalExpRew, rewardCollector.confInterval); );
00308 }
00309 catch(bad_alloc &e)
00310 {
00311 if(GlobalResource::getInstance()->solverParams.memoryLimit == 0)
00312 {
00313 cout << "Memory allocation failed. Exit." << endl;
00314 }
00315 else
00316 {
00317 cout << "Memory limit reached. Please try increase memory limit" << endl;
00318 }
00319
00320 }
00321 catch(exception &e)
00322 {
00323 cout << "Exception: " << e.what() << endl ;
00324 }
00325
00326 return 0;
00327 }
00328