00001 #include "SimulationRewardCollector.h" 00002 00003 SimulationRewardCollector::SimulationRewardCollector(void) 00004 { 00005 } 00006 00007 SimulationRewardCollector::~SimulationRewardCollector(void) 00008 { 00009 } 00010 00011 00012 void SimulationRewardCollector::setup(SolverParams& p) 00013 { 00014 this->p = p; 00015 expRewardRecord.resize(p.simNum); 00016 totalVar = 0; 00017 globalRew = 0; 00018 globalExpRew = 0; 00019 confInterval = 0; 00020 legendPrint = true; // print legends for acronyms used 00021 00022 } 00023 void SimulationRewardCollector::addEntry(int currSim, double reward, double expReward) 00024 { 00025 globalRew += reward / p.simNum; 00026 globalExpRew += expReward / p.simNum; 00027 expRewardRecord[currSim] = expReward; 00028 } 00029 00030 void SimulationRewardCollector::printReward(int currSim) 00031 { 00032 int simDisplayInc = int(p.simNum / 10); 00033 if(simDisplayInc == 0) 00034 { 00035 simDisplayInc = 1; 00036 } 00037 if( legendPrint) 00038 { 00039 cout << endl; 00040 cout << "-----------------------------------" << endl; 00041 cout << " #Simulations | Exp Total Reward "<< endl; 00042 cout << "-----------------------------------"<< endl; 00043 legendPrint = false; 00044 } 00045 00046 if (((currSim + 1) % (simDisplayInc)) == 0) 00047 { 00048 //cout << p.policyFile << endl; 00049 //cout << "#Simulations = " << currSim + 1; 00050 cout << " "; cout.width(15);cout <<left << currSim+1; 00051 cout << " " << left << (globalExpRew * p.simNum)/ (currSim + 1) << endl; 00052 // " Global Reward = " << (globalRew * p.simNum) / (currSim + 1) << 00053 } 00054 } 00055 00056 void SimulationRewardCollector::calculateConfidenceInterval() 00057 { 00058 double currVar = 0; 00059 for (int currSim = 0; currSim < p.simNum; currSim++) 00060 { 00061 currVar = pow((expRewardRecord[currSim] - globalExpRew), 2); 00062 totalVar += currVar / p.simNum; 00063 } 00064 totalVar = sqrt(totalVar); // sample's stddev 00065 confInterval = 1.96 * totalVar / sqrt((double)p.simNum); // stddev_mean = stddev_sample/sqrt(simNum) 00066 } 00067 00068 void SimulationRewardCollector::printFinalReward() 00069 { 00070 calculateConfidenceInterval(); 00071 cout << "-----------------------------------"<< endl << endl; 00072 00073 cout << "Finishing ..." << endl << endl; 00074 // cout << "Policy file: " << p.policyFile << endl; 00075 //cout << globalExpRew << " " << globalExpRew - confInterval << " " << globalExpRew + confInterval << endl; //- TEMP FORMAT FOR EXPTS 00076 cout << "-------------------------------------------------------------"<< endl; 00077 cout << " #Simulations | Exp Total Reward | 95% Confidence Interval "<< endl; 00078 cout << "-------------------------------------------------------------"<< endl; 00079 //cout << "#Simulation = " << p.simNum; 00080 // cout << " ETR = " << globalExpRew << " 95% Confidence Interval = (" << globalExpRew - confInterval << ", " << globalExpRew + confInterval << ")" << endl; 00081 cout << " "; cout.width(15);cout <<left << p.simNum ; 00082 cout << " ";cout.width(18);cout << left << globalExpRew; 00083 cout << " (" << globalExpRew - confInterval << ", " << globalExpRew + confInterval << ")" << endl; 00084 cout << "-------------------------------------------------------------"<< endl; 00085 // << confInterval << ", 95 Conf Min = " << globalExpRew - confInterval << ", 95 Conf Max = " << globalExpRew + confInterval << endl; 00086 // << ", Global Exp Reward Stddev = " << totalVar 00087 //" Global Reward = " << globalRew << 00088 00089 } 00090