SarsaActionSelector.h
Go to the documentation of this file.
00001 #ifndef bwi_krexec_SarsaActionSelector_h__guard
00002 #define bwi_krexec_SarsaActionSelector_h__guard
00003 
00004 #include <actasp/ActionSelector.h>
00005 #include <actasp/ExecutionObserver.h>
00006 #include <actasp/state_utils.h>
00007 #include <actasp/actaspfwd.h>
00008 #include <actasp/AspFluent.h>
00009 
00010 #include <actasp/GraphPolicy.h>
00011 #include <actasp/FilteringKR.h>
00012 
00013 
00014 namespace bwi_krexec {
00015 
// Forward declarations: this header only refers to these two types through
// pointers, so their full definitions are not needed here.
template <class T> class RewardFunction;
class DefaultActionValue;
00020 
00021 
/// Bundle of numeric parameters handed to SarsaActionSelector.
///
/// A default-constructed instance holds alpha = 0.8, gamma = 0.9999,
/// lambda = 0.9 and epsilon = 0.15.
/// NOTE(review): the names follow the usual SARSA(lambda) convention
/// (step size, discount, trace decay, exploration rate), but how each value
/// is actually used is defined in the implementation file — confirm there.
struct SarsaParams {

  /// Sets every parameter to its default value.
  SarsaParams()
    : alpha(0.8),
      gamma(0.9999),
      lambda(0.9),
      epsilon(0.15)
  {}

  double alpha;    ///< default 0.8
  double gamma;    ///< default 0.9999
  double lambda;   ///< default 0.9
  double epsilon;  ///< default 0.15
};
00031 
00032 class SarsaActionSelector : public actasp::ActionSelector, public actasp::ExecutionObserver {
00033 public:
00034   
00035   typedef std::set< actasp::AspFluent> State;
00036 
00037   SarsaActionSelector(actasp::FilteringKR* reasoner, DefaultActionValue *defval, RewardFunction<State>*reward, const SarsaParams& p = SarsaParams() );
00038 
00039   actasp::ActionSet::const_iterator choose(const actasp::ActionSet &options) throw();
00040 
00041   void actionStarted(const actasp::AspFluent& action) throw();
00042   void actionTerminated(const actasp::AspFluent& action) throw();
00043 
00044   void policyChanged(actasp::PartialPolicy* newPolicy) throw();
00045   void goalChanged(std::vector<actasp::AspRule> newGoalRules) throw();
00046   
00047   //for value:
00048   void readFrom(std::istream & fromStream) throw();
00049   void writeTo(std::ostream & toStream) throw();
00050 
00051   //for notFilteredToFiltered:
00052   void readMapFrom(std::istream & fromStream) throw();
00053   void writeMapTo(std::ostream & toStream) throw();
00054   
00055   void episodeEnded() throw();
00056 
00057   void saveValueInitialState(const std::string& fileName);
00058 
00059 
00060   typedef std::map< actasp::AspFluent, double, actasp::ActionComparator> ActionValueMap;
00061   typedef std::map< State , ActionValueMap , actasp::StateComparator<actasp::AspFluent> > StateActionMap;
00062 
00063 private:
00064   
00065   void updateValue(double v_s_prime);
00066   
00067   actasp::FilteringKR *reasoner;
00068   DefaultActionValue *defval;
00069 
00070   SarsaParams p;
00071   RewardFunction<State> *reward;
00072 
00073   StateActionMap value;
00074   StateActionMap e;
00075   State initial; 
00076   State initialNotFiltered;
00077   State final; 
00078   State finalNotFiltered;
00079   actasp::AspFluent previousAction;
00080   double v_s;
00081 
00082   //for filterstate:
00083   actasp::GraphPolicy *policy;
00084   std::vector<actasp::AspRule> goalRules;
00085   std::map< State, State, actasp::StateComparator<actasp::AspFluent> > notFilteredToFiltered;
00086   bool stateCompare(const std::set<actasp::AspFluent> state, const std::set<actasp::AspFluent> otherstate);
00087 
00088 };
00089 
00090 }
00091 
00092 
00093 #endif


bwi_kr_execution
Author(s): Matteo Leonetti, Piyush Khandelwal
autogenerated on Thu Jun 6 2019 17:57:37