// Go to the documentation of this file.
00001 #ifndef bwi_krexec_SarsaActionSelector_h__guard
00002 #define bwi_krexec_SarsaActionSelector_h__guard
00003
00004 #include <actasp/ActionSelector.h>
00005 #include <actasp/ExecutionObserver.h>
00006 #include <actasp/state_utils.h>
00007 #include <actasp/actaspfwd.h>
00008 #include <actasp/AspFluent.h>
00009
00010 #include <actasp/GraphPolicy.h>
00011 #include <actasp/FilteringKR.h>
00012
00013
00014 namespace bwi_krexec {
00015
/// Forward declaration of the reward-function template; only pointers to it
/// are used in this header, so the full definition is not needed here.
template <typename T>
class RewardFunction;

/// Forward declaration; this header only stores a pointer to it.
class DefaultActionValue;
00020
00021
/**
 * Numeric parameters handed to SarsaActionSelector.
 *
 * The default constructor sets alpha = 0.8, gamma = 0.9999, lambda = 0.9 and
 * epsilon = 0.15. All fields are public and may be overwritten after
 * construction.
 *
 * NOTE(review): judging by the names these are presumably the learning rate,
 * discount factor, eligibility-trace decay and exploration probability of a
 * SARSA(lambda) learner -- confirm against the implementation file.
 */
struct SarsaParams {

  /// Builds a parameter set holding the default values listed above.
  SarsaParams() : alpha(0.8), gamma(0.9999), lambda(0.9), epsilon(0.15) {}

  double alpha;    ///< defaults to 0.8
  double gamma;    ///< defaults to 0.9999
  double lambda;   ///< defaults to 0.9
  double epsilon;  ///< defaults to 0.15
};
00031
00032 class SarsaActionSelector : public actasp::ActionSelector, public actasp::ExecutionObserver {
00033 public:
00034
00035 typedef std::set< actasp::AspFluent> State;
00036
00037 SarsaActionSelector(actasp::FilteringKR* reasoner, DefaultActionValue *defval, RewardFunction<State>*reward, const SarsaParams& p = SarsaParams() );
00038
00039 actasp::ActionSet::const_iterator choose(const actasp::ActionSet &options) throw();
00040
00041 void actionStarted(const actasp::AspFluent& action) throw();
00042 void actionTerminated(const actasp::AspFluent& action) throw();
00043
00044 void policyChanged(actasp::PartialPolicy* newPolicy) throw();
00045 void goalChanged(std::vector<actasp::AspRule> newGoalRules) throw();
00046
00047
00048 void readFrom(std::istream & fromStream) throw();
00049 void writeTo(std::ostream & toStream) throw();
00050
00051
00052 void readMapFrom(std::istream & fromStream) throw();
00053 void writeMapTo(std::ostream & toStream) throw();
00054
00055 void episodeEnded() throw();
00056
00057 void saveValueInitialState(const std::string& fileName);
00058
00059
00060 typedef std::map< actasp::AspFluent, double, actasp::ActionComparator> ActionValueMap;
00061 typedef std::map< State , ActionValueMap , actasp::StateComparator<actasp::AspFluent> > StateActionMap;
00062
00063 private:
00064
00065 void updateValue(double v_s_prime);
00066
00067 actasp::FilteringKR *reasoner;
00068 DefaultActionValue *defval;
00069
00070 SarsaParams p;
00071 RewardFunction<State> *reward;
00072
00073 StateActionMap value;
00074 StateActionMap e;
00075 State initial;
00076 State initialNotFiltered;
00077 State final;
00078 State finalNotFiltered;
00079 actasp::AspFluent previousAction;
00080 double v_s;
00081
00082
00083 actasp::GraphPolicy *policy;
00084 std::vector<actasp::AspRule> goalRules;
00085 std::map< State, State, actasp::StateComparator<actasp::AspFluent> > notFilteredToFiltered;
00086 bool stateCompare(const std::set<actasp::AspFluent> state, const std::set<actasp::AspFluent> otherstate);
00087
00088 };
00089
00090 }
00091
00092
00093 #endif