00001 #ifndef bwi_krexec_QLearningActionSelector_h__guard 00002 #define bwi_krexec_QLearningActionSelector_h__guard 00003 00004 #include "DefaultActionValue.h" 00005 00006 #include <actasp/state_utils.h> 00007 #include <actasp/ExecutionObserver.h> 00008 #include <actasp/ActionSelector.h> 00009 #include <actasp/AspKR.h> 00010 00011 #include <map> 00012 #include <set> 00013 00014 #include <iosfwd> 00015 00016 namespace bwi_krexec { 00017 00018 template <typename T> 00019 class RewardFunction; 00020 00021 class QLearningActionSelector : public actasp::ActionSelector, public actasp::ExecutionObserver { 00022 public: 00023 00024 typedef std::set< actasp::AspFluent> State; 00025 00026 QLearningActionSelector(double alpha, RewardFunction<State> *reward, actasp::AspKR *reasoner, DefaultActionValue *defval); 00027 00028 actasp::ActionSet::const_iterator choose(const actasp::ActionSet &options) throw() ; 00029 00030 void episodeEnded(); 00031 00032 void actionStarted(const actasp::AspFluent& action) throw(); 00033 void actionTerminated(const actasp::AspFluent& action) throw(); 00034 00035 void readFrom(std::istream & fromStream) throw(); 00036 void writeTo(std::ostream & toStream) throw(); 00037 00038 00039 typedef std::map< actasp::AspFluent, double, actasp::ActionComparator> ActionValueMap; 00040 typedef std::map< State , ActionValueMap , actasp::StateComparator<actasp::AspFluent> > StateActionMap; 00041 00042 private: 00043 actasp::AspKR *reasoner; 00044 DefaultActionValue *defval; 00045 00046 double alpha; 00047 RewardFunction<State> *reward; 00048 00049 StateActionMap value; 00050 State initial; 00051 State final; 00052 actasp::AspFluent previousAction; 00053 int count; 00054 }; 00055 00056 } 00057 00058 #endif