Classes | |
class | PolicyNetwork |
class | QValueNetwork |
— Q-value and policy networks More... | |
class | ReplayItem |
Functions | |
def | rendertrial (maxiter=NSTEPS, verbose=True) |
Variables | |
batch = random.sample(replayDeque,BATCH_SIZE) | |
int | BATCH_SIZE = 64 |
d_batch = np.vstack([ b.done for b in batch ]) | |
float | DECAY_RATE = 0.99 |
bool | done = False |
env = Pendulum(1) | |
— Environment More... | |
feed_dict | |
list | h_qva = [] |
list | h_rwd = [] |
History of the search. More... | |
list | h_ste = [] |
maxq | |
n_init = tflearn.initializations.truncated_normal(seed=RANDOM_SEED) | |
int | NEPISODES = 100 |
— Hyperparameters More... | |
int | NH1 = 250 |
int | NSTEPS = 100 |
NU = env.nu | |
NX = env.nobs | |
optim | |
policy = PolicyNetwork().setupOptim() | |
— TensorFlow initialization More... | |
float | POLICY_LEARNING_RATE = 0.0001 |
policyTarget = PolicyNetwork().setupTargetAssign(policy) | |
q2_batch | |
qgrad | |
qref_batch = r_batch+(d_batch==False)*(DECAY_RATE*q2_batch) | |
qvalue = QValueNetwork().setupOptim() | |
float | QVALUE_LEARNING_RATE = 0.001 |
qvalueTarget = QValueNetwork().setupTargetAssign(qvalue) | |
r | |
r_batch = np.vstack([ b.reward for b in batch ]) | |
RANDOM_SEED = int((time.time()%10)*1000) | |
— Random seed More... | |
int | REPLAY_SIZE = 10000 |
replayDeque = deque() | |
float | rsum = 0.0 |
sess = tf.InteractiveSession() | |
u = sess.run(policy.policy, feed_dict={ policy.x: x }) | |
u2_batch = sess.run(policyTarget.policy, feed_dict={ policyTarget.x : x2_batch}) | |
u_batch = np.vstack([ b.u for b in batch ]) | |
u_init | |
u_targ = sess.run(policy.policy, feed_dict={ policy.x : x_batch} ) | |
float | UPDATE_RATE = 0.01 |
withSinCos | |
x = env.reset().T | |
— Training More... | |
x2 = x2.T | |
x2_batch = np.vstack([ b.x2 for b in batch ]) | |
x_batch = np.vstack([ b.x for b in batch ]) | |
Deep actor-critic network, from "Continuous control with deep reinforcement learning" by Lillicrap et al., arXiv:1509.02971.
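For orientation, here is a minimal sketch of the training loop that the members listed on this page implement: roll the policy out in the environment with exploration noise, store transitions in a replay deque, and update the networks from sampled minibatches. The environment and network stubs below are hypothetical placeholders, not the classes defined in continuous.py.

# Hypothetical skeleton of the DDPG loop (Lillicrap et al., arXiv:1509.02971).
# The stubs below are placeholders, NOT the classes defined in continuous.py.
import random
from collections import deque, namedtuple
import numpy as np

ReplayItem = namedtuple("ReplayItem", ["x", "u", "reward", "done", "x2"])
NEPISODES, NSTEPS, BATCH_SIZE, REPLAY_SIZE = 100, 100, 64, 10000
replayDeque = deque(maxlen=REPLAY_SIZE)

def policy(x):                      # stub actor: 1-D action from the state
    return np.tanh(x[:1])

def env_reset():                    # stub environment reset (3-D observation)
    return np.random.randn(3)

def env_step(x, u):                 # stub dynamics: next state, reward, done flag
    x2 = x + 0.1 * np.concatenate([u, np.zeros(2)])
    return x2, -float(np.sum(x2 ** 2)), False

def update_networks(batch):         # placeholder for critic/actor/target updates
    pass

for episode in range(NEPISODES):
    x = env_reset()
    for step in range(NSTEPS):
        u = policy(x) + 0.1 * np.random.randn(1)      # exploration noise
        x2, reward, done = env_step(x, u)
        replayDeque.append(ReplayItem(x, u, reward, done, x2))  # store transition
        x = x2
        if len(replayDeque) >= BATCH_SIZE:
            update_networks(random.sample(replayDeque, BATCH_SIZE))
        if done:
            break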
def continuous.rendertrial ( maxiter = NSTEPS, verbose = True )
Definition at line 138 of file continuous.py.
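A hedged reading of this helper: it appears to roll the current greedy policy out for up to maxiter steps while rendering the pendulum, and to print the accumulated reward when verbose is set. A sketch along those lines, reusing the module globals documented below; the exact body and the return signature of env.step are assumptions.

# Hypothetical sketch of rendertrial; the real body is at line 138 of
# continuous.py and may differ (env.step's return signature is assumed).
def rendertrial(maxiter=NSTEPS, verbose=True):
    x = env.reset().T
    rsum = 0.0
    for i in range(maxiter):
        u = sess.run(policy.policy, feed_dict={policy.x: x})  # greedy action
        x, reward = env.step(u)
        x = x.T
        env.render()
        rsum += reward
    if verbose:
        print('Total reward after %d steps: %.2f' % (maxiter, rsum))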
continuous.batch = random.sample(replayDeque,BATCH_SIZE) |
Definition at line 176 of file continuous.py.
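The sampled minibatch is a list of ReplayItem transitions; the *_batch members documented below stack its fields column-wise with np.vstack. Gathered in one place, reusing the module's own globals, with the presumed array shapes as comments:

# The sampling and stacking steps of continuous.py collected together;
# comments give the presumed shapes of the resulting arrays.
batch    = random.sample(replayDeque, BATCH_SIZE)       # list of ReplayItem
x_batch  = np.vstack([b.x      for b in batch])         # states       (BATCH_SIZE x NX)
u_batch  = np.vstack([b.u      for b in batch])         # actions      (BATCH_SIZE x NU)
r_batch  = np.vstack([b.reward for b in batch])         # rewards      (BATCH_SIZE x 1)
d_batch  = np.vstack([b.done   for b in batch])         # done flags   (BATCH_SIZE x 1)
x2_batch = np.vstack([b.x2     for b in batch])         # next states  (BATCH_SIZE x NX)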
int continuous.BATCH_SIZE = 64 |
Definition at line 34 of file continuous.py.
continuous.d_batch = np.vstack([ b.done for b in batch ]) |
Definition at line 180 of file continuous.py.
float continuous.DECAY_RATE = 0.99 |
Definition at line 31 of file continuous.py.
bool continuous.done = False |
Definition at line 165 of file continuous.py.
continuous.env = Pendulum(1) |
— Environment
Definition at line 38 of file continuous.py.
continuous.feed_dict |
Definition at line 190 of file continuous.py.
list continuous.h_qva = [] |
Definition at line 152 of file continuous.py.
list continuous.h_rwd = [] |
History of the search.
Definition at line 151 of file continuous.py.
list continuous.h_ste = [] |
Definition at line 153 of file continuous.py.
continuous.maxq |
Definition at line 209 of file continuous.py.
continuous.n_init = tflearn.initializations.truncated_normal(seed=RANDOM_SEED) |
Definition at line 22 of file continuous.py.
int continuous.NEPISODES = 100 |
— Hyperparameters
Definition at line 27 of file continuous.py.
int continuous.NH1 = 250 |
Definition at line 35 of file continuous.py.
int continuous.NSTEPS = 100 |
Definition at line 28 of file continuous.py.
continuous.NU = env.nu |
Definition at line 41 of file continuous.py.
continuous.NX = env.nobs |
Definition at line 40 of file continuous.py.
continuous.optim |
Definition at line 190 of file continuous.py.
continuous.policy = PolicyNetwork().setupOptim() |
— TensorFlow initialization
Definition at line 125 of file continuous.py.
float continuous.POLICY_LEARNING_RATE = 0.0001 |
Definition at line 30 of file continuous.py.
continuous.policyTarget = PolicyNetwork().setupTargetAssign(policy) |
Definition at line 126 of file continuous.py.
continuous.q2_batch |
Definition at line 185 of file continuous.py.
continuous.qgrad |
Definition at line 196 of file continuous.py.
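In DDPG the actor is trained with the deterministic policy gradient: the critic's gradient with respect to the action, which is presumably what qgrad holds, is chained through the actor's Jacobian to update the policy weights. A toy NumPy illustration with a made-up linear actor and quadratic critic:

# Toy illustration of the deterministic policy gradient (made-up networks):
# the actor weight moves along dQ/du * du/dtheta, i.e. the "qgrad" direction
# chained through the actor.
import numpy as np

theta = np.array([0.5])                  # linear actor: u = theta * x
x = 1.0
u = theta[0] * x
dQ_du = -2.0 * (u - 2.0 * x)             # critic Q(x,u) = -(u - 2x)^2, so dQ/du
du_dtheta = x                            # actor Jacobian
theta = theta + 0.1 * dQ_du * du_dtheta  # gradient ascent on Q(x, pi(x))
print(theta)                             # [0.8], moved toward the better action u = 2x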
continuous.qref_batch = r_batch+(d_batch==False)*(DECAY_RATE*q2_batch) |
Definition at line 187 of file continuous.py.
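qref_batch is the TD target of the critic update: y = r + DECAY_RATE * Q'(x2, pi'(x2)) for non-terminal samples, where (d_batch == False) zeroes the bootstrap term at episode ends and q2_batch holds the target critic's value at the next state. A small numerical illustration with made-up numbers:

# TD-target computation with made-up numbers:
# y = r + DECAY_RATE * q2 where the transition is not terminal, y = r otherwise.
import numpy as np

DECAY_RATE = 0.99
r_batch  = np.array([[1.0], [0.5]])
d_batch  = np.array([[False], [True]])       # the second transition ends its episode
q2_batch = np.array([[10.0], [10.0]])        # target-critic values at x2

qref_batch = r_batch + (d_batch == False) * (DECAY_RATE * q2_batch)
print(qref_batch)                            # [[10.9], [0.5]]: no bootstrap where done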
continuous.qvalue = QValueNetwork().setupOptim() |
Definition at line 128 of file continuous.py.
float continuous.QVALUE_LEARNING_RATE = 0.001 |
Definition at line 29 of file continuous.py.
continuous.qvalueTarget = QValueNetwork().setupTargetAssign(qvalue) |
Definition at line 129 of file continuous.py.
continuous.r |
Definition at line 163 of file continuous.py.
continuous.r_batch = np.vstack([ b.reward for b in batch ]) |
Definition at line 179 of file continuous.py.
continuous.RANDOM_SEED = int((time.time()%10)*1000) |
— Random seed
Definition at line 17 of file continuous.py.
int continuous.REPLAY_SIZE = 10000 |
Definition at line 33 of file continuous.py.
continuous.replayDeque = deque() |
Definition at line 121 of file continuous.py.
float continuous.rsum = 0.0 |
Definition at line 158 of file continuous.py.
continuous.sess = tf.InteractiveSession() |
Definition at line 131 of file continuous.py.
continuous.u = sess.run(policy.policy, feed_dict={ policy.x: x }) |
Definition at line 161 of file continuous.py.
continuous.u2_batch = sess.run(policyTarget.policy, feed_dict={ policyTarget.x : x2_batch}) |
Definition at line 184 of file continuous.py.
continuous.u_batch = np.vstack([ b.u for b in batch ]) |
Definition at line 178 of file continuous.py.
continuous.u_init |
Definition at line 23 of file continuous.py.
continuous.u_targ = sess.run(policy.policy, feed_dict={ policy.x : x_batch}) |
Definition at line 195 of file continuous.py.
float continuous.UPDATE_RATE = 0.01 |
Definition at line 32 of file continuous.py.
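UPDATE_RATE is the soft-update coefficient for the target networks: after each optimization step the targets are nudged toward the learned networks, theta_target <- UPDATE_RATE * theta + (1 - UPDATE_RATE) * theta_target, which is presumably what setupTargetAssign builds as TensorFlow assign operations. A plain-NumPy illustration:

# Plain-NumPy illustration of the soft (Polyak) target update controlled by
# UPDATE_RATE; the real version acts on the TensorFlow network variables.
import numpy as np

UPDATE_RATE = 0.01
theta        = np.array([1.0, 2.0, 3.0])     # learned-network weights
theta_target = np.zeros(3)                   # target-network weights

for _ in range(5):                           # a few training iterations
    theta_target = UPDATE_RATE * theta + (1 - UPDATE_RATE) * theta_target
print(theta_target)                          # drifting slowly toward theta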
continuous.withSinCos |
Definition at line 39 of file continuous.py.
continuous.x = env.reset().T |
— Training
Definition at line 157 of file continuous.py.
continuous.x2 = x2.T |
Definition at line 163 of file continuous.py.
continuous.x2_batch = np.vstack([ b.x2 for b in batch ]) |
Definition at line 181 of file continuous.py.
continuous.x_batch = np.vstack([ b.x for b in batch ]) |
Definition at line 177 of file continuous.py.