Classes

| class | continuous.PolicyNetwork | |
| class | continuous.QValueNetwork | Q-value and policy networks |
| class | continuous.ReplayItem | |
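
PolicyNetwork and QValueNetwork play the actor and critic roles of a DDPG-style agent, and ReplayItem is the record kept in the replay buffer. The sketch below shows how such transitions could be stored and sampled; the ReplayItem constructor and field order are assumptions inferred from the batch-building code listed under Variables (b.x, b.u, b.reward, b.done, b.x2), not the class's actual definition.

```python
# Sketch only: a namedtuple stands in for the real ReplayItem class.
import random
from collections import deque, namedtuple

ReplayItem = namedtuple('ReplayItem', ['x', 'u', 'reward', 'done', 'x2'])  # assumed fields

REPLAY_SIZE = 10000
BATCH_SIZE = 64
replayDeque = deque()

def store(x, u, reward, done, x2):
    """Append one transition and keep the buffer bounded by REPLAY_SIZE."""
    replayDeque.append(ReplayItem(x, u, reward, done, x2))
    if len(replayDeque) > REPLAY_SIZE:
        replayDeque.popleft()

# Once the buffer is large enough, a minibatch is drawn uniformly at random,
# exactly as continuous.batch does below.
if len(replayDeque) >= BATCH_SIZE:
    batch = random.sample(replayDeque, BATCH_SIZE)
```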

Namespaces

| continuous |

Functions

| def | continuous.rendertrial(maxiter=NSTEPS, verbose=True) |
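
rendertrial plays one episode with the current deterministic policy while rendering the pendulum, for at most maxiter steps. Its body is not reproduced on this page; the sketch below shows the usual shape of such a rollout, assuming the Pendulum environment exposes reset(), step(u) returning the next state and the reward, and render(), and reusing the sess, policy, and env objects listed under Variables.

```python
def rendertrial(maxiter=NSTEPS, verbose=True):
    """Sketch: roll out the greedy policy and report the accumulated reward.
    The real implementation in continuous.py may differ in details."""
    x = env.reset().T
    rsum = 0.0
    for _ in range(maxiter):
        u = sess.run(policy.policy, feed_dict={policy.x: x})  # deterministic action
        x, reward = env.step(u)   # assumed to return (next state, reward)
        x = x.T
        env.render()
        rsum += reward
    if verbose:
        print('Cumulated reward = %.3f' % rsum)
```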

Variables

| | continuous.batch = random.sample(replayDeque,BATCH_SIZE) | |
| int | continuous.BATCH_SIZE = 64 | |
| | continuous.d_batch = np.vstack([ b.done for b in batch ]) | |
| float | continuous.DECAY_RATE = 0.99 | |
| bool | continuous.done = False | |
| | continuous.env = Pendulum(1) | Environment |
| | continuous.feed_dict | |
| list | continuous.h_qva = [] | |
| list | continuous.h_rwd = [] | History of the search |
| list | continuous.h_ste = [] | |
| | continuous.maxq | |
| | continuous.n_init = tflearn.initializations.truncated_normal(seed=RANDOM_SEED) | |
| int | continuous.NEPISODES = 100 | Hyperparameters |
| int | continuous.NH1 = 250 | |
| int | continuous.NSTEPS = 100 | |
| | continuous.NU = env.nu | |
| | continuous.NX = env.nobs | |
| | continuous.optim | |
| | continuous.policy = PolicyNetwork().setupOptim() | TensorFlow initialization |
| float | continuous.POLICY_LEARNING_RATE = 0.0001 | |
| | continuous.policyTarget = PolicyNetwork().setupTargetAssign(policy) | |
| | continuous.q2_batch | |
| | continuous.qgrad | |
| | continuous.qref_batch = r_batch + (d_batch==False)*(DECAY_RATE*q2_batch) | |
| | continuous.qvalue = QValueNetwork().setupOptim() | |
| float | continuous.QVALUE_LEARNING_RATE = 0.001 | |
| | continuous.qvalueTarget = QValueNetwork().setupTargetAssign(qvalue) | |
| | continuous.r | |
| | continuous.r_batch = np.vstack([ b.reward for b in batch ]) | |
| | continuous.RANDOM_SEED = int((time.time()%10)*1000) | Random seed |
| int | continuous.REPLAY_SIZE = 10000 | |
| | continuous.replayDeque = deque() | |
| float | continuous.rsum = 0.0 | |
| | continuous.sess = tf.InteractiveSession() | |
| | continuous.u = sess.run(policy.policy, feed_dict={ policy.x: x }) | |
| | continuous.u2_batch = sess.run(policyTarget.policy, feed_dict={ policyTarget.x : x2_batch }) | |
| | continuous.u_batch = np.vstack([ b.u for b in batch ]) | |
| | continuous.u_init | |
| | continuous.u_targ = sess.run(policy.policy, feed_dict={ policy.x : x_batch }) | |
| float | continuous.UPDATE_RATE = 0.01 | |
| | continuous.withSinCos | |
| | continuous.x = env.reset().T | Training |
| | continuous.x2 = x2.T | |
| | continuous.x2_batch = np.vstack([ b.x2 for b in batch ]) | |
| | continuous.x_batch = np.vstack([ b.x for b in batch ]) | |
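
Taken together, the batch variables above implement one DDPG-style update: the target policy proposes actions u2_batch for the successor states x2_batch, the target critic scores them as q2_batch, qref_batch is the Bellman target r + (1 - done) * DECAY_RATE * q2, and the critic and actor are then optimized on the minibatch before the target networks are refreshed at rate UPDATE_RATE. The sketch below assembles those pieces; the placeholder and op names that this listing does not spell out (qvalueTarget.qvalue, qvalue.qref, qvalue.gradient, policy.qgradient, *.update_variables) are assumptions, not the file's confirmed API.

```python
import random
import numpy as np

def training_step(sess, policy, policyTarget, qvalue, qvalueTarget,
                  replayDeque, BATCH_SIZE=64, DECAY_RATE=0.99):
    """Sketch of one critic/actor update built from the variables listed above."""
    batch = random.sample(replayDeque, BATCH_SIZE)
    x_batch  = np.vstack([b.x      for b in batch])
    u_batch  = np.vstack([b.u      for b in batch])
    r_batch  = np.vstack([b.reward for b in batch])
    d_batch  = np.vstack([b.done   for b in batch])
    x2_batch = np.vstack([b.x2     for b in batch])

    # Target policy proposes actions for the successor states ...
    u2_batch = sess.run(policyTarget.policy, feed_dict={policyTarget.x: x2_batch})
    # ... and the target critic evaluates them (attribute name assumed).
    q2_batch = sess.run(qvalueTarget.qvalue,
                        feed_dict={qvalueTarget.x: x2_batch, qvalueTarget.u: u2_batch})

    # Bellman target: reward plus discounted bootstrap, zeroed on terminal steps.
    qref_batch = r_batch + (d_batch == False) * (DECAY_RATE * q2_batch)

    # Critic regression toward the Bellman target (qvalue.qref assumed).
    sess.run(qvalue.optim, feed_dict={qvalue.x: x_batch, qvalue.u: u_batch,
                                      qvalue.qref: qref_batch})

    # Actor step along the critic's action gradient (attribute names assumed).
    u_targ = sess.run(policy.policy, feed_dict={policy.x: x_batch})
    qgrad = sess.run(qvalue.gradient, feed_dict={qvalue.x: x_batch, qvalue.u: u_targ})
    sess.run(policy.optim, feed_dict={policy.x: x_batch, policy.qgradient: qgrad})

    # Soft update of the target networks via the ops created by
    # setupTargetAssign (op names assumed).
    sess.run(policyTarget.update_variables)
    sess.run(qvalueTarget.update_variables)
```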