|
| continuous.batch |
|
int | continuous.BATCH_SIZE = 64 |
|
| continuous.d_batch = np.vstack([b.done for b in batch]) |
|
float | continuous.DECAY_RATE = 0.99 |
|
bool | continuous.done = False |
|
| continuous.env = Pendulum(1) |
|
| continuous.feed_dict |
|
list | continuous.h_qva = [] |
|
list | continuous.h_rwd = [] |
|
list | continuous.h_ste = [] |
|
tuple | continuous.maxq |
|
| continuous.n_init = tflearn.initializations.truncated_normal(seed=RANDOM_SEED) |
|
int | continuous.NEPISODES = 100 |
|
int | continuous.NH1 = 250 |
|
int | continuous.NSTEPS = 100 |
|
| continuous.NU = env.nu |
|
| continuous.NX = env.nobs |
|
| continuous.optim |
|
| continuous.policy = PolicyNetwork().setupOptim() |
|
float | continuous.POLICY_LEARNING_RATE = 0.0001 |
|
| continuous.policyTarget = PolicyNetwork().setupTargetAssign(policy) |
|
| continuous.q2_batch |
|
| continuous.qgrad |
|
| continuous.qref_batch = r_batch + (not d_batch) * (DECAY_RATE * q2_batch) |
|
| continuous.qvalue = QValueNetwork().setupOptim() |
|
float | continuous.QVALUE_LEARNING_RATE = 0.001 |
|
| continuous.qvalueTarget = QValueNetwork().setupTargetAssign(qvalue) |
|
| continuous.r |
|
| continuous.r_batch = np.vstack([b.reward for b in batch]) |
|
| continuous.RANDOM_SEED = int((time.time() % 10) * 1000) |
|
int | continuous.REPLAY_SIZE = 10000 |
|
| continuous.replayDeque = deque() |
|
float | continuous.rsum = 0.0 |
|
| continuous.sess = tf.InteractiveSession() |
|
| continuous.u = sess.run(policy.policy, feed_dict={policy.x: x}) |
|
| continuous.u2_batch |
|
| continuous.u_batch = np.vstack([b.u for b in batch]) |
|
| continuous.u_init = tflearn.initializations.uniform(minval=-0.003, maxval=0.003, seed=RANDOM_SEED) |
|
| continuous.u_targ = sess.run(policy.policy, feed_dict={policy.x: x_batch}) |
|
float | continuous.UPDATE_RATE = 0.01 |
|
| continuous.withSinCos |
|
| continuous.x = env.reset().T |
|
| continuous.x2 = x2.T |
|
| continuous.x2_batch = np.vstack([b.x2 for b in batch]) |
|
| continuous.x_batch = np.vstack([b.x for b in batch]) |
|