Go to the source code of this file.
Classes | |
class | qnet.QValueNetwork |
Namespaces | |
qnet | |
Functions | |
def | qnet.disturb (u, i) |
def | qnet.onehot (ix, n=NX) |
def | qnet.rendertrial (maxiter=100) |
Variables | |
float | qnet.DECAY_RATE = 0.99 |
| qnet.env = DPendulum() |
| qnet.feed_dict |
list | qnet.h_rwd = [] |
float | qnet.LEARNING_RATE = 0.1 |
int | qnet.NEPISODES = 500 |
int | qnet.NSTEPS = 50 |
| qnet.NU = env.nu |
| qnet.NX = env.nx |
| qnet.optim |
| qnet.Q2 = sess.run(qvalue.qvalue, feed_dict={qvalue.x: onehot(x2)}) |
| qnet.Qref = sess.run(qvalue.qvalue, feed_dict={qvalue.x: onehot(x)}) |
| qnet.qvalue = QValueNetwork() |
| qnet.RANDOM_SEED = int((time.time() % 10) * 1000) |
| qnet.reward |
float | qnet.rsum = 0.0 |
| qnet.sess = tf.InteractiveSession() |
| qnet.u = sess.run(qvalue.u, feed_dict={qvalue.x: onehot(x)})[0] |
| qnet.x = env.reset() |
| qnet.x2 |