Classes | Namespaces | Functions | Variables
continuous.py File Reference

Go to the source code of this file.

Classes

class  continuous.PolicyNetwork
 
class  continuous.QValueNetwork
 — Q-value and policy networks More...
 
class  continuous.ReplayItem
 

Namespaces

 continuous
 

Functions

def continuous.rendertrial (maxiter=NSTEPS, verbose=True)
 

Variables

 continuous.batch = random.sample(replayDeque,BATCH_SIZE)
 
int continuous.BATCH_SIZE = 64
 
 continuous.d_batch = np.vstack([ b.done for b in batch ])
 
float continuous.DECAY_RATE = 0.99
 
bool continuous.done = False
 
 continuous.env = Pendulum(1)
 — Environment More...
 
 continuous.feed_dict
 
list continuous.h_qva = []
 
list continuous.h_rwd = []
 — History of search. More...
 
list continuous.h_ste = []
 
 continuous.maxq
 
 continuous.n_init = tflearn.initializations.truncated_normal(seed=RANDOM_SEED)
 
int continuous.NEPISODES = 100
 — Hyperparameters More...
 
int continuous.NH1 = 250
 
int continuous.NSTEPS = 100
 
 continuous.NU = env.nu
 
 continuous.NX = env.nobs
 
 continuous.optim
 
 continuous.policy = PolicyNetwork().setupOptim()
 — TensorFlow initialization More...
 
float continuous.POLICY_LEARNING_RATE = 0.0001
 
 continuous.policyTarget = PolicyNetwork().setupTargetAssign(policy)
 
 continuous.q2_batch
 
 continuous.qgrad
 
 continuous.qref_batch = r_batch+(d_batch==False)*(DECAY_RATE*q2_batch)
 
 continuous.qvalue = QValueNetwork().setupOptim()
 
float continuous.QVALUE_LEARNING_RATE = 0.001
 
 continuous.qvalueTarget = QValueNetwork().setupTargetAssign(qvalue)
 
 continuous.r
 
 continuous.r_batch = np.vstack([ b.reward for b in batch ])
 
 continuous.RANDOM_SEED = int((time.time()%10)*1000)
 — Random seed More...
 
int continuous.REPLAY_SIZE = 10000
 
 continuous.replayDeque = deque()
 
float continuous.rsum = 0.0
 
 continuous.sess = tf.InteractiveSession()
 
 continuous.u = sess.run(policy.policy, feed_dict={ policy.x: x })
 
 continuous.u2_batch = sess.run(policyTarget.policy, feed_dict={ policyTarget .x : x2_batch})
 
 continuous.u_batch = np.vstack([ b.u for b in batch ])
 
 continuous.u_init
 
 continuous.u_targ = sess.run(policy.policy, feed_dict={ policy.x : x_batch} )
 
float continuous.UPDATE_RATE = 0.01
 
 continuous.withSinCos
 
 continuous.x = env.reset().T
 — Training More...
 
 continuous.x2 = x2.T
 
 continuous.x2_batch = np.vstack([ b.x2 for b in batch ])
 
 continuous.x_batch = np.vstack([ b.x for b in batch ])
 


pinocchio
Author(s):
autogenerated on Tue Jun 1 2021 02:45:05