Classes | Functions | Variables
continuous Namespace Reference

Classes

class  PolicyNetwork
 
class  QValueNetwork
 — Q-value and policy networks More...
 
class  ReplayItem
 

Functions

def rendertrial (maxiter=NSTEPS, verbose=True)
 

Variables

 batch = random.sample(replayDeque,BATCH_SIZE)
 
int BATCH_SIZE = 64
 
 d_batch = np.vstack([ b.done for b in batch ])
 
float DECAY_RATE = 0.99
 
bool done = False
 
 env = Pendulum(1)
 — Environment More...
 
 feed_dict
 
list h_qva = []
 
list h_rwd = []
 History of search. More...
 
list h_ste = []
 
 maxq
 
 n_init = tflearn.initializations.truncated_normal(seed=RANDOM_SEED)
 
int NEPISODES = 100
 — Hyperparameters More...
 
int NH1 = 250
 
int NSTEPS = 100
 
 NU = env.nu
 
 NX = env.nobs
 
 optim
 
 policy = PolicyNetwork().setupOptim()
 — TensorFlow initialization More...
 
float POLICY_LEARNING_RATE = 0.0001
 
 policyTarget = PolicyNetwork().setupTargetAssign(policy)
 
 q2_batch
 
 qgrad
 
 qref_batch = r_batch+(d_batch==False)*(DECAY_RATE*q2_batch)
 
 qvalue = QValueNetwork().setupOptim()
 
float QVALUE_LEARNING_RATE = 0.001
 
 qvalueTarget = QValueNetwork().setupTargetAssign(qvalue)
 
 r
 
 r_batch = np.vstack([ b.reward for b in batch ])
 
 RANDOM_SEED = int((time.time()%10)*1000)
 — Random seed More...
 
int REPLAY_SIZE = 10000
 
 replayDeque = deque()
 
float rsum = 0.0
 
 sess = tf.InteractiveSession()
 
 u = sess.run(policy.policy, feed_dict={ policy.x: x })
 
 u2_batch = sess.run(policyTarget.policy, feed_dict={ policyTarget .x : x2_batch})
 
 u_batch = np.vstack([ b.u for b in batch ])
 
 u_init
 
 u_targ = sess.run(policy.policy, feed_dict={ policy.x : x_batch} )
 
float UPDATE_RATE = 0.01
 
 withSinCos
 
 x = env.reset().T
 — Training More...
 
 x2 = x2.T
 
 x2_batch = np.vstack([ b.x2 for b in batch ])
 
 x_batch = np.vstack([ b.x for b in batch ])
 

Detailed Description

Deep actor-critic network,
from "Continuous control with deep reinforcement learning" by Lillicrap et al., arXiv:1509.02971.

Function Documentation

def continuous.rendertrial (   maxiter = NSTEPS,
  verbose = True 
)

Definition at line 138 of file continuous.py.

Variable Documentation

continuous.batch = random.sample(replayDeque,BATCH_SIZE)

Definition at line 176 of file continuous.py.

int continuous.BATCH_SIZE = 64

Definition at line 34 of file continuous.py.

continuous.d_batch = np.vstack([ b.done for b in batch ])

Definition at line 180 of file continuous.py.

float continuous.DECAY_RATE = 0.99

Definition at line 31 of file continuous.py.

bool continuous.done = False

Definition at line 165 of file continuous.py.

continuous.env = Pendulum(1)

— Environment

Definition at line 38 of file continuous.py.

continuous.feed_dict

Definition at line 190 of file continuous.py.

list continuous.h_qva = []

Definition at line 152 of file continuous.py.

list continuous.h_rwd = []

History of search.

Definition at line 151 of file continuous.py.

list continuous.h_ste = []

Definition at line 153 of file continuous.py.

continuous.maxq
Initial value:
1 = np.max( sess.run(qvalue.qvalue,feed_dict={ qvalue.x : x_batch,
2  qvalue.u : u_batch }) )\

Definition at line 209 of file continuous.py.

continuous.n_init = tflearn.initializations.truncated_normal(seed=RANDOM_SEED)

Definition at line 22 of file continuous.py.

int continuous.NEPISODES = 100

— Hyperparameters

Definition at line 27 of file continuous.py.

int continuous.NH1 = 250

Definition at line 35 of file continuous.py.

int continuous.NSTEPS = 100

Definition at line 28 of file continuous.py.

continuous.NU = env.nu

Definition at line 41 of file continuous.py.

continuous.NX = env.nobs

Definition at line 40 of file continuous.py.

continuous.optim

Definition at line 190 of file continuous.py.

continuous.policy = PolicyNetwork().setupOptim()

— TensorFlow initialization

Definition at line 125 of file continuous.py.

float continuous.POLICY_LEARNING_RATE = 0.0001

Definition at line 30 of file continuous.py.

continuous.policyTarget = PolicyNetwork().setupTargetAssign(policy)

Definition at line 126 of file continuous.py.

continuous.q2_batch
Initial value:
1 = sess.run(qvalueTarget.qvalue, feed_dict={ qvalueTarget.x : x2_batch,
2  qvalueTarget.u : u2_batch })

Definition at line 185 of file continuous.py.

continuous.qgrad
Initial value:
1 = sess.run(qvalue.gradient, feed_dict={ qvalue.x : x_batch,
2  qvalue.u : u_targ })

Definition at line 196 of file continuous.py.

continuous.qref_batch = r_batch+(d_batch==False)*(DECAY_RATE*q2_batch)

Definition at line 187 of file continuous.py.

continuous.qvalue = QValueNetwork().setupOptim()

Definition at line 128 of file continuous.py.

float continuous.QVALUE_LEARNING_RATE = 0.001

Definition at line 29 of file continuous.py.

continuous.qvalueTarget = QValueNetwork().setupTargetAssign(qvalue)

Definition at line 129 of file continuous.py.

continuous.r

Definition at line 163 of file continuous.py.

continuous.r_batch = np.vstack([ b.reward for b in batch ])

Definition at line 179 of file continuous.py.

continuous.RANDOM_SEED = int((time.time()%10)*1000)

— Random seed

Definition at line 17 of file continuous.py.

int continuous.REPLAY_SIZE = 10000

Definition at line 33 of file continuous.py.

continuous.replayDeque = deque()

Definition at line 121 of file continuous.py.

float continuous.rsum = 0.0

Definition at line 158 of file continuous.py.

continuous.sess = tf.InteractiveSession()

Definition at line 131 of file continuous.py.

continuous.u = sess.run(policy.policy, feed_dict={ policy.x: x })

Definition at line 161 of file continuous.py.

continuous.u2_batch = sess.run(policyTarget.policy, feed_dict={ policyTarget .x : x2_batch})

Definition at line 184 of file continuous.py.

continuous.u_batch = np.vstack([ b.u for b in batch ])

Definition at line 178 of file continuous.py.

continuous.u_init
Initial value:
1 = tflearn.initializations.uniform(minval=-0.003, maxval=0.003,\
2  seed=RANDOM_SEED)

Definition at line 23 of file continuous.py.

continuous.u_targ = sess.run(policy.policy, feed_dict={ policy.x : x_batch} )

Definition at line 195 of file continuous.py.

float continuous.UPDATE_RATE = 0.01

Definition at line 32 of file continuous.py.

continuous.withSinCos

Definition at line 39 of file continuous.py.

continuous.x = env.reset().T

— Training

Definition at line 157 of file continuous.py.

continuous.x2 = x2.T

Definition at line 163 of file continuous.py.

continuous.x2_batch = np.vstack([ b.x2 for b in batch ])

Definition at line 181 of file continuous.py.

continuous.x_batch = np.vstack([ b.x for b in batch ])

Definition at line 177 of file continuous.py.



pinocchio
Author(s):
autogenerated on Tue Jun 1 2021 02:45:05