qtable.py
Go to the documentation of this file.
1 """
2 Example of Q-table learning with a simple discretized 1-pendulum environment.
3 """
4 
5 import signal
6 import time
7 
8 import matplotlib.pyplot as plt
9 import numpy as np
10 from dpendulum import DPendulum
11 
12 
# Seed NumPy's global RNG from the wall clock (an integer in 0..9999) and echo
# the value so that a given run can be reproduced by hard-coding it.
RANDOM_SEED = int(1000 * (time.time() % 10))
print("Seed = %d" % RANDOM_SEED)
np.random.seed(RANDOM_SEED)
16 
17 
# --- Hyper-parameters ---
# Episode budget for the training loop.
NEPISODES = 500
# Maximum number of environment steps taken within one episode.
NSTEPS = 50
# Step size applied to the correction of a Q-table entry toward its reference.
LEARNING_RATE = 0.85
# Discount factor weighting the best Q-value of the successor state.
DECAY_RATE = 0.99
22 
23 
# Discretized pendulum environment and the sizes of its state/control sets.
env = DPendulum()
NX = env.nx  # Number of (discrete) states
NU = env.nu  # Number of (discrete) controls

# Q-table with one row per state and one column per control, initialized to 0.
Q = np.zeros((NX, NU))
29 
30 
def rendertrial(maxiter=100):
    """Roll out the greedy policy of the current Q-table, rendering each step.

    The environment is reset, then for at most ``maxiter`` steps the control
    with the highest Q-value in the current state is applied and the
    environment is rendered. The roll-out stops early, after printing
    ``Reward!``, as soon as a step returns a reward equal to 1.
    """
    state = env.reset()
    for _ in range(maxiter):
        greedy_control = np.argmax(Q[state, :])
        state, reward = env.step(greedy_control)
        env.render()
        if reward != 1:
            continue
        print("Reward!")
        break
41 
42 
# Roll-out when CTRL-Z is pressed. SIGTSTP is only defined on Unix platforms,
# so the handler is registered only where the signal module exposes it;
# elsewhere the script still trains, just without the interactive roll-out.
if hasattr(signal, "SIGTSTP"):
    # The handler ignores the (signal number, stack frame) arguments.
    signal.signal(signal.SIGTSTP, lambda signum, frame: rendertrial())
# Sum of rewards collected in each episode (learning history, for the plot).
h_rwd = []

# The episode counter starts at 1 because it divides the exploration noise;
# as written, range(1, NEPISODES) yields NEPISODES - 1 episodes.
for episode in range(1, NEPISODES):
    x = env.reset()
    rsum = 0.0
    for steps in range(NSTEPS):
        # Greedy action with noise: Gaussian perturbation of the Q-values of
        # state x, scaled by 1/episode so it shrinks as training progresses.
        noise = np.random.randn(1, NU) / episode
        u = np.argmax(Q[x, :] + noise)
        x2, reward = env.step(u)
        rsum += reward

        # Reference Q-value at state x respecting HJB: immediate reward plus
        # the discounted best Q-value available in the successor state.
        Qref = reward + DECAY_RATE * np.max(Q[x2, :])

        # Move the Q-table entry toward the reference to better fit HJB.
        Q[x, u] += LEARNING_RATE * (Qref - Q[x, u])
        x = x2

        # A reward of exactly 1 ends the episode early.
        if reward == 1:
            break

    h_rwd.append(rsum)
    # Every 20 episodes, report the reward collected over the last 20.
    if episode % 20 == 0:
        print("Episode #%d done with %d sucess" % (episode, sum(h_rwd[-20:])))
70 
# Report the success rate over the episodes that were actually recorded.
# h_rwd holds one entry per completed episode, so its length (rather than the
# NEPISODES constant) is the correct denominator and x-axis extent.
n_done = len(h_rwd)
success_rate = sum(h_rwd) / n_done if n_done else 0.0
print("Total rate of success: %.3f" % success_rate)

# Running average of the per-episode reward sum.
plt.plot(np.cumsum(h_rwd) / np.arange(1, n_done + 1))
plt.show()
dpendulum.DPendulum
Definition: dpendulum.py:74
qtable.rendertrial
def rendertrial(maxiter=100)
Definition: qtable.py:31


pinocchio
Author(s):
autogenerated on Tue Jan 7 2025 03:41:47