ocp.py
Go to the documentation of this file.
1 """
2 Example of optimal control resolution by direct optimization of a single trajectory.
3 """
4 
5 import signal
6 import time
7 
8 import matplotlib.pyplot as plt
9 import numpy as np
10 from pendulum import Pendulum
11 from scipy.optimize import fmin_l_bfgs_b
12 
13 env = Pendulum(1)
14 NSTEPS = 50
15 x0 = env.reset().copy()
16 
17 
18 def cost(U):
19  """Cost for a trajectory starting at state X0 with control U"""
20  env.reset(x0)
21  csum = 0.0
22  for t in range(NSTEPS):
23  u = U[env.nu * t : env.nu * (t + 1)] # Control at time step <t>
24  _, r = env.step(u) # Pendulum step, with reward r
25  csum += r # Cumulative sum
26  return -csum # Returns cost ie negative reward
27 
28 
29 def display(U, verbose=False):
30  """Display the trajectory on Gepetto viewer."""
31  x = x0.copy()
32  if verbose:
33  print("U = ", " ".join(map(lambda u: f"{u:.1f}", np.asarray(U).flatten())))
34  for i in range(len(U) / env.nu):
35  env.dynamics(x, U[env.nu * i : env.nu * (i + 1)], True)
36  env.display(x)
37  time.sleep(5e-2)
38  if verbose:
39  print("X%d" % i, x.T)
40 
41 
42 class CallBack:
43  """Call back function used to follow optimizer steps."""
44 
45  def __init__(self):
46  self.iter = 0
47  self.withdisplay = False
48  self.h_rwd = []
49 
50  def __call__(self, U):
51  print(
52  "Iteration ",
53  self.iter,
54  " ".join(map(lambda u: f"{u:.1f}", np.asarray(U).flatten())),
55  )
56  self.iter += 1
57  self.U = U.copy()
58  self.h_rwd.append(cost(U))
59  if self.withdisplay:
60  display(U) # Display if CTRL-Z has been pressed.
61 
62  def setWithDisplay(self, boolean=None):
63  self.withdisplay = not self.withdisplay if boolean is None else boolean
64 
65 
66 callback = CallBack()
67 signal.signal(signal.SIGTSTP, lambda x, y: callback.setWithDisplay())
68 
69 
70 U0 = np.zeros(NSTEPS * env.nu) - env.umax # Initial guess for the control trajectory.
71 bounds = (
72  [
73  [-env.umax, env.umax],
74  ]
75  * env.nu
76  * NSTEPS
77 ) # Set control bounds to environment umax.
78 
79 # Start BFGS optimization routine
80 U, c, info = fmin_l_bfgs_b(
81  cost, x0=U0, callback=callback, approx_grad=True, bounds=bounds
82 )
83 
84 # When done, display the trajectory in Gepetto-viewer
85 display(U, True)
86 
87 plt.plot(callback.h_rwd)
88 plt.show()
ocp.CallBack
Definition: ocp.py:42
ocp.CallBack.U
U
Definition: ocp.py:57
ocp.CallBack.__init__
def __init__(self)
Definition: ocp.py:45
ocp.CallBack.__call__
def __call__(self, U)
Definition: ocp.py:50
ocp.display
def display(U, verbose=False)
Definition: ocp.py:29
ocp.CallBack.setWithDisplay
def setWithDisplay(self, boolean=None)
Definition: ocp.py:62
pendulum.Pendulum
Definition: pendulum.py:43
ocp.cost
cost
Definition: ocp.py:81
ocp.CallBack.withdisplay
withdisplay
Definition: ocp.py:47
display
Definition: display.py:1
ocp.CallBack.h_rwd
h_rwd
Definition: ocp.py:48
pinocchio::copy
void copy(const ModelTpl< Scalar, Options, JointCollectionTpl > &model, const DataTpl< Scalar, Options, JointCollectionTpl > &origin, DataTpl< Scalar, Options, JointCollectionTpl > &dest, KinematicLevel kinematic_level)
Copy part of the data from origin to dest. Template parameter can be used to select at which differen...
Definition: copy.hpp:42
boost::fusion::append
result_of::push_front< V const, T >::type append(T const &t, V const &v)
Append the element T at the front of boost fusion vector V.
Definition: fusion.hpp:32
ocp.CallBack.iter
iter
Definition: ocp.py:46


pinocchio
Author(s):
autogenerated on Sun Dec 22 2024 03:41:12