LBFGS.h
Go to the documentation of this file.
1 // Copyright (C) 2016 Yixuan Qiu <yixuan.qiu@cos.name>
2 // Under MIT license
3 
4 #ifndef LBFGS_H
5 #define LBFGS_H
6 
7 #include <Eigen/Core>
8 #include "Param.h"
9 #include "LineSearch.h"
10 
11 namespace LBFGSpp
12 {
16 template <typename Scalar>
18 {
19 private:
23 
24  const LBFGSParam<Scalar>& m_param; // Parameters to control the LBFGS algorithm
25  Matrix m_s; // History of the s vectors
26  Matrix m_y; // History of the y vectors
27  Vector m_ys; // History of the s'y values
28  Vector m_alpha; // History of the step lengths
29  Vector m_fx; // History of the objective function values
30  Vector m_xp; // Old x
31  Vector m_grad; // New gradient
32  Vector m_gradp; // Old gradient
33  Vector m_drt; // Moving direction
34 
35  inline void reset(int n)
36  {
37  const int m = m_param.m;
38  m_s.resize(n, m);
39  m_y.resize(n, m);
40  m_ys.resize(m);
41  m_alpha.resize(m);
42  m_xp.resize(n);
43  m_grad.resize(n);
44  m_gradp.resize(n);
45  m_drt.resize(n);
46  if(m_param.past > 0)
47  m_fx.resize(m_param.past);
48  }
49 
50 public:
58  m_param(param)
59  {
60  m_param.check_param();
61  }
62 
76  template <typename Foo>
77  inline int minimize(Foo& f, Vector& x, Scalar& fx)
78  {
79  const int n = x.size();
80  const int fpast = m_param.past;
81  reset(n);
82 
83  // Evaluate function and compute gradient
84  fx = f(x, m_grad);
85  Scalar xnorm = x.norm();
86  Scalar gnorm = m_grad.norm();
87  if(fpast > 0)
88  m_fx[0] = fx;
89 
90  // Early exit if the initial x is already a minimizer
91  if(gnorm <= m_param.epsilon * std::max(xnorm, Scalar(1.0)))
92  {
93  return 1;
94  }
95 
96  // Initial direction
97  m_drt.noalias() = -m_grad;
98  // Initial step
99  Scalar step = Scalar(1.0) / m_drt.norm();
100 
101  int k = 1;
102  int end = 0;
103  for( ; ; )
104  {
105  // Save the curent x and gradient
106  m_xp.noalias() = x;
107  m_gradp.noalias() = m_grad;
108 
109  // Line search to update x, fx and gradient
110  LineSearch<Scalar>::Backtracking(f, fx, x, m_grad, step, m_drt, m_xp, m_param);
111 
112  // New x norm and gradient norm
113  xnorm = x.norm();
114  gnorm = m_grad.norm();
115 
116  // Convergence test -- gradient
117  if(gnorm <= m_param.epsilon * std::max(xnorm, Scalar(1.0)))
118  {
119  return k;
120  }
121  // Convergence test -- objective function value
122  if(fpast > 0)
123  {
124  if(k >= fpast && std::abs((m_fx[k % fpast] - fx) / fx) < m_param.delta)
125  return k;
126 
127  m_fx[k % fpast] = fx;
128  }
129  // Maximum number of iterations
130  if(m_param.max_iterations != 0 && k >= m_param.max_iterations)
131  {
132  return k;
133  }
134 
135  // Update s and y
136  // s_{k+1} = x_{k+1} - x_k
137  // y_{k+1} = g_{k+1} - g_k
138  MapVec svec(&m_s(0, end), n);
139  MapVec yvec(&m_y(0, end), n);
140  svec.noalias() = x - m_xp;
141  yvec.noalias() = m_grad - m_gradp;
142 
143  // ys = y's = 1/rho
144  // yy = y'y
145  Scalar ys = yvec.dot(svec);
146  Scalar yy = yvec.squaredNorm();
147  m_ys[end] = ys;
148 
149  // Recursive formula to compute d = -H * g
150  m_drt.noalias() = -m_grad;
151  int bound = std::min(m_param.m, k);
152  end = (end + 1) % m_param.m;
153  int j = end;
154  for(int i = 0; i < bound; i++)
155  {
156  j = (j + m_param.m - 1) % m_param.m;
157  MapVec sj(&m_s(0, j), n);
158  MapVec yj(&m_y(0, j), n);
159  m_alpha[j] = sj.dot(m_drt) / m_ys[j];
160  m_drt.noalias() -= m_alpha[j] * yj;
161  }
162 
163  m_drt *= (ys / yy);
164 
165  for(int i = 0; i < bound; i++)
166  {
167  MapVec sj(&m_s(0, j), n);
168  MapVec yj(&m_y(0, j), n);
169  Scalar beta = yj.dot(m_drt) / m_ys[j];
170  m_drt.noalias() += (m_alpha[j] - beta) * sj;
171  j = (j + 1) % m_param.m;
172  }
173 
174  // step = 1.0 as initial guess
175  step = Scalar(1.0);
176  k++;
177  }
178 
179  return k;
180  }
181 };
182 
183 
184 } // namespace LBFGSpp
185 
186 #endif // LBFGS_H
void check_param() const
Definition: Param.h:182
#define min(a, b)
Definition: Chen_Han.cpp:11
Eigen::Map< Vector > MapVec
Definition: LBFGS.h:22
Vector m_alpha
Definition: LBFGS.h:28
Eigen::Matrix< Scalar, Eigen::Dynamic, 1 > Vector
Definition: LBFGS.h:20
A matrix or vector expression mapping an existing array of data.
Definition: Map.h:88
Definition: LBFGS.h:11
int minimize(Foo &f, Vector &x, Scalar &fx)
Definition: LBFGS.h:77
const LBFGSParam< Scalar > & m_param
Definition: LBFGS.h:24
Scalar epsilon
Definition: Param.h:91
Eigen::Matrix< Scalar, Eigen::Dynamic, Eigen::Dynamic > Matrix
Definition: LBFGS.h:21
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const AbsReturnType abs() const
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void resize(Index rows, Index cols)
LBFGSSolver(const LBFGSParam< Scalar > &param)
Definition: LBFGS.h:57
void reset(int n)
Definition: LBFGS.h:35
static void Backtracking(Foo &f, Scalar &fx, Vector &x, Vector &grad, Scalar &step, const Vector &drt, const Vector &xp, const LBFGSParam< Scalar > &param)
Definition: LineSearch.h:41
int64_t max(int64_t a, const int b)
Definition: Xin_Wang.cpp:10
Vector m_gradp
Definition: LBFGS.h:32


co_scan
Author(s):
autogenerated on Mon Feb 28 2022 23:00:43