LBFGS++
LBFGS.h
// Copyright (C) 2016-2023 Yixuan Qiu <yixuan.qiu@cos.name>
// Under MIT license

#ifndef LBFGSPP_LBFGS_H
#define LBFGSPP_LBFGS_H

#include <Eigen/Core>
#include "LBFGSpp/Param.h"
#include "LBFGSpp/BFGSMat.h"
#include "LBFGSpp/LineSearchBacktracking.h"
#include "LBFGSpp/LineSearchBracketing.h"
#include "LBFGSpp/LineSearchNocedalWright.h"
#include "LBFGSpp/LineSearchMoreThuente.h"

namespace LBFGSpp {

// L-BFGS solver for unconstrained numerical optimization
template <typename Scalar,
          template <class> class LineSearch = LineSearchNocedalWright>
class LBFGSSolver
{
private:
    using Vector = Eigen::Matrix<Scalar, Eigen::Dynamic, 1>;
    using Matrix = Eigen::Matrix<Scalar, Eigen::Dynamic, Eigen::Dynamic>;
    using MapVec = Eigen::Map<Vector>;

    const LBFGSParam<Scalar>& m_param;  // Parameters to control the LBFGS algorithm
    BFGSMat<Scalar> m_bfgs;             // Approximation to the Hessian matrix
    Vector m_fx;                        // History of the objective function values
    Vector m_xp;                        // Old x
    Vector m_grad;                      // New gradient
    Scalar m_gnorm;                     // Norm of the gradient
    Vector m_gradp;                     // Old gradient
    Vector m_drt;                       // Moving direction

    // Reset internal variables
    // n: dimension of the vector to be optimized
    inline void reset(int n)
    {
        const int m = m_param.m;
        m_bfgs.reset(n, m);
        m_xp.resize(n);
        m_grad.resize(n);
        m_gradp.resize(n);
        m_drt.resize(n);
        if (m_param.past > 0)
            m_fx.resize(m_param.past);
    }

public:
    // Constructor: keeps a reference to the parameter object and
    // validates the parameters via check_param()
    LBFGSSolver(const LBFGSParam<Scalar>& param) :
        m_param(param)
    {
        m_param.check_param();
    }

    // Minimize the objective function f using the L-BFGS algorithm.
    // f:  a callable such that f(x, grad) returns the objective value at x
    //     and overwrites grad with the gradient
    // x:  initial guess on input, the best point found on output
    // fx: receives the objective function value at the final x
    // Returns the number of iterations used.
    template <typename Foo>
    inline int minimize(Foo& f, Vector& x, Scalar& fx)
    {
        using std::abs;

        // Dimension of the vector
        const int n = x.size();
        reset(n);

        // The length of lag for objective function value to test convergence
        const int fpast = m_param.past;

        // Evaluate function and compute gradient
        fx = f(x, m_grad);
        m_gnorm = m_grad.norm();
        if (fpast > 0)
            m_fx[0] = fx;

        // std::cout << "x0 = " << x.transpose() << std::endl;
        // std::cout << "f(x0) = " << fx << ", ||grad|| = " << m_gnorm << std::endl << std::endl;

        // Early exit if the initial x is already a minimizer
        if (m_gnorm <= m_param.epsilon || m_gnorm <= m_param.epsilon_rel * x.norm())
        {
            return 1;
        }

        // Initial direction
        m_drt.noalias() = -m_grad;
        // Initial step size
        Scalar step = Scalar(1) / m_drt.norm();
        // Tolerance for s'y >= eps * (y'y)
        constexpr Scalar eps = std::numeric_limits<Scalar>::epsilon();
        // s and y vectors
        Vector vecs(n), vecy(n);

        // Number of iterations used
        int k = 1;
        for (;;)
        {
            // std::cout << "Iter " << k << " begins" << std::endl << std::endl;

            // Save the current x and gradient
            m_xp.noalias() = x;
            m_gradp.noalias() = m_grad;
            Scalar dg = m_grad.dot(m_drt);
            const Scalar step_max = m_param.max_step;

            // Line search to update x, fx and gradient
            LineSearch<Scalar>::LineSearch(f, m_param, m_xp, m_drt, step_max, step, fx, m_grad, dg, x);

            // New gradient norm
            m_gnorm = m_grad.norm();

            // std::cout << "Iter " << k << " finished line search" << std::endl;
            // std::cout << "   x = " << x.transpose() << std::endl;
            // std::cout << "   f(x) = " << fx << ", ||grad|| = " << m_gnorm << std::endl << std::endl;

            // Convergence test -- gradient
            if (m_gnorm <= m_param.epsilon || m_gnorm <= m_param.epsilon_rel * x.norm())
            {
                return k;
            }
            // Convergence test -- objective function value
            if (fpast > 0)
            {
                const Scalar fxd = m_fx[k % fpast];
                if (k >= fpast && abs(fxd - fx) <= m_param.delta * std::max(std::max(abs(fx), abs(fxd)), Scalar(1)))
                    return k;

                m_fx[k % fpast] = fx;
            }
            // Maximum number of iterations
            if (m_param.max_iterations != 0 && k >= m_param.max_iterations)
            {
                return k;
            }

            // Update s and y
            // s_{k+1} = x_{k+1} - x_k
            // y_{k+1} = g_{k+1} - g_k
            vecs.noalias() = x - m_xp;
            vecy.noalias() = m_grad - m_gradp;
            if (vecs.dot(vecy) > eps * vecy.squaredNorm())
                m_bfgs.add_correction(vecs, vecy);

            // Recursive formula to compute d = -H * g
            m_bfgs.apply_Hv(m_grad, -Scalar(1), m_drt);

            // Reset step = 1.0 as initial guess for the next line search
            step = Scalar(1);
            k++;
        }

        return k;
    }

    // Gradient vector at the last evaluated point
    const Vector& final_grad() const { return m_grad; }

    // Euclidean norm of the gradient at the last evaluated point
    Scalar final_grad_norm() const { return m_gnorm; }
};

}  // namespace LBFGSpp

#endif  // LBFGSPP_LBFGS_H
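
A minimal usage sketch (not part of the header above): the solver is driven by any callable f(x, grad) that returns the objective value and writes the gradient into grad, matching the minimize() signature in this file. The generalized Rosenbrock objective and the parameter values below are illustrative assumptions, and the include line assumes the library's standard layout where LBFGS.h is on the include path.

#include <iostream>
#include <Eigen/Core>
#include <LBFGS.h>

using Eigen::VectorXd;
using namespace LBFGSpp;

// Generalized Rosenbrock function: returns f(x) and fills grad with the gradient
double rosenbrock(const VectorXd& x, VectorXd& grad)
{
    const int n = x.size();
    double fx = 0.0;
    for (int i = 0; i < n; i += 2)
    {
        const double t1 = 1.0 - x[i];
        const double t2 = 10 * (x[i + 1] - x[i] * x[i]);
        grad[i + 1] = 20 * t2;
        grad[i] = -2.0 * (x[i] * grad[i + 1] + t1);
        fx += t1 * t1 + t2 * t2;
    }
    return fx;
}

int main()
{
    // Solver parameters; epsilon and max_iterations are fields of LBFGSParam
    LBFGSParam<double> param;
    param.epsilon = 1e-6;
    param.max_iterations = 100;

    // Create the solver and an initial guess
    LBFGSSolver<double> solver(param);
    const int n = 10;
    VectorXd x = VectorXd::Zero(n);

    // Run the minimization; x is overwritten with the best point found
    double fx;
    int niter = solver.minimize(rosenbrock, x, fx);

    std::cout << niter << " iterations" << std::endl;
    std::cout << "f(x) = " << fx << std::endl;
    return 0;
}

The second template parameter selects the line search strategy; for example, LBFGSSolver<double, LineSearchBacktracking> would use the backtracking line search included at the top of the header instead of the default LineSearchNocedalWright.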