// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2012 Google Inc. All rights reserved.
// http://code.google.com/p/ceres-solver/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
//   this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
//   this list of conditions and the following disclaimer in the documentation
//   and/or other materials provided with the distribution.
// * Neither the name of Google Inc. nor the names of its contributors may be
//   used to endorse or promote products derived from this software without
//   specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
// Author: sameeragarwal@google.com (Sameer Agarwal)
//
// Interface for and implementation of various line search algorithms.

#ifndef CERES_INTERNAL_LINE_SEARCH_H_
#define CERES_INTERNAL_LINE_SEARCH_H_

#ifndef CERES_NO_LINE_SEARCH_MINIMIZER

#include <string>
#include <vector>
#include "ceres/internal/eigen.h"
#include "ceres/internal/port.h"
#include "ceres/types.h"

namespace ceres {
namespace internal {

class Evaluator;
struct FunctionSample;
// Line search is another name for a one dimensional optimization
// algorithm. The name "line search" comes from the fact that such
// one dimensional optimization problems arise as subproblems of
// general multidimensional optimization problems, where they
// correspond to minimizing the objective along a line (the search
// direction).
//
// While finding the exact minimum of a one dimensional function is
// hard, instances of LineSearch find a point that satisfies a
// sufficient decrease condition. Depending on the particular
// condition used, we get a variety of different line search
// algorithms, e.g., Armijo, Wolfe etc.
class LineSearch {
 public:
  class Function;

  struct Options {
    Options()
        : interpolation_type(CUBIC),
          sufficient_decrease(1e-4),
          max_step_contraction(1e-3),
          min_step_contraction(0.9),
          min_step_size(1e-9),
          max_num_iterations(20),
          sufficient_curvature_decrease(0.9),
          max_step_expansion(10.0),
          function(NULL) {}

    // Degree of the polynomial used to approximate the objective
    // function.
    LineSearchInterpolationType interpolation_type;

    // Armijo and Wolfe line search parameters.

    // Solving the line search problem exactly is computationally
    // prohibitive. Fortunately, line search based optimization
    // algorithms can still guarantee convergence if instead of an
    // exact solution, the line search algorithm returns a solution
    // which decreases the value of the objective function
    // sufficiently. More precisely, we are looking for a step_size
    // s.t.
    //
    //   f(step_size) <= f(0) + sufficient_decrease * f'(0) * step_size
    double sufficient_decrease;
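
    // For illustration only, the check above amounts to the following
    // sketch (variable names are hypothetical, not part of the API):
    //
    //   bool armijo_ok =
    //       f_step <= f_0 + sufficient_decrease * f_prime_0 * step_size;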

    // In each iteration of the Armijo / Wolfe line search,
    //
    // new_step_size >= max_step_contraction * step_size
    //
    // Note that by definition, for contraction:
    //
    //  0 < max_step_contraction < min_step_contraction < 1
    //
    double max_step_contraction;

    // In each iteration of the Armijo / Wolfe line search,
    //
    // new_step_size <= min_step_contraction * step_size
    //
    // Note that by definition, for contraction:
    //
    //  0 < max_step_contraction < min_step_contraction < 1
    //
    double min_step_contraction;
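
    // Taken together, the two bounds above mean that a trial step
    // computed by interpolation is clamped to the interval
    // [max_step_contraction * step_size,
    //  min_step_contraction * step_size], i.e. (a sketch, with a
    // hypothetical interpolated candidate value):
    //
    //   new_step_size =
    //       std::max(step_size * max_step_contraction,
    //                std::min(candidate, step_size * min_step_contraction));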

    // If, during the line search, the step_size falls below this
    // value, it is truncated to zero.
    double min_step_size;

    // Maximum number of trial step size iterations during each line
    // search; if a step size satisfying the search conditions cannot
    // be found within this number of trials, the line search will
    // terminate.
    int max_num_iterations;

    // Wolfe-specific line search parameters.

    // The strong Wolfe conditions consist of the Armijo sufficient
    // decrease condition, and an additional requirement that the
    // step size be chosen s.t. the _magnitude_ ('strong' Wolfe
    // conditions) of the gradient along the search direction
    // decreases sufficiently. Precisely, this second condition
    // is that we seek a step_size s.t.
    //
    //   |f'(step_size)| <= sufficient_curvature_decrease * |f'(0)|
    //
    // where f() is the line search objective and f'() is the derivative
    // of f w.r.t. step_size (d f / d step_size).
    double sufficient_curvature_decrease;
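
    // For illustration only, this curvature check amounts to the
    // following sketch (variable names are hypothetical):
    //
    //   bool curvature_ok =
    //       fabs(f_prime_step) <=
    //       sufficient_curvature_decrease * fabs(f_prime_0);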

    // During the bracketing phase of the Wolfe search, the step size is
    // increased until either a point satisfying the Wolfe conditions is
    // found, or an upper bound for a bracket containing a point satisfying
    // the conditions is found.  Precisely, at each iteration of the
    // expansion:
    //
    //   new_step_size <= max_step_expansion * step_size.
    //
    // By definition for expansion, max_step_expansion > 1.0.
    double max_step_expansion;

    // The one dimensional function that the line search algorithm
    // minimizes.
    Function* function;
  };

  // An object used by the line search to access the function values
  // and gradient of the one dimensional function being optimized.
  //
  // In practice, this object will provide access to the objective
  // function value and the directional derivative of the underlying
  // optimization problem along a specific search direction.
  //
  // See LineSearchFunction for an example implementation.
  class Function {
   public:
    virtual ~Function() {}
    // Evaluate the line search objective
    //
    //   f(x) = p(position + x * direction)
    //
    // where p is the objective function of the general optimization
    // problem.
    //
    // g is the gradient f'(x) at x.
    //
    // f must not be null. The gradient is computed only if g is not null.
    virtual bool Evaluate(double x, double* f, double* g) = 0;
  };
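
  // A minimal sketch of a Function implementation, here for the one
  // dimensional quadratic f(x) = (x - 2)^2. The class is illustrative
  // and not part of Ceres:
  //
  //   class QuadraticFunction : public LineSearch::Function {
  //    public:
  //     virtual bool Evaluate(double x, double* f, double* g) {
  //       *f = (x - 2.0) * (x - 2.0);
  //       if (g != NULL) {
  //         *g = 2.0 * (x - 2.0);
  //       }
  //       return true;
  //     }
  //   };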

  // Result of the line search.
  struct Summary {
    Summary()
        : success(false),
          optimal_step_size(0.0),
          num_function_evaluations(0),
          num_gradient_evaluations(0),
          num_iterations(0) {}

    bool success;
    double optimal_step_size;
    int num_function_evaluations;
    int num_gradient_evaluations;
    int num_iterations;
    string error;
  };

  explicit LineSearch(const LineSearch::Options& options);
  virtual ~LineSearch() {}

  static LineSearch* Create(const LineSearchType line_search_type,
                            const LineSearch::Options& options,
                            string* error);

  // Perform the line search.
  //
  // step_size_estimate must be a positive number.
  //
  // initial_cost and initial_gradient are the value and gradient of
  // the function at zero.
  //
  // summary must not be null and will contain the result of the line
  // search.
  //
  // Summary::success is true if a non-zero step size is found.
  virtual void Search(double step_size_estimate,
                      double initial_cost,
                      double initial_gradient,
                      Summary* summary) = 0;

  // Returns the step size minimizing the polynomial of degree
  // interpolation_type interpolating the given samples, clamped to
  // [min_step_size, max_step_size].
  double InterpolatingPolynomialMinimizingStepSize(
      const LineSearchInterpolationType& interpolation_type,
      const FunctionSample& lowerbound_sample,
      const FunctionSample& previous_sample,
      const FunctionSample& current_sample,
      const double min_step_size,
      const double max_step_size) const;

 protected:
  const LineSearch::Options& options() const { return options_; }

 private:
  LineSearch::Options options_;
};
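
// A usage sketch, for illustration only. It assumes a concrete
// LineSearch::Function (e.g. the QuadraticFunction sketch above) and
// caller-supplied values of f(0) and f'(0); WOLFE is a LineSearchType
// from ceres/types.h:
//
//   LineSearch::Options options;
//   options.function = &function;
//   string error;
//   LineSearch* line_search = LineSearch::Create(WOLFE, options, &error);
//   // Create returns NULL and sets error on failure; the caller owns
//   // the returned object.
//   LineSearch::Summary summary;
//   line_search->Search(1.0, initial_cost, initial_gradient, &summary);
//   if (summary.success) {
//     // summary.optimal_step_size is the accepted step size.
//   }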

class LineSearchFunction : public LineSearch::Function {
 public:
  explicit LineSearchFunction(Evaluator* evaluator);
  virtual ~LineSearchFunction() {}
  void Init(const Vector& position, const Vector& direction);
  virtual bool Evaluate(double x, double* f, double* g);
  double DirectionInfinityNorm() const;

 private:
  Evaluator* evaluator_;
  Vector position_;
  Vector direction_;

  // evaluation_point = Evaluator::Plus(position_, x * direction_);
  Vector evaluation_point_;

  // scaled_direction = x * direction_;
  Vector scaled_direction_;
  Vector gradient_;
};
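
// Illustrative only: given an Evaluator* for the underlying problem,
// a LineSearchFunction is configured with the current position and
// search direction before being handed to the line search:
//
//   LineSearchFunction line_search_function(evaluator);
//   line_search_function.Init(position, direction);
//   options.function = &line_search_function;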

// Backtracking and interpolation based Armijo line search. This
// implementation is based on the Armijo line search that ships in the
// minFunc package by Mark Schmidt.
//
// For more details: http://www.di.ens.fr/~mschmidt/Software/minFunc.html
class ArmijoLineSearch : public LineSearch {
 public:
  explicit ArmijoLineSearch(const LineSearch::Options& options);
  virtual ~ArmijoLineSearch() {}
  virtual void Search(double step_size_estimate,
                      double initial_cost,
                      double initial_gradient,
                      Summary* summary);
};

// Bracketing / Zoom Strong Wolfe condition line search.  This implementation
// is based on the pseudo-code algorithm presented in Nocedal & Wright [1]
// (p60-61) with inspiration from the WolfeLineSearch which ships with the
// minFunc package by Mark Schmidt [2].
//
// [1] Nocedal J., Wright S., Numerical Optimization, 2nd Ed., Springer, 2006.
// [2] http://www.di.ens.fr/~mschmidt/Software/minFunc.html
class WolfeLineSearch : public LineSearch {
 public:
  explicit WolfeLineSearch(const LineSearch::Options& options);
  virtual ~WolfeLineSearch() {}
  virtual void Search(double step_size_estimate,
                      double initial_cost,
                      double initial_gradient,
                      Summary* summary);
  // Returns true iff either a valid point or a valid bracket is found.
  bool BracketingPhase(const FunctionSample& initial_position,
                       const double step_size_estimate,
                       FunctionSample* bracket_low,
                       FunctionSample* bracket_high,
                       bool* perform_zoom_search,
                       Summary* summary);
  // Returns true iff solution satisfies the strong Wolfe conditions.
  bool ZoomPhase(const FunctionSample& initial_position,
                 FunctionSample bracket_low,
                 FunctionSample bracket_high,
                 FunctionSample* solution,
                 Summary* summary);
};

}  // namespace internal
}  // namespace ceres

#endif  // CERES_NO_LINE_SEARCH_MINIMIZER
#endif  // CERES_INTERNAL_LINE_SEARCH_H_