Adding Wolfe line search algorithm and full BFGS search direction options.
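
The Wolfe line search looks for a step size satisfying the strong Wolfe
conditions:

  f(step_size) <= f(0) + sufficient_decrease * f'(0) * step_size
  |f'(step_size)| <= sufficient_curvature_decrease * |f'(0)|

with 0 < sufficient_decrease < sufficient_curvature_decrease < 1, where f()
is the line search objective and f'() its derivative w.r.t. the step size.
The curvature condition guarantees that the (L)BFGS secant condition
s_k' * y_k > 0 can be maintained, which is why the (L)BFGS search directions
now require line_search_type == WOLFE.
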
Change-Id: I9d3fb117805bdfa5bc33613368f45ae8f10e0d79
diff --git a/internal/ceres/line_search.cc b/internal/ceres/line_search.cc
index 06c823f..2c75d89 100644
--- a/internal/ceres/line_search.cc
+++ b/internal/ceres/line_search.cc
@@ -35,6 +35,7 @@
#include "ceres/evaluator.h"
#include "ceres/internal/eigen.h"
#include "ceres/polynomial.h"
+#include "ceres/stringprintf.h"
#include "glog/logging.h"
namespace ceres {
@@ -61,8 +62,41 @@
return sample;
};
+// Convenience stream operator for pushing FunctionSamples into log messages.
+std::ostream& operator<<(std::ostream &os,
+ const FunctionSample& sample) {
+ os << "[x: " << sample.x << ", value: " << sample.value
+ << ", gradient: " << sample.gradient << ", value_is_valid: "
+ << std::boolalpha << sample.value_is_valid << ", gradient_is_valid: "
+ << std::boolalpha << sample.gradient_is_valid << "]";
+ return os;
+};
+
} // namespace
+LineSearch::LineSearch(const LineSearch::Options& options)
+ : options_(options) {}
+
+LineSearch* LineSearch::Create(const LineSearchType line_search_type,
+ const LineSearch::Options& options,
+ string* error) {
+ LineSearch* line_search = NULL;
+ switch (line_search_type) {
+ case ceres::ARMIJO:
+ line_search = new ArmijoLineSearch(options);
+ break;
+ case ceres::WOLFE:
+ line_search = new WolfeLineSearch(options);
+ break;
+ default:
+ *error = string("Invalid line search algorithm type: ") +
+ LineSearchTypeToString(line_search_type) +
+ string(", unable to create line search.");
+ return NULL;
+ }
+ return line_search;
+}
+
LineSearchFunction::LineSearchFunction(Evaluator* evaluator)
: evaluator_(evaluator),
position_(evaluator->NumParameters()),
@@ -103,104 +137,608 @@
return IsFinite(*f) && IsFinite(*g);
}
-void ArmijoLineSearch::Search(const LineSearch::Options& options,
- const double initial_step_size,
+double LineSearchFunction::DirectionInfinityNorm() const {
+ return direction_.lpNorm<Eigen::Infinity>();
+}
+
+// Returns step_size \in [min_step_size, max_step_size] which minimizes the
+// polynomial, of degree defined by interpolation_type, that interpolates all
+// of the provided samples with valid values.
+double LineSearch::InterpolatingPolynomialMinimizingStepSize(
+ const LineSearchInterpolationType& interpolation_type,
+ const FunctionSample& lowerbound,
+ const FunctionSample& previous,
+ const FunctionSample& current,
+ const double min_step_size,
+ const double max_step_size) const {
+ if (!current.value_is_valid ||
+ (interpolation_type == BISECTION &&
+ max_step_size <= current.x)) {
+ // Either: sample is invalid; or we are using BISECTION and contracting
+ // the step size.
+ return min(max(current.x * 0.5, min_step_size), max_step_size);
+ } else if (interpolation_type == BISECTION) {
+ CHECK_GT(max_step_size, current.x);
+ // We are expanding the search (during a Wolfe bracketing phase) using
+ // BISECTION interpolation. Using BISECTION when trying to expand is
+ // strictly speaking an oxymoron, but we define this to mean always taking
+ // the maximum step size so that the Armijo & Wolfe implementations are
+ // agnostic to the interpolation type.
+ return max_step_size;
+ }
+ // Only check if lower-bound is valid here, where it is required
+ // to avoid replicating current.value_is_valid == false
+ // behaviour in WolfeLineSearch.
+ CHECK(lowerbound.value_is_valid)
+ << "Ceres bug: lower-bound sample for interpolation is invalid, "
+ << "please contact the developers!, interpolation_type: "
+ << LineSearchInterpolationTypeToString(interpolation_type)
+ << ", lowerbound: " << lowerbound << ", previous: " << previous
+ << ", current: " << current;
+
+ // Select step size by interpolating the function and gradient values
+ // and minimizing the corresponding polynomial.
+ vector<FunctionSample> samples;
+ samples.push_back(lowerbound);
+
+ if (interpolation_type == QUADRATIC) {
+ // Two point interpolation using function values and the
+ // gradient at the lower bound.
+ samples.push_back(ValueSample(current.x, current.value));
+
+ if (previous.value_is_valid) {
+ // Three point interpolation, using function values and the
+ // gradient at the lower bound.
+ samples.push_back(ValueSample(previous.x, previous.value));
+ }
+ } else if (interpolation_type == CUBIC) {
+ // Two point interpolation using the function values and the gradients.
+ samples.push_back(current);
+
+ if (previous.value_is_valid) {
+ // Three point interpolation using the function values and
+ // the gradients.
+ samples.push_back(previous);
+ }
+ } else {
+ LOG(FATAL) << "Ceres bug: No handler for interpolation_type: "
+ << LineSearchInterpolationTypeToString(interpolation_type)
+ << ", please contact the developers!";
+ }
+
+ double step_size = 0.0, unused_min_value = 0.0;
+ MinimizeInterpolatingPolynomial(samples, min_step_size, max_step_size,
+ &step_size, &unused_min_value);
+ return step_size;
+}
+
+ArmijoLineSearch::ArmijoLineSearch(const LineSearch::Options& options)
+ : LineSearch(options) {}
+
+void ArmijoLineSearch::Search(const double step_size_estimate,
const double initial_cost,
const double initial_gradient,
Summary* summary) {
*CHECK_NOTNULL(summary) = LineSearch::Summary();
- Function* function = options.function;
+ CHECK_GE(step_size_estimate, 0.0);
+ CHECK_GT(options().sufficient_decrease, 0.0);
+ CHECK_LT(options().sufficient_decrease, 1.0);
+ CHECK_GT(options().max_num_iterations, 0);
+ Function* function = options().function;
- double previous_step_size = 0.0;
- double previous_cost = 0.0;
- double previous_gradient = 0.0;
- bool previous_step_size_is_valid = false;
+ // Note initial_cost & initial_gradient are evaluated at step_size = 0,
+ // not step_size_estimate, which is our starting guess.
+ const FunctionSample initial_position =
+ ValueAndGradientSample(0.0, initial_cost, initial_gradient);
- double step_size = initial_step_size;
- double cost = 0.0;
- double gradient = 0.0;
- bool step_size_is_valid = false;
+ FunctionSample previous = ValueAndGradientSample(0.0, 0.0, 0.0);
+ previous.value_is_valid = false;
- ++summary->num_evaluations;
- step_size_is_valid =
- function->Evaluate(step_size,
- &cost,
- options.interpolation_type != CUBIC ? NULL : &gradient);
- while (!step_size_is_valid || cost > (initial_cost
- + options.sufficient_decrease
- * initial_gradient
- * step_size)) {
- // If step_size_is_valid is not true we treat it as if the cost at
- // that point is not large enough to satisfy the sufficient
- // decrease condition.
+ FunctionSample current = ValueAndGradientSample(step_size_estimate, 0.0, 0.0);
+ current.value_is_valid = false;
- const double current_step_size = step_size;
- // Backtracking search. Each iteration of this loop finds a new point
+ const bool interpolation_uses_gradients =
+ options().interpolation_type == CUBIC;
+ const double descent_direction_max_norm =
+ static_cast<const LineSearchFunction*>(function)->DirectionInfinityNorm();
- if ((options.interpolation_type == BISECTION) || !step_size_is_valid) {
- step_size *= 0.5;
- } else {
- // Backtrack by interpolating the function and gradient values
- // and minimizing the corresponding polynomial.
- vector<FunctionSample> samples;
- samples.push_back(ValueAndGradientSample(0.0,
- initial_cost,
- initial_gradient));
-
- if (options.interpolation_type == QUADRATIC) {
- // Two point interpolation using function values and the
- // initial gradient.
- samples.push_back(ValueSample(step_size, cost));
-
- if (summary->num_evaluations > 1 && previous_step_size_is_valid) {
- // Three point interpolation, using function values and the
- // initial gradient.
- samples.push_back(ValueSample(previous_step_size, previous_cost));
- }
- } else {
- // Two point interpolation using the function values and the gradients.
- samples.push_back(ValueAndGradientSample(step_size,
- cost,
- gradient));
-
- if (summary->num_evaluations > 1 && previous_step_size_is_valid) {
- // Three point interpolation using the function values and
- // the gradients.
- samples.push_back(ValueAndGradientSample(previous_step_size,
- previous_cost,
- previous_gradient));
- }
- }
-
- double min_value;
- MinimizeInterpolatingPolynomial(samples, 0.0, current_step_size,
- &step_size, &min_value);
- step_size =
- min(max(step_size,
- options.min_relative_step_size_change * current_step_size),
- options.max_relative_step_size_change * current_step_size);
- }
-
- previous_step_size = current_step_size;
- previous_cost = cost;
- previous_gradient = gradient;
-
- if (fabs(initial_gradient) * step_size < options.min_step_size) {
- LOG(WARNING) << "Line search failed: step_size too small: " << step_size;
+ ++summary->num_function_evaluations;
+ if (interpolation_uses_gradients) { ++summary->num_gradient_evaluations; }
+ current.value_is_valid =
+ function->Evaluate(current.x,
+ &current.value,
+ interpolation_uses_gradients
+ ? &current.gradient : NULL);
+ current.gradient_is_valid =
+ interpolation_uses_gradients && current.value_is_valid;
+ while (!current.value_is_valid ||
+ current.value > (initial_cost
+ + options().sufficient_decrease
+ * initial_gradient
+ * current.x)) {
+ // If current.value_is_valid is false, we treat it as if the cost at that
+ // point is not large enough to satisfy the sufficient decrease condition.
+ ++summary->num_iterations;
+ if (summary->num_iterations >= options().max_num_iterations) {
+ summary->error =
+ StringPrintf("Line search failed: Armijo failed to find a point "
+ "satisfying the sufficient decrease condition within "
+ "specified max_num_iterations: %d.",
+ options().max_num_iterations);
+ LOG(WARNING) << summary->error;
return;
}
- ++summary->num_evaluations;
- step_size_is_valid =
- function->Evaluate(step_size,
- &cost,
- options.interpolation_type != CUBIC ? NULL : &gradient);
+ const double step_size =
+ this->InterpolatingPolynomialMinimizingStepSize(
+ options().interpolation_type,
+ initial_position,
+ previous,
+ current,
+ (options().max_step_contraction * current.x),
+ (options().min_step_contraction * current.x));
+
+ if (step_size * descent_direction_max_norm < options().min_step_size) {
+ summary->error =
+ StringPrintf("Line search failed: step_size too small: %.5e "
+ "with descent_direction_max_norm: %.5e.", step_size,
+ descent_direction_max_norm);
+ LOG(WARNING) << summary->error;
+ return;
+ }
+
+ previous = current;
+ current.x = step_size;
+
+ ++summary->num_function_evaluations;
+ if (interpolation_uses_gradients) { ++summary->num_gradient_evaluations; }
+ current.value_is_valid =
+ function->Evaluate(current.x,
+ &current.value,
+ interpolation_uses_gradients
+ ? &current.gradient : NULL);
+ current.gradient_is_valid =
+ interpolation_uses_gradients && current.value_is_valid;
}
- summary->optimal_step_size = step_size;
+ summary->optimal_step_size = current.x;
summary->success = true;
}
+WolfeLineSearch::WolfeLineSearch(const LineSearch::Options& options)
+ : LineSearch(options) {}
+
+void WolfeLineSearch::Search(const double step_size_estimate,
+ const double initial_cost,
+ const double initial_gradient,
+ Summary* summary) {
+ *CHECK_NOTNULL(summary) = LineSearch::Summary();
+ // All parameters should have been validated by the Solver, but as
+ // invalid values would produce crazy nonsense, hard check them here.
+ CHECK_GE(step_size_estimate, 0.0);
+ CHECK_GT(options().sufficient_decrease, 0.0);
+ CHECK_GT(options().sufficient_curvature_decrease,
+ options().sufficient_decrease);
+ CHECK_LT(options().sufficient_curvature_decrease, 1.0);
+ CHECK_GT(options().max_step_expansion, 1.0);
+
+ // Note initial_cost & initial_gradient are evaluated at step_size = 0,
+ // not step_size_estimate, which is our starting guess.
+ const FunctionSample initial_position =
+ ValueAndGradientSample(0.0, initial_cost, initial_gradient);
+
+ bool do_zoom_search = false;
+ // Important: The high/low in bracket_high & bracket_low refer to their
+ // _function_ values, not their step sizes i.e. it is _not_ required that
+ // bracket_low.x < bracket_high.x.
+ FunctionSample solution, bracket_low, bracket_high;
+
+ // Wolfe bracketing phase: Increases step_size until either it finds a point
+ // that satisfies the (strong) Wolfe conditions, or an interval that brackets
+ // step sizes which satisfy the conditions. From Nocedal & Wright [1] p61 the
+ // interval: (step_size_{k-1}, step_size_{k}) contains step lengths satisfying
+ // the strong Wolfe conditions if one of the following conditions is met:
+ //
+ // 1. step_size_{k} violates the sufficient decrease (Armijo) condition.
+ // 2. f(step_size_{k}) >= f(step_size_{k-1}).
+ // 3. f'(step_size_{k}) >= 0.
+ //
+ // Caveat: If f(step_size_{k}) is invalid, then step_size is reduced; ignoring
+ // this special case, step_size monotonically increases during bracketing.
+ if (!this->BracketingPhase(initial_position,
+ step_size_estimate,
+ &bracket_low,
+ &bracket_high,
+ &do_zoom_search,
+ summary) &&
+ summary->num_iterations < options().max_num_iterations) {
+ // Failed to find either a valid point or a valid bracket, but we did not
+ // run out of iterations.
+ return;
+ }
+ if (!do_zoom_search) {
+ // Either: Bracketing phase already found a point satisfying the strong
+ // Wolfe conditions, thus no Zoom required.
+ //
+ // Or: Bracketing failed to find a valid bracket or a point satisfying the
+ // strong Wolfe conditions within max_num_iterations. As this is an
+ // 'artificial' constraint, and we would otherwise fail to produce a valid
+ // point when ArmijoLineSearch would succeed, we return the lowest point
+ // found thus far which satisfies the Armijo condition (but not the Wolfe
+ // conditions).
+ CHECK(bracket_low.value_is_valid)
+ << "Ceres bug: Bracketing produced an invalid bracket_low, please "
+ << "contact the developers!, bracket_low: " << bracket_low
+ << ", bracket_high: " << bracket_high << ", num_iterations: "
+ << summary->num_iterations << ", max_num_iterations: "
+ << options().max_num_iterations;
+ summary->optimal_step_size = bracket_low.x;
+ summary->success = true;
+ return;
+ }
+
+ // Wolfe Zoom phase: Called when the Bracketing phase finds an interval of
+ // non-zero, finite width that should bracket step sizes which satisfy the
+ // (strong) Wolfe conditions (before finding a step size that satisfies the
+ // conditions). Zoom successively decreases the size of the interval until a
+ // step size which satisfies the Wolfe conditions is found. The interval is
+ // defined by bracket_low & bracket_high, which satisfy:
+ //
+ // 1. The interval bounded by step sizes: bracket_low.x & bracket_high.x
+ // contains step sizes that satisfy the strong Wolfe conditions.
+ // 2. bracket_low.x is, of all the step sizes evaluated *which satisfied the
+ // Armijo sufficient decrease condition*, the one which generated the
+ // smallest function value, i.e. bracket_low.value <
+ // f(all other steps satisfying Armijo).
+ // - Note that this does _not_ (necessarily) mean that initially
+ // bracket_low.value < bracket_high.value (although this is typical)
+ // e.g. when bracket_low = initial_position, and bracket_high is the
+ // first sample, and which does not satisfy the Armijo condition,
+ // but still has bracket_high.value < initial_position.value.
+ // 3. bracket_high is chosen after bracket_low, s.t.
+ // bracket_low.gradient * (bracket_high.x - bracket_low.x) < 0.
+ if (!this->ZoomPhase(initial_position,
+ bracket_low,
+ bracket_high,
+ &solution,
+ summary) && !solution.value_is_valid) {
+ // Failed to find a valid point (given the specified decrease parameters)
+ // within the specified bracket.
+ return;
+ }
+ // Ensure that if we ran out of iterations whilst zooming the bracket, or
+ // shrank the bracket width to < tolerance and failed to find a point which
+ // satisfies the strong Wolfe curvature condition, we return the point
+ // amongst those found thus far which minimizes f() and satisfies the
+ // Armijo condition.
+ solution =
+ solution.value_is_valid && solution.value <= bracket_low.value
+ ? solution : bracket_low;
+
+ summary->optimal_step_size = solution.x;
+ summary->success = true;
+}
+
+// Returns true iff bracket_low & bracket_high bound a bracket that contains
+// points which satisfy the strong Wolfe conditions. Otherwise, on return false,
+// if we stopped searching due to the 'artificial' condition of reaching
+// max_num_iterations, bracket_low is the step size amongst all those
+// tested, which satisfied the Armijo decrease condition and minimized f().
+bool WolfeLineSearch::BracketingPhase(
+ const FunctionSample& initial_position,
+ const double step_size_estimate,
+ FunctionSample* bracket_low,
+ FunctionSample* bracket_high,
+ bool* do_zoom_search,
+ Summary* summary) {
+ Function* function = options().function;
+
+ FunctionSample previous = initial_position;
+ FunctionSample current = ValueAndGradientSample(step_size_estimate, 0.0, 0.0);
+ current.value_is_valid = false;
+
+ const bool interpolation_uses_gradients =
+ options().interpolation_type == CUBIC;
+ const double descent_direction_max_norm =
+ static_cast<const LineSearchFunction*>(function)->DirectionInfinityNorm();
+
+ *do_zoom_search = false;
+ *bracket_low = initial_position;
+
+ ++summary->num_function_evaluations;
+ if (interpolation_uses_gradients) { ++summary->num_gradient_evaluations; }
+ current.value_is_valid =
+ function->Evaluate(current.x,
+ &current.value,
+ interpolation_uses_gradients
+ ? &current.gradient : NULL);
+ current.gradient_is_valid =
+ interpolation_uses_gradients && current.value_is_valid;
+
+ while (true) {
+ ++summary->num_iterations;
+
+ if (current.value_is_valid &&
+ (current.value > (initial_position.value
+ + options().sufficient_decrease
+ * initial_position.gradient
+ * current.x) ||
+ (previous.value_is_valid && current.value > previous.value))) {
+ // Bracket found: current step size violates Armijo sufficient decrease
+ // condition, or has stepped past an inflection point of f() relative to
+ // previous step size.
+ *do_zoom_search = true;
+ *bracket_low = previous;
+ *bracket_high = current;
+ break;
+ }
+
+ // Irrespective of the interpolation type we are using, we now need the
+ // gradient at the current point (which satisfies the Armijo condition)
+ // in order to check the strong Wolfe conditions.
+ if (!interpolation_uses_gradients) {
+ ++summary->num_function_evaluations;
+ ++summary->num_gradient_evaluations;
+ current.value_is_valid =
+ function->Evaluate(current.x,
+ &current.value,
+ &current.gradient);
+ current.gradient_is_valid = current.value_is_valid;
+ }
+
+ if (current.value_is_valid &&
+ fabs(current.gradient) <=
+ -options().sufficient_curvature_decrease * initial_position.gradient) {
+ // Current step size satisfies the strong Wolfe conditions, and is thus a
+ // valid termination point, therefore a Zoom not required.
+ *bracket_low = current;
+ *bracket_high = current;
+ break;
+
+ } else if (current.value_is_valid && current.gradient >= 0) {
+ // Bracket found: current step size has stepped past an inflection point
+ // of f(), but Armijo sufficient decrease is still satisfied and
+ // f(current) is our best minimum thus far. Remember step size
+ // monotonically increases, thus previous_step_size < current_step_size
+ // even though f(previous) > f(current).
+ *do_zoom_search = true;
+ // Note inverse ordering from first bracket case.
+ *bracket_low = current;
+ *bracket_high = previous;
+ break;
+
+ } else if (summary->num_iterations >= options().max_num_iterations) {
+ // Check num iterations bound here so that we always evaluate the
+ // max_num_iterations-th iteration against all conditions, and
+ // then perform no additional (unused) evaluations.
+ summary->error =
+ StringPrintf("Line search failed: Wolfe bracketing phase failed to "
+ "find a point satisfying strong Wolfe conditions, or a "
+ "bracket containing such a point within specified "
+ "max_num_iterations: %d", options().max_num_iterations);
+ LOG(WARNING) << summary->error;
+ // Ensure that bracket_low is always set to the step size amongst all
+ // those tested which minimizes f() and satisfies the Armijo condition
+ // when we terminate due to the 'artificial' max_num_iterations condition.
+ *bracket_low =
+ current.value_is_valid && current.value < bracket_low->value
+ ? current : *bracket_low;
+ return false;
+ }
+ // Either: f(current) is invalid; or, f(current) is valid, but does not
+ // satisfy the strong Wolfe conditions itself, or the conditions for
+ // being a boundary of a bracket.
+
+ // If f(current) is valid, (but meets no criteria) expand the search by
+ // increasing the step size.
+ const double max_step_size =
+ current.value_is_valid
+ ? (current.x * options().max_step_expansion) : current.x;
+
+ // We are performing 2-point interpolation only here, but the API of
+ // InterpolatingPolynomialMinimizingStepSize() allows for up to
+ // 3-point interpolation, so pad call with a sample with an invalid
+ // value that will therefore be ignored.
+ const FunctionSample unused_previous;
+ DCHECK(!unused_previous.value_is_valid);
+ // Contracts step size if f(current) is not valid.
+ const double step_size =
+ this->InterpolatingPolynomialMinimizingStepSize(
+ options().interpolation_type,
+ previous,
+ unused_previous,
+ current,
+ previous.x,
+ max_step_size);
+ if (step_size * descent_direction_max_norm < options().min_step_size) {
+ summary->error =
+ StringPrintf("Line search failed: step_size too small: %.5e "
+ "with descent_direction_max_norm: %.5e", step_size,
+ descent_direction_max_norm);
+ LOG(WARNING) << summary->error;
+ return false;
+ }
+
+ previous = current.value_is_valid ? current : previous;
+ current.x = step_size;
+
+ ++summary->num_function_evaluations;
+ if (interpolation_uses_gradients) { ++summary->num_gradient_evaluations; }
+ current.value_is_valid =
+ function->Evaluate(current.x,
+ &current.value,
+ interpolation_uses_gradients
+ ? &current.gradient : NULL);
+ current.gradient_is_valid =
+ interpolation_uses_gradients && current.value_is_valid;
+ }
+ // Either we have a valid point, defined as a bracket of zero width, in which
+ // case no zoom is required, or a valid bracket in which to zoom.
+ return true;
+}
+
+// Returns true iff solution satisfies the strong Wolfe conditions. Otherwise,
+// on return false, if we stopped searching due to the 'artificial' condition of
+// reaching max_num_iterations, solution is the step size amongst all those
+// tested, which satisfied the Armijo decrease condition and minimized f().
+bool WolfeLineSearch::ZoomPhase(const FunctionSample& initial_position,
+ FunctionSample bracket_low,
+ FunctionSample bracket_high,
+ FunctionSample* solution,
+ Summary* summary) {
+ Function* function = options().function;
+
+ CHECK(bracket_low.value_is_valid && bracket_low.gradient_is_valid)
+ << "Ceres bug: f_low input to Wolfe Zoom invalid, please contact "
+ << "the developers!, initial_position: " << initial_position
+ << ", bracket_low: " << bracket_low
+ << ", bracket_high: "<< bracket_high;
+ // We do not require bracket_high.gradient_is_valid as the gradient condition
+ // for a valid bracket is only dependent upon bracket_low.gradient, and
+ // in order to minimize jacobian evaluations, bracket_high.gradient may
+ // not have been calculated (if bracket_high.value does not satisfy the
+ // Armijo sufficient decrease condition and interpolation method does not
+ // require it).
+ CHECK(bracket_high.value_is_valid)
+ << "Ceres bug: f_high input to Wolfe Zoom invalid, please "
+ << "contact the developers!, initial_position: " << initial_position
+ << ", bracket_low: " << bracket_low
+ << ", bracket_high: "<< bracket_high;
+ CHECK_LT(bracket_low.gradient *
+ (bracket_high.x - bracket_low.x), 0.0)
+ << "Ceres bug: f_high input to Wolfe Zoom does not satisfy gradient "
+ << "condition combined with f_low, please contact the developers!"
+ << ", initial_position: " << initial_position
+ << ", bracket_low: " << bracket_low
+ << ", bracket_high: "<< bracket_high;
+
+ const int num_bracketing_iterations = summary->num_iterations;
+ const bool interpolation_uses_gradients =
+ options().interpolation_type == CUBIC;
+ const double descent_direction_max_norm =
+ static_cast<const LineSearchFunction*>(function)->DirectionInfinityNorm();
+
+ while (true) {
+ // Set solution to bracket_low, as it is our best step size (smallest f())
+ // found thus far and satisfies the Armijo condition, even though it does
+ // not satisfy the Wolfe condition.
+ *solution = bracket_low;
+ if (summary->num_iterations >= options().max_num_iterations) {
+ summary->error =
+ StringPrintf("Line search failed: Wolfe zoom phase failed to "
+ "find a point satisfying strong Wolfe conditions "
+ "within specified max_num_iterations: %d, "
+ "(num iterations taken for bracketing: %d).",
+ options().max_num_iterations, num_bracketing_iterations);
+ LOG(WARNING) << summary->error;
+ return false;
+ }
+ if (fabs(bracket_high.x - bracket_low.x) * descent_direction_max_norm
+ < options().min_step_size) {
+ // Bracket width has been reduced below tolerance, and no point satisfying
+ // the strong Wolfe conditions has been found.
+ summary->error =
+ StringPrintf("Line search failed: Wolfe zoom bracket width: %.5e "
+ "too small with descent_direction_max_norm: %.5e.",
+ fabs(bracket_high.x - bracket_low.x),
+ descent_direction_max_norm);
+ LOG(WARNING) << summary->error;
+ return false;
+ }
+
+ ++summary->num_iterations;
+ // Polynomial interpolation requires inputs ordered according to step size,
+ // not f(step size).
+ const FunctionSample& lower_bound_step =
+ bracket_low.x < bracket_high.x ? bracket_low : bracket_high;
+ const FunctionSample& upper_bound_step =
+ bracket_low.x < bracket_high.x ? bracket_high : bracket_low;
+ // We are performing 2-point interpolation only here, but the API of
+ // InterpolatingPolynomialMinimizingStepSize() allows for up to
+ // 3-point interpolation, so pad call with a sample with an invalid
+ // value that will therefore be ignored.
+ const FunctionSample unused_previous;
+ DCHECK(!unused_previous.value_is_valid);
+ solution->x =
+ this->InterpolatingPolynomialMinimizingStepSize(
+ options().interpolation_type,
+ lower_bound_step,
+ unused_previous,
+ upper_bound_step,
+ lower_bound_step.x,
+ upper_bound_step.x);
+ // No check on magnitude of step size being too small here as it is
+ // lower-bounded by the initial bracket start point, which was valid.
+ ++summary->num_function_evaluations;
+ if (interpolation_uses_gradients) { ++summary->num_gradient_evaluations; }
+ solution->value_is_valid =
+ function->Evaluate(solution->x,
+ &solution->value,
+ interpolation_uses_gradients
+ ? &solution->gradient : NULL);
+ solution->gradient_is_valid =
+ interpolation_uses_gradients && solution->value_is_valid;
+ if (!solution->value_is_valid) {
+ summary->error =
+ StringPrintf("Line search failed: Wolfe Zoom phase found "
+ "step_size: %.5e, for which function is invalid, "
+ "between low_step: %.5e and high_step: %.5e "
+ "at which function is valid.",
+ solution->x, bracket_low.x, bracket_high.x);
+ LOG(WARNING) << summary->error;
+ return false;
+ }
+
+ if ((solution->value > (initial_position.value
+ + options().sufficient_decrease
+ * initial_position.gradient
+ * solution->x)) ||
+ (solution->value >= bracket_low.value)) {
+ // Armijo sufficient decrease not satisfied, or not better
+ // than current lowest sample, use as new upper bound.
+ bracket_high = *solution;
+ continue;
+ }
+
+ // Armijo sufficient decrease satisfied, check strong Wolfe condition.
+ if (!interpolation_uses_gradients) {
+ // Irrespective of the interpolation type we are using, we now need the
+ // gradient at the current point (which satisfies the Armijo condition)
+ // in order to check the strong Wolfe conditions.
+ ++summary->num_function_evaluations;
+ ++summary->num_gradient_evaluations;
+ solution->value_is_valid =
+ function->Evaluate(solution->x,
+ &solution->value,
+ &solution->gradient);
+ solution->gradient_is_valid = solution->value_is_valid;
+ if (!solution->value_is_valid) {
+ summary->error =
+ StringPrintf("Line search failed: Wolfe Zoom phase found "
+ "step_size: %.5e, for which function is invalid, "
+ "between low_step: %.5e and high_step: %.5e "
+ "at which function is valid.",
+ solution->x, bracket_low.x, bracket_high.x);
+ LOG(WARNING) << summary->error;
+ return false;
+ }
+ }
+ if (fabs(solution->gradient) <=
+ -options().sufficient_curvature_decrease * initial_position.gradient) {
+ // Found a valid termination point satisfying strong Wolfe conditions.
+ break;
+
+ } else if (solution->gradient * (bracket_high.x - bracket_low.x) >= 0) {
+ bracket_high = bracket_low;
+ }
+
+ bracket_low = *solution;
+ }
+ // Solution contains a valid point which satisfies the strong Wolfe
+ // conditions.
+ return true;
+}
+
} // namespace internal
} // namespace ceres
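
For illustration (not part of the patch), a minimal self-contained sketch of
the backtracking structure used by ArmijoLineSearch::Search() above, applied
to the toy objective f(t) = (t - 1)^2 with plain bisection contraction; the
objective and all constants here are illustrative only:

    #include <cmath>
    #include <cstdio>

    int main() {
      const double sufficient_decrease = 1e-4;  // The Options() default.
      const double f0 = 1.0;                    // f(0) for f(t) = (t - 1)^2.
      const double g0 = -2.0;                   // f'(0).
      double step_size = 10.0;                  // Deliberately too large.
      // Contract (as with BISECTION interpolation) until the Armijo
      // sufficient decrease condition holds.
      while (std::pow(step_size - 1.0, 2.0) >
             f0 + sufficient_decrease * g0 * step_size) {
        step_size *= 0.5;
      }
      std::printf("Armijo-satisfying step_size: %f\n", step_size);
      return 0;
    }
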
diff --git a/internal/ceres/line_search.h b/internal/ceres/line_search.h
index 5792652..e4836b2 100644
--- a/internal/ceres/line_search.h
+++ b/internal/ceres/line_search.h
@@ -35,6 +35,7 @@
#ifndef CERES_NO_LINE_SEARCH_MINIMIZER
+#include <string>
#include <vector>
#include "ceres/internal/eigen.h"
#include "ceres/internal/port.h"
@@ -44,6 +45,7 @@
namespace internal {
class Evaluator;
+struct FunctionSample;
// Line search is another name for a one dimensional optimization
// algorithm. The name "line search" comes from the fact one
@@ -63,16 +65,19 @@
Options()
: interpolation_type(CUBIC),
sufficient_decrease(1e-4),
- min_relative_step_size_change(1e-3),
- max_relative_step_size_change(0.9),
+ max_step_contraction(1e-3),
+ min_step_contraction(0.9),
min_step_size(1e-9),
+ max_num_iterations(20),
+ sufficient_curvature_decrease(0.9),
+ max_step_expansion(10.0),
function(NULL) {}
// Degree of the polynomial used to approximate the objective
// function.
LineSearchInterpolationType interpolation_type;
- // Armijo line search parameters.
+ // Armijo and Wolfe line search parameters.
// Solving the line search problem exactly is computationally
// prohibitive. Fortunately, line search based optimization
@@ -85,20 +90,60 @@
// f(step_size) <= f(0) + sufficient_decrease * f'(0) * step_size
double sufficient_decrease;
- // In each iteration of the Armijo line search,
+ // In each iteration of the Armijo / Wolfe line search,
//
- // new_step_size >= min_relative_step_size_change * step_size
- double min_relative_step_size_change;
+ // new_step_size >= max_step_contraction * step_size
+ //
+ // Note that by definition, for contraction:
+ //
+ // 0 < max_step_contraction < min_step_contraction < 1
+ //
+ double max_step_contraction;
- // In each iteration of the Armijo line search,
+ // In each iteration of the Armijo / Wolfe line search,
//
- // new_step_size <= max_relative_step_size_change * step_size
- double max_relative_step_size_change;
+ // new_step_size <= min_step_contraction * step_size
+ //
+ // Note that by definition, for contraction:
+ //
+ // 0 < max_step_contraction < min_step_contraction < 1
+ //
+ double min_step_contraction;
// If during the line search, the step_size falls below this
// value, it is truncated to zero.
double min_step_size;
+ // Maximum number of trial step size iterations during each line search;
+ // if a step size satisfying the search conditions cannot be found within
+ // this number of trials, the line search will terminate.
+ int max_num_iterations;
+
+ // Wolfe-specific line search parameters.
+
+ // The strong Wolfe conditions consist of the Armijo sufficient
+ // decrease condition, and an additional requirement that the
+ // step-size be chosen s.t. the _magnitude_ ('strong' Wolfe
+ // conditions) of the gradient along the search direction
+ // decreases sufficiently. Precisely, this second condition
+ // is that we seek a step_size s.t.
+ //
+ // |f'(step_size)| <= sufficient_curvature_decrease * |f'(0)|
+ //
+ // Where f() is the line search objective and f'() is the derivative
+ // of f w.r.t step_size (d f / d step_size).
+ double sufficient_curvature_decrease;
+
+ // During the bracketing phase of the Wolfe search, the step size is
+ // increased until either a point satisfying the Wolfe conditions is
+ // found, or an upper bound for a bracket containing a point satisfying
+ // the conditions is found. Precisely, at each iteration of the
+ // expansion:
+ //
+ // new_step_size <= max_step_expansion * step_size.
+ //
+ // By definition for expansion, max_step_expansion > 1.0.
+ double max_step_expansion;
+
// The one dimensional function that the line search algorithm
// minimizes.
Function* function;
@@ -133,18 +178,28 @@
Summary()
: success(false),
optimal_step_size(0.0),
- num_evaluations(0) {}
+ num_function_evaluations(0),
+ num_gradient_evaluations(0),
+ num_iterations(0) {}
bool success;
double optimal_step_size;
- int num_evaluations;
+ int num_function_evaluations;
+ int num_gradient_evaluations;
+ int num_iterations;
+ string error;
};
+ explicit LineSearch(const LineSearch::Options& options);
virtual ~LineSearch() {}
+ static LineSearch* Create(const LineSearchType line_search_type,
+ const LineSearch::Options& options,
+ string* error);
+
// Perform the line search.
//
- // initial_step_size must be a positive number.
+ // step_size_estimate must be a positive number.
//
// initial_cost and initial_gradient are the values and gradient of
// the function at zero.
@@ -152,11 +207,23 @@
// search.
//
// Summary::success is true if a non-zero step size is found.
- virtual void Search(const LineSearch::Options& options,
- double initial_step_size,
+ virtual void Search(double step_size_estimate,
double initial_cost,
double initial_gradient,
Summary* summary) = 0;
+ double InterpolatingPolynomialMinimizingStepSize(
+ const LineSearchInterpolationType& interpolation_type,
+ const FunctionSample& lowerbound_sample,
+ const FunctionSample& previous_sample,
+ const FunctionSample& current_sample,
+ const double min_step_size,
+ const double max_step_size) const;
+
+ protected:
+ const LineSearch::Options& options() const { return options_; }
+
+ private:
+ LineSearch::Options options_;
};
class LineSearchFunction : public LineSearch::Function {
@@ -165,6 +232,7 @@
virtual ~LineSearchFunction() {}
void Init(const Vector& position, const Vector& direction);
virtual bool Evaluate(const double x, double* f, double* g);
+ double DirectionInfinityNorm() const;
private:
Evaluator* evaluator_;
@@ -186,14 +254,44 @@
// For more details: http://www.di.ens.fr/~mschmidt/Software/minFunc.html
class ArmijoLineSearch : public LineSearch {
public:
+ explicit ArmijoLineSearch(const LineSearch::Options& options);
virtual ~ArmijoLineSearch() {}
- virtual void Search(const LineSearch::Options& options,
- double initial_step_size,
+ virtual void Search(double step_size_estimate,
double initial_cost,
double initial_gradient,
Summary* summary);
};
+// Bracketing / Zoom Strong Wolfe condition line search. This implementation
+// is based on the pseudo-code algorithm presented in Nocedal & Wright [1]
+// (p60-61) with inspiration from the WolfeLineSearch which ships with the
+// minFunc package by Mark Schmidt [2].
+//
+// [1] Nocedal J., Wright S., Numerical Optimization, 2nd Ed., Springer, 1999.
+// [2] http://www.di.ens.fr/~mschmidt/Software/minFunc.html.
+class WolfeLineSearch : public LineSearch {
+ public:
+ explicit WolfeLineSearch(const LineSearch::Options& options);
+ virtual ~WolfeLineSearch() {}
+ virtual void Search(double step_size_estimate,
+ double initial_cost,
+ double initial_gradient,
+ Summary* summary);
+ // Returns true iff either a valid point or a valid bracket is found.
+ bool BracketingPhase(const FunctionSample& initial_position,
+ const double step_size_estimate,
+ FunctionSample* bracket_low,
+ FunctionSample* bracket_high,
+ bool* perform_zoom_search,
+ Summary* summary);
+ // Returns true iff solution satisfies the strong Wolfe conditions.
+ bool ZoomPhase(const FunctionSample& initial_position,
+ FunctionSample bracket_low,
+ FunctionSample bracket_high,
+ FunctionSample* solution,
+ Summary* summary);
+};
+
} // namespace internal
} // namespace ceres
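
For illustration (not part of the patch), a sketch of how the factory API
above might be driven; my_function is a hypothetical LineSearch::Function
implementation, and initial_cost / initial_gradient are assumed to be the
function value and gradient at step_size = 0:

    LineSearch::Options options;
    options.function = &my_function;
    string error;
    scoped_ptr<LineSearch> line_search(
        LineSearch::Create(ceres::WOLFE, options, &error));
    if (line_search.get() == NULL) {
      LOG(ERROR) << error;
    } else {
      LineSearch::Summary summary;
      line_search->Search(/* step_size_estimate = */ 1.0,
                          initial_cost,
                          initial_gradient,
                          &summary);
      if (summary.success) {
        // summary.optimal_step_size holds the accepted step.
      }
    }
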
diff --git a/internal/ceres/line_search_direction.cc b/internal/ceres/line_search_direction.cc
index b8b582c..8ded823 100644
--- a/internal/ceres/line_search_direction.cc
+++ b/internal/ceres/line_search_direction.cc
@@ -100,14 +100,24 @@
class LBFGS : public LineSearchDirection {
public:
- LBFGS(const int num_parameters, const int max_lbfgs_rank)
- : low_rank_inverse_hessian_(num_parameters, max_lbfgs_rank) {}
+ LBFGS(const int num_parameters,
+ const int max_lbfgs_rank,
+ const bool use_approximate_eigenvalue_bfgs_scaling)
+ : low_rank_inverse_hessian_(num_parameters,
+ max_lbfgs_rank,
+ use_approximate_eigenvalue_bfgs_scaling),
+ is_positive_definite_(true) {}
virtual ~LBFGS() {}
bool NextDirection(const LineSearchMinimizer::State& previous,
const LineSearchMinimizer::State& current,
Vector* search_direction) {
+ CHECK(is_positive_definite_)
+ << "Ceres bug: NextDirection() called on L-BFGS after inverse Hessian "
+ << "approximation has become indefinite, please contact the "
+ << "developers!";
+
low_rank_inverse_hessian_.Update(
previous.search_direction * previous.step_size,
current.gradient - previous.gradient);
@@ -115,11 +125,177 @@
low_rank_inverse_hessian_.RightMultiply(current.gradient.data(),
search_direction->data());
*search_direction *= -1.0;
+
+ if (search_direction->dot(current.gradient) >= 0.0) {
+ LOG(WARNING) << "Numerical failure in L-BFGS update: inverse Hessian "
+ << "approximation is not positive definite, and thus "
+ << "initial gradient for search direction is positive: "
+ << search_direction->dot(current.gradient);
+ is_positive_definite_ = false;
+ return false;
+ }
+
return true;
}
private:
LowRankInverseHessian low_rank_inverse_hessian_;
+ bool is_positive_definite_;
+};
+
+class BFGS : public LineSearchDirection {
+ public:
+ BFGS(const int num_parameters,
+ const bool use_approximate_eigenvalue_scaling)
+ : num_parameters_(num_parameters),
+ use_approximate_eigenvalue_scaling_(use_approximate_eigenvalue_scaling),
+ initialized_(false),
+ is_positive_definite_(true) {
+ LOG_IF(WARNING, num_parameters_ >= 1e3)
+ << "BFGS line search being created with: " << num_parameters_
+ << " parameters, this will allocate a dense approximate inverse Hessian"
+ << " of size: " << num_parameters_ << " x " << num_parameters_
+ << ", consider using the L-BFGS memory-efficient line search direction "
+ << "instead.";
+ // Construct inverse_hessian_ after logging warning about size s.t. if the
+ // allocation crashes us, the log will highlight what the issue likely was.
+ inverse_hessian_ = Matrix::Identity(num_parameters, num_parameters);
+ }
+
+ virtual ~BFGS() {}
+
+ bool NextDirection(const LineSearchMinimizer::State& previous,
+ const LineSearchMinimizer::State& current,
+ Vector* search_direction) {
+ CHECK(is_positive_definite_)
+ << "Ceres bug: NextDirection() called on BFGS after inverse Hessian "
+ << "approximation has become indefinite, please contact the "
+ << "developers!";
+
+ const Vector delta_x = previous.search_direction * previous.step_size;
+ const Vector delta_gradient = current.gradient - previous.gradient;
+ const double delta_x_dot_delta_gradient = delta_x.dot(delta_gradient);
+
+ if (delta_x_dot_delta_gradient <= 1e-10) {
+ VLOG(2) << "Skipping BFGS Update, delta_x_dot_delta_gradient too "
+ << "small: " << delta_x_dot_delta_gradient;
+ } else {
+ // Update dense inverse Hessian approximation.
+
+ if (!initialized_ && use_approximate_eigenvalue_scaling_) {
+ // Rescale the initial inverse Hessian approximation (H_0), which is
+ // iteratively updated, so that it is of similar 'size' to the true
+ // inverse Hessian at the start point.
+ //
+ // \gamma = (delta_gradient_{0}' * delta_x_{0}) /
+ // (delta_gradient_{0}' * delta_gradient_{0})
+ //
+ // Satisfies:
+ //
+ // (1 / \lambda_m) <= \gamma <= (1 / \lambda_1)
+ //
+ // Where \lambda_1 & \lambda_m are the smallest and largest eigenvalues
+ // of the true initial Hessian (not the inverse) respectively. Thus,
+ // \gamma is an approximate eigenvalue of the true inverse Hessian, and
+ // choosing: H_0 = I * \gamma will yield a starting point that has a
+ // similar scale to the true inverse Hessian. This technique is widely
+ // reported to often improve convergence; however, this is not
+ // universally true, particularly if there are errors in the initial
+ // gradients, or if there are significant differences in the sensitivity
+ // of the problem to the parameters (i.e. the range of the magnitudes of
+ // the components of the gradient is large).
+ //
+ // The origin of this rescaling trick is somewhat unclear; the earliest
+ // reference appears to be Oren [1]. However, it is widely discussed
+ // without specific attribution in various texts including [2] (p143).
+ //
+ // [1] Oren S.S., Self-scaling variable metric (SSVM) algorithms
+ // Part II: Implementation and experiments, Management Science,
+ // 20(5), 863-874, 1974.
+ // [2] Nocedal J., Wright S., Numerical Optimization, Springer, 1999.
+ inverse_hessian_ *=
+ delta_x_dot_delta_gradient / delta_gradient.dot(delta_gradient);
+ }
+ initialized_ = true;
+
+ // Efficient O(num_parameters^2) BFGS update [2].
+ //
+ // Starting from dense BFGS update detailed in Nocedal [2] p140/177 and
+ // using: y_k = delta_gradient, s_k = delta_x:
+ //
+ // \rho_k = 1.0 / (s_k' * y_k)
+ // V_k = I - \rho_k * y_k * s_k'
+ // H_k = (V_k' * H_{k-1} * V_k) + (\rho_k * s_k * s_k')
+ //
+ // This update involves matrix-matrix products which are naively O(N^3);
+ // however, we can exploit our knowledge that H_k is positive definite,
+ // and thus by defn. symmetric, to reduce the cost of the update:
+ //
+ // Expanding the update above yields:
+ //
+ // H_k = H_{k-1} +
+ // \rho_k * ( (1.0 + \rho_k * y_k' * H_{k-1} * y_k) * s_k * s_k' -
+ // (s_k * y_k' * H_{k-1} + H_{k-1} * y_k * s_k') )
+ //
+ // Using: A = (s_k * y_k' * H_{k-1}), and the knowledge that
+ // H_{k-1} = H_{k-1}', the last term simplifies to (A + A'). Note that
+ // although A is not symmetric, (A + A') is symmetric. For ease of
+ // construction we also define:
+ // B = (1 + \rho_k * y_k' * H_{k-1} * y_k) * s_k * s_k', which is by defn.
+ // symmetric due to construction from: s_k * s_k'.
+ //
+ // Now we can write the BFGS update as:
+ //
+ // H_k = H_{k-1} + \rho_k * (B - (A + A'))
+
+ // For efficiency, as H_k is by defn. symmetric, we will only maintain the
+ // *lower* triangle of H_k (and all intermediary terms).
+
+ const double rho_k = 1.0 / delta_x_dot_delta_gradient;
+
+ // Calculate: A = s_k * y_k' * H_{k-1}
+ Matrix A = delta_x * (delta_gradient.transpose() *
+ inverse_hessian_.selfadjointView<Eigen::Lower>());
+
+ // Calculate scalar: (1 + \rho_k * y_k' * H_{k-1} * y_k)
+ const double delta_x_times_delta_x_transpose_scale_factor =
+ (1.0 + (rho_k * delta_gradient.transpose() *
+ inverse_hessian_.selfadjointView<Eigen::Lower>() *
+ delta_gradient));
+ // Calculate: B = (1 + \rho_k * y_k' * H_{k-1} * y_k) * s_k * s_k'
+ Matrix B = Matrix::Zero(num_parameters_, num_parameters_);
+ B.selfadjointView<Eigen::Lower>().
+ rankUpdate(delta_x, delta_x_times_delta_x_transpose_scale_factor);
+
+ // Finally, update inverse Hessian approximation according to:
+ // H_k = H_{k-1} + \rho_k * (B - (A + A')). Note that (A + A') is
+ // symmetric, even though A is not.
+ inverse_hessian_.triangularView<Eigen::Lower>() +=
+ rho_k * (B - A - A.transpose());
+ }
+
+ *search_direction =
+ inverse_hessian_.selfadjointView<Eigen::Lower>() *
+ (-1.0 * current.gradient);
+
+ if (search_direction->dot(current.gradient) >= 0.0) {
+ LOG(WARNING) << "Numerical failure in BFGS update: inverse Hessian "
+ << "approximation is not positive definite, and thus "
+ << "initial gradient for search direction is positive: "
+ << search_direction->dot(current.gradient);
+ is_positive_definite_ = false;
+ return false;
+ }
+
+ return true;
+ }
+
+ private:
+ const int num_parameters_;
+ const bool use_approximate_eigenvalue_scaling_;
+ Matrix inverse_hessian_;
+ bool initialized_;
+ bool is_positive_definite_;
};
LineSearchDirection*
@@ -135,8 +311,16 @@
}
if (options.type == ceres::LBFGS) {
- return new ceres::internal::LBFGS(options.num_parameters,
- options.max_lbfgs_rank);
+ return new ceres::internal::LBFGS(
+ options.num_parameters,
+ options.max_lbfgs_rank,
+ options.use_approximate_eigenvalue_bfgs_scaling);
+ }
+
+ if (options.type == ceres::BFGS) {
+ return new ceres::internal::BFGS(
+ options.num_parameters,
+ options.use_approximate_eigenvalue_bfgs_scaling);
}
LOG(ERROR) << "Unknown line search direction type: " << options.type;
diff --git a/internal/ceres/line_search_direction.h b/internal/ceres/line_search_direction.h
index 0874754..0857cb0 100644
--- a/internal/ceres/line_search_direction.h
+++ b/internal/ceres/line_search_direction.h
@@ -48,7 +48,8 @@
type(LBFGS),
nonlinear_conjugate_gradient_type(FLETCHER_REEVES),
function_tolerance(1e-12),
- max_lbfgs_rank(20) {
+ max_lbfgs_rank(20),
+ use_approximate_eigenvalue_bfgs_scaling(true) {
}
int num_parameters;
@@ -56,6 +57,7 @@
NonlinearConjugateGradientType nonlinear_conjugate_gradient_type;
double function_tolerance;
int max_lbfgs_rank;
+ bool use_approximate_eigenvalue_bfgs_scaling;
};
static LineSearchDirection* Create(const Options& options);
diff --git a/internal/ceres/line_search_minimizer.cc b/internal/ceres/line_search_minimizer.cc
index 24aada3..2cc89fa 100644
--- a/internal/ceres/line_search_minimizer.cc
+++ b/internal/ceres/line_search_minimizer.cc
@@ -160,6 +160,8 @@
line_search_direction_options.nonlinear_conjugate_gradient_type =
options.nonlinear_conjugate_gradient_type;
line_search_direction_options.max_lbfgs_rank = options.max_lbfgs_rank;
+ line_search_direction_options.use_approximate_eigenvalue_bfgs_scaling =
+ options.use_approximate_eigenvalue_bfgs_scaling;
scoped_ptr<LineSearchDirection> line_search_direction(
LineSearchDirection::Create(line_search_direction_options));
@@ -170,15 +172,32 @@
options.line_search_interpolation_type;
line_search_options.min_step_size = options.min_line_search_step_size;
line_search_options.sufficient_decrease =
- options.armijo_sufficient_decrease;
- line_search_options.min_relative_step_size_change =
- options.min_armijo_relative_step_size_change;
- line_search_options.max_relative_step_size_change =
- options.max_armijo_relative_step_size_change;
+ options.line_search_sufficient_function_decrease;
+ line_search_options.max_step_contraction =
+ options.max_line_search_step_contraction;
+ line_search_options.min_step_contraction =
+ options.min_line_search_step_contraction;
+ line_search_options.max_num_iterations =
+ options.max_num_line_search_step_size_iterations;
+ line_search_options.sufficient_curvature_decrease =
+ options.line_search_sufficient_curvature_decrease;
+ line_search_options.max_step_expansion =
+ options.max_line_search_step_expansion;
line_search_options.function = &line_search_function;
- ArmijoLineSearch line_search;
+ scoped_ptr<LineSearch>
+ line_search(LineSearch::Create(options.line_search_type,
+ line_search_options,
+ &summary->error));
+ if (line_search.get() == NULL) {
+ LOG(ERROR) << "Ceres bug: Unable to create a LineSearch object, please "
+ << "contact the developers!, error: " << summary->error;
+ summary->termination_type = DID_NOT_RUN;
+ return;
+ }
+
LineSearch::Summary line_search_summary;
+ int num_line_search_direction_restarts = 0;
while (true) {
if (!RunCallbacks(options.callbacks, iteration_summary, summary)) {
@@ -215,9 +234,36 @@
&current_state.search_direction);
}
- if (!line_search_status) {
- LOG(WARNING) << "Line search direction computation failed. "
- "Resorting to steepest descent.";
+ if (!line_search_status &&
+ num_line_search_direction_restarts >=
+ options.max_num_line_search_direction_restarts) {
+ // Line search direction failed to generate a new direction, and we
+ // have already reached our specified maximum number of restarts,
+ // terminate optimization.
+ summary->error =
+ StringPrintf("Line search direction failure: specified "
+ "max_num_line_search_direction_restarts: %d reached.",
+ options.max_num_line_search_direction_restarts);
+ LOG(WARNING) << summary->error << " terminating optimization.";
+ summary->termination_type = NUMERICAL_FAILURE;
+ break;
+
+ } else if (!line_search_status) {
+ // Restart the line search direction, falling back to gradient descent for
+ // this iteration, as we have not yet reached our maximum number of restarts.
+ CHECK_LT(num_line_search_direction_restarts,
+ options.max_num_line_search_direction_restarts);
+
+ ++num_line_search_direction_restarts;
+ LOG(WARNING)
+ << "Line search direction algorithm: "
+ << LineSearchDirectionTypeToString(options.line_search_direction_type)
+ << ", failed to produce a valid new direction at iteration: "
+ << iteration_summary.iteration << ". Restarting, number of "
+ << "restarts: " << num_line_search_direction_restarts << " / "
+ << options.max_num_line_search_direction_restarts << " [max].";
+ line_search_direction.reset(
+ LineSearchDirection::Create(line_search_direction_options));
current_state.search_direction = -current_state.gradient;
}
@@ -227,16 +273,34 @@
// TODO(sameeragarwal): Refactor this into its own object and add
// explanations for the various choices.
- const double initial_step_size = (iteration_summary.iteration == 1)
+ //
+ // Note that we use !line_search_status to ensure that we treat cases when
+ // we restarted the line search direction equivalently to the first
+ // iteration.
+ const double initial_step_size =
+ (iteration_summary.iteration == 1 || !line_search_status)
? min(1.0, 1.0 / current_state.gradient_max_norm)
: min(1.0, 2.0 * (current_state.cost - previous_state.cost) /
current_state.directional_derivative);
+ // By definition, we should only ever go forwards along the specified search
+ // direction in a line search; the most likely cause of this being violated
+ // would be a numerical failure in the line search direction calculation.
+ if (initial_step_size < 0.0) {
+ summary->error =
+ StringPrintf("Numerical failure in line search, initial_step_size is "
+ "negative: %.5e, directional_derivative: %.5e, "
+ "(current_cost - previous_cost): %.5e",
+ initial_step_size, current_state.directional_derivative,
+ (current_state.cost - previous_state.cost));
+ LOG(WARNING) << summary->error;
+ summary->termination_type = NUMERICAL_FAILURE;
+ break;
+ }
- line_search.Search(line_search_options,
- initial_step_size,
- current_state.cost,
- current_state.directional_derivative,
- &line_search_summary);
+ line_search->Search(initial_step_size,
+ current_state.cost,
+ current_state.directional_derivative,
+ &line_search_summary);
current_state.step_size = line_search_summary.optimal_step_size;
delta = current_state.step_size * current_state.search_direction;
@@ -282,7 +346,11 @@
iteration_summary.step_norm = delta.norm();
iteration_summary.step_size = current_state.step_size;
iteration_summary.line_search_function_evaluations =
- line_search_summary.num_evaluations;
+ line_search_summary.num_function_evaluations;
+ iteration_summary.line_search_gradient_evaluations =
+ line_search_summary.num_gradient_evaluations;
+ iteration_summary.line_search_iterations =
+ line_search_summary.num_iterations;
iteration_summary.iteration_time_in_seconds =
WallTimeInSeconds() - iteration_start_time;
iteration_summary.cumulative_time_in_seconds =
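
A note on the initial_step_size heuristic above (my reading, following
Nocedal & Wright p59; the patch itself leaves the explanation as a TODO):
under a quadratic model of f along the search direction matching f(0) and
f'(0), the non-restart branch

    initial_step_size = min(1.0, 2.0 * (f_k - f_{k-1}) / f'(0))

chooses the model minimizer that attains the same decrease as the previous
iteration, capped at the unit step that is the natural first trial for
(quasi-)Newton-like directions.
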
diff --git a/internal/ceres/low_rank_inverse_hessian.cc b/internal/ceres/low_rank_inverse_hessian.cc
index 4fabd5b..372165f 100644
--- a/internal/ceres/low_rank_inverse_hessian.cc
+++ b/internal/ceres/low_rank_inverse_hessian.cc
@@ -35,12 +35,15 @@
namespace ceres {
namespace internal {
-LowRankInverseHessian::LowRankInverseHessian(int num_parameters,
- int max_num_corrections)
+LowRankInverseHessian::LowRankInverseHessian(
+ int num_parameters,
+ int max_num_corrections,
+ bool use_approximate_eigenvalue_scaling)
: num_parameters_(num_parameters),
max_num_corrections_(max_num_corrections),
+ use_approximate_eigenvalue_scaling_(use_approximate_eigenvalue_scaling),
num_corrections_(0),
- diagonal_(1.0),
+ approximate_eigenvalue_scale_(1.0),
delta_x_history_(num_parameters, max_num_corrections),
delta_gradient_history_(num_parameters, max_num_corrections),
delta_x_dot_delta_gradient_(max_num_corrections) {
@@ -50,7 +53,8 @@
const Vector& delta_gradient) {
const double delta_x_dot_delta_gradient = delta_x.dot(delta_gradient);
if (delta_x_dot_delta_gradient <= 1e-10) {
- VLOG(2) << "Skipping LBFGS Update. " << delta_x_dot_delta_gradient;
+ VLOG(2) << "Skipping LBFGS Update, delta_x_dot_delta_gradient too small: "
+ << delta_x_dot_delta_gradient;
return false;
}
@@ -77,7 +81,8 @@
delta_gradient_history_.col(num_corrections_ - 1) = delta_gradient;
delta_x_dot_delta_gradient_(num_corrections_ - 1) =
delta_x_dot_delta_gradient;
- diagonal_ = delta_x_dot_delta_gradient / delta_gradient.squaredNorm();
+ approximate_eigenvalue_scale_ =
+ delta_x_dot_delta_gradient / delta_gradient.squaredNorm();
return true;
}
@@ -96,7 +101,39 @@
search_direction -= alpha(i) * delta_gradient_history_.col(i);
}
- search_direction *= diagonal_;
+ if (use_approximate_eigenvalue_scaling_) {
+ // Rescale the initial inverse Hessian approximation (H_0), which is
+ // iteratively updated, so that it is of similar 'size' to the true inverse
+ // Hessian along the most recent search direction.
+ //
+ // \gamma_k = (delta_gradient_{k-1}' * delta_x_{k-1}) /
+ // (delta_gradient_{k-1}' * delta_gradient_{k-1})
+ //
+ // Satisfies:
+ //
+ // (1 / \lambda_m) <= \gamma_k <= (1 / \lambda_1)
+ //
+ // Where \lambda_1 & \lambda_m are the smallest and largest eigenvalues of
+ // the true Hessian (not the inverse) along the most recent search direction
+ // respectively. Thus \gamma is an approximate eigenvalue of the true
+ // inverse Hessian, and choosing: H_0 = I * \gamma will yield a starting
+ // point that has a similar scale to the true inverse Hessian. This
+ // technique is widely reported to often improve convergence; however, this
+ // is not universally true, particularly if there are errors in the initial
+ // jacobians, or if there are significant differences in the sensitivity
+ // of the problem to the parameters (i.e. the range of the magnitudes of
+ // the components of the gradient is large).
+ //
+ // The origin of this rescaling trick is somewhat unclear; the earliest
+ // reference appears to be Oren [1]. However, it is widely discussed without
+ // specific attribution in various texts including [2] (p143/178).
+ //
+ // [1] Oren S.S., Self-scaling variable metric (SSVM) algorithms Part II:
+ // Implementation and experiments, Management Science,
+ // 20(5), 863-874, 1974.
+ // [2] Nocedal J., Wright S., Numerical Optimization, Springer, 1999.
+ search_direction *= approximate_eigenvalue_scale_;
+ }
for (int i = 0; i < num_corrections_; ++i) {
const double beta = delta_gradient_history_.col(i).dot(search_direction) /
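
A short algebra note (not from the patch) justifying the eigenvalue bounds
quoted in the comment above: on a quadratic with Hessian H we have
delta_gradient = H * delta_x, so with s = delta_x,

    \gamma = (delta_gradient' * delta_x) / (delta_gradient' * delta_gradient)
           = (s' * H * s) / (s' * H^2 * s).

Expanding s = \sum_i c_i * v_i in the eigenbasis of H, this is a weighted
average of the values 1 / \lambda_i (with weights proportional to
\lambda_i^2 * c_i^2), and therefore lies in [1 / \lambda_m, 1 / \lambda_1].
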
diff --git a/internal/ceres/low_rank_inverse_hessian.h b/internal/ceres/low_rank_inverse_hessian.h
index 6f3fc0c..7d293d0 100644
--- a/internal/ceres/low_rank_inverse_hessian.h
+++ b/internal/ceres/low_rank_inverse_hessian.h
@@ -61,10 +61,16 @@
public:
// num_parameters is the row/column size of the Hessian.
// max_num_corrections is the rank of the Hessian approximation.
+ // use_approximate_eigenvalue_scaling controls whether the initial
+ // inverse Hessian used during Right/LeftMultiply() is scaled by
+ // the approximate eigenvalue of the true inverse Hessian at the
+ // current operating point.
// The approximation uses:
// 2 * max_num_corrections * num_parameters + max_num_corrections
// doubles.
- LowRankInverseHessian(int num_parameters, int max_num_corrections);
+ LowRankInverseHessian(int num_parameters,
+ int max_num_corrections,
+ bool use_approximate_eigenvalue_scaling);
virtual ~LowRankInverseHessian() {}
// Update the low rank approximation. delta_x is the change in the
@@ -86,8 +92,9 @@
private:
const int num_parameters_;
const int max_num_corrections_;
+ const bool use_approximate_eigenvalue_scaling_;
int num_corrections_;
- double diagonal_;
+ double approximate_eigenvalue_scale_;
Matrix delta_x_history_;
Matrix delta_gradient_history_;
Vector delta_x_dot_delta_gradient_;
diff --git a/internal/ceres/minimizer.h b/internal/ceres/minimizer.h
index 300d2df..622e9ce 100644
--- a/internal/ceres/minimizer.h
+++ b/internal/ceres/minimizer.h
@@ -88,14 +88,25 @@
nonlinear_conjugate_gradient_type =
options.nonlinear_conjugate_gradient_type;
max_lbfgs_rank = options.max_lbfgs_rank;
+ use_approximate_eigenvalue_bfgs_scaling =
+ options.use_approximate_eigenvalue_bfgs_scaling;
line_search_interpolation_type =
options.line_search_interpolation_type;
min_line_search_step_size = options.min_line_search_step_size;
- armijo_sufficient_decrease = options.armijo_sufficient_decrease;
- min_armijo_relative_step_size_change =
- options.min_armijo_relative_step_size_change;
- max_armijo_relative_step_size_change =
- options.max_armijo_relative_step_size_change;
+ line_search_sufficient_function_decrease =
+ options.line_search_sufficient_function_decrease;
+ max_line_search_step_contraction =
+ options.max_line_search_step_contraction;
+ min_line_search_step_contraction =
+ options.min_line_search_step_contraction;
+ max_num_line_search_step_size_iterations =
+ options.max_num_line_search_step_size_iterations;
+ max_num_line_search_direction_restarts =
+ options.max_num_line_search_direction_restarts;
+ line_search_sufficient_curvature_decrease =
+ options.line_search_sufficient_curvature_decrease;
+ max_line_search_step_expansion =
+ options.max_line_search_step_expansion;
evaluator = NULL;
trust_region_strategy = NULL;
jacobian = NULL;
@@ -131,11 +142,16 @@
LineSearchType line_search_type;
NonlinearConjugateGradientType nonlinear_conjugate_gradient_type;
int max_lbfgs_rank;
+ bool use_approximate_eigenvalue_bfgs_scaling;
LineSearchInterpolationType line_search_interpolation_type;
double min_line_search_step_size;
- double armijo_sufficient_decrease;
- double min_armijo_relative_step_size_change;
- double max_armijo_relative_step_size_change;
+ double line_search_sufficient_function_decrease;
+ double max_line_search_step_contraction;
+ double min_line_search_step_contraction;
+ int max_num_line_search_step_size_iterations;
+ int max_num_line_search_direction_restarts;
+ double line_search_sufficient_curvature_decrease;
+ double max_line_search_step_expansion;
// List of callbacks that are executed by the Minimizer at the end
diff --git a/internal/ceres/solver_impl.cc b/internal/ceres/solver_impl.cc
index 3d58de7..8fc1305 100644
--- a/internal/ceres/solver_impl.cc
+++ b/internal/ceres/solver_impl.cc
@@ -636,6 +636,87 @@
summary->num_effective_parameters =
original_program->NumEffectiveParameters();
+ // Validate values for configuration parameters supplied by user.
+ if ((original_options.line_search_direction_type == ceres::BFGS ||
+ original_options.line_search_direction_type == ceres::LBFGS) &&
+ original_options.line_search_type != ceres::WOLFE) {
+ summary->error =
+ string("Invalid configuration: require line_search_type == "
+ "ceres::WOLFE when using (L)BFGS to ensure that underlying "
+ "assumptions are guaranteed to be satisfied.");
+ LOG(ERROR) << summary->error;
+ return;
+ }
+ if (original_options.max_lbfgs_rank == 0) {
+ summary->error =
+ string("Invalid configuration: require max_lbfgs_rank > 0");
+ LOG(ERROR) << summary->error;
+ return;
+ }
+ if (original_options.min_line_search_step_size <= 0.0) {
+ summary->error = "Invalid configuration: min_line_search_step_size <= 0.0.";
+ LOG(ERROR) << summary->error;
+ return;
+ }
+ if (original_options.line_search_sufficient_function_decrease <= 0.0) {
+ summary->error =
+ string("Invalid configuration: require ") +
+ string("line_search_sufficient_function_decrease <= 0.0.");
+ LOG(ERROR) << summary->error;
+ return;
+ }
+ if (original_options.max_line_search_step_contraction <= 0.0 ||
+ original_options.max_line_search_step_contraction >= 1.0) {
+ summary->error = string("Invalid configuration: require ") +
+ string("0.0 < max_line_search_step_contraction < 1.0.");
+ LOG(ERROR) << summary->error;
+ return;
+ }
+ if (original_options.min_line_search_step_contraction <=
+ original_options.max_line_search_step_contraction ||
+ original_options.min_line_search_step_contraction > 1.0) {
+ summary->error = string("Invalid configuration: require ") +
+ string("max_line_search_step_contraction < ") +
+ string("min_line_search_step_contraction <= 1.0.");
+ LOG(ERROR) << summary->error;
+ return;
+ }
+ // Warn user if they have requested BISECTION interpolation, but constraints
+ // on max/min step size change during line search prevent bisection scaling
+ // from occurring. Warn only, as this is likely a user mistake, but one which
+ // does not prevent us from continuing.
+ LOG_IF(WARNING,
+ (original_options.line_search_interpolation_type == ceres::BISECTION &&
+ (original_options.max_line_search_step_contraction > 0.5 ||
+ original_options.min_line_search_step_contraction < 0.5)))
+ << "Line search interpolation type is BISECTION, but specified "
+ << "max_line_search_step_contraction: "
+ << original_options.max_line_search_step_contraction << ", and "
+ << "min_line_search_step_contraction: "
+ << original_options.min_line_search_step_contraction
+ << ", prevent bisection (0.5) scaling, continuing with solve regardless.";
+ if (original_options.max_num_line_search_step_size_iterations <= 0) {
+ summary->error = string("Invalid configuration: require ") +
+ string("max_num_line_search_step_size_iterations > 0.");
+ LOG(ERROR) << summary->error;
+ return;
+ }
+ if (original_options.line_search_sufficient_curvature_decrease <=
+ original_options.line_search_sufficient_function_decrease ||
+ original_options.line_search_sufficient_curvature_decrease > 1.0) {
+ summary->error = string("Invalid configuration: require ") +
+ string("line_search_sufficient_function_decrease < ") +
+ string("line_search_sufficient_curvature_decrease < 1.0.");
+ LOG(ERROR) << summary->error;
+ return;
+ }
+ if (original_options.max_line_search_step_expansion <= 1.0) {
+ summary->error = string("Invalid configuration: require ") +
+ string("max_line_search_step_expansion > 1.0.");
+ LOG(ERROR) << summary->error;
+ return;
+ }
+
// Empty programs are usually a user error.
if (summary->num_parameter_blocks == 0) {
summary->error = "Problem contains no parameter blocks.";
diff --git a/internal/ceres/types.cc b/internal/ceres/types.cc
index e88cdd6..42990e3 100644
--- a/internal/ceres/types.cc
+++ b/internal/ceres/types.cc
@@ -165,6 +165,7 @@
CASESTR(STEEPEST_DESCENT);
CASESTR(NONLINEAR_CONJUGATE_GRADIENT);
CASESTR(LBFGS);
+ CASESTR(BFGS);
default:
return "UNKNOWN";
}
@@ -176,12 +177,14 @@
STRENUM(STEEPEST_DESCENT);
STRENUM(NONLINEAR_CONJUGATE_GRADIENT);
STRENUM(LBFGS);
+ STRENUM(BFGS);
return false;
}
const char* LineSearchTypeToString(LineSearchType type) {
switch (type) {
CASESTR(ARMIJO);
+ CASESTR(WOLFE);
default:
return "UNKNOWN";
}
@@ -190,6 +193,7 @@
bool StringToLineSearchType(string value, LineSearchType* type) {
UpperCase(&value);
STRENUM(ARMIJO);
+ STRENUM(WOLFE);
return false;
}