Get rid of redundant function evaluations in LineSearchMinimizer

1. Replace LineSearch::Summary::optimal_step_size with
   LineSearch::Summary::optimal_point which is a FunctionSample.
2. Add the actual vector position and vector gradient of the
   point in the FunctionSample
3. Use the above two to get rid of an extraneous function evaluation
   in LineSearchMinimizer.

Runtime performance is almost 2x improved as a result.

Thanks to @svenpilz for reporting this.

https://github.com/ceres-solver/ceres-solver/issues/296

Change-Id: Iebf2db7acecb2c95c9b1683b73cdc5faab78b02e
diff --git a/internal/ceres/function_sample.cc b/internal/ceres/function_sample.cc
index 01f3136..2fd3dbd 100644
--- a/internal/ceres/function_sample.cc
+++ b/internal/ceres/function_sample.cc
@@ -34,6 +34,35 @@
 namespace ceres {
 namespace internal {
 
+FunctionSample::FunctionSample()
+    : x(0.0),
+      vector_x_is_valid(false),
+      value(0.0),
+      value_is_valid(false),
+      vector_gradient_is_valid(false),
+      gradient(0.0),
+      gradient_is_valid(false) {}
+
+FunctionSample::FunctionSample(const double x, const double value)
+    : x(x),
+      vector_x_is_valid(false),
+      value(value),
+      value_is_valid(true),
+      vector_gradient_is_valid(false),
+      gradient(0.0),
+      gradient_is_valid(false) {}
+
+FunctionSample::FunctionSample(const double x,
+                               const double value,
+                               const double gradient)
+    : x(x),
+      vector_x_is_valid(false),
+      value(value),
+      value_is_valid(true),
+      vector_gradient_is_valid(false),
+      gradient(gradient),
+      gradient_is_valid(true) {}
+
 std::string FunctionSample::ToDebugString() const {
   return StringPrintf("[x: %.8e, value: %.8e, gradient: %.8e, "
                       "value_is_valid: %d, gradient_is_valid: %d]",
diff --git a/internal/ceres/function_sample.h b/internal/ceres/function_sample.h
index e4356c6..df79aef 100644
--- a/internal/ceres/function_sample.h
+++ b/internal/ceres/function_sample.h
@@ -32,29 +32,62 @@
 #define CERES_INTERNAL_FUNCTION_SAMPLE_H_
 
 #include <string>
+#include "ceres/internal/eigen.h"
 
 namespace ceres {
 namespace internal {
 
-// Clients can use this struct to communicate the value of the
-// function and or its gradient at a given point x.
+// FunctionSample is used by the line search routines to store and
+// communicate the value and (optionally) the gradient of the function
+// being minimized.
+//
+// Since line search, as the name implies, happens along a certain
+// line/direction, FunctionSample stores the sampled information in
+// two ways: in the ambient space and along the direction of
+// search.
 struct FunctionSample {
-  FunctionSample()
-      : x(0.0),
-        value(0.0),
-        value_is_valid(false),
-        gradient(0.0),
-        gradient_is_valid(false) {
-  }
+  FunctionSample();
+  FunctionSample(double x, double value);
+  FunctionSample(double x, double value, double gradient);
+
   std::string ToDebugString() const;
 
+  // x is the location of the sample along the search direction.
   double x;
-  double value;      // value = f(x)
+
+  // Let p be a point and d be the search direction then
+  //
+  // vector_x = p + x * d;
+  Vector vector_x;
+  // True if vector_x has been assigned a valid value.
+  bool vector_x_is_valid;
+
+  // value = f(vector_x)
+  double value;
+  // True if the evaluation was successful and value is a finite
+  // number.
   bool value_is_valid;
-  double gradient;   // gradient = f'(x)
+
+  // vector_gradient = Df(vector_x);
+  //
+  // D is the derivative operator.
+  Vector vector_gradient;
+  // True if the vector gradient was evaluated and the evaluation was
+  // successful (the value is a finite number).
+  bool vector_gradient_is_valid;
+
+  // gradient = d.transpose() * vector_gradient
+  //
+  // where d is the search direction.
+  double gradient;
+  // True if the evaluation of the gradient was successful and the
+  // value is a finite number.
   bool gradient_is_valid;
 };
 
+
+
+
 }  // namespace internal
 }  // namespace ceres
 
diff --git a/internal/ceres/line_search.cc b/internal/ceres/line_search.cc
index 3d946dc..bf7c3d4 100644
--- a/internal/ceres/line_search.cc
+++ b/internal/ceres/line_search.cc
@@ -54,30 +54,8 @@
 namespace {
 // Precision used for floating point values in error message output.
 const int kErrorMessageNumericPrecision = 8;
-
-FunctionSample ValueSample(const double x, const double value) {
-  FunctionSample sample;
-  sample.x = x;
-  sample.value = value;
-  sample.value_is_valid = true;
-  return sample;
-}
-
-FunctionSample ValueAndGradientSample(const double x,
-                                      const double value,
-                                      const double gradient) {
-  FunctionSample sample;
-  sample.x = x;
-  sample.value = value;
-  sample.gradient = gradient;
-  sample.value_is_valid = true;
-  sample.gradient_is_valid = true;
-  return sample;
-}
-
 }  // namespace
 
-
 ostream& operator<<(ostream &os, const FunctionSample& sample);
 
 // Convenience stream operator for pushing FunctionSamples into log messages.
@@ -113,9 +91,7 @@
     : evaluator_(evaluator),
       position_(evaluator->NumParameters()),
       direction_(evaluator->NumEffectiveParameters()),
-      evaluation_point_(evaluator->NumParameters()),
       scaled_direction_(evaluator->NumEffectiveParameters()),
-      gradient_(evaluator->NumEffectiveParameters()),
       initial_evaluator_residual_time_in_seconds(0.0),
       initial_evaluator_jacobian_time_in_seconds(0.0) {}
 
@@ -129,33 +105,44 @@
                                   const bool evaluate_gradient,
                                   FunctionSample* output) {
   output->x = x;
+  output->vector_x_is_valid = false;
   output->value_is_valid = false;
   output->gradient_is_valid = false;
+  output->vector_gradient_is_valid = false;
 
   scaled_direction_ = output->x * direction_;
+  output->vector_x.resize(position_.rows(), 1);
   if (!evaluator_->Plus(position_.data(),
                         scaled_direction_.data(),
-                        evaluation_point_.data())) {
+                        output->vector_x.data())) {
     return;
   }
+  output->vector_x_is_valid = true;
 
-  const bool eval_status =
-      evaluator_->Evaluate(evaluation_point_.data(),
-                           &(output->value),
-                           NULL,
-                           evaluate_gradient ? gradient_.data() : NULL,
-                           NULL);
+  double* gradient = NULL;
+  if (evaluate_gradient) {
+    output->vector_gradient.resize(direction_.rows(), 1);
+    gradient = output->vector_gradient.data();
+  }
+  const bool eval_status = evaluator_->Evaluate(
+      output->vector_x.data(), &(output->value), NULL, gradient, NULL);
 
   if (!eval_status || !IsFinite(output->value)) {
     return;
   }
 
   output->value_is_valid = true;
-  if (evaluate_gradient) {
-    output->gradient = direction_.dot(gradient_);
+  if (!evaluate_gradient) {
+    return;
   }
-  output->gradient_is_valid = IsFinite(output->gradient);
-  return;
+
+  output->gradient = direction_.dot(output->vector_gradient);
+  if (!IsFinite(output->gradient)) {
+    return;
+  }
+
+  output->gradient_is_valid = true;
+  output->vector_gradient_is_valid = true;
 }
 
 double LineSearchFunction::DirectionInfinityNorm() const {
@@ -200,7 +187,6 @@
   summary->cost_evaluation_time_in_seconds = 0.0;
   summary->gradient_evaluation_time_in_seconds = 0.0;
   summary->polynomial_minimization_time_in_seconds = 0.0;
-
   options().function->ResetTimeStatistics();
   this->DoSearch(step_size_estimate, initial_cost, initial_gradient, summary);
   options().function->
@@ -254,12 +240,12 @@
   if (interpolation_type == QUADRATIC) {
     // Two point interpolation using function values and the
     // gradient at the lower bound.
-    samples.push_back(ValueSample(current.x, current.value));
+    samples.push_back(FunctionSample(current.x, current.value));
 
     if (previous.value_is_valid) {
       // Three point interpolation, using function values and the
       // gradient at the lower bound.
-      samples.push_back(ValueSample(previous.x, previous.value));
+      samples.push_back(FunctionSample(previous.x, previous.value));
     }
   } else if (interpolation_type == CUBIC) {
     // Two point interpolation using the function values and the gradients.
@@ -297,8 +283,9 @@
 
   // Note initial_cost & initial_gradient are evaluated at step_size = 0,
   // not step_size_estimate, which is our starting guess.
-  const FunctionSample initial_position =
-      ValueAndGradientSample(0.0, initial_cost, initial_gradient);
+  FunctionSample initial_position(0.0, initial_cost, initial_gradient);
+  initial_position.vector_x = function->position();
+  initial_position.vector_x_is_valid = true;
 
   const double descent_direction_max_norm = function->DirectionInfinityNorm();
   FunctionSample previous;
@@ -365,7 +352,7 @@
     function->Evaluate(step_size, kEvaluateGradient, &current);
   }
 
-  summary->optimal_step_size = current.x;
+  summary->optimal_point = current;
   summary->success = true;
 }
 
@@ -387,9 +374,9 @@
 
   // Note initial_cost & initial_gradient are evaluated at step_size = 0,
   // not step_size_estimate, which is our starting guess.
-  const FunctionSample initial_position =
-      ValueAndGradientSample(0.0, initial_cost, initial_gradient);
-
+  FunctionSample initial_position(0.0, initial_cost, initial_gradient);
+  initial_position.vector_x = options().function->position();
+  initial_position.vector_x_is_valid = true;
   bool do_zoom_search = false;
   // Important: The high/low in bracket_high & bracket_low refer to their
   // _function_ values, not their step sizes i.e. it is _not_ required that
@@ -431,7 +418,7 @@
     // produce a valid point when ArmijoLineSearch would succeed, we return the
     // point with the lowest cost found thus far which satsifies the Armijo
     // condition (but not the Wolfe conditions).
-    summary->optimal_step_size = bracket_low.x;
+    summary->optimal_point = bracket_low;
     summary->success = true;
     return;
   }
@@ -477,11 +464,13 @@
   // satisfies the strong Wolfe curvature condition, that we return the point
   // amongst those found thus far, which minimizes f() and satisfies the Armijo
   // condition.
-  solution =
-      solution.value_is_valid && solution.value <= bracket_low.value
-      ? solution : bracket_low;
 
-  summary->optimal_step_size = solution.x;
+  if (!solution.value_is_valid || solution.value > bracket_low.value) {
+    summary->optimal_point = bracket_low;
+  } else {
+    summary->optimal_point = solution;
+  }
+
   summary->success = true;
 }
 
diff --git a/internal/ceres/line_search.h b/internal/ceres/line_search.h
index b3f03b4..6e39a65 100644
--- a/internal/ceres/line_search.h
+++ b/internal/ceres/line_search.h
@@ -35,6 +35,7 @@
 
 #include <string>
 #include <vector>
+#include "ceres/function_sample.h"
 #include "ceres/internal/eigen.h"
 #include "ceres/internal/port.h"
 #include "ceres/types.h"
@@ -43,7 +44,6 @@
 namespace internal {
 
 class Evaluator;
-struct FunctionSample;
 class LineSearchFunction;
 
 // Line search is another name for a one dimensional optimization
@@ -155,7 +155,6 @@
   struct Summary {
     Summary()
         : success(false),
-          optimal_step_size(0.0),
           num_function_evaluations(0),
           num_gradient_evaluations(0),
           num_iterations(0),
@@ -165,7 +164,7 @@
           total_time_in_seconds(-1.0) {}
 
     bool success;
-    double optimal_step_size;
+    FunctionSample optimal_point;
     int num_function_evaluations;
     int num_gradient_evaluations;
     int num_iterations;
@@ -245,9 +244,8 @@
   // evaluate_gradient controls whether the gradient will be evaluated
   // or not.
   //
-  // On return output->value_is_valid and output->gradient_is_valid
-  // indicate whether output->value and output->gradient can be used
-  // or not.
+  // On return output->*_is_valid indicates whether the
+  // corresponding fields have numerically valid values or not.
   void Evaluate(double x, bool evaluate_gradient, FunctionSample* output);
 
   double DirectionInfinityNorm() const;
@@ -256,18 +254,16 @@
   void ResetTimeStatistics();
   void TimeStatistics(double* cost_evaluation_time_in_seconds,
                       double* gradient_evaluation_time_in_seconds) const;
+  const Vector& position() const { return position_; }
+  const Vector& direction() const { return direction_; }
 
  private:
   Evaluator* evaluator_;
   Vector position_;
   Vector direction_;
 
-  // evaluation_point = Evaluator::Plus(position_,  x * direction_);
-  Vector evaluation_point_;
-
   // scaled_direction = x * direction_;
   Vector scaled_direction_;
-  Vector gradient_;
 
   // We may not exclusively own the evaluator (e.g. in the Trust Region
   // minimizer), hence we need to save the initial evaluation durations for the
diff --git a/internal/ceres/line_search_minimizer.cc b/internal/ceres/line_search_minimizer.cc
index 9516ba2..cc2e3c3 100644
--- a/internal/ceres/line_search_minimizer.cc
+++ b/internal/ceres/line_search_minimizer.cc
@@ -63,27 +63,14 @@
 namespace internal {
 namespace {
 
-// TODO(sameeragarwal): I think there is a small bug here, in that if
-// the evaluation fails, then the state can contain garbage. Look at
-// this more carefully.
-bool Evaluate(Evaluator* evaluator,
-              const Vector& x,
-              LineSearchMinimizer::State* state,
-              std::string* message) {
-  if (!evaluator->Evaluate(x.data(),
-                           &(state->cost),
-                           NULL,
-                           state->gradient.data(),
-                           NULL)) {
-    *message = "Gradient evaluation failed.";
-    return false;
-  }
-
+bool EvaluateGradientNorms(Evaluator* evaluator,
+                           const Vector& x,
+                           LineSearchMinimizer::State* state,
+                           std::string* message) {
   Vector negative_gradient = -state->gradient;
   Vector projected_gradient_step(x.size());
-  if (!evaluator->Plus(x.data(),
-                       negative_gradient.data(),
-                       projected_gradient_step.data())) {
+  if (!evaluator->Plus(
+          x.data(), negative_gradient.data(), projected_gradient_step.data())) {
     *message = "projected_gradient_step = Plus(x, -gradient) failed.";
     return false;
   }
@@ -116,9 +103,6 @@
   State current_state(num_parameters, num_effective_parameters);
   State previous_state(num_parameters, num_effective_parameters);
 
-  Vector delta(num_effective_parameters);
-  Vector x_plus_delta(num_parameters);
-
   IterationSummary iteration_summary;
   iteration_summary.iteration = 0;
   iteration_summary.step_is_valid = false;
@@ -131,7 +115,18 @@
   iteration_summary.step_solver_time_in_seconds = 0;
 
   // Do initial cost and gradient evaluation.
-  if (!Evaluate(evaluator, x, &current_state, &summary->message)) {
+  if (!evaluator->Evaluate(x.data(),
+                           &(current_state.cost),
+                           NULL,
+                           current_state.gradient.data(),
+                           NULL)) {
+    summary->termination_type = FAILURE;
+    summary->message = "Initial cost and jacobian evaluation failed.";
+    LOG_IF(WARNING, is_not_silent) << "Terminating: " << summary->message;
+    return;
+  }
+
+  if (!EvaluateGradientNorms(evaluator, x, &current_state, &summary->message)) {
     summary->termination_type = FAILURE;
     summary->message = "Initial cost and jacobian evaluation failed. "
         "More details: " + summary->message;
@@ -325,28 +320,34 @@
       break;
     }
 
-    current_state.step_size = line_search_summary.optimal_step_size;
-    delta = current_state.step_size * current_state.search_direction;
-
+    const FunctionSample& optimal_point = line_search_summary.optimal_point;
+    CHECK(optimal_point.vector_x_is_valid)
+        << "Congratulations, you found a bug in Ceres. Please report it.";
+    current_state.step_size = optimal_point.x;
     previous_state = current_state;
     iteration_summary.step_solver_time_in_seconds =
         WallTimeInSeconds() - iteration_start_time;
 
-    const double x_norm = x.norm();
-
-    if (!evaluator->Plus(x.data(), delta.data(), x_plus_delta.data())) {
-      summary->termination_type = FAILURE;
-      summary->message =
-          "x_plus_delta = Plus(x, delta) failed. This should not happen "
-          "as the step was valid when it was selected by the line search.";
-      LOG_IF(WARNING, is_not_silent) << "Terminating: " << summary->message;
-      break;
+    if (optimal_point.vector_gradient_is_valid) {
+      current_state.cost = optimal_point.value;
+      current_state.gradient = optimal_point.vector_gradient;
+    } else {
+      if (!evaluator->Evaluate(optimal_point.vector_x.data(),
+                               &(current_state.cost),
+                               NULL,
+                               current_state.gradient.data(),
+                               NULL)) {
+        summary->termination_type = FAILURE;
+        summary->message = "Cost and jacobian evaluation failed.";
+        LOG_IF(WARNING, is_not_silent) << "Terminating: " << summary->message;
+        return;
+      }
     }
 
-    if (!Evaluate(evaluator,
-                  x_plus_delta,
-                  &current_state,
-                  &summary->message)) {
+    if (!EvaluateGradientNorms(evaluator,
+                               optimal_point.vector_x,
+                               &current_state,
+                               &summary->message)) {
       summary->termination_type = FAILURE;
       summary->message =
           "Step failed to evaluate. This should not happen as the step was "
@@ -357,8 +358,9 @@
     }
 
     // Compute the norm of the step in the ambient space.
-    iteration_summary.step_norm = (x_plus_delta - x).norm();
-    x = x_plus_delta;
+    iteration_summary.step_norm = (optimal_point.vector_x - x).norm();
+    const double x_norm = x.norm();
+    x = optimal_point.vector_x;
 
     iteration_summary.gradient_max_norm = current_state.gradient_max_norm;
     iteration_summary.gradient_norm = sqrt(current_state.gradient_squared_norm);
diff --git a/internal/ceres/trust_region_minimizer.cc b/internal/ceres/trust_region_minimizer.cc
index d809906..3a779c6 100644
--- a/internal/ceres/trust_region_minimizer.cc
+++ b/internal/ceres/trust_region_minimizer.cc
@@ -586,7 +586,7 @@
       line_search_summary.total_time_in_seconds;
 
   if (line_search_summary.success) {
-    *delta *= line_search_summary.optimal_step_size;
+    *delta *= line_search_summary.optimal_point.x;
   }
 }