Non-monotonic trust region algorithm.

Non-monotonic trust region algorithm based on the work of Phil Toint, as
described in

Non-monotone trust region algorithms for nonlinear
optimization subject to convex constraints.
Philippe L. Toint
Mathematical Programming 77 (1997), 69-94.

Change-Id: I199ecc644e8d1a8cb43666052aef66fb93e15569
diff --git a/examples/bundle_adjuster.cc b/examples/bundle_adjuster.cc
index 5e1b3a6..bb8d3b4 100644
--- a/examples/bundle_adjuster.cc
+++ b/examples/bundle_adjuster.cc
@@ -86,6 +86,8 @@
 DEFINE_bool(use_block_amd, true, "Use a block oriented fill reducing ordering.");
 DEFINE_string(trust_region_strategy, "lm", "Options are: lm, dogleg");
 DEFINE_double(max_solver_time, 1e32, "Maximum solve time in seconds.");
+DEFINE_bool(nonmonotonic_steps, false, "Trust region algorithm can use"
+            " nonmonotonic steps");
 
 namespace ceres {
 namespace examples {
@@ -221,6 +223,7 @@
   options->num_threads = FLAGS_num_threads;
   options->eta = FLAGS_eta;
   options->max_solver_time_in_seconds = FLAGS_max_solver_time;
+  options->use_nonmonotonic_steps = FLAGS_nonmonotonic_steps;
   if (FLAGS_trust_region_strategy == "lm") {
     options->trust_region_strategy_type = LEVENBERG_MARQUARDT;
   } else if (FLAGS_trust_region_strategy == "dogleg") {
diff --git a/include/ceres/solver.h b/include/ceres/solver.h
index 4d59bde..8a58cb1 100644
--- a/include/ceres/solver.h
+++ b/include/ceres/solver.h
@@ -58,6 +58,8 @@
     // Default constructor that sets up a generic sparse problem.
     Options() {
       trust_region_strategy_type = LEVENBERG_MARQUARDT;
+      use_nonmonotonic_steps = false;
+      max_consecutive_nonmonotonic_steps = 5;
       max_num_iterations = 50;
       max_solver_time_in_seconds = 1e9;
       num_threads = 1;
@@ -119,6 +121,34 @@
 
     TrustRegionStrategyType trust_region_strategy_type;
 
+    // The classical trust region methods are descent methods, in that
+    // they only accept a point if it strictly reduces the value of
+    // the objective function.
+    //
+    // Relaxing this requirement allows the algorithm to be more
+    // efficient in the long term at the cost of some local increase
+    // in the value of the objective function.
+    //
+    // This is because allowing for non-decreasing objective function
+    // values in a principled manner allows the algorithm to "jump over
+    // boulders" as the method is not restricted to move into narrow
+    // valleys while preserving its convergence properties.
+    //
+    // Setting use_nonmonotonic_steps to true enables the
+    // non-monotonic trust region algorithm as described by Conn,
+    // Gould & Toint in "Trust Region Methods", Section 10.1.
+    //
+    // The parameter max_consecutive_nonmonotonic_steps controls the
+    // window size used by the step selection algorithm to accept
+    // non-monotonic steps.
+    //
+    // Even though the value of the objective function may be larger
+    // than the minimum value encountered over the course of the
+    // optimization, the final parameters returned to the user are the
+    // ones corresponding to the minimum cost over all iterations.
+    bool use_nonmonotonic_steps;
+    int max_consecutive_nonmonotonic_steps;
+
     // Maximum number of iterations for the minimizer to run for.
     int max_num_iterations;
 
diff --git a/internal/ceres/minimizer.h b/internal/ceres/minimizer.h
index 70b530f..28b76ce 100644
--- a/internal/ceres/minimizer.h
+++ b/internal/ceres/minimizer.h
@@ -68,6 +68,9 @@
       min_relative_decrease = options.min_relative_decrease;
       eta = options.eta;
       jacobi_scaling = options.jacobi_scaling;
+      use_nonmonotonic_steps = options.use_nonmonotonic_steps;
+      max_consecutive_nonmonotonic_steps =
+          options.max_consecutive_nonmonotonic_steps;
       lsqp_dump_directory = options.lsqp_dump_directory;
       lsqp_iterations_to_dump = options.lsqp_iterations_to_dump;
       lsqp_dump_format_type = options.lsqp_dump_format_type;
@@ -96,6 +99,8 @@
     double min_relative_decrease;
     double eta;
     bool jacobi_scaling;
+    bool use_nonmonotonic_steps;
+    int max_consecutive_nonmonotonic_steps;
     vector<int> lsqp_iterations_to_dump;
     DumpFormatType lsqp_dump_format_type;
     string lsqp_dump_directory;
diff --git a/internal/ceres/trust_region_minimizer.cc b/internal/ceres/trust_region_minimizer.cc
index 357d822..a6cb830 100644
--- a/internal/ceres/trust_region_minimizer.cc
+++ b/internal/ceres/trust_region_minimizer.cc
@@ -132,7 +132,8 @@
   const int num_effective_parameters = evaluator->NumEffectiveParameters();
   const int num_residuals = evaluator->NumResiduals();
 
-  VectorRef x(parameters, num_parameters);
+  VectorRef x_min(parameters, num_parameters);
+  Vector x = x_min;
   double x_norm = x.norm();
 
   Vector residuals(num_residuals);
@@ -145,8 +146,8 @@
 
   IterationSummary iteration_summary;
   iteration_summary.iteration = 0;
-  iteration_summary.step_is_valid=false;
-  iteration_summary.step_is_successful=false;
+  iteration_summary.step_is_valid = false;
+  iteration_summary.step_is_successful = false;
   iteration_summary.cost = summary->initial_cost;
   iteration_summary.cost_change = 0.0;
   iteration_summary.gradient_max_norm = 0.0;
@@ -167,6 +168,13 @@
     return;
   }
 
+  int num_consecutive_nonmonotonic_steps = 0;
+  double minimum_cost = cost;
+  double reference_cost = cost;
+  double accumulated_reference_model_cost_change = 0.0;
+  double candidate_cost = cost;
+  double accumulated_candidate_model_cost_change = 0.0;
+
   gradient.setZero();
   jacobian->LeftMultiply(residuals.data(), gradient.data());
   iteration_summary.gradient_max_norm = gradient.lpNorm<Eigen::Infinity>();
@@ -366,10 +374,43 @@
         return;
       }
 
-      iteration_summary.relative_decrease =
+      const double relative_decrease =
           iteration_summary.cost_change / model_cost_change;
+
+      const double historical_relative_decrease =
+          (reference_cost - new_cost) /
+          (accumulated_reference_model_cost_change + model_cost_change);
+
+      // If monotonic steps are being used, then the relative_decrease
+      // is the usual ratio of the change in objective function value
+      // divided by the change in model cost.
+      //
+      // If non-monotonic steps are allowed, then we take the maximum
+      // of the relative_decrease and the
+      // historical_relative_decrease, which measures the increase
+      // from a reference iteration. The model cost change is
+      // estimated by accumulating the model cost changes since the
+      // reference iteration. The historical relative_decrease offers
+      // a boost to a step which is not too bad compared to the
+      // reference iteration, allowing for non-monotonic steps.
+      iteration_summary.relative_decrease =
+          options_.use_nonmonotonic_steps
+          ? max(relative_decrease, historical_relative_decrease)
+          : relative_decrease;
+
       iteration_summary.step_is_successful =
           iteration_summary.relative_decrease > options_.min_relative_decrease;
+
+      if (iteration_summary.step_is_successful) {
+        accumulated_candidate_model_cost_change += model_cost_change;
+        accumulated_reference_model_cost_change += model_cost_change;
+        if (relative_decrease <= options_.min_relative_decrease) {
+          VLOG(2) << "Non-monotonic step! "
+                  << " relative_decrease: " << relative_decrease
+                  << " historical_relative_decrease: "
+                  << historical_relative_decrease;
+        }
+      }
     }
 
     if (iteration_summary.step_is_successful) {
@@ -377,6 +418,7 @@
       strategy->StepAccepted(iteration_summary.relative_decrease);
       x = x_plus_delta;
       x_norm = x.norm();
+
       // Step looks good, evaluate the residuals and Jacobian at this
       // point.
       if (!evaluator->Evaluate(x.data(),
@@ -405,6 +447,47 @@
       if (options_.jacobi_scaling) {
         jacobian->ScaleColumns(scale.data());
       }
+
+      // Update the best, reference and candidate iterates.
+      //
+      // Based on algorithm 10.1.2 (page 357) of "Trust Region
+      // Methods" by Conn Gould & Toint, or equations 33-40 of
+      // "Non-monotone trust-region algorithms for nonlinear
+      // optimization subject to convex constraints" by Phil Toint,
+      // Mathematical Programming, 77, 1997.
+      if (cost < minimum_cost) {
+        // A step that improves solution quality was found.
+        x_min = x;
+        minimum_cost = cost;
+        // Set the candidate iterate to the current point.
+        candidate_cost = cost;
+        num_consecutive_nonmonotonic_steps = 0;
+        accumulated_candidate_model_cost_change = 0.0;
+      } else {
+        ++num_consecutive_nonmonotonic_steps;
+        if (cost > candidate_cost) {
+          // The current iterate has a higher cost than the
+          // candidate iterate. Set the candidate to this point.
+          VLOG(2) << "Updating the candidate iterate to the current point.";
+          candidate_cost = cost;
+          accumulated_candidate_model_cost_change = 0.0;
+        }
+
+        // At this point we have made too many non-monotonic steps and
+        // we are going to reset the value of the reference iterate so
+        // as to force the algorithm to descend.
+        //
+        // This is the case because the candidate iterate has a value
+        // greater than minimum_cost but smaller than the reference
+        // iterate.
+        if (num_consecutive_nonmonotonic_steps ==
+            options_.max_consecutive_nonmonotonic_steps) {
+          VLOG(2) << "Resetting the reference point to the candidate point";
+          reference_cost = candidate_cost;
+          accumulated_reference_model_cost_change =
+              accumulated_candidate_model_cost_change;
+        }
+      }
     } else {
       ++summary->num_unsuccessful_steps;
       if (iteration_summary.step_is_valid) {