Non-monotonic trust region algorithm.
Non-monotonic trust region algorithm based on the work of Phil Toint, as
described in
Non-monotone trust region algorithms for nonlinear
optimization subject to convex constraints.
Philippe L. Toint
Mathematical Programming 77 (1997), 69-94.
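
With use_nonmonotonic_steps enabled, the step acceptance ratio becomes
(as implemented in trust_region_minimizer.cc below)

  relative_decrease = max(cost_change / model_cost_change,
                          (reference_cost - new_cost) /
                          (accumulated_reference_model_cost_change +
                           model_cost_change));

and the reference iterate is reset to the candidate iterate after
max_consecutive_nonmonotonic_steps consecutive non-monotonic steps.
The parameters returned to the user always correspond to the minimum
cost seen over all iterations.

A minimal usage sketch, assuming a ceres::Problem named problem has
already been built (the setup is illustrative, not part of this
change):

  ceres::Solver::Options options;
  options.use_nonmonotonic_steps = true;
  options.max_consecutive_nonmonotonic_steps = 5;  // default window
  ceres::Solver::Summary summary;
  ceres::Solve(options, &problem, &summary);
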
Change-Id: I199ecc644e8d1a8cb43666052aef66fb93e15569
diff --git a/examples/bundle_adjuster.cc b/examples/bundle_adjuster.cc
index 5e1b3a6..bb8d3b4 100644
--- a/examples/bundle_adjuster.cc
+++ b/examples/bundle_adjuster.cc
@@ -86,6 +86,8 @@
DEFINE_bool(use_block_amd, true, "Use a block oriented fill reducing ordering.");
DEFINE_string(trust_region_strategy, "lm", "Options are: lm, dogleg");
DEFINE_double(max_solver_time, 1e32, "Maximum solve time in seconds.");
+DEFINE_bool(nonmonotonic_steps, false, "Trust region algorithm can use"
+            " nonmonotonic steps");
namespace ceres {
namespace examples {
@@ -221,6 +223,7 @@
options->num_threads = FLAGS_num_threads;
options->eta = FLAGS_eta;
options->max_solver_time_in_seconds = FLAGS_max_solver_time;
+ options->use_nonmonotonic_steps = FLAGS_nonmonotonic_steps;
if (FLAGS_trust_region_strategy == "lm") {
options->trust_region_strategy_type = LEVENBERG_MARQUARDT;
} else if (FLAGS_trust_region_strategy == "dogleg") {
diff --git a/include/ceres/solver.h b/include/ceres/solver.h
index 4d59bde..8a58cb1 100644
--- a/include/ceres/solver.h
+++ b/include/ceres/solver.h
@@ -58,6 +58,8 @@
// Default constructor that sets up a generic sparse problem.
Options() {
trust_region_strategy_type = LEVENBERG_MARQUARDT;
+ use_nonmonotonic_steps = false;
+ max_consecutive_nonmonotonic_steps = 5;
max_num_iterations = 50;
max_solver_time_in_seconds = 1e9;
num_threads = 1;
@@ -119,6 +121,34 @@
TrustRegionStrategyType trust_region_strategy_type;
+ // The classical trust region methods are descent methods, in that
+ // they only accept a point if it strictly reduces the value of
+ // the objective function.
+ //
+ // Relaxing this requirement allows the algorithm to be more
+ // efficient in the long term at the cost of some local increase
+ // in the value of the objective function.
+ //
+ // This is because allowing for non-decreasing objective function
+ // values in a principled manner allows the algorithm to "jump over
+ // boulders", as the method is not restricted to moving through
+ // narrow valleys, while still preserving its convergence properties.
+ //
+ // Setting use_nonmonotonic_steps to true enables the
+ // non-monotonic trust region algorithm as described by Conn,
+ // Gould & Toint in "Trust Region Methods", Section 10.1.
+ //
+ // The parameter max_consecutive_nonmonotonic_steps controls the
+ // window size used by the step selection algorithm to accept
+ // non-monotonic steps.
+ //
+ // Even though the value of the objective function may be larger
+ // than the minimum value encountered over the course of the
+ // optimization, the final parameters returned to the user are the
+ // ones corresponding to the minimum cost over all iterations.
+ bool use_nonmonotonic_steps;
+ int max_consecutive_nonmonotonic_steps;
+
// Maximum number of iterations for the minimizer to run for.
int max_num_iterations;
diff --git a/internal/ceres/minimizer.h b/internal/ceres/minimizer.h
index 70b530f..28b76ce 100644
--- a/internal/ceres/minimizer.h
+++ b/internal/ceres/minimizer.h
@@ -68,6 +68,9 @@
min_relative_decrease = options.min_relative_decrease;
eta = options.eta;
jacobi_scaling = options.jacobi_scaling;
+ use_nonmonotonic_steps = options.use_nonmonotonic_steps;
+ max_consecutive_nonmonotonic_steps =
+ options.max_consecutive_nonmonotonic_steps;
lsqp_dump_directory = options.lsqp_dump_directory;
lsqp_iterations_to_dump = options.lsqp_iterations_to_dump;
lsqp_dump_format_type = options.lsqp_dump_format_type;
@@ -96,6 +99,8 @@
double min_relative_decrease;
double eta;
bool jacobi_scaling;
+ bool use_nonmonotonic_steps;
+ int max_consecutive_nonmonotonic_steps;
vector<int> lsqp_iterations_to_dump;
DumpFormatType lsqp_dump_format_type;
string lsqp_dump_directory;
diff --git a/internal/ceres/trust_region_minimizer.cc b/internal/ceres/trust_region_minimizer.cc
index 357d822..a6cb830 100644
--- a/internal/ceres/trust_region_minimizer.cc
+++ b/internal/ceres/trust_region_minimizer.cc
@@ -132,7 +132,8 @@
const int num_effective_parameters = evaluator->NumEffectiveParameters();
const int num_residuals = evaluator->NumResiduals();
- VectorRef x(parameters, num_parameters);
+ VectorRef x_min(parameters, num_parameters);
+ Vector x = x_min;
double x_norm = x.norm();
Vector residuals(num_residuals);
@@ -145,8 +146,8 @@
IterationSummary iteration_summary;
iteration_summary.iteration = 0;
- iteration_summary.step_is_valid=false;
- iteration_summary.step_is_successful=false;
+ iteration_summary.step_is_valid = false;
+ iteration_summary.step_is_successful = false;
iteration_summary.cost = summary->initial_cost;
iteration_summary.cost_change = 0.0;
iteration_summary.gradient_max_norm = 0.0;
@@ -167,6 +168,13 @@
return;
}
+ int num_consecutive_nonmonotonic_steps = 0;
+ double minimum_cost = cost;
+ double reference_cost = cost;
+ double accumulated_reference_model_cost_change = 0.0;
+ double candidate_cost = cost;
+ double accumulated_candidate_model_cost_change = 0.0;
+
gradient.setZero();
jacobian->LeftMultiply(residuals.data(), gradient.data());
iteration_summary.gradient_max_norm = gradient.lpNorm<Eigen::Infinity>();
@@ -366,10 +374,43 @@
return;
}
- iteration_summary.relative_decrease =
+ const double relative_decrease =
iteration_summary.cost_change / model_cost_change;
+
+ const double historical_relative_decrease =
+ (reference_cost - new_cost) /
+ (accumulated_reference_model_cost_change + model_cost_change);
+
+ // If monotonic steps are being used, then the relative_decrease
+ // is the usual ratio of the change in objective function value
+ // divided by the change in model cost.
+ //
+ // If non-monotonic steps are allowed, then we take the maximum
+ // of the relative_decrease and the
+ // historical_relative_decrease, which measures the decrease
+ // relative to a reference iteration. The model cost change is
+ // estimated by accumulating the model cost changes since the
+ // reference iteration. The historical relative_decrease offers
+ // a boost to a step which is not too bad compared to the
+ // reference iteration, allowing for non-monotonic steps.
+ iteration_summary.relative_decrease =
+ options_.use_nonmonotonic_steps
+ ? max(relative_decrease, historical_relative_decrease)
+ : relative_decrease;
+
iteration_summary.step_is_successful =
iteration_summary.relative_decrease > options_.min_relative_decrease;
+
+ if (iteration_summary.step_is_successful) {
+ accumulated_candidate_model_cost_change += model_cost_change;
+ accumulated_reference_model_cost_change += model_cost_change;
+ if (relative_decrease <= options_.min_relative_decrease) {
+ VLOG(2) << "Non-monotonic step! "
+ << " relative_decrease: " << relative_decrease
+ << " historical_relative_decrease: "
+ << historical_relative_decrease;
+ }
+ }
}
if (iteration_summary.step_is_successful) {
@@ -377,6 +418,7 @@
strategy->StepAccepted(iteration_summary.relative_decrease);
x = x_plus_delta;
x_norm = x.norm();
+
// Step looks good, evaluate the residuals and Jacobian at this
// point.
if (!evaluator->Evaluate(x.data(),
@@ -405,6 +447,47 @@
if (options_.jacobi_scaling) {
jacobian->ScaleColumns(scale.data());
}
+
+ // Update the best, reference and candidate iterates.
+ //
+ // Based on algorithm 10.1.2 (page 357) of "Trust Region
+ // Methods" by Conn Gould & Toint, or equations 33-40 of
+ // "Non-monotone trust-region algorithms for nonlinear
+ // optimization subject to convex constraints" by Phil Toint,
+ // Mathematical Programming, 77, 1997.
+ if (cost < minimum_cost) {
+ // A step that improves solution quality was found.
+ x_min = x;
+ minimum_cost = cost;
+ // Set the candidate iterate to the current point.
+ candidate_cost = cost;
+ num_consecutive_nonmonotonic_steps = 0;
+ accumulated_candidate_model_cost_change = 0.0;
+ } else {
+ ++num_consecutive_nonmonotonic_steps;
+ if (cost > candidate_cost) {
+ // The current iterate has a higher cost than the
+ // candidate iterate. Set the candidate to this point.
+ VLOG(2) << "Updating the candidate iterate to the current point.";
+ candidate_cost = cost;
+ accumulated_candidate_model_cost_change = 0.0;
+ }
+
+ // If we have taken too many consecutive non-monotonic steps,
+ // reset the reference iterate so as to force the algorithm to
+ // descend.
+ //
+ // This works because the candidate iterate has a cost greater
+ // than minimum_cost but smaller than the cost of the reference
+ // iterate.
+ if (num_consecutive_nonmonotonic_steps ==
+ options_.max_consecutive_nonmonotonic_steps) {
+ VLOG(2) << "Resetting the reference point to the candidate point";
+ reference_cost = candidate_cost;
+ accumulated_reference_model_cost_change =
+ accumulated_candidate_model_cost_change;
+ }
+ }
} else {
++summary->num_unsuccessful_steps;
if (iteration_summary.step_is_valid) {