Do not implicitly negate the step in the TrustRegionMinimizer.
In the TrustRegionMinimizer, the step is currently implicitly negated.
This is done so that the linearized residual is |r - J*step|^2, which
corresponds to J*step = r, so neither J nor r has to be modified.
However, this leads to the rather unintuitive situation that the
strategy returns a step in the positive gradient direction, which one
would expect to increase the function value. One way to fix this would
be to rename the "step" parameter in the strategy to "negative_step"
and document it.
This patch instead moves the negation inside the strategy, just around
the linear solver call, so that it happens in a local context and is
easier to document.
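
A minimal standalone sketch of the sign convention (it uses Eigen
directly rather than Ceres' LinearSolver interface, and the small J and
r below are made up for illustration): solving Jy = r and negating the
result afterwards yields the same step as solving Jx = -r directly.

    #include <Eigen/Dense>
    #include <iostream>

    int main() {
      // Hypothetical overdetermined system: 3x2 Jacobian J, residual r.
      Eigen::MatrixXd J(3, 2);
      J << 1, 2,
           0, 1,
           1, 0;
      Eigen::VectorXd r(3);
      r << 1, 2, 3;

      // Solve J y = r in the least-squares sense, then negate: x = -y.
      const Eigen::VectorXd y = J.colPivHouseholderQr().solve(r);
      const Eigen::VectorXd x = -y;

      // x agrees with the direct solution of J x = -r (the descent step).
      const Eigen::VectorXd x_direct = J.colPivHouseholderQr().solve(-r);
      std::cout << (x - x_direct).norm() << "\n";  // prints ~0
      return 0;
    }
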
Change-Id: Idb258149a01f61c64e22128ea221c5a30cd89c89
diff --git a/internal/ceres/dogleg_strategy.cc b/internal/ceres/dogleg_strategy.cc
index 620bda7..44d484f 100644
--- a/internal/ceres/dogleg_strategy.cc
+++ b/internal/ceres/dogleg_strategy.cc
@@ -142,7 +142,7 @@
}
void DoglegStrategy::ComputeCauchyPoint(SparseMatrix* jacobian) {
- // alpha * gradient is the Cauchy point.
+ // alpha * -gradient is the Cauchy point.
Vector Jg(jacobian->num_rows());
Jg.setZero();
// The Jacobian is scaled implicitly by computing J * (D^-1 * (D^-1 * g))
@@ -173,7 +173,7 @@
// the trust region. Rescale the Cauchy point to the trust region
// and return.
if (gradient_norm * alpha_ >= radius_) {
- dogleg_step = (radius_ / gradient_norm) * gradient_;
+ dogleg_step = -(radius_ / gradient_norm) * gradient_;
dogleg_step_norm_ = radius_;
dogleg_step.array() /= diagonal_.array();
VLOG(3) << "Cauchy step size: " << dogleg_step_norm_
@@ -186,15 +186,15 @@
// points and the point on it which intersects the trust region
// boundary.
- // a = alpha * gradient
+ // a = alpha * -gradient
// b = gauss_newton_step
- const double b_dot_a = alpha_ * gradient_.dot(gauss_newton_step_);
+ const double b_dot_a = -alpha_ * gradient_.dot(gauss_newton_step_);
const double a_squared_norm = pow(alpha_ * gradient_norm, 2.0);
const double b_minus_a_squared_norm =
a_squared_norm - 2 * b_dot_a + pow(gauss_newton_norm, 2);
// c = a' (b - a)
- // = alpha * gradient' gauss_newton_step - alpha^2 |gradient|^2
+ // = alpha * -gradient' gauss_newton_step - alpha^2 |gradient|^2
const double c = b_dot_a - a_squared_norm;
const double d = sqrt(c * c + b_minus_a_squared_norm *
(pow(radius_, 2.0) - a_squared_norm));
@@ -203,7 +203,7 @@
(c <= 0)
? (d - c) / b_minus_a_squared_norm
: (radius_ * radius_ - a_squared_norm) / (d + c);
- dogleg_step = (alpha_ * (1.0 - beta)) * gradient_ + beta * gauss_newton_step_;
+ dogleg_step = (-alpha_ * (1.0 - beta)) * gradient_ + beta * gauss_newton_step_;
dogleg_step_norm_ = dogleg_step.norm();
dogleg_step.array() /= diagonal_.array();
VLOG(3) << "Dogleg step size: " << dogleg_step_norm_
@@ -255,6 +255,9 @@
lm_diagonal_ = diagonal_ * std::sqrt(mu_);
solve_options.D = lm_diagonal_.data();
+ // As in the LevenbergMarquardtStrategy, solve Jy = r instead
+ // of Jx = -r and later set x = -y to avoid having to modify
+ // either jacobian or residuals.
InvalidateArray(n, gauss_newton_step_.data());
linear_solver_summary = linear_solver_->Solve(jacobian,
residuals,
@@ -277,7 +280,7 @@
// = - D (J^T J)^-1 D D^-1 g
// = D -(J^T J)^-1 g
//
- gauss_newton_step_.array() *= diagonal_.array();
+ gauss_newton_step_.array() *= -diagonal_.array();
return linear_solver_summary;
}
diff --git a/internal/ceres/levenberg_marquardt_strategy.cc b/internal/ceres/levenberg_marquardt_strategy.cc
index 298748f..576648a 100644
--- a/internal/ceres/levenberg_marquardt_strategy.cc
+++ b/internal/ceres/levenberg_marquardt_strategy.cc
@@ -98,12 +98,18 @@
// to happen for the DENSE_QR and then DENSE_SCHUR solver when
the Jacobian is severely rank deficient and mu is too small.
InvalidateArray(num_parameters, step);
+
+ // Instead of solving Jx = -r, solve Jy = r.
+ // Then x can be found as x = -y, but the inputs jacobian and residuals
+ // do not need to be modified.
LinearSolver::Summary linear_solver_summary =
linear_solver_->Solve(jacobian, residuals, solve_options, step);
if (linear_solver_summary.termination_type == FAILURE ||
!IsArrayValid(num_parameters, step)) {
LOG(WARNING) << "Linear solver failure. Failed to compute a finite step.";
linear_solver_summary.termination_type = FAILURE;
+ } else {
+ VectorRef(step, num_parameters) *= -1.0;
}
reuse_diagonal_ = true;
diff --git a/internal/ceres/trust_region_minimizer.cc b/internal/ceres/trust_region_minimizer.cc
index d08255f..476cc9d 100644
--- a/internal/ceres/trust_region_minimizer.cc
+++ b/internal/ceres/trust_region_minimizer.cc
@@ -271,8 +271,8 @@
double new_model_cost = 0.0;
if (strategy_summary.termination_type != FAILURE) {
- // new_model_cost = 1/2 |J * step - f|^2
- model_residuals = -residuals;
+ // new_model_cost = 1/2 |f + J * step|^2
+ model_residuals = residuals;
jacobian->RightMultiply(trust_region_step.data(), model_residuals.data());
new_model_cost = model_residuals.squaredNorm() / 2.0;
@@ -328,7 +328,7 @@
const double model_cost_change = max(kEpsilon, cost - new_model_cost);
// Undo the Jacobian column scaling.
- delta = -(trust_region_step.array() * scale.array()).matrix();
+ delta = (trust_region_step.array() * scale.array()).matrix();
iteration_summary.step_norm = delta.norm();
// Convergence based on parameter_tolerance.