Do not implicitly negate the step in the TrustRegionMinimizer.

In the TrustRegionMinimizer, the step is currently implicitly negated.
This is done so that the linearized residual is |r - J*step|^2, which
corresponds to J*step = r, so that neither J nor r has to be modified.
However, it leads to the rather unintuitive situation that the strategy
returns a step in the positive gradient direction, which one would
expect to increase the function value. One way to fix this would be to
rename the "step" parameter in the strategy to "negative_step" and
document it. This patch instead moves the negation inside the strategy,
just around the linear solver call, so that it happens in a local
context where it is easy to document.

Change-Id: Idb258149a01f61c64e22128ea221c5a30cd89c89
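The sign convention is easy to check in isolation. Below is a minimal
sketch in plain Eigen (not the Ceres linear solver API) showing that
solving Jy = r and then setting x = -y produces the same least-squares
step as solving Jx = -r directly, without touching J or r:

    #include <iostream>
    #include <Eigen/Dense>

    int main() {
      // Small overdetermined system: J is 3x2, r is the residual vector.
      Eigen::MatrixXd J(3, 2);
      J << 1, 0,
           0, 1,
           1, 1;
      Eigen::VectorXd r(3);
      r << 1, 2, 3;

      // Solve J * y = r in the least-squares sense, then negate.
      const Eigen::VectorXd y = J.colPivHouseholderQr().solve(r);
      const Eigen::VectorXd x = -y;

      // Direct solve of J * x = -r for comparison; the difference is ~0.
      const Eigen::VectorXd x_direct = J.colPivHouseholderQr().solve(-r);
      std::cout << (x - x_direct).norm() << std::endl;
      return 0;
    }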
diff --git a/internal/ceres/dogleg_strategy.cc b/internal/ceres/dogleg_strategy.cc
index 620bda7..44d484f 100644
--- a/internal/ceres/dogleg_strategy.cc
+++ b/internal/ceres/dogleg_strategy.cc
@@ -142,7 +142,7 @@
 }
 
 void DoglegStrategy::ComputeCauchyPoint(SparseMatrix* jacobian) {
-  // alpha * gradient is the Cauchy point.
+  // alpha * -gradient is the Cauchy point.
   Vector Jg(jacobian->num_rows());
   Jg.setZero();
   // The Jacobian is scaled implicitly by computing J * (D^-1 * (D^-1 * g))
@@ -173,7 +173,7 @@
   // the trust region. Rescale the Cauchy point to the trust region
   // and return.
   if (gradient_norm * alpha_ >= radius_) {
-    dogleg_step = (radius_ / gradient_norm) * gradient_;
+    dogleg_step = -(radius_ / gradient_norm) * gradient_;
     dogleg_step_norm_ = radius_;
     dogleg_step.array() /= diagonal_.array();
     VLOG(3) << "Cauchy step size: " << dogleg_step_norm_
@@ -186,15 +186,15 @@
   // points and the point on it which intersects the trust region
   // boundary.
 
-  // a = alpha * gradient
+  // a = alpha * -gradient
   // b = gauss_newton_step
-  const double b_dot_a = alpha_ * gradient_.dot(gauss_newton_step_);
+  const double b_dot_a = -alpha_ * gradient_.dot(gauss_newton_step_);
   const double a_squared_norm = pow(alpha_ * gradient_norm, 2.0);
   const double b_minus_a_squared_norm =
       a_squared_norm - 2 * b_dot_a + pow(gauss_newton_norm, 2);
 
   // c = a' (b - a)
-  //   = alpha * gradient' gauss_newton_step - alpha^2 |gradient|^2
+  //   = alpha * -gradient' gauss_newton_step - alpha^2 |gradient|^2
   const double c = b_dot_a - a_squared_norm;
   const double d = sqrt(c * c + b_minus_a_squared_norm *
                         (pow(radius_, 2.0) - a_squared_norm));
@@ -203,7 +203,7 @@
       (c <= 0)
       ? (d - c) / b_minus_a_squared_norm
       : (radius_ * radius_ - a_squared_norm) / (d + c);
-  dogleg_step = (alpha_ * (1.0 - beta)) * gradient_ + beta * gauss_newton_step_;
+  dogleg_step = (-alpha_ * (1.0 - beta)) * gradient_ + beta * gauss_newton_step_;
   dogleg_step_norm_ = dogleg_step.norm();
   dogleg_step.array() /= diagonal_.array();
   VLOG(3) << "Dogleg step size: " << dogleg_step_norm_
@@ -255,6 +255,9 @@
   lm_diagonal_ = diagonal_ * std::sqrt(mu_);
   solve_options.D = lm_diagonal_.data();
 
+  // As in the LevenbergMarquardtStrategy, solve Jy = r instead
+  // of Jx = -r and later set x = -y to avoid having to modify
+  // either jacobian or residuals.
   InvalidateArray(n, gauss_newton_step_.data());
   linear_solver_summary = linear_solver_->Solve(jacobian,
                                                 residuals,
@@ -277,7 +280,7 @@
   //   = - D (J^T J)^-1 D D^-1 g
   //   = D -(J^T J)^-1 g
   //
-  gauss_newton_step_.array() *= diagonal_.array();
+  gauss_newton_step_.array() *= -diagonal_.array();
 
   return linear_solver_summary;
 }
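For reference, the geometry the hunks above implement, as a dense Eigen
sketch with hypothetical names (the real code additionally applies the
D^-1 scaling and handles the case where the full Gauss-Newton step
already lies inside the trust region): a = -alpha * gradient is the
Cauchy point, b is the Gauss-Newton step, and beta solves
|a + beta * (b - a)|^2 = radius^2 so that the dogleg step ends on the
trust region boundary.

    #include <cmath>
    #include <Eigen/Dense>

    Eigen::VectorXd DoglegStep(const Eigen::VectorXd& gradient,
                               const Eigen::VectorXd& gauss_newton_step,
                               double alpha, double radius) {
      const Eigen::VectorXd a = -alpha * gradient;  // Cauchy point.
      const Eigen::VectorXd& b = gauss_newton_step;
      if (a.norm() >= radius) {
        // Cauchy point outside the trust region: return the rescaled
        // steepest descent step.
        return -(radius / gradient.norm()) * gradient;
      }
      const double a2 = a.squaredNorm();
      const double c = b.dot(a) - a2;  // c = a' (b - a)
      const double bma2 = (b - a).squaredNorm();
      const double d = std::sqrt(c * c + bma2 * (radius * radius - a2));
      // Two algebraically equivalent forms, chosen for numerical stability.
      const double beta =
          (c <= 0) ? (d - c) / bma2 : (radius * radius - a2) / (d + c);
      return a + beta * (b - a);  // = (1 - beta) * a + beta * b.
    }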
diff --git a/internal/ceres/levenberg_marquardt_strategy.cc b/internal/ceres/levenberg_marquardt_strategy.cc
index 298748f..576648a 100644
--- a/internal/ceres/levenberg_marquardt_strategy.cc
+++ b/internal/ceres/levenberg_marquardt_strategy.cc
@@ -98,12 +98,18 @@
   // to happen for the DENSE_QR and then DENSE_SCHUR solver when
   // the Jacobin is severly rank deficient and mu is too small.
   InvalidateArray(num_parameters, step);
+
+  // Instead of solving Jx = -r, solve Jy = r.
+  // Then x can be found as x = -y, but the inputs jacobian and residuals
+  // do not need to be modified.
   LinearSolver::Summary linear_solver_summary =
       linear_solver_->Solve(jacobian, residuals, solve_options, step);
   if (linear_solver_summary.termination_type == FAILURE ||
       !IsArrayValid(num_parameters, step)) {
     LOG(WARNING) << "Linear solver failure. Failed to compute a finite step.";
     linear_solver_summary.termination_type = FAILURE;
+  } else {
+    VectorRef(step, num_parameters) *= -1.0;
   }
 
   reuse_diagonal_ = true;
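VectorRef here is Ceres' Eigen::Map typedef over a raw double array, so
the added else-branch negates the solver output in place in the caller's
step buffer. A minimal equivalent in plain Eigen:

    #include <Eigen/Dense>

    // Negate a raw parameter block in place, as the else-branch does.
    void NegateInPlace(double* step, int num_parameters) {
      Eigen::Map<Eigen::VectorXd>(step, num_parameters) *= -1.0;
    }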
diff --git a/internal/ceres/trust_region_minimizer.cc b/internal/ceres/trust_region_minimizer.cc
index d08255f..476cc9d 100644
--- a/internal/ceres/trust_region_minimizer.cc
+++ b/internal/ceres/trust_region_minimizer.cc
@@ -271,8 +271,8 @@
 
     double new_model_cost = 0.0;
     if (strategy_summary.termination_type != FAILURE) {
-      // new_model_cost = 1/2 |J * step - f|^2
-      model_residuals = -residuals;
+      // new_model_cost = 1/2 |f + J * step|^2
+      model_residuals = residuals;
       jacobian->RightMultiply(trust_region_step.data(),
                               model_residuals.data());
       new_model_cost = model_residuals.squaredNorm() / 2.0;
@@ -328,7 +328,7 @@
     const double model_cost_change = max(kEpsilon, cost - new_model_cost);
 
     // Undo the Jacobian column scaling.
-    delta = -(trust_region_step.array() * scale.array()).matrix();
+    delta = (trust_region_step.array() * scale.array()).matrix();
     iteration_summary.step_norm = delta.norm();
 
     // Convergence based on parameter_tolerance.
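SparseMatrix::RightMultiply in Ceres accumulates y += J * x, which is
why model_residuals is seeded with the residuals first; after the call
it holds f + J * step. A dense sketch of the model cost being computed
(plain Eigen, hypothetical function name):

    #include <Eigen/Dense>

    // new_model_cost = 1/2 |f + J * step|^2
    double NewModelCost(const Eigen::MatrixXd& J,
                        const Eigen::VectorXd& residuals,  // f
                        const Eigen::VectorXd& step) {
      const Eigen::VectorXd model_residuals = residuals + J * step;
      return 0.5 * model_residuals.squaredNorm();
    }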