Do not implicitly negate the step in the TrustRegionMinimizer.

In the TrustRegionMinimizer, the step is currently implicitly negated.
This is done so that the linearized residual is |r - J*step|^2, which
corresponds to J*step = r, so that neither J nor r has to be modified.
However, it leads to the rather unintuitive situation that the strategy
returns a step in the positive gradient direction, which one would
expect to increase the function value. One way to fix this would be to
rename the "step" parameter in the strategy to "negative_step" and
document it. This patch instead moves the negation inside the strategy,
just around the linear solver call, so that it happens in a local
context where it is easy to document.

Change-Id: Idb258149a01f61c64e22128ea221c5a30cd89c89
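The sign convention is easy to check in isolation. Below is a minimal
sketch in plain Eigen (not the Ceres linear solver API) showing that
solving Jy = r and then setting x = -y produces the same least-squares
step as solving Jx = -r directly, without touching J or r:

    #include <iostream>
    #include <Eigen/Dense>

    int main() {
      // Small overdetermined system: J is 3x2, r is the residual vector.
      Eigen::MatrixXd J(3, 2);
      J << 1, 0,
           0, 1,
           1, 1;
      Eigen::VectorXd r(3);
      r << 1, 2, 3;

      // Solve J * y = r in the least-squares sense, then negate.
      const Eigen::VectorXd y = J.colPivHouseholderQr().solve(r);
      const Eigen::VectorXd x = -y;

      // Direct solve of J * x = -r for comparison; the difference is ~0.
      const Eigen::VectorXd x_direct = J.colPivHouseholderQr().solve(-r);
      std::cout << (x - x_direct).norm() << std::endl;
      return 0;
    }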
diff --git a/internal/ceres/dogleg_strategy.cc b/internal/ceres/dogleg_strategy.cc
index 620bda7..44d484f 100644
--- a/internal/ceres/dogleg_strategy.cc
+++ b/internal/ceres/dogleg_strategy.cc
@@ -142,7 +142,7 @@
 }
 
 void DoglegStrategy::ComputeCauchyPoint(SparseMatrix* jacobian) {
-  // alpha * gradient is the Cauchy point.
+  // alpha * -gradient is the Cauchy point.
   Vector Jg(jacobian->num_rows());
   Jg.setZero();
   // The Jacobian is scaled implicitly by computing J * (D^-1 * (D^-1 * g))
@@ -173,7 +173,7 @@
   // the trust region. Rescale the Cauchy point to the trust region
   // and return.
   if (gradient_norm * alpha_ >= radius_) {
-    dogleg_step = (radius_ / gradient_norm) * gradient_;
+    dogleg_step = -(radius_ / gradient_norm) * gradient_;
     dogleg_step_norm_ = radius_;
     dogleg_step.array() /= diagonal_.array();
     VLOG(3) << "Cauchy step size: " << dogleg_step_norm_
@@ -186,15 +186,15 @@
   // points and the point on it which intersects the trust region
   // boundary.
 
-  // a = alpha * gradient
+  // a = alpha * -gradient
   // b = gauss_newton_step
-  const double b_dot_a = alpha_ * gradient_.dot(gauss_newton_step_);
+  const double b_dot_a = -alpha_ * gradient_.dot(gauss_newton_step_);
   const double a_squared_norm = pow(alpha_ * gradient_norm, 2.0);
   const double b_minus_a_squared_norm =
       a_squared_norm - 2 * b_dot_a + pow(gauss_newton_norm, 2);
 
   // c = a' (b - a)
-  //   = alpha * gradient' gauss_newton_step - alpha^2 |gradient|^2
+  //   = alpha * -gradient' gauss_newton_step - alpha^2 |gradient|^2
   const double c = b_dot_a - a_squared_norm;
   const double d = sqrt(c * c + b_minus_a_squared_norm *
                         (pow(radius_, 2.0) - a_squared_norm));
@@ -203,7 +203,7 @@
       (c <= 0)
       ? (d - c) / b_minus_a_squared_norm
       : (radius_ * radius_ - a_squared_norm) / (d + c);
-  dogleg_step = (alpha_ * (1.0 - beta)) * gradient_ + beta * gauss_newton_step_;
+  dogleg_step = (-alpha_ * (1.0 - beta)) * gradient_ + beta * gauss_newton_step_;
   dogleg_step_norm_ = dogleg_step.norm();
   dogleg_step.array() /= diagonal_.array();
   VLOG(3) << "Dogleg step size: " << dogleg_step_norm_
@@ -255,6 +255,9 @@
   lm_diagonal_ = diagonal_ * std::sqrt(mu_);
   solve_options.D = lm_diagonal_.data();
 
+  // As in the LevenbergMarquardtStrategy, solve Jy = r instead
+  // of Jx = -r and later set x = -y to avoid having to modify
+  // either jacobian or residuals.
   InvalidateArray(n, gauss_newton_step_.data());
   linear_solver_summary = linear_solver_->Solve(jacobian,
                                                 residuals,
@@ -277,7 +280,7 @@
   //   = - D (J^T J)^-1 D D^-1 g
   //   = D -(J^T J)^-1 g
   //
-  gauss_newton_step_.array() *= diagonal_.array();
+  gauss_newton_step_.array() *= -diagonal_.array();
 
   return linear_solver_summary;
 }
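For reference, the geometry the hunks above implement, as a dense Eigen
sketch with hypothetical names (the real code additionally applies the
D^-1 scaling and handles the case where the full Gauss-Newton step
already lies inside the trust region): a = -alpha * gradient is the
Cauchy point, b is the Gauss-Newton step, and beta solves
|a + beta * (b - a)|^2 = radius^2 so that the dogleg step ends on the
trust region boundary.

    #include <cmath>
    #include <Eigen/Dense>

    Eigen::VectorXd DoglegStep(const Eigen::VectorXd& gradient,
                               const Eigen::VectorXd& gauss_newton_step,
                               double alpha, double radius) {
      const Eigen::VectorXd a = -alpha * gradient;  // Cauchy point.
      const Eigen::VectorXd& b = gauss_newton_step;
      if (a.norm() >= radius) {
        // Cauchy point outside the trust region: return the rescaled
        // steepest descent step.
        return -(radius / gradient.norm()) * gradient;
      }
      const double a2 = a.squaredNorm();
      const double c = b.dot(a) - a2;  // c = a' (b - a)
      const double bma2 = (b - a).squaredNorm();
      const double d = std::sqrt(c * c + bma2 * (radius * radius - a2));
      // Two algebraically equivalent forms, chosen for numerical stability.
      const double beta =
          (c <= 0) ? (d - c) / bma2 : (radius * radius - a2) / (d + c);
      return a + beta * (b - a);  // = (1 - beta) * a + beta * b.
    }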
diff --git a/internal/ceres/levenberg_marquardt_strategy.cc b/internal/ceres/levenberg_marquardt_strategy.cc
index 298748f..576648a 100644
--- a/internal/ceres/levenberg_marquardt_strategy.cc
+++ b/internal/ceres/levenberg_marquardt_strategy.cc
@@ -98,12 +98,18 @@
   // to happen for the DENSE_QR and then DENSE_SCHUR solver when
   // the Jacobin is severly rank deficient and mu is too small.
   InvalidateArray(num_parameters, step);
+
+  // Instead of solving Jx = -r, solve Jy = r.
+  // Then x can be found as x = -y, but the inputs jacobian and residuals
+  // do not need to be modified.
   LinearSolver::Summary linear_solver_summary =
       linear_solver_->Solve(jacobian, residuals, solve_options, step);
   if (linear_solver_summary.termination_type == FAILURE ||
       !IsArrayValid(num_parameters, step)) {
     LOG(WARNING) << "Linear solver failure. Failed to compute a finite step.";
     linear_solver_summary.termination_type = FAILURE;
+  } else {
+    VectorRef(step, num_parameters) *= -1.0;
   }
 
   reuse_diagonal_ = true;
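VectorRef here is Ceres' Eigen::Map typedef over a raw double array, so
the added else-branch negates the solver output in place in the caller's
step buffer. A minimal equivalent in plain Eigen:

    #include <Eigen/Dense>

    // Negate a raw parameter block in place, as the else-branch does.
    void NegateInPlace(double* step, int num_parameters) {
      Eigen::Map<Eigen::VectorXd>(step, num_parameters) *= -1.0;
    }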
diff --git a/internal/ceres/trust_region_minimizer.cc b/internal/ceres/trust_region_minimizer.cc
index d08255f..476cc9d 100644
--- a/internal/ceres/trust_region_minimizer.cc
+++ b/internal/ceres/trust_region_minimizer.cc
@@ -271,8 +271,8 @@
 
     double new_model_cost = 0.0;
     if (strategy_summary.termination_type != FAILURE) {
-      // new_model_cost = 1/2 |J * step - f|^2
-      model_residuals = -residuals;
+      // new_model_cost = 1/2 |f + J * step|^2
+      model_residuals = residuals;
       jacobian->RightMultiply(trust_region_step.data(),
                               model_residuals.data());
       new_model_cost = model_residuals.squaredNorm() / 2.0;
@@ -328,7 +328,7 @@
     const double model_cost_change = max(kEpsilon, cost - new_model_cost);
 
     // Undo the Jacobian column scaling.
-    delta = -(trust_region_step.array() * scale.array()).matrix();
+    delta = (trust_region_step.array() * scale.array()).matrix();
     iteration_summary.step_norm = delta.norm();
 
     // Convergence based on parameter_tolerance.
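SparseMatrix::RightMultiply in Ceres accumulates y += J * x, which is
why model_residuals is seeded with the residuals first; after the call
it holds f + J * step. A dense sketch of the model cost being computed
(plain Eigen, hypothetical function name):

    #include <Eigen/Dense>

    // new_model_cost = 1/2 |f + J * step|^2
    double NewModelCost(const Eigen::MatrixXd& J,
                        const Eigen::VectorXd& residuals,  // f
                        const Eigen::VectorXd& step) {
      const Eigen::VectorXd model_residuals = residuals + J * step;
      return 0.5 * model_residuals.squaredNorm();
    }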