Speed up corrector.cc

Add a specialization for the common case where the residual block
outputs exactly one residual.

The matrix routines used by Corrector can then be specialized to
the scalar case and made considerably faster.

For denoising, a speedup of up to 400% is observed.

Change-Id: I8e3f24b8ba41caa8e62ad97c5f5e96ab6ea47150
diff --git a/internal/ceres/corrector.cc b/internal/ceres/corrector.cc
index eff4dff..c3858ab 100644
--- a/internal/ceres/corrector.cc
+++ b/internal/ceres/corrector.cc
@@ -113,12 +113,19 @@
                                 double* residuals, double* jacobian) {
   DCHECK(residuals != NULL);
   DCHECK(jacobian != NULL);
-  ConstVectorRef r_ref(residuals, nrow);
-  MatrixRef j_ref(jacobian, nrow, ncol);
 
-  // Equation 11 in BANS.
-  j_ref = sqrt_rho1_ * (j_ref - alpha_sq_norm_ *
-                        r_ref * (r_ref.transpose() * j_ref));
+  if (nrow == 1) {
+    // Specialization for the case where the residual is a scalar.
+    VectorRef j_ref(jacobian, ncol);
+    j_ref *= sqrt_rho1_ * (1.0 - alpha_sq_norm_ * pow(*residuals, 2));
+  } else {
+    ConstVectorRef r_ref(residuals, nrow);
+    MatrixRef j_ref(jacobian, nrow, ncol);
+
+    // Equation 11 in BANS.
+    j_ref = sqrt_rho1_ * (j_ref - alpha_sq_norm_ *
+                          r_ref * (r_ref.transpose() * j_ref));
+  }
 }
 
 }  // namespace internal