Speed up corrector.cc
Remove the Eigen temporary by revealing the columnwise nature
of the computation. This also allows us to get rid of the
special case for nrow = 1.
On problem-356-226730-pre.txt with -robustify evaluation times
change from:
Before:
Residual Evaluations 1.015
Jacobian Evaluations 18.313
After:
Residual Evaluations 1.005
Jacobian Evaluations 8.382
To give a sense of the overhead reduction, compare these numbers
when loss functions are disabled.
Residual Evaluations 0.955
Jacobian Evaluations 7.772
So, this is a 17.5x speedup!
The one dimensional specialization was motivated by denoising.cc.
The evaluation times there are essentially unchanged.
Before:
Residual Evaluations 2.774
Jacobian Evaluations 20.178
After:
Residual Evaluations 2.588
Jacobian Evaluations 19.781
Change-Id: Ic0efbaed75fe4489635039f17189ae24b97802c8
diff --git a/internal/ceres/corrector.cc b/internal/ceres/corrector.cc
index c3858ab..60269a6 100644
--- a/internal/ceres/corrector.cc
+++ b/internal/ceres/corrector.cc
@@ -32,7 +32,6 @@
#include <cstddef>
#include <cmath>
-#include "ceres/internal/eigen.h"
#include "glog/logging.h"
namespace ceres {
@@ -90,7 +89,7 @@
// 0.5 * alpha^2 - alpha - rho'' / rho' * z'z = 0.
//
// Start by calculating the discriminant D.
- const double D = 1.0 + 2.0 * sq_norm*rho[2] / rho[1];
+ const double D = 1.0 + 2.0 * sq_norm * rho[2] / rho[1];
// Since both rho[1] and rho[2] are guaranteed to be positive at
// this point, we know that D > 1.0.
@@ -102,29 +101,43 @@
alpha_sq_norm_ = alpha / sq_norm;
}
-void Corrector::CorrectResiduals(int nrow, double* residuals) {
+void Corrector::CorrectResiduals(int num_rows, double* residuals) {
DCHECK(residuals != NULL);
- VectorRef r_ref(residuals, nrow);
// Equation 11 in BANS.
- r_ref *= residual_scaling_;
+ for (int r = 0; r < num_rows; ++r) {
+ residuals[r] *= residual_scaling_;
+ }
}
-void Corrector::CorrectJacobian(int nrow, int ncol,
- double* residuals, double* jacobian) {
+void Corrector::CorrectJacobian(int num_rows,
+ int num_cols,
+ double* residuals,
+ double* jacobian) {
DCHECK(residuals != NULL);
DCHECK(jacobian != NULL);
+ // Equation 11 in BANS.
+ //
+ // J = sqrt(rho) * (J - alpha^2 r * r' J)
+ //
+ // In days gone by this loop used to be a single Eigen expression of
+ // the form
+ //
+ // J = sqrt_rho1_ * (J - alpha_sq_norm_ * r* (r.transpose() * J));
+ //
+ // Which turns out to about 17x slower on bal problems. The reason
+ // is that Eigen is unable to figure out that this expression can be
+ // evaluated columnwise and ends up creating a temporary.
+ for (int c = 0; c < num_cols; ++c) {
+ double r_transpose_j = 0.0;
+ for (int r = 0; r < num_rows; ++r) {
+ r_transpose_j += jacobian[r * num_cols + c] * residuals[r];
+ }
- if (nrow == 1) {
- // Specialization for the case where the residual is a scalar.
- VectorRef j_ref(jacobian, ncol);
- j_ref *= sqrt_rho1_ * (1.0 - alpha_sq_norm_ * pow(*residuals, 2));
- } else {
- ConstVectorRef r_ref(residuals, nrow);
- MatrixRef j_ref(jacobian, nrow, ncol);
-
- // Equation 11 in BANS.
- j_ref = sqrt_rho1_ * (j_ref - alpha_sq_norm_ *
- r_ref * (r_ref.transpose() * j_ref));
+ for (int r = 0; r < num_rows; ++r) {
+ jacobian[r * num_cols + c] = sqrt_rho1_ *
+ (jacobian[r * num_cols + c] -
+ alpha_sq_norm_ * residuals[r] * r_transpose_j);
+ }
}
}