Speed up corrector.cc Remove the Eigen temporary by revealing the columnwise nature of the computation. This also allows us to get rid of the special case for nrow = 1. On problem-356-226730-pre.txt with -robustify evaluation times change from: Before: Residual Evaluations 1.015 Jacobian Evaluations 18.313 After: Residual Evaluations 1.005 Jacobian Evaluations 8.382 To give a sense of the overhead reduction, compare these numbers when loss functions are disabled. Residual Evaluations 0.955 Jacobian Evaluations 7.772 So, this is a 17.5x speedup! The one dimensional specialization was motivated by denoising.cc. The evaluation times there are essentially unchanged. Before: Residual Evaluations 2.774 Jacobian Evaluations 20.178 After: Residual Evaluations 2.588 Jacobian Evaluations 19.781 Change-Id: Ic0efbaed75fe4489635039f17189ae24b97802c8
diff --git a/internal/ceres/corrector.cc b/internal/ceres/corrector.cc index c3858ab..60269a6 100644 --- a/internal/ceres/corrector.cc +++ b/internal/ceres/corrector.cc
@@ -32,7 +32,6 @@ #include <cstddef> #include <cmath> -#include "ceres/internal/eigen.h" #include "glog/logging.h" namespace ceres { @@ -90,7 +89,7 @@ // 0.5 * alpha^2 - alpha - rho'' / rho' * z'z = 0. // // Start by calculating the discriminant D. - const double D = 1.0 + 2.0 * sq_norm*rho[2] / rho[1]; + const double D = 1.0 + 2.0 * sq_norm * rho[2] / rho[1]; // Since both rho[1] and rho[2] are guaranteed to be positive at // this point, we know that D > 1.0. @@ -102,29 +101,43 @@ alpha_sq_norm_ = alpha / sq_norm; } -void Corrector::CorrectResiduals(int nrow, double* residuals) { +void Corrector::CorrectResiduals(int num_rows, double* residuals) { DCHECK(residuals != NULL); - VectorRef r_ref(residuals, nrow); // Equation 11 in BANS. - r_ref *= residual_scaling_; + for (int r = 0; r < num_rows; ++r) { + residuals[r] *= residual_scaling_; + } } -void Corrector::CorrectJacobian(int nrow, int ncol, - double* residuals, double* jacobian) { +void Corrector::CorrectJacobian(int num_rows, + int num_cols, + double* residuals, + double* jacobian) { DCHECK(residuals != NULL); DCHECK(jacobian != NULL); + // Equation 11 in BANS. + // + // J = sqrt(rho) * (J - alpha^2 r * r' J) + // + // In days gone by this loop used to be a single Eigen expression of + // the form + // + // J = sqrt_rho1_ * (J - alpha_sq_norm_ * r* (r.transpose() * J)); + // + // Which turns out to about 17x slower on bal problems. The reason + // is that Eigen is unable to figure out that this expression can be + // evaluated columnwise and ends up creating a temporary. + for (int c = 0; c < num_cols; ++c) { + double r_transpose_j = 0.0; + for (int r = 0; r < num_rows; ++r) { + r_transpose_j += jacobian[r * num_cols + c] * residuals[r]; + } - if (nrow == 1) { - // Specialization for the case where the residual is a scalar. - VectorRef j_ref(jacobian, ncol); - j_ref *= sqrt_rho1_ * (1.0 - alpha_sq_norm_ * pow(*residuals, 2)); - } else { - ConstVectorRef r_ref(residuals, nrow); - MatrixRef j_ref(jacobian, nrow, ncol); - - // Equation 11 in BANS. - j_ref = sqrt_rho1_ * (j_ref - alpha_sq_norm_ * - r_ref * (r_ref.transpose() * j_ref)); + for (int r = 0; r < num_rows; ++r) { + jacobian[r * num_cols + c] = sqrt_rho1_ * + (jacobian[r * num_cols + c] - + alpha_sq_norm_ * residuals[r] * r_transpose_j); + } } }
diff --git a/internal/ceres/corrector.h b/internal/ceres/corrector.h index 9914641..2137221 100644 --- a/internal/ceres/corrector.h +++ b/internal/ceres/corrector.h
@@ -66,7 +66,7 @@ explicit Corrector(double sq_norm, const double rho[3]); // residuals *= sqrt(rho[1]) / (1 - alpha) - void CorrectResiduals(int nrow, double* residuals); + void CorrectResiduals(int num_rows, double* residuals); // jacobian = sqrt(rho[1]) * jacobian - // sqrt(rho[1]) * alpha / sq_norm * residuals residuals' * jacobian. @@ -74,8 +74,10 @@ // The method assumes that the jacobian has row-major storage. It is // the caller's responsibility to ensure that the pointer to // jacobian is not null. - void CorrectJacobian(int nrow, int ncol, - double* residuals, double* jacobian); + void CorrectJacobian(int num_rows, + int num_cols, + double* residuals, + double* jacobian); private: double sqrt_rho1_;