Speed up DenseSparseMatrix::SquaredColumnNorm.
Because we store the matrix as a row-major matrix, the
obvious Eigen expression performs rather poorly. A straight
C++ loop speeds things up considerably.
Also replace use of matrix() with direct use of m_.
Change-Id: I3d6166df4765ad8400ab9602a54b65fd21b1d50f
diff --git a/internal/ceres/dense_sparse_matrix.cc b/internal/ceres/dense_sparse_matrix.cc
index cef219f..e71546b 100644
--- a/internal/ceres/dense_sparse_matrix.cc
+++ b/internal/ceres/dense_sparse_matrix.cc
@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
-// Copyright 2015 Google Inc. All rights reserved.
+// Copyright 2022 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -60,16 +60,28 @@
void DenseSparseMatrix::SetZero() { m_.setZero(); }
void DenseSparseMatrix::RightMultiply(const double* x, double* y) const {
- VectorRef(y, num_rows()) += matrix() * ConstVectorRef(x, num_cols());
+ VectorRef(y, num_rows()).noalias() += m_ * ConstVectorRef(x, num_cols());
}
void DenseSparseMatrix::LeftMultiply(const double* x, double* y) const {
- VectorRef(y, num_cols()) +=
- matrix().transpose() * ConstVectorRef(x, num_rows());
+ VectorRef(y, num_cols()).noalias() +=
+ m_.transpose() * ConstVectorRef(x, num_rows());
}
void DenseSparseMatrix::SquaredColumnNorm(double* x) const {
- VectorRef(x, num_cols()) = m_.colwise().squaredNorm();
+ // This implementation is 3x faster than the naive version
+ // x = m_.colwise().squaredNorm(), likely because m_
+ // is a row major matrix.
+
+ const int num_rows = m_.rows();
+ const int num_cols = m_.cols();
+ std::fill_n(x, num_cols, 0.0);
+ const double* m = m_.data();
+ for (int i = 0; i < num_rows; ++i) {
+ for (int j = 0; j < num_cols; ++j, ++m) {
+ x[j] += (*m) * (*m);
+ }
+ }
}
void DenseSparseMatrix::ScaleColumns(const double* scale) {