Speed up DenseSparseMatrix::SquaredColumnNorm. Because we store the matrix as a row-major matrix, the obvious Eigen expression performs rather poorly. A straight C++ loop speeds things up considerably. Also replace use of matrix() with direct use of m_. Change-Id: I3d6166df4765ad8400ab9602a54b65fd21b1d50f
diff --git a/internal/ceres/dense_sparse_matrix.cc b/internal/ceres/dense_sparse_matrix.cc index cef219f..e71546b 100644 --- a/internal/ceres/dense_sparse_matrix.cc +++ b/internal/ceres/dense_sparse_matrix.cc
@@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2022 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -60,16 +60,28 @@ void DenseSparseMatrix::SetZero() { m_.setZero(); } void DenseSparseMatrix::RightMultiply(const double* x, double* y) const { - VectorRef(y, num_rows()) += matrix() * ConstVectorRef(x, num_cols()); + VectorRef(y, num_rows()).noalias() += m_ * ConstVectorRef(x, num_cols()); } void DenseSparseMatrix::LeftMultiply(const double* x, double* y) const { - VectorRef(y, num_cols()) += - matrix().transpose() * ConstVectorRef(x, num_rows()); + VectorRef(y, num_cols()).noalias() += + m_.transpose() * ConstVectorRef(x, num_rows()); } void DenseSparseMatrix::SquaredColumnNorm(double* x) const { - VectorRef(x, num_cols()) = m_.colwise().squaredNorm(); + // This implementation is 3x faster than the naive version + // x = m_.colwise().square().sum(), likely because m_ + // is a row major matrix. + + const int num_rows = m_.rows(); + const int num_cols = m_.cols(); + std::fill_n(x, num_cols, 0.0); + const double* m = m_.data(); + for (int i = 0; i < num_rows; ++i) { + for (int j = 0; j < num_cols; ++j, ++m) { + x[j] += (*m) * (*m); + } + } } void DenseSparseMatrix::ScaleColumns(const double* scale) {