Caching the symbolic Cholesky factorization when using CXSparse Average factorization times for bundle adjustment test problem: SuiteSparse: 0.2794 s. CXSparse: 0.4039 s. CXSparse cached: 0.2399 s. CXSparse will still be slower, though, because it has to compute the transpose and J^T * J. Change-Id: If9cdaa3dd520bee84b56e5fd4953b56a93db6bde
diff --git a/internal/ceres/CMakeLists.txt b/internal/ceres/CMakeLists.txt index 46c9a96..16f25cf 100644 --- a/internal/ceres/CMakeLists.txt +++ b/internal/ceres/CMakeLists.txt
@@ -45,6 +45,7 @@ conditioned_cost_function.cc conjugate_gradients_solver.cc corrector.cc + cxsparse.cc dense_normal_cholesky_solver.cc dense_qr_solver.cc dense_sparse_matrix.cc
diff --git a/internal/ceres/cxsparse.cc b/internal/ceres/cxsparse.cc new file mode 100644 index 0000000..f54d24f --- /dev/null +++ b/internal/ceres/cxsparse.cc
@@ -0,0 +1,127 @@ +// Ceres Solver - A fast non-linear least squares minimizer +// Copyright 2012 Google Inc. All rights reserved. +// http://code.google.com/p/ceres-solver/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of Google Inc. nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +// Author: strandmark@google.com (Petter Strandmark) + +#ifndef CERES_NO_CXSPARSE + +#include "ceres/cxsparse.h" + +#include "ceres/compressed_row_sparse_matrix.h" +#include "ceres/triplet_sparse_matrix.h" +#include "glog/logging.h" + +namespace ceres { +namespace internal { + +CXSparse::CXSparse() : scratch_size_(0), scratch_(NULL) { +} + +CXSparse::~CXSparse() { + if (scratch_size_ > 0) { + cs_free(scratch_); + } +} + +bool CXSparse::SolveCholesky(cs_di* A, cs_dis* factor, double* b) { + // Make sure we have enough scratch space available. + if (scratch_size_ < A->n) { + if (scratch_size_ > 0) { + cs_free(scratch_); + } + scratch_ = reinterpret_cast<CS_ENTRY*>(cs_malloc(A->n, sizeof(CS_ENTRY))); + } + + // Solve using Cholesky factorization + csn* N = cs_chol(A, factor); + if (N == NULL) { + LOG(WARNING) << "Cholesky factorization failed."; + return false; + } + + // When the Cholesky factorization succeeded, these methods are guaranteed to + // succeed as well. In the comments below, "x" refers to the scratch space. + // Set x = P * b. + cs_ipvec(factor->pinv, b, scratch_, A->n); + + // Set x = L \ x. + cs_lsolve(N->L, scratch_); + + // Set x = L' \ x. + cs_ltsolve(N->L, scratch_); + + // Set b = P' * x. + cs_pvec(factor->pinv, scratch_, b, A->n); + + // Free Cholesky factorization. + cs_nfree(N); + return true; +} + +cs_dis* CXSparse::AnalyzeCholesky(cs_di* A) { + // order = 1 for Cholesky factorization. + return cs_schol(1, A); +} + +cs_di CXSparse::CreateSparseMatrixTransposeView(CompressedRowSparseMatrix* A) { + cs_di At; + At.m = A->num_cols(); + At.n = A->num_rows(); + At.nz = -1; + At.nzmax = A->num_nonzeros(); + At.p = A->mutable_rows(); + At.i = A->mutable_cols(); + At.x = A->mutable_values(); + return At; +} + +cs_di* CXSparse::CreateSparseMatrix(TripletSparseMatrix* tsm) { + cs_di_sparse tsm_wrapper; + tsm_wrapper.nzmax = tsm->num_nonzeros();; + tsm_wrapper.nz = tsm->num_nonzeros();; + tsm_wrapper.m = tsm->num_rows(); + tsm_wrapper.n = tsm->num_cols(); + tsm_wrapper.p = tsm->mutable_cols(); + tsm_wrapper.i = tsm->mutable_rows(); + tsm_wrapper.x = tsm->mutable_values(); + + return cs_compress(&tsm_wrapper); +} + +void CXSparse::Free(cs_di* factor) { + cs_free(factor); +} + +void CXSparse::Free(cs_dis* factor) { + cs_sfree(factor); +} + +} // namespace internal +} // namespace ceres + +#endif // CERES_NO_CXSPARSE
diff --git a/internal/ceres/cxsparse.h b/internal/ceres/cxsparse.h new file mode 100644 index 0000000..a126510 --- /dev/null +++ b/internal/ceres/cxsparse.h
@@ -0,0 +1,90 @@ +// Ceres Solver - A fast non-linear least squares minimizer +// Copyright 2012 Google Inc. All rights reserved. +// http://code.google.com/p/ceres-solver/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of Google Inc. nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +// Author: strandmark@google.com (Petter Strandmark) + +#ifndef CERES_INTERNAL_CXSPARSE_H_ +#define CERES_INTERNAL_CXSPARSE_H_ + +#ifndef CERES_NO_CXSPARSE + +#include "cs.h" + +namespace ceres { +namespace internal { + +class CompressedRowSparseMatrix; +class TripletSparseMatrix; + +// This object provides access to solving linear systems using Cholesky +// factorization with a known symbolic factorization. This features does not +// explicity exist in CXSparse. The methods in the class are nonstatic because +// the class manages internal scratch space. +class CXSparse { + public: + CXSparse(); + ~CXSparse(); + + // Solves a symmetric linear system A * x = b using Cholesky factorization. + // A - The system matrix. + // factor - The symbolic factorization of A. This is obtained from + // AnalyzeCholesky. + // b - The right hand size of the linear equation. This array will also + // recieve the solution. + // Returns false if Cholesky factorization of A fails. + bool SolveCholesky(cs_di* A, cs_dis* factor, double* b); + + // Creates a sparse matrix from a compressed-column form. No memory is + // allocated or copied; the structure A is filled out with info from the + // argument. + cs_di CreateSparseMatrixTransposeView(CompressedRowSparseMatrix* A); + + // Creates a new matrix from a triplet form. Deallocate the returned matrix + // with Free. May return NULL if the compression or allocation fails. + cs_di* CreateSparseMatrix(TripletSparseMatrix* A); + + // Computes a symbolic factorization of A that can be used in SolveCholesky. + // The returned matrix should be deallocated with Free when not used anymore. + cs_dis* AnalyzeCholesky(cs_di* A); + + // Deallocates the memory of a matrix obtained from AnalyzeCholesky. + void Free(cs_di* factor); + void Free(cs_dis* factor); + + private: + // Cached scratch space + CS_ENTRY* scratch_; + int scratch_size_; +}; + +} // namespace internal +} // namespace ceres + +#endif // CERES_NO_CXSPARSE + +#endif // CERES_INTERNAL_CXSPARSE_H_
diff --git a/internal/ceres/schur_complement_solver.cc b/internal/ceres/schur_complement_solver.cc index f7597e0..b9224d8 100644 --- a/internal/ceres/schur_complement_solver.cc +++ b/internal/ceres/schur_complement_solver.cc
@@ -152,6 +152,10 @@ #ifndef CERES_NO_SUITESPARSE factor_ = NULL; #endif // CERES_NO_SUITESPARSE + +#ifndef CERES_NO_CXSPARSE + cxsparse_factor_ = NULL; +#endif // CERES_NO_CXSPARSE } SparseSchurComplementSolver::~SparseSchurComplementSolver() { @@ -161,6 +165,13 @@ factor_ = NULL; } #endif // CERES_NO_SUITESPARSE + +#ifndef CERES_NO_CXSPARSE + if (cxsparse_factor_ != NULL) { + cxsparse_.Free(cxsparse_factor_); + cxsparse_factor_ = NULL; + } +#endif // CERES_NO_CXSPARSE } // Determine the non-zero blocks in the Schur Complement matrix, and @@ -350,22 +361,18 @@ return true; } - cs_di_sparse tsm_wrapper; - tsm_wrapper.nzmax = tsm->num_nonzeros(); - tsm_wrapper.m = num_rows; - tsm_wrapper.n = num_rows; - tsm_wrapper.p = tsm->mutable_cols(); - tsm_wrapper.i = tsm->mutable_rows(); - tsm_wrapper.x = tsm->mutable_values(); - tsm_wrapper.nz = tsm->num_nonzeros(); - - cs_di_sparse* lhs = cs_compress(&tsm_wrapper); + cs_di* lhs = CHECK_NOTNULL(cxsparse_.CreateSparseMatrix(tsm)); VectorRef(solution, num_rows) = ConstVectorRef(rhs(), num_rows); - // It maybe worth caching the ordering here, but for now we are - // going to go with the simple cholsol based implementation. - int ok = cs_di_cholsol(1, lhs, solution); - cs_free(lhs); + // Compute symbolic factorization if not available. + if (cxsparse_factor_ == NULL) { + cxsparse_factor_ = CHECK_NOTNULL(cxsparse_.AnalyzeCholesky(lhs)); + } + + // Solve the linear system. + bool ok = cxsparse_.SolveCholesky(lhs, cxsparse_factor_, solution); + + cxsparse_.Free(lhs); return ok; } #else
diff --git a/internal/ceres/schur_complement_solver.h b/internal/ceres/schur_complement_solver.h index a8f0852..ea1b318 100644 --- a/internal/ceres/schur_complement_solver.h +++ b/internal/ceres/schur_complement_solver.h
@@ -33,9 +33,11 @@ #include <set> #include <utility> + #include "ceres/block_random_access_matrix.h" #include "ceres/block_sparse_matrix.h" #include "ceres/block_structure.h" +#include "ceres/cxsparse.h" #include "ceres/linear_solver.h" #include "ceres/schur_eliminator.h" #include "ceres/suitesparse.h" @@ -169,6 +171,12 @@ // once and reused in subsequent calls. cholmod_factor* factor_; #endif // CERES_NO_SUITESPARSE + +#ifndef CERES_NO_CXSPARSE + CXSparse cxsparse_; + // Cached factorization + cs_dis* cxsparse_factor_; +#endif // CERES_NO_CXSPARSE CERES_DISALLOW_COPY_AND_ASSIGN(SparseSchurComplementSolver); };
diff --git a/internal/ceres/sparse_normal_cholesky_solver.cc b/internal/ceres/sparse_normal_cholesky_solver.cc index a7c43ef..9e00b44 100644 --- a/internal/ceres/sparse_normal_cholesky_solver.cc +++ b/internal/ceres/sparse_normal_cholesky_solver.cc
@@ -55,6 +55,10 @@ #ifndef CERES_NO_SUITESPARSE factor_ = NULL; #endif + +#ifndef CERES_NO_CXSPARSE + cxsparse_factor_ = NULL; +#endif // CERES_NO_CXSPARSE } SparseNormalCholeskySolver::~SparseNormalCholeskySolver() { @@ -64,6 +68,13 @@ factor_ = NULL; } #endif + +#ifndef CERES_NO_CXSPARSE + if (cxsparse_factor_ != NULL) { + cxsparse_.Free(cxsparse_factor_); + cxsparse_factor_ = NULL; + } +#endif // CERES_NO_CXSPARSE } LinearSolver::Summary SparseNormalCholeskySolver::SolveImpl( @@ -108,14 +119,7 @@ VectorRef(x, num_cols).setZero(); // Wrap the augmented Jacobian in a compressed sparse column matrix. - cs_di At; - At.m = A->num_cols(); - At.n = A->num_rows(); - At.nz = -1; - At.nzmax = A->num_nonzeros(); - At.p = A->mutable_rows(); - At.i = A->mutable_cols(); - At.x = A->mutable_values(); + cs_di At = cxsparse_.CreateSparseMatrixTransposeView(A); // Compute the normal equations. J'J delta = J'f and solve them // using a sparse Cholesky factorization. Notice that when compared @@ -127,20 +131,23 @@ cs_di* A2 = cs_transpose(&At, 1); cs_di* AtA = cs_multiply(&At,A2); - cs_free(A2); + cxsparse_.Free(A2); if (per_solve_options.D != NULL) { A->DeleteRows(num_cols); } - // This recomputes the symbolic factorization every time it is - // invoked. It will perhaps be worth it to cache the symbolic - // factorization the way we do for SuiteSparse. - if (cs_cholsol(1, AtA, Atb.data())) { + // Compute symbolic factorization if not available. + if (cxsparse_factor_ == NULL) { + cxsparse_factor_ = CHECK_NOTNULL(cxsparse_.AnalyzeCholesky(AtA)); + } + + // Solve the linear system. + if (cxsparse_.SolveCholesky(AtA, cxsparse_factor_, Atb.data())) { VectorRef(x, Atb.rows()) = Atb; summary.termination_type = TOLERANCE; } - cs_free(AtA); + cxsparse_.Free(AtA); return summary; } #else
diff --git a/internal/ceres/sparse_normal_cholesky_solver.h b/internal/ceres/sparse_normal_cholesky_solver.h index 6366f86..40d9e0a 100644 --- a/internal/ceres/sparse_normal_cholesky_solver.h +++ b/internal/ceres/sparse_normal_cholesky_solver.h
@@ -34,13 +34,10 @@ #ifndef CERES_INTERNAL_SPARSE_NORMAL_CHOLESKY_SOLVER_H_ #define CERES_INTERNAL_SPARSE_NORMAL_CHOLESKY_SOLVER_H_ -#ifndef CERES_NO_SUITESPARSE -#include "cholmod.h" -#include "ceres/suitesparse.h" -#endif // CERES_NO_SUITESPARSE - +#include "ceres/cxsparse.h" #include "ceres/linear_solver.h" #include "ceres/internal/macros.h" +#include "ceres/suitesparse.h" namespace ceres { namespace internal { @@ -80,6 +77,11 @@ cholmod_factor* factor_; #endif // CERES_NO_SUITESPARSE +#ifndef CERES_NO_CXSPARSE + CXSparse cxsparse_; + // Cached factorization + cs_dis* cxsparse_factor_; +#endif // CERES_NO_CXSPARSE const LinearSolver::Options options_; CERES_DISALLOW_COPY_AND_ASSIGN(SparseNormalCholeskySolver);