Block ordering for SPARSE_SCHUR + CX_SPARSE. Uptil now only SuiteSparse when used with SPARSE_SCHUR would use the block structure of the reduced camera matrix to find a fill-reducing ordering. This leads to substantial speedup for some bundle adjustment problems. Credit for this technique goes to the authors of g2o. I learned about it from reading their source code. Change-Id: I5403efefd4d9552c9c6fc6e02a65498bdf171584
diff --git a/internal/ceres/cxsparse.cc b/internal/ceres/cxsparse.cc index 3fbc271..b7f2520 100644 --- a/internal/ceres/cxsparse.cc +++ b/internal/ceres/cxsparse.cc
@@ -32,7 +32,10 @@ #include "ceres/cxsparse.h" +#include <vector> +#include "ceres/compressed_col_sparse_matrix_utils.h" #include "ceres/compressed_row_sparse_matrix.h" +#include "ceres/internal/port.h" #include "ceres/triplet_sparse_matrix.h" #include "glog/logging.h" @@ -44,24 +47,25 @@ CXSparse::~CXSparse() { if (scratch_size_ > 0) { - cs_free(scratch_); + cs_di_free(scratch_); } } + bool CXSparse::SolveCholesky(cs_di* A, cs_dis* symbolic_factorization, double* b) { // Make sure we have enough scratch space available. if (scratch_size_ < A->n) { if (scratch_size_ > 0) { - cs_free(scratch_); + cs_di_free(scratch_); } - scratch_ = reinterpret_cast<CS_ENTRY*>(cs_malloc(A->n, sizeof(CS_ENTRY))); + scratch_ = reinterpret_cast<CS_ENTRY*>(cs_di_malloc(A->n, sizeof(CS_ENTRY))); scratch_size_ = A->n; } // Solve using Cholesky factorization - csn* numeric_factorization = cs_chol(A, symbolic_factorization); + csn* numeric_factorization = cs_di_chol(A, symbolic_factorization); if (numeric_factorization == NULL) { LOG(WARNING) << "Cholesky factorization failed."; return false; @@ -71,19 +75,16 @@ // succeeded as well. In the comments below, "x" refers to the scratch space. // // Set x = P * b. - cs_ipvec(symbolic_factorization->pinv, b, scratch_, A->n); - + cs_di_ipvec(symbolic_factorization->pinv, b, scratch_, A->n); // Set x = L \ x. - cs_lsolve(numeric_factorization->L, scratch_); - + cs_di_lsolve(numeric_factorization->L, scratch_); // Set x = L' \ x. - cs_ltsolve(numeric_factorization->L, scratch_); - + cs_di_ltsolve(numeric_factorization->L, scratch_); // Set b = P' * x. - cs_pvec(symbolic_factorization->pinv, scratch_, b, A->n); + cs_di_pvec(symbolic_factorization->pinv, scratch_, b, A->n); // Free Cholesky factorization. - cs_nfree(numeric_factorization); + cs_di_nfree(numeric_factorization); return true; } @@ -92,6 +93,61 @@ return cs_schol(1, A); } +cs_dis* CXSparse::BlockAnalyzeCholesky(cs_di* A, + const vector<int>& row_blocks, + const vector<int>& col_blocks) { + const int num_row_blocks = row_blocks.size(); + const int num_col_blocks = col_blocks.size(); + + vector<int> block_rows; + vector<int> block_cols; + CompressedColumnScalarMatrixToBlockMatrix(A->i, + A->p, + row_blocks, + col_blocks, + &block_rows, + &block_cols); + cs_di block_matrix; + block_matrix.m = num_row_blocks; + block_matrix.n = num_col_blocks; + block_matrix.nz = -1; + block_matrix.nzmax = block_rows.size(); + block_matrix.p = &block_cols[0]; + block_matrix.i = &block_rows[0]; + block_matrix.x = NULL; + + int* ordering = cs_amd(1, &block_matrix); + vector<int> block_ordering(num_row_blocks, -1); + copy(ordering, ordering + num_row_blocks, &block_ordering[0]); + cs_free(ordering); + + vector<int> scalar_ordering; + BlockOrderingToScalarOrdering(row_blocks, block_ordering, &scalar_ordering); + + cs_dis* symbolic_factorization = reinterpret_cast<cs_dis*>(cs_calloc(1, sizeof(cs_dis))); + symbolic_factorization->pinv = cs_pinv(&scalar_ordering[0], A->n); + cs* permuted_A = cs_symperm(A, symbolic_factorization->pinv, 0); + + symbolic_factorization->parent = cs_etree(permuted_A, 0); + int* postordering = cs_post(symbolic_factorization->parent, A->n); + int* column_counts = cs_counts(permuted_A, symbolic_factorization->parent, postordering, 0); + cs_free(postordering); + cs_spfree(permuted_A); + + symbolic_factorization->cp = (int*) cs_malloc(A->n+1, sizeof(int)); + symbolic_factorization->lnz = cs_cumsum(symbolic_factorization->cp, column_counts, A->n); + symbolic_factorization->unz = symbolic_factorization->lnz; + + cs_free(column_counts); + + if (symbolic_factorization->lnz < 0) { + cs_sfree(symbolic_factorization); + symbolic_factorization = NULL; + } + + return symbolic_factorization; +} + cs_di CXSparse::CreateSparseMatrixTransposeView(CompressedRowSparseMatrix* A) { cs_di At; At.m = A->num_cols();