Block oriented fill reducing orderings.
By virtue of the modeling layer in Ceres being block oriented,
all the matrices used by Ceres are also block oriented.
When doing sparse direct factorization of these matrices, the
fill-reducing ordering algorithms can either be run on the
block or the scalar form of these matrices. Running it on the
block form exposes more of the super-nodal structure of the
matrix to the Cholesky factorization routines. This leads to
substantial gains in factorization performance.
This changelist adds support for approximate minimium degree
orderings to be computed on the block structure of the
Schur complement matrix. This affects, SchurComplementSolver
and VisibilityBasedPreconditioner and SparseNormalCholesky
when using SuiteSparse.
A bool, use_block_amd has been added to Solver::Options and
bundle_adjuster.cc has been updated to allow testing with it.
When combined with a multithreaded Schur elimination, speed ups
can be seen quite uniformly across the board. For some problems
this can be dramatic, reducing the factorization time from 70
seconds down to 17 seconds.
Change-Id: I15ebb0afcbc85ada032ec8d179ee3a2f7c8d3e46
diff --git a/internal/ceres/suitesparse.cc b/internal/ceres/suitesparse.cc
index 1cf6a74..c02d305 100644
--- a/internal/ceres/suitesparse.cc
+++ b/internal/ceres/suitesparse.cc
@@ -29,15 +29,14 @@
// Author: sameeragarwal@google.com (Sameer Agarwal)
#ifndef CERES_NO_SUITESPARSE
-
#include "ceres/suitesparse.h"
+#include <vector>
#include "cholmod.h"
#include "ceres/compressed_row_sparse_matrix.h"
#include "ceres/triplet_sparse_matrix.h"
namespace ceres {
namespace internal {
-
cholmod_sparse* SuiteSparse::CreateSparseMatrix(TripletSparseMatrix* A) {
cholmod_triplet triplet;
@@ -111,6 +110,13 @@
}
cholmod_factor* SuiteSparse::AnalyzeCholesky(cholmod_sparse* A) {
+ // Cholmod can try multiple re-ordering strategies to find a fill
+ // reducing ordering. Here we just tell it use AMD with automatic
+ // matrix dependence choice of supernodal versus simplicial
+ // factorization.
+ cc_.nmethods = 1;
+ cc_.method[0].ordering = CHOLMOD_AMD;
+ cc_.supernodal = CHOLMOD_AUTO;
cholmod_factor* factor = cholmod_analyze(A, &cc_);
CHECK_EQ(cc_.status, CHOLMOD_OK)
<< "Cholmod symbolic analysis failed " << cc_.status;
@@ -118,6 +124,146 @@
return factor;
}
+cholmod_factor* SuiteSparse::BlockAnalyzeCholesky(
+ cholmod_sparse* A,
+ const vector<int>& row_blocks,
+ const vector<int>& col_blocks) {
+ vector<int> ordering;
+ if (!BlockAMDOrdering(A, row_blocks, col_blocks, &ordering)) {
+ return NULL;
+ }
+ return AnalyzeCholeskyWithUserOrdering(A, ordering);
+}
+
+cholmod_factor* SuiteSparse::AnalyzeCholeskyWithUserOrdering(cholmod_sparse* A,
+ const vector<int>& ordering) {
+ CHECK_EQ(ordering.size(), A->nrow);
+ cc_.nmethods = 1 ;
+ cc_.method[0].ordering = CHOLMOD_GIVEN;
+ cholmod_factor* factor =
+ cholmod_analyze_p(A, const_cast<int*>(&ordering[0]), NULL, 0, &cc_);
+ CHECK_EQ(cc_.status, CHOLMOD_OK)
+ << "Cholmod symbolic analysis failed " << cc_.status;
+ CHECK_NOTNULL(factor);
+ return factor;
+}
+
+bool SuiteSparse::BlockAMDOrdering(const cholmod_sparse* A,
+ const vector<int>& row_blocks,
+ const vector<int>& col_blocks,
+ vector<int>* ordering) {
+ const int num_row_blocks = row_blocks.size();
+ const int num_col_blocks = col_blocks.size();
+
+ // Arrays storing the compressed column structure of the matrix
+ // incoding the block sparsity of A.
+ vector<int> block_cols;
+ vector<int> block_rows;
+
+ ScalarMatrixToBlockMatrix(A,
+ row_blocks,
+ col_blocks,
+ &block_rows,
+ &block_cols);
+
+ cholmod_sparse_struct block_matrix;
+ block_matrix.nrow = num_row_blocks;
+ block_matrix.ncol = num_col_blocks;
+ block_matrix.nzmax = block_rows.size();
+ block_matrix.p = reinterpret_cast<void*>(&block_cols[0]);
+ block_matrix.i = reinterpret_cast<void*>(&block_rows[0]);
+ block_matrix.x = NULL;
+ block_matrix.stype = A->stype;
+ block_matrix.itype = CHOLMOD_INT;
+ block_matrix.xtype = CHOLMOD_PATTERN;
+ block_matrix.dtype = CHOLMOD_DOUBLE;
+ block_matrix.sorted = 1;
+ block_matrix.packed = 1;
+
+ vector<int> block_ordering(num_row_blocks);
+ if (!cholmod_amd(&block_matrix, NULL, 0, &block_ordering[0], &cc_)) {
+ return false;
+ }
+
+ BlockOrderingToScalarOrdering(row_blocks, block_ordering, ordering);
+ return true;
+}
+
+void SuiteSparse::ScalarMatrixToBlockMatrix(const cholmod_sparse* A,
+ const vector<int>& row_blocks,
+ const vector<int>& col_blocks,
+ vector<int>* block_rows,
+ vector<int>* block_cols) {
+ CHECK_NOTNULL(block_rows)->clear();
+ CHECK_NOTNULL(block_cols)->clear();
+ const int num_row_blocks = row_blocks.size();
+ const int num_col_blocks = col_blocks.size();
+
+ vector<int> row_block_starts(num_row_blocks);
+ for (int i = 0, cursor = 0; i < num_row_blocks; ++i) {
+ row_block_starts[i] = cursor;
+ cursor += row_blocks[i];
+ }
+
+ // The reinterpret_cast is needed here because CHOLMOD stores arrays
+ // as void*.
+ const int* scalar_cols = reinterpret_cast<const int*>(A->p);
+ const int* scalar_rows = reinterpret_cast<const int*>(A->i);
+
+ // This loop extracts the block sparsity of the scalar sparse matrix
+ // A. It does so by iterating over the columns, but only considering
+ // the columns corresponding to the first element of each column
+ // block. Within each column, the inner loop iterates over the rows,
+ // and detects the presence of a row block by checking for the
+ // presence of a non-zero entry corresponding to its first element.
+ block_cols->push_back(0);
+ int c = 0;
+ for (int col_block = 0; col_block < num_col_blocks; ++col_block) {
+ int column_size = 0;
+ for (int idx = scalar_cols[c]; idx < scalar_cols[c + 1]; ++idx) {
+ vector<int>::const_iterator it = lower_bound(row_block_starts.begin(),
+ row_block_starts.end(),
+ scalar_rows[idx]);
+ DCHECK(it != row_block_starts.end());
+ // Only consider the first row of each row block.
+ if (*it != scalar_rows[idx]) {
+ continue;
+ }
+
+ block_rows->push_back(it - row_block_starts.begin());
+ ++column_size;
+ }
+ block_cols->push_back(block_cols->back() + column_size);
+ c += col_blocks[col_block];
+ }
+}
+
+void SuiteSparse::BlockOrderingToScalarOrdering(
+ const vector<int>& blocks,
+ const vector<int>& block_ordering,
+ vector<int>* scalar_ordering) {
+ CHECK_EQ(blocks.size(), block_ordering.size());
+ const int num_blocks = blocks.size();
+
+ // block_starts = [0, block1, block1 + block2 ..]
+ vector<int> block_starts(num_blocks);
+ for (int i = 0, cursor = 0; i < num_blocks ; ++i) {
+ block_starts[i] = cursor;
+ cursor += blocks[i];
+ }
+
+ scalar_ordering->resize(block_starts.back() + blocks.back());
+ int cursor = 0;
+ for (int i = 0; i < num_blocks; ++i) {
+ const int block_id = block_ordering[i];
+ const int block_size = blocks[block_id];
+ int block_position = block_starts[block_id];
+ for (int j = 0; j < block_size; ++j) {
+ (*scalar_ordering)[cursor++] = block_position++;
+ }
+ }
+}
+
bool SuiteSparse::Cholesky(cholmod_sparse* A, cholmod_factor* L) {
CHECK_NOTNULL(A);
CHECK_NOTNULL(L);