Block oriented fill reducing orderings. By virtue of the modeling layer in Ceres being block oriented, all the matrices used by Ceres are also block oriented. When doing sparse direct factorization of these matrices, the fill-reducing ordering algorithms can either be run on the block or the scalar form of these matrices. Running it on the block form exposes more of the super-nodal structure of the matrix to the Cholesky factorization routines. This leads to substantial gains in factorization performance. This changelist adds support for approximate minimium degree orderings to be computed on the block structure of the Schur complement matrix. This affects, SchurComplementSolver and VisibilityBasedPreconditioner and SparseNormalCholesky when using SuiteSparse. A bool, use_block_amd has been added to Solver::Options and bundle_adjuster.cc has been updated to allow testing with it. When combined with a multithreaded Schur elimination, speed ups can be seen quite uniformly across the board. For some problems this can be dramatic, reducing the factorization time from 70 seconds down to 17 seconds. Change-Id: I15ebb0afcbc85ada032ec8d179ee3a2f7c8d3e46

commit: 7a3c43b847a63a2c45d3c58c84081b0a6867f582 [log] [tgz]
author: Sameer Agarwal <sameeragarwal@google.com> Tue Jun 05 23:10:59 2012 -0700
committer: Sameer Agarwal <sameeragarwal@google.com> Wed Jun 06 22:02:17 2012 -0700
tree: ef2e427daf88c697171e4e580b31b8b77b02a36b
parent: d28b3c86d36088aabc2331b8bab8dc15fb4f0eaa [diff] [blame]
diff --git a/internal/ceres/suitesparse.cc b/internal/ceres/suitesparse.cc
index 1cf6a74..c02d305 100644
--- a/internal/ceres/suitesparse.cc
+++ b/internal/ceres/suitesparse.cc

@@ -29,15 +29,14 @@
 // Author: sameeragarwal@google.com (Sameer Agarwal)
 
 #ifndef CERES_NO_SUITESPARSE
-
 #include "ceres/suitesparse.h"
 
+#include <vector>
 #include "cholmod.h"
 #include "ceres/compressed_row_sparse_matrix.h"
 #include "ceres/triplet_sparse_matrix.h"
 namespace ceres {
 namespace internal {
-
 cholmod_sparse* SuiteSparse::CreateSparseMatrix(TripletSparseMatrix* A) {
   cholmod_triplet triplet;
 
@@ -111,6 +110,13 @@
 }
 
 cholmod_factor* SuiteSparse::AnalyzeCholesky(cholmod_sparse* A) {
+  // Cholmod can try multiple re-ordering strategies to find a fill
+  // reducing ordering. Here we just tell it use AMD with automatic
+  // matrix dependence choice of supernodal versus simplicial
+  // factorization.
+  cc_.nmethods = 1;
+  cc_.method[0].ordering = CHOLMOD_AMD;
+  cc_.supernodal = CHOLMOD_AUTO;
   cholmod_factor* factor = cholmod_analyze(A, &cc_);
   CHECK_EQ(cc_.status, CHOLMOD_OK)
       << "Cholmod symbolic analysis failed " << cc_.status;
@@ -118,6 +124,146 @@
   return factor;
 }
 
+cholmod_factor* SuiteSparse::BlockAnalyzeCholesky(
+    cholmod_sparse* A,
+    const vector<int>& row_blocks,
+    const vector<int>& col_blocks) {
+  vector<int> ordering;
+  if (!BlockAMDOrdering(A, row_blocks, col_blocks, &ordering)) {
+    return NULL;
+  }
+  return AnalyzeCholeskyWithUserOrdering(A, ordering);
+}
+
+cholmod_factor* SuiteSparse::AnalyzeCholeskyWithUserOrdering(cholmod_sparse* A,
+                                                             const vector<int>& ordering) {
+  CHECK_EQ(ordering.size(), A->nrow);
+  cc_.nmethods = 1 ;
+  cc_.method[0].ordering = CHOLMOD_GIVEN;
+  cholmod_factor* factor  =
+      cholmod_analyze_p(A, const_cast<int*>(&ordering[0]), NULL, 0, &cc_);
+  CHECK_EQ(cc_.status, CHOLMOD_OK)
+      << "Cholmod symbolic analysis failed " << cc_.status;
+  CHECK_NOTNULL(factor);
+  return factor;
+}
+
+bool SuiteSparse::BlockAMDOrdering(const cholmod_sparse* A,
+                                   const vector<int>& row_blocks,
+                                   const vector<int>& col_blocks,
+                                   vector<int>* ordering) {
+  const int num_row_blocks = row_blocks.size();
+  const int num_col_blocks = col_blocks.size();
+
+  // Arrays storing the compressed column structure of the matrix
+  // incoding the block sparsity of A.
+  vector<int> block_cols;
+  vector<int> block_rows;
+
+  ScalarMatrixToBlockMatrix(A,
+                            row_blocks,
+                            col_blocks,
+                            &block_rows,
+                            &block_cols);
+
+  cholmod_sparse_struct block_matrix;
+  block_matrix.nrow = num_row_blocks;
+  block_matrix.ncol = num_col_blocks;
+  block_matrix.nzmax = block_rows.size();
+  block_matrix.p = reinterpret_cast<void*>(&block_cols[0]);
+  block_matrix.i = reinterpret_cast<void*>(&block_rows[0]);
+  block_matrix.x = NULL;
+  block_matrix.stype = A->stype;
+  block_matrix.itype = CHOLMOD_INT;
+  block_matrix.xtype = CHOLMOD_PATTERN;
+  block_matrix.dtype = CHOLMOD_DOUBLE;
+  block_matrix.sorted = 1;
+  block_matrix.packed = 1;
+
+  vector<int> block_ordering(num_row_blocks);
+  if (!cholmod_amd(&block_matrix, NULL, 0, &block_ordering[0], &cc_)) {
+    return false;
+  }
+
+  BlockOrderingToScalarOrdering(row_blocks, block_ordering, ordering);
+  return true;
+}
+
+void SuiteSparse::ScalarMatrixToBlockMatrix(const cholmod_sparse* A,
+                                            const vector<int>& row_blocks,
+                                            const vector<int>& col_blocks,
+                                            vector<int>* block_rows,
+                                            vector<int>* block_cols) {
+  CHECK_NOTNULL(block_rows)->clear();
+  CHECK_NOTNULL(block_cols)->clear();
+  const int num_row_blocks = row_blocks.size();
+  const int num_col_blocks = col_blocks.size();
+
+  vector<int> row_block_starts(num_row_blocks);
+  for (int i = 0, cursor = 0; i < num_row_blocks; ++i) {
+    row_block_starts[i] = cursor;
+    cursor += row_blocks[i];
+  }
+
+  // The reinterpret_cast is needed here because CHOLMOD stores arrays
+  // as void*.
+  const int* scalar_cols =  reinterpret_cast<const int*>(A->p);
+  const int* scalar_rows =  reinterpret_cast<const int*>(A->i);
+
+  // This loop extracts the block sparsity of the scalar sparse matrix
+  // A. It does so by iterating over the columns, but only considering
+  // the columns corresponding to the first element of each column
+  // block. Within each column, the inner loop iterates over the rows,
+  // and detects the presence of a row block by checking for the
+  // presence of a non-zero entry corresponding to its first element.
+  block_cols->push_back(0);
+  int c = 0;
+  for (int col_block = 0; col_block < num_col_blocks; ++col_block) {
+    int column_size = 0;
+    for (int idx = scalar_cols[c]; idx < scalar_cols[c + 1]; ++idx) {
+      vector<int>::const_iterator it = lower_bound(row_block_starts.begin(),
+                                                   row_block_starts.end(),
+                                                   scalar_rows[idx]);
+      DCHECK(it != row_block_starts.end());
+      // Only consider the first row of each row block.
+      if (*it != scalar_rows[idx]) {
+        continue;
+      }
+
+      block_rows->push_back(it - row_block_starts.begin());
+      ++column_size;
+    }
+    block_cols->push_back(block_cols->back() + column_size);
+    c += col_blocks[col_block];
+  }
+}
+
+void SuiteSparse::BlockOrderingToScalarOrdering(
+    const vector<int>& blocks,
+    const vector<int>& block_ordering,
+    vector<int>* scalar_ordering) {
+  CHECK_EQ(blocks.size(), block_ordering.size());
+  const int num_blocks = blocks.size();
+
+  // block_starts = [0, block1, block1 + block2 ..]
+  vector<int> block_starts(num_blocks);
+  for (int i = 0, cursor = 0; i < num_blocks ; ++i) {
+    block_starts[i] = cursor;
+    cursor += blocks[i];
+  }
+
+  scalar_ordering->resize(block_starts.back() + blocks.back());
+  int cursor = 0;
+  for (int i = 0; i < num_blocks; ++i) {
+    const int block_id = block_ordering[i];
+    const int block_size = blocks[block_id];
+    int block_position = block_starts[block_id];
+    for (int j = 0; j < block_size; ++j) {
+      (*scalar_ordering)[cursor++] = block_position++;
+    }
+  }
+}
+
 bool SuiteSparse::Cholesky(cholmod_sparse* A, cholmod_factor* L) {
   CHECK_NOTNULL(A);
   CHECK_NOTNULL(L);
commit	7a3c43b847a63a2c45d3c58c84081b0a6867f582	[log] [tgz]
author	Sameer Agarwal <sameeragarwal@google.com>	Tue Jun 05 23:10:59 2012 -0700
committer	Sameer Agarwal <sameeragarwal@google.com>	Wed Jun 06 22:02:17 2012 -0700
tree	ef2e427daf88c697171e4e580b31b8b77b02a36b
parent	d28b3c86d36088aabc2331b8bab8dc15fb4f0eaa [diff] [blame]