Enable pre-ordering for SPARSE_NORMAL_CHOLESKY.
Sparse Cholesky factorization algorithms use a fill-reducing
ordering to permute the columns of the Jacobian matrix. There
are two ways of doing this.
1. Compute the Jacobian matrix in some order and then have the
factorization algorithm permute the columns of the Jacobian.
2. Compute the Jacobian with its columns already permuted.
The first option incurs a significant memory penalty. The
factorization algorithm has to make a copy of the permuted
Jacobian matrix.
Starting with this change Ceres pre-permutes the columns of the
Jacobian matrix and generally speaking, there is no performance
penalty for doing so.
In some rare cases, it is worth using a more complicated
reordering algorithm which has slightly better runtime
performance at the expense of an extra copy of the Jacobian
matrix. Setting Solver::Options::use_postordering to true
enables this tradeoff.
This change also removes Solver::Options::use_block_amd
as an option. All matrices are ordered using their block
structure. The ability to order them by their scalar
sparsity structure has been removed.
Here is what performance on looks like on some BAL problems.
Memory
======
HEAD pre-ordering
16-22106 137957376.0 113516544.0
49-7776 56688640.0 46628864.0
245-198739 1718005760.0 1383550976.0
257-65132 387715072.0 319512576.0
356-226730 2014826496.0 1626087424.0
744-543562 4903358464.0 3957878784.0
1024-110968 968626176.0 822071296.0
Time
====
HEAD pre-ordering
16-22106 3.8 3.7
49-7776 1.9 1.8
245-198739 82.6 81.9
257-65132 14.0 13.4
356-226730 98.8 95.8
744-543562 325.2 301.6
1024-110968 42.1 37.1
Change-Id: I6b2e25f3fed7310f88905386a7898ac94d37467e
diff --git a/examples/bundle_adjuster.cc b/examples/bundle_adjuster.cc
index 8fff9f0..59420c9 100644
--- a/examples/bundle_adjuster.cc
+++ b/examples/bundle_adjuster.cc
@@ -96,9 +96,6 @@
"accuracy of each linear solve of the truncated newton step. "
"Changing this parameter can affect solve performance.");
-DEFINE_bool(use_block_amd, true, "Use a block oriented fill reducing "
- "ordering.");
-
DEFINE_int32(num_threads, 1, "Number of threads.");
DEFINE_int32(num_iterations, 5, "Number of iterations.");
DEFINE_double(max_solver_time, 1e32, "Maximum solve time in seconds.");
@@ -139,8 +136,6 @@
const int camera_block_size = bal_problem->camera_block_size();
double* cameras = bal_problem->mutable_cameras();
- options->use_block_amd = FLAGS_use_block_amd;
-
if (options->use_inner_iterations) {
if (FLAGS_blocks_for_inner_iterations == "cameras") {
LOG(INFO) << "Camera blocks for inner iterations";
diff --git a/include/ceres/solver.h b/include/ceres/solver.h
index 8c2ff32..97d082d 100644
--- a/include/ceres/solver.h
+++ b/include/ceres/solver.h
@@ -95,13 +95,8 @@
#endif
num_linear_solver_threads = 1;
-
-#if defined(CERES_NO_SUITESPARSE)
- use_block_amd = false;
-#else
- use_block_amd = true;
-#endif
linear_solver_ordering = NULL;
+ use_postordering = false;
use_inner_iterations = false;
inner_iteration_ordering = NULL;
linear_solver_min_num_iterations = 1;
@@ -356,19 +351,29 @@
// deallocate the memory when destroyed.
ParameterBlockOrdering* linear_solver_ordering;
- // By virtue of the modeling layer in Ceres being block oriented,
- // all the matrices used by Ceres are also block oriented. When
- // doing sparse direct factorization of these matrices (for
- // SPARSE_NORMAL_CHOLESKY, SPARSE_SCHUR and ITERATIVE in
- // conjunction with CLUSTER_TRIDIAGONAL AND CLUSTER_JACOBI
- // preconditioners), the fill-reducing ordering algorithms can
- // either be run on the block or the scalar form of these matrices.
- // Running it on the block form exposes more of the super-nodal
- // structure of the matrix to the factorization routines. Setting
- // this parameter to true runs the ordering algorithms in block
- // form. Currently this option only makes sense with
- // sparse_linear_algebra_library = SUITE_SPARSE.
- bool use_block_amd;
+ // Note: This option only applies to the SPARSE_NORMAL_CHOLESKY
+ // solver when used with SUITE_SPARSE.
+
+ // Sparse Cholesky factorization algorithms use a fill-reducing
+ // ordering to permute the columns of the Jacobian matrix. There
+ // are two ways of doing this.
+
+ // 1. Compute the Jacobian matrix in some order and then have the
+ // factorization algorithm permute the columns of the Jacobian.
+
+ // 2. Compute the Jacobian with its columns already permuted.
+
+ // The first option incurs a significant memory penalty. The
+ // factorization algorithm has to make a copy of the permuted
+ // Jacobian matrix, thus Ceres pre-permutes the columns of the
+ // Jacobian matrix and generally speaking, there is no performance
+ // penalty for doing so.
+
+ // In some rare cases, it is worth using a more complicated
+ // reordering algorithm which has slightly better runtime
+ // performance at the expense of an extra copy of the Jacobian
+ // // matrix. Setting use_postordering to true enables this tradeoff.
+ bool use_postordering;
// Some non-linear least squares problems have additional
// structure in the way the parameter blocks interact that it is
diff --git a/internal/ceres/iterative_schur_complement_solver.cc b/internal/ceres/iterative_schur_complement_solver.cc
index cf5562a..15e0bdc 100644
--- a/internal/ceres/iterative_schur_complement_solver.cc
+++ b/internal/ceres/iterative_schur_complement_solver.cc
@@ -99,7 +99,6 @@
preconditioner_options.type = options_.preconditioner_type;
preconditioner_options.sparse_linear_algebra_library =
options_.sparse_linear_algebra_library;
- preconditioner_options.use_block_amd = options_.use_block_amd;
preconditioner_options.num_threads = options_.num_threads;
preconditioner_options.row_block_size = options_.row_block_size;
preconditioner_options.e_block_size = options_.e_block_size;
diff --git a/internal/ceres/linear_solver.h b/internal/ceres/linear_solver.h
index f4bd0fb..ca10faa 100644
--- a/internal/ceres/linear_solver.h
+++ b/internal/ceres/linear_solver.h
@@ -74,7 +74,7 @@
: type(SPARSE_NORMAL_CHOLESKY),
preconditioner_type(JACOBI),
sparse_linear_algebra_library(SUITE_SPARSE),
- use_block_amd(true),
+ use_postordering(false),
min_num_iterations(1),
max_num_iterations(1),
num_threads(1),
@@ -90,8 +90,8 @@
SparseLinearAlgebraLibraryType sparse_linear_algebra_library;
- // See solver.h for explanation of this option.
- bool use_block_amd;
+ // See solver.h for information about this flag.
+ bool use_postordering;
// Number of internal iterations that the solver uses. This
// parameter only makes sense for iterative solvers like CG.
diff --git a/internal/ceres/preconditioner.h b/internal/ceres/preconditioner.h
index bfc8464..d7c8829 100644
--- a/internal/ceres/preconditioner.h
+++ b/internal/ceres/preconditioner.h
@@ -47,7 +47,6 @@
Options()
: type(JACOBI),
sparse_linear_algebra_library(SUITE_SPARSE),
- use_block_amd(true),
num_threads(1),
row_block_size(Eigen::Dynamic),
e_block_size(Eigen::Dynamic),
@@ -58,9 +57,6 @@
SparseLinearAlgebraLibraryType sparse_linear_algebra_library;
- // See solver.h for explanation of this option.
- bool use_block_amd;
-
// If possible, how many threads the preconditioner can use.
int num_threads;
diff --git a/internal/ceres/schur_complement_solver.cc b/internal/ceres/schur_complement_solver.cc
index 875f455..8afb121 100644
--- a/internal/ceres/schur_complement_solver.cc
+++ b/internal/ceres/schur_complement_solver.cc
@@ -286,11 +286,7 @@
// Symbolic factorization is computed if we don't already have one handy.
if (factor_ == NULL) {
- if (options().use_block_amd) {
- factor_ = ss_.BlockAnalyzeCholesky(cholmod_lhs, blocks_, blocks_);
- } else {
- factor_ = ss_.AnalyzeCholesky(cholmod_lhs);
- }
+ factor_ = ss_.BlockAnalyzeCholesky(cholmod_lhs, blocks_, blocks_);
}
cholmod_dense* cholmod_solution =
diff --git a/internal/ceres/solver_impl.cc b/internal/ceres/solver_impl.cc
index 0d7b83c..8217484 100644
--- a/internal/ceres/solver_impl.cc
+++ b/internal/ceres/solver_impl.cc
@@ -1004,7 +1004,8 @@
return transformed_program.release();
}
- if (options->linear_solver_type == SPARSE_NORMAL_CHOLESKY) {
+ if (options->linear_solver_type == SPARSE_NORMAL_CHOLESKY &&
+ options->sparse_linear_algebra_library == SUITE_SPARSE) {
ReorderProgramForSparseNormalCholesky(transformed_program.get());
return transformed_program.release();
}
@@ -1091,11 +1092,10 @@
linear_solver_options.preconditioner_type = options->preconditioner_type;
linear_solver_options.sparse_linear_algebra_library =
options->sparse_linear_algebra_library;
-
+ linear_solver_options.use_postordering = options->use_postordering;
linear_solver_options.num_threads = options->num_linear_solver_threads;
options->num_linear_solver_threads = linear_solver_options.num_threads;
- linear_solver_options.use_block_amd = options->use_block_amd;
const map<int, set<double*> >& groups =
options->linear_solver_ordering->group_to_elements();
for (map<int, set<double*> >::const_iterator it = groups.begin();
diff --git a/internal/ceres/sparse_normal_cholesky_solver.cc b/internal/ceres/sparse_normal_cholesky_solver.cc
index 0cd064b..bc1f983 100644
--- a/internal/ceres/sparse_normal_cholesky_solver.cc
+++ b/internal/ceres/sparse_normal_cholesky_solver.cc
@@ -204,14 +204,15 @@
event_logger.AddEvent("Setup");
if (factor_ == NULL) {
- if (options_.use_block_amd) {
+ if (options_.use_postordering) {
factor_ = ss_.BlockAnalyzeCholesky(&lhs,
A->col_blocks(),
A->row_blocks());
} else {
- factor_ = ss_.AnalyzeCholesky(&lhs);
+ factor_ = ss_.AnalyzeCholeskyWithNaturalOrdering(&lhs);
}
}
+
event_logger.AddEvent("Analysis");
cholmod_dense* sol = ss_.SolveCholesky(&lhs, factor_, rhs);
diff --git a/internal/ceres/unsymmetric_linear_solver_test.cc b/internal/ceres/unsymmetric_linear_solver_test.cc
index 0b0d593..c8a15c0 100644
--- a/internal/ceres/unsymmetric_linear_solver_test.cc
+++ b/internal/ceres/unsymmetric_linear_solver_test.cc
@@ -62,7 +62,6 @@
LinearSolver::Options options;
options.type = linear_solver_type;
options.sparse_linear_algebra_library = sparse_linear_algebra_library;
- options.use_block_amd = false;
scoped_ptr<LinearSolver> solver(LinearSolver::Create(options));
LinearSolver::PerSolveOptions per_solve_options;
diff --git a/internal/ceres/visibility_based_preconditioner.cc b/internal/ceres/visibility_based_preconditioner.cc
index f5c46bf..4b1e26a 100644
--- a/internal/ceres/visibility_based_preconditioner.cc
+++ b/internal/ceres/visibility_based_preconditioner.cc
@@ -421,11 +421,7 @@
// Symbolic factorization is computed if we don't already have one handy.
if (factor_ == NULL) {
- if (options_.use_block_amd) {
- factor_ = ss_.BlockAnalyzeCholesky(lhs, block_size_, block_size_);
- } else {
- factor_ = ss_.AnalyzeCholesky(lhs);
- }
+ factor_ = ss_.BlockAnalyzeCholesky(lhs, block_size_, block_size_);
}
bool status = ss_.Cholesky(lhs, factor_);