Let ITERATIVE_SCHUR use an explicit Schur Complement matrix.
Up till now ITERATIVE_SCHUR evaluates matrix-vector products
between the Schur complement and a vector implicitly by exploiting
the algebraic expression for the Schur complement.
This cost of this evaluation scales with the number of non-zeros
in the Jacobian.
For small to medium sized problems there is a sweet spot where
computing the Schur complement is cheap enough that it is much
more efficient to explicitly compute it and use it for evaluating
the matrix-vector products.
This changes implements support for an explicit Schur complement
in ITERATIVE_SCHUR in combination with the SCHUR_JACOBI preconditioner.
API wise a new bool Solver::Options::use_explicit_schur_complement
has been added.
The implementation extends the SparseSchurComplementSolver to use
Conjugate Gradients.
Example speedup:
use_explicit_schur_complement = false
Time (in seconds):
Preprocessor 0.585
Residual evaluation 0.319
Jacobian evaluation 1.590
Linear solver 25.685
Minimizer 27.990
Postprocessor 0.010
Total 28.585
use_explicit_schur_complement = true
Time (in seconds):
Preprocessor 0.638
Residual evaluation 0.318
Jacobian evaluation 1.507
Linear solver 5.930
Minimizer 8.144
Postprocessor 0.010
Total 8.791
Which indicates an end-to-end speedup of more than 3x, with the linear
solver being sped up by > 4x.
The idea to explore this optimization was inspired by the recent paper:
Mining structure fragments for smart bundle adjustment
L. Carlone, P. Alcantarilla, H. Chiu, K. Zsolt, F. Dellaert
British Machine Vision Conference, 2014
which uses a more complicated algorithm to compute parts of the
Schur complement to speed up the matrix-vector product.
Change-Id: I95324af0ab351faa1600f5204039a1d2a64ae61d
diff --git a/examples/bundle_adjuster.cc b/examples/bundle_adjuster.cc
index 296611f..7787d93 100644
--- a/examples/bundle_adjuster.cc
+++ b/examples/bundle_adjuster.cc
@@ -79,6 +79,8 @@
DEFINE_string(linear_solver, "sparse_schur", "Options are: "
"sparse_schur, dense_schur, iterative_schur, sparse_normal_cholesky, "
"dense_qr, dense_normal_cholesky and cgnr.");
+DEFINE_bool(explicit_schur_complement, false, "If using ITERATIVE_SCHUR "
+ "then explicitly compute the Schur complement.");
DEFINE_string(preconditioner, "jacobi", "Options are: "
"identity, jacobi, schur_jacobi, cluster_jacobi, "
"cluster_tridiagonal.");
@@ -136,6 +138,7 @@
FLAGS_dense_linear_algebra_library,
&options->dense_linear_algebra_library_type));
options->num_linear_solver_threads = FLAGS_num_threads;
+ options->use_explicit_schur_complement = FLAGS_explicit_schur_complement;
}
void SetOrdering(BALProblem* bal_problem, Solver::Options* options) {
diff --git a/include/ceres/solver.h b/include/ceres/solver.h
index 0af34ca..a5efa2a 100644
--- a/include/ceres/solver.h
+++ b/include/ceres/solver.h
@@ -118,6 +118,7 @@
#endif
num_linear_solver_threads = 1;
+ use_explicit_schur_complement = false;
use_postordering = false;
dynamic_sparsity = false;
min_linear_solver_iterations = 0;
@@ -496,6 +497,29 @@
// smaller than the group containing cameras.
shared_ptr<ParameterBlockOrdering> linear_solver_ordering;
+ // Use an explicitly computed Schur complement matrix with
+ // ITERATIVE_SCHUR.
+ //
+ // By default this option is disabled and ITERATIVE_SCHUR
+ // evaluates evaluates matrix-vector products between the Schur
+ // complement and a vector implicitly by exploiting the algebraic
+ // expression for the Schur complement.
+ //
+ // The cost of this evaluation scales with the number of non-zeros
+ // in the Jacobian.
+ //
+ // For small to medium sized problems there is a sweet spot where
+ // computing the Schur complement is cheap enough that it is much
+ // more efficient to explicitly compute it and use it for evaluating
+ // the matrix-vector products.
+ //
+ // Enabling this option tells ITERATIVE_SCHUR to use an explicitly
+ // computed Schur complement.
+ //
+ // NOTE: This option can only be used with the SCHUR_JACOBI
+ // preconditioner.
+ bool use_explicit_schur_complement;
+
// Sparse Cholesky factorization algorithms use a fill-reducing
// ordering to permute the columns of the Jacobian matrix. There
// are two ways of doing this.
diff --git a/internal/ceres/linear_solver.cc b/internal/ceres/linear_solver.cc
index d905ec2..e479b75 100644
--- a/internal/ceres/linear_solver.cc
+++ b/internal/ceres/linear_solver.cc
@@ -96,7 +96,11 @@
return new DenseSchurComplementSolver(options);
case ITERATIVE_SCHUR:
- return new IterativeSchurComplementSolver(options);
+ if (options.use_explicit_schur_complement) {
+ return new SparseSchurComplementSolver(options);
+ } else {
+ return new IterativeSchurComplementSolver(options);
+ }
case DENSE_QR:
return new DenseQRSolver(options);
diff --git a/internal/ceres/linear_solver.h b/internal/ceres/linear_solver.h
index 58b9044..5f59765 100644
--- a/internal/ceres/linear_solver.h
+++ b/internal/ceres/linear_solver.h
@@ -99,6 +99,7 @@
sparse_linear_algebra_library_type(SUITE_SPARSE),
use_postordering(false),
dynamic_sparsity(false),
+ use_explicit_schur_complement(false),
min_num_iterations(1),
max_num_iterations(1),
num_threads(1),
@@ -114,9 +115,10 @@
DenseLinearAlgebraLibraryType dense_linear_algebra_library_type;
SparseLinearAlgebraLibraryType sparse_linear_algebra_library_type;
- // See solver.h for information about this flag.
+ // See solver.h for information about these flags.
bool use_postordering;
bool dynamic_sparsity;
+ bool use_explicit_schur_complement;
// Number of internal iterations that the solver uses. This
// parameter only makes sense for iterative solvers like CG.
diff --git a/internal/ceres/schur_complement_solver.cc b/internal/ceres/schur_complement_solver.cc
index d2aa168..33f812b 100644
--- a/internal/ceres/schur_complement_solver.cc
+++ b/internal/ceres/schur_complement_solver.cc
@@ -40,6 +40,7 @@
#include "ceres/block_random_access_sparse_matrix.h"
#include "ceres/block_sparse_matrix.h"
#include "ceres/block_structure.h"
+#include "ceres/conjugate_gradients_solver.h"
#include "ceres/cxsparse.h"
#include "ceres/detect_structure.h"
#include "ceres/internal/eigen.h"
@@ -56,6 +57,61 @@
namespace ceres {
namespace internal {
+namespace {
+
+class BlockRandomAccessSparseMatrixAdapter : public LinearOperator {
+ public:
+ explicit BlockRandomAccessSparseMatrixAdapter(
+ const BlockRandomAccessSparseMatrix& m)
+ : m_(m) {
+ }
+
+ virtual ~BlockRandomAccessSparseMatrixAdapter() {}
+
+ // y = y + Ax;
+ virtual void RightMultiply(const double* x, double* y) const {
+ m_.SymmetricRightMultiply(x, y);
+ }
+
+ // y = y + A'x;
+ virtual void LeftMultiply(const double* x, double* y) const {
+ m_.SymmetricRightMultiply(x, y);
+ }
+
+ virtual int num_rows() const { return m_.num_rows(); }
+ virtual int num_cols() const { return m_.num_rows(); }
+
+ private:
+ const BlockRandomAccessSparseMatrix& m_;
+};
+
+class BlockRandomAccessDiagonalMatrixAdapter : public LinearOperator {
+ public:
+ explicit BlockRandomAccessDiagonalMatrixAdapter(
+ const BlockRandomAccessDiagonalMatrix& m)
+ : m_(m) {
+ }
+
+ virtual ~BlockRandomAccessDiagonalMatrixAdapter() {}
+
+ // y = y + Ax;
+ virtual void RightMultiply(const double* x, double* y) const {
+ m_.RightMultiply(x, y);
+ }
+
+ // y = y + A'x;
+ virtual void LeftMultiply(const double* x, double* y) const {
+ m_.RightMultiply(x, y);
+ }
+
+ virtual int num_rows() const { return m_.num_rows(); }
+ virtual int num_cols() const { return m_.num_rows(); }
+
+ private:
+ const BlockRandomAccessDiagonalMatrix& m_;
+};
+
+} // namespace
LinearSolver::Summary SchurComplementSolver::SolveImpl(
BlockSparseMatrix* A,
@@ -82,7 +138,7 @@
double* reduced_solution = x + A->num_cols() - lhs_->num_cols();
const LinearSolver::Summary summary =
- SolveReducedLinearSystem(reduced_solution);
+ SolveReducedLinearSystem(per_solve_options, reduced_solution);
event_logger.AddEvent("ReducedSolve");
if (summary.termination_type == LINEAR_SOLVER_SUCCESS) {
@@ -115,7 +171,9 @@
// BlockRandomAccessDenseMatrix. The linear system is solved using
// Eigen's Cholesky factorization.
LinearSolver::Summary
-DenseSchurComplementSolver::SolveReducedLinearSystem(double* solution) {
+DenseSchurComplementSolver::SolveReducedLinearSystem(
+ const LinearSolver::PerSolveOptions& per_solve_options,
+ double* solution) {
LinearSolver::Summary summary;
summary.num_iterations = 0;
summary.termination_type = LINEAR_SOLVER_SUCCESS;
@@ -249,14 +307,25 @@
}
LinearSolver::Summary
-SparseSchurComplementSolver::SolveReducedLinearSystem(double* solution) {
+SparseSchurComplementSolver::SolveReducedLinearSystem(
+ const LinearSolver::PerSolveOptions& per_solve_options,
+ double* solution) {
+ if (options().type == ITERATIVE_SCHUR) {
+ CHECK(options().use_explicit_schur_complement);
+ return SolveReducedLinearSystemUsingConjugateGradients(per_solve_options,
+ solution);
+ }
+
switch (options().sparse_linear_algebra_library_type) {
case SUITE_SPARSE:
- return SolveReducedLinearSystemUsingSuiteSparse(solution);
+ return SolveReducedLinearSystemUsingSuiteSparse(per_solve_options,
+ solution);
case CX_SPARSE:
- return SolveReducedLinearSystemUsingCXSparse(solution);
+ return SolveReducedLinearSystemUsingCXSparse(per_solve_options,
+ solution);
case EIGEN_SPARSE:
- return SolveReducedLinearSystemUsingEigen(solution);
+ return SolveReducedLinearSystemUsingEigen(per_solve_options,
+ solution);
default:
LOG(FATAL) << "Unknown sparse linear algebra library : "
<< options().sparse_linear_algebra_library_type;
@@ -270,6 +339,7 @@
// CHOLMOD's sparse cholesky factorization routines.
LinearSolver::Summary
SparseSchurComplementSolver::SolveReducedLinearSystemUsingSuiteSparse(
+ const LinearSolver::PerSolveOptions& per_solve_options,
double* solution) {
#ifdef CERES_NO_SUITESPARSE
@@ -377,6 +447,7 @@
// CXSparse's sparse cholesky factorization routines.
LinearSolver::Summary
SparseSchurComplementSolver::SolveReducedLinearSystemUsingCXSparse(
+ const LinearSolver::PerSolveOptions& per_solve_options,
double* solution) {
#ifdef CERES_NO_CXSPARSE
@@ -433,6 +504,7 @@
// Eigen's sparse cholesky factorization routines.
LinearSolver::Summary
SparseSchurComplementSolver::SolveReducedLinearSystemUsingEigen(
+ const LinearSolver::PerSolveOptions& per_solve_options,
double* solution) {
#ifndef CERES_USE_EIGEN_SPARSE
@@ -514,5 +586,79 @@
#endif // CERES_USE_EIGEN_SPARSE
}
+LinearSolver::Summary
+SparseSchurComplementSolver::SolveReducedLinearSystemUsingConjugateGradients(
+ const LinearSolver::PerSolveOptions& per_solve_options,
+ double* solution) {
+ const int num_rows = lhs()->num_rows();
+ // The case where there are no f blocks, and the system is block
+ // diagonal.
+ if (num_rows == 0) {
+ LinearSolver::Summary summary;
+ summary.num_iterations = 0;
+ summary.termination_type = LINEAR_SOLVER_SUCCESS;
+ summary.message = "Success.";
+ return summary;
+ }
+
+ // Only SCHUR_JACOBI is supported over here right now.
+ CHECK_EQ(options().preconditioner_type, SCHUR_JACOBI);
+
+ if (preconditioner_.get() == NULL) {
+ preconditioner_.reset(new BlockRandomAccessDiagonalMatrix(blocks_));
+ }
+
+ BlockRandomAccessSparseMatrix* sc =
+ down_cast<BlockRandomAccessSparseMatrix*>(
+ const_cast<BlockRandomAccessMatrix*>(lhs()));
+
+ // Extract block diagonal from the Schur complement to construct the
+ // schur_jacobi preconditioner.
+ for (int i = 0; i < blocks_.size(); ++i) {
+ const int block_size = blocks_[i];
+
+ int sc_r, sc_c, sc_row_stride, sc_col_stride;
+ CellInfo* sc_cell_info =
+ CHECK_NOTNULL(sc->GetCell(i, i,
+ &sc_r, &sc_c,
+ &sc_row_stride, &sc_col_stride));
+ MatrixRef sc_m(sc_cell_info->values, sc_row_stride, sc_col_stride);
+
+ int pre_r, pre_c, pre_row_stride, pre_col_stride;
+ CellInfo* pre_cell_info = CHECK_NOTNULL(
+ preconditioner_->GetCell(i, i,
+ &pre_r, &pre_c,
+ &pre_row_stride, &pre_col_stride));
+ MatrixRef pre_m(pre_cell_info->values, pre_row_stride, pre_col_stride);
+
+ pre_m.block(pre_r, pre_c, block_size, block_size) =
+ sc_m.block(sc_r, sc_c, block_size, block_size);
+ }
+ preconditioner_->Invert();
+
+ VectorRef(solution, num_rows).setZero();
+
+ scoped_ptr<LinearOperator> lhs_adapter(
+ new BlockRandomAccessSparseMatrixAdapter(*sc));
+ scoped_ptr<LinearOperator> preconditioner_adapter(
+ new BlockRandomAccessDiagonalMatrixAdapter(*preconditioner_));
+
+
+ LinearSolver::Options cg_options;
+ cg_options.min_num_iterations = options().min_num_iterations;
+ cg_options.max_num_iterations = options().max_num_iterations;
+ ConjugateGradientsSolver cg_solver(cg_options);
+
+ LinearSolver::PerSolveOptions cg_per_solve_options;
+ cg_per_solve_options.r_tolerance = per_solve_options.r_tolerance;
+ cg_per_solve_options.q_tolerance = per_solve_options.q_tolerance;
+ cg_per_solve_options.preconditioner = preconditioner_adapter.get();
+
+ return cg_solver.Solve(lhs_adapter.get(),
+ rhs(),
+ cg_per_solve_options,
+ solution);
+}
+
} // namespace internal
} // namespace ceres
diff --git a/internal/ceres/schur_complement_solver.h b/internal/ceres/schur_complement_solver.h
index 1b431dc..93d23e3 100644
--- a/internal/ceres/schur_complement_solver.h
+++ b/internal/ceres/schur_complement_solver.h
@@ -46,6 +46,7 @@
#include "ceres/suitesparse.h"
#include "ceres/internal/scoped_ptr.h"
#include "ceres/types.h"
+#include "ceres/block_random_access_diagonal_matrix.h"
#ifdef CERES_USE_EIGEN_SPARSE
#include "Eigen/SparseCholesky"
@@ -134,6 +135,7 @@
private:
virtual void InitStorage(const CompressedRowBlockStructure* bs) = 0;
virtual LinearSolver::Summary SolveReducedLinearSystem(
+ const LinearSolver::PerSolveOptions& per_solve_options,
double* solution) = 0;
LinearSolver::Options options_;
@@ -155,6 +157,7 @@
private:
virtual void InitStorage(const CompressedRowBlockStructure* bs);
virtual LinearSolver::Summary SolveReducedLinearSystem(
+ const LinearSolver::PerSolveOptions& per_solve_options,
double* solution);
CERES_DISALLOW_COPY_AND_ASSIGN(DenseSchurComplementSolver);
@@ -169,12 +172,19 @@
private:
virtual void InitStorage(const CompressedRowBlockStructure* bs);
virtual LinearSolver::Summary SolveReducedLinearSystem(
+ const LinearSolver::PerSolveOptions& per_solve_options,
double* solution);
LinearSolver::Summary SolveReducedLinearSystemUsingSuiteSparse(
+ const LinearSolver::PerSolveOptions& per_solve_options,
double* solution);
LinearSolver::Summary SolveReducedLinearSystemUsingCXSparse(
+ const LinearSolver::PerSolveOptions& per_solve_options,
double* solution);
LinearSolver::Summary SolveReducedLinearSystemUsingEigen(
+ const LinearSolver::PerSolveOptions& per_solve_options,
+ double* solution);
+ LinearSolver::Summary SolveReducedLinearSystemUsingConjugateGradients(
+ const LinearSolver::PerSolveOptions& per_solve_options,
double* solution);
// Size of the blocks in the Schur complement.
@@ -206,6 +216,7 @@
scoped_ptr<SimplicialLDLT> simplicial_ldlt_;
#endif
+ scoped_ptr<BlockRandomAccessDiagonalMatrix> preconditioner_;
CERES_DISALLOW_COPY_AND_ASSIGN(SparseSchurComplementSolver);
};
diff --git a/internal/ceres/solver.cc b/internal/ceres/solver.cc
index f90045b..e1c5ee3 100644
--- a/internal/ceres/solver.cc
+++ b/internal/ceres/solver.cc
@@ -120,6 +120,14 @@
OPTION_GT(max_consecutive_nonmonotonic_steps, 0);
}
+ if (options.linear_solver_type == ITERATIVE_SCHUR &&
+ options.use_explicit_schur_complement &&
+ options.preconditioner_type != SCHUR_JACOBI) {
+ *error = "use_explicit_schur_complement only supports"
+ "SCHUR_JACOBI as the preconditioner.";
+ return false;
+ }
+
if (options.preconditioner_type == CLUSTER_JACOBI &&
options.sparse_linear_algebra_library_type != SUITE_SPARSE) {
*error = "CLUSTER_JACOBI requires "
diff --git a/internal/ceres/trust_region_preprocessor.cc b/internal/ceres/trust_region_preprocessor.cc
index ba3baa1..22ea1ec 100644
--- a/internal/ceres/trust_region_preprocessor.cc
+++ b/internal/ceres/trust_region_preprocessor.cc
@@ -184,6 +184,8 @@
options.sparse_linear_algebra_library_type;
pp->linear_solver_options.dense_linear_algebra_library_type =
options.dense_linear_algebra_library_type;
+ pp->linear_solver_options.use_explicit_schur_complement =
+ options.use_explicit_schur_complement;
pp->linear_solver_options.dynamic_sparsity = options.dynamic_sparsity;
pp->linear_solver_options.num_threads = options.num_linear_solver_threads;