Let ITERATIVE_SCHUR use an explicit Schur Complement matrix. Up till now ITERATIVE_SCHUR evaluates matrix-vector products between the Schur complement and a vector implicitly by exploiting the algebraic expression for the Schur complement. This cost of this evaluation scales with the number of non-zeros in the Jacobian. For small to medium sized problems there is a sweet spot where computing the Schur complement is cheap enough that it is much more efficient to explicitly compute it and use it for evaluating the matrix-vector products. This changes implements support for an explicit Schur complement in ITERATIVE_SCHUR in combination with the SCHUR_JACOBI preconditioner. API wise a new bool Solver::Options::use_explicit_schur_complement has been added. The implementation extends the SparseSchurComplementSolver to use Conjugate Gradients. Example speedup: use_explicit_schur_complement = false Time (in seconds): Preprocessor 0.585 Residual evaluation 0.319 Jacobian evaluation 1.590 Linear solver 25.685 Minimizer 27.990 Postprocessor 0.010 Total 28.585 use_explicit_schur_complement = true Time (in seconds): Preprocessor 0.638 Residual evaluation 0.318 Jacobian evaluation 1.507 Linear solver 5.930 Minimizer 8.144 Postprocessor 0.010 Total 8.791 Which indicates an end-to-end speedup of more than 3x, with the linear solver being sped up by > 4x. The idea to explore this optimization was inspired by the recent paper: Mining structure fragments for smart bundle adjustment L. Carlone, P. Alcantarilla, H. Chiu, K. Zsolt, F. Dellaert British Machine Vision Conference, 2014 which uses a more complicated algorithm to compute parts of the Schur complement to speed up the matrix-vector product. Change-Id: I95324af0ab351faa1600f5204039a1d2a64ae61d
diff --git a/examples/bundle_adjuster.cc b/examples/bundle_adjuster.cc index 296611f..7787d93 100644 --- a/examples/bundle_adjuster.cc +++ b/examples/bundle_adjuster.cc
@@ -79,6 +79,8 @@ DEFINE_string(linear_solver, "sparse_schur", "Options are: " "sparse_schur, dense_schur, iterative_schur, sparse_normal_cholesky, " "dense_qr, dense_normal_cholesky and cgnr."); +DEFINE_bool(explicit_schur_complement, false, "If using ITERATIVE_SCHUR " + "then explicitly compute the Schur complement."); DEFINE_string(preconditioner, "jacobi", "Options are: " "identity, jacobi, schur_jacobi, cluster_jacobi, " "cluster_tridiagonal."); @@ -136,6 +138,7 @@ FLAGS_dense_linear_algebra_library, &options->dense_linear_algebra_library_type)); options->num_linear_solver_threads = FLAGS_num_threads; + options->use_explicit_schur_complement = FLAGS_explicit_schur_complement; } void SetOrdering(BALProblem* bal_problem, Solver::Options* options) {
diff --git a/include/ceres/solver.h b/include/ceres/solver.h index 0af34ca..a5efa2a 100644 --- a/include/ceres/solver.h +++ b/include/ceres/solver.h
@@ -118,6 +118,7 @@ #endif num_linear_solver_threads = 1; + use_explicit_schur_complement = false; use_postordering = false; dynamic_sparsity = false; min_linear_solver_iterations = 0; @@ -496,6 +497,29 @@ // smaller than the group containing cameras. shared_ptr<ParameterBlockOrdering> linear_solver_ordering; + // Use an explicitly computed Schur complement matrix with + // ITERATIVE_SCHUR. + // + // By default this option is disabled and ITERATIVE_SCHUR + // evaluates evaluates matrix-vector products between the Schur + // complement and a vector implicitly by exploiting the algebraic + // expression for the Schur complement. + // + // The cost of this evaluation scales with the number of non-zeros + // in the Jacobian. + // + // For small to medium sized problems there is a sweet spot where + // computing the Schur complement is cheap enough that it is much + // more efficient to explicitly compute it and use it for evaluating + // the matrix-vector products. + // + // Enabling this option tells ITERATIVE_SCHUR to use an explicitly + // computed Schur complement. + // + // NOTE: This option can only be used with the SCHUR_JACOBI + // preconditioner. + bool use_explicit_schur_complement; + // Sparse Cholesky factorization algorithms use a fill-reducing // ordering to permute the columns of the Jacobian matrix. There // are two ways of doing this.
diff --git a/internal/ceres/linear_solver.cc b/internal/ceres/linear_solver.cc index d905ec2..e479b75 100644 --- a/internal/ceres/linear_solver.cc +++ b/internal/ceres/linear_solver.cc
@@ -96,7 +96,11 @@ return new DenseSchurComplementSolver(options); case ITERATIVE_SCHUR: - return new IterativeSchurComplementSolver(options); + if (options.use_explicit_schur_complement) { + return new SparseSchurComplementSolver(options); + } else { + return new IterativeSchurComplementSolver(options); + } case DENSE_QR: return new DenseQRSolver(options);
diff --git a/internal/ceres/linear_solver.h b/internal/ceres/linear_solver.h index 58b9044..5f59765 100644 --- a/internal/ceres/linear_solver.h +++ b/internal/ceres/linear_solver.h
@@ -99,6 +99,7 @@ sparse_linear_algebra_library_type(SUITE_SPARSE), use_postordering(false), dynamic_sparsity(false), + use_explicit_schur_complement(false), min_num_iterations(1), max_num_iterations(1), num_threads(1), @@ -114,9 +115,10 @@ DenseLinearAlgebraLibraryType dense_linear_algebra_library_type; SparseLinearAlgebraLibraryType sparse_linear_algebra_library_type; - // See solver.h for information about this flag. + // See solver.h for information about these flags. bool use_postordering; bool dynamic_sparsity; + bool use_explicit_schur_complement; // Number of internal iterations that the solver uses. This // parameter only makes sense for iterative solvers like CG.
diff --git a/internal/ceres/schur_complement_solver.cc b/internal/ceres/schur_complement_solver.cc index d2aa168..33f812b 100644 --- a/internal/ceres/schur_complement_solver.cc +++ b/internal/ceres/schur_complement_solver.cc
@@ -40,6 +40,7 @@ #include "ceres/block_random_access_sparse_matrix.h" #include "ceres/block_sparse_matrix.h" #include "ceres/block_structure.h" +#include "ceres/conjugate_gradients_solver.h" #include "ceres/cxsparse.h" #include "ceres/detect_structure.h" #include "ceres/internal/eigen.h" @@ -56,6 +57,61 @@ namespace ceres { namespace internal { +namespace { + +class BlockRandomAccessSparseMatrixAdapter : public LinearOperator { + public: + explicit BlockRandomAccessSparseMatrixAdapter( + const BlockRandomAccessSparseMatrix& m) + : m_(m) { + } + + virtual ~BlockRandomAccessSparseMatrixAdapter() {} + + // y = y + Ax; + virtual void RightMultiply(const double* x, double* y) const { + m_.SymmetricRightMultiply(x, y); + } + + // y = y + A'x; + virtual void LeftMultiply(const double* x, double* y) const { + m_.SymmetricRightMultiply(x, y); + } + + virtual int num_rows() const { return m_.num_rows(); } + virtual int num_cols() const { return m_.num_rows(); } + + private: + const BlockRandomAccessSparseMatrix& m_; +}; + +class BlockRandomAccessDiagonalMatrixAdapter : public LinearOperator { + public: + explicit BlockRandomAccessDiagonalMatrixAdapter( + const BlockRandomAccessDiagonalMatrix& m) + : m_(m) { + } + + virtual ~BlockRandomAccessDiagonalMatrixAdapter() {} + + // y = y + Ax; + virtual void RightMultiply(const double* x, double* y) const { + m_.RightMultiply(x, y); + } + + // y = y + A'x; + virtual void LeftMultiply(const double* x, double* y) const { + m_.RightMultiply(x, y); + } + + virtual int num_rows() const { return m_.num_rows(); } + virtual int num_cols() const { return m_.num_rows(); } + + private: + const BlockRandomAccessDiagonalMatrix& m_; +}; + +} // namespace LinearSolver::Summary SchurComplementSolver::SolveImpl( BlockSparseMatrix* A, @@ -82,7 +138,7 @@ double* reduced_solution = x + A->num_cols() - lhs_->num_cols(); const LinearSolver::Summary summary = - SolveReducedLinearSystem(reduced_solution); + SolveReducedLinearSystem(per_solve_options, reduced_solution); event_logger.AddEvent("ReducedSolve"); if (summary.termination_type == LINEAR_SOLVER_SUCCESS) { @@ -115,7 +171,9 @@ // BlockRandomAccessDenseMatrix. The linear system is solved using // Eigen's Cholesky factorization. LinearSolver::Summary -DenseSchurComplementSolver::SolveReducedLinearSystem(double* solution) { +DenseSchurComplementSolver::SolveReducedLinearSystem( + const LinearSolver::PerSolveOptions& per_solve_options, + double* solution) { LinearSolver::Summary summary; summary.num_iterations = 0; summary.termination_type = LINEAR_SOLVER_SUCCESS; @@ -249,14 +307,25 @@ } LinearSolver::Summary -SparseSchurComplementSolver::SolveReducedLinearSystem(double* solution) { +SparseSchurComplementSolver::SolveReducedLinearSystem( + const LinearSolver::PerSolveOptions& per_solve_options, + double* solution) { + if (options().type == ITERATIVE_SCHUR) { + CHECK(options().use_explicit_schur_complement); + return SolveReducedLinearSystemUsingConjugateGradients(per_solve_options, + solution); + } + switch (options().sparse_linear_algebra_library_type) { case SUITE_SPARSE: - return SolveReducedLinearSystemUsingSuiteSparse(solution); + return SolveReducedLinearSystemUsingSuiteSparse(per_solve_options, + solution); case CX_SPARSE: - return SolveReducedLinearSystemUsingCXSparse(solution); + return SolveReducedLinearSystemUsingCXSparse(per_solve_options, + solution); case EIGEN_SPARSE: - return SolveReducedLinearSystemUsingEigen(solution); + return SolveReducedLinearSystemUsingEigen(per_solve_options, + solution); default: LOG(FATAL) << "Unknown sparse linear algebra library : " << options().sparse_linear_algebra_library_type; @@ -270,6 +339,7 @@ // CHOLMOD's sparse cholesky factorization routines. LinearSolver::Summary SparseSchurComplementSolver::SolveReducedLinearSystemUsingSuiteSparse( + const LinearSolver::PerSolveOptions& per_solve_options, double* solution) { #ifdef CERES_NO_SUITESPARSE @@ -377,6 +447,7 @@ // CXSparse's sparse cholesky factorization routines. LinearSolver::Summary SparseSchurComplementSolver::SolveReducedLinearSystemUsingCXSparse( + const LinearSolver::PerSolveOptions& per_solve_options, double* solution) { #ifdef CERES_NO_CXSPARSE @@ -433,6 +504,7 @@ // Eigen's sparse cholesky factorization routines. LinearSolver::Summary SparseSchurComplementSolver::SolveReducedLinearSystemUsingEigen( + const LinearSolver::PerSolveOptions& per_solve_options, double* solution) { #ifndef CERES_USE_EIGEN_SPARSE @@ -514,5 +586,79 @@ #endif // CERES_USE_EIGEN_SPARSE } +LinearSolver::Summary +SparseSchurComplementSolver::SolveReducedLinearSystemUsingConjugateGradients( + const LinearSolver::PerSolveOptions& per_solve_options, + double* solution) { + const int num_rows = lhs()->num_rows(); + // The case where there are no f blocks, and the system is block + // diagonal. + if (num_rows == 0) { + LinearSolver::Summary summary; + summary.num_iterations = 0; + summary.termination_type = LINEAR_SOLVER_SUCCESS; + summary.message = "Success."; + return summary; + } + + // Only SCHUR_JACOBI is supported over here right now. + CHECK_EQ(options().preconditioner_type, SCHUR_JACOBI); + + if (preconditioner_.get() == NULL) { + preconditioner_.reset(new BlockRandomAccessDiagonalMatrix(blocks_)); + } + + BlockRandomAccessSparseMatrix* sc = + down_cast<BlockRandomAccessSparseMatrix*>( + const_cast<BlockRandomAccessMatrix*>(lhs())); + + // Extract block diagonal from the Schur complement to construct the + // schur_jacobi preconditioner. + for (int i = 0; i < blocks_.size(); ++i) { + const int block_size = blocks_[i]; + + int sc_r, sc_c, sc_row_stride, sc_col_stride; + CellInfo* sc_cell_info = + CHECK_NOTNULL(sc->GetCell(i, i, + &sc_r, &sc_c, + &sc_row_stride, &sc_col_stride)); + MatrixRef sc_m(sc_cell_info->values, sc_row_stride, sc_col_stride); + + int pre_r, pre_c, pre_row_stride, pre_col_stride; + CellInfo* pre_cell_info = CHECK_NOTNULL( + preconditioner_->GetCell(i, i, + &pre_r, &pre_c, + &pre_row_stride, &pre_col_stride)); + MatrixRef pre_m(pre_cell_info->values, pre_row_stride, pre_col_stride); + + pre_m.block(pre_r, pre_c, block_size, block_size) = + sc_m.block(sc_r, sc_c, block_size, block_size); + } + preconditioner_->Invert(); + + VectorRef(solution, num_rows).setZero(); + + scoped_ptr<LinearOperator> lhs_adapter( + new BlockRandomAccessSparseMatrixAdapter(*sc)); + scoped_ptr<LinearOperator> preconditioner_adapter( + new BlockRandomAccessDiagonalMatrixAdapter(*preconditioner_)); + + + LinearSolver::Options cg_options; + cg_options.min_num_iterations = options().min_num_iterations; + cg_options.max_num_iterations = options().max_num_iterations; + ConjugateGradientsSolver cg_solver(cg_options); + + LinearSolver::PerSolveOptions cg_per_solve_options; + cg_per_solve_options.r_tolerance = per_solve_options.r_tolerance; + cg_per_solve_options.q_tolerance = per_solve_options.q_tolerance; + cg_per_solve_options.preconditioner = preconditioner_adapter.get(); + + return cg_solver.Solve(lhs_adapter.get(), + rhs(), + cg_per_solve_options, + solution); +} + } // namespace internal } // namespace ceres
diff --git a/internal/ceres/schur_complement_solver.h b/internal/ceres/schur_complement_solver.h index 1b431dc..93d23e3 100644 --- a/internal/ceres/schur_complement_solver.h +++ b/internal/ceres/schur_complement_solver.h
@@ -46,6 +46,7 @@ #include "ceres/suitesparse.h" #include "ceres/internal/scoped_ptr.h" #include "ceres/types.h" +#include "ceres/block_random_access_diagonal_matrix.h" #ifdef CERES_USE_EIGEN_SPARSE #include "Eigen/SparseCholesky" @@ -134,6 +135,7 @@ private: virtual void InitStorage(const CompressedRowBlockStructure* bs) = 0; virtual LinearSolver::Summary SolveReducedLinearSystem( + const LinearSolver::PerSolveOptions& per_solve_options, double* solution) = 0; LinearSolver::Options options_; @@ -155,6 +157,7 @@ private: virtual void InitStorage(const CompressedRowBlockStructure* bs); virtual LinearSolver::Summary SolveReducedLinearSystem( + const LinearSolver::PerSolveOptions& per_solve_options, double* solution); CERES_DISALLOW_COPY_AND_ASSIGN(DenseSchurComplementSolver); @@ -169,12 +172,19 @@ private: virtual void InitStorage(const CompressedRowBlockStructure* bs); virtual LinearSolver::Summary SolveReducedLinearSystem( + const LinearSolver::PerSolveOptions& per_solve_options, double* solution); LinearSolver::Summary SolveReducedLinearSystemUsingSuiteSparse( + const LinearSolver::PerSolveOptions& per_solve_options, double* solution); LinearSolver::Summary SolveReducedLinearSystemUsingCXSparse( + const LinearSolver::PerSolveOptions& per_solve_options, double* solution); LinearSolver::Summary SolveReducedLinearSystemUsingEigen( + const LinearSolver::PerSolveOptions& per_solve_options, + double* solution); + LinearSolver::Summary SolveReducedLinearSystemUsingConjugateGradients( + const LinearSolver::PerSolveOptions& per_solve_options, double* solution); // Size of the blocks in the Schur complement. @@ -206,6 +216,7 @@ scoped_ptr<SimplicialLDLT> simplicial_ldlt_; #endif + scoped_ptr<BlockRandomAccessDiagonalMatrix> preconditioner_; CERES_DISALLOW_COPY_AND_ASSIGN(SparseSchurComplementSolver); };
diff --git a/internal/ceres/solver.cc b/internal/ceres/solver.cc index f90045b..e1c5ee3 100644 --- a/internal/ceres/solver.cc +++ b/internal/ceres/solver.cc
@@ -120,6 +120,14 @@ OPTION_GT(max_consecutive_nonmonotonic_steps, 0); } + if (options.linear_solver_type == ITERATIVE_SCHUR && + options.use_explicit_schur_complement && + options.preconditioner_type != SCHUR_JACOBI) { + *error = "use_explicit_schur_complement only supports" + "SCHUR_JACOBI as the preconditioner."; + return false; + } + if (options.preconditioner_type == CLUSTER_JACOBI && options.sparse_linear_algebra_library_type != SUITE_SPARSE) { *error = "CLUSTER_JACOBI requires "
diff --git a/internal/ceres/trust_region_preprocessor.cc b/internal/ceres/trust_region_preprocessor.cc index ba3baa1..22ea1ec 100644 --- a/internal/ceres/trust_region_preprocessor.cc +++ b/internal/ceres/trust_region_preprocessor.cc
@@ -184,6 +184,8 @@ options.sparse_linear_algebra_library_type; pp->linear_solver_options.dense_linear_algebra_library_type = options.dense_linear_algebra_library_type; + pp->linear_solver_options.use_explicit_schur_complement = + options.use_explicit_schur_complement; pp->linear_solver_options.dynamic_sparsity = options.dynamic_sparsity; pp->linear_solver_options.num_threads = options.num_linear_solver_threads;