Improve the performance of DenseQRSolver 1. Reduce amount of reallocations. Change-Id: I91b17c781ae94ed12014d647f0162cfce4f6ed7b
diff --git a/internal/ceres/coordinate_descent_minimizer.cc b/internal/ceres/coordinate_descent_minimizer.cc index 370b004..72edfe2 100644 --- a/internal/ceres/coordinate_descent_minimizer.cc +++ b/internal/ceres/coordinate_descent_minimizer.cc
@@ -106,11 +106,6 @@ } } - LinearSolver::Options linear_solver_options; - linear_solver_options.type = DENSE_QR; - linear_solver_.reset(LinearSolver::Create(linear_solver_options)); - CHECK_NOTNULL(linear_solver_.get()); - evaluator_options_.linear_solver_type = DENSE_QR; evaluator_options_.num_eliminate_blocks = 0; evaluator_options_.num_threads = 1; @@ -129,6 +124,15 @@ parameter_block->SetConstant(); } + scoped_array<LinearSolver*> linear_solvers(new LinearSolver*[options.num_threads]); + + LinearSolver::Options linear_solver_options; + linear_solver_options.type = DENSE_QR; + + for (int i = 0; i < options.num_threads; ++i) { + linear_solvers[i] = LinearSolver::Create(linear_solver_options); + } + for (int i = 0; i < independent_set_offsets_.size() - 1; ++i) { // No point paying the price for an OpemMP call if the set if of // size zero. @@ -142,6 +146,11 @@ for (int j = independent_set_offsets_[i]; j < independent_set_offsets_[i + 1]; ++j) { +#ifdef CERES_USE_OPENMP + int thread_id = omp_get_thread_num(); +#else + int thread_id = 0; +#endif ParameterBlock* parameter_block = parameter_blocks_[j]; const int old_index = parameter_block->index(); @@ -164,6 +173,7 @@ // we are fine. Solver::Summary inner_summary; Solve(&inner_program, + linear_solvers[thread_id], parameters + parameter_block->state_offset(), &inner_summary); @@ -177,10 +187,15 @@ for (int i = 0; i < parameter_blocks_.size(); ++i) { parameter_blocks_[i]->SetVarying(); } + + for (int i = 0; i < options.num_threads; ++i) { + delete linear_solvers[i]; + } } // Solve the optimization problem for one parameter block. void CoordinateDescentMinimizer::Solve(Program* program, + LinearSolver* linear_solver, double* parameter, Solver::Summary* summary) { *summary = Solver::Summary(); @@ -196,11 +211,11 @@ scoped_ptr<SparseMatrix> jacobian(evaluator->CreateJacobian()); CHECK_NOTNULL(jacobian.get()); - TrustRegionStrategy::Options trust_region_strategy_options; - trust_region_strategy_options.linear_solver = linear_solver_.get(); + TrustRegionStrategy::Options trs_options; + trs_options.linear_solver = linear_solver; + scoped_ptr<TrustRegionStrategy>trust_region_strategy( - TrustRegionStrategy::Create(trust_region_strategy_options)); - CHECK_NOTNULL(trust_region_strategy.get()); + CHECK_NOTNULL(TrustRegionStrategy::Create(trs_options))); Minimizer::Options minimizer_options; minimizer_options.evaluator = evaluator.get();
diff --git a/internal/ceres/coordinate_descent_minimizer.h b/internal/ceres/coordinate_descent_minimizer.h index d7ff4db..fb96b3c 100644 --- a/internal/ceres/coordinate_descent_minimizer.h +++ b/internal/ceres/coordinate_descent_minimizer.h
@@ -65,6 +65,7 @@ private: void Solve(Program* program, + LinearSolver* linear_solver, double* parameters, Solver::Summary* summary); @@ -77,7 +78,6 @@ vector<int> independent_set_offsets_; Evaluator::Options evaluator_options_; - scoped_ptr<LinearSolver> linear_solver_; }; } // namespace internal
diff --git a/internal/ceres/dense_jacobian_writer.h b/internal/ceres/dense_jacobian_writer.h index 1177b83..be743a8 100644 --- a/internal/ceres/dense_jacobian_writer.h +++ b/internal/ceres/dense_jacobian_writer.h
@@ -62,7 +62,8 @@ SparseMatrix* CreateJacobian() const { return new DenseSparseMatrix(program_->NumResiduals(), - program_->NumEffectiveParameters()); + program_->NumEffectiveParameters(), + true); } void Write(int residual_id, @@ -87,10 +88,10 @@ continue; } - int parameter_block_size = parameter_block->LocalSize(); - MatrixRef parameter_jacobian(jacobians[j], - num_residuals, - parameter_block_size); + const int parameter_block_size = parameter_block->LocalSize(); + ConstMatrixRef parameter_jacobian(jacobians[j], + num_residuals, + parameter_block_size); dense_jacobian->mutable_matrix().block( residual_offset,
diff --git a/internal/ceres/dense_qr_solver.cc b/internal/ceres/dense_qr_solver.cc index 2b329ee..e2463c8 100644 --- a/internal/ceres/dense_qr_solver.cc +++ b/internal/ceres/dense_qr_solver.cc
@@ -62,17 +62,20 @@ } // rhs = [b;0] to account for the additional rows in the lhs. - Vector rhs(num_rows + ((per_solve_options.D != NULL) ? num_cols : 0)); - rhs.setZero(); - rhs.head(num_rows) = ConstVectorRef(b, num_rows); + const int augmented_num_rows = num_rows + ((per_solve_options.D != NULL) ? num_cols : 0); + if (rhs_.rows() != augmented_num_rows) { + rhs_.resize(augmented_num_rows); + rhs_.setZero(); + } + rhs_.head(num_rows) = ConstVectorRef(b, num_rows); // Solve the system. - VectorRef(x, num_cols) = A->matrix().colPivHouseholderQr().solve(rhs); + VectorRef(x, num_cols) = A->matrix().colPivHouseholderQr().solve(rhs_); VLOG(3) << "A:\n" << A->matrix(); VLOG(3) << "x:\n" << VectorRef(x, num_cols); - VLOG(3) << "b:\n" << rhs; - VLOG(3) << "error: " << (A->matrix() * VectorRef(x, num_cols) - rhs).norm(); + VLOG(3) << "b:\n" << rhs_; + VLOG(3) << "error: " << (A->matrix() * VectorRef(x, num_cols) - rhs_).norm(); if (per_solve_options.D != NULL) {
diff --git a/internal/ceres/dense_qr_solver.h b/internal/ceres/dense_qr_solver.h index dd683a8..f78fa72 100644 --- a/internal/ceres/dense_qr_solver.h +++ b/internal/ceres/dense_qr_solver.h
@@ -33,6 +33,7 @@ #define CERES_INTERNAL_DENSE_QR_SOLVER_H_ #include "ceres/linear_solver.h" +#include "ceres/internal/eigen.h" #include "ceres/internal/macros.h" namespace ceres { @@ -90,6 +91,7 @@ double* x); const LinearSolver::Options options_; + Vector rhs_; CERES_DISALLOW_COPY_AND_ASSIGN(DenseQRSolver); };
diff --git a/internal/ceres/dense_sparse_matrix.cc b/internal/ceres/dense_sparse_matrix.cc index 86730cc..7fb7b83 100644 --- a/internal/ceres/dense_sparse_matrix.cc +++ b/internal/ceres/dense_sparse_matrix.cc
@@ -47,6 +47,18 @@ m_.setZero(); } +DenseSparseMatrix::DenseSparseMatrix(int num_rows, int num_cols, bool reserve_diagonal) + : has_diagonal_appended_(false), + has_diagonal_reserved_(reserve_diagonal) { + // Allocate enough space for the diagonal. + if (reserve_diagonal) { + m_.resize(num_rows + num_cols, num_cols); + } else { + m_.resize(num_rows, num_cols); + } + m_.setZero(); +} + DenseSparseMatrix::DenseSparseMatrix(const TripletSparseMatrix& m) : m_(Eigen::MatrixXd::Zero(m.num_rows(), m.num_cols())), has_diagonal_appended_(false),
diff --git a/internal/ceres/dense_sparse_matrix.h b/internal/ceres/dense_sparse_matrix.h index e7ad14d..1e4d499 100644 --- a/internal/ceres/dense_sparse_matrix.h +++ b/internal/ceres/dense_sparse_matrix.h
@@ -57,6 +57,7 @@ #endif DenseSparseMatrix(int num_rows, int num_cols); + DenseSparseMatrix(int num_rows, int num_cols, bool reserve_diagonal); virtual ~DenseSparseMatrix() {}