Improve the performance of DenseQRSolver
1. Reduce amount of reallocations.
Change-Id: I91b17c781ae94ed12014d647f0162cfce4f6ed7b
diff --git a/internal/ceres/coordinate_descent_minimizer.cc b/internal/ceres/coordinate_descent_minimizer.cc
index 370b004..72edfe2 100644
--- a/internal/ceres/coordinate_descent_minimizer.cc
+++ b/internal/ceres/coordinate_descent_minimizer.cc
@@ -106,11 +106,6 @@
}
}
- LinearSolver::Options linear_solver_options;
- linear_solver_options.type = DENSE_QR;
- linear_solver_.reset(LinearSolver::Create(linear_solver_options));
- CHECK_NOTNULL(linear_solver_.get());
-
evaluator_options_.linear_solver_type = DENSE_QR;
evaluator_options_.num_eliminate_blocks = 0;
evaluator_options_.num_threads = 1;
@@ -129,6 +124,15 @@
parameter_block->SetConstant();
}
+ scoped_array<LinearSolver*> linear_solvers(new LinearSolver*[options.num_threads]);
+
+ LinearSolver::Options linear_solver_options;
+ linear_solver_options.type = DENSE_QR;
+
+ for (int i = 0; i < options.num_threads; ++i) {
+ linear_solvers[i] = LinearSolver::Create(linear_solver_options);
+ }
+
for (int i = 0; i < independent_set_offsets_.size() - 1; ++i) {
// No point paying the price for an OpemMP call if the set if of
// size zero.
@@ -142,6 +146,11 @@
for (int j = independent_set_offsets_[i];
j < independent_set_offsets_[i + 1];
++j) {
+#ifdef CERES_USE_OPENMP
+ int thread_id = omp_get_thread_num();
+#else
+ int thread_id = 0;
+#endif
ParameterBlock* parameter_block = parameter_blocks_[j];
const int old_index = parameter_block->index();
@@ -164,6 +173,7 @@
// we are fine.
Solver::Summary inner_summary;
Solve(&inner_program,
+ linear_solvers[thread_id],
parameters + parameter_block->state_offset(),
&inner_summary);
@@ -177,10 +187,15 @@
for (int i = 0; i < parameter_blocks_.size(); ++i) {
parameter_blocks_[i]->SetVarying();
}
+
+ for (int i = 0; i < options.num_threads; ++i) {
+ delete linear_solvers[i];
+ }
}
// Solve the optimization problem for one parameter block.
void CoordinateDescentMinimizer::Solve(Program* program,
+ LinearSolver* linear_solver,
double* parameter,
Solver::Summary* summary) {
*summary = Solver::Summary();
@@ -196,11 +211,11 @@
scoped_ptr<SparseMatrix> jacobian(evaluator->CreateJacobian());
CHECK_NOTNULL(jacobian.get());
- TrustRegionStrategy::Options trust_region_strategy_options;
- trust_region_strategy_options.linear_solver = linear_solver_.get();
+ TrustRegionStrategy::Options trs_options;
+ trs_options.linear_solver = linear_solver;
+
scoped_ptr<TrustRegionStrategy>trust_region_strategy(
- TrustRegionStrategy::Create(trust_region_strategy_options));
- CHECK_NOTNULL(trust_region_strategy.get());
+ CHECK_NOTNULL(TrustRegionStrategy::Create(trs_options)));
Minimizer::Options minimizer_options;
minimizer_options.evaluator = evaluator.get();
diff --git a/internal/ceres/coordinate_descent_minimizer.h b/internal/ceres/coordinate_descent_minimizer.h
index d7ff4db..fb96b3c 100644
--- a/internal/ceres/coordinate_descent_minimizer.h
+++ b/internal/ceres/coordinate_descent_minimizer.h
@@ -65,6 +65,7 @@
private:
void Solve(Program* program,
+ LinearSolver* linear_solver,
double* parameters,
Solver::Summary* summary);
@@ -77,7 +78,6 @@
vector<int> independent_set_offsets_;
Evaluator::Options evaluator_options_;
- scoped_ptr<LinearSolver> linear_solver_;
};
} // namespace internal
diff --git a/internal/ceres/dense_jacobian_writer.h b/internal/ceres/dense_jacobian_writer.h
index 1177b83..be743a8 100644
--- a/internal/ceres/dense_jacobian_writer.h
+++ b/internal/ceres/dense_jacobian_writer.h
@@ -62,7 +62,8 @@
SparseMatrix* CreateJacobian() const {
return new DenseSparseMatrix(program_->NumResiduals(),
- program_->NumEffectiveParameters());
+ program_->NumEffectiveParameters(),
+ true);
}
void Write(int residual_id,
@@ -87,10 +88,10 @@
continue;
}
- int parameter_block_size = parameter_block->LocalSize();
- MatrixRef parameter_jacobian(jacobians[j],
- num_residuals,
- parameter_block_size);
+ const int parameter_block_size = parameter_block->LocalSize();
+ ConstMatrixRef parameter_jacobian(jacobians[j],
+ num_residuals,
+ parameter_block_size);
dense_jacobian->mutable_matrix().block(
residual_offset,
diff --git a/internal/ceres/dense_qr_solver.cc b/internal/ceres/dense_qr_solver.cc
index 2b329ee..e2463c8 100644
--- a/internal/ceres/dense_qr_solver.cc
+++ b/internal/ceres/dense_qr_solver.cc
@@ -62,17 +62,20 @@
}
// rhs = [b;0] to account for the additional rows in the lhs.
- Vector rhs(num_rows + ((per_solve_options.D != NULL) ? num_cols : 0));
- rhs.setZero();
- rhs.head(num_rows) = ConstVectorRef(b, num_rows);
+ const int augmented_num_rows = num_rows + ((per_solve_options.D != NULL) ? num_cols : 0);
+ if (rhs_.rows() != augmented_num_rows) {
+ rhs_.resize(augmented_num_rows);
+ rhs_.setZero();
+ }
+ rhs_.head(num_rows) = ConstVectorRef(b, num_rows);
// Solve the system.
- VectorRef(x, num_cols) = A->matrix().colPivHouseholderQr().solve(rhs);
+ VectorRef(x, num_cols) = A->matrix().colPivHouseholderQr().solve(rhs_);
VLOG(3) << "A:\n" << A->matrix();
VLOG(3) << "x:\n" << VectorRef(x, num_cols);
- VLOG(3) << "b:\n" << rhs;
- VLOG(3) << "error: " << (A->matrix() * VectorRef(x, num_cols) - rhs).norm();
+ VLOG(3) << "b:\n" << rhs_;
+ VLOG(3) << "error: " << (A->matrix() * VectorRef(x, num_cols) - rhs_).norm();
if (per_solve_options.D != NULL) {
diff --git a/internal/ceres/dense_qr_solver.h b/internal/ceres/dense_qr_solver.h
index dd683a8..f78fa72 100644
--- a/internal/ceres/dense_qr_solver.h
+++ b/internal/ceres/dense_qr_solver.h
@@ -33,6 +33,7 @@
#define CERES_INTERNAL_DENSE_QR_SOLVER_H_
#include "ceres/linear_solver.h"
+#include "ceres/internal/eigen.h"
#include "ceres/internal/macros.h"
namespace ceres {
@@ -90,6 +91,7 @@
double* x);
const LinearSolver::Options options_;
+ Vector rhs_;
CERES_DISALLOW_COPY_AND_ASSIGN(DenseQRSolver);
};
diff --git a/internal/ceres/dense_sparse_matrix.cc b/internal/ceres/dense_sparse_matrix.cc
index 86730cc..7fb7b83 100644
--- a/internal/ceres/dense_sparse_matrix.cc
+++ b/internal/ceres/dense_sparse_matrix.cc
@@ -47,6 +47,18 @@
m_.setZero();
}
+DenseSparseMatrix::DenseSparseMatrix(int num_rows, int num_cols, bool reserve_diagonal)
+ : has_diagonal_appended_(false),
+ has_diagonal_reserved_(reserve_diagonal) {
+ // Allocate enough space for the diagonal.
+ if (reserve_diagonal) {
+ m_.resize(num_rows + num_cols, num_cols);
+ } else {
+ m_.resize(num_rows, num_cols);
+ }
+ m_.setZero();
+}
+
DenseSparseMatrix::DenseSparseMatrix(const TripletSparseMatrix& m)
: m_(Eigen::MatrixXd::Zero(m.num_rows(), m.num_cols())),
has_diagonal_appended_(false),
diff --git a/internal/ceres/dense_sparse_matrix.h b/internal/ceres/dense_sparse_matrix.h
index e7ad14d..1e4d499 100644
--- a/internal/ceres/dense_sparse_matrix.h
+++ b/internal/ceres/dense_sparse_matrix.h
@@ -57,6 +57,7 @@
#endif
DenseSparseMatrix(int num_rows, int num_cols);
+ DenseSparseMatrix(int num_rows, int num_cols, bool reserve_diagonal);
virtual ~DenseSparseMatrix() {}