Multithread DENSE_SCHUR
Replace the global lock in BlockRandomAccessDenseMatrix
with a per-cell lock.
Change-Id: Iddbe38616157b6e0d3770eede3335a056c3ba18c
diff --git a/internal/ceres/block_random_access_dense_matrix.cc b/internal/ceres/block_random_access_dense_matrix.cc
index aedfc74..e582279 100644
--- a/internal/ceres/block_random_access_dense_matrix.cc
+++ b/internal/ceres/block_random_access_dense_matrix.cc
@@ -40,16 +40,21 @@
BlockRandomAccessDenseMatrix::BlockRandomAccessDenseMatrix(
const vector<int>& blocks) {
- block_layout_.resize(blocks.size(), 0);
+ const int num_blocks = blocks.size();
+ block_layout_.resize(num_blocks, 0);
num_rows_ = 0;
- for (int i = 0; i < blocks.size(); ++i) {
+ for (int i = 0; i < num_blocks; ++i) {
block_layout_[i] = num_rows_;
num_rows_ += blocks[i];
}
values_.reset(new double[num_rows_ * num_rows_]);
- CHECK_NOTNULL(values_.get());
- cell_info_.values = values_.get();
+
+ cell_infos_.reset(new CellInfo[num_blocks * num_blocks]);
+ for (int i = 0; i < num_blocks * num_blocks; ++i) {
+ cell_infos_[i].values = values_.get();
+ }
+
SetZero();
}
@@ -68,7 +73,7 @@
*col = block_layout_[col_block_id];
*row_stride = num_rows_;
*col_stride = num_rows_;
- return &cell_info_;
+ return &cell_infos_[row_block_id * block_layout_.size() + col_block_id];
}
// Assume that the user does not hold any locks on any cell blocks
diff --git a/internal/ceres/block_random_access_dense_matrix.h b/internal/ceres/block_random_access_dense_matrix.h
index 9f27a4c..d160fd9 100644
--- a/internal/ceres/block_random_access_dense_matrix.h
+++ b/internal/ceres/block_random_access_dense_matrix.h
@@ -84,10 +84,10 @@
double* mutable_values() { return values_.get(); }
private:
- CellInfo cell_info_;
int num_rows_;
vector<int> block_layout_;
scoped_array<double> values_;
+ scoped_array<CellInfo> cell_infos_;
CERES_DISALLOW_COPY_AND_ASSIGN(BlockRandomAccessDenseMatrix);
};
diff --git a/internal/ceres/solver_impl.cc b/internal/ceres/solver_impl.cc
index 0ef0a27..e18d3b9 100644
--- a/internal/ceres/solver_impl.cc
+++ b/internal/ceres/solver_impl.cc
@@ -1153,20 +1153,6 @@
options->sparse_linear_algebra_library;
linear_solver_options.num_threads = options->num_linear_solver_threads;
- // The matrix used for storing the dense Schur complement has a
- // single lock guarding the whole matrix. Running the
- // SchurComplementSolver with multiple threads leads to maximum
- // contention and slowdown. If the problem is large enough to
- // benefit from a multithreaded schur eliminator, you should be
- // using a SPARSE_SCHUR solver anyways.
- if ((linear_solver_options.num_threads > 1) &&
- (linear_solver_options.type == DENSE_SCHUR)) {
- LOG(WARNING) << "Warning: Solver::Options::num_linear_solver_threads = "
- << options->num_linear_solver_threads
- << " with DENSE_SCHUR will result in poor performance; "
- << "switching to single-threaded.";
- linear_solver_options.num_threads = 1;
- }
options->num_linear_solver_threads = linear_solver_options.num_threads;
linear_solver_options.use_block_amd = options->use_block_amd;
diff --git a/internal/ceres/solver_impl_test.cc b/internal/ceres/solver_impl_test.cc
index 2471ea2..24860d2 100644
--- a/internal/ceres/solver_impl_test.cc
+++ b/internal/ceres/solver_impl_test.cc
@@ -561,7 +561,7 @@
SolverImpl::CreateLinearSolver(&options, &error));
EXPECT_TRUE(solver != NULL);
EXPECT_EQ(options.linear_solver_type, DENSE_SCHUR);
- EXPECT_EQ(options.num_linear_solver_threads, 1);
+ EXPECT_EQ(options.num_linear_solver_threads, 2);
}
TEST(SolverImpl, CreateIterativeLinearSolverForDogleg) {