Multithread DENSE_SCHUR

Replace the global lock in BlockRandomAccessDenseMatrix
with a per-cell lock, allowing DENSE_SCHUR to run with
multiple linear solver threads without lock contention.

Change-Id: Iddbe38616157b6e0d3770eede3335a056c3ba18c
diff --git a/internal/ceres/block_random_access_dense_matrix.cc b/internal/ceres/block_random_access_dense_matrix.cc
index aedfc74..e582279 100644
--- a/internal/ceres/block_random_access_dense_matrix.cc
+++ b/internal/ceres/block_random_access_dense_matrix.cc
@@ -40,16 +40,21 @@
 
 BlockRandomAccessDenseMatrix::BlockRandomAccessDenseMatrix(
     const vector<int>& blocks) {
-  block_layout_.resize(blocks.size(), 0);
+  const int num_blocks = blocks.size();
+  block_layout_.resize(num_blocks, 0);
   num_rows_ = 0;
-  for (int i = 0; i < blocks.size(); ++i) {
+  for (int i = 0; i < num_blocks; ++i) {
     block_layout_[i] = num_rows_;
     num_rows_ += blocks[i];
   }
 
   values_.reset(new double[num_rows_ * num_rows_]);
-  CHECK_NOTNULL(values_.get());
-  cell_info_.values = values_.get();
+
+  cell_infos_.reset(new CellInfo[num_blocks * num_blocks]);
+  for (int i = 0; i < num_blocks * num_blocks; ++i) {
+    cell_infos_[i].values = values_.get();
+  }
+
   SetZero();
 }
 
@@ -68,7 +73,7 @@
   *col = block_layout_[col_block_id];
   *row_stride = num_rows_;
   *col_stride = num_rows_;
-  return &cell_info_;
+  return &cell_infos_[row_block_id * block_layout_.size() + col_block_id];
 }
 
 // Assume that the user does not hold any locks on any cell blocks
diff --git a/internal/ceres/block_random_access_dense_matrix.h b/internal/ceres/block_random_access_dense_matrix.h
index 9f27a4c..d160fd9 100644
--- a/internal/ceres/block_random_access_dense_matrix.h
+++ b/internal/ceres/block_random_access_dense_matrix.h
@@ -84,10 +84,10 @@
   double* mutable_values() { return values_.get(); }
 
  private:
-  CellInfo cell_info_;
   int num_rows_;
   vector<int> block_layout_;
   scoped_array<double> values_;
+  scoped_array<CellInfo> cell_infos_;
 
   CERES_DISALLOW_COPY_AND_ASSIGN(BlockRandomAccessDenseMatrix);
 };
diff --git a/internal/ceres/solver_impl.cc b/internal/ceres/solver_impl.cc
index 0ef0a27..e18d3b9 100644
--- a/internal/ceres/solver_impl.cc
+++ b/internal/ceres/solver_impl.cc
@@ -1153,20 +1153,6 @@
       options->sparse_linear_algebra_library;
 
   linear_solver_options.num_threads = options->num_linear_solver_threads;
-  // The matrix used for storing the dense Schur complement has a
-  // single lock guarding the whole matrix. Running the
-  // SchurComplementSolver with multiple threads leads to maximum
-  // contention and slowdown. If the problem is large enough to
-  // benefit from a multithreaded schur eliminator, you should be
-  // using a SPARSE_SCHUR solver anyways.
-  if ((linear_solver_options.num_threads > 1) &&
-      (linear_solver_options.type == DENSE_SCHUR)) {
-    LOG(WARNING) << "Warning: Solver::Options::num_linear_solver_threads = "
-                 << options->num_linear_solver_threads
-                 << " with DENSE_SCHUR will result in poor performance; "
-                 << "switching to single-threaded.";
-    linear_solver_options.num_threads = 1;
-  }
   options->num_linear_solver_threads = linear_solver_options.num_threads;
 
   linear_solver_options.use_block_amd = options->use_block_amd;
diff --git a/internal/ceres/solver_impl_test.cc b/internal/ceres/solver_impl_test.cc
index 2471ea2..24860d2 100644
--- a/internal/ceres/solver_impl_test.cc
+++ b/internal/ceres/solver_impl_test.cc
@@ -561,7 +561,7 @@
       SolverImpl::CreateLinearSolver(&options, &error));
   EXPECT_TRUE(solver != NULL);
   EXPECT_EQ(options.linear_solver_type, DENSE_SCHUR);
-  EXPECT_EQ(options.num_linear_solver_threads, 1);
+  EXPECT_EQ(options.num_linear_solver_threads, 2);
 }
 
 TEST(SolverImpl, CreateIterativeLinearSolverForDogleg) {