Improve multithreading when using inner iterations. By default, inner iterations solved problems whose evaluator was configured to use exactly one thread for the evaluation. This is fine when multiple inner iteration problems are being executed concurrently, but every now and then there are problem decompositions where the current independent set contains just one parameter block and that block touches every single residual block. In such cases it is essential that the evaluator be configured to use multiple threads. We now pay attention to the size of the independent set and dynamically configure the number of threads used by the outer loop and by the evaluator loop. Thanks to William Rucklidge for reporting this issue and providing a test problem to debug it. Change-Id: Iaff9a4ab6d2658cf7b61ea213575d23aab604e3b

commit: 94c6e7d27b5d48d81ab54ed9cdcbc55c3c099311 [log] [tgz]
author: Sameer Agarwal <sameeragarwal@google.com> Wed Oct 01 15:55:13 2014 -0700
committer: Sameer Agarwal <sameeragarwal@google.com> Wed Oct 01 16:00:26 2014 -0700
tree: e1c25985f9007006048313721164cdac4e12a8f4
parent: 9e11cd16d09403b9270e621e839d5948b6a74b8d [diff]
diff --git a/internal/ceres/coordinate_descent_minimizer.cc b/internal/ceres/coordinate_descent_minimizer.cc
index 1d55458..535d6e1 100644
--- a/internal/ceres/coordinate_descent_minimizer.cc
+++ b/internal/ceres/coordinate_descent_minimizer.cc

@@ -1,5 +1,5 @@
 // Ceres Solver - A fast non-linear least squares minimizer
-// Copyright 2012 Google Inc. All rights reserved.
+// Copyright 2014 Google Inc. All rights reserved.
 // http://code.google.com/p/ceres-solver/
 //
 // Redistribution and use in source and binary forms, with or without
@@ -140,15 +140,23 @@
   }
 
   for (int i = 0; i < independent_set_offsets_.size() - 1; ++i) {
-    // No point paying the price for an OpemMP call if the set if of
+    const int num_problems =
+        independent_set_offsets_[i + 1] - independent_set_offsets_[i];
+    // No point paying the price for an OpemMP call if the set is of
     // size zero.
-    if (independent_set_offsets_[i] ==  independent_set_offsets_[i + 1]) {
+    if (num_problems == 0) {
       continue;
     }
 
+#ifdef CERES_USE_OPENMP
+    const int num_inner_iteration_threads = min(options.num_threads, num_problems);
+    evaluator_options_.num_threads =
+        max(1, options.num_threads / num_inner_iteration_threads);
+
     // The parameter blocks in each independent set can be optimized
     // in parallel, since they do not co-occur in any residual block.
-#pragma omp parallel for num_threads(options.num_threads)
+#pragma omp parallel for num_threads(num_inner_iteration_threads)
+#endif
     for (int j = independent_set_offsets_[i];
          j < independent_set_offsets_[i + 1];
          ++j) {
commit	94c6e7d27b5d48d81ab54ed9cdcbc55c3c099311	[log] [tgz]
author	Sameer Agarwal <sameeragarwal@google.com>	Wed Oct 01 15:55:13 2014 -0700
committer	Sameer Agarwal <sameeragarwal@google.com>	Wed Oct 01 16:00:26 2014 -0700
tree	e1c25985f9007006048313721164cdac4e12a8f4
parent	9e11cd16d09403b9270e621e839d5948b6a74b8d [diff]