Fix calculation of Solver::Summary::num_threads_used.

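- Previously num_threads_used was only bounded when CERES_NO_THREADS was
  defined, so with a threading backend compiled in we could erroneously
  report a value larger than the number of threads actually used.
- Now options.num_threads is bounded by MaxNumThreadsAvailable(), which
  each threading backend (C++ threads, OpenMP, no threads) implements.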

Change-Id: I7373c0c968f9be268c8b7ab0b9561ae31700fda6
diff --git a/internal/ceres/parallel_for.h b/internal/ceres/parallel_for.h
index e54a1b3..2da2320 100644
--- a/internal/ceres/parallel_for.h
+++ b/internal/ceres/parallel_for.h
@@ -38,6 +38,10 @@
 namespace ceres {
 namespace internal {
 
+// Returns the maximum number of threads supported by the threading backend
+// Ceres was compiled with.
+int MaxNumThreadsAvailable();
+
 // Execute the function for every element in the range [start, end) with at most
 // num_threads. It will execute all the work on the calling thread if
 // num_threads is 1.
diff --git a/internal/ceres/parallel_for_cxx.cc b/internal/ceres/parallel_for_cxx.cc
index 20a689d..b6ef709 100644
--- a/internal/ceres/parallel_for_cxx.cc
+++ b/internal/ceres/parallel_for_cxx.cc
@@ -117,6 +117,10 @@
 
 }  // namespace
 
+int MaxNumThreadsAvailable() {
+  return ThreadPool::MaxNumThreadsAvailable();
+}
+
 // See ParallelFor (below) for more details.
 void ParallelFor(ContextImpl* context,
                  int start,
diff --git a/internal/ceres/parallel_for_nothreads.cc b/internal/ceres/parallel_for_nothreads.cc
index 73f6d30..e8f450a 100644
--- a/internal/ceres/parallel_for_nothreads.cc
+++ b/internal/ceres/parallel_for_nothreads.cc
@@ -39,6 +39,8 @@
 namespace ceres {
 namespace internal {
 
+int MaxNumThreadsAvailable() { return 1; }
+
 void ParallelFor(ContextImpl* context,
                  int start,
                  int end,
diff --git a/internal/ceres/parallel_for_openmp.cc b/internal/ceres/parallel_for_openmp.cc
index ae35d6b..8afe3b1 100644
--- a/internal/ceres/parallel_for_openmp.cc
+++ b/internal/ceres/parallel_for_openmp.cc
@@ -38,10 +38,15 @@
 #include "ceres/scoped_thread_token.h"
 #include "ceres/thread_token_provider.h"
 #include "glog/logging.h"
+#include "omp.h"
 
 namespace ceres {
 namespace internal {
 
+int MaxNumThreadsAvailable() {
+  return omp_get_max_threads();
+}
+
 void ParallelFor(ContextImpl* context,
                  int start,
                  int end,
diff --git a/internal/ceres/preprocessor.cc b/internal/ceres/preprocessor.cc
index 08eacc8..0221914 100644
--- a/internal/ceres/preprocessor.cc
+++ b/internal/ceres/preprocessor.cc
@@ -31,6 +31,7 @@
 #include "ceres/callbacks.h"
 #include "ceres/gradient_checking_cost_function.h"
 #include "ceres/line_search_preprocessor.h"
+#include "ceres/parallel_for.h"
 #include "ceres/preprocessor.h"
 #include "ceres/problem_impl.h"
 #include "ceres/solver.h"
@@ -56,15 +57,15 @@
 }
 
 void ChangeNumThreadsIfNeeded(Solver::Options* options) {
-#ifdef CERES_NO_THREADS
-  if (options->num_threads > 1) {
+  const int num_threads_available = MaxNumThreadsAvailable();
+  if (options->num_threads > num_threads_available) {
     LOG(WARNING)
-        << "No threading support is compiled into this binary; "
-        << "only options.num_threads = 1 is supported. Switching "
-        << "to single threaded mode.";
-    options->num_threads = 1;
+        << "Specified options.num_threads: " << options->num_threads
+        << " exceeds maximum available from the threading model Ceres "
+        << "was compiled with: " << num_threads_available
+        << ".  Bounding to maximum number available.";
+    options->num_threads = num_threads_available;
   }
-#endif  // CERES_NO_THREADS
 }
 
 void SetupCommonMinimizerOptions(PreprocessedProblem* pp) {
diff --git a/internal/ceres/preprocessor.h b/internal/ceres/preprocessor.h
index 37e4204..99bd6c0 100644
--- a/internal/ceres/preprocessor.h
+++ b/internal/ceres/preprocessor.h
@@ -107,8 +107,9 @@
 
 // Common functions used by various preprocessors.
 
-// If OpenMP support is not available and user has requested more than
-// one thread, then set the *_num_threads options as needed to 1.
+// If options->num_threads exceeds the maximum number of threads available
+// from the compiled threading model, log a warning and bound it to that
+// maximum.
 void ChangeNumThreadsIfNeeded(Solver::Options* options);
 
 // Extract the effective parameter vector from the preprocessed
diff --git a/internal/ceres/thread_pool.cc b/internal/ceres/thread_pool.cc
index 8fc7f83..991da30 100644
--- a/internal/ceres/thread_pool.cc
+++ b/internal/ceres/thread_pool.cc
@@ -36,6 +36,7 @@
 #include "ceres/thread_pool.h"
 
 #include <cmath>
+#include <limits>
 
 namespace ceres {
 namespace internal {
@@ -43,18 +44,20 @@
 
 // Constrain the total number of threads to the amount the hardware can support.
 int GetNumAllowedThreads(int requested_num_threads) {
-  const int num_hardware_threads = std::thread::hardware_concurrency();
-  // hardware_concurrency() can return 0 if the value is not well defined or not
-  // computable.
-  if (num_hardware_threads == 0) {
-    return requested_num_threads;
-  }
-
-  return std::min(requested_num_threads, num_hardware_threads);
+  return std::min(requested_num_threads, ThreadPool::MaxNumThreadsAvailable());
 }
 
 }  // namespace
 
+int ThreadPool::MaxNumThreadsAvailable() {
+  const int num_hardware_threads = std::thread::hardware_concurrency();
+  // hardware_concurrency() can return 0 if the value is not well defined or not
+  // computable; in that case impose no upper bound on the thread count.
+  return num_hardware_threads == 0
+      ? std::numeric_limits<int>::max()
+      : num_hardware_threads;
+}
+
 ThreadPool::ThreadPool() { }
 
 ThreadPool::ThreadPool(int num_threads) {
diff --git a/internal/ceres/thread_pool.h b/internal/ceres/thread_pool.h
index 228f344..87c58c2 100644
--- a/internal/ceres/thread_pool.h
+++ b/internal/ceres/thread_pool.h
@@ -59,6 +59,9 @@
 //
 class ThreadPool {
  public:
+  // Returns the hardware thread count, or the maximum int if it is unknown.
+  static int MaxNumThreadsAvailable();
+
   // Default constructor with no active threads.  We allow instantiating a
   // thread pool with no threads to support the use case of single threaded
   // Ceres where everything will be executed on the main thread. For single
@@ -66,7 +69,7 @@
   // are expensive to create, and no unused threads shown in the debugger.
   ThreadPool();
 
-  // Instantiates a thread pool with min(num_hardware_threads, num_threads)
+  // Instantiates a thread pool with min(MaxNumThreadsAvailable, num_threads)
   // number of threads.
   explicit ThreadPool(int num_threads);
 
@@ -75,7 +78,7 @@
   ~ThreadPool();
 
   // Resizes the thread pool if it is currently less than the requested number
-  // of threads.  The thread pool will be resized to min(num_hardware_threads,
+  // of threads.  The thread pool will be resized to min(MaxNumThreadsAvailable,
   // num_threads) number of threads.  Resize does not support reducing the
   // thread pool size.  If a smaller number of threads is requested, the thread
   // pool remains the same size.  The thread pool is reused within Ceres with