Fix calculation of Solver::Summary::num_threads_used. - Previously we were only bounding num_threads_used based on whether CERES_NO_THREADS was defined, meaning that we could erroneously report a value larger than the number of threads actually used. Change-Id: I7373c0c968f9be268c8b7ab0b9561ae31700fda6
diff --git a/internal/ceres/parallel_for.h b/internal/ceres/parallel_for.h index e54a1b3..2da2320 100644 --- a/internal/ceres/parallel_for.h +++ b/internal/ceres/parallel_for.h
@@ -38,6 +38,10 @@ namespace ceres { namespace internal { +// Returns the maximum number of threads supported by the threading backend +// Ceres was compiled with. +int MaxNumThreadsAvailable(); + // Execute the function for every element in the range [start, end) with at most // num_threads. It will execute all the work on the calling thread if // num_threads is 1.
diff --git a/internal/ceres/parallel_for_cxx.cc b/internal/ceres/parallel_for_cxx.cc index 20a689d..b6ef709 100644 --- a/internal/ceres/parallel_for_cxx.cc +++ b/internal/ceres/parallel_for_cxx.cc
@@ -117,6 +117,10 @@ } // namespace +int MaxNumThreadsAvailable() { + return ThreadPool::MaxNumThreadsAvailable(); +} + // See ParallelFor (below) for more details. void ParallelFor(ContextImpl* context, int start,
diff --git a/internal/ceres/parallel_for_nothreads.cc b/internal/ceres/parallel_for_nothreads.cc index 73f6d30..e8f450a 100644 --- a/internal/ceres/parallel_for_nothreads.cc +++ b/internal/ceres/parallel_for_nothreads.cc
@@ -39,6 +39,8 @@ namespace ceres { namespace internal { +int MaxNumThreadsAvailable() { return 1; } + void ParallelFor(ContextImpl* context, int start, int end,
diff --git a/internal/ceres/parallel_for_openmp.cc b/internal/ceres/parallel_for_openmp.cc index ae35d6b..8afe3b1 100644 --- a/internal/ceres/parallel_for_openmp.cc +++ b/internal/ceres/parallel_for_openmp.cc
@@ -38,10 +38,15 @@ #include "ceres/scoped_thread_token.h" #include "ceres/thread_token_provider.h" #include "glog/logging.h" +#include "omp.h" namespace ceres { namespace internal { +int MaxNumThreadsAvailable() { + return omp_get_max_threads(); +} + void ParallelFor(ContextImpl* context, int start, int end,
diff --git a/internal/ceres/preprocessor.cc b/internal/ceres/preprocessor.cc index 08eacc8..0221914 100644 --- a/internal/ceres/preprocessor.cc +++ b/internal/ceres/preprocessor.cc
@@ -31,6 +31,7 @@ #include "ceres/callbacks.h" #include "ceres/gradient_checking_cost_function.h" #include "ceres/line_search_preprocessor.h" +#include "ceres/parallel_for.h" #include "ceres/preprocessor.h" #include "ceres/problem_impl.h" #include "ceres/solver.h" @@ -56,15 +57,15 @@ } void ChangeNumThreadsIfNeeded(Solver::Options* options) { -#ifdef CERES_NO_THREADS - if (options->num_threads > 1) { + const int num_threads_available = MaxNumThreadsAvailable(); + if (options->num_threads > num_threads_available) { LOG(WARNING) - << "No threading support is compiled into this binary; " - << "only options.num_threads = 1 is supported. Switching " - << "to single threaded mode."; - options->num_threads = 1; + << "Specified options.num_threads: " << options->num_threads + << " exceeds maximum available from the threading model Ceres " + << "was compiled with: " << num_threads_available + << ". Bounding to maximum number available."; + options->num_threads = num_threads_available; } -#endif // CERES_NO_THREADS } void SetupCommonMinimizerOptions(PreprocessedProblem* pp) {
diff --git a/internal/ceres/preprocessor.h b/internal/ceres/preprocessor.h index 37e4204..99bd6c0 100644 --- a/internal/ceres/preprocessor.h +++ b/internal/ceres/preprocessor.h
@@ -107,8 +107,9 @@ // Common functions used by various preprocessors. -// If OpenMP support is not available and user has requested more than -// one thread, then set the *_num_threads options as needed to 1. +// If the user has specified a num_threads > the maximum number of threads +// available from the compiled threading model, bound the number of threads +// to the maximum. void ChangeNumThreadsIfNeeded(Solver::Options* options); // Extract the effective parameter vector from the preprocessed
diff --git a/internal/ceres/thread_pool.cc b/internal/ceres/thread_pool.cc index 8fc7f83..991da30 100644 --- a/internal/ceres/thread_pool.cc +++ b/internal/ceres/thread_pool.cc
@@ -36,6 +36,7 @@ #include "ceres/thread_pool.h" #include <cmath> +#include <limits> namespace ceres { namespace internal { @@ -43,18 +44,20 @@ // Constrain the total number of threads to the amount the hardware can support. int GetNumAllowedThreads(int requested_num_threads) { - const int num_hardware_threads = std::thread::hardware_concurrency(); - // hardware_concurrency() can return 0 if the value is not well defined or not - // computable. - if (num_hardware_threads == 0) { - return requested_num_threads; - } - - return std::min(requested_num_threads, num_hardware_threads); + return std::min(requested_num_threads, ThreadPool::MaxNumThreadsAvailable()); } } // namespace +int ThreadPool::MaxNumThreadsAvailable() { + const int num_hardware_threads = std::thread::hardware_concurrency(); + // hardware_concurrency() can return 0 if the value is not well defined or not + // computable. + return num_hardware_threads == 0 + ? std::numeric_limits<int>::max() + : num_hardware_threads; +} + ThreadPool::ThreadPool() { } ThreadPool::ThreadPool(int num_threads) {
diff --git a/internal/ceres/thread_pool.h b/internal/ceres/thread_pool.h index 228f344..87c58c2 100644 --- a/internal/ceres/thread_pool.h +++ b/internal/ceres/thread_pool.h
@@ -59,6 +59,9 @@ // class ThreadPool { public: + // Returns the maximum number of hardware threads. + static int MaxNumThreadsAvailable(); + // Default constructor with no active threads. We allow instantiating a // thread pool with no threads to support the use case of single threaded // Ceres where everything will be executed on the main thread. For single @@ -66,7 +69,7 @@ // are expensive to create, and no unused threads shown in the debugger. ThreadPool(); - // Instantiates a thread pool with min(num_hardware_threads, num_threads) + // Instantiates a thread pool with min(MaxNumThreadsAvailable, num_threads) // number of threads. explicit ThreadPool(int num_threads); @@ -75,7 +78,7 @@ ~ThreadPool(); // Resizes the thread pool if it is currently less than the requested number - // of threads. The thread pool will be resized to min(num_hardware_threads, + // of threads. The thread pool will be resized to min(MaxNumThreadsAvailable, // num_threads) number of threads. Resize does not support reducing the // thread pool size. If a smaller number of threads is requested, the thread // pool remains the same size. The thread pool is reused within Ceres with