Fix calculation of Solver::Summary::num_threads_used.
- Previously we were only bounding num_threads_used based on whether
CERES_NO_THREADS was defined, meaning that we could erroneously report
a value larger than the number of threads actually used.
Change-Id: I7373c0c968f9be268c8b7ab0b9561ae31700fda6
diff --git a/internal/ceres/parallel_for.h b/internal/ceres/parallel_for.h
index e54a1b3..2da2320 100644
--- a/internal/ceres/parallel_for.h
+++ b/internal/ceres/parallel_for.h
@@ -38,6 +38,10 @@
namespace ceres {
namespace internal {
+// Returns the maximum number of threads supported by the threading backend
+// Ceres was compiled with.
+int MaxNumThreadsAvailable();
+
// Execute the function for every element in the range [start, end) with at most
// num_threads. It will execute all the work on the calling thread if
// num_threads is 1.
diff --git a/internal/ceres/parallel_for_cxx.cc b/internal/ceres/parallel_for_cxx.cc
index 20a689d..b6ef709 100644
--- a/internal/ceres/parallel_for_cxx.cc
+++ b/internal/ceres/parallel_for_cxx.cc
@@ -117,6 +117,10 @@
} // namespace
+int MaxNumThreadsAvailable() {
+ return ThreadPool::MaxNumThreadsAvailable();
+}
+
// See ParallelFor (below) for more details.
void ParallelFor(ContextImpl* context,
int start,
diff --git a/internal/ceres/parallel_for_nothreads.cc b/internal/ceres/parallel_for_nothreads.cc
index 73f6d30..e8f450a 100644
--- a/internal/ceres/parallel_for_nothreads.cc
+++ b/internal/ceres/parallel_for_nothreads.cc
@@ -39,6 +39,8 @@
namespace ceres {
namespace internal {
+int MaxNumThreadsAvailable() { return 1; }
+
void ParallelFor(ContextImpl* context,
int start,
int end,
diff --git a/internal/ceres/parallel_for_openmp.cc b/internal/ceres/parallel_for_openmp.cc
index ae35d6b..8afe3b1 100644
--- a/internal/ceres/parallel_for_openmp.cc
+++ b/internal/ceres/parallel_for_openmp.cc
@@ -38,10 +38,15 @@
#include "ceres/scoped_thread_token.h"
#include "ceres/thread_token_provider.h"
#include "glog/logging.h"
+#include "omp.h"
namespace ceres {
namespace internal {
+int MaxNumThreadsAvailable() {
+ return omp_get_max_threads();
+}
+
void ParallelFor(ContextImpl* context,
int start,
int end,
diff --git a/internal/ceres/preprocessor.cc b/internal/ceres/preprocessor.cc
index 08eacc8..0221914 100644
--- a/internal/ceres/preprocessor.cc
+++ b/internal/ceres/preprocessor.cc
@@ -31,6 +31,7 @@
#include "ceres/callbacks.h"
#include "ceres/gradient_checking_cost_function.h"
#include "ceres/line_search_preprocessor.h"
+#include "ceres/parallel_for.h"
#include "ceres/preprocessor.h"
#include "ceres/problem_impl.h"
#include "ceres/solver.h"
@@ -56,15 +57,15 @@
}
void ChangeNumThreadsIfNeeded(Solver::Options* options) {
-#ifdef CERES_NO_THREADS
- if (options->num_threads > 1) {
+ const int num_threads_available = MaxNumThreadsAvailable();
+ if (options->num_threads > num_threads_available) {
LOG(WARNING)
- << "No threading support is compiled into this binary; "
- << "only options.num_threads = 1 is supported. Switching "
- << "to single threaded mode.";
- options->num_threads = 1;
+ << "Specified options.num_threads: " << options->num_threads
+ << " exceeds maximum available from the threading model Ceres "
+ << "was compiled with: " << num_threads_available
+ << ". Bounding to maximum number available.";
+ options->num_threads = num_threads_available;
}
-#endif // CERES_NO_THREADS
}
void SetupCommonMinimizerOptions(PreprocessedProblem* pp) {
diff --git a/internal/ceres/preprocessor.h b/internal/ceres/preprocessor.h
index 37e4204..99bd6c0 100644
--- a/internal/ceres/preprocessor.h
+++ b/internal/ceres/preprocessor.h
@@ -107,8 +107,9 @@
// Common functions used by various preprocessors.
-// If OpenMP support is not available and user has requested more than
-// one thread, then set the *_num_threads options as needed to 1.
+// If the user has specified a num_threads > the maximum number of threads
+// available from the compiled threading model, bound the number of threads
+// to the maximum.
void ChangeNumThreadsIfNeeded(Solver::Options* options);
// Extract the effective parameter vector from the preprocessed
diff --git a/internal/ceres/thread_pool.cc b/internal/ceres/thread_pool.cc
index 8fc7f83..991da30 100644
--- a/internal/ceres/thread_pool.cc
+++ b/internal/ceres/thread_pool.cc
@@ -36,6 +36,7 @@
#include "ceres/thread_pool.h"
#include <cmath>
+#include <limits>
namespace ceres {
namespace internal {
@@ -43,18 +44,20 @@
// Constrain the total number of threads to the amount the hardware can support.
int GetNumAllowedThreads(int requested_num_threads) {
- const int num_hardware_threads = std::thread::hardware_concurrency();
- // hardware_concurrency() can return 0 if the value is not well defined or not
- // computable.
- if (num_hardware_threads == 0) {
- return requested_num_threads;
- }
-
- return std::min(requested_num_threads, num_hardware_threads);
+ return std::min(requested_num_threads, ThreadPool::MaxNumThreadsAvailable());
}
} // namespace
+int ThreadPool::MaxNumThreadsAvailable() {
+ const int num_hardware_threads = std::thread::hardware_concurrency();
+ // hardware_concurrency() can return 0 if the value is not well defined or not
+ // computable.
+ return num_hardware_threads == 0
+ ? std::numeric_limits<int>::max()
+ : num_hardware_threads;
+}
+
ThreadPool::ThreadPool() { }
ThreadPool::ThreadPool(int num_threads) {
diff --git a/internal/ceres/thread_pool.h b/internal/ceres/thread_pool.h
index 228f344..87c58c2 100644
--- a/internal/ceres/thread_pool.h
+++ b/internal/ceres/thread_pool.h
@@ -59,6 +59,9 @@
//
class ThreadPool {
public:
+ // Returns the maximum number of hardware threads.
+ static int MaxNumThreadsAvailable();
+
// Default constructor with no active threads. We allow instantiating a
// thread pool with no threads to support the use case of single threaded
// Ceres where everything will be executed on the main thread. For single
@@ -66,7 +69,7 @@
// are expensive to create, and no unused threads shown in the debugger.
ThreadPool();
- // Instantiates a thread pool with min(num_hardware_threads, num_threads)
+ // Instantiates a thread pool with min(MaxNumThreadsAvailable, num_threads)
// number of threads.
explicit ThreadPool(int num_threads);
@@ -75,7 +78,7 @@
~ThreadPool();
// Resizes the thread pool if it is currently less than the requested number
- // of threads. The thread pool will be resized to min(num_hardware_threads,
+ // of threads. The thread pool will be resized to min(MaxNumThreadsAvailable,
// num_threads) number of threads. Resize does not support reducing the
// thread pool size. If a smaller number of threads is requested, the thread
// pool remains the same size. The thread pool is reused within Ceres with