Adds a Ceres Context structure.

A Ceres Context holds common global state that can be re-used within
Ceres.  The Context currently contains a thread pool if compiling with
C++11 threading support.  Threads are expensive to create and destroy so
it is good to maintain them across multiple Ceres solves.

Tested by compiling with and without TBB support and running the unit
tests.  Also ran the Bazel build.

Change-Id: I82f598dfae642aa0e81a6039dc174608a5e8dbfb
diff --git a/bazel/ceres.bzl b/bazel/ceres.bzl
index ef5969a..f0660f1 100644
--- a/bazel/ceres.bzl
+++ b/bazel/ceres.bzl
@@ -49,6 +49,8 @@
     "compressed_row_sparse_matrix.cc",
     "conditioned_cost_function.cc",
     "conjugate_gradients_solver.cc",
+    "context.cc",
+    "context_impl.cc",
     "coordinate_descent_minimizer.cc",
     "corrector.cc",
     "covariance.cc",
diff --git a/include/ceres/context.h b/include/ceres/context.h
new file mode 100644
index 0000000..63c0a16
--- /dev/null
+++ b/include/ceres/context.h
@@ -0,0 +1,58 @@
+// Ceres Solver - A fast non-linear least squares minimizer
+// Copyright 2018 Google Inc. All rights reserved.
+// http://ceres-solver.org/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+// * Neither the name of Google Inc. nor the names of its contributors may be
+//   used to endorse or promote products derived from this software without
+//   specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+// Author: vitus@google.com (Michael Vitus)
+
+#ifndef CERES_PUBLIC_CONTEXT_H_
+#define CERES_PUBLIC_CONTEXT_H_
+
+#include "ceres/internal/macros.h"
+
+namespace ceres {
+
+// A global context for processing data in Ceres.  This provides a mechanism to
+// allow Ceres to reuse items that are expensive to create between multiple
+// calls; for example, thread pools.  The same Context can be used on multiple
+// Problems, either serially or in parallel. When using it with multiple
+// Problems at the same time, they may end up contending for resources
+// (e.g. threads) managed by the Context.
+class Context {
+ public:
+  Context() {}
+  virtual ~Context() {}
+
+  // Creates a context object and the caller takes ownership.
+  static Context* Create();
+
+ private:
+  CERES_DISALLOW_COPY_AND_ASSIGN(Context);
+};
+
+}  // namespace ceres
+
+#endif  // CERES_PUBLIC_CONTEXT_H_
diff --git a/include/ceres/covariance.h b/include/ceres/covariance.h
index 0538522..685e9f0 100644
--- a/include/ceres/covariance.h
+++ b/include/ceres/covariance.h
@@ -33,10 +33,10 @@
 
 #include <utility>
 #include <vector>
+#include "ceres/internal/disable_warnings.h"
 #include "ceres/internal/port.h"
 #include "ceres/internal/scoped_ptr.h"
 #include "ceres/types.h"
-#include "ceres/internal/disable_warnings.h"
 
 namespace ceres {
 
diff --git a/include/ceres/problem.h b/include/ceres/problem.h
index 27ed4ef..e941fff 100644
--- a/include/ceres/problem.h
+++ b/include/ceres/problem.h
@@ -39,13 +39,13 @@
 #include <set>
 #include <vector>
 
-#include "glog/logging.h"
+#include "ceres/context.h"
+#include "ceres/internal/disable_warnings.h"
 #include "ceres/internal/macros.h"
 #include "ceres/internal/port.h"
 #include "ceres/internal/scoped_ptr.h"
 #include "ceres/types.h"
-#include "ceres/internal/disable_warnings.h"
-
+#include "glog/logging.h"
 
 namespace ceres {
 
@@ -126,7 +126,8 @@
           loss_function_ownership(TAKE_OWNERSHIP),
           local_parameterization_ownership(TAKE_OWNERSHIP),
           enable_fast_removal(false),
-          disable_all_safety_checks(false) {}
+          disable_all_safety_checks(false),
+          context(NULL) {}
 
     // These flags control whether the Problem object owns the cost
     // functions, loss functions, and parameterizations passed into
@@ -165,6 +166,13 @@
     // WARNING: Do not set this to true, unless you are absolutely sure of what
     // you are doing.
     bool disable_all_safety_checks;
+
+    // A Ceres global context to use for solving this problem. This may reduce
+    // computation time as Ceres can reuse objects that are expensive to create.
+    // The context object can be NULL, in which case Ceres may create one.
+    //
+    // Ceres does NOT take ownership of the pointer.
+    Context* context;
   };
 
   // The default constructor is equivalent to the
diff --git a/include/ceres/solver.h b/include/ceres/solver.h
index 2ed3fc1..13f520b 100644
--- a/include/ceres/solver.h
+++ b/include/ceres/solver.h
@@ -35,12 +35,12 @@
 #include <string>
 #include <vector>
 #include "ceres/crs_matrix.h"
+#include "ceres/internal/disable_warnings.h"
 #include "ceres/internal/macros.h"
 #include "ceres/internal/port.h"
 #include "ceres/iteration_callback.h"
 #include "ceres/ordered_groups.h"
 #include "ceres/types.h"
-#include "ceres/internal/disable_warnings.h"
 
 namespace ceres {
 
@@ -1059,9 +1059,8 @@
 };
 
 // Helper function which avoids going through the interface.
-CERES_EXPORT void Solve(const Solver::Options& options,
-           Problem* problem,
-           Solver::Summary* summary);
+CERES_EXPORT void Solve(const Solver::Options& options, Problem* problem,
+                        Solver::Summary* summary);
 
 }  // namespace ceres
 
diff --git a/internal/ceres/CMakeLists.txt b/internal/ceres/CMakeLists.txt
index bc15bc1..c13e041 100644
--- a/internal/ceres/CMakeLists.txt
+++ b/internal/ceres/CMakeLists.txt
@@ -49,6 +49,8 @@
     compressed_row_sparse_matrix.cc
     conditioned_cost_function.cc
     conjugate_gradients_solver.cc
+    context.cc
+    context_impl.cc
     coordinate_descent_minimizer.cc
     corrector.cc
     covariance.cc
diff --git a/internal/ceres/context.cc b/internal/ceres/context.cc
new file mode 100644
index 0000000..e223201
--- /dev/null
+++ b/internal/ceres/context.cc
@@ -0,0 +1,41 @@
+// Ceres Solver - A fast non-linear least squares minimizer
+// Copyright 2018 Google Inc. All rights reserved.
+// http://ceres-solver.org/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+// * Neither the name of Google Inc. nor the names of its contributors may be
+//   used to endorse or promote products derived from this software without
+//   specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+// Author: vitus@google.com (Michael Vitus)
+
+#include "ceres/context.h"
+
+#include "ceres/context_impl.h"
+
+namespace ceres {
+
+Context* Context::Create() {
+  return new internal::ContextImpl();
+}
+
+}  // namespace ceres
diff --git a/internal/ceres/context_impl.cc b/internal/ceres/context_impl.cc
new file mode 100644
index 0000000..1b9662f
--- /dev/null
+++ b/internal/ceres/context_impl.cc
@@ -0,0 +1,43 @@
+// Ceres Solver - A fast non-linear least squares minimizer
+// Copyright 2018 Google Inc. All rights reserved.
+// http://ceres-solver.org/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+// * Neither the name of Google Inc. nor the names of its contributors may be
+//   used to endorse or promote products derived from this software without
+//   specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+// Author: vitus@google.com (Michael Vitus)
+
+#include "ceres/context_impl.h"
+
+namespace ceres {
+namespace internal {
+
+void ContextImpl::EnsureMinimumThreads(int num_threads) {
+#ifdef CERES_USE_CXX11_THREADS
+  thread_pool.Resize(num_threads);
+#endif  // CERES_USE_CXX11_THREADS
+
+}
+}  // namespace internal
+}  // namespace ceres
diff --git a/internal/ceres/context_impl.h b/internal/ceres/context_impl.h
new file mode 100644
index 0000000..8219ec2
--- /dev/null
+++ b/internal/ceres/context_impl.h
@@ -0,0 +1,68 @@
+// Ceres Solver - A fast non-linear least squares minimizer
+// Copyright 2018 Google Inc. All rights reserved.
+// http://ceres-solver.org/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+// * Neither the name of Google Inc. nor the names of its contributors may be
+//   used to endorse or promote products derived from this software without
+//   specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+// Author: vitus@google.com (Michael Vitus)
+
+#ifndef CERES_INTERNAL_CONTEXT_IMPL_H_
+#define CERES_INTERNAL_CONTEXT_IMPL_H_
+
+// This include must come before any #ifndef check on Ceres compile options.
+#include "ceres/internal/port.h"
+
+#include "ceres/context.h"
+#include "ceres/internal/macros.h"
+
+#ifdef CERES_USE_CXX11_THREADS
+#include "ceres/thread_pool.h"
+#endif  // CERES_USE_CXX11_THREADS
+
+namespace ceres {
+namespace internal {
+
+class ContextImpl : public Context {
+ public:
+  ContextImpl() {}
+  virtual ~ContextImpl() {}
+
+  // When compiled with C++11 threading support, resize the thread pool to have
+  // at least min(num_threads, num_hardware_threads) threads, where the latter
+  // is defined by the hardware.  Otherwise this call is a no-op.
+  void EnsureMinimumThreads(int num_threads);
+
+#ifdef CERES_USE_CXX11_THREADS
+  ThreadPool thread_pool;
+#endif  // CERES_USE_CXX11_THREADS
+
+ private:
+  CERES_DISALLOW_COPY_AND_ASSIGN(ContextImpl);
+};
+
+}  // namespace internal
+}  // namespace ceres
+
+#endif  // CERES_INTERNAL_CONTEXT_IMPL_H_
diff --git a/internal/ceres/coordinate_descent_minimizer.cc b/internal/ceres/coordinate_descent_minimizer.cc
index 884fbd2..a334dde 100644
--- a/internal/ceres/coordinate_descent_minimizer.cc
+++ b/internal/ceres/coordinate_descent_minimizer.cc
@@ -30,7 +30,7 @@
 
 #include "ceres/coordinate_descent_minimizer.h"
 
-#ifdef CERES_USE_TBB
+#if defined(CERES_USE_TBB) || defined(CERES_USE_CXX11_THREADS)
 #include "ceres/parallel_for.h"
 #endif
 
@@ -45,11 +45,11 @@
 #include "ceres/problem_impl.h"
 #include "ceres/program.h"
 #include "ceres/residual_block.h"
+#include "ceres/scoped_thread_token.h"
 #include "ceres/solver.h"
+#include "ceres/thread_token_provider.h"
 #include "ceres/trust_region_minimizer.h"
 #include "ceres/trust_region_strategy.h"
-#include "ceres/thread_token_provider.h"
-#include "ceres/scoped_thread_token.h"
 
 namespace ceres {
 namespace internal {
@@ -61,6 +61,9 @@
 using std::string;
 using std::vector;
 
+CoordinateDescentMinimizer::CoordinateDescentMinimizer(ContextImpl* context)
+    : context_(CHECK_NOTNULL(context)) {}
+
 CoordinateDescentMinimizer::~CoordinateDescentMinimizer() {
 }
 
@@ -122,6 +125,7 @@
   evaluator_options_.linear_solver_type = DENSE_QR;
   evaluator_options_.num_eliminate_blocks = 0;
   evaluator_options_.num_threads = 1;
+  evaluator_options_.context = context_;
 
   return true;
 }
@@ -142,6 +146,7 @@
 
   LinearSolver::Options linear_solver_options;
   linear_solver_options.type = DENSE_QR;
+  linear_solver_options.context = context_;
 
   for (int i = 0; i < options.num_threads; ++i) {
     linear_solvers[i] = LinearSolver::Create(linear_solver_options);
@@ -168,16 +173,17 @@
 #pragma omp parallel for num_threads(num_inner_iteration_threads)
 #endif
 
-#ifndef CERES_USE_TBB
+#if !(defined(CERES_USE_TBB) || defined(CERES_USE_CXX11_THREADS))
     for (int j = independent_set_offsets_[i];
          j < independent_set_offsets_[i + 1];
          ++j) {
 #else
-    ParallelFor(independent_set_offsets_[i],
+    ParallelFor(context_,
+                independent_set_offsets_[i],
                 independent_set_offsets_[i + 1],
                 num_inner_iteration_threads,
                 [&](int j) {
-#endif // !CERES_USE_TBB
+#endif // !(defined(CERES_USE_TBB) || defined(CERES_USE_CXX11_THREADS))
 
       const ScopedThreadToken scoped_thread_token(&thread_token_provider);
       const int thread_id = scoped_thread_token.token();
@@ -212,7 +218,7 @@
       parameter_block->SetState(parameters + parameter_block->state_offset());
       parameter_block->SetConstant();
     }
-#ifdef CERES_USE_TBB
+#if defined(CERES_USE_TBB) || defined(CERES_USE_CXX11_THREADS)
   );
 #endif
   }
@@ -239,7 +245,7 @@
 
   Minimizer::Options minimizer_options;
   minimizer_options.evaluator.reset(
-      CHECK_NOTNULL(Evaluator::Create(evaluator_options_, program,  &error)));
+      CHECK_NOTNULL(Evaluator::Create(evaluator_options_, program, &error)));
   minimizer_options.jacobian.reset(
       CHECK_NOTNULL(minimizer_options.evaluator->CreateJacobian()));
 
diff --git a/internal/ceres/coordinate_descent_minimizer.h b/internal/ceres/coordinate_descent_minimizer.h
index 25ea04c..0ee193f 100644
--- a/internal/ceres/coordinate_descent_minimizer.h
+++ b/internal/ceres/coordinate_descent_minimizer.h
@@ -34,6 +34,7 @@
 #include <string>
 #include <vector>
 
+#include "ceres/context_impl.h"
 #include "ceres/evaluator.h"
 #include "ceres/minimizer.h"
 #include "ceres/problem_impl.h"
@@ -57,6 +58,8 @@
 // program are constant.
 class CoordinateDescentMinimizer : public Minimizer {
  public:
+  explicit CoordinateDescentMinimizer(ContextImpl* context);
+
   bool Init(const Program& program,
             const ProblemImpl::ParameterMap& parameter_map,
             const ParameterBlockOrdering& ordering,
@@ -64,6 +67,7 @@
 
   // Minimizer interface.
   virtual ~CoordinateDescentMinimizer();
+
   virtual void Minimize(const Minimizer::Options& options,
                         double* parameters,
                         Solver::Summary* summary);
@@ -94,6 +98,8 @@
   std::vector<int> independent_set_offsets_;
 
   Evaluator::Options evaluator_options_;
+
+  ContextImpl* context_;
 };
 
 }  // namespace internal
diff --git a/internal/ceres/covariance_impl.cc b/internal/ceres/covariance_impl.cc
index f8c510c..f7c7126 100644
--- a/internal/ceres/covariance_impl.cc
+++ b/internal/ceres/covariance_impl.cc
@@ -30,7 +30,7 @@
 
 #include "ceres/covariance_impl.h"
 
-#ifdef CERES_USE_TBB
+#if defined(CERES_USE_TBB) || defined(CERES_USE_CXX11_THREADS)
 #include "ceres/parallel_for.h"
 #endif
 
@@ -86,6 +86,7 @@
     options_.num_threads = 1;
   }
 #endif
+
   evaluate_options_.num_threads = options_.num_threads;
   evaluate_options_.apply_loss_function = options_.apply_loss_function;
 }
@@ -365,18 +366,27 @@
     for (int j = i; j < num_parameters; ++j) {
 #endif // CERES_NO_THREADS
 
-#ifdef CERES_USE_TBB
+#if defined(CERES_USE_TBB) || defined(CERES_USE_CXX11_THREADS)
+
   // The parallel for abstraction does not have support for constraining the
   // number of workers in nested parallel for loops. Consequently, we will try
   // to evenly distribute the number of workers between the each parallel for
   // loop.
   // TODO(vitus): consolidate the nested for loops into a single loop which can
   // be properly split between the threads.
+  problem_->context()->EnsureMinimumThreads(num_threads);
   const int num_outer_threads = std::sqrt(num_threads);
   const int num_inner_threads = num_threads / num_outer_threads;
-  ParallelFor(0, num_parameters, num_outer_threads, [&](int i) {
-    ParallelFor(i, num_parameters, num_inner_threads, [&](int j) {
-#endif // CERES_USE_TBB
+  ParallelFor(problem_->context(),
+              0,
+              num_parameters,
+              num_outer_threads,
+              [&](int i) {
+    ParallelFor(problem_->context(), i,
+                num_parameters,
+                num_inner_threads,
+                [&](int j) {
+#endif // defined(CERES_USE_TBB) || defined(CERES_USE_CXX11_THREADS)
 
       int covariance_row_idx = cum_parameter_size[i];
       int covariance_col_idx = cum_parameter_size[j];
@@ -404,12 +414,12 @@
 
       }
     }
-#ifdef CERES_USE_TBB
+#if defined(CERES_USE_TBB) || defined(CERES_USE_CXX11_THREADS)
     );
   });
 #else
   }
-#endif // CERES_USE_TBB
+#endif // defined(CERES_USE_TBB) || defined(CERES_USE_CXX11_THREADS)
   return success;
 }
 
@@ -730,11 +740,12 @@
 #pragma omp parallel for num_threads(num_threads) schedule(dynamic)
 #endif // CERES_USE_OPENMP
 
-#ifndef CERES_USE_TBB
+#if !(defined(CERES_USE_TBB) || defined(CERES_USE_CXX11_THREADS))
   for (int r = 0; r < num_cols; ++r) {
 #else
-  ParallelFor(0, num_cols, num_threads, [&](int r) {
-#endif // !CERES_USE_TBB
+  problem_->context()->EnsureMinimumThreads(num_threads);
+  ParallelFor(problem_->context(), 0, num_cols, num_threads, [&](int r) {
+#endif // !(defined(CERES_USE_TBB) || defined(CERES_USE_CXX11_THREADS))
 
     const int row_begin = rows[r];
     const int row_end = rows[r + 1];
@@ -756,9 +767,9 @@
       }
     }
   }
-#ifdef CERES_USE_TBB
+#if defined(CERES_USE_TBB) || defined(CERES_USE_CXX11_THREADS)
   );
-#endif // CERES_USE_TBB
+#endif // defined(CERES_USE_TBB) || defined(CERES_USE_CXX11_THREADS)
 
   free(permutation);
   cholmod_l_free_sparse(&R, &cc);
@@ -930,11 +941,12 @@
 #pragma omp parallel for num_threads(num_threads) schedule(dynamic)
 #endif // CERES_USE_OPENMP
 
-#ifndef CERES_USE_TBB
+#if !(defined(CERES_USE_TBB) || defined(CERES_USE_CXX11_THREADS))
   for (int r = 0; r < num_cols; ++r) {
 #else
-  ParallelFor(0, num_cols, num_threads, [&](int r) {
-#endif // !CERES_USE_TBB
+  problem_->context()->EnsureMinimumThreads(num_threads);
+  ParallelFor(problem_->context(), 0, num_cols, num_threads, [&](int r) {
+#endif // !(defined(CERES_USE_TBB) || defined(CERES_USE_CXX11_THREADS))
 
     const int row_begin = rows[r];
     const int row_end = rows[r + 1];
@@ -960,9 +972,9 @@
     }
   }
 
-#ifdef CERES_USE_TBB
+#if defined(CERES_USE_TBB) || defined(CERES_USE_CXX11_THREADS)
   );
-#endif // CERES_USE_TBB
+#endif // defined(CERES_USE_TBB) || defined(CERES_USE_CXX11_THREADS)
 
   event_logger.AddEvent("Inverse");
 
diff --git a/internal/ceres/dense_linear_solver_test.cc b/internal/ceres/dense_linear_solver_test.cc
index 48da2bd..f8b64f8 100644
--- a/internal/ceres/dense_linear_solver_test.cc
+++ b/internal/ceres/dense_linear_solver_test.cc
@@ -29,6 +29,7 @@
 // Author: sameeragarwal@google.com (Sameer Agarwal)
 
 #include "ceres/casts.h"
+#include "ceres/context_impl.h"
 #include "ceres/internal/scoped_ptr.h"
 #include "ceres/linear_least_squares_problems.h"
 #include "ceres/linear_solver.h"
@@ -73,6 +74,8 @@
   LinearSolver::Options options;
   options.type = ::testing::get<0>(param);
   options.dense_linear_algebra_library_type = ::testing::get<1>(param);
+  ContextImpl context;
+  options.context = &context;
   scoped_ptr<LinearSolver> solver(LinearSolver::Create(options));
 
   LinearSolver::PerSolveOptions per_solve_options;
diff --git a/internal/ceres/dynamic_sparse_normal_cholesky_solver_test.cc b/internal/ceres/dynamic_sparse_normal_cholesky_solver_test.cc
index 3fdf6a1..1e5d179 100644
--- a/internal/ceres/dynamic_sparse_normal_cholesky_solver_test.cc
+++ b/internal/ceres/dynamic_sparse_normal_cholesky_solver_test.cc
@@ -30,6 +30,7 @@
 
 #include "ceres/casts.h"
 #include "ceres/compressed_row_sparse_matrix.h"
+#include "ceres/context_impl.h"
 #include "ceres/internal/scoped_ptr.h"
 #include "ceres/linear_least_squares_problems.h"
 #include "ceres/linear_solver.h"
@@ -97,6 +98,8 @@
     options.dynamic_sparsity = true;
     options.sparse_linear_algebra_library_type =
         sparse_linear_algebra_library_type;
+    ContextImpl context;
+    options.context = &context;
     TestSolver(options, NULL);
     TestSolver(options, D_.get());
   }
diff --git a/internal/ceres/evaluator.cc b/internal/ceres/evaluator.cc
index 6193ae8..8387983 100644
--- a/internal/ceres/evaluator.cc
+++ b/internal/ceres/evaluator.cc
@@ -51,6 +51,8 @@
 Evaluator* Evaluator::Create(const Evaluator::Options& options,
                              Program* program,
                              std::string* error) {
+  CHECK(options.context != NULL);
+
   switch (options.linear_solver_type) {
     case DENSE_QR:
     case DENSE_NORMAL_CHOLESKY:
diff --git a/internal/ceres/evaluator.h b/internal/ceres/evaluator.h
index 2c79b5f..cbb7aa2 100644
--- a/internal/ceres/evaluator.h
+++ b/internal/ceres/evaluator.h
@@ -36,6 +36,7 @@
 #include <string>
 #include <vector>
 
+#include "ceres/context_impl.h"
 #include "ceres/execution_summary.h"
 #include "ceres/internal/port.h"
 #include "ceres/types.h"
@@ -62,12 +63,14 @@
         : num_threads(1),
           num_eliminate_blocks(-1),
           linear_solver_type(DENSE_QR),
-          dynamic_sparsity(false) {}
+          dynamic_sparsity(false),
+          context(NULL) {}
 
     int num_threads;
     int num_eliminate_blocks;
     LinearSolverType linear_solver_type;
     bool dynamic_sparsity;
+    ContextImpl* context;
   };
 
   static Evaluator* Create(const Options& options,
diff --git a/internal/ceres/evaluator_test.cc b/internal/ceres/evaluator_test.cc
index 48dc78a..7cec00a 100644
--- a/internal/ceres/evaluator_test.cc
+++ b/internal/ceres/evaluator_test.cc
@@ -131,6 +131,7 @@
     options.linear_solver_type = GetParam().linear_solver_type;
     options.num_eliminate_blocks = GetParam().num_eliminate_blocks;
     options.dynamic_sparsity = GetParam().dynamic_sparsity;
+    options.context = problem.context();
     string error;
     return Evaluator::Create(options, program, &error);
   }
@@ -604,6 +605,7 @@
   Evaluator::Options options;
   options.linear_solver_type = DENSE_QR;
   options.num_eliminate_blocks = 0;
+  options.context = problem.context();
   string error;
   scoped_ptr<Evaluator> evaluator(Evaluator::Create(options, program, &error));
   scoped_ptr<SparseMatrix> jacobian(evaluator->CreateJacobian());
diff --git a/internal/ceres/gradient_checking_cost_function.cc b/internal/ceres/gradient_checking_cost_function.cc
index f2c7336..4632c1d 100644
--- a/internal/ceres/gradient_checking_cost_function.cc
+++ b/internal/ceres/gradient_checking_cost_function.cc
@@ -189,6 +189,7 @@
       DO_NOT_TAKE_OWNERSHIP;
   gradient_checking_problem_options.local_parameterization_ownership =
       DO_NOT_TAKE_OWNERSHIP;
+  gradient_checking_problem_options.context = problem_impl->context();
 
   NumericDiffOptions numeric_diff_options;
   numeric_diff_options.relative_step_size = relative_step_size;
diff --git a/internal/ceres/implicit_schur_complement_test.cc b/internal/ceres/implicit_schur_complement_test.cc
index 21401f7..4f54a1e 100644
--- a/internal/ceres/implicit_schur_complement_test.cc
+++ b/internal/ceres/implicit_schur_complement_test.cc
@@ -35,6 +35,7 @@
 #include "ceres/block_random_access_dense_matrix.h"
 #include "ceres/block_sparse_matrix.h"
 #include "ceres/casts.h"
+#include "ceres/context_impl.h"
 #include "ceres/internal/eigen.h"
 #include "ceres/internal/scoped_ptr.h"
 #include "ceres/linear_least_squares_problems.h"
@@ -85,6 +86,8 @@
     LinearSolver::Options options;
     options.elimination_groups.push_back(num_eliminate_blocks_);
     options.type = DENSE_SCHUR;
+    ContextImpl context;
+    options.context = &context;
 
     scoped_ptr<SchurEliminatorBase> eliminator(
         SchurEliminatorBase::Create(options));
@@ -124,6 +127,8 @@
     LinearSolver::Options options;
     options.elimination_groups.push_back(num_eliminate_blocks_);
     options.preconditioner_type = JACOBI;
+    ContextImpl context;
+    options.context = &context;
     ImplicitSchurComplement isc(options);
     isc.Init(*A_, D, b_.get());
 
diff --git a/internal/ceres/iterative_schur_complement_solver.cc b/internal/ceres/iterative_schur_complement_solver.cc
index 900e669..7a1e7a6 100644
--- a/internal/ceres/iterative_schur_complement_solver.cc
+++ b/internal/ceres/iterative_schur_complement_solver.cc
@@ -56,7 +56,8 @@
 
 IterativeSchurComplementSolver::IterativeSchurComplementSolver(
     const LinearSolver::Options& options)
-    : options_(options) {}
+    : options_(options) {
+}
 
 IterativeSchurComplementSolver::~IterativeSchurComplementSolver() {}
 
@@ -150,6 +151,8 @@
   preconditioner_options.e_block_size = options_.e_block_size;
   preconditioner_options.f_block_size = options_.f_block_size;
   preconditioner_options.elimination_groups = options_.elimination_groups;
+  CHECK(options_.context != NULL);
+  preconditioner_options.context = options_.context;
 
   switch (options_.preconditioner_type) {
     case JACOBI:
diff --git a/internal/ceres/iterative_schur_complement_solver_test.cc b/internal/ceres/iterative_schur_complement_solver_test.cc
index 41cce4d..f5a545c 100644
--- a/internal/ceres/iterative_schur_complement_solver_test.cc
+++ b/internal/ceres/iterative_schur_complement_solver_test.cc
@@ -39,6 +39,7 @@
 #include "ceres/block_random_access_dense_matrix.h"
 #include "ceres/block_sparse_matrix.h"
 #include "ceres/casts.h"
+#include "ceres/context_impl.h"
 #include "ceres/internal/eigen.h"
 #include "ceres/internal/scoped_ptr.h"
 #include "ceres/linear_least_squares_problems.h"
@@ -82,6 +83,8 @@
 
     LinearSolver::Options options;
     options.type = DENSE_QR;
+    ContextImpl context;
+    options.context = &context;
     scoped_ptr<LinearSolver> qr(LinearSolver::Create(options));
 
     LinearSolver::PerSolveOptions per_solve_options;
diff --git a/internal/ceres/line_search_preprocessor.cc b/internal/ceres/line_search_preprocessor.cc
index 831f5e8..7ddb280 100644
--- a/internal/ceres/line_search_preprocessor.cc
+++ b/internal/ceres/line_search_preprocessor.cc
@@ -32,6 +32,8 @@
 
 #include <numeric>
 #include <string>
+#include "ceres/casts.h"
+#include "ceres/context_impl.h"
 #include "ceres/evaluator.h"
 #include "ceres/minimizer.h"
 #include "ceres/problem_impl.h"
@@ -57,6 +59,7 @@
   pp->evaluator_options.linear_solver_type = CGNR;
   pp->evaluator_options.num_eliminate_blocks = 0;
   pp->evaluator_options.num_threads = pp->options.num_threads;
+  pp->evaluator_options.context = pp->problem->context();
   pp->evaluator.reset(Evaluator::Create(pp->evaluator_options,
                                         pp->reduced_program.get(),
                                         &pp->error));
diff --git a/internal/ceres/line_search_preprocessor_test.cc b/internal/ceres/line_search_preprocessor_test.cc
index 0d69021..d0a866b 100644
--- a/internal/ceres/line_search_preprocessor_test.cc
+++ b/internal/ceres/line_search_preprocessor_test.cc
@@ -30,10 +30,10 @@
 
 #include <map>
 
+#include "ceres/line_search_preprocessor.h"
 #include "ceres/problem_impl.h"
 #include "ceres/sized_cost_function.h"
 #include "ceres/solver.h"
-#include "ceres/line_search_preprocessor.h"
 #include "gtest/gtest.h"
 
 namespace ceres {
@@ -99,6 +99,7 @@
   problem.AddParameterBlock(&x, 1);
   Solver::Options options;
   options.minimizer_type = LINE_SEARCH;
+
   LineSearchPreprocessor preprocessor;
   PreprocessedProblem pp;
   EXPECT_TRUE(preprocessor.Preprocess(options, &problem, &pp));
diff --git a/internal/ceres/linear_solver.cc b/internal/ceres/linear_solver.cc
index 81acc38..da14d3d 100644
--- a/internal/ceres/linear_solver.cc
+++ b/internal/ceres/linear_solver.cc
@@ -71,6 +71,8 @@
 }
 
 LinearSolver* LinearSolver::Create(const LinearSolver::Options& options) {
+  CHECK(options.context != NULL);
+
   switch (options.type) {
     case CGNR:
       return new CgnrSolver(options);
diff --git a/internal/ceres/linear_solver.h b/internal/ceres/linear_solver.h
index 65cb242..3f58cfb 100644
--- a/internal/ceres/linear_solver.h
+++ b/internal/ceres/linear_solver.h
@@ -41,6 +41,7 @@
 #include "ceres/block_sparse_matrix.h"
 #include "ceres/casts.h"
 #include "ceres/compressed_row_sparse_matrix.h"
+#include "ceres/context_impl.h"
 #include "ceres/dense_sparse_matrix.h"
 #include "ceres/execution_summary.h"
 #include "ceres/triplet_sparse_matrix.h"
@@ -116,7 +117,8 @@
           residual_reset_period(10),
           row_block_size(Eigen::Dynamic),
           e_block_size(Eigen::Dynamic),
-          f_block_size(Eigen::Dynamic) {
+          f_block_size(Eigen::Dynamic),
+          context(NULL) {
     }
 
     LinearSolverType type;
@@ -175,6 +177,8 @@
     int row_block_size;
     int e_block_size;
     int f_block_size;
+
+    ContextImpl* context;  // LinearSolver::Create CHECKs it is non-NULL.
   };
 
   // Options for the Solve method.
diff --git a/internal/ceres/parallel_for.h b/internal/ceres/parallel_for.h
index 9222884..603c609 100644
--- a/internal/ceres/parallel_for.h
+++ b/internal/ceres/parallel_for.h
@@ -33,13 +33,18 @@
 
 #include <functional>
 
+#include "ceres/context_impl.h"
+
 namespace ceres {
 namespace internal {
 
 // Execute the function for every element in the range [start, end) with at most
 // num_threads. It will execute all the work on the calling thread if
 // num_threads is 1.
-void ParallelFor(int start, int end, int num_threads,
+void ParallelFor(ContextImpl* context,
+                 int start,
+                 int end,
+                 int num_threads,
                  const std::function<void(int)>& function);
 
 }  // namespace internal
diff --git a/internal/ceres/parallel_for_tbb.cc b/internal/ceres/parallel_for_tbb.cc
index 38a744f..82fbf10 100644
--- a/internal/ceres/parallel_for_tbb.cc
+++ b/internal/ceres/parallel_for_tbb.cc
@@ -43,9 +43,13 @@
 namespace ceres {
 namespace internal {
 
-void ParallelFor(int start, int end, int num_threads,
+void ParallelFor(ContextImpl* context,
+                 int start,
+                 int end,
+                 int num_threads,
                  const std::function<void(int)>& function) {
   CHECK_GT(num_threads, 0);
+  CHECK(context != NULL);
   if (end <= start) {
     return;
   }
diff --git a/internal/ceres/parallel_for_test.cc b/internal/ceres/parallel_for_test.cc
index eb10a3b..2f44460 100644
--- a/internal/ceres/parallel_for_test.cc
+++ b/internal/ceres/parallel_for_test.cc
@@ -31,12 +31,13 @@
 // This include must come before any #ifndef check on Ceres compile options.
 #include "ceres/internal/port.h"
 
-#ifdef CERES_USE_TBB
+#if defined(CERES_USE_TBB) || defined(CERES_USE_CXX11_THREADS)
 
 #include "ceres/parallel_for.h"
 
 #include <vector>
 
+#include "ceres/context_impl.h"
 #include "gmock/gmock.h"
 #include "gtest/gtest.h"
 
@@ -48,6 +49,9 @@
 // Tests the parallel for loop computes the correct result for various number of
 // threads.
 TEST(ParallelFor, NumThreads) {
+  ContextImpl context;
+  context.EnsureMinimumThreads(/*num_threads=*/2);
+
   const int size = 16;
   std::vector<int> expected_results(size, 0);
   for (int i = 0; i < size; ++i) {
@@ -56,7 +60,7 @@
 
   for (int num_threads = 1; num_threads <= 8; ++num_threads) {
     std::vector<int> values(size, 0);
-    ParallelFor(0, size, num_threads,
+    ParallelFor(&context, 0, size, num_threads,
                 [&values](int i) { values[i] = std::sqrt(i); });
     EXPECT_THAT(values, ElementsAreArray(expected_results));
   }
@@ -64,11 +68,14 @@
 
 // Tests nested for loops do not result in a deadlock.
 TEST(ParallelFor, NestedParallelForDeadlock) {
+  ContextImpl context;
+  context.EnsureMinimumThreads(/*num_threads=*/2);
+
   // Increment each element in the 2D matrix.
   std::vector<std::vector<int>> x(3, {1, 2, 3});
-  ParallelFor(0, 3, 2, [&x](int i) {
+  ParallelFor(&context, 0, 3, 2, [&x, &context](int i) {
     std::vector<int>& y = x.at(i);
-    ParallelFor(0, 3, 2, [&y](int j) { ++y.at(j); });
+    ParallelFor(&context, 0, 3, 2, [&y](int j) { ++y.at(j); });
   });
 
   const std::vector<int> results = {2, 3, 4};
@@ -80,4 +87,4 @@
 }  // namespace internal
 }  // namespace ceres
 
-#endif // CERES_USE_TBB
+#endif // defined(CERES_USE_TBB) || defined(CERES_USE_CXX11_THREADS)
diff --git a/internal/ceres/preconditioner.h b/internal/ceres/preconditioner.h
index d0b7a47..37b221e 100644
--- a/internal/ceres/preconditioner.h
+++ b/internal/ceres/preconditioner.h
@@ -34,6 +34,7 @@
 #include <vector>
 #include "ceres/casts.h"
 #include "ceres/compressed_row_sparse_matrix.h"
+#include "ceres/context_impl.h"
 #include "ceres/linear_operator.h"
 #include "ceres/sparse_matrix.h"
 #include "ceres/types.h"
@@ -56,7 +57,8 @@
           num_threads(1),
           row_block_size(Eigen::Dynamic),
           e_block_size(Eigen::Dynamic),
-          f_block_size(Eigen::Dynamic) {
+          f_block_size(Eigen::Dynamic),
+          context(NULL) {
     }
 
     PreconditionerType type;
@@ -111,6 +113,8 @@
     int row_block_size;
     int e_block_size;
     int f_block_size;
+
+    ContextImpl* context;
   };
 
   // If the optimization problem is such that there are no remaining
diff --git a/internal/ceres/problem_impl.cc b/internal/ceres/problem_impl.cc
index ae77c0b..3ed0efd 100644
--- a/internal/ceres/problem_impl.cc
+++ b/internal/ceres/problem_impl.cc
@@ -41,6 +41,7 @@
 #include "ceres/casts.h"
 #include "ceres/compressed_row_jacobian_writer.h"
 #include "ceres/compressed_row_sparse_matrix.h"
+#include "ceres/context_impl.h"
 #include "ceres/cost_function.h"
 #include "ceres/crs_matrix.h"
 #include "ceres/evaluator.h"
@@ -107,6 +108,18 @@
   }
 }
 
+void InitializeContext(Context* context,  // NULL => allocate a private one.
+                       ContextImpl** context_impl,
+                       bool* context_impl_owned) {
+  if (context == NULL) {  // No context supplied: allocate one.
+    *context_impl_owned = true;  // Marks the new ContextImpl as owned.
+    *context_impl = new ContextImpl;
+  } else {  // Context supplied: use it as-is.
+    *context_impl_owned = false;  // Marks the supplied context as not owned.
+    *context_impl = down_cast<ContextImpl*>(context);
+  }
+}
+
 }  // namespace
 
 ParameterBlock* ProblemImpl::InternalAddParameterBlock(double* values,
@@ -230,13 +243,17 @@
 }
 
 ProblemImpl::ProblemImpl()
-    : program_(new internal::Program) {
+    : options_(Problem::Options()),
+      program_(new internal::Program) {
   residual_parameters_.reserve(10);
+  InitializeContext(options_.context, &context_impl_, &context_impl_owned_);
 }
 
 ProblemImpl::ProblemImpl(const Problem::Options& options)
-    : options_(options), program_(new internal::Program) {
+    : options_(options),
+      program_(new internal::Program) {
   residual_parameters_.reserve(10);
+  InitializeContext(options_.context, &context_impl_, &context_impl_owned_);
 }
 
 ProblemImpl::~ProblemImpl() {
@@ -261,6 +278,10 @@
   // Delete the owned parameterizations.
   STLDeleteUniqueContainerPointers(local_parameterizations_to_delete_.begin(),
                                    local_parameterizations_to_delete_.end());
+
+  if (context_impl_owned_) {
+    delete context_impl_;
+  }
 }
 
 ResidualBlock* ProblemImpl::AddResidualBlock(
@@ -798,6 +819,10 @@
   evaluator_options.num_threads = evaluate_options.num_threads;
 #endif  // CERES_NO_THREADS
 
+  // The main thread also does work so we only need to launch num_threads - 1.
+  context_impl_->EnsureMinimumThreads(evaluator_options.num_threads - 1);
+  evaluator_options.context = context_impl_;
+
   scoped_ptr<Evaluator> evaluator(
       new ProgramEvaluator<ScratchEvaluatePreparer,
                            CompressedRowJacobianWriter>(evaluator_options,
diff --git a/internal/ceres/problem_impl.h b/internal/ceres/problem_impl.h
index 67d6f11..03e61d2 100644
--- a/internal/ceres/problem_impl.h
+++ b/internal/ceres/problem_impl.h
@@ -42,10 +42,11 @@
 #include <map>
 #include <vector>
 
+#include "ceres/collections_port.h"
+#include "ceres/context_impl.h"
 #include "ceres/internal/macros.h"
 #include "ceres/internal/port.h"
 #include "ceres/internal/scoped_ptr.h"
-#include "ceres/collections_port.h"
 #include "ceres/problem.h"
 #include "ceres/types.h"
 
@@ -181,6 +182,8 @@
     return residual_block_set_;
   }
 
+  ContextImpl* context() { return context_impl_; }
+
  private:
   ParameterBlock* InternalAddParameterBlock(double* values, int size);
   void InternalRemoveResidualBlock(ResidualBlock* residual_block);
@@ -196,6 +199,9 @@
 
   const Problem::Options options_;
 
+  bool context_impl_owned_;  // True if context_impl_ was allocated here.
+  ContextImpl* context_impl_;  // From options_.context, else private.
+
   // The mapping from user pointers to parameter blocks.
   std::map<double*, ParameterBlock*> parameter_block_map_;
 
diff --git a/internal/ceres/program_evaluator.h b/internal/ceres/program_evaluator.h
index 36700b9..ca60902 100644
--- a/internal/ceres/program_evaluator.h
+++ b/internal/ceres/program_evaluator.h
@@ -95,7 +95,7 @@
 #include "ceres/small_blas.h"
 #include "ceres/thread_token_provider.h"
 
-#ifdef CERES_USE_TBB
+#if defined(CERES_USE_TBB) || defined(CERES_USE_CXX11_THREADS)
 #include <atomic>
 
 #include "ceres/parallel_for.h"
@@ -193,18 +193,19 @@
     for (int i = 0; i < num_residual_blocks; ++i) {
 #endif // CERES_NO_THREADS
 
-#ifdef CERES_USE_TBB
+#if defined(CERES_USE_TBB) || defined(CERES_USE_CXX11_THREADS)
     std::atomic_bool abort(false);
 
-    ParallelFor(0, num_residual_blocks, options_.num_threads, [&](int i) {
-#endif // CERES_USE_TBB
+    ParallelFor(options_.context, 0, num_residual_blocks, options_.num_threads,
+                [&](int i) {
+#endif // defined(CERES_USE_TBB) || defined(CERES_USE_CXX11_THREADS)
 
       if (abort) {
-#ifndef CERES_USE_TBB
-        continue;
-#else
+#if defined(CERES_USE_TBB) || defined(CERES_USE_CXX11_THREADS)
         return;
-#endif // !CERES_USE_TBB
+#else
+        continue;
+#endif // defined(CERES_USE_TBB) || defined(CERES_USE_CXX11_THREADS)
       }
 
       const ScopedThreadToken scoped_thread_token(&thread_token_provider);
@@ -248,11 +249,11 @@
 #pragma omp flush(abort)
 #endif // CERES_USE_OPENMP
 
-#ifndef CERES_USE_TBB
-        continue;
-#else
+#if defined(CERES_USE_TBB) || defined(CERES_USE_CXX11_THREADS)
         return;
-#endif // !CERES_USE_TBB
+#else
+        continue;
+#endif // defined(CERES_USE_TBB) || defined(CERES_USE_CXX11_THREADS)
       }
 
       scratch->cost += block_cost;
@@ -285,9 +286,9 @@
         }
       }
     }
-#ifdef CERES_USE_TBB
+#if defined(CERES_USE_TBB) || defined(CERES_USE_CXX11_THREADS)
     );
-#endif // CERES_USE_TBB
+#endif // defined(CERES_USE_TBB) || defined(CERES_USE_CXX11_THREADS)
 
     if (!abort) {
       const int num_parameters = program_->NumEffectiveParameters();
diff --git a/internal/ceres/schur_complement_solver.h b/internal/ceres/schur_complement_solver.h
index 74665f0..e83a715 100644
--- a/internal/ceres/schur_complement_solver.h
+++ b/internal/ceres/schur_complement_solver.h
@@ -113,6 +113,7 @@
       : options_(options) {
     CHECK_GT(options.elimination_groups.size(), 1);
     CHECK_GT(options.elimination_groups[0], 0);
+    CHECK(options.context != NULL);
   }
 
   // LinearSolver methods
diff --git a/internal/ceres/schur_complement_solver_test.cc b/internal/ceres/schur_complement_solver_test.cc
index 8228efb..3f70300 100644
--- a/internal/ceres/schur_complement_solver_test.cc
+++ b/internal/ceres/schur_complement_solver_test.cc
@@ -35,6 +35,7 @@
 #include "ceres/block_sparse_matrix.h"
 #include "ceres/block_structure.h"
 #include "ceres/casts.h"
+#include "ceres/context_impl.h"
 #include "ceres/detect_structure.h"
 #include "ceres/internal/scoped_ptr.h"
 #include "ceres/linear_least_squares_problems.h"
@@ -68,6 +69,8 @@
 
     LinearSolver::Options options;
     options.type = DENSE_QR;
+    ContextImpl context;
+    options.context = &context;
 
     scoped_ptr<LinearSolver> qr(LinearSolver::Create(options));
 
@@ -104,6 +107,8 @@
     options.sparse_linear_algebra_library_type =
         sparse_linear_algebra_library_type;
     options.use_postordering = use_postordering;
+    ContextImpl context;
+    options.context = &context;
     DetectStructure(*A->block_structure(),
                     num_eliminate_blocks,
                     &options.row_block_size,
diff --git a/internal/ceres/schur_eliminator.h b/internal/ceres/schur_eliminator.h
index 667f384..07a7f0c 100644
--- a/internal/ceres/schur_eliminator.h
+++ b/internal/ceres/schur_eliminator.h
@@ -33,13 +33,13 @@
 
 #include <map>
 #include <vector>
-#include "ceres/mutex.h"
 #include "ceres/block_random_access_matrix.h"
 #include "ceres/block_sparse_matrix.h"
 #include "ceres/block_structure.h"
-#include "ceres/linear_solver.h"
 #include "ceres/internal/eigen.h"
 #include "ceres/internal/scoped_ptr.h"
+#include "ceres/linear_solver.h"
+#include "ceres/mutex.h"
 
 namespace ceres {
 namespace internal {
@@ -226,7 +226,8 @@
 class SchurEliminator : public SchurEliminatorBase {
  public:
   explicit SchurEliminator(const LinearSolver::Options& options)
-      : num_threads_(options.num_threads) {
+      : num_threads_(options.num_threads),
+        context_(CHECK_NOTNULL(options.context)) {
   }
 
   // SchurEliminatorBase Interface
@@ -318,6 +319,7 @@
                                BlockRandomAccessMatrix* lhs);
 
   int num_threads_;
+  ContextImpl* context_;
   int num_eliminate_blocks_;
   bool assume_full_rank_ete_;
 
diff --git a/internal/ceres/schur_eliminator_impl.h b/internal/ceres/schur_eliminator_impl.h
index 1776987..ca8f30e 100644
--- a/internal/ceres/schur_eliminator_impl.h
+++ b/internal/ceres/schur_eliminator_impl.h
@@ -66,7 +66,7 @@
 #include "Eigen/Dense"
 #include "glog/logging.h"
 
-#ifdef CERES_USE_TBB
+#if defined(CERES_USE_TBB) || defined(CERES_USE_CXX11_THREADS)
 #include "ceres/parallel_for.h"
 #endif
 
@@ -194,12 +194,12 @@
 #pragma omp parallel for num_threads(num_threads_) schedule(dynamic)
 #endif // CERES_USE_OPENMP
 
-#ifndef CERES_USE_TBB
+#if !(defined(CERES_USE_TBB) || defined(CERES_USE_CXX11_THREADS))
     for (int i = num_eliminate_blocks_; i < num_col_blocks; ++i) {
 #else
-    ParallelFor(num_eliminate_blocks_, num_col_blocks, num_threads_,
+    ParallelFor(context_, num_eliminate_blocks_, num_col_blocks, num_threads_,
                 [&](int i) {
-#endif // !CERES_USE_TBB
+#endif // !(defined(CERES_USE_TBB) || defined(CERES_USE_CXX11_THREADS))
 
       const int block_id = i - num_eliminate_blocks_;
       int r, c, row_stride, col_stride;
@@ -217,9 +217,9 @@
             += diag.array().square().matrix();
       }
     }
-#ifdef CERES_USE_TBB
+#if defined(CERES_USE_TBB) || defined(CERES_USE_CXX11_THREADS)
     );
-#endif // CERES_USE_TBB
+#endif // defined(CERES_USE_TBB) || defined(CERES_USE_CXX11_THREADS)
   }
 
   ThreadTokenProvider thread_token_provider(num_threads_);
@@ -241,11 +241,11 @@
 #pragma omp parallel for num_threads(num_threads_) schedule(dynamic)
 #endif // CERES_USE_OPENMP
 
-#ifndef CERES_USE_TBB
+#if !(defined(CERES_USE_TBB) || defined(CERES_USE_CXX11_THREADS))
   for (int i = 0; i < chunks_.size(); ++i) {
 #else
-  ParallelFor(0, int(chunks_.size()), num_threads_, [&](int i) {
-#endif // !CERES_USE_TBB
+  ParallelFor(context_, 0, int(chunks_.size()), num_threads_, [&](int i) {
+#endif // !(defined(CERES_USE_TBB) || defined(CERES_USE_CXX11_THREADS))
 
     const ScopedThreadToken scoped_thread_token(&thread_token_provider);
     const int thread_id = scoped_thread_token.token();
@@ -314,9 +314,9 @@
     ChunkOuterProduct(
         thread_id, bs, inverse_ete, buffer, chunk.buffer_layout, lhs);
   }
-#ifdef CERES_USE_TBB
+#if defined(CERES_USE_TBB) || defined(CERES_USE_CXX11_THREADS)
   );
-#endif // CERES_USE_TBB
+#endif // defined(CERES_USE_TBB) || defined(CERES_USE_CXX11_THREADS)
 
   // For rows with no e_blocks, the schur complement update reduces to
   // S += F'F.
@@ -337,11 +337,11 @@
 #pragma omp parallel for num_threads(num_threads_) schedule(dynamic)
 #endif // CERES_USE_OPENMP
 
-#ifndef CERES_USE_TBB
+#if !(defined(CERES_USE_TBB) || defined(CERES_USE_CXX11_THREADS))
   for (int i = 0; i < chunks_.size(); ++i) {
 #else
-  ParallelFor(0, int(chunks_.size()), num_threads_, [&](int i) {
-#endif // !CERES_USE_TBB
+  ParallelFor(context_, 0, int(chunks_.size()), num_threads_, [&](int i) {
+#endif // !(defined(CERES_USE_TBB) || defined(CERES_USE_CXX11_THREADS))
 
     const Chunk& chunk = chunks_[i];
     const int e_block_id = bs->rows[chunk.start].cells.front().block_id;
@@ -398,9 +398,9 @@
     y_block = InvertPSDMatrix<kEBlockSize>(assume_full_rank_ete_, ete)
         * y_block;
   }
-#ifdef CERES_USE_TBB
+#if defined(CERES_USE_TBB) || defined(CERES_USE_CXX11_THREADS)
   );
-#endif // CERES_USE_TBB
+#endif // defined(CERES_USE_TBB) || defined(CERES_USE_CXX11_THREADS)
 }
 
 // Update the rhs of the reduced linear system. Compute
diff --git a/internal/ceres/schur_eliminator_test.cc b/internal/ceres/schur_eliminator_test.cc
index f07d102..9826d87 100644
--- a/internal/ceres/schur_eliminator_test.cc
+++ b/internal/ceres/schur_eliminator_test.cc
@@ -34,6 +34,7 @@
 #include "ceres/block_random_access_dense_matrix.h"
 #include "ceres/block_sparse_matrix.h"
 #include "ceres/casts.h"
+#include "ceres/context_impl.h"
 #include "ceres/detect_structure.h"
 #include "ceres/internal/eigen.h"
 #include "ceres/internal/scoped_ptr.h"
@@ -142,6 +143,8 @@
     Vector rhs(schur_size);
 
     LinearSolver::Options options;
+    ContextImpl context;
+    options.context = &context;
     options.elimination_groups.push_back(num_eliminate_blocks);
     if (use_static_structure) {
       DetectStructure(*bs,
diff --git a/internal/ceres/schur_jacobi_preconditioner.cc b/internal/ceres/schur_jacobi_preconditioner.cc
index c3f43a9..d7f6fe9 100644
--- a/internal/ceres/schur_jacobi_preconditioner.cc
+++ b/internal/ceres/schur_jacobi_preconditioner.cc
@@ -53,6 +53,7 @@
   CHECK_GT(num_blocks, 0)
       << "Jacobian should have atleast 1 f_block for "
       << "SCHUR_JACOBI preconditioner.";
+  CHECK(options_.context != NULL);
 
   std::vector<int> blocks(num_blocks);
   for (int i = 0; i < num_blocks; ++i) {
@@ -75,6 +76,7 @@
   eliminator_options.e_block_size = options_.e_block_size;
   eliminator_options.f_block_size = options_.f_block_size;
   eliminator_options.row_block_size = options_.row_block_size;
+  eliminator_options.context = options_.context;
   eliminator_.reset(SchurEliminatorBase::Create(eliminator_options));
   const bool kFullRankETE = true;
   eliminator_->Init(
diff --git a/internal/ceres/solver.cc b/internal/ceres/solver.cc
index fcf8175..5f047cb 100644
--- a/internal/ceres/solver.cc
+++ b/internal/ceres/solver.cc
@@ -32,8 +32,11 @@
 #include "ceres/solver.h"
 
 #include <algorithm>
-#include <sstream>   // NOLINT
+#include <sstream>  // NOLINT
 #include <vector>
+#include "ceres/casts.h"
+#include "ceres/context.h"
+#include "ceres/context_impl.h"
 #include "ceres/detect_structure.h"
 #include "ceres/gradient_checking_cost_function.h"
 #include "ceres/internal/port.h"
@@ -524,6 +527,10 @@
   Program* program = problem_impl->mutable_program();
   PreSolveSummarize(options, problem_impl, summary);
 
+  // The main thread also does work so we only need to launch num_threads - 1.
+  problem_impl->context()->EnsureMinimumThreads(
+      std::max(options.num_threads, options.num_linear_solver_threads) - 1);
+
   // Make sure that all the parameter blocks states are set to the
   // values provided by the user.
   program->SetParameterBlockStatePtrsToUserStatePtrs();
diff --git a/internal/ceres/sparse_normal_cholesky_solver_test.cc b/internal/ceres/sparse_normal_cholesky_solver_test.cc
index ec59897..f991298 100644
--- a/internal/ceres/sparse_normal_cholesky_solver_test.cc
+++ b/internal/ceres/sparse_normal_cholesky_solver_test.cc
@@ -30,6 +30,7 @@
 
 #include "ceres/block_sparse_matrix.h"
 #include "ceres/casts.h"
+#include "ceres/context_impl.h"
 #include "ceres/internal/scoped_ptr.h"
 #include "ceres/linear_least_squares_problems.h"
 #include "ceres/linear_solver.h"
@@ -112,6 +113,8 @@
   options.sparse_linear_algebra_library_type = SUITE_SPARSE;
   options.type = SPARSE_NORMAL_CHOLESKY;
   options.use_postordering = false;
+  ContextImpl context;
+  options.context = &context;
   TestSolver(options);
 }
 
@@ -121,6 +124,8 @@
   options.sparse_linear_algebra_library_type = SUITE_SPARSE;
   options.type = SPARSE_NORMAL_CHOLESKY;
   options.use_postordering = true;
+  ContextImpl context;
+  options.context = &context;
   TestSolver(options);
 }
 #endif
@@ -132,6 +137,8 @@
   options.sparse_linear_algebra_library_type = CX_SPARSE;
   options.type = SPARSE_NORMAL_CHOLESKY;
   options.use_postordering = false;
+  ContextImpl context;
+  options.context = &context;
   TestSolver(options);
 }
 
@@ -141,6 +148,8 @@
   options.sparse_linear_algebra_library_type = CX_SPARSE;
   options.type = SPARSE_NORMAL_CHOLESKY;
   options.use_postordering = true;
+  ContextImpl context;
+  options.context = &context;
   TestSolver(options);
 }
 #endif
@@ -152,6 +161,8 @@
   options.sparse_linear_algebra_library_type = EIGEN_SPARSE;
   options.type = SPARSE_NORMAL_CHOLESKY;
   options.use_postordering = false;
+  ContextImpl context;
+  options.context = &context;
   TestSolver(options);
 }
 
@@ -161,6 +172,8 @@
   options.sparse_linear_algebra_library_type = EIGEN_SPARSE;
   options.type = SPARSE_NORMAL_CHOLESKY;
   options.use_postordering = true;
+  ContextImpl context;
+  options.context = &context;
   TestSolver(options);
 }
 #endif  // CERES_USE_EIGEN_SPARSE
diff --git a/internal/ceres/trust_region_preprocessor.cc b/internal/ceres/trust_region_preprocessor.cc
index d3f912c..803baef 100644
--- a/internal/ceres/trust_region_preprocessor.cc
+++ b/internal/ceres/trust_region_preprocessor.cc
@@ -33,6 +33,7 @@
 #include <numeric>
 #include <string>
 #include "ceres/callbacks.h"
+#include "ceres/context_impl.h"
 #include "ceres/evaluator.h"
 #include "ceres/linear_solver.h"
 #include "ceres/minimizer.h"
@@ -110,7 +111,7 @@
 // For Schur type and SPARSE_NORMAL_CHOLESKY linear solvers, reorder
 // the program to reduce fill-in and increase cache coherency.
 bool ReorderProgram(PreprocessedProblem* pp) {
-  Solver::Options& options = pp->options;
+  const Solver::Options& options = pp->options;
   if (IsSchurType(options.linear_solver_type)) {
     return ReorderProgramForSchurTypeLinearSolver(
         options.linear_solver_type,
@@ -194,6 +195,7 @@
   pp->linear_solver_options.dynamic_sparsity = options.dynamic_sparsity;
   pp->linear_solver_options.num_threads = options.num_linear_solver_threads;
   pp->linear_solver_options.use_postordering = options.use_postordering;
+  pp->linear_solver_options.context = pp->problem->context();
 
   if (IsSchurType(pp->linear_solver_options.type)) {
     OrderingToGroupSizes(options.linear_solver_ordering.get(),
@@ -249,6 +251,7 @@
 
   pp->evaluator_options.num_threads = options.num_threads;
   pp->evaluator_options.dynamic_sparsity = options.dynamic_sparsity;
+  pp->evaluator_options.context = pp->problem->context();
   pp->evaluator.reset(Evaluator::Create(pp->evaluator_options,
                                         pp->reduced_program.get(),
                                         &pp->error));
@@ -296,7 +299,8 @@
         CoordinateDescentMinimizer::CreateOrdering(*pp->reduced_program));
   }
 
-  pp->inner_iteration_minimizer.reset(new CoordinateDescentMinimizer);
+  pp->inner_iteration_minimizer.reset(
+      new CoordinateDescentMinimizer(pp->problem->context()));
   return pp->inner_iteration_minimizer->Init(*pp->reduced_program,
                                              pp->problem->parameter_map(),
                                              *options.inner_iteration_ordering,
diff --git a/internal/ceres/visibility_based_preconditioner.cc b/internal/ceres/visibility_based_preconditioner.cc
index 90550b2..24563ae 100644
--- a/internal/ceres/visibility_based_preconditioner.cc
+++ b/internal/ceres/visibility_based_preconditioner.cc
@@ -80,6 +80,7 @@
   num_blocks_ = bs.cols.size() - options_.elimination_groups[0];
   CHECK_GT(num_blocks_, 0) << "Jacobian should have atleast 1 f_block for "
                            << "visibility based preconditioning.";
+  CHECK(options_.context != NULL);
 
   // Vector of camera block sizes
   block_size_.resize(num_blocks_);
@@ -313,6 +314,7 @@
   eliminator_options.e_block_size = options_.e_block_size;
   eliminator_options.f_block_size = options_.f_block_size;
   eliminator_options.row_block_size = options_.row_block_size;
+  eliminator_options.context = options_.context;
   eliminator_.reset(SchurEliminatorBase::Create(eliminator_options));
   const bool kFullRankETE = true;
   eliminator_->Init(
diff --git a/jni/Android.mk b/jni/Android.mk
index f959ea8..e8ea065 100644
--- a/jni/Android.mk
+++ b/jni/Android.mk
@@ -138,6 +138,8 @@
                    $(CERES_SRC_PATH)/compressed_row_sparse_matrix.cc \
                    $(CERES_SRC_PATH)/conditioned_cost_function.cc \
                    $(CERES_SRC_PATH)/conjugate_gradients_solver.cc \
+                   $(CERES_SRC_PATH)/context.cc \
+                   $(CERES_SRC_PATH)/context_impl.cc \
                    $(CERES_SRC_PATH)/coordinate_descent_minimizer.cc \
                    $(CERES_SRC_PATH)/corrector.cc \
                    $(CERES_SRC_PATH)/covariance.cc \