Add CUDA GPU and Runtime Detection

* Added GPU device and CUDA compute capability identification.
* Added GpuMemoryAvailable() to aid downstream optimizations based on
  GPU memory availability.

Change-Id: I326dc1e4b7a6a7f5571b7e5479eb9aa300ad1075
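Usage sketch (not part of the patch): one way a downstream consumer might use the new queries once initialization succeeds. The helper name, the include paths, and the 80% headroom factor are illustrative assumptions, not something this change defines.

  #include <string>

  #include "ceres/context_impl.h"  // internal Ceres header; path assumed
  #include "glog/logging.h"

  // Hypothetical helper: decide whether a GPU-sized workspace will fit,
  // using the GpuMemoryAvailable() query added by this change.
  bool GpuHasRoomFor(ceres::internal::ContextImpl* context,
                     size_t bytes_needed) {
    std::string message;
    if (!context->InitCuda(&message)) {
      LOG(WARNING) << "CUDA is not usable: " << message;
      return false;
    }
    // Log the detected device, runtime version, and compute capability.
    VLOG(2) << context->CudaConfigAsString();
    // Keep ~20% headroom; the factor is an illustrative choice.
    return bytes_needed < context->GpuMemoryAvailable() * 8 / 10;
  }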
diff --git a/internal/ceres/cgnr_solver.cc b/internal/ceres/cgnr_solver.cc
index 64e018d..5374f51 100644
--- a/internal/ceres/cgnr_solver.cc
+++ b/internal/ceres/cgnr_solver.cc
@@ -253,7 +253,7 @@
            ". ";
     return nullptr;
   }
-  CHECK(options.context->IsCUDAInitialized())
+  CHECK(options.context->IsCudaInitialized())
       << "CudaCgnrSolver requires CUDA initialization.";
   auto solver = std::make_unique<CudaCgnrSolver>(options);
   return solver;
diff --git a/internal/ceres/context_impl.cc b/internal/ceres/context_impl.cc
index 9f6cc25..a46e760 100644
--- a/internal/ceres/context_impl.cc
+++ b/internal/ceres/context_impl.cc
@@ -33,6 +33,7 @@
 #include <string>
 
 #include "ceres/internal/config.h"
+#include "ceres/stringprintf.h"
 #include "ceres/wall_time.h"
 
 #ifndef CERES_NO_CUDA
@@ -66,32 +67,84 @@
   is_cuda_initialized_ = false;
 }
 
-bool ContextImpl::InitCUDA(std::string* message) {
+std::string ContextImpl::CudaConfigAsString() const {
+  return ceres::internal::StringPrintf(
+      "======================= CUDA Device Properties ======================\n"
+      "Cuda version         : %d.%d\n"
+      "Device ID            : %d\n"
+      "Device name          : %s\n"
+      "Total GPU memory     : %6.f MiB\n"
+      "GPU memory available : %6.f MiB\n"
+      "Compute capability   : %d.%d\n"
+      "Warp size            : %d\n"
+      "Max threads per block: %d\n"
+      "Max threads per dim  : %d %d %d\n"
+      "Max grid size        : %d %d %d\n"
+      "Multiprocessor count : %d\n"
+      "====================================================================",
+      cuda_version_major_,
+      cuda_version_minor_,
+      gpu_device_id_in_use_,
+      gpu_device_properties_.name,
+      gpu_device_properties_.totalGlobalMem / 1024.0 / 1024.0,
+      GpuMemoryAvailable() / 1024.0 / 1024.0,
+      gpu_device_properties_.major,
+      gpu_device_properties_.minor,
+      gpu_device_properties_.warpSize,
+      gpu_device_properties_.maxThreadsPerBlock,
+      gpu_device_properties_.maxThreadsDim[0],
+      gpu_device_properties_.maxThreadsDim[1],
+      gpu_device_properties_.maxThreadsDim[2],
+      gpu_device_properties_.maxGridSize[0],
+      gpu_device_properties_.maxGridSize[1],
+      gpu_device_properties_.maxGridSize[2],
+      gpu_device_properties_.multiProcessorCount);
+}
+
+size_t ContextImpl::GpuMemoryAvailable() const {
+  size_t free, total;
+  cudaMemGetInfo(&free, &total);
+  return free;
+}
+
+bool ContextImpl::InitCuda(std::string* message) {
   if (is_cuda_initialized_) {
     return true;
   }
+  CHECK_EQ(cudaGetDevice(&gpu_device_id_in_use_), cudaSuccess);
+  int cuda_version;
+  CHECK_EQ(cudaRuntimeGetVersion(&cuda_version), cudaSuccess);
+  cuda_version_major_ = cuda_version / 1000;
+  cuda_version_minor_ = (cuda_version % 1000) / 10;
+  CHECK_EQ(cudaGetDeviceProperties(&gpu_device_properties_,
+                                   gpu_device_id_in_use_), cudaSuccess);
+  VLOG(3) << "\n" << CudaConfigAsString();
   EventLogger event_logger("InitCuda");
   if (cublasCreate(&cublas_handle_) != CUBLAS_STATUS_SUCCESS) {
-    *message = "cuBLAS::cublasCreate failed.";
+    *message = "CUDA initialization failed because "
+               "cuBLAS::cublasCreate failed.";
     cublas_handle_ = nullptr;
     return false;
   }
   event_logger.AddEvent("cublasCreate");
   if (cusolverDnCreate(&cusolver_handle_) != CUSOLVER_STATUS_SUCCESS) {
-    *message = "cuSolverDN::cusolverDnCreate failed.";
+    *message = "CUDA initialization failed because "
+               "cuSolverDN::cusolverDnCreate failed.";
     TearDown();
     return false;
   }
   event_logger.AddEvent("cusolverDnCreate");
   if (cusparseCreate(&cusparse_handle_) != CUSPARSE_STATUS_SUCCESS) {
-    *message = "cuSPARSE::cusparseCreate failed.";
+    *message = "CUDA initialization failed because "
+               "cuSPARSE::cusparseCreate failed.";
     TearDown();
     return false;
   }
   event_logger.AddEvent("cusparseCreate");
   if (cudaStreamCreateWithFlags(&stream_, cudaStreamNonBlocking) !=
       cudaSuccess) {
-    *message = "CUDA::cudaStreamCreateWithFlags failed.";
+    *message = "CUDA initialization failed because "
+               "CUDA::cudaStreamCreateWithFlags failed.";
     TearDown();
     return false;
   }
@@ -100,7 +153,7 @@
           CUSOLVER_STATUS_SUCCESS ||
       cublasSetStream(cublas_handle_, stream_) != CUBLAS_STATUS_SUCCESS ||
       cusparseSetStream(cusparse_handle_, stream_) != CUSPARSE_STATUS_SUCCESS) {
-    *message = "CUDA [Solver|BLAS|Sparse] SetStream failed.";
+    *message = "CUDA initialization failed because SetStream failed.";
     TearDown();
     return false;
   }
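Aside (not part of the patch): the runtime calls used in InitCuda above have simple semantics. cudaRuntimeGetVersion() packs the version as 1000 * major + 10 * minor, which is why the code divides by 1000 and then by 10, and cudaMemGetInfo() reports free and total device memory in bytes for the current device. A minimal standalone sketch using only the CUDA runtime API:

  #include <cstdio>
  #include <cuda_runtime.h>

  int main() {
    int version = 0;
    if (cudaRuntimeGetVersion(&version) != cudaSuccess) return 1;
    // CUDA encodes the runtime version as 1000 * major + 10 * minor,
    // e.g. 11040 -> 11.4; this is the same decoding InitCuda performs.
    const int major = version / 1000;
    const int minor = (version % 1000) / 10;

    size_t free_bytes = 0, total_bytes = 0;
    if (cudaMemGetInfo(&free_bytes, &total_bytes) != cudaSuccess) return 1;
    std::printf("CUDA runtime %d.%d, %zu of %zu MiB free\n",
                major, minor,
                free_bytes / (1024 * 1024), total_bytes / (1024 * 1024));
    return 0;
  }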
diff --git a/internal/ceres/context_impl.h b/internal/ceres/context_impl.h
index 4bd18e1..9eb59eb 100644
--- a/internal/ceres/context_impl.h
+++ b/internal/ceres/context_impl.h
@@ -72,19 +72,39 @@
 #endif  // CERES_USE_CXX_THREADS
 
 #ifndef CERES_NO_CUDA
 
-  // Initializes the cuSolverDN context, creates an asynchronous stream, and
-  // associates the stream with cuSolverDN. Returns true iff initialization was
-  // successful, else it returns false and a human-readable error message is
-  // returned.
-  bool InitCUDA(std::string* message);
+  // Note on Ceres' use of CUDA Devices on multi-GPU systems:
+  // 1. On a multi-GPU system, if nothing special is done, the "default" CUDA
+  //    device will be used, which is device 0.
+  // 2. If the user masks out GPUs using the CUDA_VISIBLE_DEVICES environment
+  //    variable, Ceres will still use device 0 visible to the program, but
+  //    device 0 will be the first GPU indicated in the environment variable.
+  // 3. If the user explicitly selects a GPU in the host process before calling
+  //    Ceres, Ceres will use that GPU.
+
+  // Initializes cuBLAS, cuSOLVER, and cuSPARSE contexts, creates an
+  // asynchronous CUDA stream, and associates the stream with the contexts.
+  // Returns true iff initialization was successful, else it returns false and a
+  // human-readable error message is returned.
+  bool InitCuda(std::string* message);
   void TearDown();
-  inline bool IsCUDAInitialized() const { return is_cuda_initialized_; }
+  inline bool IsCudaInitialized() const { return is_cuda_initialized_; }
+  // Returns a human-readable string describing the capabilities of the current
+  // CUDA device. CudaConfigAsString can only be called after InitCuda has been
+  // called.
+  std::string CudaConfigAsString() const;
+  // Returns the number of bytes of available global memory on the current CUDA
+  // device. If it is called before InitCuda, it returns 0.
+  size_t GpuMemoryAvailable() const;
 
   cusolverDnHandle_t cusolver_handle_ = nullptr;
   cublasHandle_t cublas_handle_ = nullptr;
   cudaStream_t stream_ = nullptr;
   cusparseHandle_t cusparse_handle_ = nullptr;
   bool is_cuda_initialized_ = false;
+  int gpu_device_id_in_use_ = -1;
+  cudaDeviceProp gpu_device_properties_;
+  int cuda_version_major_ = 0;
+  int cuda_version_minor_ = 0;
 #endif  // CERES_NO_CUDA
 };
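Sketch (not part of the patch) of option 3 from the note above: make the desired device current in the host process before Ceres runs, and InitCuda will pick it up via cudaGetDevice(). The device index 1 and the two-GPU assumption are illustrative; option 2 is simply setting CUDA_VISIBLE_DEVICES=1 in the environment instead.

  #include <cuda_runtime.h>
  #include "glog/logging.h"

  void SelectSecondGpuBeforeCeres() {
    int device_count = 0;
    CHECK_EQ(cudaGetDeviceCount(&device_count), cudaSuccess);
    CHECK_GE(device_count, 2) << "This sketch assumes at least two GPUs.";
    // Make device 1 current in this process; ContextImpl::InitCuda will
    // then detect and use it via cudaGetDevice().
    CHECK_EQ(cudaSetDevice(1), cudaSuccess);
  }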
diff --git a/internal/ceres/cuda_dense_cholesky_test.cc b/internal/ceres/cuda_dense_cholesky_test.cc
index 1483923..4c5742e 100644
--- a/internal/ceres/cuda_dense_cholesky_test.cc
+++ b/internal/ceres/cuda_dense_cholesky_test.cc
@@ -45,7 +45,7 @@
   ContextImpl context;
   options.context = &context;
   std::string error;
-  EXPECT_TRUE(context.InitCUDA(&error)) << error;
+  EXPECT_TRUE(context.InitCuda(&error)) << error;
   auto dense_cuda_solver = CUDADenseCholesky::Create(options);
   EXPECT_EQ(dense_cuda_solver, nullptr);
 }
@@ -65,7 +65,7 @@
   ContextImpl context;
   options.context = &context;
   std::string error;
-  EXPECT_TRUE(context.InitCUDA(&error)) << error;
+  EXPECT_TRUE(context.InitCuda(&error)) << error;
   options.dense_linear_algebra_library_type = CUDA;
   auto dense_cuda_solver = CUDADenseCholesky::Create(options);
   ASSERT_NE(dense_cuda_solver, nullptr);
@@ -94,7 +94,7 @@
   ContextImpl context;
   options.context = &context;
   std::string error;
-  EXPECT_TRUE(context.InitCUDA(&error)) << error;
+  EXPECT_TRUE(context.InitCuda(&error)) << error;
   options.dense_linear_algebra_library_type = CUDA;
   auto dense_cuda_solver = CUDADenseCholesky::Create(options);
   ASSERT_NE(dense_cuda_solver, nullptr);
@@ -116,7 +116,7 @@
   ContextImpl context;
   options.context = &context;
   std::string error;
-  EXPECT_TRUE(context.InitCUDA(&error)) << error;
+  EXPECT_TRUE(context.InitCuda(&error)) << error;
   options.dense_linear_algebra_library_type = CUDA;
   auto dense_cuda_solver = CUDADenseCholesky::Create(options);
   ASSERT_NE(dense_cuda_solver, nullptr);
@@ -131,7 +131,7 @@
   ContextImpl context;
   options.context = &context;
   std::string error;
-  EXPECT_TRUE(context.InitCUDA(&error)) << error;
+  EXPECT_TRUE(context.InitCuda(&error)) << error;
   options.dense_linear_algebra_library_type = CUDA;
   auto dense_cuda_solver = CUDADenseCholesky::Create(options);
   ASSERT_NE(dense_cuda_solver, nullptr);
@@ -150,7 +150,7 @@
   ContextImpl context;
   options.context = &context;
   std::string error;
-  EXPECT_TRUE(context.InitCUDA(&error)) << error;
+  EXPECT_TRUE(context.InitCuda(&error)) << error;
   options.dense_linear_algebra_library_type = ceres::CUDA;
   std::unique_ptr<DenseCholesky> dense_cholesky =
       CUDADenseCholesky::Create(options);
@@ -189,7 +189,7 @@
   ContextImpl context;
   options.context = &context;
   std::string error;
-  EXPECT_TRUE(context.InitCUDA(&error)) << error;
+  EXPECT_TRUE(context.InitCuda(&error)) << error;
   auto solver = CUDADenseCholeskyMixedPrecision::Create(options);
   ASSERT_EQ(solver, nullptr);
 }
@@ -199,7 +199,7 @@
   ContextImpl context;
   options.context = &context;
   std::string error;
-  EXPECT_TRUE(context.InitCUDA(&error)) << error;
+  EXPECT_TRUE(context.InitCuda(&error)) << error;
   options.dense_linear_algebra_library_type = ceres::CUDA;
   auto solver = CUDADenseCholeskyMixedPrecision::Create(options);
   ASSERT_EQ(solver, nullptr);
@@ -224,7 +224,7 @@
   ContextImpl context;
   options.context = &context;
   std::string error;
-  EXPECT_TRUE(context.InitCUDA(&error)) << error;
+  EXPECT_TRUE(context.InitCuda(&error)) << error;
   options.dense_linear_algebra_library_type = CUDA;
   options.use_mixed_precision_solves = true;
   auto solver = CUDADenseCholeskyMixedPrecision::Create(options);
@@ -261,7 +261,7 @@
   ContextImpl context;
   options.context = &context;
   std::string error;
-  EXPECT_TRUE(context.InitCUDA(&error)) << error;
+  EXPECT_TRUE(context.InitCuda(&error)) << error;
   options.dense_linear_algebra_library_type = CUDA;
   options.use_mixed_precision_solves = true;
   auto solver = CUDADenseCholeskyMixedPrecision::Create(options);
@@ -291,7 +291,7 @@
   ContextImpl context;
   options.context = &context;
   std::string error;
-  EXPECT_TRUE(context.InitCUDA(&error)) << error;
+  EXPECT_TRUE(context.InitCuda(&error)) << error;
   options.dense_linear_algebra_library_type = ceres::CUDA;
   options.use_mixed_precision_solves = true;
   options.max_num_refinement_iterations = 20;
diff --git a/internal/ceres/cuda_dense_qr_test.cc b/internal/ceres/cuda_dense_qr_test.cc
index cc0dbd6..798f12a 100644
--- a/internal/ceres/cuda_dense_qr_test.cc
+++ b/internal/ceres/cuda_dense_qr_test.cc
@@ -44,7 +44,7 @@
   ContextImpl context;
   options.context = &context;
   std::string error;
-  EXPECT_TRUE(context.InitCUDA(&error)) << error;
+  EXPECT_TRUE(context.InitCuda(&error)) << error;
   auto dense_cuda_solver = CUDADenseQR::Create(options);
   EXPECT_EQ(dense_cuda_solver, nullptr);
 }
@@ -63,7 +63,7 @@
   ContextImpl context;
   options.context = &context;
   std::string error;
-  EXPECT_TRUE(context.InitCUDA(&error)) << error;
+  EXPECT_TRUE(context.InitCuda(&error)) << error;
   options.dense_linear_algebra_library_type = CUDA;
   auto dense_cuda_solver = CUDADenseQR::Create(options);
   ASSERT_NE(dense_cuda_solver, nullptr);
@@ -97,7 +97,7 @@
   ContextImpl context;
   options.context = &context;
   std::string error;
-  EXPECT_TRUE(context.InitCUDA(&error)) << error;
+  EXPECT_TRUE(context.InitCuda(&error)) << error;
   options.dense_linear_algebra_library_type = CUDA;
   auto dense_cuda_solver = CUDADenseQR::Create(options);
   ASSERT_NE(dense_cuda_solver, nullptr);
@@ -121,7 +121,7 @@
   ContextImpl context;
   options.context = &context;
   std::string error;
-  EXPECT_TRUE(context.InitCUDA(&error)) << error;
+  EXPECT_TRUE(context.InitCuda(&error)) << error;
   options.dense_linear_algebra_library_type = CUDA;
   auto dense_cuda_solver = CUDADenseQR::Create(options);
   ASSERT_NE(dense_cuda_solver, nullptr);
@@ -141,7 +141,7 @@
   ContextImpl context;
   options.context = &context;
   std::string error;
-  EXPECT_TRUE(context.InitCUDA(&error)) << error;
+  EXPECT_TRUE(context.InitCuda(&error)) << error;
   options.dense_linear_algebra_library_type = ceres::CUDA;
   std::unique_ptr<DenseQR> dense_qr =
       CUDADenseQR::Create(options);
diff --git a/internal/ceres/cuda_sparse_matrix.cc b/internal/ceres/cuda_sparse_matrix.cc
index e366112..1d8d0b0 100644
--- a/internal/ceres/cuda_sparse_matrix.cc
+++ b/internal/ceres/cuda_sparse_matrix.cc
@@ -61,7 +61,7 @@
 CudaSparseMatrix::CudaSparseMatrix(
     ContextImpl* context, const CompressedRowSparseMatrix& crs_matrix) {
   DCHECK_NE(context, nullptr);
-  CHECK(context->IsCUDAInitialized());
+  CHECK(context->IsCudaInitialized());
   context_ = context;
   num_rows_ = crs_matrix.num_rows();
   num_cols_ = crs_matrix.num_cols();
diff --git a/internal/ceres/cuda_sparse_matrix_test.cc b/internal/ceres/cuda_sparse_matrix_test.cc
index ae76b8f..a12ad40 100644
--- a/internal/ceres/cuda_sparse_matrix_test.cc
+++ b/internal/ceres/cuda_sparse_matrix_test.cc
@@ -51,8 +51,8 @@
  protected:
   void SetUp() final {
     std::string message;
-    CHECK(context_.InitCUDA(&message))
-        << "InitCUDA() failed because: " << message;
+    CHECK(context_.InitCuda(&message))
+        << "InitCuda() failed because: " << message;
     std::unique_ptr<LinearLeastSquaresProblem> problem =
         CreateLinearLeastSquaresProblemFromId(2);
     CHECK(problem != nullptr);
@@ -121,7 +121,7 @@
   ContextImpl context;
   std::string message;
-  CHECK(context.InitCUDA(&message)) << "InitCUDA() failed because: " << message;
+  CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message;
 
   auto A1_crs = CompressedRowSparseMatrix::FromTripletSparseMatrix(A1);
   CudaSparseMatrix A_gpu(&context, *A1_crs);
   CudaVector b_gpu(&context, A1.num_cols());
@@ -159,7 +159,7 @@
   ContextImpl context;
   std::string message;
-  CHECK(context.InitCUDA(&message)) << "InitCUDA() failed because: " << message;
+  CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message;
 
   auto A_crs = CompressedRowSparseMatrix::FromTripletSparseMatrix(A);
   CudaSparseMatrix A_gpu(&context, *A_crs);
   CudaVector b_gpu(&context, A.num_cols());
@@ -189,7 +189,7 @@
   ContextImpl context;
   std::string message;
-  CHECK(context.InitCUDA(&message)) << "InitCUDA() failed because: " << message;
+  CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message;
 
   auto A_crs = CompressedRowSparseMatrix::FromTripletSparseMatrix(A);
   CudaSparseMatrix A_gpu(&context, *A_crs);
   CudaVector b_gpu(&context, A.num_rows());
@@ -244,7 +244,7 @@
   ContextImpl context;
   std::string message;
-  CHECK(context.InitCUDA(&message)) << "InitCUDA() failed because: " << message;
+  CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message;
 
   auto A_crs = CompressedRowSparseMatrix::FromTripletSparseMatrix(A);
   CudaSparseMatrix A_gpu(&context, *A_crs);
   CudaVector b_gpu(&context, N);
diff --git a/internal/ceres/cuda_vector.cc b/internal/ceres/cuda_vector.cc
index 7bac13a..46e6cb2 100644
--- a/internal/ceres/cuda_vector.cc
+++ b/internal/ceres/cuda_vector.cc
@@ -52,7 +52,7 @@
 
 CudaVector::CudaVector(ContextImpl* context, int size) {
   DCHECK_NE(context, nullptr);
-  CHECK(context->IsCUDAInitialized());
+  CHECK(context->IsCudaInitialized());
   context_ = context;
   Resize(size);
 }
diff --git a/internal/ceres/cuda_vector_test.cc b/internal/ceres/cuda_vector_test.cc
index 84193c0..db1fec5 100644
--- a/internal/ceres/cuda_vector_test.cc
+++ b/internal/ceres/cuda_vector_test.cc
@@ -45,7 +45,7 @@
 TEST(CudaVector, Creation) {
   ContextImpl context;
   std::string message;
-  CHECK(context.InitCUDA(&message)) << "InitCUDA() failed because: " << message;
+  CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message;
   CudaVector x(&context, 1000);
   EXPECT_EQ(x.num_rows(), 1000);
   EXPECT_NE(x.data().data(), nullptr);
@@ -56,7 +56,7 @@
   x << 1, 2, 3;
   ContextImpl context;
   std::string message;
-  CHECK(context.InitCUDA(&message)) << "InitCUDA() failed because: " << message;
+  CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message;
   CudaVector y(&context, 10);
   y.CopyFromCpu(x);
   EXPECT_EQ(y.num_rows(), 3);
@@ -72,7 +72,7 @@
   x << 1, 2, 3;
   ContextImpl context;
   std::string message;
-  CHECK(context.InitCUDA(&message)) << "InitCUDA() failed because: " << message;
+  CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message;
   CudaVector x_gpu(&context, 3);
   x_gpu.CopyFromCpu(x);
 
@@ -94,7 +94,7 @@
   y << 100, 10, 1;
   ContextImpl context;
   std::string message;
-  CHECK(context.InitCUDA(&message)) << "InitCUDA() failed because: " << message;
+  CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message;
   CudaVector x_gpu(&context, 10);
   CudaVector y_gpu(&context, 10);
   x_gpu.CopyFromCpu(x);
@@ -109,7 +109,7 @@
   x << 1, 2, 3;
   ContextImpl context;
   std::string message;
-  CHECK(context.InitCUDA(&message)) << "InitCUDA() failed because: " << message;
+  CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message;
   CudaVector x_gpu(&context, 10);
   x_gpu.CopyFromCpu(x);
 
@@ -127,7 +127,7 @@
   x << 1, 1, 1, 1;
   ContextImpl context;
   std::string message;
-  CHECK(context.InitCUDA(&message)) << "InitCUDA() failed because: " << message;
+  CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message;
   CudaVector x_gpu(&context, 10);
   x_gpu.CopyFromCpu(x);
 
@@ -145,7 +145,7 @@
 TEST(CudaVector, Resize) {
   ContextImpl context;
   std::string message;
-  CHECK(context.InitCUDA(&message)) << "InitCUDA() failed because: " << message;
+  CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message;
   CudaVector x_gpu(&context, 10);
   EXPECT_EQ(x_gpu.num_rows(), 10);
   x_gpu.Resize(4);
@@ -159,7 +159,7 @@
   y << 100, 10, 1, 0;
   ContextImpl context;
   std::string message;
-  CHECK(context.InitCUDA(&message)) << "InitCUDA() failed because: " << message;
+  CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message;
   CudaVector x_gpu(&context, 4);
   CudaVector y_gpu(&context, 4);
   x_gpu.CopyFromCpu(x);
@@ -180,7 +180,7 @@
   y << 100, 10, 1, 0;
   ContextImpl context;
   std::string message;
-  CHECK(context.InitCUDA(&message)) << "InitCUDA() failed because: " << message;
+  CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message;
   CudaVector x_gpu(&context, 4);
   CudaVector y_gpu(&context, 4);
   x_gpu.CopyFromCpu(x);
@@ -201,7 +201,7 @@
   y << 100, 10, 1, 0;
   ContextImpl context;
   std::string message;
-  CHECK(context.InitCUDA(&message)) << "InitCUDA() failed because: " << message;
+  CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message;
   CudaVector x_gpu(&context, 4);
   CudaVector y_gpu(&context, 4);
   x_gpu.CopyFromCpu(x);
@@ -222,7 +222,7 @@
   y << 100, 10, 1, 0;
   ContextImpl context;
   std::string message;
-  CHECK(context.InitCUDA(&message)) << "InitCUDA() failed because: " << message;
+  CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message;
   CudaVector x_gpu(&context, 4);
   CudaVector y_gpu(&context, 4);
   x_gpu.CopyFromCpu(x);
@@ -241,7 +241,7 @@
   x << 100, 10, 1, 0;
   ContextImpl context;
   std::string message;
-  CHECK(context.InitCUDA(&message)) << "InitCUDA() failed because: " << message;
+  CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message;
   CudaVector x_gpu(&context, 4);
   CudaVector y_gpu(&context, 4);
   x_gpu.CopyFromCpu(x);
@@ -262,7 +262,7 @@
   y << 100, 10, 1, 0;
   ContextImpl context;
   std::string message;
-  CHECK(context.InitCUDA(&message)) << "InitCUDA() failed because: " << message;
+  CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message;
   CudaVector x_gpu(&context, 4);
   CudaVector y_gpu(&context, 4);
   CudaVector z_gpu(&context, 4);
@@ -284,7 +284,7 @@
   x << 100, 10, 1, 0;
   ContextImpl context;
   std::string message;
-  CHECK(context.InitCUDA(&message)) << "InitCUDA() failed because: " << message;
+  CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message;
   CudaVector x_gpu(&context, 4);
   CudaVector z_gpu(&context, 4);
   x_gpu.CopyFromCpu(x);
@@ -305,7 +305,7 @@
   y << 100, 10, 1, 0;
   ContextImpl context;
   std::string message;
-  CHECK(context.InitCUDA(&message)) << "InitCUDA() failed because: " << message;
+  CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message;
   CudaVector x_gpu(&context, 10);
   CudaVector y_gpu(&context, 10);
   x_gpu.CopyFromCpu(x);
@@ -326,7 +326,7 @@
   y << 100, 10, 1, 0;
   ContextImpl context;
   std::string message;
-  CHECK(context.InitCUDA(&message)) << "InitCUDA() failed because: " << message;
+  CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message;
   CudaVector x_gpu(&context, 4);
   CudaVector y_gpu(&context, 4);
   x_gpu.CopyFromCpu(x);
@@ -345,7 +345,7 @@
   x << 100, 10, 1, 0;
   ContextImpl context;
   std::string message;
-  CHECK(context.InitCUDA(&message)) << "InitCUDA() failed because: " << message;
+  CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message;
   CudaVector x_gpu(&context, 10);
   x_gpu.CopyFromCpu(x);
 
@@ -366,7 +366,7 @@
   D << 4, 3, 2, 1;
   ContextImpl context;
   std::string message;
-  CHECK(context.InitCUDA(&message)) << "InitCUDA() failed because: " << message;
+  CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message;
   CudaVector x_gpu(&context, 4);
   CudaVector y_gpu(&context, 4);
   CudaVector D_gpu(&context, 4);
@@ -387,7 +387,7 @@
   x << 1, 2, 3, 4;
   ContextImpl context;
   std::string message;
-  CHECK(context.InitCUDA(&message)) << "InitCUDA() failed because: " << message;
+  CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message;
   CudaVector x_gpu(&context, 4);
   x_gpu.CopyFromCpu(x);
 
diff --git a/internal/ceres/dense_cholesky.cc b/internal/ceres/dense_cholesky.cc
index a3e578f..6d328ab 100644
--- a/internal/ceres/dense_cholesky.cc
+++ b/internal/ceres/dense_cholesky.cc
@@ -348,7 +348,7 @@
 #ifndef CERES_NO_CUDA
 
 bool CUDADenseCholesky::Init(ContextImpl* context, std::string* message) {
-  CHECK(context->IsCUDAInitialized())
+  CHECK(context->IsCudaInitialized())
       << "CUDADenseCholesky requires CUDA initialization.";
   cusolver_handle_ = context->cusolver_handle_;
   stream_ = context->stream_;
@@ -490,7 +490,7 @@
 
 bool CUDADenseCholeskyMixedPrecision::Init(const LinearSolver::Options& options,
                                            std::string* message) {
-  CHECK(options.context->IsCUDAInitialized())
+  CHECK(options.context->IsCudaInitialized())
       << "CUDADenseCholeskyMixedPrecision requires CUDA initialization.";
   cusolver_handle_ = options.context->cusolver_handle_;
   cublas_handle_ = options.context->cublas_handle_;
diff --git a/internal/ceres/dense_cholesky_test.cc b/internal/ceres/dense_cholesky_test.cc
index 2c4ca10..bb8a467 100644
--- a/internal/ceres/dense_cholesky_test.cc
+++ b/internal/ceres/dense_cholesky_test.cc
@@ -78,7 +78,7 @@
 #ifndef CERES_NO_CUDA
   options.context = &context;
   std::string error;
-  CHECK(context.InitCUDA(&error)) << error;
+  CHECK(context.InitCuda(&error)) << error;
 #endif  // CERES_NO_CUDA
   options.dense_linear_algebra_library_type = ::testing::get<0>(GetParam());
   options.use_mixed_precision_solves = ::testing::get<1>(GetParam());
diff --git a/internal/ceres/dense_qr.cc b/internal/ceres/dense_qr.cc
index fb3c228..22727f8 100644
--- a/internal/ceres/dense_qr.cc
+++ b/internal/ceres/dense_qr.cc
@@ -312,7 +312,7 @@
 #ifndef CERES_NO_CUDA
 
 bool CUDADenseQR::Init(ContextImpl* context, std::string* message) {
-  if (!context->InitCUDA(message)) {
+  if (!context->InitCuda(message)) {
     return false;
   }
   cublas_handle_ = context->cublas_handle_;
diff --git a/internal/ceres/dense_qr_test.cc b/internal/ceres/dense_qr_test.cc
index c10dba5..8698c90 100644
--- a/internal/ceres/dense_qr_test.cc
+++ b/internal/ceres/dense_qr_test.cc
@@ -70,7 +70,7 @@
 #ifndef CERES_NO_CUDA
   options.context = &context;
   std::string error;
-  CHECK(context.InitCUDA(&error)) << error;
+  CHECK(context.InitCuda(&error)) << error;
 #endif  // CERES_NO_CUDA
   options.dense_linear_algebra_library_type = GetParam();
   const double kEpsilon = std::numeric_limits<double>::epsilon() * 1.5e4;
diff --git a/internal/ceres/solver.cc b/internal/ceres/solver.cc
index ea18913..0656d34 100644
--- a/internal/ceres/solver.cc
+++ b/internal/ceres/solver.cc
@@ -730,7 +730,7 @@
 
 #ifndef CERES_NO_CUDA
   if (IsCudaRequired(options)) {
-    if (!problem_impl->context()->InitCUDA(&summary->message)) {
+    if (!problem_impl->context()->InitCuda(&summary->message)) {
      LOG(ERROR) << "Terminating: " << summary->message;
      return;
    }
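For context (not part of the patch): this is the path a user reaches by requesting a CUDA backend through the public API; if InitCuda fails, Solve() terminates and the reason is reported in Solver::Summary::message. A minimal sketch using documented public Ceres types; the cost functor and its values are made up.

  #include "ceres/ceres.h"
  #include "glog/logging.h"

  // Hypothetical single-residual cost: r = 10 - x.
  struct QuadraticCost {
    template <typename T>
    bool operator()(const T* const x, T* residual) const {
      residual[0] = T(10.0) - x[0];
      return true;
    }
  };

  int main(int argc, char** argv) {
    google::InitGoogleLogging(argv[0]);
    double x = 0.5;
    ceres::Problem problem;
    problem.AddResidualBlock(
        new ceres::AutoDiffCostFunction<QuadraticCost, 1, 1>(new QuadraticCost),
        nullptr, &x);

    ceres::Solver::Options options;
    // Request the CUDA dense backend; if InitCuda fails, Solve() terminates
    // and the reason lands in summary.message (see the hunk above).
    options.dense_linear_algebra_library_type = ceres::CUDA;
    options.linear_solver_type = ceres::DENSE_QR;

    ceres::Solver::Summary summary;
    ceres::Solve(options, &problem, &summary);
    if (summary.termination_type == ceres::FAILURE) {
      LOG(ERROR) << "Solve failed: " << summary.message;
    }
    return 0;
  }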
diff --git a/internal/ceres/sparse_linear_operator_benchmark.cc b/internal/ceres/sparse_linear_operator_benchmark.cc
index a08911e..4969c0c 100644
--- a/internal/ceres/sparse_linear_operator_benchmark.cc
+++ b/internal/ceres/sparse_linear_operator_benchmark.cc
@@ -170,7 +170,7 @@
       FLAGS_num_residuals_per_camera);
   ContextImpl context;
   std::string message;
-  context.InitCUDA(&message);
+  context.InitCuda(&message);
   CompressedRowSparseMatrix jacobian_crs(
       jacobian->num_rows(), jacobian->num_cols(), jacobian->num_nonzeros());
   jacobian->ToCompressedRowSparseMatrix(&jacobian_crs);
@@ -205,7 +205,7 @@
       FLAGS_num_residuals_per_camera);
   ContextImpl context;
   std::string message;
-  context.InitCUDA(&message);
+  context.InitCuda(&message);
   CompressedRowSparseMatrix jacobian_crs(
       jacobian->num_rows(), jacobian->num_cols(), jacobian->num_nonzeros());
   jacobian->ToCompressedRowSparseMatrix(&jacobian_crs);