Add CUDA GPU and Runtime Detection
* Added GPU device and CUDA compute capability identification.
* Added GpuMemoryAvailable() to aid downstream optimizations based on
  GPU memory availability; a brief usage sketch follows below.
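
For reference, a minimal usage sketch (not part of the patch) of the new
ContextImpl queries; the helper name and the 2 GiB threshold are
illustrative assumptions:

  #include <cstddef>
  #include <string>
  #include "ceres/context_impl.h"
  #include "glog/logging.h"

  // Sketch: decide whether a hypothetical memory-hungry GPU optimization
  // can be enabled, using the queries added in this change.
  bool EnableGpuOptimization(ceres::internal::ContextImpl* context) {
    std::string message;
    if (!context->InitCuda(&message)) {
      LOG(WARNING) << "CUDA unavailable: " << message;
      return false;  // Fall back to the CPU path.
    }
    // Logs device name, compute capability, CUDA version, memory, etc.
    VLOG(3) << context->CudaConfigAsString();
    // Gate the optimization on free GPU memory (threshold is arbitrary).
    constexpr size_t kTwoGiB = size_t{2} << 30;
    return context->GpuMemoryAvailable() >= kTwoGiB;
  }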
Change-Id: I326dc1e4b7a6a7f5571b7e5479eb9aa300ad1075
diff --git a/internal/ceres/cgnr_solver.cc b/internal/ceres/cgnr_solver.cc
index 64e018d..5374f51 100644
--- a/internal/ceres/cgnr_solver.cc
+++ b/internal/ceres/cgnr_solver.cc
@@ -253,7 +253,7 @@
". ";
return nullptr;
}
- CHECK(options.context->IsCUDAInitialized())
+ CHECK(options.context->IsCudaInitialized())
<< "CudaCgnrSolver requires CUDA initialization.";
auto solver = std::make_unique<CudaCgnrSolver>(options);
return solver;
diff --git a/internal/ceres/context_impl.cc b/internal/ceres/context_impl.cc
index 9f6cc25..a46e760 100644
--- a/internal/ceres/context_impl.cc
+++ b/internal/ceres/context_impl.cc
@@ -33,6 +33,7 @@
#include <string>
#include "ceres/internal/config.h"
+#include "ceres/stringprintf.h"
#include "ceres/wall_time.h"
#ifndef CERES_NO_CUDA
@@ -66,32 +67,84 @@
is_cuda_initialized_ = false;
}
-bool ContextImpl::InitCUDA(std::string* message) {
+std::string ContextImpl::CudaConfigAsString() const {
+ return ceres::internal::StringPrintf(
+ "======================= CUDA Device Properties ======================\n"
+ "Cuda version : %d.%d\n"
+ "Device ID : %d\n"
+ "Device name : %s\n"
+ "Total GPU memory : %6.f MiB\n"
+ "GPU memory available : %6.f MiB\n"
+ "Compute capability : %d.%d\n"
+ "Warp size : %d\n"
+ "Max threads per block: %d\n"
+ "Max threads per dim : %d %d %d\n"
+ "Max grid size : %d %d %d\n"
+ "Multiprocessor count : %d\n"
+ "====================================================================",
+ cuda_version_major_,
+ cuda_version_minor_,
+ gpu_device_id_in_use_,
+ gpu_device_properties_.name,
+ gpu_device_properties_.totalGlobalMem / 1024.0 / 1024.0,
+ GpuMemoryAvailable() / 1024.0 / 1024.0,
+ gpu_device_properties_.major,
+ gpu_device_properties_.minor,
+ gpu_device_properties_.warpSize,
+ gpu_device_properties_.maxThreadsPerBlock,
+ gpu_device_properties_.maxThreadsDim[0],
+ gpu_device_properties_.maxThreadsDim[1],
+ gpu_device_properties_.maxThreadsDim[2],
+ gpu_device_properties_.maxGridSize[0],
+ gpu_device_properties_.maxGridSize[1],
+ gpu_device_properties_.maxGridSize[2],
+ gpu_device_properties_.multiProcessorCount);
+}
+
+size_t ContextImpl::GpuMemoryAvailable() const {
+ size_t free, total;
+ cudaMemGetInfo(&free, &total);
+ return free;
+}
+
+bool ContextImpl::InitCuda(std::string* message) {
if (is_cuda_initialized_) {
return true;
}
+ CHECK_EQ(cudaGetDevice(&gpu_device_id_in_use_), cudaSuccess);
+ int cuda_version;
+ CHECK_EQ(cudaRuntimeGetVersion(&cuda_version), cudaSuccess);
+ cuda_version_major_ = cuda_version / 1000;
+ cuda_version_minor_ = (cuda_version % 1000) / 10;
+ CHECK_EQ(cudaGetDeviceProperties(&gpu_device_properties_,
+ gpu_device_id_in_use_), cudaSuccess);
+ VLOG(3) << "\n" << CudaConfigAsString();
EventLogger event_logger("InitCuda");
if (cublasCreate(&cublas_handle_) != CUBLAS_STATUS_SUCCESS) {
- *message = "cuBLAS::cublasCreate failed.";
+ *message = "CUDA initialization failed because "
+ "cuBLAS::cublasCreate failed.";
cublas_handle_ = nullptr;
return false;
}
event_logger.AddEvent("cublasCreate");
if (cusolverDnCreate(&cusolver_handle_) != CUSOLVER_STATUS_SUCCESS) {
- *message = "cuSolverDN::cusolverDnCreate failed.";
+ *message = "CUDA initialization failed because "
+ "cuSolverDN::cusolverDnCreate failed.";
TearDown();
return false;
}
event_logger.AddEvent("cusolverDnCreate");
if (cusparseCreate(&cusparse_handle_) != CUSPARSE_STATUS_SUCCESS) {
- *message = "cuSPARSE::cusparseCreate failed.";
+ *message = "CUDA initialization failed because "
+ "cuSPARSE::cusparseCreate failed.";
TearDown();
return false;
}
event_logger.AddEvent("cusparseCreate");
if (cudaStreamCreateWithFlags(&stream_, cudaStreamNonBlocking) !=
cudaSuccess) {
- *message = "CUDA::cudaStreamCreateWithFlags failed.";
+ *message = "CUDA initialization failed because "
+ "CUDA::cudaStreamCreateWithFlags failed.";
TearDown();
return false;
}
@@ -100,7 +153,7 @@
CUSOLVER_STATUS_SUCCESS ||
cublasSetStream(cublas_handle_, stream_) != CUBLAS_STATUS_SUCCESS ||
cusparseSetStream(cusparse_handle_, stream_) != CUSPARSE_STATUS_SUCCESS) {
- *message = "CUDA [Solver|BLAS|Sparse] SetStream failed.";
+ *message = "CUDA initialization failed because SetStream failed.";
TearDown();
return false;
}
diff --git a/internal/ceres/context_impl.h b/internal/ceres/context_impl.h
index 4bd18e1..9eb59eb 100644
--- a/internal/ceres/context_impl.h
+++ b/internal/ceres/context_impl.h
@@ -72,19 +72,39 @@
#endif // CERES_USE_CXX_THREADS
#ifndef CERES_NO_CUDA
- // Initializes the cuSolverDN context, creates an asynchronous stream, and
- // associates the stream with cuSolverDN. Returns true iff initialization was
- // successful, else it returns false and a human-readable error message is
- // returned.
- bool InitCUDA(std::string* message);
+ // Note on Ceres' use of CUDA Devices on multi-GPU systems:
+ // 1. On a multi-GPU system, if nothing special is done, the "default" CUDA
+ // device will be used, which is device 0.
+ // 2. If the user masks out GPUs using the CUDA_VISIBLE_DEVICES environment
+ //    variable, Ceres will still use the device visible to the program as
+ //    device 0, which is the first GPU listed in the environment variable.
+ // 3. If the user explicitly selects a GPU in the host process before calling
+ // Ceres, Ceres will use that GPU.
+
+ // Initializes cuBLAS, cuSOLVER, and cuSPARSE contexts, creates an
+ // asynchronous CUDA stream, and associates the stream with the contexts.
+ // Returns true iff initialization was successful; otherwise returns false
+ // and stores a human-readable error message in *message.
+ bool InitCuda(std::string* message);
void TearDown();
- inline bool IsCUDAInitialized() const { return is_cuda_initialized_; }
+ inline bool IsCudaInitialized() const { return is_cuda_initialized_; }
+ // Returns a human-readable string describing the capabilities of the current
+ // CUDA device. CudaConfigAsString can only be called after InitCuda has been
+ // called.
+ std::string CudaConfigAsString() const;
+ // Returns the number of bytes of available global memory on the current CUDA
+ // device. If it is called before InitCuda, it returns 0.
+ size_t GpuMemoryAvailable() const;
cusolverDnHandle_t cusolver_handle_ = nullptr;
cublasHandle_t cublas_handle_ = nullptr;
cudaStream_t stream_ = nullptr;
cusparseHandle_t cusparse_handle_ = nullptr;
bool is_cuda_initialized_ = false;
+ int gpu_device_id_in_use_ = -1;
+ cudaDeviceProp gpu_device_properties_;
+ int cuda_version_major_ = 0;
+ int cuda_version_minor_ = 0;
#endif // CERES_NO_CUDA
};
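
As a sketch of the device-selection note above (not part of the patch): a
host process can pick a GPU via the CUDA runtime before Ceres initializes
CUDA, and InitCuda will pick up and report that selection. The choice of
device index and the error handling are illustrative:

  #include <string>
  #include <cuda_runtime.h>
  #include "ceres/context_impl.h"
  #include "glog/logging.h"

  // Sketch: explicitly select the second visible GPU, if present, before
  // Ceres initializes its CUDA handles.
  void InitCeresOnSecondGpu(ceres::internal::ContextImpl* context) {
    int device_count = 0;
    CHECK_EQ(cudaGetDeviceCount(&device_count), cudaSuccess);
    if (device_count > 1) {
      CHECK_EQ(cudaSetDevice(1), cudaSuccess);
    }
    std::string message;
    // InitCuda queries the current device via cudaGetDevice and reports it
    // through CudaConfigAsString at VLOG(3).
    CHECK(context->InitCuda(&message)) << message;
  }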
diff --git a/internal/ceres/cuda_dense_cholesky_test.cc b/internal/ceres/cuda_dense_cholesky_test.cc
index 1483923..4c5742e 100644
--- a/internal/ceres/cuda_dense_cholesky_test.cc
+++ b/internal/ceres/cuda_dense_cholesky_test.cc
@@ -45,7 +45,7 @@
ContextImpl context;
options.context = &context;
std::string error;
- EXPECT_TRUE(context.InitCUDA(&error)) << error;
+ EXPECT_TRUE(context.InitCuda(&error)) << error;
auto dense_cuda_solver = CUDADenseCholesky::Create(options);
EXPECT_EQ(dense_cuda_solver, nullptr);
}
@@ -65,7 +65,7 @@
ContextImpl context;
options.context = &context;
std::string error;
- EXPECT_TRUE(context.InitCUDA(&error)) << error;
+ EXPECT_TRUE(context.InitCuda(&error)) << error;
options.dense_linear_algebra_library_type = CUDA;
auto dense_cuda_solver = CUDADenseCholesky::Create(options);
ASSERT_NE(dense_cuda_solver, nullptr);
@@ -94,7 +94,7 @@
ContextImpl context;
options.context = &context;
std::string error;
- EXPECT_TRUE(context.InitCUDA(&error)) << error;
+ EXPECT_TRUE(context.InitCuda(&error)) << error;
options.dense_linear_algebra_library_type = CUDA;
auto dense_cuda_solver = CUDADenseCholesky::Create(options);
ASSERT_NE(dense_cuda_solver, nullptr);
@@ -116,7 +116,7 @@
ContextImpl context;
options.context = &context;
std::string error;
- EXPECT_TRUE(context.InitCUDA(&error)) << error;
+ EXPECT_TRUE(context.InitCuda(&error)) << error;
options.dense_linear_algebra_library_type = CUDA;
auto dense_cuda_solver = CUDADenseCholesky::Create(options);
ASSERT_NE(dense_cuda_solver, nullptr);
@@ -131,7 +131,7 @@
ContextImpl context;
options.context = &context;
std::string error;
- EXPECT_TRUE(context.InitCUDA(&error)) << error;
+ EXPECT_TRUE(context.InitCuda(&error)) << error;
options.dense_linear_algebra_library_type = CUDA;
auto dense_cuda_solver = CUDADenseCholesky::Create(options);
ASSERT_NE(dense_cuda_solver, nullptr);
@@ -150,7 +150,7 @@
ContextImpl context;
options.context = &context;
std::string error;
- EXPECT_TRUE(context.InitCUDA(&error)) << error;
+ EXPECT_TRUE(context.InitCuda(&error)) << error;
options.dense_linear_algebra_library_type = ceres::CUDA;
std::unique_ptr<DenseCholesky> dense_cholesky =
CUDADenseCholesky::Create(options);
@@ -189,7 +189,7 @@
ContextImpl context;
options.context = &context;
std::string error;
- EXPECT_TRUE(context.InitCUDA(&error)) << error;
+ EXPECT_TRUE(context.InitCuda(&error)) << error;
auto solver = CUDADenseCholeskyMixedPrecision::Create(options);
ASSERT_EQ(solver, nullptr);
}
@@ -199,7 +199,7 @@
ContextImpl context;
options.context = &context;
std::string error;
- EXPECT_TRUE(context.InitCUDA(&error)) << error;
+ EXPECT_TRUE(context.InitCuda(&error)) << error;
options.dense_linear_algebra_library_type = ceres::CUDA;
auto solver = CUDADenseCholeskyMixedPrecision::Create(options);
ASSERT_EQ(solver, nullptr);
@@ -224,7 +224,7 @@
ContextImpl context;
options.context = &context;
std::string error;
- EXPECT_TRUE(context.InitCUDA(&error)) << error;
+ EXPECT_TRUE(context.InitCuda(&error)) << error;
options.dense_linear_algebra_library_type = CUDA;
options.use_mixed_precision_solves = true;
auto solver = CUDADenseCholeskyMixedPrecision::Create(options);
@@ -261,7 +261,7 @@
ContextImpl context;
options.context = &context;
std::string error;
- EXPECT_TRUE(context.InitCUDA(&error)) << error;
+ EXPECT_TRUE(context.InitCuda(&error)) << error;
options.dense_linear_algebra_library_type = CUDA;
options.use_mixed_precision_solves = true;
auto solver = CUDADenseCholeskyMixedPrecision::Create(options);
@@ -291,7 +291,7 @@
ContextImpl context;
options.context = &context;
std::string error;
- EXPECT_TRUE(context.InitCUDA(&error)) << error;
+ EXPECT_TRUE(context.InitCuda(&error)) << error;
options.dense_linear_algebra_library_type = ceres::CUDA;
options.use_mixed_precision_solves = true;
options.max_num_refinement_iterations = 20;
diff --git a/internal/ceres/cuda_dense_qr_test.cc b/internal/ceres/cuda_dense_qr_test.cc
index cc0dbd6..798f12a 100644
--- a/internal/ceres/cuda_dense_qr_test.cc
+++ b/internal/ceres/cuda_dense_qr_test.cc
@@ -44,7 +44,7 @@
ContextImpl context;
options.context = &context;
std::string error;
- EXPECT_TRUE(context.InitCUDA(&error)) << error;
+ EXPECT_TRUE(context.InitCuda(&error)) << error;
auto dense_cuda_solver = CUDADenseQR::Create(options);
EXPECT_EQ(dense_cuda_solver, nullptr);
}
@@ -63,7 +63,7 @@
ContextImpl context;
options.context = &context;
std::string error;
- EXPECT_TRUE(context.InitCUDA(&error)) << error;
+ EXPECT_TRUE(context.InitCuda(&error)) << error;
options.dense_linear_algebra_library_type = CUDA;
auto dense_cuda_solver = CUDADenseQR::Create(options);
ASSERT_NE(dense_cuda_solver, nullptr);
@@ -97,7 +97,7 @@
ContextImpl context;
options.context = &context;
std::string error;
- EXPECT_TRUE(context.InitCUDA(&error)) << error;
+ EXPECT_TRUE(context.InitCuda(&error)) << error;
options.dense_linear_algebra_library_type = CUDA;
auto dense_cuda_solver = CUDADenseQR::Create(options);
ASSERT_NE(dense_cuda_solver, nullptr);
@@ -121,7 +121,7 @@
ContextImpl context;
options.context = &context;
std::string error;
- EXPECT_TRUE(context.InitCUDA(&error)) << error;
+ EXPECT_TRUE(context.InitCuda(&error)) << error;
options.dense_linear_algebra_library_type = CUDA;
auto dense_cuda_solver = CUDADenseQR::Create(options);
ASSERT_NE(dense_cuda_solver, nullptr);
@@ -141,7 +141,7 @@
ContextImpl context;
options.context = &context;
std::string error;
- EXPECT_TRUE(context.InitCUDA(&error)) << error;
+ EXPECT_TRUE(context.InitCuda(&error)) << error;
options.dense_linear_algebra_library_type = ceres::CUDA;
std::unique_ptr<DenseQR> dense_qr = CUDADenseQR::Create(options);
diff --git a/internal/ceres/cuda_sparse_matrix.cc b/internal/ceres/cuda_sparse_matrix.cc
index e366112..1d8d0b0 100644
--- a/internal/ceres/cuda_sparse_matrix.cc
+++ b/internal/ceres/cuda_sparse_matrix.cc
@@ -61,7 +61,7 @@
CudaSparseMatrix::CudaSparseMatrix(
ContextImpl* context, const CompressedRowSparseMatrix& crs_matrix) {
DCHECK_NE(context, nullptr);
- CHECK(context->IsCUDAInitialized());
+ CHECK(context->IsCudaInitialized());
context_ = context;
num_rows_ = crs_matrix.num_rows();
num_cols_ = crs_matrix.num_cols();
diff --git a/internal/ceres/cuda_sparse_matrix_test.cc b/internal/ceres/cuda_sparse_matrix_test.cc
index ae76b8f..a12ad40 100644
--- a/internal/ceres/cuda_sparse_matrix_test.cc
+++ b/internal/ceres/cuda_sparse_matrix_test.cc
@@ -51,8 +51,8 @@
protected:
void SetUp() final {
std::string message;
- CHECK(context_.InitCUDA(&message))
- << "InitCUDA() failed because: " << message;
+ CHECK(context_.InitCuda(&message))
+ << "InitCuda() failed because: " << message;
std::unique_ptr<LinearLeastSquaresProblem> problem =
CreateLinearLeastSquaresProblemFromId(2);
CHECK(problem != nullptr);
@@ -121,7 +121,7 @@
ContextImpl context;
std::string message;
- CHECK(context.InitCUDA(&message)) << "InitCUDA() failed because: " << message;
+ CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message;
auto A1_crs = CompressedRowSparseMatrix::FromTripletSparseMatrix(A1);
CudaSparseMatrix A_gpu(&context, *A1_crs);
CudaVector b_gpu(&context, A1.num_cols());
@@ -159,7 +159,7 @@
ContextImpl context;
std::string message;
- CHECK(context.InitCUDA(&message)) << "InitCUDA() failed because: " << message;
+ CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message;
auto A_crs = CompressedRowSparseMatrix::FromTripletSparseMatrix(A);
CudaSparseMatrix A_gpu(&context, *A_crs);
CudaVector b_gpu(&context, A.num_cols());
@@ -189,7 +189,7 @@
ContextImpl context;
std::string message;
- CHECK(context.InitCUDA(&message)) << "InitCUDA() failed because: " << message;
+ CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message;
auto A_crs = CompressedRowSparseMatrix::FromTripletSparseMatrix(A);
CudaSparseMatrix A_gpu(&context, *A_crs);
CudaVector b_gpu(&context, A.num_rows());
@@ -244,7 +244,7 @@
ContextImpl context;
std::string message;
- CHECK(context.InitCUDA(&message)) << "InitCUDA() failed because: " << message;
+ CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message;
auto A_crs = CompressedRowSparseMatrix::FromTripletSparseMatrix(A);
CudaSparseMatrix A_gpu(&context, *A_crs);
CudaVector b_gpu(&context, N);
diff --git a/internal/ceres/cuda_vector.cc b/internal/ceres/cuda_vector.cc
index 7bac13a..46e6cb2 100644
--- a/internal/ceres/cuda_vector.cc
+++ b/internal/ceres/cuda_vector.cc
@@ -52,7 +52,7 @@
CudaVector::CudaVector(ContextImpl* context, int size) {
DCHECK_NE(context, nullptr);
- CHECK(context->IsCUDAInitialized());
+ CHECK(context->IsCudaInitialized());
context_ = context;
Resize(size);
}
diff --git a/internal/ceres/cuda_vector_test.cc b/internal/ceres/cuda_vector_test.cc
index 84193c0..db1fec5 100644
--- a/internal/ceres/cuda_vector_test.cc
+++ b/internal/ceres/cuda_vector_test.cc
@@ -45,7 +45,7 @@
TEST(CudaVector, Creation) {
ContextImpl context;
std::string message;
- CHECK(context.InitCUDA(&message)) << "InitCUDA() failed because: " << message;
+ CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message;
CudaVector x(&context, 1000);
EXPECT_EQ(x.num_rows(), 1000);
EXPECT_NE(x.data().data(), nullptr);
@@ -56,7 +56,7 @@
x << 1, 2, 3;
ContextImpl context;
std::string message;
- CHECK(context.InitCUDA(&message)) << "InitCUDA() failed because: " << message;
+ CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message;
CudaVector y(&context, 10);
y.CopyFromCpu(x);
EXPECT_EQ(y.num_rows(), 3);
@@ -72,7 +72,7 @@
x << 1, 2, 3;
ContextImpl context;
std::string message;
- CHECK(context.InitCUDA(&message)) << "InitCUDA() failed because: " << message;
+ CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message;
CudaVector x_gpu(&context, 3);
x_gpu.CopyFromCpu(x);
@@ -94,7 +94,7 @@
y << 100, 10, 1;
ContextImpl context;
std::string message;
- CHECK(context.InitCUDA(&message)) << "InitCUDA() failed because: " << message;
+ CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message;
CudaVector x_gpu(&context, 10);
CudaVector y_gpu(&context, 10);
x_gpu.CopyFromCpu(x);
@@ -109,7 +109,7 @@
x << 1, 2, 3;
ContextImpl context;
std::string message;
- CHECK(context.InitCUDA(&message)) << "InitCUDA() failed because: " << message;
+ CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message;
CudaVector x_gpu(&context, 10);
x_gpu.CopyFromCpu(x);
@@ -127,7 +127,7 @@
x << 1, 1, 1, 1;
ContextImpl context;
std::string message;
- CHECK(context.InitCUDA(&message)) << "InitCUDA() failed because: " << message;
+ CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message;
CudaVector x_gpu(&context, 10);
x_gpu.CopyFromCpu(x);
@@ -145,7 +145,7 @@
TEST(CudaVector, Resize) {
ContextImpl context;
std::string message;
- CHECK(context.InitCUDA(&message)) << "InitCUDA() failed because: " << message;
+ CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message;
CudaVector x_gpu(&context, 10);
EXPECT_EQ(x_gpu.num_rows(), 10);
x_gpu.Resize(4);
@@ -159,7 +159,7 @@
y << 100, 10, 1, 0;
ContextImpl context;
std::string message;
- CHECK(context.InitCUDA(&message)) << "InitCUDA() failed because: " << message;
+ CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message;
CudaVector x_gpu(&context, 4);
CudaVector y_gpu(&context, 4);
x_gpu.CopyFromCpu(x);
@@ -180,7 +180,7 @@
y << 100, 10, 1, 0;
ContextImpl context;
std::string message;
- CHECK(context.InitCUDA(&message)) << "InitCUDA() failed because: " << message;
+ CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message;
CudaVector x_gpu(&context, 4);
CudaVector y_gpu(&context, 4);
x_gpu.CopyFromCpu(x);
@@ -201,7 +201,7 @@
y << 100, 10, 1, 0;
ContextImpl context;
std::string message;
- CHECK(context.InitCUDA(&message)) << "InitCUDA() failed because: " << message;
+ CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message;
CudaVector x_gpu(&context, 4);
CudaVector y_gpu(&context, 4);
x_gpu.CopyFromCpu(x);
@@ -222,7 +222,7 @@
y << 100, 10, 1, 0;
ContextImpl context;
std::string message;
- CHECK(context.InitCUDA(&message)) << "InitCUDA() failed because: " << message;
+ CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message;
CudaVector x_gpu(&context, 4);
CudaVector y_gpu(&context, 4);
x_gpu.CopyFromCpu(x);
@@ -241,7 +241,7 @@
x << 100, 10, 1, 0;
ContextImpl context;
std::string message;
- CHECK(context.InitCUDA(&message)) << "InitCUDA() failed because: " << message;
+ CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message;
CudaVector x_gpu(&context, 4);
CudaVector y_gpu(&context, 4);
x_gpu.CopyFromCpu(x);
@@ -262,7 +262,7 @@
y << 100, 10, 1, 0;
ContextImpl context;
std::string message;
- CHECK(context.InitCUDA(&message)) << "InitCUDA() failed because: " << message;
+ CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message;
CudaVector x_gpu(&context, 4);
CudaVector y_gpu(&context, 4);
CudaVector z_gpu(&context, 4);
@@ -284,7 +284,7 @@
x << 100, 10, 1, 0;
ContextImpl context;
std::string message;
- CHECK(context.InitCUDA(&message)) << "InitCUDA() failed because: " << message;
+ CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message;
CudaVector x_gpu(&context, 4);
CudaVector z_gpu(&context, 4);
x_gpu.CopyFromCpu(x);
@@ -305,7 +305,7 @@
y << 100, 10, 1, 0;
ContextImpl context;
std::string message;
- CHECK(context.InitCUDA(&message)) << "InitCUDA() failed because: " << message;
+ CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message;
CudaVector x_gpu(&context, 10);
CudaVector y_gpu(&context, 10);
x_gpu.CopyFromCpu(x);
@@ -326,7 +326,7 @@
y << 100, 10, 1, 0;
ContextImpl context;
std::string message;
- CHECK(context.InitCUDA(&message)) << "InitCUDA() failed because: " << message;
+ CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message;
CudaVector x_gpu(&context, 4);
CudaVector y_gpu(&context, 4);
x_gpu.CopyFromCpu(x);
@@ -345,7 +345,7 @@
x << 100, 10, 1, 0;
ContextImpl context;
std::string message;
- CHECK(context.InitCUDA(&message)) << "InitCUDA() failed because: " << message;
+ CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message;
CudaVector x_gpu(&context, 10);
x_gpu.CopyFromCpu(x);
@@ -366,7 +366,7 @@
D << 4, 3, 2, 1;
ContextImpl context;
std::string message;
- CHECK(context.InitCUDA(&message)) << "InitCUDA() failed because: " << message;
+ CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message;
CudaVector x_gpu(&context, 4);
CudaVector y_gpu(&context, 4);
CudaVector D_gpu(&context, 4);
@@ -387,7 +387,7 @@
x << 1, 2, 3, 4;
ContextImpl context;
std::string message;
- CHECK(context.InitCUDA(&message)) << "InitCUDA() failed because: " << message;
+ CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message;
CudaVector x_gpu(&context, 4);
x_gpu.CopyFromCpu(x);
diff --git a/internal/ceres/dense_cholesky.cc b/internal/ceres/dense_cholesky.cc
index a3e578f..6d328ab 100644
--- a/internal/ceres/dense_cholesky.cc
+++ b/internal/ceres/dense_cholesky.cc
@@ -348,7 +348,7 @@
#ifndef CERES_NO_CUDA
bool CUDADenseCholesky::Init(ContextImpl* context, std::string* message) {
- CHECK(context->IsCUDAInitialized())
+ CHECK(context->IsCudaInitialized())
<< "CUDADenseCholesky requires CUDA initialization.";
cusolver_handle_ = context->cusolver_handle_;
stream_ = context->stream_;
@@ -490,7 +490,7 @@
bool CUDADenseCholeskyMixedPrecision::Init(const LinearSolver::Options& options,
std::string* message) {
- CHECK(options.context->IsCUDAInitialized())
+ CHECK(options.context->IsCudaInitialized())
<< "CUDADenseCholeskyMixedPrecision requires CUDA initialization.";
cusolver_handle_ = options.context->cusolver_handle_;
cublas_handle_ = options.context->cublas_handle_;
diff --git a/internal/ceres/dense_cholesky_test.cc b/internal/ceres/dense_cholesky_test.cc
index 2c4ca10..bb8a467 100644
--- a/internal/ceres/dense_cholesky_test.cc
+++ b/internal/ceres/dense_cholesky_test.cc
@@ -78,7 +78,7 @@
#ifndef CERES_NO_CUDA
options.context = &context;
std::string error;
- CHECK(context.InitCUDA(&error)) << error;
+ CHECK(context.InitCuda(&error)) << error;
#endif // CERES_NO_CUDA
options.dense_linear_algebra_library_type = ::testing::get<0>(GetParam());
options.use_mixed_precision_solves = ::testing::get<1>(GetParam());
diff --git a/internal/ceres/dense_qr.cc b/internal/ceres/dense_qr.cc
index fb3c228..22727f8 100644
--- a/internal/ceres/dense_qr.cc
+++ b/internal/ceres/dense_qr.cc
@@ -312,7 +312,7 @@
#ifndef CERES_NO_CUDA
bool CUDADenseQR::Init(ContextImpl* context, std::string* message) {
- if (!context->InitCUDA(message)) {
+ if (!context->InitCuda(message)) {
return false;
}
cublas_handle_ = context->cublas_handle_;
diff --git a/internal/ceres/dense_qr_test.cc b/internal/ceres/dense_qr_test.cc
index c10dba5..8698c90 100644
--- a/internal/ceres/dense_qr_test.cc
+++ b/internal/ceres/dense_qr_test.cc
@@ -70,7 +70,7 @@
#ifndef CERES_NO_CUDA
options.context = &context;
std::string error;
- CHECK(context.InitCUDA(&error)) << error;
+ CHECK(context.InitCuda(&error)) << error;
#endif // CERES_NO_CUDA
options.dense_linear_algebra_library_type = GetParam();
const double kEpsilon = std::numeric_limits<double>::epsilon() * 1.5e4;
diff --git a/internal/ceres/solver.cc b/internal/ceres/solver.cc
index ea18913..0656d34 100644
--- a/internal/ceres/solver.cc
+++ b/internal/ceres/solver.cc
@@ -730,7 +730,7 @@
#ifndef CERES_NO_CUDA
if (IsCudaRequired(options)) {
- if (!problem_impl->context()->InitCUDA(&summary->message)) {
+ if (!problem_impl->context()->InitCuda(&summary->message)) {
LOG(ERROR) << "Terminating: " << summary->message;
return;
}
diff --git a/internal/ceres/sparse_linear_operator_benchmark.cc b/internal/ceres/sparse_linear_operator_benchmark.cc
index a08911e..4969c0c 100644
--- a/internal/ceres/sparse_linear_operator_benchmark.cc
+++ b/internal/ceres/sparse_linear_operator_benchmark.cc
@@ -170,7 +170,7 @@
FLAGS_num_residuals_per_camera);
ContextImpl context;
std::string message;
- context.InitCUDA(&message);
+ context.InitCuda(&message);
CompressedRowSparseMatrix jacobian_crs(
jacobian->num_rows(), jacobian->num_cols(), jacobian->num_nonzeros());
jacobian->ToCompressedRowSparseMatrix(&jacobian_crs);
@@ -205,7 +205,7 @@
FLAGS_num_residuals_per_camera);
ContextImpl context;
std::string message;
- context.InitCUDA(&message);
+ context.InitCuda(&message);
CompressedRowSparseMatrix jacobian_crs(
jacobian->num_rows(), jacobian->num_cols(), jacobian->num_nonzeros());
jacobian->ToCompressedRowSparseMatrix(&jacobian_crs);