Fix checks for CUDA memory pools support Change-Id: Icc07625fc0e586e8798da48fa5edfde59487d702
diff --git a/internal/ceres/context_impl.cc b/internal/ceres/context_impl.cc index c085644..2b9d9cc 100644 --- a/internal/ceres/context_impl.cc +++ b/internal/ceres/context_impl.cc
@@ -102,7 +102,9 @@ gpu_device_properties_.maxGridSize[1], gpu_device_properties_.maxGridSize[2], gpu_device_properties_.multiProcessorCount, - gpu_device_properties_.memoryPoolsSupported ? "Yes" : "No"); + // In CUDA 12.0.0+ cudaDeviceProp has field memoryPoolsSupported, but it + // is not available in older versions + is_cuda_memory_pools_supported_ ? "Yes" : "No"); } size_t ContextImpl::GpuMemoryAvailable() const { @@ -123,6 +125,14 @@ CHECK_EQ( cudaGetDeviceProperties(&gpu_device_properties_, gpu_device_id_in_use_), cudaSuccess); +#if CUDART_VERSION >= 11020 + int is_cuda_memory_pools_supported; + CHECK_EQ(cudaDeviceGetAttribute(&is_cuda_memory_pools_supported, + cudaDevAttrMemoryPoolsSupported, + gpu_device_id_in_use_), + cudaSuccess); + is_cuda_memory_pools_supported_ = is_cuda_memory_pools_supported == 1; +#endif VLOG(3) << "\n" << CudaConfigAsString(); EventLogger event_logger("InitCuda"); if (cublasCreate(&cublas_handle_) != CUBLAS_STATUS_SUCCESS) {
diff --git a/internal/ceres/context_impl.h b/internal/ceres/context_impl.h index 508fd06..46692e6 100644 --- a/internal/ceres/context_impl.h +++ b/internal/ceres/context_impl.h
@@ -134,6 +134,7 @@ bool is_cuda_initialized_ = false; int gpu_device_id_in_use_ = -1; cudaDeviceProp gpu_device_properties_; + bool is_cuda_memory_pools_supported_ = false; int cuda_version_major_ = 0; int cuda_version_minor_ = 0; #endif // CERES_NO_CUDA
diff --git a/internal/ceres/cuda_block_sparse_crs_view.cc b/internal/ceres/cuda_block_sparse_crs_view.cc index c370d22..7564d52 100644 --- a/internal/ceres/cuda_block_sparse_crs_view.cc +++ b/internal/ceres/cuda_block_sparse_crs_view.cc
@@ -52,7 +52,7 @@ rows.data(), cols.data(), context->DefaultStream(), - context); + context->is_cuda_memory_pools_supported_); is_crs_compatible_ = block_structure_->IsCrsCompatible(); // if matrix is crs-compatible - we can drop block-structure and don't need // streamed_buffer_
diff --git a/internal/ceres/cuda_kernels_bsm_to_crs.cu.cc b/internal/ceres/cuda_kernels_bsm_to_crs.cu.cc index 05b52e7..b9ca4cd 100644 --- a/internal/ceres/cuda_kernels_bsm_to_crs.cu.cc +++ b/internal/ceres/cuda_kernels_bsm_to_crs.cu.cc
@@ -55,11 +55,11 @@ cudaStream_t stream, bool memory_pools_supported) { void* data = nullptr; - // Stream-ordered alloaction API is available since CUDA 11.4, but might be + // Stream-ordered alloaction API is available since CUDA 11.2, but might be // not implemented by particular device -#if CUDART_VERSION < 11040 +#if CUDART_VERSION < 11020 #warning \ - "Stream-ordered allocations are unavailable, consider updating CUDA toolkit to version 11.4+" + "Stream-ordered allocations are unavailable, consider updating CUDA toolkit to version 11.2+" cudaMalloc(&data, size); #else if (memory_pools_supported) { @@ -72,11 +72,11 @@ } void CudaFree(void* data, cudaStream_t stream, bool memory_pools_supported) { - // Stream-ordered alloaction API is available since CUDA 11.4, but might be + // Stream-ordered alloaction API is available since CUDA 11.2, but might be // not implemented by particular device -#if CUDART_VERSION < 11040 +#if CUDART_VERSION < 11020 #warning \ - "Stream-ordered allocations are unavailable, consider updating CUDA toolkit to version 11.4+" + "Stream-ordered allocations are unavailable, consider updating CUDA toolkit to version 11.2+" cudaSuccess, cudaFree(data); #else if (memory_pools_supported) {
diff --git a/internal/ceres/cuda_partitioned_block_sparse_crs_view.cc b/internal/ceres/cuda_partitioned_block_sparse_crs_view.cc index 550e6d0..c0c1dc8 100644 --- a/internal/ceres/cuda_partitioned_block_sparse_crs_view.cc +++ b/internal/ceres/cuda_partitioned_block_sparse_crs_view.cc
@@ -80,7 +80,7 @@ rows_f.data(), cols_f.data(), context->DefaultStream(), - context); + context->is_cuda_memory_pools_supported_); f_is_crs_compatible_ = block_structure_->IsCrsCompatible(); if (f_is_crs_compatible_) { block_structure_ = nullptr;