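Minor CUDA cleanup. 1. Rename ceres_cuda_kernels.cu -> cuda_kernels.cu.cc and ceres_cuda_kernels.h -> cuda_kernels.h, updating all includes. 2. Add missing include guards (#ifndef/#define) to cuda_kernels.h. 3. Fix an errant namespace closing comment (ceres_cuda_kernels -> ceres::internal). Change-Id: I81a5bfbe3c795ff0b3ef41c3bcaa037b99d5b254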
diff --git a/internal/ceres/CMakeLists.txt b/internal/ceres/CMakeLists.txt index dae16b5..947003d 100644 --- a/internal/ceres/CMakeLists.txt +++ b/internal/ceres/CMakeLists.txt
@@ -156,8 +156,8 @@ COMMAND ${CUDA_TOOLKIT_ROOT_DIR}/bin/cuda-memcheck --leak-check full $<TARGET_FILE:cuda_dense_cholesky_test>) endif (BUILD_TESTING AND GFLAGS) - set_source_files_properties(ceres_cuda_kernels.cu PROPERTIES LANGUAGE CUDA) - add_library(ceres_cuda_kernels ceres_cuda_kernels.cu) + set_source_files_properties(cuda_kernels.cu.cc PROPERTIES LANGUAGE CUDA) + add_library(ceres_cuda_kernels cuda_kernels.cu.cc) target_compile_features(ceres_cuda_kernels PRIVATE cxx_std_14) list(APPEND CERES_LIBRARY_PRIVATE_DEPENDENCIES ceres_cuda_kernels) endif (USE_CUDA)
diff --git a/internal/ceres/ceres_cuda_kernels.cu b/internal/ceres/cuda_kernels.cu.cc similarity index 91% rename from internal/ceres/ceres_cuda_kernels.cu rename to internal/ceres/cuda_kernels.cu.cc index cd045e3..2a1e755 100644 --- a/internal/ceres/ceres_cuda_kernels.cu +++ b/internal/ceres/cuda_kernels.cu.cc
@@ -40,7 +40,7 @@ // toolkit documentation. constexpr int kCudaBlockSize = 256; -template<typename SrcType, typename DstType> +template <typename SrcType, typename DstType> __global__ void TypeConversionKernel(const SrcType* __restrict__ input, DstType* __restrict__ output, const int size) { @@ -68,7 +68,7 @@ <<<num_blocks, kCudaBlockSize, 0, stream>>>(input, output, size); } -template<typename T> +template <typename T> __global__ void SetZeroKernel(T* __restrict__ output, const int size) { const int i = blockIdx.x * blockDim.x + threadIdx.x; if (i < size) { @@ -83,24 +83,21 @@ void CudaSetZeroFP64(double* output, const int size, cudaStream_t stream) { const int num_blocks = (size + kCudaBlockSize - 1) / kCudaBlockSize; - SetZeroKernel<double><<<num_blocks, kCudaBlockSize, 0, stream>>>( - output, size); + SetZeroKernel<double> + <<<num_blocks, kCudaBlockSize, 0, stream>>>(output, size); } template <typename SrcType, typename DstType> __global__ void XPlusEqualsYKernel(DstType* __restrict__ x, const SrcType* __restrict__ y, - const int size) { + const int size) { const int i = blockIdx.x * blockDim.x + threadIdx.x; if (i < size) { x[i] = x[i] + DstType(y[i]); } } -void CudaDsxpy(double* x, - float* y, - const int size, - cudaStream_t stream) { +void CudaDsxpy(double* x, float* y, const int size, cudaStream_t stream) { const int num_blocks = (size + kCudaBlockSize - 1) / kCudaBlockSize; XPlusEqualsYKernel<float, double> <<<num_blocks, kCudaBlockSize, 0, stream>>>(x, y, size); @@ -122,8 +119,7 @@ const int size, cudaStream_t stream) { const int num_blocks = (size + kCudaBlockSize - 1) / kCudaBlockSize; - CudaDtDxpyKernel<<<num_blocks, kCudaBlockSize, 0, stream>>>( - y, D, x, size); + CudaDtDxpyKernel<<<num_blocks, kCudaBlockSize, 0, stream>>>(y, D, x, size); } -} // namespace ceres_cuda_kernels \ No newline at end of file +} // namespace ceres::internal
diff --git a/internal/ceres/ceres_cuda_kernels.h b/internal/ceres/cuda_kernels.h similarity index 96% rename from internal/ceres/ceres_cuda_kernels.h rename to internal/ceres/cuda_kernels.h index 91675e8..d347c20 100644 --- a/internal/ceres/ceres_cuda_kernels.h +++ b/internal/ceres/cuda_kernels.h
@@ -28,6 +28,9 @@ // // Author: joydeepb@cs.utexas.edu (Joydeep Biswas) +#ifndef CERES_INTERNAL_CUDA_KERNELS_H_ +#define CERES_INTERNAL_CUDA_KERNELS_H_ + #include "ceres/internal/config.h" #ifndef CERES_NO_CUDA @@ -72,3 +75,5 @@ } // namespace ceres::internal #endif // CERES_NO_CUDA + +#endif // CERES_INTERNAL_CUDA_KERNELS_H_
diff --git a/internal/ceres/cuda_kernels_test.cc b/internal/ceres/cuda_kernels_test.cc index 83b922d..053b442 100644 --- a/internal/ceres/cuda_kernels_test.cc +++ b/internal/ceres/cuda_kernels_test.cc
@@ -28,13 +28,14 @@ // // Author: joydeepb@cs.utexas.edu (Joydeep Biswas) +#include "ceres/cuda_kernels.h" + #include <math.h> #include <limits> #include <string> #include <vector> -#include "ceres/ceres_cuda_kernels.h" #include "ceres/cuda_buffer.h" #include "ceres/internal/config.h" #include "ceres/internal/eigen.h"
diff --git a/internal/ceres/cuda_sparse_matrix.cc b/internal/ceres/cuda_sparse_matrix.cc index da64981..e366112 100644 --- a/internal/ceres/cuda_sparse_matrix.cc +++ b/internal/ceres/cuda_sparse_matrix.cc
@@ -51,8 +51,8 @@ #ifndef CERES_NO_CUDA -#include "ceres/ceres_cuda_kernels.h" #include "ceres/cuda_buffer.h" +#include "ceres/cuda_kernels.h" #include "ceres/cuda_vector.h" #include "cusparse.h" @@ -146,4 +146,4 @@ } // namespace ceres::internal -#endif // CERES_NO_CUDA \ No newline at end of file +#endif // CERES_NO_CUDA
diff --git a/internal/ceres/cuda_vector.cc b/internal/ceres/cuda_vector.cc index 7debeba..7bac13a 100644 --- a/internal/ceres/cuda_vector.cc +++ b/internal/ceres/cuda_vector.cc
@@ -43,8 +43,8 @@ #ifndef CERES_NO_CUDA -#include "ceres/ceres_cuda_kernels.h" #include "ceres/cuda_buffer.h" +#include "ceres/cuda_kernels.h" #include "ceres/cuda_vector.h" #include "cublas_v2.h" @@ -178,4 +178,4 @@ } // namespace ceres::internal -#endif // CERES_NO_CUDA \ No newline at end of file +#endif // CERES_NO_CUDA
diff --git a/internal/ceres/cuda_vector.h b/internal/ceres/cuda_vector.h index e7c4b81..985eac0 100644 --- a/internal/ceres/cuda_vector.h +++ b/internal/ceres/cuda_vector.h
@@ -49,8 +49,8 @@ #ifndef CERES_NO_CUDA -#include "ceres/ceres_cuda_kernels.h" #include "ceres/cuda_buffer.h" +#include "ceres/cuda_kernels.h" #include "ceres/internal/eigen.h" #include "cublas_v2.h" #include "cusparse.h"
diff --git a/internal/ceres/dense_cholesky.cc b/internal/ceres/dense_cholesky.cc index af42e73..a3e578f 100644 --- a/internal/ceres/dense_cholesky.cc +++ b/internal/ceres/dense_cholesky.cc
@@ -40,8 +40,8 @@ #include "ceres/iterative_refiner.h" #ifndef CERES_NO_CUDA -#include "ceres/ceres_cuda_kernels.h" #include "ceres/context_impl.h" +#include "ceres/cuda_kernels.h" #include "cuda_runtime.h" #include "cusolverDn.h" #endif // CERES_NO_CUDA
diff --git a/internal/ceres/dense_qr.cc b/internal/ceres/dense_qr.cc index 775073d..fb3c228 100644 --- a/internal/ceres/dense_qr.cc +++ b/internal/ceres/dense_qr.cc
@@ -33,6 +33,7 @@ #include <algorithm> #include <memory> #include <string> + #ifndef CERES_NO_CUDA #include "ceres/context_impl.h" #include "cublas_v2.h"