Minor CUDA cleanup.
1. Rename ceres_cuda_kernels.cu -> cuda_kernels.cu.cc and ceres_cuda_kernels.h -> cuda_kernels.h.
2. Add missing include guards to cuda_kernels.h.
3. Fix an errant namespace closing comment (ceres_cuda_kernels -> ceres::internal).
Change-Id: I81a5bfbe3c795ff0b3ef41c3bcaa037b99d5b254
diff --git a/internal/ceres/CMakeLists.txt b/internal/ceres/CMakeLists.txt
index dae16b5..947003d 100644
--- a/internal/ceres/CMakeLists.txt
+++ b/internal/ceres/CMakeLists.txt
@@ -156,8 +156,8 @@
COMMAND ${CUDA_TOOLKIT_ROOT_DIR}/bin/cuda-memcheck --leak-check full
$<TARGET_FILE:cuda_dense_cholesky_test>)
endif (BUILD_TESTING AND GFLAGS)
- set_source_files_properties(ceres_cuda_kernels.cu PROPERTIES LANGUAGE CUDA)
- add_library(ceres_cuda_kernels ceres_cuda_kernels.cu)
+ set_source_files_properties(cuda_kernels.cu.cc PROPERTIES LANGUAGE CUDA)
+ add_library(ceres_cuda_kernels cuda_kernels.cu.cc)
target_compile_features(ceres_cuda_kernels PRIVATE cxx_std_14)
list(APPEND CERES_LIBRARY_PRIVATE_DEPENDENCIES ceres_cuda_kernels)
endif (USE_CUDA)
diff --git a/internal/ceres/ceres_cuda_kernels.cu b/internal/ceres/cuda_kernels.cu.cc
similarity index 91%
rename from internal/ceres/ceres_cuda_kernels.cu
rename to internal/ceres/cuda_kernels.cu.cc
index cd045e3..2a1e755 100644
--- a/internal/ceres/ceres_cuda_kernels.cu
+++ b/internal/ceres/cuda_kernels.cu.cc
@@ -40,7 +40,7 @@
// toolkit documentation.
constexpr int kCudaBlockSize = 256;
-template<typename SrcType, typename DstType>
+template <typename SrcType, typename DstType>
__global__ void TypeConversionKernel(const SrcType* __restrict__ input,
DstType* __restrict__ output,
const int size) {
@@ -68,7 +68,7 @@
<<<num_blocks, kCudaBlockSize, 0, stream>>>(input, output, size);
}
-template<typename T>
+template <typename T>
__global__ void SetZeroKernel(T* __restrict__ output, const int size) {
const int i = blockIdx.x * blockDim.x + threadIdx.x;
if (i < size) {
@@ -83,24 +83,21 @@
void CudaSetZeroFP64(double* output, const int size, cudaStream_t stream) {
const int num_blocks = (size + kCudaBlockSize - 1) / kCudaBlockSize;
- SetZeroKernel<double><<<num_blocks, kCudaBlockSize, 0, stream>>>(
- output, size);
+ SetZeroKernel<double>
+ <<<num_blocks, kCudaBlockSize, 0, stream>>>(output, size);
}
template <typename SrcType, typename DstType>
__global__ void XPlusEqualsYKernel(DstType* __restrict__ x,
const SrcType* __restrict__ y,
- const int size) {
+ const int size) {
const int i = blockIdx.x * blockDim.x + threadIdx.x;
if (i < size) {
x[i] = x[i] + DstType(y[i]);
}
}
-void CudaDsxpy(double* x,
- float* y,
- const int size,
- cudaStream_t stream) {
+void CudaDsxpy(double* x, float* y, const int size, cudaStream_t stream) {
const int num_blocks = (size + kCudaBlockSize - 1) / kCudaBlockSize;
XPlusEqualsYKernel<float, double>
<<<num_blocks, kCudaBlockSize, 0, stream>>>(x, y, size);
@@ -122,8 +119,7 @@
const int size,
cudaStream_t stream) {
const int num_blocks = (size + kCudaBlockSize - 1) / kCudaBlockSize;
- CudaDtDxpyKernel<<<num_blocks, kCudaBlockSize, 0, stream>>>(
- y, D, x, size);
+ CudaDtDxpyKernel<<<num_blocks, kCudaBlockSize, 0, stream>>>(y, D, x, size);
}
-} // namespace ceres_cuda_kernels
\ No newline at end of file
+} // namespace ceres::internal
diff --git a/internal/ceres/ceres_cuda_kernels.h b/internal/ceres/cuda_kernels.h
similarity index 96%
rename from internal/ceres/ceres_cuda_kernels.h
rename to internal/ceres/cuda_kernels.h
index 91675e8..d347c20 100644
--- a/internal/ceres/ceres_cuda_kernels.h
+++ b/internal/ceres/cuda_kernels.h
@@ -28,6 +28,9 @@
//
// Author: joydeepb@cs.utexas.edu (Joydeep Biswas)
+#ifndef CERES_INTERNAL_CUDA_KERNELS_H_
+#define CERES_INTERNAL_CUDA_KERNELS_H_
+
#include "ceres/internal/config.h"
#ifndef CERES_NO_CUDA
@@ -72,3 +75,5 @@
} // namespace ceres::internal
#endif // CERES_NO_CUDA
+
+#endif // CERES_INTERNAL_CUDA_KERNELS_H_
diff --git a/internal/ceres/cuda_kernels_test.cc b/internal/ceres/cuda_kernels_test.cc
index 83b922d..053b442 100644
--- a/internal/ceres/cuda_kernels_test.cc
+++ b/internal/ceres/cuda_kernels_test.cc
@@ -28,13 +28,14 @@
//
// Author: joydeepb@cs.utexas.edu (Joydeep Biswas)
+#include "ceres/cuda_kernels.h"
+
#include <math.h>
#include <limits>
#include <string>
#include <vector>
-#include "ceres/ceres_cuda_kernels.h"
#include "ceres/cuda_buffer.h"
#include "ceres/internal/config.h"
#include "ceres/internal/eigen.h"
diff --git a/internal/ceres/cuda_sparse_matrix.cc b/internal/ceres/cuda_sparse_matrix.cc
index da64981..e366112 100644
--- a/internal/ceres/cuda_sparse_matrix.cc
+++ b/internal/ceres/cuda_sparse_matrix.cc
@@ -51,8 +51,8 @@
#ifndef CERES_NO_CUDA
-#include "ceres/ceres_cuda_kernels.h"
#include "ceres/cuda_buffer.h"
+#include "ceres/cuda_kernels.h"
#include "ceres/cuda_vector.h"
#include "cusparse.h"
@@ -146,4 +146,4 @@
} // namespace ceres::internal
-#endif // CERES_NO_CUDA
\ No newline at end of file
+#endif // CERES_NO_CUDA
diff --git a/internal/ceres/cuda_vector.cc b/internal/ceres/cuda_vector.cc
index 7debeba..7bac13a 100644
--- a/internal/ceres/cuda_vector.cc
+++ b/internal/ceres/cuda_vector.cc
@@ -43,8 +43,8 @@
#ifndef CERES_NO_CUDA
-#include "ceres/ceres_cuda_kernels.h"
#include "ceres/cuda_buffer.h"
+#include "ceres/cuda_kernels.h"
#include "ceres/cuda_vector.h"
#include "cublas_v2.h"
@@ -178,4 +178,4 @@
} // namespace ceres::internal
-#endif // CERES_NO_CUDA
\ No newline at end of file
+#endif // CERES_NO_CUDA
diff --git a/internal/ceres/cuda_vector.h b/internal/ceres/cuda_vector.h
index e7c4b81..985eac0 100644
--- a/internal/ceres/cuda_vector.h
+++ b/internal/ceres/cuda_vector.h
@@ -49,8 +49,8 @@
#ifndef CERES_NO_CUDA
-#include "ceres/ceres_cuda_kernels.h"
#include "ceres/cuda_buffer.h"
+#include "ceres/cuda_kernels.h"
#include "ceres/internal/eigen.h"
#include "cublas_v2.h"
#include "cusparse.h"
diff --git a/internal/ceres/dense_cholesky.cc b/internal/ceres/dense_cholesky.cc
index af42e73..a3e578f 100644
--- a/internal/ceres/dense_cholesky.cc
+++ b/internal/ceres/dense_cholesky.cc
@@ -40,8 +40,8 @@
#include "ceres/iterative_refiner.h"
#ifndef CERES_NO_CUDA
-#include "ceres/ceres_cuda_kernels.h"
#include "ceres/context_impl.h"
+#include "ceres/cuda_kernels.h"
#include "cuda_runtime.h"
#include "cusolverDn.h"
#endif // CERES_NO_CUDA
diff --git a/internal/ceres/dense_qr.cc b/internal/ceres/dense_qr.cc
index 775073d..fb3c228 100644
--- a/internal/ceres/dense_qr.cc
+++ b/internal/ceres/dense_qr.cc
@@ -33,6 +33,7 @@
#include <algorithm>
#include <memory>
#include <string>
+
#ifndef CERES_NO_CUDA
#include "ceres/context_impl.h"
#include "cublas_v2.h"