Fix bug in cuda_kernels_test
* Launch the kernels under test on the context stream (context.stream_)
instead of cudaStreamDefault, since all CUDA operations now operate on
the context stream.
Change-Id: I24c872c467f13ee4d276fc04d6822b79a18070fc
diff --git a/internal/ceres/cuda_kernels_test.cc b/internal/ceres/cuda_kernels_test.cc
index 9290aa5..a364c90 100644
--- a/internal/ceres/cuda_kernels_test.cc
+++ b/internal/ceres/cuda_kernels_test.cc
@@ -58,7 +58,7 @@
CudaBuffer<float> fp32_gpu(&context);
fp32_gpu.Reserve(fp64_cpu.size());
CudaFP64ToFP32(
- fp64_gpu.data(), fp32_gpu.data(), fp64_cpu.size(), cudaStreamDefault);
+ fp64_gpu.data(), fp32_gpu.data(), fp64_cpu.size(), context.stream_);
std::vector<float> fp32_cpu(fp64_cpu.size());
fp32_gpu.CopyToCpu(fp32_cpu.data(), fp32_cpu.size());
for (int i = 0; i < fp32_cpu.size(); ++i) {
@@ -83,7 +83,7 @@
CudaBuffer<float> fp32_gpu(&context);
fp32_gpu.Reserve(fp64_cpu.size());
CudaFP64ToFP32(
- fp64_gpu.data(), fp32_gpu.data(), fp64_cpu.size(), cudaStreamDefault);
+ fp64_gpu.data(), fp32_gpu.data(), fp64_cpu.size(), context.stream_);
std::vector<float> fp32_cpu(fp64_cpu.size());
fp32_gpu.CopyToCpu(fp32_cpu.data(), fp32_cpu.size());
EXPECT_EQ(fp32_cpu[0], 0.0f);
@@ -102,7 +102,7 @@
CudaBuffer<double> fp64_gpu(&context);
fp64_gpu.Reserve(fp32_cpu.size());
CudaFP32ToFP64(
- fp32_gpu.data(), fp64_gpu.data(), fp32_cpu.size(), cudaStreamDefault);
+ fp32_gpu.data(), fp64_gpu.data(), fp32_cpu.size(), context.stream_);
std::vector<double> fp64_cpu(fp32_cpu.size());
fp64_gpu.CopyToCpu(fp64_cpu.data(), fp64_cpu.size());
for (int i = 0; i < fp64_cpu.size(); ++i) {
@@ -117,7 +117,7 @@
std::vector<float> fp32_cpu = {1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5, 5.0};
CudaBuffer<float> fp32_gpu(&context);
fp32_gpu.CopyFromCpuVector(fp32_cpu);
- CudaSetZeroFP32(fp32_gpu.data(), fp32_cpu.size(), cudaStreamDefault);
+ CudaSetZeroFP32(fp32_gpu.data(), fp32_cpu.size(), context.stream_);
std::vector<float> fp32_cpu_zero(fp32_cpu.size());
fp32_gpu.CopyToCpu(fp32_cpu_zero.data(), fp32_cpu_zero.size());
for (int i = 0; i < fp32_cpu_zero.size(); ++i) {
@@ -132,7 +132,7 @@
std::vector<double> fp64_cpu = {1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5, 5.0};
CudaBuffer<double> fp64_gpu(&context);
fp64_gpu.CopyFromCpuVector(fp64_cpu);
- CudaSetZeroFP64(fp64_gpu.data(), fp64_cpu.size(), cudaStreamDefault);
+ CudaSetZeroFP64(fp64_gpu.data(), fp64_cpu.size(), context.stream_);
std::vector<double> fp64_cpu_zero(fp64_cpu.size());
fp64_gpu.CopyToCpu(fp64_cpu_zero.data(), fp64_cpu_zero.size());
for (int i = 0; i < fp64_cpu_zero.size(); ++i) {
@@ -154,7 +154,7 @@
CudaDsxpy(fp64_gpu_b.data(),
fp32_gpu_a.data(),
fp32_gpu_a.size(),
- cudaStreamDefault);
+ context.stream_);
fp64_gpu_b.CopyToCpu(fp64_cpu_b.data(), fp64_cpu_b.size());
for (int i = 0; i < fp64_cpu_b.size(); ++i) {
EXPECT_DOUBLE_EQ(fp64_cpu_b[i], 2.0 * fp32_cpu_a[i]);
@@ -178,7 +178,7 @@
d_gpu.data(),
x_gpu.data(),
y_gpu.size(),
- cudaStreamDefault);
+ context.stream_);
y_gpu.CopyToCpu(y_cpu.data(), y_cpu.size());
EXPECT_DOUBLE_EQ(y_cpu[0], 4.0 + 10.0 * 10.0 * 1.0);
EXPECT_DOUBLE_EQ(y_cpu[1], 3.0 + 20.0 * 20.0 * 2.0);