CUDA CGNR, Part 2: CudaVector * Added CudaVector to manage and operate on vectors with CUDA. * Added tests for CudaVector Change-Id: I528258c8e883011e8709bddf59edb1e933af8060
diff --git a/internal/ceres/CMakeLists.txt b/internal/ceres/CMakeLists.txt index afe91fb..02916b1 100644 --- a/internal/ceres/CMakeLists.txt +++ b/internal/ceres/CMakeLists.txt
@@ -198,6 +198,7 @@ cost_function.cc covariance.cc covariance_impl.cc + cuda_vector.cc dense_cholesky.cc dense_normal_cholesky_solver.cc dense_qr.cc @@ -483,6 +484,7 @@ ceres_test(cuda_dense_cholesky) ceres_test(cuda_dense_qr) ceres_test(cuda_kernels) + ceres_test(cuda_vector) ceres_test(dense_linear_solver) ceres_test(dense_cholesky) ceres_test(dense_qr)
diff --git a/internal/ceres/context_impl.cc b/internal/ceres/context_impl.cc index b8f9ff5..9f6cc25 100644 --- a/internal/ceres/context_impl.cc +++ b/internal/ceres/context_impl.cc
@@ -63,11 +63,11 @@ cudaStreamDestroy(stream_); stream_ = nullptr; } - cuda_initialized_ = false; + is_cuda_initialized_ = false; } bool ContextImpl::InitCUDA(std::string* message) { - if (cuda_initialized_) { + if (is_cuda_initialized_) { return true; } EventLogger event_logger("InitCuda"); @@ -105,7 +105,7 @@ return false; } event_logger.AddEvent("SetStream"); - cuda_initialized_ = true; + is_cuda_initialized_ = true; return true; } #endif // CERES_NO_CUDA
diff --git a/internal/ceres/context_impl.h b/internal/ceres/context_impl.h index d4bd436..4bd18e1 100644 --- a/internal/ceres/context_impl.h +++ b/internal/ceres/context_impl.h
@@ -78,13 +78,13 @@ // returned. bool InitCUDA(std::string* message); void TearDown(); + inline bool IsCUDAInitialized() const { return is_cuda_initialized_; } cusolverDnHandle_t cusolver_handle_ = nullptr; cublasHandle_t cublas_handle_ = nullptr; cudaStream_t stream_ = nullptr; cusparseHandle_t cusparse_handle_ = nullptr; - // Indicates whether all the CUDA resources have been initialized. - bool cuda_initialized_ = false; + bool is_cuda_initialized_ = false; #endif // CERES_NO_CUDA };
diff --git a/internal/ceres/cuda_vector.cc b/internal/ceres/cuda_vector.cc new file mode 100644 index 0000000..5a128de --- /dev/null +++ b/internal/ceres/cuda_vector.cc
@@ -0,0 +1,190 @@ +// Ceres Solver - A fast non-linear least squares minimizer +// Copyright 2022 Google Inc. All rights reserved. +// http://ceres-solver.org/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of Google Inc. nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +// Author: joydeepb@cs.utexas.edu (Joydeep Biswas) +// +// A simple CUDA vector class. + +// This include must come before any #ifndef check on Ceres compile options. +// clang-format off +#include "ceres/internal/config.h" +// clang-format on + +#include <math.h> + +#include "ceres/internal/export.h" +#include "ceres/types.h" +#include "ceres/context_impl.h" + +#ifndef CERES_NO_CUDA + +#include "ceres/cuda_buffer.h" +#include "ceres/cuda_vector.h" +#include "ceres/ceres_cuda_kernels.h" +#include "cublas_v2.h" + +namespace ceres::internal { + +CudaVector::CudaVector(ContextImpl* context, int size) { + DCHECK_NE(context, nullptr); + CHECK(context->IsCUDAInitialized()); + context_ = context; + Resize(size); +} + +CudaVector& CudaVector::operator=(const CudaVector& other) { + if (this != &other) { + Resize(other.num_rows()); + data_.CopyFromGPUArray(other.data_.data(), num_rows_, context_->stream_); + } + return *this; +} + +void CudaVector::DestroyDescriptor() { + if (descr_ != nullptr) { + CHECK_EQ(cusparseDestroyDnVec(descr_), CUSPARSE_STATUS_SUCCESS); + descr_ = nullptr; + } +} + +CudaVector::~CudaVector() { + DestroyDescriptor(); +} + +void CudaVector::Resize(int size) { + data_.Reserve(size); + num_rows_ = size; + DestroyDescriptor(); + CHECK_EQ(cusparseCreateDnVec(&descr_, + num_rows_, + data_.data(), + CUDA_R_64F), CUSPARSE_STATUS_SUCCESS); +} + +double CudaVector::Dot(const CudaVector& x) const { + double result = 0; + CHECK_EQ(cublasDdot(context_->cublas_handle_, + num_rows_, + data_.data(), + 1, + x.data().data(), + 1, &result), + CUBLAS_STATUS_SUCCESS) << "CuBLAS cublasDdot failed."; + return result; +} + +double CudaVector::Norm() const { + double result = 0; + CHECK_EQ(cublasDnrm2(context_->cublas_handle_, + num_rows_, + data_.data(), + 1, + &result), + CUBLAS_STATUS_SUCCESS) << "CuBLAS cublasDnrm2 failed."; + return result; +} + +void CudaVector::CopyFromCpu(const Vector& x) { + data_.Reserve(x.rows()); + data_.CopyFromCpu(x.data(), x.rows(), context_->stream_); + num_rows_ = x.rows(); + DestroyDescriptor(); + CHECK_EQ(cusparseCreateDnVec(&descr_, + num_rows_, + data_.data(), + CUDA_R_64F), CUSPARSE_STATUS_SUCCESS); +} + +void CudaVector::CopyTo(Vector* x) const { + CHECK(x != nullptr); + x->resize(num_rows_); + // Need to synchronize with any GPU kernels that may be writing to the + // buffer before the transfer happens. + CHECK_EQ(cudaStreamSynchronize(context_->stream_), cudaSuccess); + data_.CopyToCpu(x->data(), num_rows_); +} + +void CudaVector::CopyTo(double* x) const { + CHECK(x != nullptr); + // Need to synchronize with any GPU kernels that may be writing to the + // buffer before the transfer happens. + CHECK_EQ(cudaStreamSynchronize(context_->stream_), cudaSuccess); + data_.CopyToCpu(x, num_rows_); +} + +void CudaVector::SetZero() { + CHECK(data_.data() != nullptr); + CudaSetZeroFP64(data_.data(), num_rows_, context_->stream_); +} + +void CudaVector::Axpby(double a, const CudaVector& x, double b) { + if (&x == this) { + Scale(a + b); + return; + } + CHECK_EQ(num_rows_, x.num_rows_); + if (b != 1.0) { + // First scale y by b. + CHECK_EQ(cublasDscal(context_->cublas_handle_, + num_rows_, + &b, + data_.data(), + 1), + CUBLAS_STATUS_SUCCESS) << "CuBLAS cublasDscal failed."; + } + // Then add a * x to y. + CHECK_EQ(cublasDaxpy(context_->cublas_handle_, + num_rows_, + &a, + x.data().data(), + 1, + data_.data(), + 1), + CUBLAS_STATUS_SUCCESS) << "CuBLAS cublasDaxpy failed."; +} + +void CudaVector::DtDxpy(const CudaVector& D, const CudaVector& x) { + CudaDtDxpy(data_.data(), + D.data().data(), + x.data().data(), + num_rows_, + context_->stream_); +} + +void CudaVector::Scale(double s) { + CHECK_EQ(cublasDscal(context_->cublas_handle_, + num_rows_, + &s, + data_.data(), + 1), + CUBLAS_STATUS_SUCCESS) << "CuBLAS cublasDscal failed."; +} + +} // namespace ceres::internal + +#endif // CERES_NO_CUDA \ No newline at end of file
diff --git a/internal/ceres/cuda_vector.h b/internal/ceres/cuda_vector.h new file mode 100644 index 0000000..4018c1b --- /dev/null +++ b/internal/ceres/cuda_vector.h
@@ -0,0 +1,156 @@ +// Ceres Solver - A fast non-linear least squares minimizer +// Copyright 2022 Google Inc. All rights reserved. +// http://ceres-solver.org/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of Google Inc. nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +// Author: joydeepb@cs.utexas.edu (Joydeep Biswas) +// +// A simple CUDA vector class. + +#ifndef CERES_INTERNAL_CUDA_VECTOR_H_ +#define CERES_INTERNAL_CUDA_VECTOR_H_ + +// This include must come before any #ifndef check on Ceres compile options. +// clang-format off +#include "ceres/internal/config.h" +// clang-format on + +#include <math.h> +#include <memory> +#include <string> + +#include "ceres/internal/export.h" +#include "ceres/types.h" +#include "ceres/context_impl.h" + +#ifndef CERES_NO_CUDA + +#include "ceres/cuda_buffer.h" +#include "ceres/ceres_cuda_kernels.h" +#include "ceres/internal/eigen.h" +#include "cublas_v2.h" +#include "cusparse.h" + +namespace ceres::internal { + +// An Nx1 vector, denoted y hosted on the GPU, with CUDA-accelerated operations. +class CERES_NO_EXPORT CudaVector { + public: + + // Create a pre-allocated vector of size N and return a pointer to it. The + // caller must ensure that InitCuda() has already been successfully called on + // context before calling this method. + CudaVector(ContextImpl* context, int size); + + ~CudaVector(); + + void Resize(int size); + + // Perform a deep copy of the vector. + CudaVector& operator=(const CudaVector&); + + // Return the inner product x' * y. + double Dot(const CudaVector& x) const; + + // Return the L2 norm of the vector (||y||_2). + double Norm() const; + + // Set all elements to zero. + void SetZero(); + + // Copy from Eigen vector. + void CopyFromCpu(const Vector& x); + + // Copy to Eigen vector. + void CopyTo(Vector* x) const; + + // Copy to CPU memory array. It is the caller's responsibility to ensure + // that the array is large enough. + void CopyTo(double* x) const; + + // y = a * x + b * y. + void Axpby(double a, const CudaVector& x, double b); + + // y = diag(d)' * diag(d) * x + y. + void DtDxpy(const CudaVector& D, const CudaVector& x); + + // y = s * y. + void Scale(double s); + + int num_rows() const { return num_rows_; } + int num_cols() const { return 1; } + + const CudaBuffer<double>& data() const { return data_; } + + const cusparseDnVecDescr_t& descr() const { return descr_; } + + private: + CudaVector(const CudaVector&) = delete; + void DestroyDescriptor(); + + int num_rows_ = 0; + ContextImpl* context_ = nullptr; + CudaBuffer<double> data_; + // CuSparse object that describes this dense vector. + cusparseDnVecDescr_t descr_ = nullptr; +}; + +// Blas1 operations on Cuda vectors. These functions are needed as an +// abstraction layer so that we can use different versions of a vector style +// object in the conjugate gradients linear solver. +inline double Norm(const CudaVector& x) { return x.Norm(); } +inline void SetZero(CudaVector& x) { x.SetZero(); } +inline void Axpby( + double a, + const CudaVector& x, + double b, + const CudaVector& y, + CudaVector& z) { + if (&x == &y && &y == &z) { + // z = (a + b) * z; + z.Scale(a + b); + } else if (&x == &z) { + // x is aliased to z. + // z = x + // = b * y + a * x; + z.Axpby(b, y, a); + } else if (&y == &z) { + // y is aliased to z. + // z = y = a * x + b * y; + z.Axpby(a, x, b); + } else { + // General case: all inputs and outputs are distinct. + z = y; + z.Axpby(a, x, b); + } +} +inline double Dot(const CudaVector& x, const CudaVector& y) { return x.Dot(y); } +inline void Copy(const CudaVector& from, CudaVector& to) { to = from; } + +} // namespace ceres::internal + +#endif // CERES_NO_CUDA +#endif // CERES_INTERNAL_CUDA_SPARSE_LINEAR_OPERATOR_H_
diff --git a/internal/ceres/cuda_vector_test.cc b/internal/ceres/cuda_vector_test.cc new file mode 100644 index 0000000..db930ed --- /dev/null +++ b/internal/ceres/cuda_vector_test.cc
@@ -0,0 +1,422 @@ +// Ceres Solver - A fast non-linear least squares minimizer +// Copyright 2022 Google Inc. All rights reserved. +// http://ceres-solver.org/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of Google Inc. nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +// Author: joydeepb@cs.utexas.edu (Joydeep Biswas) + +#include <string> + +#include "ceres/internal/config.h" +#include "ceres/internal/eigen.h" +#include "ceres/cuda_vector.h" +#include "glog/logging.h" +#include "gtest/gtest.h" + +namespace ceres { +namespace internal { + +#ifndef CERES_NO_CUDA + +TEST(CudaVector, Creation) { + ContextImpl context; + std::string message; + CHECK(context.InitCUDA(&message)) << "InitCUDA() failed because: " << message; + CudaVector x(&context, 1000); + EXPECT_EQ(x.num_rows(), 1000); + EXPECT_NE(x.data().data(), nullptr); +} + +TEST(CudaVector, CopyVector) { + Vector x(3); + x << 1, 2, 3; + ContextImpl context; + std::string message; + CHECK(context.InitCUDA(&message)) << "InitCUDA() failed because: " << message; + CudaVector y(&context, 10); + y.CopyFromCpu(x); + EXPECT_EQ(y.num_rows(), 3); + + Vector z(3); + z << 0, 0, 0; + y.CopyTo(&z); + EXPECT_EQ(x, z); +} + +TEST(CudaVector, DeepCopy) { + Vector x(3); + x << 1, 2, 3; + ContextImpl context; + std::string message; + CHECK(context.InitCUDA(&message)) << "InitCUDA() failed because: " << message; + CudaVector x_gpu(&context, 3); + x_gpu.CopyFromCpu(x); + + CudaVector y_gpu(&context, 3); + y_gpu.SetZero(); + EXPECT_EQ(y_gpu.Norm(), 0.0); + + y_gpu = x_gpu; + Vector y(3); + y << 0, 0, 0; + y_gpu.CopyTo(&y); + EXPECT_EQ(x, y); +} + +TEST(CudaVector, Dot) { + Vector x(3); + Vector y(3); + x << 1, 2, 3; + y << 100, 10, 1; + ContextImpl context; + std::string message; + CHECK(context.InitCUDA(&message)) << "InitCUDA() failed because: " << message; + CudaVector x_gpu(&context, 10); + CudaVector y_gpu(&context, 10); + x_gpu.CopyFromCpu(x); + y_gpu.CopyFromCpu(y); + + EXPECT_EQ(x_gpu.Dot(y_gpu), 123.0); + EXPECT_EQ(Dot(x_gpu, y_gpu), 123.0); +} + +TEST(CudaVector, Norm) { + Vector x(3); + x << 1, 2, 3; + ContextImpl context; + std::string message; + CHECK(context.InitCUDA(&message)) << "InitCUDA() failed because: " << message; + CudaVector x_gpu(&context, 10); + x_gpu.CopyFromCpu(x); + + EXPECT_NEAR(x_gpu.Norm(), + sqrt(1.0 + 4.0 + 9.0), + std::numeric_limits<double>::epsilon()); + + EXPECT_NEAR(Norm(x_gpu), + sqrt(1.0 + 4.0 + 9.0), + std::numeric_limits<double>::epsilon()); +} + +TEST(CudaVector, SetZero) { + Vector x(4); + x << 1, 1, 1, 1; + ContextImpl context; + std::string message; + CHECK(context.InitCUDA(&message)) << "InitCUDA() failed because: " << message; + CudaVector x_gpu(&context, 10); + x_gpu.CopyFromCpu(x); + + EXPECT_NEAR(x_gpu.Norm(), + 2.0, + std::numeric_limits<double>::epsilon()); + + x_gpu.SetZero(); + EXPECT_NEAR(x_gpu.Norm(), + 0.0, + std::numeric_limits<double>::epsilon()); + + x_gpu.CopyFromCpu(x); + EXPECT_NEAR(x_gpu.Norm(), + 2.0, + std::numeric_limits<double>::epsilon()); + SetZero(x_gpu); + EXPECT_NEAR(x_gpu.Norm(), + 0.0, + std::numeric_limits<double>::epsilon()); +} + +TEST(CudaVector, Resize) { + ContextImpl context; + std::string message; + CHECK(context.InitCUDA(&message)) << "InitCUDA() failed because: " << message; + CudaVector x_gpu(&context, 10); + EXPECT_EQ(x_gpu.num_rows(), 10); + x_gpu.Resize(4); + EXPECT_EQ(x_gpu.num_rows(), 4); +} + +TEST(CudaVector, Axpy) { + Vector x(4); + Vector y(4); + x << 1, 1, 1, 1; + y << 100, 10, 1, 0; + ContextImpl context; + std::string message; + CHECK(context.InitCUDA(&message)) << "InitCUDA() failed because: " << message; + CudaVector x_gpu(&context, 4); + CudaVector y_gpu(&context, 4); + x_gpu.CopyFromCpu(x); + y_gpu.CopyFromCpu(y); + + x_gpu.Axpby(2.0, y_gpu, 1.0); + Vector result; + Vector expected(4); + expected << 201, 21, 3, 1; + x_gpu.CopyTo(&result); + EXPECT_EQ(result, expected); +} + +TEST(CudaVector, AxpbyBEquals1) { + Vector x(4); + Vector y(4); + x << 1, 1, 1, 1; + y << 100, 10, 1, 0; + ContextImpl context; + std::string message; + CHECK(context.InitCUDA(&message)) + << "InitCUDA() failed because: " << message; + CudaVector x_gpu(&context, 4); + CudaVector y_gpu(&context, 4); + x_gpu.CopyFromCpu(x); + y_gpu.CopyFromCpu(y); + + x_gpu.Axpby(2.0, y_gpu, 1.0); + Vector result; + Vector expected(4); + expected << 201, 21, 3, 1; + x_gpu.CopyTo(&result); + EXPECT_EQ(result, expected); +} + +TEST(CudaVector, AxpbyMemberFunctionBNotEqual1) { + Vector x(4); + Vector y(4); + x << 1, 1, 1, 1; + y << 100, 10, 1, 0; + ContextImpl context; + std::string message; + CHECK(context.InitCUDA(&message)) + << "InitCUDA() failed because: " << message; + CudaVector x_gpu(&context, 4); + CudaVector y_gpu(&context, 4); + x_gpu.CopyFromCpu(x); + y_gpu.CopyFromCpu(y); + + x_gpu.Axpby(2.0, y_gpu, 3.0); + Vector result; + Vector expected(4); + expected << 203, 23, 5, 3; + x_gpu.CopyTo(&result); + EXPECT_EQ(result, expected); +} + +TEST(CudaVector, AxpbyMemberFunctionBEqual1) { + Vector x(4); + Vector y(4); + x << 1, 1, 1, 1; + y << 100, 10, 1, 0; + ContextImpl context; + std::string message; + CHECK(context.InitCUDA(&message)) + << "InitCUDA() failed because: " << message; + CudaVector x_gpu(&context, 4); + CudaVector y_gpu(&context, 4); + x_gpu.CopyFromCpu(x); + y_gpu.CopyFromCpu(y); + + x_gpu.Axpby(2.0, y_gpu, 1.0); + Vector result; + Vector expected(4); + expected << 201, 21, 3, 1; + x_gpu.CopyTo(&result); + EXPECT_EQ(result, expected); +} + +TEST(CudaVector, AxpbyMemberXAliasesY) { + Vector x(4); + x << 100, 10, 1, 0; + ContextImpl context; + std::string message; + CHECK(context.InitCUDA(&message)) + << "InitCUDA() failed because: " << message; + CudaVector x_gpu(&context, 4); + CudaVector y_gpu(&context, 4); + x_gpu.CopyFromCpu(x); + y_gpu.SetZero(); + + x_gpu.Axpby(2.0, x_gpu, 1.0); + Vector result; + Vector expected(4); + expected << 300, 30, 3, 0; + x_gpu.CopyTo(&result); + EXPECT_EQ(result, expected); +} + +TEST(CudaVector, AxpbyNonMemberMethodNoAliases) { + Vector x(4); + Vector y(4); + x << 1, 1, 1, 1; + y << 100, 10, 1, 0; + ContextImpl context; + std::string message; + CHECK(context.InitCUDA(&message)) + << "InitCUDA() failed because: " << message; + CudaVector x_gpu(&context, 4); + CudaVector y_gpu(&context, 4); + CudaVector z_gpu(&context, 4); + x_gpu.CopyFromCpu(x); + y_gpu.CopyFromCpu(y); + z_gpu.Resize(4); + z_gpu.SetZero(); + + Axpby(2.0, x_gpu, 3.0, y_gpu, z_gpu); + Vector result; + Vector expected(4); + expected << 302, 32, 5, 2; + z_gpu.CopyTo(&result); + EXPECT_EQ(result, expected); +} + +TEST(CudaVector, AxpbyNonMemberMethodXAliasesY) { + Vector x(4); + x << 100, 10, 1, 0; + ContextImpl context; + std::string message; + CHECK(context.InitCUDA(&message)) + << "InitCUDA() failed because: " << message; + CudaVector x_gpu(&context, 4); + CudaVector z_gpu(&context, 4); + x_gpu.CopyFromCpu(x); + z_gpu.SetZero(); + + Axpby(2.0, x_gpu, 3.0, x_gpu, z_gpu); + Vector result; + Vector expected(4); + expected << 500, 50, 5, 0; + z_gpu.CopyTo(&result); + EXPECT_EQ(result, expected); +} + +TEST(CudaVector, AxpbyNonMemberMethodXAliasesZ) { + Vector x(4); + Vector y(4); + x << 1, 1, 1, 1; + y << 100, 10, 1, 0; + ContextImpl context; + std::string message; + CHECK(context.InitCUDA(&message)) + << "InitCUDA() failed because: " << message; + CudaVector x_gpu(&context, 10); + CudaVector y_gpu(&context, 10); + x_gpu.CopyFromCpu(x); + y_gpu.CopyFromCpu(y); + + Axpby(2.0, x_gpu, 3.0, y_gpu, x_gpu); + Vector result; + Vector expected(4); + expected << 302, 32, 5, 2; + x_gpu.CopyTo(&result); + EXPECT_EQ(result, expected); +} + +TEST(CudaVector, AxpbyNonMemberMethodYAliasesZ) { + Vector x(4); + Vector y(4); + x << 1, 1, 1, 1; + y << 100, 10, 1, 0; + ContextImpl context; + std::string message; + CHECK(context.InitCUDA(&message)) + << "InitCUDA() failed because: " << message; + CudaVector x_gpu(&context, 4); + CudaVector y_gpu(&context, 4); + x_gpu.CopyFromCpu(x); + y_gpu.CopyFromCpu(y); + + Axpby(2.0, x_gpu, 3.0, y_gpu, y_gpu); + Vector result; + Vector expected(4); + expected << 302, 32, 5, 2; + y_gpu.CopyTo(&result); + EXPECT_EQ(result, expected); +} + +TEST(CudaVector, AxpbyNonMemberMethodXAliasesYAliasesZ) { + Vector x(4); + x << 100, 10, 1, 0; + ContextImpl context; + std::string message; + CHECK(context.InitCUDA(&message)) + << "InitCUDA() failed because: " << message; + CudaVector x_gpu(&context, 10); + x_gpu.CopyFromCpu(x); + + Axpby(2.0, x_gpu, 3.0, x_gpu, x_gpu); + Vector result; + Vector expected(4); + expected << 500, 50, 5, 0; + x_gpu.CopyTo(&result); + EXPECT_EQ(result, expected); +} + +TEST(CudaVector, DtDxpy) { + Vector x(4); + Vector y(4); + Vector D(4); + x << 1, 2, 3, 4; + y << 100, 10, 1, 0; + D << 4, 3, 2, 1; + ContextImpl context; + std::string message; + CHECK(context.InitCUDA(&message)) << "InitCUDA() failed because: " << message; + CudaVector x_gpu(&context, 4); + CudaVector y_gpu(&context, 4); + CudaVector D_gpu(&context, 4); + x_gpu.CopyFromCpu(x); + y_gpu.CopyFromCpu(y); + D_gpu.CopyFromCpu(D); + + y_gpu.DtDxpy(D_gpu, x_gpu); + Vector result; + Vector expected(4); + expected << 116, 28, 13, 4; + y_gpu.CopyTo(&result); + EXPECT_EQ(result, expected); +} + +TEST(CudaVector, Scale) { + Vector x(4); + x << 1, 2, 3, 4; + ContextImpl context; + std::string message; + CHECK(context.InitCUDA(&message)) << "InitCUDA() failed because: " << message; + CudaVector x_gpu(&context, 4); + x_gpu.CopyFromCpu(x); + + x_gpu.Scale(-3.0); + + Vector result; + Vector expected(4); + expected << -3.0, -6.0, -9.0, -12.0; + x_gpu.CopyTo(&result); + EXPECT_EQ(result, expected); +} + +#endif // CERES_NO_CUDA + +} // namespace internal +} // namespace ceres