CUDA CGNR, Part 2: CudaVector
* Added CudaVector to manage and operate on vectors with CUDA.
* Added tests for CudaVector
Change-Id: I528258c8e883011e8709bddf59edb1e933af8060
diff --git a/internal/ceres/CMakeLists.txt b/internal/ceres/CMakeLists.txt
index afe91fb..02916b1 100644
--- a/internal/ceres/CMakeLists.txt
+++ b/internal/ceres/CMakeLists.txt
@@ -198,6 +198,7 @@
cost_function.cc
covariance.cc
covariance_impl.cc
+ cuda_vector.cc
dense_cholesky.cc
dense_normal_cholesky_solver.cc
dense_qr.cc
@@ -483,6 +484,7 @@
ceres_test(cuda_dense_cholesky)
ceres_test(cuda_dense_qr)
ceres_test(cuda_kernels)
+ ceres_test(cuda_vector)
ceres_test(dense_linear_solver)
ceres_test(dense_cholesky)
ceres_test(dense_qr)
diff --git a/internal/ceres/context_impl.cc b/internal/ceres/context_impl.cc
index b8f9ff5..9f6cc25 100644
--- a/internal/ceres/context_impl.cc
+++ b/internal/ceres/context_impl.cc
@@ -63,11 +63,11 @@
cudaStreamDestroy(stream_);
stream_ = nullptr;
}
- cuda_initialized_ = false;
+ is_cuda_initialized_ = false;
}
bool ContextImpl::InitCUDA(std::string* message) {
- if (cuda_initialized_) {
+ if (is_cuda_initialized_) {
return true;
}
EventLogger event_logger("InitCuda");
@@ -105,7 +105,7 @@
return false;
}
event_logger.AddEvent("SetStream");
- cuda_initialized_ = true;
+ is_cuda_initialized_ = true;
return true;
}
#endif // CERES_NO_CUDA
diff --git a/internal/ceres/context_impl.h b/internal/ceres/context_impl.h
index d4bd436..4bd18e1 100644
--- a/internal/ceres/context_impl.h
+++ b/internal/ceres/context_impl.h
@@ -78,13 +78,13 @@
// returned.
bool InitCUDA(std::string* message);
void TearDown();
+ inline bool IsCUDAInitialized() const { return is_cuda_initialized_; }
cusolverDnHandle_t cusolver_handle_ = nullptr;
cublasHandle_t cublas_handle_ = nullptr;
cudaStream_t stream_ = nullptr;
cusparseHandle_t cusparse_handle_ = nullptr;
- // Indicates whether all the CUDA resources have been initialized.
- bool cuda_initialized_ = false;
+ bool is_cuda_initialized_ = false;
#endif // CERES_NO_CUDA
};
diff --git a/internal/ceres/cuda_vector.cc b/internal/ceres/cuda_vector.cc
new file mode 100644
index 0000000..5a128de
--- /dev/null
+++ b/internal/ceres/cuda_vector.cc
@@ -0,0 +1,190 @@
+// Ceres Solver - A fast non-linear least squares minimizer
+// Copyright 2022 Google Inc. All rights reserved.
+// http://ceres-solver.org/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+// * Neither the name of Google Inc. nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+// Author: joydeepb@cs.utexas.edu (Joydeep Biswas)
+//
+// A simple CUDA vector class.
+
+// This include must come before any #ifndef check on Ceres compile options.
+// clang-format off
+#include "ceres/internal/config.h"
+// clang-format on
+
+#include <math.h>
+
+#include "ceres/internal/export.h"
+#include "ceres/types.h"
+#include "ceres/context_impl.h"
+
+#ifndef CERES_NO_CUDA
+
+#include "ceres/cuda_buffer.h"
+#include "ceres/cuda_vector.h"
+#include "ceres/ceres_cuda_kernels.h"
+#include "cublas_v2.h"
+
+namespace ceres::internal {
+
+CudaVector::CudaVector(ContextImpl* context, int size) {
+ DCHECK_NE(context, nullptr);
+ CHECK(context->IsCUDAInitialized());
+ context_ = context;
+ Resize(size);
+}
+
+CudaVector& CudaVector::operator=(const CudaVector& other) {
+ if (this != &other) {
+ Resize(other.num_rows());
+ data_.CopyFromGPUArray(other.data_.data(), num_rows_, context_->stream_);
+ }
+ return *this;
+}
+
+void CudaVector::DestroyDescriptor() {
+ if (descr_ != nullptr) {
+ CHECK_EQ(cusparseDestroyDnVec(descr_), CUSPARSE_STATUS_SUCCESS);
+ descr_ = nullptr;
+ }
+}
+
+CudaVector::~CudaVector() {
+ DestroyDescriptor();
+}
+
+void CudaVector::Resize(int size) {
+ data_.Reserve(size);
+ num_rows_ = size;
+ DestroyDescriptor();
+ CHECK_EQ(cusparseCreateDnVec(&descr_,
+ num_rows_,
+ data_.data(),
+ CUDA_R_64F), CUSPARSE_STATUS_SUCCESS);
+}
+
+double CudaVector::Dot(const CudaVector& x) const {
+ double result = 0;
+ CHECK_EQ(cublasDdot(context_->cublas_handle_,
+ num_rows_,
+ data_.data(),
+ 1,
+ x.data().data(),
+ 1, &result),
+ CUBLAS_STATUS_SUCCESS) << "CuBLAS cublasDdot failed.";
+ return result;
+}
+
+double CudaVector::Norm() const {
+ double result = 0;
+ CHECK_EQ(cublasDnrm2(context_->cublas_handle_,
+ num_rows_,
+ data_.data(),
+ 1,
+ &result),
+ CUBLAS_STATUS_SUCCESS) << "CuBLAS cublasDnrm2 failed.";
+ return result;
+}
+
+void CudaVector::CopyFromCpu(const Vector& x) {
+ data_.Reserve(x.rows());
+ data_.CopyFromCpu(x.data(), x.rows(), context_->stream_);
+ num_rows_ = x.rows();
+ DestroyDescriptor();
+ CHECK_EQ(cusparseCreateDnVec(&descr_,
+ num_rows_,
+ data_.data(),
+ CUDA_R_64F), CUSPARSE_STATUS_SUCCESS);
+}
+
+void CudaVector::CopyTo(Vector* x) const {
+ CHECK(x != nullptr);
+ x->resize(num_rows_);
+ // Need to synchronize with any GPU kernels that may be writing to the
+ // buffer before the transfer happens.
+ CHECK_EQ(cudaStreamSynchronize(context_->stream_), cudaSuccess);
+ data_.CopyToCpu(x->data(), num_rows_);
+}
+
+void CudaVector::CopyTo(double* x) const {
+ CHECK(x != nullptr);
+ // Need to synchronize with any GPU kernels that may be writing to the
+ // buffer before the transfer happens.
+ CHECK_EQ(cudaStreamSynchronize(context_->stream_), cudaSuccess);
+ data_.CopyToCpu(x, num_rows_);
+}
+
+void CudaVector::SetZero() {
+ CHECK(data_.data() != nullptr);
+ CudaSetZeroFP64(data_.data(), num_rows_, context_->stream_);
+}
+
+void CudaVector::Axpby(double a, const CudaVector& x, double b) {
+ if (&x == this) {
+ Scale(a + b);
+ return;
+ }
+ CHECK_EQ(num_rows_, x.num_rows_);
+ if (b != 1.0) {
+ // First scale y by b.
+ CHECK_EQ(cublasDscal(context_->cublas_handle_,
+ num_rows_,
+ &b,
+ data_.data(),
+ 1),
+ CUBLAS_STATUS_SUCCESS) << "CuBLAS cublasDscal failed.";
+ }
+ // Then add a * x to y.
+ CHECK_EQ(cublasDaxpy(context_->cublas_handle_,
+ num_rows_,
+ &a,
+ x.data().data(),
+ 1,
+ data_.data(),
+ 1),
+ CUBLAS_STATUS_SUCCESS) << "CuBLAS cublasDaxpy failed.";
+}
+
+void CudaVector::DtDxpy(const CudaVector& D, const CudaVector& x) {
+ CudaDtDxpy(data_.data(),
+ D.data().data(),
+ x.data().data(),
+ num_rows_,
+ context_->stream_);
+}
+
+void CudaVector::Scale(double s) {
+ CHECK_EQ(cublasDscal(context_->cublas_handle_,
+ num_rows_,
+ &s,
+ data_.data(),
+ 1),
+ CUBLAS_STATUS_SUCCESS) << "CuBLAS cublasDscal failed.";
+}
+
+} // namespace ceres::internal
+
+#endif // CERES_NO_CUDA
\ No newline at end of file
diff --git a/internal/ceres/cuda_vector.h b/internal/ceres/cuda_vector.h
new file mode 100644
index 0000000..4018c1b
--- /dev/null
+++ b/internal/ceres/cuda_vector.h
@@ -0,0 +1,156 @@
+// Ceres Solver - A fast non-linear least squares minimizer
+// Copyright 2022 Google Inc. All rights reserved.
+// http://ceres-solver.org/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+// * Neither the name of Google Inc. nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+// Author: joydeepb@cs.utexas.edu (Joydeep Biswas)
+//
+// A simple CUDA vector class.
+
+#ifndef CERES_INTERNAL_CUDA_VECTOR_H_
+#define CERES_INTERNAL_CUDA_VECTOR_H_
+
+// This include must come before any #ifndef check on Ceres compile options.
+// clang-format off
+#include "ceres/internal/config.h"
+// clang-format on
+
+#include <math.h>
+#include <memory>
+#include <string>
+
+#include "ceres/internal/export.h"
+#include "ceres/types.h"
+#include "ceres/context_impl.h"
+
+#ifndef CERES_NO_CUDA
+
+#include "ceres/cuda_buffer.h"
+#include "ceres/ceres_cuda_kernels.h"
+#include "ceres/internal/eigen.h"
+#include "cublas_v2.h"
+#include "cusparse.h"
+
+namespace ceres::internal {
+
+// An Nx1 vector, denoted y, hosted on the GPU, with CUDA-accelerated operations.
+class CERES_NO_EXPORT CudaVector {
+ public:
+
+  // Create a pre-allocated vector of size N on the GPU. The caller must
+  // ensure that InitCUDA() has already been successfully called on the
+  // context before constructing this object.
+ CudaVector(ContextImpl* context, int size);
+
+ ~CudaVector();
+
+ void Resize(int size);
+
+ // Perform a deep copy of the vector.
+ CudaVector& operator=(const CudaVector&);
+
+ // Return the inner product x' * y.
+ double Dot(const CudaVector& x) const;
+
+ // Return the L2 norm of the vector (||y||_2).
+ double Norm() const;
+
+ // Set all elements to zero.
+ void SetZero();
+
+ // Copy from Eigen vector.
+ void CopyFromCpu(const Vector& x);
+
+ // Copy to Eigen vector.
+ void CopyTo(Vector* x) const;
+
+ // Copy to CPU memory array. It is the caller's responsibility to ensure
+ // that the array is large enough.
+ void CopyTo(double* x) const;
+
+ // y = a * x + b * y.
+ void Axpby(double a, const CudaVector& x, double b);
+
+  // y = diag(D)' * diag(D) * x + y.
+ void DtDxpy(const CudaVector& D, const CudaVector& x);
+
+ // y = s * y.
+ void Scale(double s);
+
+ int num_rows() const { return num_rows_; }
+ int num_cols() const { return 1; }
+
+ const CudaBuffer<double>& data() const { return data_; }
+
+ const cusparseDnVecDescr_t& descr() const { return descr_; }
+
+ private:
+ CudaVector(const CudaVector&) = delete;
+ void DestroyDescriptor();
+
+ int num_rows_ = 0;
+ ContextImpl* context_ = nullptr;
+ CudaBuffer<double> data_;
+ // CuSparse object that describes this dense vector.
+ cusparseDnVecDescr_t descr_ = nullptr;
+};
+
+// Blas1 operations on Cuda vectors. These functions are needed as an
+// abstraction layer so that we can use different versions of a vector style
+// object in the conjugate gradients linear solver.
+inline double Norm(const CudaVector& x) { return x.Norm(); }
+inline void SetZero(CudaVector& x) { x.SetZero(); }
+inline void Axpby(
+ double a,
+ const CudaVector& x,
+ double b,
+ const CudaVector& y,
+ CudaVector& z) {
+ if (&x == &y && &y == &z) {
+ // z = (a + b) * z;
+ z.Scale(a + b);
+ } else if (&x == &z) {
+ // x is aliased to z.
+ // z = x
+ // = b * y + a * x;
+ z.Axpby(b, y, a);
+ } else if (&y == &z) {
+ // y is aliased to z.
+ // z = y = a * x + b * y;
+ z.Axpby(a, x, b);
+ } else {
+ // General case: all inputs and outputs are distinct.
+ z = y;
+ z.Axpby(a, x, b);
+ }
+}
+inline double Dot(const CudaVector& x, const CudaVector& y) { return x.Dot(y); }
+inline void Copy(const CudaVector& from, CudaVector& to) { to = from; }
+
+} // namespace ceres::internal
+
+#endif // CERES_NO_CUDA
+#endif // CERES_INTERNAL_CUDA_VECTOR_H_
diff --git a/internal/ceres/cuda_vector_test.cc b/internal/ceres/cuda_vector_test.cc
new file mode 100644
index 0000000..db930ed
--- /dev/null
+++ b/internal/ceres/cuda_vector_test.cc
@@ -0,0 +1,422 @@
+// Ceres Solver - A fast non-linear least squares minimizer
+// Copyright 2022 Google Inc. All rights reserved.
+// http://ceres-solver.org/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+// * Neither the name of Google Inc. nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+// Author: joydeepb@cs.utexas.edu (Joydeep Biswas)
+
+#include <string>
+
+#include "ceres/internal/config.h"
+#include "ceres/internal/eigen.h"
+#include "ceres/cuda_vector.h"
+#include "glog/logging.h"
+#include "gtest/gtest.h"
+
+namespace ceres {
+namespace internal {
+
+#ifndef CERES_NO_CUDA
+
+TEST(CudaVector, Creation) {
+ ContextImpl context;
+ std::string message;
+ CHECK(context.InitCUDA(&message)) << "InitCUDA() failed because: " << message;
+ CudaVector x(&context, 1000);
+ EXPECT_EQ(x.num_rows(), 1000);
+ EXPECT_NE(x.data().data(), nullptr);
+}
+
+TEST(CudaVector, CopyVector) {
+ Vector x(3);
+ x << 1, 2, 3;
+ ContextImpl context;
+ std::string message;
+ CHECK(context.InitCUDA(&message)) << "InitCUDA() failed because: " << message;
+ CudaVector y(&context, 10);
+ y.CopyFromCpu(x);
+ EXPECT_EQ(y.num_rows(), 3);
+
+ Vector z(3);
+ z << 0, 0, 0;
+ y.CopyTo(&z);
+ EXPECT_EQ(x, z);
+}
+
+TEST(CudaVector, DeepCopy) {
+ Vector x(3);
+ x << 1, 2, 3;
+ ContextImpl context;
+ std::string message;
+ CHECK(context.InitCUDA(&message)) << "InitCUDA() failed because: " << message;
+ CudaVector x_gpu(&context, 3);
+ x_gpu.CopyFromCpu(x);
+
+ CudaVector y_gpu(&context, 3);
+ y_gpu.SetZero();
+ EXPECT_EQ(y_gpu.Norm(), 0.0);
+
+ y_gpu = x_gpu;
+ Vector y(3);
+ y << 0, 0, 0;
+ y_gpu.CopyTo(&y);
+ EXPECT_EQ(x, y);
+}
+
+TEST(CudaVector, Dot) {
+ Vector x(3);
+ Vector y(3);
+ x << 1, 2, 3;
+ y << 100, 10, 1;
+ ContextImpl context;
+ std::string message;
+ CHECK(context.InitCUDA(&message)) << "InitCUDA() failed because: " << message;
+ CudaVector x_gpu(&context, 10);
+ CudaVector y_gpu(&context, 10);
+ x_gpu.CopyFromCpu(x);
+ y_gpu.CopyFromCpu(y);
+
+ EXPECT_EQ(x_gpu.Dot(y_gpu), 123.0);
+ EXPECT_EQ(Dot(x_gpu, y_gpu), 123.0);
+}
+
+TEST(CudaVector, Norm) {
+ Vector x(3);
+ x << 1, 2, 3;
+ ContextImpl context;
+ std::string message;
+ CHECK(context.InitCUDA(&message)) << "InitCUDA() failed because: " << message;
+ CudaVector x_gpu(&context, 10);
+ x_gpu.CopyFromCpu(x);
+
+ EXPECT_NEAR(x_gpu.Norm(),
+ sqrt(1.0 + 4.0 + 9.0),
+ std::numeric_limits<double>::epsilon());
+
+ EXPECT_NEAR(Norm(x_gpu),
+ sqrt(1.0 + 4.0 + 9.0),
+ std::numeric_limits<double>::epsilon());
+}
+
+TEST(CudaVector, SetZero) {
+ Vector x(4);
+ x << 1, 1, 1, 1;
+ ContextImpl context;
+ std::string message;
+ CHECK(context.InitCUDA(&message)) << "InitCUDA() failed because: " << message;
+ CudaVector x_gpu(&context, 10);
+ x_gpu.CopyFromCpu(x);
+
+ EXPECT_NEAR(x_gpu.Norm(),
+ 2.0,
+ std::numeric_limits<double>::epsilon());
+
+ x_gpu.SetZero();
+ EXPECT_NEAR(x_gpu.Norm(),
+ 0.0,
+ std::numeric_limits<double>::epsilon());
+
+ x_gpu.CopyFromCpu(x);
+ EXPECT_NEAR(x_gpu.Norm(),
+ 2.0,
+ std::numeric_limits<double>::epsilon());
+ SetZero(x_gpu);
+ EXPECT_NEAR(x_gpu.Norm(),
+ 0.0,
+ std::numeric_limits<double>::epsilon());
+}
+
+TEST(CudaVector, Resize) {
+ ContextImpl context;
+ std::string message;
+ CHECK(context.InitCUDA(&message)) << "InitCUDA() failed because: " << message;
+ CudaVector x_gpu(&context, 10);
+ EXPECT_EQ(x_gpu.num_rows(), 10);
+ x_gpu.Resize(4);
+ EXPECT_EQ(x_gpu.num_rows(), 4);
+}
+
+TEST(CudaVector, Axpy) {
+ Vector x(4);
+ Vector y(4);
+ x << 1, 1, 1, 1;
+ y << 100, 10, 1, 0;
+ ContextImpl context;
+ std::string message;
+ CHECK(context.InitCUDA(&message)) << "InitCUDA() failed because: " << message;
+ CudaVector x_gpu(&context, 4);
+ CudaVector y_gpu(&context, 4);
+ x_gpu.CopyFromCpu(x);
+ y_gpu.CopyFromCpu(y);
+
+ x_gpu.Axpby(2.0, y_gpu, 1.0);
+ Vector result;
+ Vector expected(4);
+ expected << 201, 21, 3, 1;
+ x_gpu.CopyTo(&result);
+ EXPECT_EQ(result, expected);
+}
+
+TEST(CudaVector, AxpbyBEquals1) {
+ Vector x(4);
+ Vector y(4);
+ x << 1, 1, 1, 1;
+ y << 100, 10, 1, 0;
+ ContextImpl context;
+ std::string message;
+ CHECK(context.InitCUDA(&message))
+ << "InitCUDA() failed because: " << message;
+ CudaVector x_gpu(&context, 4);
+ CudaVector y_gpu(&context, 4);
+ x_gpu.CopyFromCpu(x);
+ y_gpu.CopyFromCpu(y);
+
+ x_gpu.Axpby(2.0, y_gpu, 1.0);
+ Vector result;
+ Vector expected(4);
+ expected << 201, 21, 3, 1;
+ x_gpu.CopyTo(&result);
+ EXPECT_EQ(result, expected);
+}
+
+TEST(CudaVector, AxpbyMemberFunctionBNotEqual1) {
+ Vector x(4);
+ Vector y(4);
+ x << 1, 1, 1, 1;
+ y << 100, 10, 1, 0;
+ ContextImpl context;
+ std::string message;
+ CHECK(context.InitCUDA(&message))
+ << "InitCUDA() failed because: " << message;
+ CudaVector x_gpu(&context, 4);
+ CudaVector y_gpu(&context, 4);
+ x_gpu.CopyFromCpu(x);
+ y_gpu.CopyFromCpu(y);
+
+ x_gpu.Axpby(2.0, y_gpu, 3.0);
+ Vector result;
+ Vector expected(4);
+ expected << 203, 23, 5, 3;
+ x_gpu.CopyTo(&result);
+ EXPECT_EQ(result, expected);
+}
+
+TEST(CudaVector, AxpbyMemberFunctionBEqual1) {
+ Vector x(4);
+ Vector y(4);
+ x << 1, 1, 1, 1;
+ y << 100, 10, 1, 0;
+ ContextImpl context;
+ std::string message;
+ CHECK(context.InitCUDA(&message))
+ << "InitCUDA() failed because: " << message;
+ CudaVector x_gpu(&context, 4);
+ CudaVector y_gpu(&context, 4);
+ x_gpu.CopyFromCpu(x);
+ y_gpu.CopyFromCpu(y);
+
+ x_gpu.Axpby(2.0, y_gpu, 1.0);
+ Vector result;
+ Vector expected(4);
+ expected << 201, 21, 3, 1;
+ x_gpu.CopyTo(&result);
+ EXPECT_EQ(result, expected);
+}
+
+TEST(CudaVector, AxpbyMemberXAliasesY) {
+ Vector x(4);
+ x << 100, 10, 1, 0;
+ ContextImpl context;
+ std::string message;
+ CHECK(context.InitCUDA(&message))
+ << "InitCUDA() failed because: " << message;
+ CudaVector x_gpu(&context, 4);
+ CudaVector y_gpu(&context, 4);
+ x_gpu.CopyFromCpu(x);
+ y_gpu.SetZero();
+
+ x_gpu.Axpby(2.0, x_gpu, 1.0);
+ Vector result;
+ Vector expected(4);
+ expected << 300, 30, 3, 0;
+ x_gpu.CopyTo(&result);
+ EXPECT_EQ(result, expected);
+}
+
+TEST(CudaVector, AxpbyNonMemberMethodNoAliases) {
+ Vector x(4);
+ Vector y(4);
+ x << 1, 1, 1, 1;
+ y << 100, 10, 1, 0;
+ ContextImpl context;
+ std::string message;
+ CHECK(context.InitCUDA(&message))
+ << "InitCUDA() failed because: " << message;
+ CudaVector x_gpu(&context, 4);
+ CudaVector y_gpu(&context, 4);
+ CudaVector z_gpu(&context, 4);
+ x_gpu.CopyFromCpu(x);
+ y_gpu.CopyFromCpu(y);
+ z_gpu.Resize(4);
+ z_gpu.SetZero();
+
+ Axpby(2.0, x_gpu, 3.0, y_gpu, z_gpu);
+ Vector result;
+ Vector expected(4);
+ expected << 302, 32, 5, 2;
+ z_gpu.CopyTo(&result);
+ EXPECT_EQ(result, expected);
+}
+
+TEST(CudaVector, AxpbyNonMemberMethodXAliasesY) {
+ Vector x(4);
+ x << 100, 10, 1, 0;
+ ContextImpl context;
+ std::string message;
+ CHECK(context.InitCUDA(&message))
+ << "InitCUDA() failed because: " << message;
+ CudaVector x_gpu(&context, 4);
+ CudaVector z_gpu(&context, 4);
+ x_gpu.CopyFromCpu(x);
+ z_gpu.SetZero();
+
+ Axpby(2.0, x_gpu, 3.0, x_gpu, z_gpu);
+ Vector result;
+ Vector expected(4);
+ expected << 500, 50, 5, 0;
+ z_gpu.CopyTo(&result);
+ EXPECT_EQ(result, expected);
+}
+
+TEST(CudaVector, AxpbyNonMemberMethodXAliasesZ) {
+ Vector x(4);
+ Vector y(4);
+ x << 1, 1, 1, 1;
+ y << 100, 10, 1, 0;
+ ContextImpl context;
+ std::string message;
+ CHECK(context.InitCUDA(&message))
+ << "InitCUDA() failed because: " << message;
+ CudaVector x_gpu(&context, 10);
+ CudaVector y_gpu(&context, 10);
+ x_gpu.CopyFromCpu(x);
+ y_gpu.CopyFromCpu(y);
+
+ Axpby(2.0, x_gpu, 3.0, y_gpu, x_gpu);
+ Vector result;
+ Vector expected(4);
+ expected << 302, 32, 5, 2;
+ x_gpu.CopyTo(&result);
+ EXPECT_EQ(result, expected);
+}
+
+TEST(CudaVector, AxpbyNonMemberMethodYAliasesZ) {
+ Vector x(4);
+ Vector y(4);
+ x << 1, 1, 1, 1;
+ y << 100, 10, 1, 0;
+ ContextImpl context;
+ std::string message;
+ CHECK(context.InitCUDA(&message))
+ << "InitCUDA() failed because: " << message;
+ CudaVector x_gpu(&context, 4);
+ CudaVector y_gpu(&context, 4);
+ x_gpu.CopyFromCpu(x);
+ y_gpu.CopyFromCpu(y);
+
+ Axpby(2.0, x_gpu, 3.0, y_gpu, y_gpu);
+ Vector result;
+ Vector expected(4);
+ expected << 302, 32, 5, 2;
+ y_gpu.CopyTo(&result);
+ EXPECT_EQ(result, expected);
+}
+
+TEST(CudaVector, AxpbyNonMemberMethodXAliasesYAliasesZ) {
+ Vector x(4);
+ x << 100, 10, 1, 0;
+ ContextImpl context;
+ std::string message;
+ CHECK(context.InitCUDA(&message))
+ << "InitCUDA() failed because: " << message;
+ CudaVector x_gpu(&context, 10);
+ x_gpu.CopyFromCpu(x);
+
+ Axpby(2.0, x_gpu, 3.0, x_gpu, x_gpu);
+ Vector result;
+ Vector expected(4);
+ expected << 500, 50, 5, 0;
+ x_gpu.CopyTo(&result);
+ EXPECT_EQ(result, expected);
+}
+
+TEST(CudaVector, DtDxpy) {
+ Vector x(4);
+ Vector y(4);
+ Vector D(4);
+ x << 1, 2, 3, 4;
+ y << 100, 10, 1, 0;
+ D << 4, 3, 2, 1;
+ ContextImpl context;
+ std::string message;
+ CHECK(context.InitCUDA(&message)) << "InitCUDA() failed because: " << message;
+ CudaVector x_gpu(&context, 4);
+ CudaVector y_gpu(&context, 4);
+ CudaVector D_gpu(&context, 4);
+ x_gpu.CopyFromCpu(x);
+ y_gpu.CopyFromCpu(y);
+ D_gpu.CopyFromCpu(D);
+
+ y_gpu.DtDxpy(D_gpu, x_gpu);
+ Vector result;
+ Vector expected(4);
+ expected << 116, 28, 13, 4;
+ y_gpu.CopyTo(&result);
+ EXPECT_EQ(result, expected);
+}
+
+TEST(CudaVector, Scale) {
+ Vector x(4);
+ x << 1, 2, 3, 4;
+ ContextImpl context;
+ std::string message;
+ CHECK(context.InitCUDA(&message)) << "InitCUDA() failed because: " << message;
+ CudaVector x_gpu(&context, 4);
+ x_gpu.CopyFromCpu(x);
+
+ x_gpu.Scale(-3.0);
+
+ Vector result;
+ Vector expected(4);
+ expected << -3.0, -6.0, -9.0, -12.0;
+ x_gpu.CopyTo(&result);
+ EXPECT_EQ(result, expected);
+}
+
+#endif // CERES_NO_CUDA
+
+} // namespace internal
+} // namespace ceres