| // Ceres Solver - A fast non-linear least squares minimizer |
| // Copyright 2022 Google Inc. All rights reserved. |
| // http://ceres-solver.org/ |
| // |
| // Redistribution and use in source and binary forms, with or without |
| // modification, are permitted provided that the following conditions are met: |
| // |
| // * Redistributions of source code must retain the above copyright notice, |
| // this list of conditions and the following disclaimer. |
| // * Redistributions in binary form must reproduce the above copyright notice, |
| // this list of conditions and the following disclaimer in the documentation |
| // and/or other materials provided with the distribution. |
| // * Neither the name of Google Inc. nor the names of its contributors may be |
| // used to endorse or promote products derived from this software without |
| // specific prior written permission. |
| // |
| // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
| // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
| // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
| // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
| // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
| // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
| // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
| // POSSIBILITY OF SUCH DAMAGE. |
| // |
| // Author: joydeepb@cs.utexas.edu (Joydeep Biswas) |
| // |
| // A simple CUDA vector class. |
| |
| // This include must come before any #ifndef check on Ceres compile options. |
| // clang-format off |
| #include "ceres/internal/config.h" |
| // clang-format on |
| |
| #include <math.h> |
| |
| #include "ceres/context_impl.h" |
| #include "ceres/internal/export.h" |
| #include "ceres/types.h" |
| |
| #ifndef CERES_NO_CUDA |
| |
| #include "ceres/cuda_buffer.h" |
| #include "ceres/cuda_kernels.h" |
| #include "ceres/cuda_vector.h" |
| #include "cublas_v2.h" |
| |
| namespace ceres::internal { |
| |
| CudaVector::CudaVector(ContextImpl* context, int size) { |
| DCHECK_NE(context, nullptr); |
| CHECK(context->IsCudaInitialized()); |
| context_ = context; |
| Resize(size); |
| } |
| |
| CudaVector& CudaVector::operator=(const CudaVector& other) { |
| if (this != &other) { |
| Resize(other.num_rows()); |
| data_.CopyFromGPUArray(other.data_.data(), num_rows_, context_->stream_); |
| } |
| return *this; |
| } |
| |
| void CudaVector::DestroyDescriptor() { |
| if (descr_ != nullptr) { |
| CHECK_EQ(cusparseDestroyDnVec(descr_), CUSPARSE_STATUS_SUCCESS); |
| descr_ = nullptr; |
| } |
| } |
| |
| CudaVector::~CudaVector() { DestroyDescriptor(); } |
| |
| void CudaVector::Resize(int size) { |
| data_.Reserve(size); |
| num_rows_ = size; |
| DestroyDescriptor(); |
| CHECK_EQ(cusparseCreateDnVec(&descr_, num_rows_, data_.data(), CUDA_R_64F), |
| CUSPARSE_STATUS_SUCCESS); |
| } |
| |
| double CudaVector::Dot(const CudaVector& x) const { |
| double result = 0; |
| CHECK_EQ(cublasDdot(context_->cublas_handle_, |
| num_rows_, |
| data_.data(), |
| 1, |
| x.data().data(), |
| 1, |
| &result), |
| CUBLAS_STATUS_SUCCESS) |
| << "CuBLAS cublasDdot failed."; |
| return result; |
| } |
| |
| double CudaVector::Norm() const { |
| double result = 0; |
| CHECK_EQ(cublasDnrm2( |
| context_->cublas_handle_, num_rows_, data_.data(), 1, &result), |
| CUBLAS_STATUS_SUCCESS) |
| << "CuBLAS cublasDnrm2 failed."; |
| return result; |
| } |
| |
| void CudaVector::CopyFromCpu(const Vector& x) { |
| data_.Reserve(x.rows()); |
| data_.CopyFromCpu(x.data(), x.rows(), context_->stream_); |
| num_rows_ = x.rows(); |
| DestroyDescriptor(); |
| CHECK_EQ(cusparseCreateDnVec(&descr_, num_rows_, data_.data(), CUDA_R_64F), |
| CUSPARSE_STATUS_SUCCESS); |
| } |
| |
| void CudaVector::CopyTo(Vector* x) const { |
| CHECK(x != nullptr); |
| x->resize(num_rows_); |
| // Need to synchronize with any GPU kernels that may be writing to the |
| // buffer before the transfer happens. |
| CHECK_EQ(cudaStreamSynchronize(context_->stream_), cudaSuccess); |
| data_.CopyToCpu(x->data(), num_rows_); |
| } |
| |
| void CudaVector::CopyTo(double* x) const { |
| CHECK(x != nullptr); |
| // Need to synchronize with any GPU kernels that may be writing to the |
| // buffer before the transfer happens. |
| CHECK_EQ(cudaStreamSynchronize(context_->stream_), cudaSuccess); |
| data_.CopyToCpu(x, num_rows_); |
| } |
| |
| void CudaVector::SetZero() { |
| CHECK(data_.data() != nullptr); |
| CudaSetZeroFP64(data_.data(), num_rows_, context_->stream_); |
| } |
| |
| void CudaVector::Axpby(double a, const CudaVector& x, double b) { |
| if (&x == this) { |
| Scale(a + b); |
| return; |
| } |
| CHECK_EQ(num_rows_, x.num_rows_); |
| if (b != 1.0) { |
| // First scale y by b. |
| CHECK_EQ( |
| cublasDscal(context_->cublas_handle_, num_rows_, &b, data_.data(), 1), |
| CUBLAS_STATUS_SUCCESS) |
| << "CuBLAS cublasDscal failed."; |
| } |
| // Then add a * x to y. |
| CHECK_EQ(cublasDaxpy(context_->cublas_handle_, |
| num_rows_, |
| &a, |
| x.data().data(), |
| 1, |
| data_.data(), |
| 1), |
| CUBLAS_STATUS_SUCCESS) |
| << "CuBLAS cublasDaxpy failed."; |
| } |
| |
| void CudaVector::DtDxpy(const CudaVector& D, const CudaVector& x) { |
| CudaDtDxpy(data_.data(), |
| D.data().data(), |
| x.data().data(), |
| num_rows_, |
| context_->stream_); |
| } |
| |
| void CudaVector::Scale(double s) { |
| CHECK_EQ( |
| cublasDscal(context_->cublas_handle_, num_rows_, &s, data_.data(), 1), |
| CUBLAS_STATUS_SUCCESS) |
| << "CuBLAS cublasDscal failed."; |
| } |
| |
| } // namespace ceres::internal |
| |
| #endif // CERES_NO_CUDA |