| // Ceres Solver - A fast non-linear least squares minimizer | 
 | // Copyright 2023 Google Inc. All rights reserved. | 
 | // http://ceres-solver.org/ | 
 | // | 
 | // Redistribution and use in source and binary forms, with or without | 
 | // modification, are permitted provided that the following conditions are met: | 
 | // | 
 | // * Redistributions of source code must retain the above copyright notice, | 
 | //   this list of conditions and the following disclaimer. | 
 | // * Redistributions in binary form must reproduce the above copyright notice, | 
 | //   this list of conditions and the following disclaimer in the documentation | 
 | //   and/or other materials provided with the distribution. | 
 | // * Neither the name of Google Inc. nor the names of its contributors may be | 
 | //   used to endorse or promote products derived from this software without | 
 | //   specific prior written permission. | 
 | // | 
 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | 
 | // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | 
 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | 
 | // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE | 
 | // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | 
 | // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | 
 | // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | 
 | // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | 
 | // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | 
 | // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | 
 | // POSSIBILITY OF SUCH DAMAGE. | 
 | // | 
 | // Author: joydeepb@cs.utexas.edu (Joydeep Biswas) | 
 | // | 
 | // A CUDA sparse matrix linear operator. | 
 |  | 
 | // This include must come before any #ifndef check on Ceres compile options. | 
 | // clang-format off | 
 | #include "ceres/internal/config.h" | 
 | // clang-format on | 
 |  | 
 | #include "ceres/cuda_sparse_matrix.h" | 
 |  | 
 | #include <math.h> | 
 |  | 
 | #include <memory> | 
 |  | 
 | #include "absl/log/check.h" | 
 | #include "ceres/block_sparse_matrix.h" | 
 | #include "ceres/compressed_row_sparse_matrix.h" | 
 | #include "ceres/context_impl.h" | 
 | #include "ceres/crs_matrix.h" | 
 | #include "ceres/internal/export.h" | 
 | #include "ceres/types.h" | 
 |  | 
 | #ifndef CERES_NO_CUDA | 
 |  | 
 | #include "ceres/cuda_buffer.h" | 
 | #include "ceres/cuda_kernels_vector_ops.h" | 
 | #include "ceres/cuda_vector.h" | 
 | #include "cuda_runtime_api.h" | 
 | #include "cusparse.h" | 
 |  | 
 | namespace ceres::internal { | 
 | namespace { | 
 | // Starting in CUDA 11.2.1, CUSPARSE_MV_ALG_DEFAULT was deprecated in favor of | 
 | // CUSPARSE_SPMV_ALG_DEFAULT. | 
 | #if CUDART_VERSION >= 11021 | 
 | const auto kSpMVAlgorithm = CUSPARSE_SPMV_ALG_DEFAULT; | 
 | #else   // CUDART_VERSION >= 11021 | 
 | const auto kSpMVAlgorithm = CUSPARSE_MV_ALG_DEFAULT; | 
 | #endif  // CUDART_VERSION >= 11021 | 
 | size_t GetTempBufferSizeForOp(const cusparseHandle_t& handle, | 
 |                               const cusparseOperation_t op, | 
 |                               const cusparseDnVecDescr_t& x, | 
 |                               const cusparseDnVecDescr_t& y, | 
 |                               const cusparseSpMatDescr_t& A) { | 
 |   size_t buffer_size; | 
 |   const double alpha = 1.0; | 
 |   const double beta = 1.0; | 
 |   CHECK_NE(A, nullptr); | 
 |   CHECK_EQ(cusparseSpMV_bufferSize(handle, | 
 |                                    op, | 
 |                                    &alpha, | 
 |                                    A, | 
 |                                    x, | 
 |                                    &beta, | 
 |                                    y, | 
 |                                    CUDA_R_64F, | 
 |                                    kSpMVAlgorithm, | 
 |                                    &buffer_size), | 
 |            CUSPARSE_STATUS_SUCCESS); | 
 |   return buffer_size; | 
 | } | 
 |  | 
 | size_t GetTempBufferSize(const cusparseHandle_t& handle, | 
 |                          const cusparseDnVecDescr_t& left, | 
 |                          const cusparseDnVecDescr_t& right, | 
 |                          const cusparseSpMatDescr_t& A) { | 
 |   CHECK_NE(A, nullptr); | 
 |   return std::max(GetTempBufferSizeForOp( | 
 |                       handle, CUSPARSE_OPERATION_NON_TRANSPOSE, right, left, A), | 
 |                   GetTempBufferSizeForOp( | 
 |                       handle, CUSPARSE_OPERATION_TRANSPOSE, left, right, A)); | 
 | } | 
 | }  // namespace | 
 |  | 
 | CudaSparseMatrix::CudaSparseMatrix(int num_cols, | 
 |                                    CudaBuffer<int32_t>&& rows, | 
 |                                    CudaBuffer<int32_t>&& cols, | 
 |                                    ContextImpl* context) | 
 |     : num_rows_(rows.size() - 1), | 
 |       num_cols_(num_cols), | 
 |       num_nonzeros_(cols.size()), | 
 |       context_(context), | 
 |       rows_(std::move(rows)), | 
 |       cols_(std::move(cols)), | 
 |       values_(context, num_nonzeros_), | 
 |       spmv_buffer_(context) { | 
 |   Initialize(); | 
 | } | 
 |  | 
 | CudaSparseMatrix::CudaSparseMatrix(ContextImpl* context, | 
 |                                    const CompressedRowSparseMatrix& crs_matrix) | 
 |     : num_rows_(crs_matrix.num_rows()), | 
 |       num_cols_(crs_matrix.num_cols()), | 
 |       num_nonzeros_(crs_matrix.num_nonzeros()), | 
 |       context_(context), | 
 |       rows_(context, num_rows_ + 1), | 
 |       cols_(context, num_nonzeros_), | 
 |       values_(context, num_nonzeros_), | 
 |       spmv_buffer_(context) { | 
 |   rows_.CopyFromCpu(crs_matrix.rows(), num_rows_ + 1); | 
 |   cols_.CopyFromCpu(crs_matrix.cols(), num_nonzeros_); | 
 |   values_.CopyFromCpu(crs_matrix.values(), num_nonzeros_); | 
 |   Initialize(); | 
 | } | 
 |  | 
 | CudaSparseMatrix::~CudaSparseMatrix() { | 
 |   CHECK_EQ(cusparseDestroySpMat(descr_), CUSPARSE_STATUS_SUCCESS); | 
 |   descr_ = nullptr; | 
 |   CHECK_EQ(CUSPARSE_STATUS_SUCCESS, cusparseDestroyDnVec(descr_vec_left_)); | 
 |   CHECK_EQ(CUSPARSE_STATUS_SUCCESS, cusparseDestroyDnVec(descr_vec_right_)); | 
 | } | 
 |  | 
 | void CudaSparseMatrix::CopyValuesFromCpu( | 
 |     const CompressedRowSparseMatrix& crs_matrix) { | 
 |   // There is no quick and easy way to verify that the structure is unchanged, | 
 |   // but at least we can check that the size of the matrix and the number of | 
 |   // nonzeros is unchanged. | 
 |   CHECK_EQ(num_rows_, crs_matrix.num_rows()); | 
 |   CHECK_EQ(num_cols_, crs_matrix.num_cols()); | 
 |   CHECK_EQ(num_nonzeros_, crs_matrix.num_nonzeros()); | 
 |   values_.CopyFromCpu(crs_matrix.values(), num_nonzeros_); | 
 | } | 
 |  | 
 | void CudaSparseMatrix::Initialize() { | 
 |   CHECK(context_->IsCudaInitialized()); | 
 |   CHECK_EQ(CUSPARSE_STATUS_SUCCESS, | 
 |            cusparseCreateCsr(&descr_, | 
 |                              num_rows_, | 
 |                              num_cols_, | 
 |                              num_nonzeros_, | 
 |                              rows_.data(), | 
 |                              cols_.data(), | 
 |                              values_.data(), | 
 |                              CUSPARSE_INDEX_32I, | 
 |                              CUSPARSE_INDEX_32I, | 
 |                              CUSPARSE_INDEX_BASE_ZERO, | 
 |                              CUDA_R_64F)); | 
 |  | 
 |   // Note: values_.data() is used as non-zero pointer to device memory | 
 |   // When there is no non-zero values, data-pointer of values_ array will be a | 
 |   // nullptr; but in this case left/right products are trivial and temporary | 
 |   // buffer (and vector descriptors) is not required | 
 |   if (!num_nonzeros_) return; | 
 |  | 
 |   CHECK_EQ(CUSPARSE_STATUS_SUCCESS, | 
 |            cusparseCreateDnVec( | 
 |                &descr_vec_left_, num_rows_, values_.data(), CUDA_R_64F)); | 
 |   CHECK_EQ(CUSPARSE_STATUS_SUCCESS, | 
 |            cusparseCreateDnVec( | 
 |                &descr_vec_right_, num_cols_, values_.data(), CUDA_R_64F)); | 
 |   size_t buffer_size = GetTempBufferSize( | 
 |       context_->cusparse_handle_, descr_vec_left_, descr_vec_right_, descr_); | 
 |   spmv_buffer_.Reserve(buffer_size); | 
 | } | 
 |  | 
 | void CudaSparseMatrix::SpMv(cusparseOperation_t op, | 
 |                             const cusparseDnVecDescr_t& x, | 
 |                             const cusparseDnVecDescr_t& y) const { | 
 |   const double alpha = 1.0; | 
 |   const double beta = 1.0; | 
 |  | 
 |   CHECK_EQ(cusparseSpMV(context_->cusparse_handle_, | 
 |                         op, | 
 |                         &alpha, | 
 |                         descr_, | 
 |                         x, | 
 |                         &beta, | 
 |                         y, | 
 |                         CUDA_R_64F, | 
 |                         kSpMVAlgorithm, | 
 |                         spmv_buffer_.data()), | 
 |            CUSPARSE_STATUS_SUCCESS); | 
 | } | 
 |  | 
 | void CudaSparseMatrix::RightMultiplyAndAccumulate(const CudaVector& x, | 
 |                                                   CudaVector* y) const { | 
 |   DCHECK(GetTempBufferSize( | 
 |              context_->cusparse_handle_, y->descr(), x.descr(), descr_) <= | 
 |          spmv_buffer_.size()); | 
 |   SpMv(CUSPARSE_OPERATION_NON_TRANSPOSE, x.descr(), y->descr()); | 
 | } | 
 |  | 
 | void CudaSparseMatrix::LeftMultiplyAndAccumulate(const CudaVector& x, | 
 |                                                  CudaVector* y) const { | 
 |   // TODO(Joydeep Biswas): We should consider storing a transposed copy of the | 
 |   // matrix by converting CSR to CSC. From the cuSPARSE documentation: | 
 |   // "In general, opA == CUSPARSE_OPERATION_NON_TRANSPOSE is 3x faster than opA | 
 |   // != CUSPARSE_OPERATION_NON_TRANSPOSE" | 
 |   DCHECK(GetTempBufferSize( | 
 |              context_->cusparse_handle_, x.descr(), y->descr(), descr_) <= | 
 |          spmv_buffer_.size()); | 
 |   SpMv(CUSPARSE_OPERATION_TRANSPOSE, x.descr(), y->descr()); | 
 | } | 
 |  | 
 | }  // namespace ceres::internal | 
 |  | 
 | #endif  // CERES_NO_CUDA |