// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2022 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
//   this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
//   this list of conditions and the following disclaimer in the documentation
//   and/or other materials provided with the distribution.
// * Neither the name of Google Inc. nor the names of its contributors may be
//   used to endorse or promote products derived from this software without
//   specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
// Author: joydeepb@cs.utexas.edu (Joydeep Biswas)

#ifndef CERES_INTERNAL_CUDA_BUFFER_H_
#define CERES_INTERNAL_CUDA_BUFFER_H_

#include "ceres/internal/config.h"

#ifndef CERES_NO_CUDA

#include <vector>

#include "cuda_runtime.h"
#include "glog/logging.h"

// An encapsulated buffer to maintain GPU memory, and handle transfers between
// GPU and system memory. It is the responsibility of the user to ensure that
// the appropriate GPU device is selected before each subroutine is called.
// This is particularly important when using multiple GPU devices on different
// CPU threads, since the active CUDA device is determined by the CUDA runtime
// on a per-thread basis. Note that unless otherwise specified, all methods use
// the default stream and are synchronous.
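//
// Example usage (a minimal sketch, assuming a CUDA-capable device has already
// been selected; the element count 1000 is arbitrary):
//
//   CudaBuffer<double> buffer;
//   std::vector<double> host(1000, 1.0);
//   buffer.CopyToGpu(host);                       // Allocate + copy to GPU.
//   // ... launch kernels that read or write buffer.data() ...
//   buffer.CopyToHost(host.data(), host.size());  // Copy results back.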
template <typename T>
class CudaBuffer {
 public:
  CudaBuffer() = default;
  CudaBuffer(const CudaBuffer&) = delete;
  CudaBuffer& operator=(const CudaBuffer&) = delete;

  ~CudaBuffer() {
    if (data_ != nullptr) {
      CHECK_EQ(cudaFree(data_), cudaSuccess);
    }
  }

  // Grow the GPU memory buffer if needed to accommodate data of the specified
  // size.
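  // Note: when the buffer grows, the previous allocation is freed without
  // copying, so existing contents are not preserved across a growing
  // Reserve() call.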
  void Reserve(const size_t size) {
    if (size > size_) {
      if (data_ != nullptr) {
        CHECK_EQ(cudaFree(data_), cudaSuccess);
      }
      CHECK_EQ(cudaMalloc(&data_, size * sizeof(T)), cudaSuccess);
      size_ = size;
    }
  }

  // Perform an asynchronous copy from CPU memory to GPU memory using the
  // stream provided.
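  //
  // A sketch of asynchronous use, where `stream` is created by the caller,
  // and `host_ptr` and `num_values` are placeholders for the caller's data:
  //
  //   cudaStream_t stream;
  //   CHECK_EQ(cudaStreamCreate(&stream), cudaSuccess);
  //   buffer.CopyToGpuAsync(host_ptr, num_values, stream);
  //   // ... enqueue kernels on the same stream ...
  //   CHECK_EQ(cudaStreamSynchronize(stream), cudaSuccess);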
  void CopyToGpuAsync(const T* data, const size_t size, cudaStream_t stream) {
    Reserve(size);
    CHECK_EQ(cudaMemcpyAsync(
                 data_, data, size * sizeof(T), cudaMemcpyHostToDevice, stream),
             cudaSuccess);
  }

  // Copy data from the GPU to CPU memory. This is necessarily synchronous
  // since any potential GPU kernels that may be writing to the buffer must
  // finish before the transfer happens.
  void CopyToHost(T* data, const size_t size) {
    CHECK(data_ != nullptr);
    CHECK_EQ(cudaMemcpy(data, data_, size * sizeof(T), cudaMemcpyDeviceToHost),
             cudaSuccess);
  }

  // Perform a synchronous copy from CPU memory to GPU memory on the default
  // stream.
  void CopyToGpu(const T* data, const size_t size) {
    Reserve(size);
    CHECK_EQ(cudaMemcpy(data_, data, size * sizeof(T), cudaMemcpyHostToDevice),
             cudaSuccess);
  }

  // Convenience overload: copy the contents of a CPU vector to GPU memory.
  void CopyToGpu(const std::vector<T>& data) {
    CopyToGpu(data.data(), data.size());
  }

  // Returns a device pointer to the buffer, valid only on the GPU.
  T* data() { return data_; }
  // Returns the number of elements the buffer can hold.
  size_t size() const { return size_; }

 private:
  T* data_ = nullptr;
  size_t size_ = 0;
};

#endif  // CERES_NO_CUDA

#endif  // CERES_INTERNAL_CUDA_BUFFER_H_