blob: 56c97855732cb6246cec77c0aecfd84b914dd556 [file] [log] [blame]
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2018 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * Neither the name of Google Inc. nor the names of its contributors may be
// used to endorse or promote products derived from this software without
// specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
// Author: vitus@google.com (Michael Vitus)
30
31#include "ceres/context_impl.h"
32
Joydeep Biswas8e084212022-02-14 20:56:30 -060033#include <string>
34
Sameer Agarwal47051592022-03-12 15:22:19 -080035#include "ceres/internal/config.h"
Joydeep Biswas34492962022-08-17 19:19:11 -050036#include "ceres/stringprintf.h"
Joydeep Biswas67bae282022-08-07 17:14:32 -050037#include "ceres/wall_time.h"
Sameer Agarwal47051592022-03-12 15:22:19 -080038
Joydeep Biswas8e084212022-02-14 20:56:30 -060039#ifndef CERES_NO_CUDA
Joydeep Biswas8e084212022-02-14 20:56:30 -060040#include "cublas_v2.h"
Sameer Agarwalee35ef62022-03-12 16:25:45 -080041#include "cuda_runtime.h"
Joydeep Biswas8e084212022-02-14 20:56:30 -060042#include "cusolverDn.h"
43#endif // CERES_NO_CUDA
44
Sameer Agarwalcaf614a2022-04-21 17:41:10 -070045namespace ceres::internal {
Mike Vitusf408f892018-02-22 10:28:39 -080046
Sergiu Deitschc14f3602022-02-09 20:10:26 +010047ContextImpl::ContextImpl() = default;
48
Joydeep Biswas8e084212022-02-14 20:56:30 -060049#ifndef CERES_NO_CUDA
Joydeep Biswas67bae282022-08-07 17:14:32 -050050void ContextImpl::TearDown() {
51 if (cusolver_handle_ != nullptr) {
52 cusolverDnDestroy(cusolver_handle_);
53 cusolver_handle_ = nullptr;
54 }
55 if (cublas_handle_ != nullptr) {
56 cublasDestroy(cublas_handle_);
57 cublas_handle_ = nullptr;
58 }
Sameer Agarwal16668ee2022-09-28 17:35:40 -070059 if (cusparse_handle_ != nullptr) {
Joydeep Biswas67bae282022-08-07 17:14:32 -050060 cusparseDestroy(cusparse_handle_);
61 cusparse_handle_ = nullptr;
62 }
63 if (stream_ != nullptr) {
64 cudaStreamDestroy(stream_);
65 stream_ = nullptr;
66 }
Joydeep Biswasc914c7a2022-08-13 16:20:05 -050067 is_cuda_initialized_ = false;
Joydeep Biswas67bae282022-08-07 17:14:32 -050068}
69
Joydeep Biswas34492962022-08-17 19:19:11 -050070std::string ContextImpl::CudaConfigAsString() const {
71 return ceres::internal::StringPrintf(
72 "======================= CUDA Device Properties ======================\n"
73 "Cuda version : %d.%d\n"
74 "Device ID : %d\n"
75 "Device name : %s\n"
76 "Total GPU memory : %6.f MiB\n"
77 "GPU memory available : %6.f MiB\n"
78 "Compute capability : %d.%d\n"
79 "Warp size : %d\n"
80 "Max threads per block: %d\n"
81 "Max threads per dim : %d %d %d\n"
82 "Max grid size : %d %d %d\n"
83 "Multiprocessor count : %d\n"
84 "====================================================================",
85 cuda_version_major_,
86 cuda_version_minor_,
87 gpu_device_id_in_use_,
88 gpu_device_properties_.name,
89 gpu_device_properties_.totalGlobalMem / 1024.0 / 1024.0,
90 GpuMemoryAvailable() / 1024.0 / 1024.0,
91 gpu_device_properties_.major,
92 gpu_device_properties_.minor,
93 gpu_device_properties_.warpSize,
94 gpu_device_properties_.maxThreadsPerBlock,
95 gpu_device_properties_.maxThreadsDim[0],
96 gpu_device_properties_.maxThreadsDim[1],
97 gpu_device_properties_.maxThreadsDim[2],
98 gpu_device_properties_.maxGridSize[0],
99 gpu_device_properties_.maxGridSize[1],
100 gpu_device_properties_.maxGridSize[2],
101 gpu_device_properties_.multiProcessorCount);
102}
103
104size_t ContextImpl::GpuMemoryAvailable() const {
105 size_t free, total;
106 cudaMemGetInfo(&free, &total);
107 return free;
108}
109
110bool ContextImpl::InitCuda(std::string* message) {
Joydeep Biswasc914c7a2022-08-13 16:20:05 -0500111 if (is_cuda_initialized_) {
Joydeep Biswas8e084212022-02-14 20:56:30 -0600112 return true;
113 }
Joydeep Biswas34492962022-08-17 19:19:11 -0500114 CHECK_EQ(cudaGetDevice(&gpu_device_id_in_use_), cudaSuccess);
115 int cuda_version;
116 CHECK_EQ(cudaRuntimeGetVersion(&cuda_version), cudaSuccess);
117 cuda_version_major_ = cuda_version / 1000;
118 cuda_version_minor_ = (cuda_version % 1000) / 10;
Sameer Agarwal5f194682022-09-01 17:00:34 -0700119 CHECK_EQ(
120 cudaGetDeviceProperties(&gpu_device_properties_, gpu_device_id_in_use_),
121 cudaSuccess);
Joydeep Biswas34492962022-08-17 19:19:11 -0500122 VLOG(3) << "\n" << CudaConfigAsString();
Joydeep Biswas67bae282022-08-07 17:14:32 -0500123 EventLogger event_logger("InitCuda");
Joydeep Biswas8e084212022-02-14 20:56:30 -0600124 if (cublasCreate(&cublas_handle_) != CUBLAS_STATUS_SUCCESS) {
Sameer Agarwal5f194682022-09-01 17:00:34 -0700125 *message =
126 "CUDA initialization failed because cuBLAS::cublasCreate failed.";
Joydeep Biswas8e084212022-02-14 20:56:30 -0600127 cublas_handle_ = nullptr;
128 return false;
129 }
Joydeep Biswas67bae282022-08-07 17:14:32 -0500130 event_logger.AddEvent("cublasCreate");
Joydeep Biswas8e084212022-02-14 20:56:30 -0600131 if (cusolverDnCreate(&cusolver_handle_) != CUSOLVER_STATUS_SUCCESS) {
Sameer Agarwal5f194682022-09-01 17:00:34 -0700132 *message =
133 "CUDA initialization failed because cuSolverDN::cusolverDnCreate "
134 "failed.";
Joydeep Biswas67bae282022-08-07 17:14:32 -0500135 TearDown();
Joydeep Biswas8e084212022-02-14 20:56:30 -0600136 return false;
137 }
Joydeep Biswas67bae282022-08-07 17:14:32 -0500138 event_logger.AddEvent("cusolverDnCreate");
139 if (cusparseCreate(&cusparse_handle_) != CUSPARSE_STATUS_SUCCESS) {
Sameer Agarwal5f194682022-09-01 17:00:34 -0700140 *message =
141 "CUDA initialization failed because cuSPARSE::cusparseCreate failed.";
Joydeep Biswas67bae282022-08-07 17:14:32 -0500142 TearDown();
143 return false;
144 }
145 event_logger.AddEvent("cusparseCreate");
Joydeep Biswas8e084212022-02-14 20:56:30 -0600146 if (cudaStreamCreateWithFlags(&stream_, cudaStreamNonBlocking) !=
147 cudaSuccess) {
Sameer Agarwal5f194682022-09-01 17:00:34 -0700148 *message =
149 "CUDA initialization failed because CUDA::cudaStreamCreateWithFlags "
150 "failed.";
Joydeep Biswas67bae282022-08-07 17:14:32 -0500151 TearDown();
Joydeep Biswas8e084212022-02-14 20:56:30 -0600152 return false;
153 }
Joydeep Biswas67bae282022-08-07 17:14:32 -0500154 event_logger.AddEvent("cudaStreamCreateWithFlags");
Joydeep Biswas8e084212022-02-14 20:56:30 -0600155 if (cusolverDnSetStream(cusolver_handle_, stream_) !=
Sameer Agarwalee35ef62022-03-12 16:25:45 -0800156 CUSOLVER_STATUS_SUCCESS ||
Joydeep Biswas67bae282022-08-07 17:14:32 -0500157 cublasSetStream(cublas_handle_, stream_) != CUBLAS_STATUS_SUCCESS ||
158 cusparseSetStream(cusparse_handle_, stream_) != CUSPARSE_STATUS_SUCCESS) {
Joydeep Biswas34492962022-08-17 19:19:11 -0500159 *message = "CUDA initialization failed because SetStream failed.";
Joydeep Biswas67bae282022-08-07 17:14:32 -0500160 TearDown();
Joydeep Biswas8e084212022-02-14 20:56:30 -0600161 return false;
162 }
Joydeep Biswas67bae282022-08-07 17:14:32 -0500163 event_logger.AddEvent("SetStream");
Joydeep Biswasc914c7a2022-08-13 16:20:05 -0500164 is_cuda_initialized_ = true;
Joydeep Biswas8e084212022-02-14 20:56:30 -0600165 return true;
166}
167#endif // CERES_NO_CUDA
168
169ContextImpl::~ContextImpl() {
170#ifndef CERES_NO_CUDA
Joydeep Biswas67bae282022-08-07 17:14:32 -0500171 TearDown();
Joydeep Biswas8e084212022-02-14 20:56:30 -0600172#endif // CERES_NO_CUDA
173}
Mike Vitusf408f892018-02-22 10:28:39 -0800174void ContextImpl::EnsureMinimumThreads(int num_threads) {
Alex Stewart7ef83e02020-05-30 11:30:01 +0100175#ifdef CERES_USE_CXX_THREADS
Mike Vitusf408f892018-02-22 10:28:39 -0800176 thread_pool.Resize(num_threads);
Alex Stewart7ef83e02020-05-30 11:30:01 +0100177#endif // CERES_USE_CXX_THREADS
Mike Vitusf408f892018-02-22 10:28:39 -0800178}
Sameer Agarwalcaf614a2022-04-21 17:41:10 -0700179} // namespace ceres::internal