CUDA CGNR, Part 1: Misc. CLeanup * Fixed an alignment bug with EventLogger. * Added expected solutions to some LinearLeastSquaresProblem tests. * Expanded ContextImpl to initialize cuSparse. Change-Id: I2d7556a0509e9b7c56c9cd5cfa75cb85614395dc
diff --git a/internal/ceres/context_impl.cc b/internal/ceres/context_impl.cc index decee3b..b8f9ff5 100644 --- a/internal/ceres/context_impl.cc +++ b/internal/ceres/context_impl.cc
@@ -33,6 +33,7 @@ #include <string> #include "ceres/internal/config.h" +#include "ceres/wall_time.h" #ifndef CERES_NO_CUDA #include "cublas_v2.h" @@ -45,45 +46,65 @@ ContextImpl::ContextImpl() = default; #ifndef CERES_NO_CUDA +void ContextImpl::TearDown() { + if (cusolver_handle_ != nullptr) { + cusolverDnDestroy(cusolver_handle_); + cusolver_handle_ = nullptr; + } + if (cublas_handle_ != nullptr) { + cublasDestroy(cublas_handle_); + cublas_handle_ = nullptr; + } + if (cusolver_handle_ != nullptr) { + cusparseDestroy(cusparse_handle_); + cusparse_handle_ = nullptr; + } + if (stream_ != nullptr) { + cudaStreamDestroy(stream_); + stream_ = nullptr; + } + cuda_initialized_ = false; +} + bool ContextImpl::InitCUDA(std::string* message) { if (cuda_initialized_) { return true; } + EventLogger event_logger("InitCuda"); if (cublasCreate(&cublas_handle_) != CUBLAS_STATUS_SUCCESS) { *message = "cuBLAS::cublasCreate failed."; cublas_handle_ = nullptr; return false; } + event_logger.AddEvent("cublasCreate"); if (cusolverDnCreate(&cusolver_handle_) != CUSOLVER_STATUS_SUCCESS) { *message = "cuSolverDN::cusolverDnCreate failed."; - cusolver_handle_ = nullptr; - cublasDestroy(cublas_handle_); - cublas_handle_ = nullptr; + TearDown(); return false; } + event_logger.AddEvent("cusolverDnCreate"); + if (cusparseCreate(&cusparse_handle_) != CUSPARSE_STATUS_SUCCESS) { + *message = "cuSPARSE::cusparseCreate failed."; + TearDown(); + return false; + } + event_logger.AddEvent("cusparseCreate"); if (cudaStreamCreateWithFlags(&stream_, cudaStreamNonBlocking) != cudaSuccess) { *message = "CUDA::cudaStreamCreateWithFlags failed."; - cusolverDnDestroy(cusolver_handle_); - cublasDestroy(cublas_handle_); - cusolver_handle_ = nullptr; - cublas_handle_ = nullptr; - stream_ = nullptr; + TearDown(); return false; } + event_logger.AddEvent("cudaStreamCreateWithFlags"); if (cusolverDnSetStream(cusolver_handle_, stream_) != CUSOLVER_STATUS_SUCCESS || - cublasSetStream(cublas_handle_, stream_) != CUBLAS_STATUS_SUCCESS) { - *message = - "cuSolverDN::cusolverDnSetStream or cuBLAS::cublasSetStream failed."; - cusolverDnDestroy(cusolver_handle_); - cublasDestroy(cublas_handle_); - cudaStreamDestroy(stream_); - cusolver_handle_ = nullptr; - cublas_handle_ = nullptr; - stream_ = nullptr; + cublasSetStream(cublas_handle_, stream_) != CUBLAS_STATUS_SUCCESS || + cusparseSetStream(cusparse_handle_, stream_) != CUSPARSE_STATUS_SUCCESS) { + *message = "CUDA [Solver|BLAS|Sparse] SetStream failed."; + TearDown(); return false; } + event_logger.AddEvent("SetStream"); cuda_initialized_ = true; return true; } @@ -91,11 +112,7 @@ ContextImpl::~ContextImpl() { #ifndef CERES_NO_CUDA - if (cuda_initialized_) { - cusolverDnDestroy(cusolver_handle_); - cublasDestroy(cublas_handle_); - cudaStreamDestroy(stream_); - } + TearDown(); #endif // CERES_NO_CUDA } void ContextImpl::EnsureMinimumThreads(int num_threads) {
diff --git a/internal/ceres/context_impl.h b/internal/ceres/context_impl.h index 3bcb2b5..3324b52 100644 --- a/internal/ceres/context_impl.h +++ b/internal/ceres/context_impl.h
@@ -45,6 +45,7 @@ #ifndef CERES_NO_CUDA #include "cublas_v2.h" #include "cuda_runtime.h" +#include "cusparse.h" #include "cusolverDn.h" #endif // CERES_NO_CUDA @@ -76,13 +77,12 @@ // successful, else it returns false and a human-readable error message is // returned. bool InitCUDA(std::string* message); + void TearDown(); - // Handle to the cuSOLVER context. cusolverDnHandle_t cusolver_handle_ = nullptr; - // Handle to cuBLAS context. cublasHandle_t cublas_handle_ = nullptr; - // CUDA device stream. cudaStream_t stream_ = nullptr; + cusparseHandle_t cusparse_handle_ = nullptr; // Indicates whether all the CUDA resources have been initialized. bool cuda_initialized_ = false; #endif // CERES_NO_CUDA
diff --git a/internal/ceres/cuda_buffer.h b/internal/ceres/cuda_buffer.h index 89828f9..f8abf13 100644 --- a/internal/ceres/cuda_buffer.h +++ b/internal/ceres/cuda_buffer.h
@@ -66,7 +66,8 @@ if (data_ != nullptr) { CHECK_EQ(cudaFree(data_), cudaSuccess); } - CHECK_EQ(cudaMalloc(&data_, size * sizeof(T)), cudaSuccess); + CHECK_EQ(cudaMalloc(&data_, size * sizeof(T)), cudaSuccess) << + "Failed to allocate " << size * sizeof(T) << " bytes of GPU memory"; size_ = size; } }
diff --git a/internal/ceres/linear_least_squares_problems.cc b/internal/ceres/linear_least_squares_problems.cc index 0ce37e8..d0262e3 100644 --- a/internal/ceres/linear_least_squares_problems.cc +++ b/internal/ceres/linear_least_squares_problems.cc
@@ -196,6 +196,13 @@ problem->D = std::make_unique<double[]>(num_cols); problem->num_eliminate_blocks = 2; + problem->x = std::make_unique<double[]>(num_cols); + problem->x[0] = -2.3061; + problem->x[1] = 0.3172; + problem->x[2] = 0.2102; + problem->x[3] = 2.1367; + problem->x[4] = 0.1388; + int* rows = A->mutable_rows(); int* cols = A->mutable_cols(); double* values = A->mutable_values(); @@ -300,6 +307,13 @@ problem->D = std::make_unique<double[]>(num_cols); problem->num_eliminate_blocks = 2; + problem->x = std::make_unique<double[]>(num_cols); + problem->x[0] = -2.3061; + problem->x[1] = 0.3172; + problem->x[2] = 0.2102; + problem->x[3] = 2.1367; + problem->x[4] = 0.1388; + auto* bs = new CompressedRowBlockStructure; std::unique_ptr<double[]> values = std::make_unique<double[]>(num_rows * num_cols);
diff --git a/internal/ceres/wall_time.cc b/internal/ceres/wall_time.cc index 1a718fb..c36092a 100644 --- a/internal/ceres/wall_time.cc +++ b/internal/ceres/wall_time.cc
@@ -73,7 +73,7 @@ start_time_ = WallTimeInSeconds(); last_event_time_ = start_time_; events_ = StringPrintf( - "\n%s\n Delta Cumulative\n", + "\n%s\n Delta Cumulative\n", logger_name.c_str()); }