CUDA CGNR, Part 1: Misc. CLeanup
* Fixed an alignment bug with EventLogger.
* Added expected solutions to some LinearLeastSquaresProblem tests.
* Expanded ContextImpl to initialize cuSparse.
Change-Id: I2d7556a0509e9b7c56c9cd5cfa75cb85614395dc
diff --git a/internal/ceres/context_impl.cc b/internal/ceres/context_impl.cc
index decee3b..b8f9ff5 100644
--- a/internal/ceres/context_impl.cc
+++ b/internal/ceres/context_impl.cc
@@ -33,6 +33,7 @@
#include <string>
#include "ceres/internal/config.h"
+#include "ceres/wall_time.h"
#ifndef CERES_NO_CUDA
#include "cublas_v2.h"
@@ -45,45 +46,65 @@
ContextImpl::ContextImpl() = default;
#ifndef CERES_NO_CUDA
+void ContextImpl::TearDown() {
+ if (cusolver_handle_ != nullptr) {
+ cusolverDnDestroy(cusolver_handle_);
+ cusolver_handle_ = nullptr;
+ }
+ if (cublas_handle_ != nullptr) {
+ cublasDestroy(cublas_handle_);
+ cublas_handle_ = nullptr;
+ }
+ if (cusolver_handle_ != nullptr) {
+ cusparseDestroy(cusparse_handle_);
+ cusparse_handle_ = nullptr;
+ }
+ if (stream_ != nullptr) {
+ cudaStreamDestroy(stream_);
+ stream_ = nullptr;
+ }
+ cuda_initialized_ = false;
+}
+
bool ContextImpl::InitCUDA(std::string* message) {
if (cuda_initialized_) {
return true;
}
+ EventLogger event_logger("InitCuda");
if (cublasCreate(&cublas_handle_) != CUBLAS_STATUS_SUCCESS) {
*message = "cuBLAS::cublasCreate failed.";
cublas_handle_ = nullptr;
return false;
}
+ event_logger.AddEvent("cublasCreate");
if (cusolverDnCreate(&cusolver_handle_) != CUSOLVER_STATUS_SUCCESS) {
*message = "cuSolverDN::cusolverDnCreate failed.";
- cusolver_handle_ = nullptr;
- cublasDestroy(cublas_handle_);
- cublas_handle_ = nullptr;
+ TearDown();
return false;
}
+ event_logger.AddEvent("cusolverDnCreate");
+ if (cusparseCreate(&cusparse_handle_) != CUSPARSE_STATUS_SUCCESS) {
+ *message = "cuSPARSE::cusparseCreate failed.";
+ TearDown();
+ return false;
+ }
+ event_logger.AddEvent("cusparseCreate");
if (cudaStreamCreateWithFlags(&stream_, cudaStreamNonBlocking) !=
cudaSuccess) {
*message = "CUDA::cudaStreamCreateWithFlags failed.";
- cusolverDnDestroy(cusolver_handle_);
- cublasDestroy(cublas_handle_);
- cusolver_handle_ = nullptr;
- cublas_handle_ = nullptr;
- stream_ = nullptr;
+ TearDown();
return false;
}
+ event_logger.AddEvent("cudaStreamCreateWithFlags");
if (cusolverDnSetStream(cusolver_handle_, stream_) !=
CUSOLVER_STATUS_SUCCESS ||
- cublasSetStream(cublas_handle_, stream_) != CUBLAS_STATUS_SUCCESS) {
- *message =
- "cuSolverDN::cusolverDnSetStream or cuBLAS::cublasSetStream failed.";
- cusolverDnDestroy(cusolver_handle_);
- cublasDestroy(cublas_handle_);
- cudaStreamDestroy(stream_);
- cusolver_handle_ = nullptr;
- cublas_handle_ = nullptr;
- stream_ = nullptr;
+ cublasSetStream(cublas_handle_, stream_) != CUBLAS_STATUS_SUCCESS ||
+ cusparseSetStream(cusparse_handle_, stream_) != CUSPARSE_STATUS_SUCCESS) {
+ *message = "CUDA [Solver|BLAS|Sparse] SetStream failed.";
+ TearDown();
return false;
}
+ event_logger.AddEvent("SetStream");
cuda_initialized_ = true;
return true;
}
@@ -91,11 +112,7 @@
ContextImpl::~ContextImpl() {
#ifndef CERES_NO_CUDA
- if (cuda_initialized_) {
- cusolverDnDestroy(cusolver_handle_);
- cublasDestroy(cublas_handle_);
- cudaStreamDestroy(stream_);
- }
+ TearDown();
#endif // CERES_NO_CUDA
}
void ContextImpl::EnsureMinimumThreads(int num_threads) {
diff --git a/internal/ceres/context_impl.h b/internal/ceres/context_impl.h
index 3bcb2b5..3324b52 100644
--- a/internal/ceres/context_impl.h
+++ b/internal/ceres/context_impl.h
@@ -45,6 +45,7 @@
#ifndef CERES_NO_CUDA
#include "cublas_v2.h"
#include "cuda_runtime.h"
+#include "cusparse.h"
#include "cusolverDn.h"
#endif // CERES_NO_CUDA
@@ -76,13 +77,12 @@
// successful, else it returns false and a human-readable error message is
// returned.
bool InitCUDA(std::string* message);
+ void TearDown();
- // Handle to the cuSOLVER context.
cusolverDnHandle_t cusolver_handle_ = nullptr;
- // Handle to cuBLAS context.
cublasHandle_t cublas_handle_ = nullptr;
- // CUDA device stream.
cudaStream_t stream_ = nullptr;
+ cusparseHandle_t cusparse_handle_ = nullptr;
// Indicates whether all the CUDA resources have been initialized.
bool cuda_initialized_ = false;
#endif // CERES_NO_CUDA
diff --git a/internal/ceres/cuda_buffer.h b/internal/ceres/cuda_buffer.h
index 89828f9..f8abf13 100644
--- a/internal/ceres/cuda_buffer.h
+++ b/internal/ceres/cuda_buffer.h
@@ -66,7 +66,8 @@
if (data_ != nullptr) {
CHECK_EQ(cudaFree(data_), cudaSuccess);
}
- CHECK_EQ(cudaMalloc(&data_, size * sizeof(T)), cudaSuccess);
+ CHECK_EQ(cudaMalloc(&data_, size * sizeof(T)), cudaSuccess) <<
+ "Failed to allocate " << size * sizeof(T) << " bytes of GPU memory";
size_ = size;
}
}
diff --git a/internal/ceres/linear_least_squares_problems.cc b/internal/ceres/linear_least_squares_problems.cc
index 0ce37e8..d0262e3 100644
--- a/internal/ceres/linear_least_squares_problems.cc
+++ b/internal/ceres/linear_least_squares_problems.cc
@@ -196,6 +196,13 @@
problem->D = std::make_unique<double[]>(num_cols);
problem->num_eliminate_blocks = 2;
+ problem->x = std::make_unique<double[]>(num_cols);
+ problem->x[0] = -2.3061;
+ problem->x[1] = 0.3172;
+ problem->x[2] = 0.2102;
+ problem->x[3] = 2.1367;
+ problem->x[4] = 0.1388;
+
int* rows = A->mutable_rows();
int* cols = A->mutable_cols();
double* values = A->mutable_values();
@@ -300,6 +307,13 @@
problem->D = std::make_unique<double[]>(num_cols);
problem->num_eliminate_blocks = 2;
+ problem->x = std::make_unique<double[]>(num_cols);
+ problem->x[0] = -2.3061;
+ problem->x[1] = 0.3172;
+ problem->x[2] = 0.2102;
+ problem->x[3] = 2.1367;
+ problem->x[4] = 0.1388;
+
auto* bs = new CompressedRowBlockStructure;
std::unique_ptr<double[]> values =
std::make_unique<double[]>(num_rows * num_cols);
diff --git a/internal/ceres/wall_time.cc b/internal/ceres/wall_time.cc
index 1a718fb..c36092a 100644
--- a/internal/ceres/wall_time.cc
+++ b/internal/ceres/wall_time.cc
@@ -73,7 +73,7 @@
start_time_ = WallTimeInSeconds();
last_event_time_ = start_time_;
events_ = StringPrintf(
- "\n%s\n Delta Cumulative\n",
+ "\n%s\n Delta Cumulative\n",
logger_name.c_str());
}