CUDA CGNR, Part 1: Misc. CLeanup

* Fixed an alignment bug with EventLogger.
* Added expected solutions to some LinearLeastSquaresProblem tests.
* Expanded ContextImpl to initialize cuSparse.

Change-Id: I2d7556a0509e9b7c56c9cd5cfa75cb85614395dc
diff --git a/internal/ceres/context_impl.cc b/internal/ceres/context_impl.cc
index decee3b..b8f9ff5 100644
--- a/internal/ceres/context_impl.cc
+++ b/internal/ceres/context_impl.cc
@@ -33,6 +33,7 @@
 #include <string>
 
 #include "ceres/internal/config.h"
+#include "ceres/wall_time.h"
 
 #ifndef CERES_NO_CUDA
 #include "cublas_v2.h"
@@ -45,45 +46,65 @@
 ContextImpl::ContextImpl() = default;
 
 #ifndef CERES_NO_CUDA
+void ContextImpl::TearDown() {
+  if (cusolver_handle_ != nullptr) {
+    cusolverDnDestroy(cusolver_handle_);
+    cusolver_handle_ = nullptr;
+  }
+  if (cublas_handle_ != nullptr) {
+    cublasDestroy(cublas_handle_);
+    cublas_handle_ = nullptr;
+  }
+  if (cusolver_handle_ != nullptr) {
+    cusparseDestroy(cusparse_handle_);
+    cusparse_handle_ = nullptr;
+  }
+  if (stream_ != nullptr) {
+    cudaStreamDestroy(stream_);
+    stream_ = nullptr;
+  }
+  cuda_initialized_ = false;
+}
+
 bool ContextImpl::InitCUDA(std::string* message) {
   if (cuda_initialized_) {
     return true;
   }
+  EventLogger event_logger("InitCuda");
   if (cublasCreate(&cublas_handle_) != CUBLAS_STATUS_SUCCESS) {
     *message = "cuBLAS::cublasCreate failed.";
     cublas_handle_ = nullptr;
     return false;
   }
+  event_logger.AddEvent("cublasCreate");
   if (cusolverDnCreate(&cusolver_handle_) != CUSOLVER_STATUS_SUCCESS) {
     *message = "cuSolverDN::cusolverDnCreate failed.";
-    cusolver_handle_ = nullptr;
-    cublasDestroy(cublas_handle_);
-    cublas_handle_ = nullptr;
+    TearDown();
     return false;
   }
+  event_logger.AddEvent("cusolverDnCreate");
+  if (cusparseCreate(&cusparse_handle_) != CUSPARSE_STATUS_SUCCESS) {
+    *message = "cuSPARSE::cusparseCreate failed.";
+    TearDown();
+    return false;
+  }
+  event_logger.AddEvent("cusparseCreate");
   if (cudaStreamCreateWithFlags(&stream_, cudaStreamNonBlocking) !=
       cudaSuccess) {
     *message = "CUDA::cudaStreamCreateWithFlags failed.";
-    cusolverDnDestroy(cusolver_handle_);
-    cublasDestroy(cublas_handle_);
-    cusolver_handle_ = nullptr;
-    cublas_handle_ = nullptr;
-    stream_ = nullptr;
+    TearDown();
     return false;
   }
+  event_logger.AddEvent("cudaStreamCreateWithFlags");
   if (cusolverDnSetStream(cusolver_handle_, stream_) !=
           CUSOLVER_STATUS_SUCCESS ||
-      cublasSetStream(cublas_handle_, stream_) != CUBLAS_STATUS_SUCCESS) {
-    *message =
-        "cuSolverDN::cusolverDnSetStream or cuBLAS::cublasSetStream failed.";
-    cusolverDnDestroy(cusolver_handle_);
-    cublasDestroy(cublas_handle_);
-    cudaStreamDestroy(stream_);
-    cusolver_handle_ = nullptr;
-    cublas_handle_ = nullptr;
-    stream_ = nullptr;
+      cublasSetStream(cublas_handle_, stream_) != CUBLAS_STATUS_SUCCESS ||
+      cusparseSetStream(cusparse_handle_, stream_) != CUSPARSE_STATUS_SUCCESS) {
+    *message = "CUDA [Solver|BLAS|Sparse] SetStream failed.";
+    TearDown();
     return false;
   }
+  event_logger.AddEvent("SetStream");
   cuda_initialized_ = true;
   return true;
 }
@@ -91,11 +112,7 @@
 
 ContextImpl::~ContextImpl() {
 #ifndef CERES_NO_CUDA
-  if (cuda_initialized_) {
-    cusolverDnDestroy(cusolver_handle_);
-    cublasDestroy(cublas_handle_);
-    cudaStreamDestroy(stream_);
-  }
+  TearDown();
 #endif  // CERES_NO_CUDA
 }
 void ContextImpl::EnsureMinimumThreads(int num_threads) {
diff --git a/internal/ceres/context_impl.h b/internal/ceres/context_impl.h
index 3bcb2b5..3324b52 100644
--- a/internal/ceres/context_impl.h
+++ b/internal/ceres/context_impl.h
@@ -45,6 +45,7 @@
 #ifndef CERES_NO_CUDA
 #include "cublas_v2.h"
 #include "cuda_runtime.h"
+#include "cusparse.h"
 #include "cusolverDn.h"
 #endif  // CERES_NO_CUDA
 
@@ -76,13 +77,12 @@
   // successful, else it returns false and a human-readable error message is
   // returned.
   bool InitCUDA(std::string* message);
+  void TearDown();
 
-  // Handle to the cuSOLVER context.
   cusolverDnHandle_t cusolver_handle_ = nullptr;
-  // Handle to cuBLAS context.
   cublasHandle_t cublas_handle_ = nullptr;
-  // CUDA device stream.
   cudaStream_t stream_ = nullptr;
+  cusparseHandle_t cusparse_handle_ = nullptr;
   // Indicates whether all the CUDA resources have been initialized.
   bool cuda_initialized_ = false;
 #endif  // CERES_NO_CUDA
diff --git a/internal/ceres/cuda_buffer.h b/internal/ceres/cuda_buffer.h
index 89828f9..f8abf13 100644
--- a/internal/ceres/cuda_buffer.h
+++ b/internal/ceres/cuda_buffer.h
@@ -66,7 +66,8 @@
       if (data_ != nullptr) {
         CHECK_EQ(cudaFree(data_), cudaSuccess);
       }
-      CHECK_EQ(cudaMalloc(&data_, size * sizeof(T)), cudaSuccess);
+      CHECK_EQ(cudaMalloc(&data_, size * sizeof(T)), cudaSuccess) <<
+          "Failed to allocate " << size * sizeof(T) << " bytes of GPU memory";
       size_ = size;
     }
   }
diff --git a/internal/ceres/linear_least_squares_problems.cc b/internal/ceres/linear_least_squares_problems.cc
index 0ce37e8..d0262e3 100644
--- a/internal/ceres/linear_least_squares_problems.cc
+++ b/internal/ceres/linear_least_squares_problems.cc
@@ -196,6 +196,13 @@
   problem->D = std::make_unique<double[]>(num_cols);
   problem->num_eliminate_blocks = 2;
 
+  problem->x = std::make_unique<double[]>(num_cols);
+  problem->x[0] = -2.3061;
+  problem->x[1] = 0.3172;
+  problem->x[2] = 0.2102;
+  problem->x[3] = 2.1367;
+  problem->x[4] = 0.1388;
+
   int* rows = A->mutable_rows();
   int* cols = A->mutable_cols();
   double* values = A->mutable_values();
@@ -300,6 +307,13 @@
   problem->D = std::make_unique<double[]>(num_cols);
   problem->num_eliminate_blocks = 2;
 
+  problem->x = std::make_unique<double[]>(num_cols);
+  problem->x[0] = -2.3061;
+  problem->x[1] = 0.3172;
+  problem->x[2] = 0.2102;
+  problem->x[3] = 2.1367;
+  problem->x[4] = 0.1388;
+
   auto* bs = new CompressedRowBlockStructure;
   std::unique_ptr<double[]> values =
       std::make_unique<double[]>(num_rows * num_cols);
diff --git a/internal/ceres/wall_time.cc b/internal/ceres/wall_time.cc
index 1a718fb..c36092a 100644
--- a/internal/ceres/wall_time.cc
+++ b/internal/ceres/wall_time.cc
@@ -73,7 +73,7 @@
   start_time_ = WallTimeInSeconds();
   last_event_time_ = start_time_;
   events_ = StringPrintf(
-      "\n%s\n                                   Delta   Cumulative\n",
+      "\n%s\n                                        Delta   Cumulative\n",
       logger_name.c_str());
 }