Fix a missing CERES_NO_CUDA guard. Also run format_all.sh. Change-Id: I13902c1d3eb0d3a97548540fee13ec67c490a5ff

diff --git a/internal/ceres/cuda_sparse_matrix.cc b/internal/ceres/cuda_sparse_matrix.cc
index 2f857e2..da64981 100644
--- a/internal/ceres/cuda_sparse_matrix.cc
+++ b/internal/ceres/cuda_sparse_matrix.cc

@@ -38,41 +38,37 @@
 #include "ceres/cuda_sparse_matrix.h"
 
 #include <math.h>
+
 #include <memory>
 
-#include "ceres/internal/export.h"
 #include "ceres/block_sparse_matrix.h"
 #include "ceres/compressed_row_sparse_matrix.h"
-#include "ceres/crs_matrix.h"
-#include "ceres/types.h"
 #include "ceres/context_impl.h"
+#include "ceres/crs_matrix.h"
+#include "ceres/internal/export.h"
+#include "ceres/types.h"
 #include "ceres/wall_time.h"
 
 #ifndef CERES_NO_CUDA
 
+#include "ceres/ceres_cuda_kernels.h"
 #include "ceres/cuda_buffer.h"
 #include "ceres/cuda_vector.h"
-#include "ceres/ceres_cuda_kernels.h"
 #include "cusparse.h"
 
-
 namespace ceres::internal {
 
 CudaSparseMatrix::CudaSparseMatrix(
-      ContextImpl* context,
-      const CompressedRowSparseMatrix& crs_matrix) {
+    ContextImpl* context, const CompressedRowSparseMatrix& crs_matrix) {
   DCHECK_NE(context, nullptr);
   CHECK(context->IsCUDAInitialized());
   context_ = context;
   num_rows_ = crs_matrix.num_rows();
   num_cols_ = crs_matrix.num_cols();
   num_nonzeros_ = crs_matrix.num_nonzeros();
-  rows_.CopyFromCpu(
-      crs_matrix.rows(), num_rows_ + 1, context_->stream_);
-  cols_.CopyFromCpu(
-      crs_matrix.cols(), num_nonzeros_, context_->stream_);
-  values_.CopyFromCpu(
-      crs_matrix.values(), num_nonzeros_, context_->stream_);
+  rows_.CopyFromCpu(crs_matrix.rows(), num_rows_ + 1, context_->stream_);
+  cols_.CopyFromCpu(crs_matrix.cols(), num_nonzeros_, context_->stream_);
+  values_.CopyFromCpu(crs_matrix.values(), num_nonzeros_, context_->stream_);
   cusparseCreateCsr(&descr_,
                     num_rows_,
                     num_cols_,
@@ -99,8 +95,7 @@
   CHECK_EQ(num_rows_, crs_matrix.num_rows());
   CHECK_EQ(num_cols_, crs_matrix.num_cols());
   CHECK_EQ(num_nonzeros_, crs_matrix.num_nonzeros());
-  values_.CopyFromCpu(
-      crs_matrix.values(), num_nonzeros_, context_->stream_);
+  values_.CopyFromCpu(crs_matrix.values(), num_nonzeros_, context_->stream_);
 }
 
 void CudaSparseMatrix::SpMv(cusparseOperation_t op,
@@ -135,11 +130,13 @@
            CUSPARSE_STATUS_SUCCESS);
 }
 
-void CudaSparseMatrix::RightMultiplyAndAccumulate(const CudaVector& x, CudaVector* y) {
+void CudaSparseMatrix::RightMultiplyAndAccumulate(const CudaVector& x,
+                                                  CudaVector* y) {
   SpMv(CUSPARSE_OPERATION_NON_TRANSPOSE, x, y);
 }
 
-void CudaSparseMatrix::LeftMultiplyAndAccumulate(const CudaVector& x, CudaVector* y) {
+void CudaSparseMatrix::LeftMultiplyAndAccumulate(const CudaVector& x,
+                                                 CudaVector* y) {
   // TODO(Joydeep Biswas): We should consider storing a transposed copy of the
   // matrix by converting CSR to CSC. From the cuSPARSE documentation:
   // "In general, opA == CUSPARSE_OPERATION_NON_TRANSPOSE is 3x faster than opA

diff --git a/internal/ceres/cuda_sparse_matrix.h b/internal/ceres/cuda_sparse_matrix.h
index 7661fb9..62f6b77 100644
--- a/internal/ceres/cuda_sparse_matrix.h
+++ b/internal/ceres/cuda_sparse_matrix.h

@@ -43,9 +43,9 @@
 #include <string>
 
 #include "ceres/compressed_row_sparse_matrix.h"
+#include "ceres/context_impl.h"
 #include "ceres/internal/export.h"
 #include "ceres/types.h"
-#include "ceres/context_impl.h"
 
 #ifndef CERES_NO_CUDA
 #include "ceres/cuda_buffer.h"
@@ -58,7 +58,6 @@
 // CUDA-accelerated operations.
 class CERES_NO_EXPORT CudaSparseMatrix {
  public:
-
   // Create a GPU copy of the matrix provided. The caller must ensure that
   // InitCuda() has already been successfully called on context before calling
   // this constructor.
@@ -86,7 +85,6 @@
   const cusparseSpMatDescr_t& descr() const { return descr_; }
 
  private:
-
   // Disable copy and assignment.
   CudaSparseMatrix(const CudaSparseMatrix&) = delete;
   CudaSparseMatrix& operator=(const CudaSparseMatrix&) = delete;

diff --git a/internal/ceres/cuda_sparse_matrix_test.cc b/internal/ceres/cuda_sparse_matrix_test.cc
index 3a01ff9..ae76b8f 100644
--- a/internal/ceres/cuda_sparse_matrix_test.cc
+++ b/internal/ceres/cuda_sparse_matrix_test.cc

@@ -28,15 +28,16 @@
 //
 // Author: joydeepb@cs.utexas.edu (Joydeep Biswas)
 
+#include "ceres/cuda_sparse_matrix.h"
+
 #include <string>
 
+#include "ceres/block_sparse_matrix.h"
 #include "ceres/casts.h"
+#include "ceres/cuda_vector.h"
 #include "ceres/internal/config.h"
 #include "ceres/internal/eigen.h"
 #include "ceres/linear_least_squares_problems.h"
-#include "ceres/block_sparse_matrix.h"
-#include "ceres/cuda_vector.h"
-#include "ceres/cuda_sparse_matrix.h"
 #include "ceres/triplet_sparse_matrix.h"
 #include "glog/logging.h"
 #include "gtest/gtest.h"
@@ -113,24 +114,11 @@
   // b: [1 2 3 4]'
   // A1 * b = [3 5]'
   // A2 * b = [5 18]'
-  TripletSparseMatrix A1(
-    2,
-    4,
-    {0, 0, 1, 1},
-    {0, 1, 1, 2},
-    {1, 1, 1, 1}
-  );
-  TripletSparseMatrix A2(
-    2,
-    4,
-    {0, 0, 1, 1},
-    {0, 1, 1, 2},
-    {1, 2, 3, 4}
-  );
+  TripletSparseMatrix A1(2, 4, {0, 0, 1, 1}, {0, 1, 1, 2}, {1, 1, 1, 1});
+  TripletSparseMatrix A2(2, 4, {0, 0, 1, 1}, {0, 1, 1, 2}, {1, 2, 3, 4});
   Vector b(4);
   b << 1, 2, 3, 4;
 
-
   ContextImpl context;
   std::string message;
   CHECK(context.InitCUDA(&message)) << "InitCUDA() failed because: " << message;
@@ -163,13 +151,7 @@
   //   0 3 4 0]
   // b: [1 2 3 4]'
   // A * b = [5 18]'
-  TripletSparseMatrix A(
-    2,
-    4,
-    {0, 0, 1, 1},
-    {0, 1, 1, 2},
-    {1, 2, 3, 4}
-  );
+  TripletSparseMatrix A(2, 4, {0, 0, 1, 1}, {0, 1, 1, 2}, {1, 2, 3, 4});
   Vector b(4);
   b << 1, 2, 3, 4;
   Vector x_expected(2);
@@ -199,13 +181,7 @@
   //   0 3 4 0]
   // b: [1 2]'
   // A'* b = [1 8 8 0]'
-  TripletSparseMatrix A(
-    2,
-    4,
-    {0, 0, 1, 1},
-    {0, 1, 1, 2},
-    {1, 2, 3, 4}
-  );
+  TripletSparseMatrix A(2, 4, {0, 0, 1, 1}, {0, 1, 1, 2}, {1, 2, 3, 4});
   Vector b(2);
   b << 1, 2;
   Vector x_expected(4);
@@ -213,8 +189,7 @@
 
   ContextImpl context;
   std::string message;
-  CHECK(context.InitCUDA(&message))
-      << "InitCUDA() failed because: " << message;
+  CHECK(context.InitCUDA(&message)) << "InitCUDA() failed because: " << message;
   auto A_crs = CompressedRowSparseMatrix::FromTripletSparseMatrix(A);
   CudaSparseMatrix A_gpu(&context, *A_crs);
   CudaVector b_gpu(&context, A.num_rows());
@@ -269,15 +244,13 @@
 
   ContextImpl context;
   std::string message;
-  CHECK(context.InitCUDA(&message))
-      << "InitCUDA() failed because: " << message;
+  CHECK(context.InitCUDA(&message)) << "InitCUDA() failed because: " << message;
   auto A_crs = CompressedRowSparseMatrix::FromTripletSparseMatrix(A);
   CudaSparseMatrix A_gpu(&context, *A_crs);
   CudaVector b_gpu(&context, N);
   CudaVector x_gpu(&context, N);
   x_gpu.CopyFromCpu(x);
 
-
   // First check RightMultiply.
   {
     b_gpu.SetZero();

diff --git a/internal/ceres/cuda_vector.cc b/internal/ceres/cuda_vector.cc
index 5a128de..7debeba 100644
--- a/internal/ceres/cuda_vector.cc
+++ b/internal/ceres/cuda_vector.cc

@@ -37,15 +37,15 @@
 
 #include <math.h>
 
+#include "ceres/context_impl.h"
 #include "ceres/internal/export.h"
 #include "ceres/types.h"
-#include "ceres/context_impl.h"
 
 #ifndef CERES_NO_CUDA
 
+#include "ceres/ceres_cuda_kernels.h"
 #include "ceres/cuda_buffer.h"
 #include "ceres/cuda_vector.h"
-#include "ceres/ceres_cuda_kernels.h"
 #include "cublas_v2.h"
 
 namespace ceres::internal {
@@ -72,18 +72,14 @@
   }
 }
 
-CudaVector::~CudaVector() {
-  DestroyDescriptor();
-}
+CudaVector::~CudaVector() { DestroyDescriptor(); }
 
 void CudaVector::Resize(int size) {
   data_.Reserve(size);
   num_rows_ = size;
   DestroyDescriptor();
-  CHECK_EQ(cusparseCreateDnVec(&descr_,
-                               num_rows_,
-                               data_.data(),
-                               CUDA_R_64F), CUSPARSE_STATUS_SUCCESS);
+  CHECK_EQ(cusparseCreateDnVec(&descr_, num_rows_, data_.data(), CUDA_R_64F),
+           CUSPARSE_STATUS_SUCCESS);
 }
 
 double CudaVector::Dot(const CudaVector& x) const {
@@ -93,19 +89,19 @@
                       data_.data(),
                       1,
                       x.data().data(),
-                      1, &result),
-           CUBLAS_STATUS_SUCCESS) << "CuBLAS cublasDdot failed.";
+                      1,
+                      &result),
+           CUBLAS_STATUS_SUCCESS)
+      << "CuBLAS cublasDdot failed.";
   return result;
 }
 
 double CudaVector::Norm() const {
   double result = 0;
-  CHECK_EQ(cublasDnrm2(context_->cublas_handle_,
-                       num_rows_,
-                       data_.data(),
-                       1,
-                       &result),
-           CUBLAS_STATUS_SUCCESS) << "CuBLAS cublasDnrm2 failed.";
+  CHECK_EQ(cublasDnrm2(
+               context_->cublas_handle_, num_rows_, data_.data(), 1, &result),
+           CUBLAS_STATUS_SUCCESS)
+      << "CuBLAS cublasDnrm2 failed.";
   return result;
 }
 
@@ -114,10 +110,8 @@
   data_.CopyFromCpu(x.data(), x.rows(), context_->stream_);
   num_rows_ = x.rows();
   DestroyDescriptor();
-  CHECK_EQ(cusparseCreateDnVec(&descr_,
-                               num_rows_,
-                               data_.data(),
-                               CUDA_R_64F), CUSPARSE_STATUS_SUCCESS);
+  CHECK_EQ(cusparseCreateDnVec(&descr_, num_rows_, data_.data(), CUDA_R_64F),
+           CUSPARSE_STATUS_SUCCESS);
 }
 
 void CudaVector::CopyTo(Vector* x) const {
@@ -150,12 +144,10 @@
   CHECK_EQ(num_rows_, x.num_rows_);
   if (b != 1.0) {
     // First scale y by b.
-    CHECK_EQ(cublasDscal(context_->cublas_handle_,
-                        num_rows_,
-                        &b,
-                        data_.data(),
-                        1),
-            CUBLAS_STATUS_SUCCESS) << "CuBLAS cublasDscal failed.";
+    CHECK_EQ(
+        cublasDscal(context_->cublas_handle_, num_rows_, &b, data_.data(), 1),
+        CUBLAS_STATUS_SUCCESS)
+        << "CuBLAS cublasDscal failed.";
   }
   // Then add a * x to y.
   CHECK_EQ(cublasDaxpy(context_->cublas_handle_,
@@ -165,7 +157,8 @@
                        1,
                        data_.data(),
                        1),
-           CUBLAS_STATUS_SUCCESS) << "CuBLAS cublasDaxpy failed.";
+           CUBLAS_STATUS_SUCCESS)
+      << "CuBLAS cublasDaxpy failed.";
 }
 
 void CudaVector::DtDxpy(const CudaVector& D, const CudaVector& x) {
@@ -177,12 +170,10 @@
 }
 
 void CudaVector::Scale(double s) {
-  CHECK_EQ(cublasDscal(context_->cublas_handle_,
-                       num_rows_,
-                       &s,
-                       data_.data(),
-                       1),
-           CUBLAS_STATUS_SUCCESS) << "CuBLAS cublasDscal failed.";
+  CHECK_EQ(
+      cublasDscal(context_->cublas_handle_, num_rows_, &s, data_.data(), 1),
+      CUBLAS_STATUS_SUCCESS)
+      << "CuBLAS cublasDscal failed.";
 }
 
 }  // namespace ceres::internal

diff --git a/internal/ceres/cuda_vector.h b/internal/ceres/cuda_vector.h
index 4018c1b..e7c4b81 100644
--- a/internal/ceres/cuda_vector.h
+++ b/internal/ceres/cuda_vector.h

@@ -39,17 +39,18 @@
 // clang-format on
 
 #include <math.h>
+
 #include <memory>
 #include <string>
 
+#include "ceres/context_impl.h"
 #include "ceres/internal/export.h"
 #include "ceres/types.h"
-#include "ceres/context_impl.h"
 
 #ifndef CERES_NO_CUDA
 
-#include "ceres/cuda_buffer.h"
 #include "ceres/ceres_cuda_kernels.h"
+#include "ceres/cuda_buffer.h"
 #include "ceres/internal/eigen.h"
 #include "cublas_v2.h"
 #include "cusparse.h"
@@ -59,7 +60,6 @@
 // An Nx1 vector, denoted y hosted on the GPU, with CUDA-accelerated operations.
 class CERES_NO_EXPORT CudaVector {
  public:
-
   // Create a pre-allocated vector of size N and return a pointer to it. The
   // caller must ensure that InitCuda() has already been successfully called on
   // context before calling this method.
@@ -123,12 +123,11 @@
 // object in the conjugate gradients linear solver.
 inline double Norm(const CudaVector& x) { return x.Norm(); }
 inline void SetZero(CudaVector& x) { x.SetZero(); }
-inline void Axpby(
-    double a,
-    const CudaVector& x,
-    double b,
-    const CudaVector& y,
-    CudaVector& z) {
+inline void Axpby(double a,
+                  const CudaVector& x,
+                  double b,
+                  const CudaVector& y,
+                  CudaVector& z) {
   if (&x == &y && &y == &z) {
     // z = (a + b) * z;
     z.Scale(a + b);

diff --git a/internal/ceres/cuda_vector_test.cc b/internal/ceres/cuda_vector_test.cc
index db930ed..84193c0 100644
--- a/internal/ceres/cuda_vector_test.cc
+++ b/internal/ceres/cuda_vector_test.cc

@@ -28,11 +28,12 @@
 //
 // Author: joydeepb@cs.utexas.edu (Joydeep Biswas)
 
+#include "ceres/cuda_vector.h"
+
 #include <string>
 
 #include "ceres/internal/config.h"
 #include "ceres/internal/eigen.h"
-#include "ceres/cuda_vector.h"
 #include "glog/logging.h"
 #include "gtest/gtest.h"
 
@@ -130,23 +131,15 @@
   CudaVector x_gpu(&context, 10);
   x_gpu.CopyFromCpu(x);
 
-  EXPECT_NEAR(x_gpu.Norm(),
-              2.0,
-              std::numeric_limits<double>::epsilon());
+  EXPECT_NEAR(x_gpu.Norm(), 2.0, std::numeric_limits<double>::epsilon());
 
   x_gpu.SetZero();
-  EXPECT_NEAR(x_gpu.Norm(),
-              0.0,
-              std::numeric_limits<double>::epsilon());
+  EXPECT_NEAR(x_gpu.Norm(), 0.0, std::numeric_limits<double>::epsilon());
 
   x_gpu.CopyFromCpu(x);
-  EXPECT_NEAR(x_gpu.Norm(),
-              2.0,
-              std::numeric_limits<double>::epsilon());
+  EXPECT_NEAR(x_gpu.Norm(), 2.0, std::numeric_limits<double>::epsilon());
   SetZero(x_gpu);
-  EXPECT_NEAR(x_gpu.Norm(),
-              0.0,
-              std::numeric_limits<double>::epsilon());
+  EXPECT_NEAR(x_gpu.Norm(), 0.0, std::numeric_limits<double>::epsilon());
 }
 
 TEST(CudaVector, Resize) {
@@ -187,8 +180,7 @@
   y << 100, 10, 1, 0;
   ContextImpl context;
   std::string message;
-  CHECK(context.InitCUDA(&message))
-      << "InitCUDA() failed because: " << message;
+  CHECK(context.InitCUDA(&message)) << "InitCUDA() failed because: " << message;
   CudaVector x_gpu(&context, 4);
   CudaVector y_gpu(&context, 4);
   x_gpu.CopyFromCpu(x);
@@ -209,8 +201,7 @@
   y << 100, 10, 1, 0;
   ContextImpl context;
   std::string message;
-  CHECK(context.InitCUDA(&message))
-      << "InitCUDA() failed because: " << message;
+  CHECK(context.InitCUDA(&message)) << "InitCUDA() failed because: " << message;
   CudaVector x_gpu(&context, 4);
   CudaVector y_gpu(&context, 4);
   x_gpu.CopyFromCpu(x);
@@ -231,8 +222,7 @@
   y << 100, 10, 1, 0;
   ContextImpl context;
   std::string message;
-  CHECK(context.InitCUDA(&message))
-      << "InitCUDA() failed because: " << message;
+  CHECK(context.InitCUDA(&message)) << "InitCUDA() failed because: " << message;
   CudaVector x_gpu(&context, 4);
   CudaVector y_gpu(&context, 4);
   x_gpu.CopyFromCpu(x);
@@ -251,8 +241,7 @@
   x << 100, 10, 1, 0;
   ContextImpl context;
   std::string message;
-  CHECK(context.InitCUDA(&message))
-      << "InitCUDA() failed because: " << message;
+  CHECK(context.InitCUDA(&message)) << "InitCUDA() failed because: " << message;
   CudaVector x_gpu(&context, 4);
   CudaVector y_gpu(&context, 4);
   x_gpu.CopyFromCpu(x);
@@ -273,8 +262,7 @@
   y << 100, 10, 1, 0;
   ContextImpl context;
   std::string message;
-  CHECK(context.InitCUDA(&message))
-      << "InitCUDA() failed because: " << message;
+  CHECK(context.InitCUDA(&message)) << "InitCUDA() failed because: " << message;
   CudaVector x_gpu(&context, 4);
   CudaVector y_gpu(&context, 4);
   CudaVector z_gpu(&context, 4);
@@ -296,8 +284,7 @@
   x << 100, 10, 1, 0;
   ContextImpl context;
   std::string message;
-  CHECK(context.InitCUDA(&message))
-      << "InitCUDA() failed because: " << message;
+  CHECK(context.InitCUDA(&message)) << "InitCUDA() failed because: " << message;
   CudaVector x_gpu(&context, 4);
   CudaVector z_gpu(&context, 4);
   x_gpu.CopyFromCpu(x);
@@ -318,8 +305,7 @@
   y << 100, 10, 1, 0;
   ContextImpl context;
   std::string message;
-  CHECK(context.InitCUDA(&message))
-      << "InitCUDA() failed because: " << message;
+  CHECK(context.InitCUDA(&message)) << "InitCUDA() failed because: " << message;
   CudaVector x_gpu(&context, 10);
   CudaVector y_gpu(&context, 10);
   x_gpu.CopyFromCpu(x);
@@ -340,8 +326,7 @@
   y << 100, 10, 1, 0;
   ContextImpl context;
   std::string message;
-  CHECK(context.InitCUDA(&message))
-      << "InitCUDA() failed because: " << message;
+  CHECK(context.InitCUDA(&message)) << "InitCUDA() failed because: " << message;
   CudaVector x_gpu(&context, 4);
   CudaVector y_gpu(&context, 4);
   x_gpu.CopyFromCpu(x);
@@ -360,8 +345,7 @@
   x << 100, 10, 1, 0;
   ContextImpl context;
   std::string message;
-  CHECK(context.InitCUDA(&message))
-      << "InitCUDA() failed because: " << message;
+  CHECK(context.InitCUDA(&message)) << "InitCUDA() failed because: " << message;
   CudaVector x_gpu(&context, 10);
   x_gpu.CopyFromCpu(x);
 

diff --git a/internal/ceres/implicit_schur_complement_test.cc b/internal/ceres/implicit_schur_complement_test.cc
index 66c6707..f7abca1 100644
--- a/internal/ceres/implicit_schur_complement_test.cc
+++ b/internal/ceres/implicit_schur_complement_test.cc

@@ -155,8 +155,8 @@
     // Here, assuming that block_diagonal(F'F) == diagonal(F'F)
     Matrix Z_reference =
         (F.transpose() * F + DF).diagonal().asDiagonal().inverse() *
-        F.transpose() * E * (E.transpose() * E + DE).inverse() *
-        E.transpose() * F;
+        F.transpose() * E * (E.transpose() * E + DE).inverse() * E.transpose() *
+        F;
 
     for (int i = 0; i < num_f_cols; ++i) {
       Vector x(num_f_cols);
@@ -166,7 +166,6 @@
       Vector y(num_f_cols);
       y = lhs * x;
 
-
       Vector z(num_f_cols);
       isc.RightMultiplyAndAccumulate(x.data(), z.data());
 

diff --git a/internal/ceres/sparse_linear_operator_benchmark.cc b/internal/ceres/sparse_linear_operator_benchmark.cc
index c224e35..e2e22c9 100644
--- a/internal/ceres/sparse_linear_operator_benchmark.cc
+++ b/internal/ceres/sparse_linear_operator_benchmark.cc

@@ -34,16 +34,19 @@
 #include <string>
 
 #include "Eigen/Dense"
-#include "gflags/gflags.h"
 #include "benchmark/benchmark.h"
-#include "ceres/context_impl.h"
 #include "ceres/block_sparse_matrix.h"
+#include "ceres/context_impl.h"
 #include "ceres/cuda_sparse_matrix.h"
 #include "ceres/cuda_vector.h"
 #include "ceres/internal/config.h"
 #include "ceres/internal/eigen.h"
 #include "ceres/linear_solver.h"
+#include "gflags/gflags.h"
+
+#ifndef CERES_NO_CUDA
 #include "cuda_runtime.h"
+#endif
 
 namespace ceres::internal {
 
@@ -153,6 +156,10 @@
   CHECK_NE(sum, 0.0);
 }
 
+BENCHMARK(BM_CpuRightMultiplyAndAccumulate);
+BENCHMARK(BM_CpuLeftMultiplyAndAccumulate);
+
+#ifndef CERES_NO_CUDA
 static void BM_CudaRightMultiplyAndAccumulate(benchmark::State& state) {
   // Perform setup here
   std::unique_ptr<BlockSparseMatrix> jacobian =
@@ -165,9 +172,7 @@
   std::string message;
   context.InitCUDA(&message);
   CompressedRowSparseMatrix jacobian_crs(
-      jacobian->num_rows(),
-      jacobian->num_cols(),
-      jacobian->num_nonzeros());
+      jacobian->num_rows(), jacobian->num_cols(), jacobian->num_nonzeros());
   jacobian->ToCompressedRowSparseMatrix(&jacobian_crs);
   CudaSparseMatrix cuda_jacobian(&context, jacobian_crs);
   CudaVector cuda_x(&context, 0);
@@ -202,9 +207,7 @@
   std::string message;
   context.InitCUDA(&message);
   CompressedRowSparseMatrix jacobian_crs(
-      jacobian->num_rows(),
-      jacobian->num_cols(),
-      jacobian->num_nonzeros());
+      jacobian->num_rows(), jacobian->num_cols(), jacobian->num_nonzeros());
   jacobian->ToCompressedRowSparseMatrix(&jacobian_crs);
   CudaSparseMatrix cuda_jacobian(&context, jacobian_crs);
   CudaVector cuda_x(&context, 0);
@@ -227,10 +230,9 @@
   CHECK_NE(sum, 0.0);
 }
 
-BENCHMARK(BM_CpuRightMultiplyAndAccumulate);
-BENCHMARK(BM_CpuLeftMultiplyAndAccumulate);
 BENCHMARK(BM_CudaRightMultiplyAndAccumulate);
 BENCHMARK(BM_CudaLeftMultiplyAndAccumulate);
+#endif
 
 BENCHMARK_MAIN();