Improvements to benchmarks

1. Add CreateFakeBundleAdjustmentJacobian to create
   bundle adjustment structured jacobians.
2. sparse_linear_operator_benchmark -> spmv_benchmark
3. Refactor spmv_benchmark to use CreateFakeBundleAdjustmentJacobian.
4. Add BA and unstructured Jacobian variants of spmv benchmarks
5. Add BA and unstructured Jacobian benchmarks for the block Jacobi
   preconditioner.
6. Fix BlockSparseMatrix::ToCompressedRowSparseMatrix to add the
   row and column block structure to the output.

Change-Id: I737a3d7d82dad665b1f2e7886c2c3aedd702ffcd
diff --git a/internal/ceres/CMakeLists.txt b/internal/ceres/CMakeLists.txt
index 947003d..cde3895 100644
--- a/internal/ceres/CMakeLists.txt
+++ b/internal/ceres/CMakeLists.txt
@@ -213,6 +213,7 @@
     eigensparse.cc
     evaluation_callback.cc
     evaluator.cc
+    fake_bundle_adjustment_jacobian.cc
     file.cc
     first_order_function.cc
     float_suitesparse.cc
@@ -591,9 +592,12 @@
   add_executable(dense_linear_solver_benchmark dense_linear_solver_benchmark.cc)
   add_dependencies_to_benchmark(dense_linear_solver_benchmark)
 
-  add_executable(sparse_linear_operator_benchmark
-    sparse_linear_operator_benchmark.cc)
-  add_dependencies_to_benchmark(sparse_linear_operator_benchmark)
+  add_executable(spmv_benchmark spmv_benchmark.cc)
+  add_dependencies_to_benchmark(spmv_benchmark)
+
+  add_executable(block_jacobi_preconditioner_benchmark
+    block_jacobi_preconditioner_benchmark.cc)
+  add_dependencies_to_benchmark(block_jacobi_preconditioner_benchmark)
 
   add_subdirectory(autodiff_benchmarks)
 endif (BUILD_BENCHMARKS)
diff --git a/internal/ceres/block_jacobi_preconditioner_benchmark.cc b/internal/ceres/block_jacobi_preconditioner_benchmark.cc
new file mode 100644
index 0000000..90cab9e
--- /dev/null
+++ b/internal/ceres/block_jacobi_preconditioner_benchmark.cc
@@ -0,0 +1,135 @@
+// Ceres Solver - A fast non-linear least squares minimizer
+// Copyright 2022 Google Inc. All rights reserved.
+// http://ceres-solver.org/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+// * Neither the name of Google Inc. nor the names of its contributors may be
+//   used to endorse or promote products derived from this software without
+//   specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+// Authors: sameeragarwal@google.com (Sameer Agarwal)
+
+#include <memory>
+
+#include "Eigen/Dense"
+#include "benchmark/benchmark.h"
+#include "ceres/block_jacobi_preconditioner.h"
+#include "ceres/block_sparse_matrix.h"
+#include "ceres/fake_bundle_adjustment_jacobian.h"
+#include "ceres/internal/config.h"
+#include "ceres/internal/eigen.h"
+
+namespace ceres::internal {
+
+constexpr int kNumCameras = 1000;
+constexpr int kNumPoints = 10000;
+constexpr int kCameraSize = 6;
+constexpr int kPointSize = 3;
+constexpr double kVisibility = 0.1;
+
+constexpr int kNumRowBlocks = 100000;
+constexpr int kNumColBlocks = 10000;
+constexpr int kMinRowBlockSize = 1;
+constexpr int kMaxRowBlockSize = 5;
+constexpr int kMinColBlockSize = 1;
+constexpr int kMaxColBlockSize = 15;
+constexpr double kBlockDensity = 5.0 / kNumColBlocks;
+
+static void BM_BlockSparseJacobiPreconditionerBA(benchmark::State& state) {
+  std::mt19937 prng;
+  auto jacobian = CreateFakeBundleAdjustmentJacobian(
+      kNumCameras, kNumPoints, kCameraSize, kPointSize, kVisibility, prng);
+  BlockSparseJacobiPreconditioner p(*jacobian);
+  Vector d = Vector::Ones(jacobian->num_cols());
+  for (auto _ : state) {
+    p.Update(*jacobian, d.data());
+  }
+}
+
+BENCHMARK(BM_BlockSparseJacobiPreconditionerBA);
+
+static void BM_BlockCRSJacobiPreconditionerBA(benchmark::State& state) {
+  std::mt19937 prng;
+  auto jacobian = CreateFakeBundleAdjustmentJacobian(
+      kNumCameras, kNumPoints, kCameraSize, kPointSize, kVisibility, prng);
+
+  CompressedRowSparseMatrix jacobian_crs(
+      jacobian->num_rows(), jacobian->num_cols(), jacobian->num_nonzeros());
+  jacobian->ToCompressedRowSparseMatrix(&jacobian_crs);
+  BlockCRSJacobiPreconditioner p(jacobian_crs);
+  Vector d = Vector::Ones(jacobian_crs.num_cols());
+  for (auto _ : state) {
+    p.Update(jacobian_crs, d.data());
+  }
+}
+
+BENCHMARK(BM_BlockCRSJacobiPreconditionerBA);
+
+static void BM_BlockSparseJacobiPreconditionerUnstructured(
+    benchmark::State& state) {
+  BlockSparseMatrix::RandomMatrixOptions options;
+  options.num_row_blocks = kNumRowBlocks;
+  options.num_col_blocks = kNumColBlocks;
+  options.min_row_block_size = kMinRowBlockSize;
+  options.min_col_block_size = kMinColBlockSize;
+  options.max_row_block_size = kMaxRowBlockSize;
+  options.max_col_block_size = kMaxColBlockSize;
+  options.block_density = kBlockDensity;
+  std::mt19937 prng;
+
+  auto jacobian = BlockSparseMatrix::CreateRandomMatrix(options, prng);
+  BlockSparseJacobiPreconditioner p(*jacobian);
+  Vector d = Vector::Ones(jacobian->num_cols());
+  for (auto _ : state) {
+    p.Update(*jacobian, d.data());
+  }
+}
+
+BENCHMARK(BM_BlockSparseJacobiPreconditionerUnstructured);
+
+static void BM_BlockCRSJacobiPreconditionerUnstructured(
+    benchmark::State& state) {
+  BlockSparseMatrix::RandomMatrixOptions options;
+  options.num_row_blocks = kNumRowBlocks;
+  options.num_col_blocks = kNumColBlocks;
+  options.min_row_block_size = kMinRowBlockSize;
+  options.min_col_block_size = kMinColBlockSize;
+  options.max_row_block_size = kMaxRowBlockSize;
+  options.max_col_block_size = kMaxColBlockSize;
+  options.block_density = kBlockDensity;
+  std::mt19937 prng;
+
+  auto jacobian = BlockSparseMatrix::CreateRandomMatrix(options, prng);
+  CompressedRowSparseMatrix jacobian_crs(
+      jacobian->num_rows(), jacobian->num_cols(), jacobian->num_nonzeros());
+  jacobian->ToCompressedRowSparseMatrix(&jacobian_crs);
+  BlockCRSJacobiPreconditioner p(jacobian_crs);
+  Vector d = Vector::Ones(jacobian_crs.num_cols());
+  for (auto _ : state) {
+    p.Update(jacobian_crs, d.data());
+  }
+}
+BENCHMARK(BM_BlockCRSJacobiPreconditionerUnstructured);
+
+}  // namespace ceres::internal
+
+BENCHMARK_MAIN();
diff --git a/internal/ceres/block_sparse_matrix.cc b/internal/ceres/block_sparse_matrix.cc
index 4202470..b1447f8 100644
--- a/internal/ceres/block_sparse_matrix.cc
+++ b/internal/ceres/block_sparse_matrix.cc
@@ -170,9 +170,26 @@
 
 void BlockSparseMatrix::ToCompressedRowSparseMatrix(
     CompressedRowSparseMatrix* crs_matrix) const {
-  TripletSparseMatrix ts_matrix;
-  this->ToTripletSparseMatrix(&ts_matrix);
-  *crs_matrix = *CompressedRowSparseMatrix::FromTripletSparseMatrix(ts_matrix);
+  {
+    TripletSparseMatrix ts_matrix;
+    this->ToTripletSparseMatrix(&ts_matrix);
+    *crs_matrix =
+        *CompressedRowSparseMatrix::FromTripletSparseMatrix(ts_matrix);
+  }
+
+  int num_row_blocks = block_structure_->rows.size();
+  auto& row_blocks = *crs_matrix->mutable_row_blocks();
+  row_blocks.resize(num_row_blocks);
+  for (int i = 0; i < num_row_blocks; ++i) {
+    row_blocks[i] = block_structure_->rows[i].block.size;
+  }
+
+  int num_col_blocks = block_structure_->cols.size();
+  auto& col_blocks = *crs_matrix->mutable_col_blocks();
+  col_blocks.resize(num_col_blocks);
+  for (int i = 0; i < num_col_blocks; ++i) {
+    col_blocks[i] = block_structure_->cols[i].size;
+  }
 }
 
 void BlockSparseMatrix::ToDenseMatrix(Matrix* dense_matrix) const {
diff --git a/internal/ceres/fake_bundle_adjustment_jacobian.cc b/internal/ceres/fake_bundle_adjustment_jacobian.cc
new file mode 100644
index 0000000..9e7d82b
--- /dev/null
+++ b/internal/ceres/fake_bundle_adjustment_jacobian.cc
@@ -0,0 +1,99 @@
+// Ceres Solver - A fast non-linear least squares minimizer
+// Copyright 2022 Google Inc. All rights reserved.
+// http://ceres-solver.org/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+// * Neither the name of Google Inc. nor the names of its contributors may be
+//   used to endorse or promote products derived from this software without
+//   specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+// Authors: joydeepb@cs.utexas.edu (Joydeep Biswas)
+
+#include "ceres/fake_bundle_adjustment_jacobian.h"
+
+#include <iostream>
+#include <memory>
+#include <random>
+#include <string>
+
+#include "Eigen/Dense"
+#include "ceres/block_sparse_matrix.h"
+#include "ceres/internal/eigen.h"
+
+namespace ceres::internal {
+
+std::unique_ptr<BlockSparseMatrix> CreateFakeBundleAdjustmentJacobian(
+    int num_cameras,
+    int num_points,
+    int camera_size,
+    int point_size,
+    double visibility,
+    std::mt19937& prng) {
+  constexpr int kResidualSize = 2;
+
+  CompressedRowBlockStructure* bs = new CompressedRowBlockStructure;
+  int c = 0;
+  // Add column blocks for each point
+  for (int i = 0; i < num_points; ++i) {
+    bs->cols.push_back(Block(point_size, c));
+    c += point_size;
+  }
+
+  // Add column blocks for each camera.
+  for (int i = 0; i < num_cameras; ++i) {
+    bs->cols.push_back(Block(camera_size, c));
+    c += camera_size;
+  }
+
+  std::bernoulli_distribution visibility_distribution(visibility);
+  int row_pos = 0;
+  int cell_pos = 0;
+  for (int i = 0; i < num_points; ++i) {
+    for (int j = 0; j < num_cameras; ++j) {
+      if (!visibility_distribution(prng)) {
+        continue;
+      }
+      bs->rows.emplace_back();
+      auto& row = bs->rows.back();
+      row.block.position = row_pos;
+      row.block.size = kResidualSize;
+      auto& cells = row.cells;
+      cells.resize(2);
+
+      cells[0].block_id = i;
+      cells[0].position = cell_pos;
+      cell_pos += kResidualSize * point_size;
+
+      cells[1].block_id = num_points + j;
+      cells[1].position = cell_pos;
+      cell_pos += kResidualSize * camera_size;
+
+      row_pos += kResidualSize;
+    }
+  }
+
+  auto jacobian = std::make_unique<BlockSparseMatrix>(bs);
+  VectorRef(jacobian->mutable_values(), jacobian->num_nonzeros()).setRandom();
+  return jacobian;
+}
+
+}  // namespace ceres::internal
diff --git a/internal/ceres/fake_bundle_adjustment_jacobian.h b/internal/ceres/fake_bundle_adjustment_jacobian.h
new file mode 100644
index 0000000..9dacfaa
--- /dev/null
+++ b/internal/ceres/fake_bundle_adjustment_jacobian.h
@@ -0,0 +1,51 @@
+
+// Ceres Solver - A fast non-linear least squares minimizer
+// Copyright 2015 Google Inc. All rights reserved.
+// http://ceres-solver.org/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+// * Neither the name of Google Inc. nor the names of its contributors may be
+//   used to endorse or promote products derived from this software without
+//   specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+// Author: sameeragarwal@google.com (Sameer Agarwal)
+
+#ifndef CERES_INTERNAL_FAKE_BUNDLE_ADJUSTMENT_JACOBIAN
+#define CERES_INTERNAL_FAKE_BUNDLE_ADJUSTMENT_JACOBIAN
+
+#include <memory>
+#include <random>
+
+#include "ceres/block_sparse_matrix.h"
+
+namespace ceres::internal {
+std::unique_ptr<BlockSparseMatrix> CreateFakeBundleAdjustmentJacobian(
+    int num_cameras,
+    int num_points,
+    int camera_size,
+    int landmark_size,
+    double visibility,
+    std::mt19937& prng);
+
+}  // namespace ceres::internal
+
+#endif  // CERES_INTERNAL_FAKE_BUNDLE_ADJUSTMENT_JACOBIAN
diff --git a/internal/ceres/sparse_linear_operator_benchmark.cc b/internal/ceres/sparse_linear_operator_benchmark.cc
deleted file mode 100644
index 4969c0c..0000000
--- a/internal/ceres/sparse_linear_operator_benchmark.cc
+++ /dev/null
@@ -1,241 +0,0 @@
-// Ceres Solver - A fast non-linear least squares minimizer
-// Copyright 2022 Google Inc. All rights reserved.
-// http://ceres-solver.org/
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-// * Redistributions of source code must retain the above copyright notice,
-//   this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above copyright notice,
-//   this list of conditions and the following disclaimer in the documentation
-//   and/or other materials provided with the distribution.
-// * Neither the name of Google Inc. nor the names of its contributors may be
-//   used to endorse or promote products derived from this software without
-//   specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-// POSSIBILITY OF SUCH DAMAGE.
-//
-// Authors: joydeepb@cs.utexas.edu (Joydeep Biswas)
-
-#include <iostream>
-#include <memory>
-#include <random>
-#include <string>
-
-#include "Eigen/Dense"
-#include "benchmark/benchmark.h"
-#include "ceres/block_sparse_matrix.h"
-#include "ceres/context_impl.h"
-#include "ceres/cuda_sparse_matrix.h"
-#include "ceres/cuda_vector.h"
-#include "ceres/internal/config.h"
-#include "ceres/internal/eigen.h"
-#include "ceres/linear_solver.h"
-#include "gflags/gflags.h"
-
-#ifndef CERES_NO_CUDA
-#include "cuda_runtime.h"
-#endif
-
-namespace ceres::internal {
-
-// TODO(Joydeep Biswas): Add a matrix of benchmark sizes to test.
-
-namespace {
-// Generate a synthetic BA-style Jacobian with n camera poses, m landmarks, n_d
-// parameters per camera, m_d parameters per landmark, and k residuals per
-// camera.
-// TODO: Unify the synthetic Jacobian generation code with the code from
-// schur_eliminator_benchmark.cc since they are very similar.
-std::unique_ptr<BlockSparseMatrix> GenerateSyntheticJacobian(
-    int n, int m, int n_d, int m_d, int k) {
-  static const int kResidualSize = 2;
-  CompressedRowBlockStructure* bs = new CompressedRowBlockStructure;
-  int c = 0;
-  // Add column blocks for each camera.
-  for (int i = 0; i < n; ++i) {
-    bs->cols.push_back(Block(n_d, c));
-    c += n_d;
-  }
-  // Add column blocks for each landmark.
-  for (int i = 0; i < m; ++i) {
-    bs->cols.push_back(Block(m_d, c));
-    c += m_d;
-  }
-  // Total number of row blocks = k * n.
-  bs->rows.resize(k * n);
-  int values_offset = 0;
-  std::mt19937 prng;
-  std::uniform_real_distribution<double> uniform_0_m(0.0,
-                                                     static_cast<double>(m));
-  // Generate structure of the Jacobian.
-  // For n cameras:
-  for (int i = 0; i < n; ++i) {
-    const int camera_block_id = i;
-    // For k residuals per camera:
-    for (int j = 0; j < k; ++j) {
-      // Pick the landmark of the residual randomly from [0, m).
-      const int landmark_id = uniform_0_m(prng);
-      const int landmark_block_id = n + landmark_id;
-      const int row_idx = i * k + j;
-      const int row = kResidualSize * row_idx;
-      bs->rows[row_idx].block = Block(kResidualSize, row);
-      bs->rows[row_idx].cells.resize(2);
-      // The camera part of the jacobian of this residual.
-      bs->rows[row_idx].cells[0] = Cell(camera_block_id, values_offset);
-      values_offset += n_d * kResidualSize;
-      // The landmark part of the jacobian of this residual.
-      bs->rows[row_idx].cells[1] = Cell(landmark_block_id, values_offset);
-      values_offset += m_d * kResidualSize;
-    }
-  }
-  auto jacobian = std::make_unique<BlockSparseMatrix>(bs);
-  VectorRef(jacobian->mutable_values(), jacobian->num_nonzeros()).setRandom();
-  return jacobian;
-}
-}  // namespace
-
-DEFINE_int32(num_cameras, 1000, "Number of cameras.");
-DEFINE_int32(num_landmarks, 10000, "Number of landmarks.");
-DEFINE_int32(num_parameters_per_camera, 6, "Number of parameters per camera.");
-DEFINE_int32(num_parameters_per_landmark,
-             3,
-             "Number of parameters per landmark.");
-DEFINE_int32(num_residuals_per_camera, 100, "Number of residuals per camera.");
-
-static void BM_CpuRightMultiplyAndAccumulate(benchmark::State& state) {
-  // Perform setup here
-  std::unique_ptr<BlockSparseMatrix> jacobian =
-      GenerateSyntheticJacobian(FLAGS_num_cameras,
-                                FLAGS_num_landmarks,
-                                FLAGS_num_parameters_per_camera,
-                                FLAGS_num_parameters_per_landmark,
-                                FLAGS_num_residuals_per_camera);
-  Vector x(jacobian->num_cols());
-  Vector y(jacobian->num_rows());
-  x.setRandom();
-  y.setRandom();
-  double sum = 0;
-  for (auto _ : state) {
-    // This code gets timed
-    jacobian->RightMultiplyAndAccumulate(x.data(), y.data());
-    sum += y.norm();
-  }
-  CHECK_NE(sum, 0.0);
-}
-
-static void BM_CpuLeftMultiplyAndAccumulate(benchmark::State& state) {
-  // Perform setup here
-  std::unique_ptr<BlockSparseMatrix> jacobian =
-      GenerateSyntheticJacobian(FLAGS_num_cameras,
-                                FLAGS_num_landmarks,
-                                FLAGS_num_parameters_per_camera,
-                                FLAGS_num_parameters_per_landmark,
-                                FLAGS_num_residuals_per_camera);
-  Vector x(jacobian->num_rows());
-  Vector y(jacobian->num_cols());
-  x.setRandom();
-  y.setRandom();
-  double sum = 0;
-  for (auto _ : state) {
-    // This code gets timed
-    jacobian->LeftMultiplyAndAccumulate(x.data(), y.data());
-    sum += y.norm();
-  }
-  CHECK_NE(sum, 0.0);
-}
-
-BENCHMARK(BM_CpuRightMultiplyAndAccumulate);
-BENCHMARK(BM_CpuLeftMultiplyAndAccumulate);
-
-#ifndef CERES_NO_CUDA
-static void BM_CudaRightMultiplyAndAccumulate(benchmark::State& state) {
-  // Perform setup here
-  std::unique_ptr<BlockSparseMatrix> jacobian =
-      GenerateSyntheticJacobian(FLAGS_num_cameras,
-                                FLAGS_num_landmarks,
-                                FLAGS_num_parameters_per_camera,
-                                FLAGS_num_parameters_per_landmark,
-                                FLAGS_num_residuals_per_camera);
-  ContextImpl context;
-  std::string message;
-  context.InitCuda(&message);
-  CompressedRowSparseMatrix jacobian_crs(
-      jacobian->num_rows(), jacobian->num_cols(), jacobian->num_nonzeros());
-  jacobian->ToCompressedRowSparseMatrix(&jacobian_crs);
-  CudaSparseMatrix cuda_jacobian(&context, jacobian_crs);
-  CudaVector cuda_x(&context, 0);
-  CudaVector cuda_y(&context, 0);
-
-  Vector x(jacobian->num_cols());
-  Vector y(jacobian->num_rows());
-  x.setRandom();
-  y.setRandom();
-
-  cuda_x.CopyFromCpu(x);
-  cuda_y.CopyFromCpu(y);
-  double sum = 0;
-  for (auto _ : state) {
-    // This code gets timed
-    cuda_jacobian.RightMultiplyAndAccumulate(cuda_x, &cuda_y);
-    sum += cuda_y.Norm();
-    CHECK_EQ(cudaDeviceSynchronize(), cudaSuccess);
-  }
-  CHECK_NE(sum, 0.0);
-}
-
-static void BM_CudaLeftMultiplyAndAccumulate(benchmark::State& state) {
-  // Perform setup here
-  std::unique_ptr<BlockSparseMatrix> jacobian =
-      GenerateSyntheticJacobian(FLAGS_num_cameras,
-                                FLAGS_num_landmarks,
-                                FLAGS_num_parameters_per_camera,
-                                FLAGS_num_parameters_per_landmark,
-                                FLAGS_num_residuals_per_camera);
-  ContextImpl context;
-  std::string message;
-  context.InitCuda(&message);
-  CompressedRowSparseMatrix jacobian_crs(
-      jacobian->num_rows(), jacobian->num_cols(), jacobian->num_nonzeros());
-  jacobian->ToCompressedRowSparseMatrix(&jacobian_crs);
-  CudaSparseMatrix cuda_jacobian(&context, jacobian_crs);
-  CudaVector cuda_x(&context, 0);
-  CudaVector cuda_y(&context, 0);
-
-  Vector x(jacobian->num_rows());
-  Vector y(jacobian->num_cols());
-  x.setRandom();
-  y.setRandom();
-
-  cuda_x.CopyFromCpu(x);
-  cuda_y.CopyFromCpu(y);
-  double sum = 0;
-  for (auto _ : state) {
-    // This code gets timed
-    cuda_jacobian.LeftMultiplyAndAccumulate(cuda_x, &cuda_y);
-    sum += cuda_y.Norm();
-    CHECK_EQ(cudaDeviceSynchronize(), cudaSuccess);
-  }
-  CHECK_NE(sum, 0.0);
-}
-
-BENCHMARK(BM_CudaRightMultiplyAndAccumulate);
-BENCHMARK(BM_CudaLeftMultiplyAndAccumulate);
-#endif
-
-BENCHMARK_MAIN();
-
-}  // namespace ceres::internal
-
-BENCHMARK_MAIN();
diff --git a/internal/ceres/spmv_benchmark.cc b/internal/ceres/spmv_benchmark.cc
new file mode 100644
index 0000000..0f475be
--- /dev/null
+++ b/internal/ceres/spmv_benchmark.cc
@@ -0,0 +1,425 @@
+// Ceres Solver - A fast non-linear least squares minimizer
+// Copyright 2022 Google Inc. All rights reserved.
+// http://ceres-solver.org/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+// * Neither the name of Google Inc. nor the names of its contributors may be
+//   used to endorse or promote products derived from this software without
+//   specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+// Authors: joydeepb@cs.utexas.edu (Joydeep Biswas)
+
+#include <memory>
+#include <random>
+#include <string>
+
+#include "Eigen/Dense"
+#include "benchmark/benchmark.h"
+#include "ceres/block_jacobi_preconditioner.h"
+#include "ceres/block_sparse_matrix.h"
+#include "ceres/context_impl.h"
+#include "ceres/cuda_sparse_matrix.h"
+#include "ceres/cuda_vector.h"
+#include "ceres/fake_bundle_adjustment_jacobian.h"
+#include "ceres/internal/config.h"
+#include "ceres/internal/eigen.h"
+#include "ceres/linear_solver.h"
+
+#ifndef CERES_NO_CUDA
+#include "cuda_runtime.h"
+#endif
+
+namespace ceres::internal {
+
+constexpr int kNumCameras = 1000;
+constexpr int kNumPoints = 10000;
+constexpr int kCameraSize = 6;
+constexpr int kPointSize = 3;
+constexpr double kVisibility = 0.1;
+
+constexpr int kNumRowBlocks = 100000;
+constexpr int kNumColBlocks = 10000;
+constexpr int kMinRowBlockSize = 1;
+constexpr int kMaxRowBlockSize = 5;
+constexpr int kMinColBlockSize = 1;
+constexpr int kMaxColBlockSize = 15;
+constexpr double kBlockDensity = 5.0 / kNumColBlocks;
+
+static void BM_BlockSparseRightMultiplyAndAccumulateBA(
+    benchmark::State& state) {
+  std::mt19937 prng;
+  auto jacobian = CreateFakeBundleAdjustmentJacobian(
+      kNumCameras, kNumPoints, kCameraSize, kPointSize, kVisibility, prng);
+
+  Vector x(jacobian->num_cols());
+  Vector y(jacobian->num_rows());
+  x.setRandom();
+  y.setRandom();
+  double sum = 0;
+  for (auto _ : state) {
+    jacobian->RightMultiplyAndAccumulate(x.data(), y.data());
+    sum += y.norm();
+  }
+  CHECK_NE(sum, 0.0);
+}
+
+BENCHMARK(BM_BlockSparseRightMultiplyAndAccumulateBA);
+
+static void BM_BlockSparseRightMultiplyAndAccumulateUnstructured(
+    benchmark::State& state) {
+  BlockSparseMatrix::RandomMatrixOptions options;
+  options.num_row_blocks = kNumRowBlocks;
+  options.num_col_blocks = kNumColBlocks;
+  options.min_row_block_size = kMinRowBlockSize;
+  options.min_col_block_size = kMinColBlockSize;
+  options.max_row_block_size = kMaxRowBlockSize;
+  options.max_col_block_size = kMaxColBlockSize;
+  options.block_density = kBlockDensity;
+  std::mt19937 prng;
+
+  auto jacobian = BlockSparseMatrix::CreateRandomMatrix(options, prng);
+
+  Vector x(jacobian->num_cols());
+  Vector y(jacobian->num_rows());
+  x.setRandom();
+  y.setRandom();
+  double sum = 0;
+  for (auto _ : state) {
+    jacobian->RightMultiplyAndAccumulate(x.data(), y.data());
+    sum += y.norm();
+  }
+  CHECK_NE(sum, 0.0);
+}
+
+BENCHMARK(BM_BlockSparseRightMultiplyAndAccumulateUnstructured);
+
+static void BM_BlockSparseLeftMultiplyAndAccumulateBA(benchmark::State& state) {
+  std::mt19937 prng;
+  auto jacobian = CreateFakeBundleAdjustmentJacobian(
+      kNumCameras, kNumPoints, kCameraSize, kPointSize, kVisibility, prng);
+  Vector x(jacobian->num_rows());
+  Vector y(jacobian->num_cols());
+  x.setRandom();
+  y.setRandom();
+  double sum = 0;
+  for (auto _ : state) {
+    jacobian->LeftMultiplyAndAccumulate(x.data(), y.data());
+    sum += y.norm();
+  }
+  CHECK_NE(sum, 0.0);
+}
+
+BENCHMARK(BM_BlockSparseLeftMultiplyAndAccumulateBA);
+
+static void BM_BlockSparseLeftMultiplyAndAccumulateUnstructured(
+    benchmark::State& state) {
+  BlockSparseMatrix::RandomMatrixOptions options;
+  options.num_row_blocks = 100000;
+  options.num_col_blocks = 10000;
+  options.min_row_block_size = 1;
+  options.min_col_block_size = 1;
+  options.max_row_block_size = 10;
+  options.max_col_block_size = 15;
+  options.block_density = 5.0 / options.num_col_blocks;
+  std::mt19937 prng;
+
+  auto jacobian = BlockSparseMatrix::CreateRandomMatrix(options, prng);
+  Vector x(jacobian->num_rows());
+  Vector y(jacobian->num_cols());
+  x.setRandom();
+  y.setRandom();
+  double sum = 0;
+  for (auto _ : state) {
+    jacobian->LeftMultiplyAndAccumulate(x.data(), y.data());
+    sum += y.norm();
+  }
+  CHECK_NE(sum, 0.0);
+}
+
+BENCHMARK(BM_BlockSparseLeftMultiplyAndAccumulateUnstructured);
+
+static void BM_CRSRightMultiplyAndAccumulateBA(benchmark::State& state) {
+  std::mt19937 prng;
+  auto bsm_jacobian = CreateFakeBundleAdjustmentJacobian(
+      kNumCameras, kNumPoints, kCameraSize, kPointSize, kVisibility, prng);
+
+  CompressedRowSparseMatrix jacobian(bsm_jacobian->num_rows(),
+                                     bsm_jacobian->num_cols(),
+                                     bsm_jacobian->num_nonzeros());
+  bsm_jacobian->ToCompressedRowSparseMatrix(&jacobian);
+
+  Vector x(jacobian.num_cols());
+  Vector y(jacobian.num_rows());
+  x.setRandom();
+  y.setRandom();
+  double sum = 0;
+  for (auto _ : state) {
+    jacobian.RightMultiplyAndAccumulate(x.data(), y.data());
+    sum += y.norm();
+  }
+  CHECK_NE(sum, 0.0);
+}
+
+BENCHMARK(BM_CRSRightMultiplyAndAccumulateBA);
+
+static void BM_CRSRightMultiplyAndAccumulateUnstructured(
+    benchmark::State& state) {
+  BlockSparseMatrix::RandomMatrixOptions options;
+  options.num_row_blocks = kNumRowBlocks;
+  options.num_col_blocks = kNumColBlocks;
+  options.min_row_block_size = kMinRowBlockSize;
+  options.min_col_block_size = kMinColBlockSize;
+  options.max_row_block_size = kMaxRowBlockSize;
+  options.max_col_block_size = kMaxColBlockSize;
+  options.block_density = kBlockDensity;
+  std::mt19937 prng;
+
+  auto bsm_jacobian = BlockSparseMatrix::CreateRandomMatrix(options, prng);
+  CompressedRowSparseMatrix jacobian(bsm_jacobian->num_rows(),
+                                     bsm_jacobian->num_cols(),
+                                     bsm_jacobian->num_nonzeros());
+  bsm_jacobian->ToCompressedRowSparseMatrix(&jacobian);
+
+  Vector x(jacobian.num_cols());
+  Vector y(jacobian.num_rows());
+  x.setRandom();
+  y.setRandom();
+  double sum = 0;
+  for (auto _ : state) {
+    jacobian.RightMultiplyAndAccumulate(x.data(), y.data());
+    sum += y.norm();
+  }
+  CHECK_NE(sum, 0.0);
+}
+
+BENCHMARK(BM_CRSRightMultiplyAndAccumulateUnstructured);
+
+static void BM_CRSLeftMultiplyAndAccumulateBA(benchmark::State& state) {
+  std::mt19937 prng;
+  // Perform setup here
+  auto bsm_jacobian = CreateFakeBundleAdjustmentJacobian(
+      kNumCameras, kNumPoints, kCameraSize, kPointSize, kVisibility, prng);
+  CompressedRowSparseMatrix jacobian(bsm_jacobian->num_rows(),
+                                     bsm_jacobian->num_cols(),
+                                     bsm_jacobian->num_nonzeros());
+  bsm_jacobian->ToCompressedRowSparseMatrix(&jacobian);
+
+  Vector x(jacobian.num_rows());
+  Vector y(jacobian.num_cols());
+  x.setRandom();
+  y.setRandom();
+  double sum = 0;
+  for (auto _ : state) {
+    // This code gets timed
+    jacobian.LeftMultiplyAndAccumulate(x.data(), y.data());
+    sum += y.norm();
+  }
+  CHECK_NE(sum, 0.0);
+}
+
+BENCHMARK(BM_CRSLeftMultiplyAndAccumulateBA);
+
+static void BM_CRSLeftMultiplyAndAccumulateUnstructured(
+    benchmark::State& state) {
+  BlockSparseMatrix::RandomMatrixOptions options;
+  options.num_row_blocks = kNumRowBlocks;
+  options.num_col_blocks = kNumColBlocks;
+  options.min_row_block_size = kMinRowBlockSize;
+  options.min_col_block_size = kMinColBlockSize;
+  options.max_row_block_size = kMaxRowBlockSize;
+  options.max_col_block_size = kMaxColBlockSize;
+  options.block_density = kBlockDensity;
+  std::mt19937 prng;
+
+  auto bsm_jacobian = BlockSparseMatrix::CreateRandomMatrix(options, prng);
+  CompressedRowSparseMatrix jacobian(bsm_jacobian->num_rows(),
+                                     bsm_jacobian->num_cols(),
+                                     bsm_jacobian->num_nonzeros());
+  bsm_jacobian->ToCompressedRowSparseMatrix(&jacobian);
+
+  Vector x(jacobian.num_rows());
+  Vector y(jacobian.num_cols());
+  x.setRandom();
+  y.setRandom();
+  double sum = 0;
+  for (auto _ : state) {
+    // This code gets timed
+    jacobian.LeftMultiplyAndAccumulate(x.data(), y.data());
+    sum += y.norm();
+  }
+  CHECK_NE(sum, 0.0);
+}
+
+BENCHMARK(BM_CRSLeftMultiplyAndAccumulateUnstructured);
+
+#ifndef CERES_NO_CUDA
+static void BM_CudaRightMultiplyAndAccumulateBA(benchmark::State& state) {
+  std::mt19937 prng;
+  auto jacobian = CreateFakeBundleAdjustmentJacobian(
+      kNumCameras, kNumPoints, kCameraSize, kPointSize, kVisibility, prng);
+  ContextImpl context;
+  std::string message;
+  context.InitCuda(&message);
+  CompressedRowSparseMatrix jacobian_crs(
+      jacobian->num_rows(), jacobian->num_cols(), jacobian->num_nonzeros());
+  jacobian->ToCompressedRowSparseMatrix(&jacobian_crs);
+  CudaSparseMatrix cuda_jacobian(&context, jacobian_crs);
+  CudaVector cuda_x(&context, 0);
+  CudaVector cuda_y(&context, 0);
+
+  Vector x(jacobian->num_cols());
+  Vector y(jacobian->num_rows());
+  x.setRandom();
+  y.setRandom();
+
+  cuda_x.CopyFromCpu(x);
+  cuda_y.CopyFromCpu(y);
+  double sum = 0;
+  for (auto _ : state) {
+    cuda_jacobian.RightMultiplyAndAccumulate(cuda_x, &cuda_y);
+    sum += cuda_y.Norm();
+    CHECK_EQ(cudaDeviceSynchronize(), cudaSuccess);
+  }
+  CHECK_NE(sum, 0.0);
+}
+
+BENCHMARK(BM_CudaRightMultiplyAndAccumulateBA);
+
+static void BM_CudaRightMultiplyAndAccumulateUnstructured(
+    benchmark::State& state) {
+  BlockSparseMatrix::RandomMatrixOptions options;
+  options.num_row_blocks = kNumRowBlocks;
+  options.num_col_blocks = kNumColBlocks;
+  options.min_row_block_size = kMinRowBlockSize;
+  options.min_col_block_size = kMinColBlockSize;
+  options.max_row_block_size = kMaxRowBlockSize;
+  options.max_col_block_size = kMaxColBlockSize;
+  options.block_density = kBlockDensity;
+  std::mt19937 prng;
+
+  auto jacobian = BlockSparseMatrix::CreateRandomMatrix(options, prng);
+  ContextImpl context;
+  std::string message;
+  context.InitCuda(&message);
+  CompressedRowSparseMatrix jacobian_crs(
+      jacobian->num_rows(), jacobian->num_cols(), jacobian->num_nonzeros());
+  jacobian->ToCompressedRowSparseMatrix(&jacobian_crs);
+  CudaSparseMatrix cuda_jacobian(&context, jacobian_crs);
+  CudaVector cuda_x(&context, 0);
+  CudaVector cuda_y(&context, 0);
+
+  Vector x(jacobian->num_cols());
+  Vector y(jacobian->num_rows());
+  x.setRandom();
+  y.setRandom();
+
+  cuda_x.CopyFromCpu(x);
+  cuda_y.CopyFromCpu(y);
+  double sum = 0;
+  for (auto _ : state) {
+    cuda_jacobian.RightMultiplyAndAccumulate(cuda_x, &cuda_y);
+    sum += cuda_y.Norm();
+    CHECK_EQ(cudaDeviceSynchronize(), cudaSuccess);
+  }
+  CHECK_NE(sum, 0.0);
+}
+
+BENCHMARK(BM_CudaRightMultiplyAndAccumulateUnstructured);
+
+static void BM_CudaLeftMultiplyAndAccumulateBA(benchmark::State& state) {
+  std::mt19937 prng;
+  auto jacobian = CreateFakeBundleAdjustmentJacobian(
+      kNumCameras, kNumPoints, kCameraSize, kPointSize, kVisibility, prng);
+  ContextImpl context;
+  std::string message;
+  context.InitCuda(&message);
+  CompressedRowSparseMatrix jacobian_crs(
+      jacobian->num_rows(), jacobian->num_cols(), jacobian->num_nonzeros());
+  jacobian->ToCompressedRowSparseMatrix(&jacobian_crs);
+  CudaSparseMatrix cuda_jacobian(&context, jacobian_crs);
+  CudaVector cuda_x(&context, 0);
+  CudaVector cuda_y(&context, 0);
+
+  Vector x(jacobian->num_rows());
+  Vector y(jacobian->num_cols());
+  x.setRandom();
+  y.setRandom();
+
+  cuda_x.CopyFromCpu(x);
+  cuda_y.CopyFromCpu(y);
+  double sum = 0;
+  for (auto _ : state) {
+    cuda_jacobian.LeftMultiplyAndAccumulate(cuda_x, &cuda_y);
+    sum += cuda_y.Norm();
+    CHECK_EQ(cudaDeviceSynchronize(), cudaSuccess);
+  }
+  CHECK_NE(sum, 0.0);
+}
+
+BENCHMARK(BM_CudaLeftMultiplyAndAccumulateBA);
+
+static void BM_CudaLeftMultiplyAndAccumulateUnstructured(
+    benchmark::State& state) {
+  BlockSparseMatrix::RandomMatrixOptions options;
+  options.num_row_blocks = kNumRowBlocks;
+  options.num_col_blocks = kNumColBlocks;
+  options.min_row_block_size = kMinRowBlockSize;
+  options.min_col_block_size = kMinColBlockSize;
+  options.max_row_block_size = kMaxRowBlockSize;
+  options.max_col_block_size = kMaxColBlockSize;
+  options.block_density = kBlockDensity;
+  std::mt19937 prng;
+
+  auto jacobian = BlockSparseMatrix::CreateRandomMatrix(options, prng);
+  ContextImpl context;
+  std::string message;
+  context.InitCuda(&message);
+  CompressedRowSparseMatrix jacobian_crs(
+      jacobian->num_rows(), jacobian->num_cols(), jacobian->num_nonzeros());
+  jacobian->ToCompressedRowSparseMatrix(&jacobian_crs);
+  CudaSparseMatrix cuda_jacobian(&context, jacobian_crs);
+  CudaVector cuda_x(&context, 0);
+  CudaVector cuda_y(&context, 0);
+
+  Vector x(jacobian->num_rows());
+  Vector y(jacobian->num_cols());
+  x.setRandom();
+  y.setRandom();
+
+  cuda_x.CopyFromCpu(x);
+  cuda_y.CopyFromCpu(y);
+  double sum = 0;
+  for (auto _ : state) {
+    cuda_jacobian.LeftMultiplyAndAccumulate(cuda_x, &cuda_y);
+    sum += cuda_y.Norm();
+    CHECK_EQ(cudaDeviceSynchronize(), cudaSuccess);
+  }
+  CHECK_NE(sum, 0.0);
+}
+
+BENCHMARK(BM_CudaLeftMultiplyAndAccumulateBA);
+
+#endif
+
+}  // namespace ceres::internal
+
+BENCHMARK_MAIN();