Change storage in BlockRandomAccessSparseMatrix

 - TripletSparseMatrix in BlockRandomAccessSparseMatrix is replaced with
   BlockSparseMatrix
 - BlockSparseMatrix::ToCompressedRowSparseMatrix now builds the CRS
   structure directly, without sorting (see the sketch below)
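
The sort-free conversion relies on the fact that, within every row-block,
the cells are already ordered by column-block, so the CRS row offsets and
column indices of every scalar row can be emitted in a single forward pass
over the block structure. A minimal standalone sketch of the idea, using
simplified hypothetical types rather than the actual Ceres API:

    // Hypothetical, simplified block layout; only illustrates the sort-free
    // construction of the CRS rows/cols arrays.
    #include <cstdio>
    #include <vector>

    struct Block { int size, position; };  // scalar size and column offset
    struct Cell { int col_block_id; };     // one cell of a row-block

    int main() {
      // Two column-blocks (sizes 2 and 3) and two row-blocks (sizes 2 and 1).
      std::vector<Block> col_blocks = {{2, 0}, {3, 2}};
      std::vector<Block> row_blocks = {{2, 0}, {1, 2}};
      // Cells of each row-block, already ordered by column-block.
      std::vector<std::vector<Cell>> cells = {{{0}, {1}}, {{1}}};

      std::vector<int> crs_rows = {0};
      std::vector<int> crs_cols;
      for (size_t rb = 0; rb < row_blocks.size(); ++rb) {
        // Every scalar row of a row-block has the same set of columns.
        int row_nnz = 0;
        for (const Cell& c : cells[rb]) {
          row_nnz += col_blocks[c.col_block_id].size;
        }
        for (int r = 0; r < row_blocks[rb].size; ++r) {
          crs_rows.push_back(crs_rows.back() + row_nnz);
          for (const Cell& c : cells[rb]) {
            const Block& b = col_blocks[c.col_block_id];
            // Column indices of a cell are contiguous:
            // position, ..., position + size - 1.
            for (int i = 0; i < b.size; ++i) {
              crs_cols.push_back(b.position + i);
            }
          }
        }
      }
      for (int r : crs_rows) std::printf("%d ", r);  // 0 5 10 13
      std::printf("\n");
      for (int c : crs_cols) std::printf("%d ", c);  // 0 1 2 3 4 0 1 2 3 4 2 3 4
      std::printf("\n");
      return 0;
    }

No intermediate TripletSparseMatrix and no sort is involved; a second pass
(UpdateCompressedRowSparseMatrix) only copies the values into the fixed
structure.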

Change-Id: Ib951fda1b9394050e2c47a9721172c5e3c674801
diff --git a/internal/ceres/block_jacobi_preconditioner_benchmark.cc b/internal/ceres/block_jacobi_preconditioner_benchmark.cc
index edd431d..b5c1613 100644
--- a/internal/ceres/block_jacobi_preconditioner_benchmark.cc
+++ b/internal/ceres/block_jacobi_preconditioner_benchmark.cc
@@ -85,19 +85,17 @@
   auto jacobian = CreateFakeBundleAdjustmentJacobian(
       kNumCameras, kNumPoints, kCameraSize, kPointSize, kVisibility, prng);
 
-  CompressedRowSparseMatrix jacobian_crs(
-      jacobian->num_rows(), jacobian->num_cols(), jacobian->num_nonzeros());
-  jacobian->ToCompressedRowSparseMatrix(&jacobian_crs);
+  auto jacobian_crs = jacobian->ToCompressedRowSparseMatrix();
   Preconditioner::Options preconditioner_options;
   ContextImpl context;
   preconditioner_options.context = &context;
   preconditioner_options.num_threads = static_cast<int>(state.range(0));
   context.EnsureMinimumThreads(preconditioner_options.num_threads);
-  BlockCRSJacobiPreconditioner p(preconditioner_options, jacobian_crs);
+  BlockCRSJacobiPreconditioner p(preconditioner_options, *jacobian_crs);
 
-  Vector d = Vector::Ones(jacobian_crs.num_cols());
+  Vector d = Vector::Ones(jacobian_crs->num_cols());
   for (auto _ : state) {
-    p.Update(jacobian_crs, d.data());
+    p.Update(*jacobian_crs, d.data());
   }
 }
 
@@ -154,19 +152,17 @@
   std::mt19937 prng;
 
   auto jacobian = BlockSparseMatrix::CreateRandomMatrix(options, prng);
-  CompressedRowSparseMatrix jacobian_crs(
-      jacobian->num_rows(), jacobian->num_cols(), jacobian->num_nonzeros());
-  jacobian->ToCompressedRowSparseMatrix(&jacobian_crs);
+  auto jacobian_crs = jacobian->ToCompressedRowSparseMatrix();
   Preconditioner::Options preconditioner_options;
   ContextImpl context;
   preconditioner_options.context = &context;
   preconditioner_options.num_threads = static_cast<int>(state.range(0));
   context.EnsureMinimumThreads(preconditioner_options.num_threads);
-  BlockCRSJacobiPreconditioner p(preconditioner_options, jacobian_crs);
+  BlockCRSJacobiPreconditioner p(preconditioner_options, *jacobian_crs);
 
-  Vector d = Vector::Ones(jacobian_crs.num_cols());
+  Vector d = Vector::Ones(jacobian_crs->num_cols());
   for (auto _ : state) {
-    p.Update(jacobian_crs, d.data());
+    p.Update(*jacobian_crs, d.data());
   }
 }
 BENCHMARK(BM_BlockCRSJacobiPreconditionerUnstructured)
diff --git a/internal/ceres/block_random_access_sparse_matrix.cc b/internal/ceres/block_random_access_sparse_matrix.cc
index bd9f1ba..992bd01 100644
--- a/internal/ceres/block_random_access_sparse_matrix.cc
+++ b/internal/ceres/block_random_access_sparse_matrix.cc
@@ -53,53 +53,46 @@
   CHECK_LE(blocks.size(), std::numeric_limits<std::int32_t>::max());
 
   const int num_cols = NumScalarEntries(blocks);
+  const int num_blocks = blocks.size();
 
-  // Count the number of scalar non-zero entries and build the layout
-  // object for looking into the values array of the
-  // TripletSparseMatrix.
-  int num_nonzeros = 0;
-  for (const auto& block_pair : block_pairs) {
-    const int row_block_size = blocks_[block_pair.first].size;
-    const int col_block_size = blocks_[block_pair.second].size;
-    num_nonzeros += row_block_size * col_block_size;
+  std::vector<int> num_cells_at_row(num_blocks);
+  for (auto& p : block_pairs) {
+    ++num_cells_at_row[p.first];
   }
-
+  auto block_structure_ = new CompressedRowBlockStructure;
+  block_structure_->cols = blocks;
+  block_structure_->rows.resize(num_blocks);
+  auto p = block_pairs.begin();
+  int num_nonzeros = 0;
+  // Pairs of block indices are sorted lexicographically, so the pairs that
+  // belong to a single row-block form a contiguous segment with a constant
+  // row-block index and increasing column-block indices. The
+  // CompressedRowBlockStructure is built in a single traversal of block_pairs.
+  for (int row_block_id = 0; row_block_id < num_blocks; ++row_block_id) {
+    auto& row = block_structure_->rows[row_block_id];
+    row.block = blocks[row_block_id];
+    row.cells.reserve(num_cells_at_row[row_block_id]);
+    const int row_block_size = blocks[row_block_id].size;
+    // Process all index pairs corresponding to the current row-block. Because
+    // the index pairs are sorted lexicographically, cells are appended to the
+    // current row-block until the row-block index changes.
+    for (; p != block_pairs.end() && row_block_id == p->first; ++p) {
+      const int col_block_id = p->second;
+      row.cells.emplace_back(col_block_id, num_nonzeros);
+      num_nonzeros += row_block_size * blocks[col_block_id].size;
+    }
+  }
+  bsm_ = std::make_unique<BlockSparseMatrix>(block_structure_);
   VLOG(1) << "Matrix Size [" << num_cols << "," << num_cols << "] "
           << num_nonzeros;
-
-  tsm_ =
-      std::make_unique<TripletSparseMatrix>(num_cols, num_cols, num_nonzeros);
-  tsm_->set_num_nonzeros(num_nonzeros);
-  int* rows = tsm_->mutable_rows();
-  int* cols = tsm_->mutable_cols();
-  double* values = tsm_->mutable_values();
-
-  int pos = 0;
-  for (const auto& block_pair : block_pairs) {
-    const int row_block_size = blocks_[block_pair.first].size;
-    const int col_block_size = blocks_[block_pair.second].size;
-    cell_values_.emplace_back(block_pair, values + pos);
-    layout_[IntPairToInt64(block_pair.first, block_pair.second)] =
-        std::make_unique<CellInfo>(values + pos);
-    pos += row_block_size * col_block_size;
-  }
-
-  // Fill the sparsity pattern of the underlying matrix.
-  for (const auto& block_pair : block_pairs) {
-    const int row_block_id = block_pair.first;
-    const int col_block_id = block_pair.second;
-    const int row_block_size = blocks_[row_block_id].size;
-    const int col_block_size = blocks_[col_block_id].size;
-    int pos =
-        layout_[IntPairToInt64(row_block_id, col_block_id)]->values - values;
-    for (int r = 0; r < row_block_size; ++r) {
-      for (int c = 0; c < col_block_size; ++c, ++pos) {
-        rows[pos] = blocks_[row_block_id].position + r;
-        cols[pos] = blocks_[col_block_id].position + c;
-        values[pos] = 1.0;
-        DCHECK_LT(rows[pos], tsm_->num_rows());
-        DCHECK_LT(cols[pos], tsm_->num_rows());
-      }
+  double* values = bsm_->mutable_values();
+  for (int row_block_id = 0; row_block_id < num_blocks; ++row_block_id) {
+    const auto& cells = block_structure_->rows[row_block_id].cells;
+    for (auto& c : cells) {
+      const int col_block_id = c.block_id;
+      double* const data = values + c.position;
+      layout_[IntPairToInt64(row_block_id, col_block_id)] =
+          std::make_unique<CellInfo>(data);
     }
   }
 }
@@ -126,35 +119,41 @@
 // Assume that the user does not hold any locks on any cell blocks
 // when they are calling SetZero.
 void BlockRandomAccessSparseMatrix::SetZero() {
-  ParallelSetZero(
-      context_, num_threads_, tsm_->mutable_values(), tsm_->num_nonzeros());
+  bsm_->SetZero(context_, num_threads_);
 }
 
 void BlockRandomAccessSparseMatrix::SymmetricRightMultiplyAndAccumulate(
     const double* x, double* y) const {
-  for (const auto& cell_position_and_data : cell_values_) {
-    const int row = cell_position_and_data.first.first;
-    const int row_block_size = blocks_[row].size;
-    const int row_block_pos = blocks_[row].position;
+  const auto bs = bsm_->block_structure();
+  const auto values = bsm_->values();
+  const int num_blocks = blocks_.size();
 
-    const int col = cell_position_and_data.first.second;
-    const int col_block_size = blocks_[col].size;
-    const int col_block_pos = blocks_[col].position;
+  for (int row_block_id = 0; row_block_id < num_blocks; ++row_block_id) {
+    const auto& row_block = bs->rows[row_block_id];
+    const int row_block_size = row_block.block.size;
+    const int row_block_pos = row_block.block.position;
 
-    MatrixVectorMultiply<Eigen::Dynamic, Eigen::Dynamic, 1>(
-        cell_position_and_data.second,
-        row_block_size,
-        col_block_size,
-        x + col_block_pos,
-        y + row_block_pos);
+    for (auto& c : row_block.cells) {
+      const int col_block_id = c.block_id;
+      const int col_block_size = blocks_[col_block_id].size;
+      const int col_block_pos = blocks_[col_block_id].position;
 
-    // Since the matrix is symmetric, but only the upper triangular
-    // part is stored, if the block being accessed is not a diagonal
-    // block, then use the same block to do the corresponding lower
-    // triangular multiply also.
-    if (row != col) {
+      MatrixVectorMultiply<Eigen::Dynamic, Eigen::Dynamic, 1>(
+          values + c.position,
+          row_block_size,
+          col_block_size,
+          x + col_block_pos,
+          y + row_block_pos);
+      if (col_block_id == row_block_id) {
+        continue;
+      }
+
+      // Since the matrix is symmetric, but only the upper triangular
+      // part is stored, if the block being accessed is not a diagonal
+      // block, then use the same block to do the corresponding lower
+      // triangular multiply also.
       MatrixTransposeVectorMultiply<Eigen::Dynamic, Eigen::Dynamic, 1>(
-          cell_position_and_data.second,
+          values + c.position,
           row_block_size,
           col_block_size,
           x + row_block_pos,
diff --git a/internal/ceres/block_random_access_sparse_matrix.h b/internal/ceres/block_random_access_sparse_matrix.h
index a64a139..cb6f511 100644
--- a/internal/ceres/block_random_access_sparse_matrix.h
+++ b/internal/ceres/block_random_access_sparse_matrix.h
@@ -39,18 +39,18 @@
 #include <vector>
 
 #include "ceres/block_random_access_matrix.h"
+#include "ceres/block_sparse_matrix.h"
 #include "ceres/block_structure.h"
 #include "ceres/context_impl.h"
 #include "ceres/internal/disable_warnings.h"
 #include "ceres/internal/export.h"
 #include "ceres/small_blas.h"
-#include "ceres/triplet_sparse_matrix.h"
 #include "ceres/types.h"
 
 namespace ceres::internal {
 
 // A thread safe square block sparse implementation of
-// BlockRandomAccessMatrix. Internally a TripletSparseMatrix is used
+// BlockRandomAccessMatrix. Internally a BlockSparseMatrix is used
 // for doing the actual storage. This class augments this matrix with
 // an unordered_map that allows random read/write access.
 class CERES_NO_EXPORT BlockRandomAccessSparseMatrix
@@ -81,19 +81,19 @@
   // locked.
   void SetZero() final;
 
-  // Assume that the matrix is symmetric and only one half of the
-  // matrix is stored.
+  // Assume that the matrix is symmetric and only one half of the matrix is
+  // stored.
   //
   // y += S * x
   void SymmetricRightMultiplyAndAccumulate(const double* x, double* y) const;
 
   // Since the matrix is square, num_rows() == num_cols().
-  int num_rows() const final { return tsm_->num_rows(); }
-  int num_cols() const final { return tsm_->num_cols(); }
+  int num_rows() const final { return bsm_->num_rows(); }
+  int num_cols() const final { return bsm_->num_cols(); }
 
   // Access to the underlying matrix object.
-  const TripletSparseMatrix* matrix() const { return tsm_.get(); }
-  TripletSparseMatrix* mutable_matrix() { return tsm_.get(); }
+  const BlockSparseMatrix* matrix() const { return bsm_.get(); }
+  BlockSparseMatrix* mutable_matrix() { return bsm_.get(); }
 
  private:
   int64_t IntPairToInt64(int row, int col) const {
@@ -117,12 +117,8 @@
   using LayoutType = std::unordered_map<int64_t, std::unique_ptr<CellInfo>>;
   LayoutType layout_;
 
-  // In order traversal of contents of the matrix. This allows us to
-  // implement a matrix-vector product which is 20% faster than using the
-  // iterator in the Layout object instead.
-  std::vector<std::pair<std::pair<int, int>, double*>> cell_values_;
   // The underlying matrix object which actually stores the cells.
-  std::unique_ptr<TripletSparseMatrix> tsm_;
+  std::unique_ptr<BlockSparseMatrix> bsm_;
 
   friend class BlockRandomAccessSparseMatrixTest;
 };
diff --git a/internal/ceres/block_random_access_sparse_matrix_test.cc b/internal/ceres/block_random_access_sparse_matrix_test.cc
index 2e7d439..f8afc1e 100644
--- a/internal/ceres/block_random_access_sparse_matrix_test.cc
+++ b/internal/ceres/block_random_access_sparse_matrix_test.cc
@@ -91,12 +91,11 @@
         Matrix::Ones(blocks[row_block_id].size, blocks[col_block_id].size);
   }
 
-  const TripletSparseMatrix* tsm = m.matrix();
-  EXPECT_EQ(tsm->num_nonzeros(), num_nonzeros);
-  EXPECT_EQ(tsm->max_num_nonzeros(), num_nonzeros);
+  const BlockSparseMatrix* bsm = m.matrix();
+  EXPECT_EQ(bsm->num_nonzeros(), num_nonzeros);
 
   Matrix dense;
-  tsm->ToDenseMatrix(&dense);
+  bsm->ToDenseMatrix(&dense);
 
   double kTolerance = 1e-14;
 
diff --git a/internal/ceres/block_sparse_matrix.cc b/internal/ceres/block_sparse_matrix.cc
index a60ee89..df2d9e1 100644
--- a/internal/ceres/block_sparse_matrix.cc
+++ b/internal/ceres/block_sparse_matrix.cc
@@ -58,6 +58,120 @@
     rows[c].cumulative_nnz = curr_nnz + rows[c - 1].cumulative_nnz;
   }
 }
+
+template <bool transpose>
+std::unique_ptr<CompressedRowSparseMatrix>
+CreateStructureOfCompressedRowSparseMatrix(
+    const double* values,
+    int num_rows,
+    int num_cols,
+    int num_nonzeros,
+    const CompressedRowBlockStructure* block_structure) {
+  auto crs_matrix = std::make_unique<CompressedRowSparseMatrix>(
+      num_rows, num_cols, num_nonzeros);
+  auto crs_cols = crs_matrix->mutable_cols();
+  auto crs_rows = crs_matrix->mutable_rows();
+  int value_offset = 0;
+  const int num_row_blocks = block_structure->rows.size();
+  const auto& cols = block_structure->cols;
+  *crs_rows++ = 0;
+  for (int row_block_id = 0; row_block_id < num_row_blocks; ++row_block_id) {
+    const auto& row_block = block_structure->rows[row_block_id];
+    // Empty row block: only requires setting row offsets
+    if (row_block.cells.empty()) {
+      std::fill(crs_rows, crs_rows + row_block.block.size, value_offset);
+      crs_rows += row_block.block.size;
+      continue;
+    }
+
+    // Compute number of non-zeros per row from number of non-zeros in row-block
+    int row_block_nnz;
+    if constexpr (transpose) {
+      // The transposed block structure comes with nnz already filled in.
+      row_block_nnz = row_block.nnz;
+    } else {
+      // The non-transposed block structure has a sequential layout, but its
+      // nnz field is not filled in. The difference between the value positions
+      // of the first and last cells in the row-block gives the number of
+      // non-zeros in the row-block, excluding the last cell.
+      const auto& front = row_block.cells.front();
+      const auto& back = row_block.cells.back();
+      row_block_nnz =
+          (back.position - front.position) +
+          block_structure->cols[back.block_id].size * row_block.block.size;
+    }
+    const int row_nnz = row_block_nnz / row_block.block.size;
+
+    // Row-wise setup of matrix structure
+    for (int row = 0; row < row_block.block.size; ++row) {
+      value_offset += row_nnz;
+      *crs_rows++ = value_offset;
+      for (auto& c : row_block.cells) {
+        const int col_block_size = cols[c.block_id].size;
+        const int col_position = cols[c.block_id].position;
+        std::iota(crs_cols, crs_cols + col_block_size, col_position);
+        crs_cols += col_block_size;
+      }
+    }
+  }
+  return crs_matrix;
+}
+
+template <bool transpose>
+void UpdateCompressedRowSparseMatrixImpl(
+    CompressedRowSparseMatrix* crs_matrix,
+    const double* values,
+    const CompressedRowBlockStructure* block_structure) {
+  auto crs_values = crs_matrix->mutable_values();
+  auto crs_rows = crs_matrix->mutable_rows();
+  const int num_row_blocks = block_structure->rows.size();
+  const auto& cols = block_structure->cols;
+  for (int row_block_id = 0; row_block_id < num_row_blocks; ++row_block_id) {
+    const auto& row_block = block_structure->rows[row_block_id];
+    const int row_block_size = row_block.block.size;
+    const int row_nnz = crs_rows[1] - crs_rows[0];
+    crs_rows += row_block_size;
+
+    if (row_nnz == 0) {
+      continue;
+    }
+
+    MatrixRef crs_row_block(crs_values, row_block_size, row_nnz);
+    int col_offset = 0;
+    for (auto& c : row_block.cells) {
+      const int col_block_size = cols[c.block_id].size;
+      auto crs_cell =
+          crs_row_block.block(0, col_offset, row_block_size, col_block_size);
+      if constexpr (transpose) {
+        // The transposed matrix is filled using the transposed block structure.
+        ConstMatrixRef cell(
+            values + c.position, col_block_size, row_block_size);
+        crs_cell = cell.transpose();
+      } else {
+        ConstMatrixRef cell(
+            values + c.position, row_block_size, col_block_size);
+        crs_cell = cell;
+      }
+      col_offset += col_block_size;
+    }
+    crs_values += row_nnz * row_block_size;
+  }
+}
+
+void SetBlockStructureOfCompressedRowSparseMatrix(
+    CompressedRowSparseMatrix* crs_matrix,
+    CompressedRowBlockStructure* block_structure) {
+  const int num_row_blocks = block_structure->rows.size();
+  auto& row_blocks = *crs_matrix->mutable_row_blocks();
+  row_blocks.resize(num_row_blocks);
+  for (int i = 0; i < num_row_blocks; ++i) {
+    row_blocks[i] = block_structure->rows[i].block;
+  }
+
+  auto& col_blocks = *crs_matrix->mutable_col_blocks();
+  col_blocks = block_structure->cols;
+}
+
 }  // namespace
 
 BlockSparseMatrix::BlockSparseMatrix(
@@ -330,29 +444,47 @@
       transpose_bs->rows.data(),
       [](const CompressedRow& row) { return row.cumulative_nnz; });
 }
+std::unique_ptr<CompressedRowSparseMatrix>
+BlockSparseMatrix::ToCompressedRowSparseMatrixTranspose() const {
+  auto bs = transpose_block_structure_.get();
+  auto crs_matrix = CreateStructureOfCompressedRowSparseMatrix<true>(
+      values(), num_cols_, num_rows_, num_nonzeros_, bs);
 
-void BlockSparseMatrix::ToCompressedRowSparseMatrix(
+  SetBlockStructureOfCompressedRowSparseMatrix(crs_matrix.get(), bs);
+
+  UpdateCompressedRowSparseMatrixTranspose(crs_matrix.get());
+  return crs_matrix;
+}
+
+std::unique_ptr<CompressedRowSparseMatrix>
+BlockSparseMatrix::ToCompressedRowSparseMatrix() const {
+  auto crs_matrix = CreateStructureOfCompressedRowSparseMatrix<false>(
+      values(), num_rows_, num_cols_, num_nonzeros_, block_structure_.get());
+
+  SetBlockStructureOfCompressedRowSparseMatrix(crs_matrix.get(),
+                                               block_structure_.get());
+
+  UpdateCompressedRowSparseMatrix(crs_matrix.get());
+  return crs_matrix;
+}
+
+void BlockSparseMatrix::UpdateCompressedRowSparseMatrixTranspose(
     CompressedRowSparseMatrix* crs_matrix) const {
-  {
-    TripletSparseMatrix ts_matrix;
-    this->ToTripletSparseMatrix(&ts_matrix);
-    *crs_matrix =
-        *CompressedRowSparseMatrix::FromTripletSparseMatrix(ts_matrix);
-  }
-
-  int num_row_blocks = block_structure_->rows.size();
-  auto& row_blocks = *crs_matrix->mutable_row_blocks();
-  row_blocks.resize(num_row_blocks);
-  for (int i = 0; i < num_row_blocks; ++i) {
-    row_blocks[i] = block_structure_->rows[i].block;
-  }
-
-  int num_col_blocks = block_structure_->cols.size();
-  auto& col_blocks = *crs_matrix->mutable_col_blocks();
-  col_blocks.resize(num_col_blocks);
-  for (int i = 0; i < num_col_blocks; ++i) {
-    col_blocks[i] = block_structure_->cols[i];
-  }
+  CHECK(crs_matrix != nullptr);
+  CHECK_EQ(crs_matrix->num_rows(), num_cols_);
+  CHECK_EQ(crs_matrix->num_cols(), num_rows_);
+  CHECK_EQ(crs_matrix->num_nonzeros(), num_nonzeros_);
+  UpdateCompressedRowSparseMatrixImpl<true>(
+      crs_matrix, values(), transpose_block_structure_.get());
+}
+void BlockSparseMatrix::UpdateCompressedRowSparseMatrix(
+    CompressedRowSparseMatrix* crs_matrix) const {
+  CHECK(crs_matrix != nullptr);
+  CHECK_EQ(crs_matrix->num_rows(), num_rows_);
+  CHECK_EQ(crs_matrix->num_cols(), num_cols_);
+  CHECK_EQ(crs_matrix->num_nonzeros(), num_nonzeros_);
+  UpdateCompressedRowSparseMatrixImpl<false>(
+      crs_matrix, values(), block_structure_.get());
 }
 
 void BlockSparseMatrix::ToDenseMatrix(Matrix* dense_matrix) const {
diff --git a/internal/ceres/block_sparse_matrix.h b/internal/ceres/block_sparse_matrix.h
index ed3146f..55f1cc4 100644
--- a/internal/ceres/block_sparse_matrix.h
+++ b/internal/ceres/block_sparse_matrix.h
@@ -91,7 +91,20 @@
   void ScaleColumns(const double* scale,
                     ContextImpl* context,
                     int num_threads) final;
-  void ToCompressedRowSparseMatrix(CompressedRowSparseMatrix* matrix) const;
+
+  // Convert to CompressedRowSparseMatrix
+  std::unique_ptr<CompressedRowSparseMatrix> ToCompressedRowSparseMatrix()
+      const;
+  // Create CompressedRowSparseMatrix corresponding to transposed matrix
+  std::unique_ptr<CompressedRowSparseMatrix>
+  ToCompressedRowSparseMatrixTranspose() const;
+  // Copy values to CompressedRowSparseMatrix that has compatible structure
+  void UpdateCompressedRowSparseMatrix(
+      CompressedRowSparseMatrix* crs_matrix) const;
+  // Copy values to CompressedRowSparseMatrix that has structure of transposed
+  // matrix
+  void UpdateCompressedRowSparseMatrixTranspose(
+      CompressedRowSparseMatrix* crs_matrix) const;
   void ToDenseMatrix(Matrix* dense_matrix) const final;
   void ToTextFile(FILE* file) const final;
 
diff --git a/internal/ceres/block_sparse_matrix_test.cc b/internal/ceres/block_sparse_matrix_test.cc
index 5036aa2..f749df3 100644
--- a/internal/ceres/block_sparse_matrix_test.cc
+++ b/internal/ceres/block_sparse_matrix_test.cc
@@ -482,38 +482,73 @@
 TEST(BlockSparseMatrix, ToCRSMatrix) {
   {
     std::unique_ptr<BlockSparseMatrix> m = CreateTestMatrixFromId(0);
-    CompressedRowSparseMatrix m_crs(
-        m->num_rows(), m->num_cols(), m->num_nonzeros());
-    m->ToCompressedRowSparseMatrix(&m_crs);
+    auto m_crs = m->ToCompressedRowSparseMatrix();
     std::vector<int> rows_expected = {0, 2, 4, 7, 10};
     std::vector<int> cols_expected = {0, 1, 0, 1, 2, 3, 4, 2, 3, 4};
     std::vector<double> values_expected = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
     for (int i = 0; i < rows_expected.size(); ++i) {
-      EXPECT_EQ(m_crs.rows()[i], rows_expected[i]);
+      EXPECT_EQ(m_crs->rows()[i], rows_expected[i]);
     }
     for (int i = 0; i < cols_expected.size(); ++i) {
-      EXPECT_EQ(m_crs.cols()[i], cols_expected[i]);
+      EXPECT_EQ(m_crs->cols()[i], cols_expected[i]);
     }
     for (int i = 0; i < values_expected.size(); ++i) {
-      EXPECT_EQ(m_crs.values()[i], values_expected[i]);
+      EXPECT_EQ(m_crs->values()[i], values_expected[i]);
     }
   }
   {
     std::unique_ptr<BlockSparseMatrix> m = CreateTestMatrixFromId(1);
-    CompressedRowSparseMatrix m_crs(
-        m->num_rows(), m->num_cols(), m->num_nonzeros());
-    m->ToCompressedRowSparseMatrix(&m_crs);
+    auto m_crs = m->ToCompressedRowSparseMatrix();
     std::vector<int> rows_expected = {0, 4, 8, 9};
     std::vector<int> cols_expected = {0, 1, 3, 4, 0, 1, 3, 4, 2};
     std::vector<double> values_expected = {1, 2, 5, 6, 3, 4, 7, 8, 9};
     for (int i = 0; i < rows_expected.size(); ++i) {
-      EXPECT_EQ(m_crs.rows()[i], rows_expected[i]);
+      EXPECT_EQ(m_crs->rows()[i], rows_expected[i]);
     }
     for (int i = 0; i < cols_expected.size(); ++i) {
-      EXPECT_EQ(m_crs.cols()[i], cols_expected[i]);
+      EXPECT_EQ(m_crs->cols()[i], cols_expected[i]);
     }
     for (int i = 0; i < values_expected.size(); ++i) {
-      EXPECT_EQ(m_crs.values()[i], values_expected[i]);
+      EXPECT_EQ(m_crs->values()[i], values_expected[i]);
+    }
+  }
+}
+
+TEST(BlockSparseMatrix, ToCRSMatrixTranspose) {
+  {
+    std::unique_ptr<BlockSparseMatrix> m = CreateTestMatrixFromId(0);
+    auto m_crs_transpose = m->ToCompressedRowSparseMatrixTranspose();
+    std::vector<int> rows_expected = {0, 2, 4, 6, 8, 10, 10};
+    std::vector<int> cols_expected = {0, 1, 0, 1, 2, 3, 2, 3, 2, 3};
+    std::vector<double> values_expected = {1, 3, 2, 4, 5, 8, 6, 9, 7, 10};
+    EXPECT_EQ(m_crs_transpose->num_nonzeros(), cols_expected.size());
+    EXPECT_EQ(m_crs_transpose->num_rows(), rows_expected.size() - 1);
+    for (int i = 0; i < rows_expected.size(); ++i) {
+      EXPECT_EQ(m_crs_transpose->rows()[i], rows_expected[i]);
+    }
+    for (int i = 0; i < cols_expected.size(); ++i) {
+      EXPECT_EQ(m_crs_transpose->cols()[i], cols_expected[i]);
+    }
+    for (int i = 0; i < values_expected.size(); ++i) {
+      EXPECT_EQ(m_crs_transpose->values()[i], values_expected[i]);
+    }
+  }
+  {
+    std::unique_ptr<BlockSparseMatrix> m = CreateTestMatrixFromId(1);
+    auto m_crs_transpose = m->ToCompressedRowSparseMatrixTranspose();
+    std::vector<int> rows_expected = {0, 2, 4, 5, 7, 9, 9};
+    std::vector<int> cols_expected = {0, 1, 0, 1, 2, 0, 1, 0, 1};
+    std::vector<double> values_expected = {1, 3, 2, 4, 9, 5, 7, 6, 8};
+    EXPECT_EQ(m_crs_transpose->num_nonzeros(), cols_expected.size());
+    EXPECT_EQ(m_crs_transpose->num_rows(), rows_expected.size() - 1);
+    for (int i = 0; i < rows_expected.size(); ++i) {
+      EXPECT_EQ(m_crs_transpose->rows()[i], rows_expected[i]);
+    }
+    for (int i = 0; i < cols_expected.size(); ++i) {
+      EXPECT_EQ(m_crs_transpose->cols()[i], cols_expected[i]);
+    }
+    for (int i = 0; i < values_expected.size(); ++i) {
+      EXPECT_EQ(m_crs_transpose->values()[i], values_expected[i]);
     }
   }
 }
diff --git a/internal/ceres/cuda_sparse_matrix_test.cc b/internal/ceres/cuda_sparse_matrix_test.cc
index a12ad40..74e74fd 100644
--- a/internal/ceres/cuda_sparse_matrix_test.cc
+++ b/internal/ceres/cuda_sparse_matrix_test.cc
@@ -80,10 +80,8 @@
 
 TEST_F(CudaSparseMatrixTest, RightMultiplyAndAccumulate) {
   std::string message;
-  CompressedRowSparseMatrix A_crs(
-      A_->num_rows(), A_->num_cols(), A_->num_nonzeros());
-  A_->ToCompressedRowSparseMatrix(&A_crs);
-  CudaSparseMatrix A_gpu(&context_, A_crs);
+  auto A_crs = A_->ToCompressedRowSparseMatrix();
+  CudaSparseMatrix A_gpu(&context_, *A_crs);
   CudaVector x_gpu(&context_, A_gpu.num_cols());
   CudaVector res_gpu(&context_, A_gpu.num_rows());
   x_gpu.CopyFromCpu(x_);
diff --git a/internal/ceres/evaluation_benchmark.cc b/internal/ceres/evaluation_benchmark.cc
index 5dc1ef4..ae1a65d 100644
--- a/internal/ceres/evaluation_benchmark.cc
+++ b/internal/ceres/evaluation_benchmark.cc
@@ -128,13 +128,7 @@
   std::unique_ptr<CompressedRowSparseMatrix> CreateCompressedRowSparseJacobian(
       ContextImpl* context) {
     auto block_sparse = BlockSparseJacobian(context);
-    auto crs_jacobian = std::make_unique<CompressedRowSparseMatrix>(
-        block_sparse->num_rows(),
-        block_sparse->num_cols(),
-        block_sparse->num_nonzeros());
-
-    block_sparse->ToCompressedRowSparseMatrix(crs_jacobian.get());
-    return crs_jacobian;
+    return block_sparse->ToCompressedRowSparseMatrix();
   }
 
   const BlockSparseMatrix* BlockSparseJacobian(ContextImpl* context) {
diff --git a/internal/ceres/schur_complement_solver.cc b/internal/ceres/schur_complement_solver.cc
index aedc73a..e49d5bb 100644
--- a/internal/ceres/schur_complement_solver.cc
+++ b/internal/ceres/schur_complement_solver.cc
@@ -299,32 +299,36 @@
   summary.termination_type = LinearSolverTerminationType::SUCCESS;
   summary.message = "Success.";
 
-  const TripletSparseMatrix* tsm =
+  const BlockSparseMatrix* bsm =
       down_cast<const BlockRandomAccessSparseMatrix*>(lhs())->matrix();
-  if (tsm->num_rows() == 0) {
+  if (bsm->num_rows() == 0) {
     return summary;
   }
 
-  std::unique_ptr<CompressedRowSparseMatrix> lhs;
   const CompressedRowSparseMatrix::StorageType storage_type =
       sparse_cholesky_->StorageType();
   if (storage_type ==
       CompressedRowSparseMatrix::StorageType::UPPER_TRIANGULAR) {
-    lhs = CompressedRowSparseMatrix::FromTripletSparseMatrix(*tsm);
-    lhs->set_storage_type(
-        CompressedRowSparseMatrix::StorageType::UPPER_TRIANGULAR);
+    if (!crs_lhs_) {
+      crs_lhs_ = bsm->ToCompressedRowSparseMatrix();
+      crs_lhs_->set_storage_type(
+          CompressedRowSparseMatrix::StorageType::UPPER_TRIANGULAR);
+    } else {
+      bsm->UpdateCompressedRowSparseMatrix(crs_lhs_.get());
+    }
   } else {
-    lhs = CompressedRowSparseMatrix::FromTripletSparseMatrixTransposed(*tsm);
-    lhs->set_storage_type(
-        CompressedRowSparseMatrix::StorageType::LOWER_TRIANGULAR);
+    if (!crs_lhs_) {
+      crs_lhs_ = bsm->ToCompressedRowSparseMatrixTranspose();
+      crs_lhs_->set_storage_type(
+          CompressedRowSparseMatrix::StorageType::LOWER_TRIANGULAR);
+    } else {
+      bsm->UpdateCompressedRowSparseMatrixTranspose(crs_lhs_.get());
+    }
   }
 
-  *lhs->mutable_col_blocks() = blocks_;
-  *lhs->mutable_row_blocks() = blocks_;
-
   summary.num_iterations = 1;
   summary.termination_type = sparse_cholesky_->FactorAndSolve(
-      lhs.get(), rhs().data(), solution, &summary.message);
+      crs_lhs_.get(), rhs().data(), solution, &summary.message);
   return summary;
 }
 
diff --git a/internal/ceres/schur_complement_solver.h b/internal/ceres/schur_complement_solver.h
index bb2cd88..9f6146b 100644
--- a/internal/ceres/schur_complement_solver.h
+++ b/internal/ceres/schur_complement_solver.h
@@ -186,6 +186,7 @@
   std::vector<Block> blocks_;
   std::unique_ptr<SparseCholesky> sparse_cholesky_;
   std::unique_ptr<BlockRandomAccessDiagonalMatrix> preconditioner_;
+  std::unique_ptr<CompressedRowSparseMatrix> crs_lhs_;
   Vector cg_solution_;
   Vector* scratch_[4] = {nullptr, nullptr, nullptr, nullptr};
 };
diff --git a/internal/ceres/spmv_benchmark.cc b/internal/ceres/spmv_benchmark.cc
index 2a6e679..4211784 100644
--- a/internal/ceres/spmv_benchmark.cc
+++ b/internal/ceres/spmv_benchmark.cc
@@ -183,21 +183,18 @@
   auto bsm_jacobian = CreateFakeBundleAdjustmentJacobian(
       kNumCameras, kNumPoints, kCameraSize, kPointSize, kVisibility, prng);
 
-  CompressedRowSparseMatrix jacobian(bsm_jacobian->num_rows(),
-                                     bsm_jacobian->num_cols(),
-                                     bsm_jacobian->num_nonzeros());
-  bsm_jacobian->ToCompressedRowSparseMatrix(&jacobian);
+  auto jacobian = bsm_jacobian->ToCompressedRowSparseMatrix();
 
   ContextImpl context;
   context.EnsureMinimumThreads(num_threads);
 
-  Vector x(jacobian.num_cols());
-  Vector y(jacobian.num_rows());
+  Vector x(jacobian->num_cols());
+  Vector y(jacobian->num_rows());
   x.setRandom();
   y.setRandom();
   double sum = 0;
   for (auto _ : state) {
-    jacobian.RightMultiplyAndAccumulate(
+    jacobian->RightMultiplyAndAccumulate(
         x.data(), y.data(), &context, num_threads);
     sum += y.norm();
   }
@@ -225,21 +222,18 @@
   std::mt19937 prng;
 
   auto bsm_jacobian = BlockSparseMatrix::CreateRandomMatrix(options, prng);
-  CompressedRowSparseMatrix jacobian(bsm_jacobian->num_rows(),
-                                     bsm_jacobian->num_cols(),
-                                     bsm_jacobian->num_nonzeros());
-  bsm_jacobian->ToCompressedRowSparseMatrix(&jacobian);
+  auto jacobian = bsm_jacobian->ToCompressedRowSparseMatrix();
 
   ContextImpl context;
   context.EnsureMinimumThreads(num_threads);
 
-  Vector x(jacobian.num_cols());
-  Vector y(jacobian.num_rows());
+  Vector x(jacobian->num_cols());
+  Vector y(jacobian->num_rows());
   x.setRandom();
   y.setRandom();
   double sum = 0;
   for (auto _ : state) {
-    jacobian.RightMultiplyAndAccumulate(
+    jacobian->RightMultiplyAndAccumulate(
         x.data(), y.data(), &context, num_threads);
     sum += y.norm();
   }
@@ -258,19 +252,16 @@
   // Perform setup here
   auto bsm_jacobian = CreateFakeBundleAdjustmentJacobian(
       kNumCameras, kNumPoints, kCameraSize, kPointSize, kVisibility, prng);
-  CompressedRowSparseMatrix jacobian(bsm_jacobian->num_rows(),
-                                     bsm_jacobian->num_cols(),
-                                     bsm_jacobian->num_nonzeros());
-  bsm_jacobian->ToCompressedRowSparseMatrix(&jacobian);
+  auto jacobian = bsm_jacobian->ToCompressedRowSparseMatrix();
 
-  Vector x(jacobian.num_rows());
-  Vector y(jacobian.num_cols());
+  Vector x(jacobian->num_rows());
+  Vector y(jacobian->num_cols());
   x.setRandom();
   y.setRandom();
   double sum = 0;
   for (auto _ : state) {
     // This code gets timed
-    jacobian.LeftMultiplyAndAccumulate(x.data(), y.data());
+    jacobian->LeftMultiplyAndAccumulate(x.data(), y.data());
     sum += y.norm();
   }
   CHECK_NE(sum, 0.0);
@@ -291,19 +282,16 @@
   std::mt19937 prng;
 
   auto bsm_jacobian = BlockSparseMatrix::CreateRandomMatrix(options, prng);
-  CompressedRowSparseMatrix jacobian(bsm_jacobian->num_rows(),
-                                     bsm_jacobian->num_cols(),
-                                     bsm_jacobian->num_nonzeros());
-  bsm_jacobian->ToCompressedRowSparseMatrix(&jacobian);
+  auto jacobian = bsm_jacobian->ToCompressedRowSparseMatrix();
 
-  Vector x(jacobian.num_rows());
-  Vector y(jacobian.num_cols());
+  Vector x(jacobian->num_rows());
+  Vector y(jacobian->num_cols());
   x.setRandom();
   y.setRandom();
   double sum = 0;
   for (auto _ : state) {
     // This code gets timed
-    jacobian.LeftMultiplyAndAccumulate(x.data(), y.data());
+    jacobian->LeftMultiplyAndAccumulate(x.data(), y.data());
     sum += y.norm();
   }
   CHECK_NE(sum, 0.0);
@@ -319,10 +307,8 @@
   ContextImpl context;
   std::string message;
   context.InitCuda(&message);
-  CompressedRowSparseMatrix jacobian_crs(
-      jacobian->num_rows(), jacobian->num_cols(), jacobian->num_nonzeros());
-  jacobian->ToCompressedRowSparseMatrix(&jacobian_crs);
-  CudaSparseMatrix cuda_jacobian(&context, jacobian_crs);
+  auto jacobian_crs = jacobian->ToCompressedRowSparseMatrix();
+  CudaSparseMatrix cuda_jacobian(&context, *jacobian_crs);
   CudaVector cuda_x(&context, 0);
   CudaVector cuda_y(&context, 0);
 
@@ -360,10 +346,8 @@
   ContextImpl context;
   std::string message;
   context.InitCuda(&message);
-  CompressedRowSparseMatrix jacobian_crs(
-      jacobian->num_rows(), jacobian->num_cols(), jacobian->num_nonzeros());
-  jacobian->ToCompressedRowSparseMatrix(&jacobian_crs);
-  CudaSparseMatrix cuda_jacobian(&context, jacobian_crs);
+  auto jacobian_crs = jacobian->ToCompressedRowSparseMatrix();
+  CudaSparseMatrix cuda_jacobian(&context, *jacobian_crs);
   CudaVector cuda_x(&context, 0);
   CudaVector cuda_y(&context, 0);
 
@@ -392,10 +376,8 @@
   ContextImpl context;
   std::string message;
   context.InitCuda(&message);
-  CompressedRowSparseMatrix jacobian_crs(
-      jacobian->num_rows(), jacobian->num_cols(), jacobian->num_nonzeros());
-  jacobian->ToCompressedRowSparseMatrix(&jacobian_crs);
-  CudaSparseMatrix cuda_jacobian(&context, jacobian_crs);
+  auto jacobian_crs = jacobian->ToCompressedRowSparseMatrix();
+  CudaSparseMatrix cuda_jacobian(&context, *jacobian_crs);
   CudaVector cuda_x(&context, 0);
   CudaVector cuda_y(&context, 0);
 
@@ -433,10 +415,8 @@
   ContextImpl context;
   std::string message;
   context.InitCuda(&message);
-  CompressedRowSparseMatrix jacobian_crs(
-      jacobian->num_rows(), jacobian->num_cols(), jacobian->num_nonzeros());
-  jacobian->ToCompressedRowSparseMatrix(&jacobian_crs);
-  CudaSparseMatrix cuda_jacobian(&context, jacobian_crs);
+  auto jacobian_crs = jacobian->ToCompressedRowSparseMatrix();
+  CudaSparseMatrix cuda_jacobian(&context, *jacobian_crs);
   CudaVector cuda_x(&context, 0);
   CudaVector cuda_y(&context, 0);
 
diff --git a/internal/ceres/visibility_based_preconditioner.cc b/internal/ceres/visibility_based_preconditioner.cc
index d2e4ee0..18bb864 100644
--- a/internal/ceres/visibility_based_preconditioner.cc
+++ b/internal/ceres/visibility_based_preconditioner.cc
@@ -392,27 +392,33 @@
 // Compute the sparse Cholesky factorization of the preconditioner
 // matrix.
 LinearSolverTerminationType VisibilityBasedPreconditioner::Factorize() {
-  // Extract the TripletSparseMatrix that is used for actually storing
+  // Extract the BlockSparseMatrix that is used for actually storing
   // S and convert it into a CompressedRowSparseMatrix.
-  const TripletSparseMatrix* tsm =
-      down_cast<BlockRandomAccessSparseMatrix*>(m_.get())->mutable_matrix();
-
-  std::unique_ptr<CompressedRowSparseMatrix> lhs;
+  const BlockSparseMatrix* bsm =
+      down_cast<BlockRandomAccessSparseMatrix*>(m_.get())->matrix();
   const CompressedRowSparseMatrix::StorageType storage_type =
       sparse_cholesky_->StorageType();
   if (storage_type ==
       CompressedRowSparseMatrix::StorageType::UPPER_TRIANGULAR) {
-    lhs = CompressedRowSparseMatrix::FromTripletSparseMatrix(*tsm);
-    lhs->set_storage_type(
-        CompressedRowSparseMatrix::StorageType::UPPER_TRIANGULAR);
+    if (!m_crs_) {
+      m_crs_ = bsm->ToCompressedRowSparseMatrix();
+      m_crs_->set_storage_type(
+          CompressedRowSparseMatrix::StorageType::UPPER_TRIANGULAR);
+    } else {
+      bsm->UpdateCompressedRowSparseMatrix(m_crs_.get());
+    }
   } else {
-    lhs = CompressedRowSparseMatrix::FromTripletSparseMatrixTransposed(*tsm);
-    lhs->set_storage_type(
-        CompressedRowSparseMatrix::StorageType::LOWER_TRIANGULAR);
+    if (!m_crs_) {
+      m_crs_ = bsm->ToCompressedRowSparseMatrixTranspose();
+      m_crs_->set_storage_type(
+          CompressedRowSparseMatrix::StorageType::LOWER_TRIANGULAR);
+    } else {
+      bsm->UpdateCompressedRowSparseMatrixTranspose(m_crs_.get());
+    }
   }
 
   std::string message;
-  return sparse_cholesky_->Factorize(lhs.get(), &message);
+  return sparse_cholesky_->Factorize(m_crs_.get(), &message);
 }
 
 void VisibilityBasedPreconditioner::RightMultiplyAndAccumulate(
diff --git a/internal/ceres/visibility_based_preconditioner.h b/internal/ceres/visibility_based_preconditioner.h
index 84773f8..96c9721 100644
--- a/internal/ceres/visibility_based_preconditioner.h
+++ b/internal/ceres/visibility_based_preconditioner.h
@@ -194,6 +194,7 @@
 
   // Preconditioner matrix.
   std::unique_ptr<BlockRandomAccessSparseMatrix> m_;
+  std::unique_ptr<CompressedRowSparseMatrix> m_crs_;
   std::unique_ptr<SparseCholesky> sparse_cholesky_;
 };