AddBlockStructureTranspose to BlockSparseMatrix Add structure of transposed matrix to BlockSparseMatrix Number of non-zero values per row block and cumulative non-zero values count are maintained for transposed structure Change-Id: Icf38bb7a734ca695c788579eece1c92d36d78e54

diff --git a/internal/ceres/block_sparse_matrix.cc b/internal/ceres/block_sparse_matrix.cc
index 0dd3b20..0e05693 100644
--- a/internal/ceres/block_sparse_matrix.cc
+++ b/internal/ceres/block_sparse_matrix.cc

@@ -46,6 +46,19 @@
 
 namespace ceres::internal {
 
+namespace {
+void ComputeCumulativeNumberOfNonZeros(std::vector<CompressedList>& rows) {
+  if (!rows.size()) {
+    return;
+  }
+  rows[0].cumulative_nnz = rows[0].nnz;
+  for (int c = 1; c < rows.size(); ++c) {
+    const int curr_nnz = rows[c].nnz;
+    rows[c].cumulative_nnz = curr_nnz + rows[c - 1].cumulative_nnz;
+  }
+}
+}  // namespace
+
 BlockSparseMatrix::BlockSparseMatrix(
     CompressedRowBlockStructure* block_structure)
     : num_rows_(0),
@@ -83,6 +96,12 @@
   CHECK(values_ != nullptr);
 }
 
+void BlockSparseMatrix::AddTransposeBlockStructure() {
+  if (transpose_block_structure_ == nullptr) {
+    transpose_block_structure_ = CreateTranspose(*block_structure_);
+  }
+}
+
 void BlockSparseMatrix::SetZero() {
   std::fill(values_.get(), values_.get() + num_nonzeros_, 0.0);
 }
@@ -133,7 +152,6 @@
                                                   double* y) const {
   CHECK(x != nullptr);
   CHECK(y != nullptr);
-
   for (int i = 0; i < block_structure_->rows.size(); ++i) {
     int row_block_pos = block_structure_->rows[i].block.position;
     int row_block_size = block_structure_->rows[i].block.size;
@@ -267,6 +285,13 @@
   return block_structure_.get();
 }
 
+// Return a pointer to the block structure of matrix transpose. We continue to
+// hold ownership of the object though.
+const CompressedRowBlockStructure*
+BlockSparseMatrix::transpose_block_structure() const {
+  return transpose_block_structure_.get();
+}
+
 void BlockSparseMatrix::ToTextFile(FILE* file) const {
   CHECK(file != nullptr);
   for (int i = 0; i < block_structure_->rows.size(); ++i) {
@@ -337,16 +362,28 @@
 
   for (int i = 0; i < m_bs->rows.size(); ++i) {
     const CompressedRow& m_row = m_bs->rows[i];
-    CompressedRow& row = block_structure_->rows[old_num_row_blocks + i];
+    const int row_block_id = old_num_row_blocks + i;
+    CompressedRow& row = block_structure_->rows[row_block_id];
     row.block.size = m_row.block.size;
     row.block.position = num_rows_;
     num_rows_ += m_row.block.size;
     row.cells.resize(m_row.cells.size());
+    if (transpose_block_structure_) {
+      transpose_block_structure_->cols.emplace_back(row.block);
+    }
     for (int c = 0; c < m_row.cells.size(); ++c) {
       const int block_id = m_row.cells[c].block_id;
       row.cells[c].block_id = block_id;
       row.cells[c].position = num_nonzeros_;
-      num_nonzeros_ += m_row.block.size * m_bs->cols[block_id].size;
+
+      const int cell_nnz = m_row.block.size * m_bs->cols[block_id].size;
+      if (transpose_block_structure_) {
+        transpose_block_structure_->rows[block_id].cells.emplace_back(
+            row_block_id, num_nonzeros_);
+        transpose_block_structure_->rows[block_id].nnz += cell_nnz;
+      }
+
+      num_nonzeros_ += cell_nnz;
     }
   }
 
@@ -360,10 +397,16 @@
   std::copy(m.values(),
             m.values() + m.num_nonzeros(),
             values_.get() + old_num_nonzeros);
+
+  if (transpose_block_structure_ == nullptr) {
+    return;
+  }
+  ComputeCumulativeNumberOfNonZeros(transpose_block_structure_->rows);
 }
 
 void BlockSparseMatrix::DeleteRowBlocks(const int delta_row_blocks) {
   const int num_row_blocks = block_structure_->rows.size();
+  const int new_num_row_blocks = num_row_blocks - delta_row_blocks;
   int delta_num_nonzeros = 0;
   int delta_num_rows = 0;
   const std::vector<Block>& column_blocks = block_structure_->cols;
@@ -373,11 +416,34 @@
     for (int c = 0; c < row.cells.size(); ++c) {
       const Cell& cell = row.cells[c];
       delta_num_nonzeros += row.block.size * column_blocks[cell.block_id].size;
+
+      if (transpose_block_structure_) {
+        auto& col_cells = transpose_block_structure_->rows[cell.block_id].cells;
+        while (!col_cells.empty() &&
+               col_cells.back().block_id >= new_num_row_blocks) {
+          const int del_block_id = col_cells.back().block_id;
+          const int del_block_rows =
+              block_structure_->rows[del_block_id].block.size;
+          const int del_block_cols = column_blocks[cell.block_id].size;
+          const int del_cell_nnz = del_block_rows * del_block_cols;
+          transpose_block_structure_->rows[cell.block_id].nnz -= del_cell_nnz;
+          col_cells.pop_back();
+        }
+      }
     }
   }
   num_nonzeros_ -= delta_num_nonzeros;
   num_rows_ -= delta_num_rows;
-  block_structure_->rows.resize(num_row_blocks - delta_row_blocks);
+  block_structure_->rows.resize(new_num_row_blocks);
+
+  if (transpose_block_structure_ == nullptr) {
+    return;
+  }
+  for (int i = 0; i < delta_row_blocks; ++i) {
+    transpose_block_structure_->cols.pop_back();
+  }
+
+  ComputeCumulativeNumberOfNonZeros(transpose_block_structure_->rows);
 }
 
 std::unique_ptr<BlockSparseMatrix> BlockSparseMatrix::CreateRandomMatrix(
@@ -449,4 +515,30 @@
   return matrix;
 }
 
+std::unique_ptr<CompressedRowBlockStructure> CreateTranspose(
+    const CompressedRowBlockStructure& bs) {
+  auto transpose = std::make_unique<CompressedRowBlockStructure>();
+
+  transpose->rows.resize(bs.cols.size());
+  for (int i = 0; i < bs.cols.size(); ++i) {
+    transpose->rows[i].block = bs.cols[i];
+    transpose->rows[i].nnz = 0;
+  }
+
+  transpose->cols.resize(bs.rows.size());
+  for (int i = 0; i < bs.rows.size(); ++i) {
+    auto& row = bs.rows[i];
+    transpose->cols[i] = row.block;
+
+    const int nrows = row.block.size;
+    for (auto& cell : row.cells) {
+      transpose->rows[cell.block_id].cells.emplace_back(i, cell.position);
+      const int ncols = transpose->rows[cell.block_id].block.size;
+      transpose->rows[cell.block_id].nnz += nrows * ncols;
+    }
+  }
+  ComputeCumulativeNumberOfNonZeros(transpose->rows);
+  return transpose;
+}
+
 }  // namespace ceres::internal

diff --git a/internal/ceres/block_sparse_matrix.h b/internal/ceres/block_sparse_matrix.h
index 3a8dac9..b4abdc7 100644
--- a/internal/ceres/block_sparse_matrix.h
+++ b/internal/ceres/block_sparse_matrix.h

@@ -84,6 +84,8 @@
   void ToDenseMatrix(Matrix* dense_matrix) const final;
   void ToTextFile(FILE* file) const final;
 
+  void AddTransposeBlockStructure();
+
   // clang-format off
   int num_rows()         const final { return num_rows_;     }
   int num_cols()         const final { return num_cols_;     }
@@ -94,6 +96,7 @@
 
   void ToTripletSparseMatrix(TripletSparseMatrix* matrix) const;
   const CompressedRowBlockStructure* block_structure() const;
+  const CompressedRowBlockStructure* transpose_block_structure() const;
 
   // Append the contents of m to the bottom of this matrix. m must
   // have the same column blocks structure as this matrix.
@@ -137,6 +140,7 @@
   int max_num_nonzeros_;
   std::unique_ptr<double[]> values_;
   std::unique_ptr<CompressedRowBlockStructure> block_structure_;
+  std::unique_ptr<CompressedRowBlockStructure> transpose_block_structure_;
 };
 
 // A number of algorithms like the SchurEliminator do not need
@@ -164,6 +168,9 @@
   const double* values_;
 };
 
+std::unique_ptr<CompressedRowBlockStructure> CreateTranspose(
+    const CompressedRowBlockStructure& bs);
+
 }  // namespace ceres::internal
 
 #include "ceres/internal/reenable_warnings.h"

diff --git a/internal/ceres/block_sparse_matrix_test.cc b/internal/ceres/block_sparse_matrix_test.cc
index 3f5c78a..62bb0f4 100644
--- a/internal/ceres/block_sparse_matrix_test.cc
+++ b/internal/ceres/block_sparse_matrix_test.cc

@@ -227,6 +227,85 @@
   }
 }
 
+TEST_F(BlockSparseMatrixTest, AppendDeleteRowsTransposedStructure) {
+  auto problem = CreateLinearLeastSquaresProblemFromId(2);
+  std::unique_ptr<BlockSparseMatrix> m(
+      down_cast<BlockSparseMatrix*>(problem->A.release()));
+
+  A_->AddTransposeBlockStructure();
+  auto block_structure = A_->block_structure();
+
+  // Several AppendRows and DeleteRowBlocks operations are applied to matrix,
+  // with regular and transpose block structures being compared after each
+  // operation.
+  //
+  // Non-negative values encode number of row blocks to remove
+  // -1 encodes appending matrix m
+  const int num_row_blocks_to_delete[] = {0, -1, 1, -1, 8, -1, 10};
+  for (auto& t : num_row_blocks_to_delete) {
+    if (t == -1) {
+      A_->AppendRows(*m);
+    } else if (t > 0) {
+      CHECK_GE(block_structure->rows.size(), t);
+      A_->DeleteRowBlocks(t);
+    }
+
+    auto block_structure = A_->block_structure();
+    auto transpose_block_structure = A_->transpose_block_structure();
+    ASSERT_NE(block_structure, nullptr);
+    ASSERT_NE(transpose_block_structure, nullptr);
+
+    EXPECT_EQ(block_structure->rows.size(),
+              transpose_block_structure->cols.size());
+    EXPECT_EQ(block_structure->cols.size(),
+              transpose_block_structure->rows.size());
+
+    std::vector<int> nnz_col(transpose_block_structure->rows.size());
+    for (int i = 0; i < block_structure->cols.size(); ++i) {
+      EXPECT_EQ(block_structure->cols[i].position,
+                transpose_block_structure->rows[i].block.position);
+      const int col_size = transpose_block_structure->rows[i].block.size;
+      EXPECT_EQ(block_structure->cols[i].size, col_size);
+
+      for (auto& col_cell : transpose_block_structure->rows[i].cells) {
+        int matches = 0;
+        const int row_block_id = col_cell.block_id;
+        nnz_col[i] +=
+            col_size * transpose_block_structure->cols[row_block_id].size;
+        for (auto& row_cell : block_structure->rows[row_block_id].cells) {
+          if (row_cell.block_id != i) continue;
+          EXPECT_EQ(row_cell.position, col_cell.position);
+          ++matches;
+        }
+        EXPECT_EQ(matches, 1);
+      }
+      EXPECT_EQ(nnz_col[i], transpose_block_structure->rows[i].nnz);
+      if (i > 0) {
+        nnz_col[i] += nnz_col[i - 1];
+      }
+      EXPECT_EQ(nnz_col[i], transpose_block_structure->rows[i].cumulative_nnz);
+    }
+    for (int i = 0; i < block_structure->rows.size(); ++i) {
+      EXPECT_EQ(block_structure->rows[i].block.position,
+                transpose_block_structure->cols[i].position);
+      EXPECT_EQ(block_structure->rows[i].block.size,
+                transpose_block_structure->cols[i].size);
+
+      for (auto& row_cell : block_structure->rows[i].cells) {
+        int matches = 0;
+        const int col_block_id = row_cell.block_id;
+        for (auto& col_cell :
+             transpose_block_structure->rows[col_block_id].cells) {
+          if (col_cell.block_id != i) continue;
+          EXPECT_EQ(col_cell.position, row_cell.position);
+          ++matches;
+        }
+        EXPECT_EQ(matches, 1);
+      }
+    }
+  }
+}
+
 TEST_F(BlockSparseMatrixTest, AppendAndDeleteBlockDiagonalMatrix) {
   const std::vector<Block>& column_blocks = A_->block_structure()->cols;
   const int num_cols =
@@ -368,5 +447,46 @@
   }
 }
 
+TEST(BlockSparseMatrix, CreateTranspose) {
+  constexpr int kNumtrials = 10;
+  BlockSparseMatrix::RandomMatrixOptions options;
+  options.num_col_blocks = 10;
+  options.min_col_block_size = 1;
+  options.max_col_block_size = 3;
+
+  options.num_row_blocks = 20;
+  options.min_row_block_size = 1;
+  options.max_row_block_size = 4;
+  options.block_density = 0.25;
+  std::mt19937 prng;
+
+  for (int trial = 0; trial < kNumtrials; ++trial) {
+    auto a = BlockSparseMatrix::CreateRandomMatrix(options, prng);
+
+    auto ap_bs = std::make_unique<CompressedRowBlockStructure>();
+    *ap_bs = *a->block_structure();
+    BlockSparseMatrix ap(ap_bs.release());
+    std::copy_n(a->values(), a->num_nonzeros(), ap.mutable_values());
+    ap.AddTransposeBlockStructure();
+
+    Vector x = Vector::Random(a->num_cols());
+    Vector y = Vector::Random(a->num_rows());
+    Vector a_x = Vector::Zero(a->num_rows());
+    Vector a_t_y = Vector::Zero(a->num_cols());
+    Vector ap_x = Vector::Zero(a->num_rows());
+    Vector ap_t_y = Vector::Zero(a->num_cols());
+    a->RightMultiplyAndAccumulate(x.data(), a_x.data());
+    ap.RightMultiplyAndAccumulate(x.data(), ap_x.data());
+    EXPECT_NEAR((a_x - ap_x).norm() / a_x.norm(),
+                0.0,
+                std::numeric_limits<double>::epsilon());
+    a->LeftMultiplyAndAccumulate(y.data(), a_t_y.data());
+    ap.LeftMultiplyAndAccumulate(y.data(), ap_t_y.data());
+    EXPECT_NEAR((a_t_y - ap_t_y).norm() / a_t_y.norm(),
+                0.0,
+                std::numeric_limits<double>::epsilon());
+  }
+}
+
 }  // namespace internal
 }  // namespace ceres

diff --git a/internal/ceres/block_structure.h b/internal/ceres/block_structure.h
index 9ee10af..aaa4080 100644
--- a/internal/ceres/block_structure.h
+++ b/internal/ceres/block_structure.h

@@ -81,6 +81,11 @@
   explicit CompressedList(int num_cells) noexcept : cells(num_cells) {}
   Block block;
   std::vector<Cell> cells;
+  // Number of non-zeros in cells of this row block
+  int nnz{-1};
+  // Number of non-zeros in cells of this and every preceeding row block in
+  // block-sparse matrix
+  int cumulative_nnz{-1};
 };
 
 using CompressedRow = CompressedList;