Change storage in BlockRandomAccessSparseMatrix - TripletSparseMatrix in BlockRandomAccessSparseMatrix is replaced with BlockSparseMatrix - BlockSparseMatrix::ToCompressedRowSparseMatrix is performed in a direct sort-less way Change-Id: Ib951fda1b9394050e2c47a9721172c5e3c674801
diff --git a/internal/ceres/block_jacobi_preconditioner_benchmark.cc b/internal/ceres/block_jacobi_preconditioner_benchmark.cc index edd431d..b5c1613 100644 --- a/internal/ceres/block_jacobi_preconditioner_benchmark.cc +++ b/internal/ceres/block_jacobi_preconditioner_benchmark.cc
@@ -85,19 +85,17 @@ auto jacobian = CreateFakeBundleAdjustmentJacobian( kNumCameras, kNumPoints, kCameraSize, kPointSize, kVisibility, prng); - CompressedRowSparseMatrix jacobian_crs( - jacobian->num_rows(), jacobian->num_cols(), jacobian->num_nonzeros()); - jacobian->ToCompressedRowSparseMatrix(&jacobian_crs); + auto jacobian_crs = jacobian->ToCompressedRowSparseMatrix(); Preconditioner::Options preconditioner_options; ContextImpl context; preconditioner_options.context = &context; preconditioner_options.num_threads = static_cast<int>(state.range(0)); context.EnsureMinimumThreads(preconditioner_options.num_threads); - BlockCRSJacobiPreconditioner p(preconditioner_options, jacobian_crs); + BlockCRSJacobiPreconditioner p(preconditioner_options, *jacobian_crs); - Vector d = Vector::Ones(jacobian_crs.num_cols()); + Vector d = Vector::Ones(jacobian_crs->num_cols()); for (auto _ : state) { - p.Update(jacobian_crs, d.data()); + p.Update(*jacobian_crs, d.data()); } } @@ -154,19 +152,17 @@ std::mt19937 prng; auto jacobian = BlockSparseMatrix::CreateRandomMatrix(options, prng); - CompressedRowSparseMatrix jacobian_crs( - jacobian->num_rows(), jacobian->num_cols(), jacobian->num_nonzeros()); - jacobian->ToCompressedRowSparseMatrix(&jacobian_crs); + auto jacobian_crs = jacobian->ToCompressedRowSparseMatrix(); Preconditioner::Options preconditioner_options; ContextImpl context; preconditioner_options.context = &context; preconditioner_options.num_threads = static_cast<int>(state.range(0)); context.EnsureMinimumThreads(preconditioner_options.num_threads); - BlockCRSJacobiPreconditioner p(preconditioner_options, jacobian_crs); + BlockCRSJacobiPreconditioner p(preconditioner_options, *jacobian_crs); - Vector d = Vector::Ones(jacobian_crs.num_cols()); + Vector d = Vector::Ones(jacobian_crs->num_cols()); for (auto _ : state) { - p.Update(jacobian_crs, d.data()); + p.Update(*jacobian_crs, d.data()); } } BENCHMARK(BM_BlockCRSJacobiPreconditionerUnstructured)
diff --git a/internal/ceres/block_random_access_sparse_matrix.cc b/internal/ceres/block_random_access_sparse_matrix.cc index bd9f1ba..992bd01 100644 --- a/internal/ceres/block_random_access_sparse_matrix.cc +++ b/internal/ceres/block_random_access_sparse_matrix.cc
@@ -53,53 +53,46 @@ CHECK_LE(blocks.size(), std::numeric_limits<std::int32_t>::max()); const int num_cols = NumScalarEntries(blocks); + const int num_blocks = blocks.size(); - // Count the number of scalar non-zero entries and build the layout - // object for looking into the values array of the - // TripletSparseMatrix. - int num_nonzeros = 0; - for (const auto& block_pair : block_pairs) { - const int row_block_size = blocks_[block_pair.first].size; - const int col_block_size = blocks_[block_pair.second].size; - num_nonzeros += row_block_size * col_block_size; + std::vector<int> num_cells_at_row(num_blocks); + for (auto& p : block_pairs) { + ++num_cells_at_row[p.first]; } - + auto block_structure_ = new CompressedRowBlockStructure; + block_structure_->cols = blocks; + block_structure_->rows.resize(num_blocks); + auto p = block_pairs.begin(); + int num_nonzeros = 0; + // Pairs of block indices are sorted lexicographically, thus pairs + // corresponding to a single row-block are stored in segments of index pairs + // with constant row-block index and increasing column-block index. + // CompressedRowBlockStructure is created by traversing block_pairs set. + for (int row_block_id = 0; row_block_id < num_blocks; ++row_block_id) { + auto& row = block_structure_->rows[row_block_id]; + row.block = blocks[row_block_id]; + row.cells.reserve(num_cells_at_row[row_block_id]); + const int row_block_size = blocks[row_block_id].size; + // Process all index pairs corresponding to the current row block. Because + // index pairs are sorted lexicographically, cells are being appended to the + // current row-block till the first change in row-block index + for (; p != block_pairs.end() && row_block_id == p->first; ++p) { + const int col_block_id = p->second; + row.cells.emplace_back(col_block_id, num_nonzeros); + num_nonzeros += row_block_size * blocks[col_block_id].size; + } + } + bsm_ = std::make_unique<BlockSparseMatrix>(block_structure_); VLOG(1) << "Matrix Size [" << num_cols << "," << num_cols << "] " << num_nonzeros; - - tsm_ = - std::make_unique<TripletSparseMatrix>(num_cols, num_cols, num_nonzeros); - tsm_->set_num_nonzeros(num_nonzeros); - int* rows = tsm_->mutable_rows(); - int* cols = tsm_->mutable_cols(); - double* values = tsm_->mutable_values(); - - int pos = 0; - for (const auto& block_pair : block_pairs) { - const int row_block_size = blocks_[block_pair.first].size; - const int col_block_size = blocks_[block_pair.second].size; - cell_values_.emplace_back(block_pair, values + pos); - layout_[IntPairToInt64(block_pair.first, block_pair.second)] = - std::make_unique<CellInfo>(values + pos); - pos += row_block_size * col_block_size; - } - - // Fill the sparsity pattern of the underlying matrix. - for (const auto& block_pair : block_pairs) { - const int row_block_id = block_pair.first; - const int col_block_id = block_pair.second; - const int row_block_size = blocks_[row_block_id].size; - const int col_block_size = blocks_[col_block_id].size; - int pos = - layout_[IntPairToInt64(row_block_id, col_block_id)]->values - values; - for (int r = 0; r < row_block_size; ++r) { - for (int c = 0; c < col_block_size; ++c, ++pos) { - rows[pos] = blocks_[row_block_id].position + r; - cols[pos] = blocks_[col_block_id].position + c; - values[pos] = 1.0; - DCHECK_LT(rows[pos], tsm_->num_rows()); - DCHECK_LT(cols[pos], tsm_->num_rows()); - } + double* values = bsm_->mutable_values(); + for (int row_block_id = 0; row_block_id < num_blocks; ++row_block_id) { + const auto& cells = block_structure_->rows[row_block_id].cells; + for (auto& c : cells) { + const int col_block_id = c.block_id; + double* const data = values + c.position; + layout_[IntPairToInt64(row_block_id, col_block_id)] = + std::make_unique<CellInfo>(data); } } } @@ -126,35 +119,41 @@ // Assume that the user does not hold any locks on any cell blocks // when they are calling SetZero. void BlockRandomAccessSparseMatrix::SetZero() { - ParallelSetZero( - context_, num_threads_, tsm_->mutable_values(), tsm_->num_nonzeros()); + bsm_->SetZero(context_, num_threads_); } void BlockRandomAccessSparseMatrix::SymmetricRightMultiplyAndAccumulate( const double* x, double* y) const { - for (const auto& cell_position_and_data : cell_values_) { - const int row = cell_position_and_data.first.first; - const int row_block_size = blocks_[row].size; - const int row_block_pos = blocks_[row].position; + const auto bs = bsm_->block_structure(); + const auto values = bsm_->values(); + const int num_blocks = blocks_.size(); - const int col = cell_position_and_data.first.second; - const int col_block_size = blocks_[col].size; - const int col_block_pos = blocks_[col].position; + for (int row_block_id = 0; row_block_id < num_blocks; ++row_block_id) { + const auto& row_block = bs->rows[row_block_id]; + const int row_block_size = row_block.block.size; + const int row_block_pos = row_block.block.position; - MatrixVectorMultiply<Eigen::Dynamic, Eigen::Dynamic, 1>( - cell_position_and_data.second, - row_block_size, - col_block_size, - x + col_block_pos, - y + row_block_pos); + for (auto& c : row_block.cells) { + const int col_block_id = c.block_id; + const int col_block_size = blocks_[col_block_id].size; + const int col_block_pos = blocks_[col_block_id].position; - // Since the matrix is symmetric, but only the upper triangular - // part is stored, if the block being accessed is not a diagonal - // block, then use the same block to do the corresponding lower - // triangular multiply also. - if (row != col) { + MatrixVectorMultiply<Eigen::Dynamic, Eigen::Dynamic, 1>( + values + c.position, + row_block_size, + col_block_size, + x + col_block_pos, + y + row_block_pos); + if (col_block_id == row_block_id) { + continue; + } + + // Since the matrix is symmetric, but only the upper triangular + // part is stored, if the block being accessed is not a diagonal + // block, then use the same block to do the corresponding lower + // triangular multiply also MatrixTransposeVectorMultiply<Eigen::Dynamic, Eigen::Dynamic, 1>( - cell_position_and_data.second, + values + c.position, row_block_size, col_block_size, x + row_block_pos,
diff --git a/internal/ceres/block_random_access_sparse_matrix.h b/internal/ceres/block_random_access_sparse_matrix.h index a64a139..cb6f511 100644 --- a/internal/ceres/block_random_access_sparse_matrix.h +++ b/internal/ceres/block_random_access_sparse_matrix.h
@@ -39,18 +39,18 @@ #include <vector> #include "ceres/block_random_access_matrix.h" +#include "ceres/block_sparse_matrix.h" #include "ceres/block_structure.h" #include "ceres/context_impl.h" #include "ceres/internal/disable_warnings.h" #include "ceres/internal/export.h" #include "ceres/small_blas.h" -#include "ceres/triplet_sparse_matrix.h" #include "ceres/types.h" namespace ceres::internal { // A thread safe square block sparse implementation of -// BlockRandomAccessMatrix. Internally a TripletSparseMatrix is used +// BlockRandomAccessMatrix. Internally a BlockSparseMatrix is used // for doing the actual storage. This class augments this matrix with // an unordered_map that allows random read/write access. class CERES_NO_EXPORT BlockRandomAccessSparseMatrix @@ -81,19 +81,19 @@ // locked. void SetZero() final; - // Assume that the matrix is symmetric and only one half of the - // matrix is stored. + // Assume that the matrix is symmetric and only one half of the matrix is + // stored. // // y += S * x void SymmetricRightMultiplyAndAccumulate(const double* x, double* y) const; // Since the matrix is square, num_rows() == num_cols(). - int num_rows() const final { return tsm_->num_rows(); } - int num_cols() const final { return tsm_->num_cols(); } + int num_rows() const final { return bsm_->num_rows(); } + int num_cols() const final { return bsm_->num_cols(); } // Access to the underlying matrix object. - const TripletSparseMatrix* matrix() const { return tsm_.get(); } - TripletSparseMatrix* mutable_matrix() { return tsm_.get(); } + const BlockSparseMatrix* matrix() const { return bsm_.get(); } + BlockSparseMatrix* mutable_matrix() { return bsm_.get(); } private: int64_t IntPairToInt64(int row, int col) const { @@ -117,12 +117,8 @@ using LayoutType = std::unordered_map<int64_t, std::unique_ptr<CellInfo>>; LayoutType layout_; - // In order traversal of contents of the matrix. This allows us to - // implement a matrix-vector product which is 20% faster than using the - // iterator in the Layout object instead. - std::vector<std::pair<std::pair<int, int>, double*>> cell_values_; // The underlying matrix object which actually stores the cells. - std::unique_ptr<TripletSparseMatrix> tsm_; + std::unique_ptr<BlockSparseMatrix> bsm_; friend class BlockRandomAccessSparseMatrixTest; };
diff --git a/internal/ceres/block_random_access_sparse_matrix_test.cc b/internal/ceres/block_random_access_sparse_matrix_test.cc index 2e7d439..f8afc1e 100644 --- a/internal/ceres/block_random_access_sparse_matrix_test.cc +++ b/internal/ceres/block_random_access_sparse_matrix_test.cc
@@ -91,12 +91,11 @@ Matrix::Ones(blocks[row_block_id].size, blocks[col_block_id].size); } - const TripletSparseMatrix* tsm = m.matrix(); - EXPECT_EQ(tsm->num_nonzeros(), num_nonzeros); - EXPECT_EQ(tsm->max_num_nonzeros(), num_nonzeros); + const BlockSparseMatrix* bsm = m.matrix(); + EXPECT_EQ(bsm->num_nonzeros(), num_nonzeros); Matrix dense; - tsm->ToDenseMatrix(&dense); + bsm->ToDenseMatrix(&dense); double kTolerance = 1e-14;
diff --git a/internal/ceres/block_sparse_matrix.cc b/internal/ceres/block_sparse_matrix.cc index a60ee89..df2d9e1 100644 --- a/internal/ceres/block_sparse_matrix.cc +++ b/internal/ceres/block_sparse_matrix.cc
@@ -58,6 +58,120 @@ rows[c].cumulative_nnz = curr_nnz + rows[c - 1].cumulative_nnz; } } + +template <bool transpose> +std::unique_ptr<CompressedRowSparseMatrix> +CreateStructureOfCompressedRowSparseMatrix( + const double* values, + int num_rows, + int num_cols, + int num_nonzeros, + const CompressedRowBlockStructure* block_structure) { + auto crs_matrix = std::make_unique<CompressedRowSparseMatrix>( + num_rows, num_cols, num_nonzeros); + auto crs_cols = crs_matrix->mutable_cols(); + auto crs_rows = crs_matrix->mutable_rows(); + int value_offset = 0; + const int num_row_blocks = block_structure->rows.size(); + const auto& cols = block_structure->cols; + *crs_rows++ = 0; + for (int row_block_id = 0; row_block_id < num_row_blocks; ++row_block_id) { + const auto& row_block = block_structure->rows[row_block_id]; + // Empty row block: only requires setting row offsets + if (row_block.cells.empty()) { + std::fill(crs_rows, crs_rows + row_block.block.size, value_offset); + crs_rows += row_block.block.size; + continue; + } + + // Compute number of non-zeros per row from number of non-zeros in row-block + int row_block_nnz; + if constexpr (transpose) { + // Transposed block structure comes with nnz filled-in + row_block_nnz = row_block.nnz; + } else { + // Non-transposed block structure has sequential layout, but nnz field of + // block structure is not filled. Difference between values position of + // the first and the last cells in row-block gives number of non-zero + // elements in row-block excluding the last cell. + const auto& front = row_block.cells.front(); + const auto& back = row_block.cells.back(); + row_block_nnz = + (back.position - front.position) + + block_structure->cols[back.block_id].size * row_block.block.size; + } + const int row_nnz = row_block_nnz / row_block.block.size; + + // Row-wise setup of matrix structure + for (int row = 0; row < row_block.block.size; ++row) { + value_offset += row_nnz; + *crs_rows++ = value_offset; + for (auto& c : row_block.cells) { + const int col_block_size = cols[c.block_id].size; + const int col_position = cols[c.block_id].position; + std::iota(crs_cols, crs_cols + col_block_size, col_position); + crs_cols += col_block_size; + } + } + } + return crs_matrix; +} + +template <bool transpose> +void UpdateCompressedRowSparseMatrixImpl( + CompressedRowSparseMatrix* crs_matrix, + const double* values, + const CompressedRowBlockStructure* block_structure) { + auto crs_values = crs_matrix->mutable_values(); + auto crs_rows = crs_matrix->mutable_rows(); + const int num_row_blocks = block_structure->rows.size(); + const auto& cols = block_structure->cols; + for (int row_block_id = 0; row_block_id < num_row_blocks; ++row_block_id) { + const auto& row_block = block_structure->rows[row_block_id]; + const int row_block_size = row_block.block.size; + const int row_nnz = crs_rows[1] - crs_rows[0]; + crs_rows += row_block_size; + + if (row_nnz == 0) { + continue; + } + + MatrixRef crs_row_block(crs_values, row_block_size, row_nnz); + int col_offset = 0; + for (auto& c : row_block.cells) { + const int col_block_size = cols[c.block_id].size; + auto crs_cell = + crs_row_block.block(0, col_offset, row_block_size, col_block_size); + if constexpr (transpose) { + // Transposed matrix is filled using transposed block-strucutre + ConstMatrixRef cell( + values + c.position, col_block_size, row_block_size); + crs_cell = cell.transpose(); + } else { + ConstMatrixRef cell( + values + c.position, row_block_size, col_block_size); + crs_cell = cell; + } + col_offset += col_block_size; + } + crs_values += row_nnz * row_block_size; + } +} + +void SetBlockStructureOfCompressedRowSparseMatrix( + CompressedRowSparseMatrix* crs_matrix, + CompressedRowBlockStructure* block_structure) { + const int num_row_blocks = block_structure->rows.size(); + auto& row_blocks = *crs_matrix->mutable_row_blocks(); + row_blocks.resize(num_row_blocks); + for (int i = 0; i < num_row_blocks; ++i) { + row_blocks[i] = block_structure->rows[i].block; + } + + auto& col_blocks = *crs_matrix->mutable_col_blocks(); + col_blocks = block_structure->cols; +} + } // namespace BlockSparseMatrix::BlockSparseMatrix( @@ -330,29 +444,47 @@ transpose_bs->rows.data(), [](const CompressedRow& row) { return row.cumulative_nnz; }); } +std::unique_ptr<CompressedRowSparseMatrix> +BlockSparseMatrix::ToCompressedRowSparseMatrixTranspose() const { + auto bs = transpose_block_structure_.get(); + auto crs_matrix = CreateStructureOfCompressedRowSparseMatrix<true>( + values(), num_cols_, num_rows_, num_nonzeros_, bs); -void BlockSparseMatrix::ToCompressedRowSparseMatrix( + SetBlockStructureOfCompressedRowSparseMatrix(crs_matrix.get(), bs); + + UpdateCompressedRowSparseMatrixTranspose(crs_matrix.get()); + return crs_matrix; +} + +std::unique_ptr<CompressedRowSparseMatrix> +BlockSparseMatrix::ToCompressedRowSparseMatrix() const { + auto crs_matrix = CreateStructureOfCompressedRowSparseMatrix<false>( + values(), num_rows_, num_cols_, num_nonzeros_, block_structure_.get()); + + SetBlockStructureOfCompressedRowSparseMatrix(crs_matrix.get(), + block_structure_.get()); + + UpdateCompressedRowSparseMatrix(crs_matrix.get()); + return crs_matrix; +} + +void BlockSparseMatrix::UpdateCompressedRowSparseMatrixTranspose( CompressedRowSparseMatrix* crs_matrix) const { - { - TripletSparseMatrix ts_matrix; - this->ToTripletSparseMatrix(&ts_matrix); - *crs_matrix = - *CompressedRowSparseMatrix::FromTripletSparseMatrix(ts_matrix); - } - - int num_row_blocks = block_structure_->rows.size(); - auto& row_blocks = *crs_matrix->mutable_row_blocks(); - row_blocks.resize(num_row_blocks); - for (int i = 0; i < num_row_blocks; ++i) { - row_blocks[i] = block_structure_->rows[i].block; - } - - int num_col_blocks = block_structure_->cols.size(); - auto& col_blocks = *crs_matrix->mutable_col_blocks(); - col_blocks.resize(num_col_blocks); - for (int i = 0; i < num_col_blocks; ++i) { - col_blocks[i] = block_structure_->cols[i]; - } + CHECK(crs_matrix != nullptr); + CHECK_EQ(crs_matrix->num_rows(), num_cols_); + CHECK_EQ(crs_matrix->num_cols(), num_rows_); + CHECK_EQ(crs_matrix->num_nonzeros(), num_nonzeros_); + UpdateCompressedRowSparseMatrixImpl<true>( + crs_matrix, values(), transpose_block_structure_.get()); +} +void BlockSparseMatrix::UpdateCompressedRowSparseMatrix( + CompressedRowSparseMatrix* crs_matrix) const { + CHECK(crs_matrix != nullptr); + CHECK_EQ(crs_matrix->num_rows(), num_rows_); + CHECK_EQ(crs_matrix->num_cols(), num_cols_); + CHECK_EQ(crs_matrix->num_nonzeros(), num_nonzeros_); + UpdateCompressedRowSparseMatrixImpl<false>( + crs_matrix, values(), block_structure_.get()); } void BlockSparseMatrix::ToDenseMatrix(Matrix* dense_matrix) const {
diff --git a/internal/ceres/block_sparse_matrix.h b/internal/ceres/block_sparse_matrix.h index ed3146f..55f1cc4 100644 --- a/internal/ceres/block_sparse_matrix.h +++ b/internal/ceres/block_sparse_matrix.h
@@ -91,7 +91,20 @@ void ScaleColumns(const double* scale, ContextImpl* context, int num_threads) final; - void ToCompressedRowSparseMatrix(CompressedRowSparseMatrix* matrix) const; + + // Convert to CompressedRowSparseMatrix + std::unique_ptr<CompressedRowSparseMatrix> ToCompressedRowSparseMatrix() + const; + // Create CompressedRowSparseMatrix corresponding to transposed matrix + std::unique_ptr<CompressedRowSparseMatrix> + ToCompressedRowSparseMatrixTranspose() const; + // Copy values to CompressedRowSparseMatrix that has compatible structure + void UpdateCompressedRowSparseMatrix( + CompressedRowSparseMatrix* crs_matrix) const; + // Copy values to CompressedRowSparseMatrix that has structure of transposed + // matrix + void UpdateCompressedRowSparseMatrixTranspose( + CompressedRowSparseMatrix* crs_matrix) const; void ToDenseMatrix(Matrix* dense_matrix) const final; void ToTextFile(FILE* file) const final;
diff --git a/internal/ceres/block_sparse_matrix_test.cc b/internal/ceres/block_sparse_matrix_test.cc index 5036aa2..f749df3 100644 --- a/internal/ceres/block_sparse_matrix_test.cc +++ b/internal/ceres/block_sparse_matrix_test.cc
@@ -482,38 +482,73 @@ TEST(BlockSparseMatrix, ToCRSMatrix) { { std::unique_ptr<BlockSparseMatrix> m = CreateTestMatrixFromId(0); - CompressedRowSparseMatrix m_crs( - m->num_rows(), m->num_cols(), m->num_nonzeros()); - m->ToCompressedRowSparseMatrix(&m_crs); + auto m_crs = m->ToCompressedRowSparseMatrix(); std::vector<int> rows_expected = {0, 2, 4, 7, 10}; std::vector<int> cols_expected = {0, 1, 0, 1, 2, 3, 4, 2, 3, 4}; std::vector<double> values_expected = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; for (int i = 0; i < rows_expected.size(); ++i) { - EXPECT_EQ(m_crs.rows()[i], rows_expected[i]); + EXPECT_EQ(m_crs->rows()[i], rows_expected[i]); } for (int i = 0; i < cols_expected.size(); ++i) { - EXPECT_EQ(m_crs.cols()[i], cols_expected[i]); + EXPECT_EQ(m_crs->cols()[i], cols_expected[i]); } for (int i = 0; i < values_expected.size(); ++i) { - EXPECT_EQ(m_crs.values()[i], values_expected[i]); + EXPECT_EQ(m_crs->values()[i], values_expected[i]); } } { std::unique_ptr<BlockSparseMatrix> m = CreateTestMatrixFromId(1); - CompressedRowSparseMatrix m_crs( - m->num_rows(), m->num_cols(), m->num_nonzeros()); - m->ToCompressedRowSparseMatrix(&m_crs); + auto m_crs = m->ToCompressedRowSparseMatrix(); std::vector<int> rows_expected = {0, 4, 8, 9}; std::vector<int> cols_expected = {0, 1, 3, 4, 0, 1, 3, 4, 2}; std::vector<double> values_expected = {1, 2, 5, 6, 3, 4, 7, 8, 9}; for (int i = 0; i < rows_expected.size(); ++i) { - EXPECT_EQ(m_crs.rows()[i], rows_expected[i]); + EXPECT_EQ(m_crs->rows()[i], rows_expected[i]); } for (int i = 0; i < cols_expected.size(); ++i) { - EXPECT_EQ(m_crs.cols()[i], cols_expected[i]); + EXPECT_EQ(m_crs->cols()[i], cols_expected[i]); } for (int i = 0; i < values_expected.size(); ++i) { - EXPECT_EQ(m_crs.values()[i], values_expected[i]); + EXPECT_EQ(m_crs->values()[i], values_expected[i]); + } + } +} + +TEST(BlockSparseMatrix, ToCRSMatrixTranspose) { + { + std::unique_ptr<BlockSparseMatrix> m = CreateTestMatrixFromId(0); + auto m_crs_transpose = m->ToCompressedRowSparseMatrixTranspose(); + std::vector<int> rows_expected = {0, 2, 4, 6, 8, 10, 10}; + std::vector<int> cols_expected = {0, 1, 0, 1, 2, 3, 2, 3, 2, 3}; + std::vector<double> values_expected = {1, 3, 2, 4, 5, 8, 6, 9, 7, 10}; + EXPECT_EQ(m_crs_transpose->num_nonzeros(), cols_expected.size()); + EXPECT_EQ(m_crs_transpose->num_rows(), rows_expected.size() - 1); + for (int i = 0; i < rows_expected.size(); ++i) { + EXPECT_EQ(m_crs_transpose->rows()[i], rows_expected[i]); + } + for (int i = 0; i < cols_expected.size(); ++i) { + EXPECT_EQ(m_crs_transpose->cols()[i], cols_expected[i]); + } + for (int i = 0; i < values_expected.size(); ++i) { + EXPECT_EQ(m_crs_transpose->values()[i], values_expected[i]); + } + } + { + std::unique_ptr<BlockSparseMatrix> m = CreateTestMatrixFromId(1); + auto m_crs_transpose = m->ToCompressedRowSparseMatrixTranspose(); + std::vector<int> rows_expected = {0, 2, 4, 5, 7, 9, 9}; + std::vector<int> cols_expected = {0, 1, 0, 1, 2, 0, 1, 0, 1}; + std::vector<double> values_expected = {1, 3, 2, 4, 9, 5, 7, 6, 8}; + EXPECT_EQ(m_crs_transpose->num_nonzeros(), cols_expected.size()); + EXPECT_EQ(m_crs_transpose->num_rows(), rows_expected.size() - 1); + for (int i = 0; i < rows_expected.size(); ++i) { + EXPECT_EQ(m_crs_transpose->rows()[i], rows_expected[i]); + } + for (int i = 0; i < cols_expected.size(); ++i) { + EXPECT_EQ(m_crs_transpose->cols()[i], cols_expected[i]); + } + for (int i = 0; i < values_expected.size(); ++i) { + EXPECT_EQ(m_crs_transpose->values()[i], values_expected[i]); } } }
diff --git a/internal/ceres/cuda_sparse_matrix_test.cc b/internal/ceres/cuda_sparse_matrix_test.cc index a12ad40..74e74fd 100644 --- a/internal/ceres/cuda_sparse_matrix_test.cc +++ b/internal/ceres/cuda_sparse_matrix_test.cc
@@ -80,10 +80,8 @@ TEST_F(CudaSparseMatrixTest, RightMultiplyAndAccumulate) { std::string message; - CompressedRowSparseMatrix A_crs( - A_->num_rows(), A_->num_cols(), A_->num_nonzeros()); - A_->ToCompressedRowSparseMatrix(&A_crs); - CudaSparseMatrix A_gpu(&context_, A_crs); + auto A_crs = A_->ToCompressedRowSparseMatrix(); + CudaSparseMatrix A_gpu(&context_, *A_crs); CudaVector x_gpu(&context_, A_gpu.num_cols()); CudaVector res_gpu(&context_, A_gpu.num_rows()); x_gpu.CopyFromCpu(x_);
diff --git a/internal/ceres/evaluation_benchmark.cc b/internal/ceres/evaluation_benchmark.cc index 5dc1ef4..ae1a65d 100644 --- a/internal/ceres/evaluation_benchmark.cc +++ b/internal/ceres/evaluation_benchmark.cc
@@ -128,13 +128,7 @@ std::unique_ptr<CompressedRowSparseMatrix> CreateCompressedRowSparseJacobian( ContextImpl* context) { auto block_sparse = BlockSparseJacobian(context); - auto crs_jacobian = std::make_unique<CompressedRowSparseMatrix>( - block_sparse->num_rows(), - block_sparse->num_cols(), - block_sparse->num_nonzeros()); - - block_sparse->ToCompressedRowSparseMatrix(crs_jacobian.get()); - return crs_jacobian; + return block_sparse->ToCompressedRowSparseMatrix(); } const BlockSparseMatrix* BlockSparseJacobian(ContextImpl* context) {
diff --git a/internal/ceres/schur_complement_solver.cc b/internal/ceres/schur_complement_solver.cc index aedc73a..e49d5bb 100644 --- a/internal/ceres/schur_complement_solver.cc +++ b/internal/ceres/schur_complement_solver.cc
@@ -299,32 +299,36 @@ summary.termination_type = LinearSolverTerminationType::SUCCESS; summary.message = "Success."; - const TripletSparseMatrix* tsm = + const BlockSparseMatrix* bsm = down_cast<const BlockRandomAccessSparseMatrix*>(lhs())->matrix(); - if (tsm->num_rows() == 0) { + if (bsm->num_rows() == 0) { return summary; } - std::unique_ptr<CompressedRowSparseMatrix> lhs; const CompressedRowSparseMatrix::StorageType storage_type = sparse_cholesky_->StorageType(); if (storage_type == CompressedRowSparseMatrix::StorageType::UPPER_TRIANGULAR) { - lhs = CompressedRowSparseMatrix::FromTripletSparseMatrix(*tsm); - lhs->set_storage_type( - CompressedRowSparseMatrix::StorageType::UPPER_TRIANGULAR); + if (!crs_lhs_) { + crs_lhs_ = bsm->ToCompressedRowSparseMatrix(); + crs_lhs_->set_storage_type( + CompressedRowSparseMatrix::StorageType::UPPER_TRIANGULAR); + } else { + bsm->UpdateCompressedRowSparseMatrix(crs_lhs_.get()); + } } else { - lhs = CompressedRowSparseMatrix::FromTripletSparseMatrixTransposed(*tsm); - lhs->set_storage_type( - CompressedRowSparseMatrix::StorageType::LOWER_TRIANGULAR); + if (!crs_lhs_) { + crs_lhs_ = bsm->ToCompressedRowSparseMatrixTranspose(); + crs_lhs_->set_storage_type( + CompressedRowSparseMatrix::StorageType::LOWER_TRIANGULAR); + } else { + bsm->UpdateCompressedRowSparseMatrixTranspose(crs_lhs_.get()); + } } - *lhs->mutable_col_blocks() = blocks_; - *lhs->mutable_row_blocks() = blocks_; - summary.num_iterations = 1; summary.termination_type = sparse_cholesky_->FactorAndSolve( - lhs.get(), rhs().data(), solution, &summary.message); + crs_lhs_.get(), rhs().data(), solution, &summary.message); return summary; }
diff --git a/internal/ceres/schur_complement_solver.h b/internal/ceres/schur_complement_solver.h index bb2cd88..9f6146b 100644 --- a/internal/ceres/schur_complement_solver.h +++ b/internal/ceres/schur_complement_solver.h
@@ -186,6 +186,7 @@ std::vector<Block> blocks_; std::unique_ptr<SparseCholesky> sparse_cholesky_; std::unique_ptr<BlockRandomAccessDiagonalMatrix> preconditioner_; + std::unique_ptr<CompressedRowSparseMatrix> crs_lhs_; Vector cg_solution_; Vector* scratch_[4] = {nullptr, nullptr, nullptr, nullptr}; };
diff --git a/internal/ceres/spmv_benchmark.cc b/internal/ceres/spmv_benchmark.cc index 2a6e679..4211784 100644 --- a/internal/ceres/spmv_benchmark.cc +++ b/internal/ceres/spmv_benchmark.cc
@@ -183,21 +183,18 @@ auto bsm_jacobian = CreateFakeBundleAdjustmentJacobian( kNumCameras, kNumPoints, kCameraSize, kPointSize, kVisibility, prng); - CompressedRowSparseMatrix jacobian(bsm_jacobian->num_rows(), - bsm_jacobian->num_cols(), - bsm_jacobian->num_nonzeros()); - bsm_jacobian->ToCompressedRowSparseMatrix(&jacobian); + auto jacobian = bsm_jacobian->ToCompressedRowSparseMatrix(); ContextImpl context; context.EnsureMinimumThreads(num_threads); - Vector x(jacobian.num_cols()); - Vector y(jacobian.num_rows()); + Vector x(jacobian->num_cols()); + Vector y(jacobian->num_rows()); x.setRandom(); y.setRandom(); double sum = 0; for (auto _ : state) { - jacobian.RightMultiplyAndAccumulate( + jacobian->RightMultiplyAndAccumulate( x.data(), y.data(), &context, num_threads); sum += y.norm(); } @@ -225,21 +222,18 @@ std::mt19937 prng; auto bsm_jacobian = BlockSparseMatrix::CreateRandomMatrix(options, prng); - CompressedRowSparseMatrix jacobian(bsm_jacobian->num_rows(), - bsm_jacobian->num_cols(), - bsm_jacobian->num_nonzeros()); - bsm_jacobian->ToCompressedRowSparseMatrix(&jacobian); + auto jacobian = bsm_jacobian->ToCompressedRowSparseMatrix(); ContextImpl context; context.EnsureMinimumThreads(num_threads); - Vector x(jacobian.num_cols()); - Vector y(jacobian.num_rows()); + Vector x(jacobian->num_cols()); + Vector y(jacobian->num_rows()); x.setRandom(); y.setRandom(); double sum = 0; for (auto _ : state) { - jacobian.RightMultiplyAndAccumulate( + jacobian->RightMultiplyAndAccumulate( x.data(), y.data(), &context, num_threads); sum += y.norm(); } @@ -258,19 +252,16 @@ // Perform setup here auto bsm_jacobian = CreateFakeBundleAdjustmentJacobian( kNumCameras, kNumPoints, kCameraSize, kPointSize, kVisibility, prng); - CompressedRowSparseMatrix jacobian(bsm_jacobian->num_rows(), - bsm_jacobian->num_cols(), - bsm_jacobian->num_nonzeros()); - bsm_jacobian->ToCompressedRowSparseMatrix(&jacobian); + auto jacobian = bsm_jacobian->ToCompressedRowSparseMatrix(); - Vector x(jacobian.num_rows()); - Vector y(jacobian.num_cols()); + Vector x(jacobian->num_rows()); + Vector y(jacobian->num_cols()); x.setRandom(); y.setRandom(); double sum = 0; for (auto _ : state) { // This code gets timed - jacobian.LeftMultiplyAndAccumulate(x.data(), y.data()); + jacobian->LeftMultiplyAndAccumulate(x.data(), y.data()); sum += y.norm(); } CHECK_NE(sum, 0.0); @@ -291,19 +282,16 @@ std::mt19937 prng; auto bsm_jacobian = BlockSparseMatrix::CreateRandomMatrix(options, prng); - CompressedRowSparseMatrix jacobian(bsm_jacobian->num_rows(), - bsm_jacobian->num_cols(), - bsm_jacobian->num_nonzeros()); - bsm_jacobian->ToCompressedRowSparseMatrix(&jacobian); + auto jacobian = bsm_jacobian->ToCompressedRowSparseMatrix(); - Vector x(jacobian.num_rows()); - Vector y(jacobian.num_cols()); + Vector x(jacobian->num_rows()); + Vector y(jacobian->num_cols()); x.setRandom(); y.setRandom(); double sum = 0; for (auto _ : state) { // This code gets timed - jacobian.LeftMultiplyAndAccumulate(x.data(), y.data()); + jacobian->LeftMultiplyAndAccumulate(x.data(), y.data()); sum += y.norm(); } CHECK_NE(sum, 0.0); @@ -319,10 +307,8 @@ ContextImpl context; std::string message; context.InitCuda(&message); - CompressedRowSparseMatrix jacobian_crs( - jacobian->num_rows(), jacobian->num_cols(), jacobian->num_nonzeros()); - jacobian->ToCompressedRowSparseMatrix(&jacobian_crs); - CudaSparseMatrix cuda_jacobian(&context, jacobian_crs); + auto jacobian_crs = jacobian->ToCompressedRowSparseMatrix(); + CudaSparseMatrix cuda_jacobian(&context, *jacobian_crs); CudaVector cuda_x(&context, 0); CudaVector cuda_y(&context, 0); @@ -360,10 +346,8 @@ ContextImpl context; std::string message; context.InitCuda(&message); - CompressedRowSparseMatrix jacobian_crs( - jacobian->num_rows(), jacobian->num_cols(), jacobian->num_nonzeros()); - jacobian->ToCompressedRowSparseMatrix(&jacobian_crs); - CudaSparseMatrix cuda_jacobian(&context, jacobian_crs); + auto jacobian_crs = jacobian->ToCompressedRowSparseMatrix(); + CudaSparseMatrix cuda_jacobian(&context, *jacobian_crs); CudaVector cuda_x(&context, 0); CudaVector cuda_y(&context, 0); @@ -392,10 +376,8 @@ ContextImpl context; std::string message; context.InitCuda(&message); - CompressedRowSparseMatrix jacobian_crs( - jacobian->num_rows(), jacobian->num_cols(), jacobian->num_nonzeros()); - jacobian->ToCompressedRowSparseMatrix(&jacobian_crs); - CudaSparseMatrix cuda_jacobian(&context, jacobian_crs); + auto jacobian_crs = jacobian->ToCompressedRowSparseMatrix(); + CudaSparseMatrix cuda_jacobian(&context, *jacobian_crs); CudaVector cuda_x(&context, 0); CudaVector cuda_y(&context, 0); @@ -433,10 +415,8 @@ ContextImpl context; std::string message; context.InitCuda(&message); - CompressedRowSparseMatrix jacobian_crs( - jacobian->num_rows(), jacobian->num_cols(), jacobian->num_nonzeros()); - jacobian->ToCompressedRowSparseMatrix(&jacobian_crs); - CudaSparseMatrix cuda_jacobian(&context, jacobian_crs); + auto jacobian_crs = jacobian->ToCompressedRowSparseMatrix(); + CudaSparseMatrix cuda_jacobian(&context, *jacobian_crs); CudaVector cuda_x(&context, 0); CudaVector cuda_y(&context, 0);
diff --git a/internal/ceres/visibility_based_preconditioner.cc b/internal/ceres/visibility_based_preconditioner.cc index d2e4ee0..18bb864 100644 --- a/internal/ceres/visibility_based_preconditioner.cc +++ b/internal/ceres/visibility_based_preconditioner.cc
@@ -392,27 +392,33 @@ // Compute the sparse Cholesky factorization of the preconditioner // matrix. LinearSolverTerminationType VisibilityBasedPreconditioner::Factorize() { - // Extract the TripletSparseMatrix that is used for actually storing + // Extract the BlockSparseMatrix that is used for actually storing // S and convert it into a CompressedRowSparseMatrix. - const TripletSparseMatrix* tsm = - down_cast<BlockRandomAccessSparseMatrix*>(m_.get())->mutable_matrix(); - - std::unique_ptr<CompressedRowSparseMatrix> lhs; + const BlockSparseMatrix* bsm = + down_cast<BlockRandomAccessSparseMatrix*>(m_.get())->matrix(); const CompressedRowSparseMatrix::StorageType storage_type = sparse_cholesky_->StorageType(); if (storage_type == CompressedRowSparseMatrix::StorageType::UPPER_TRIANGULAR) { - lhs = CompressedRowSparseMatrix::FromTripletSparseMatrix(*tsm); - lhs->set_storage_type( - CompressedRowSparseMatrix::StorageType::UPPER_TRIANGULAR); + if (!m_crs_) { + m_crs_ = bsm->ToCompressedRowSparseMatrix(); + m_crs_->set_storage_type( + CompressedRowSparseMatrix::StorageType::UPPER_TRIANGULAR); + } else { + bsm->UpdateCompressedRowSparseMatrix(m_crs_.get()); + } } else { - lhs = CompressedRowSparseMatrix::FromTripletSparseMatrixTransposed(*tsm); - lhs->set_storage_type( - CompressedRowSparseMatrix::StorageType::LOWER_TRIANGULAR); + if (!m_crs_) { + m_crs_ = bsm->ToCompressedRowSparseMatrixTranspose(); + m_crs_->set_storage_type( + CompressedRowSparseMatrix::StorageType::LOWER_TRIANGULAR); + } else { + bsm->UpdateCompressedRowSparseMatrixTranspose(m_crs_.get()); + } } std::string message; - return sparse_cholesky_->Factorize(lhs.get(), &message); + return sparse_cholesky_->Factorize(m_crs_.get(), &message); } void VisibilityBasedPreconditioner::RightMultiplyAndAccumulate(
diff --git a/internal/ceres/visibility_based_preconditioner.h b/internal/ceres/visibility_based_preconditioner.h index 84773f8..96c9721 100644 --- a/internal/ceres/visibility_based_preconditioner.h +++ b/internal/ceres/visibility_based_preconditioner.h
@@ -194,6 +194,7 @@ // Preconditioner matrix. std::unique_ptr<BlockRandomAccessSparseMatrix> m_; + std::unique_ptr<CompressedRowSparseMatrix> m_crs_; std::unique_ptr<SparseCholesky> sparse_cholesky_; };