| // Ceres Solver - A fast non-linear least squares minimizer |
| // Copyright 2022 Google Inc. All rights reserved. |
| // http://ceres-solver.org/ |
| // |
| // Redistribution and use in source and binary forms, with or without |
| // modification, are permitted provided that the following conditions are met: |
| // |
| // * Redistributions of source code must retain the above copyright notice, |
| // this list of conditions and the following disclaimer. |
| // * Redistributions in binary form must reproduce the above copyright notice, |
| // this list of conditions and the following disclaimer in the documentation |
| // and/or other materials provided with the distribution. |
| // * Neither the name of Google Inc. nor the names of its contributors may be |
| // used to endorse or promote products derived from this software without |
| // specific prior written permission. |
| // |
| // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
| // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
| // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
| // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
| // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
| // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
| // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
| // POSSIBILITY OF SUCH DAMAGE. |
| // |
| // Author: sameeragarwal@google.com (Sameer Agarwal) |
| |
| #include "ceres/block_sparse_matrix.h" |
| |
| #include <algorithm> |
| #include <cstddef> |
| #include <memory> |
| #include <numeric> |
| #include <random> |
| #include <vector> |
| |
| #include "ceres/block_structure.h" |
| #include "ceres/crs_matrix.h" |
| #include "ceres/internal/eigen.h" |
| #include "ceres/parallel_for.h" |
| #include "ceres/parallel_vector_ops.h" |
| #include "ceres/small_blas.h" |
| #include "ceres/triplet_sparse_matrix.h" |
| #include "glog/logging.h" |
| |
| #ifndef CERES_NO_CUDA |
| #include "cuda_runtime.h" |
| #endif |
| |
| namespace ceres::internal { |
| |
| namespace { |
| void ComputeCumulativeNumberOfNonZeros(std::vector<CompressedList>& rows) { |
| if (rows.empty()) { |
| return; |
| } |
| rows[0].cumulative_nnz = rows[0].nnz; |
| for (int c = 1; c < rows.size(); ++c) { |
| const int curr_nnz = rows[c].nnz; |
| rows[c].cumulative_nnz = curr_nnz + rows[c - 1].cumulative_nnz; |
| } |
| } |
| |
| template <bool transpose> |
| std::unique_ptr<CompressedRowSparseMatrix> |
| CreateStructureOfCompressedRowSparseMatrix( |
| const double* values, |
| int num_rows, |
| int num_cols, |
| int num_nonzeros, |
| const CompressedRowBlockStructure* block_structure) { |
| auto crs_matrix = std::make_unique<CompressedRowSparseMatrix>( |
| num_rows, num_cols, num_nonzeros); |
| auto crs_cols = crs_matrix->mutable_cols(); |
| auto crs_rows = crs_matrix->mutable_rows(); |
| int value_offset = 0; |
| const int num_row_blocks = block_structure->rows.size(); |
| const auto& cols = block_structure->cols; |
| *crs_rows++ = 0; |
| for (int row_block_id = 0; row_block_id < num_row_blocks; ++row_block_id) { |
| const auto& row_block = block_structure->rows[row_block_id]; |
| // Empty row block: only requires setting row offsets |
| if (row_block.cells.empty()) { |
| std::fill(crs_rows, crs_rows + row_block.block.size, value_offset); |
| crs_rows += row_block.block.size; |
| continue; |
| } |
| |
| int row_nnz = 0; |
| if constexpr (transpose) { |
| // Transposed block structure comes with nnz in row-block filled-in |
| row_nnz = row_block.nnz / row_block.block.size; |
| } else { |
| // Nnz field of non-transposed block structure is not filled and it can |
| // have non-sequential structure (consider the case of jacobian for |
| // Schur-complement solver: E and F blocks are stored separately). |
| for (auto& c : row_block.cells) { |
| row_nnz += cols[c.block_id].size; |
| } |
| } |
| |
| // Row-wise setup of matrix structure |
| for (int row = 0; row < row_block.block.size; ++row) { |
| value_offset += row_nnz; |
| *crs_rows++ = value_offset; |
| for (auto& c : row_block.cells) { |
| const int col_block_size = cols[c.block_id].size; |
| const int col_position = cols[c.block_id].position; |
| std::iota(crs_cols, crs_cols + col_block_size, col_position); |
| crs_cols += col_block_size; |
| } |
| } |
| } |
| return crs_matrix; |
| } |
| |
| template <bool transpose> |
| void UpdateCompressedRowSparseMatrixImpl( |
| CompressedRowSparseMatrix* crs_matrix, |
| const double* values, |
| const CompressedRowBlockStructure* block_structure) { |
| auto crs_values = crs_matrix->mutable_values(); |
| auto crs_rows = crs_matrix->mutable_rows(); |
| const int num_row_blocks = block_structure->rows.size(); |
| const auto& cols = block_structure->cols; |
| for (int row_block_id = 0; row_block_id < num_row_blocks; ++row_block_id) { |
| const auto& row_block = block_structure->rows[row_block_id]; |
| const int row_block_size = row_block.block.size; |
| const int row_nnz = crs_rows[1] - crs_rows[0]; |
| crs_rows += row_block_size; |
| |
| if (row_nnz == 0) { |
| continue; |
| } |
| |
| MatrixRef crs_row_block(crs_values, row_block_size, row_nnz); |
| int col_offset = 0; |
| for (auto& c : row_block.cells) { |
| const int col_block_size = cols[c.block_id].size; |
| auto crs_cell = |
| crs_row_block.block(0, col_offset, row_block_size, col_block_size); |
| if constexpr (transpose) { |
| // Transposed matrix is filled using transposed block-strucutre |
| ConstMatrixRef cell( |
| values + c.position, col_block_size, row_block_size); |
| crs_cell = cell.transpose(); |
| } else { |
| ConstMatrixRef cell( |
| values + c.position, row_block_size, col_block_size); |
| crs_cell = cell; |
| } |
| col_offset += col_block_size; |
| } |
| crs_values += row_nnz * row_block_size; |
| } |
| } |
| |
| void SetBlockStructureOfCompressedRowSparseMatrix( |
| CompressedRowSparseMatrix* crs_matrix, |
| CompressedRowBlockStructure* block_structure) { |
| const int num_row_blocks = block_structure->rows.size(); |
| auto& row_blocks = *crs_matrix->mutable_row_blocks(); |
| row_blocks.resize(num_row_blocks); |
| for (int i = 0; i < num_row_blocks; ++i) { |
| row_blocks[i] = block_structure->rows[i].block; |
| } |
| |
| auto& col_blocks = *crs_matrix->mutable_col_blocks(); |
| col_blocks = block_structure->cols; |
| } |
| |
| } // namespace |
| |
| BlockSparseMatrix::BlockSparseMatrix( |
| CompressedRowBlockStructure* block_structure, bool use_page_locked_memory) |
| : use_page_locked_memory_(use_page_locked_memory), |
| num_rows_(0), |
| num_cols_(0), |
| num_nonzeros_(0), |
| block_structure_(block_structure) { |
| CHECK(block_structure_ != nullptr); |
| |
| // Count the number of columns in the matrix. |
| for (auto& col : block_structure_->cols) { |
| num_cols_ += col.size; |
| } |
| |
| // Count the number of non-zero entries and the number of rows in |
| // the matrix. |
| for (int i = 0; i < block_structure_->rows.size(); ++i) { |
| int row_block_size = block_structure_->rows[i].block.size; |
| num_rows_ += row_block_size; |
| |
| const std::vector<Cell>& cells = block_structure_->rows[i].cells; |
| for (const auto& cell : cells) { |
| int col_block_id = cell.block_id; |
| int col_block_size = block_structure_->cols[col_block_id].size; |
| num_nonzeros_ += col_block_size * row_block_size; |
| } |
| } |
| |
| CHECK_GE(num_rows_, 0); |
| CHECK_GE(num_cols_, 0); |
| CHECK_GE(num_nonzeros_, 0); |
| VLOG(2) << "Allocating values array with " << num_nonzeros_ * sizeof(double) |
| << " bytes."; // NOLINT |
| |
| values_ = AllocateValues(num_nonzeros_); |
| max_num_nonzeros_ = num_nonzeros_; |
| CHECK(values_ != nullptr); |
| AddTransposeBlockStructure(); |
| } |
| |
| BlockSparseMatrix::~BlockSparseMatrix() { FreeValues(values_); } |
| |
| void BlockSparseMatrix::AddTransposeBlockStructure() { |
| if (transpose_block_structure_ == nullptr) { |
| transpose_block_structure_ = CreateTranspose(*block_structure_); |
| } |
| } |
| |
| void BlockSparseMatrix::SetZero() { |
| std::fill(values_, values_ + num_nonzeros_, 0.0); |
| } |
| |
| void BlockSparseMatrix::SetZero(ContextImpl* context, int num_threads) { |
| ParallelSetZero(context, num_threads, values_, num_nonzeros_); |
| } |
| |
| void BlockSparseMatrix::RightMultiplyAndAccumulate(const double* x, |
| double* y) const { |
| RightMultiplyAndAccumulate(x, y, nullptr, 1); |
| } |
| |
| void BlockSparseMatrix::RightMultiplyAndAccumulate(const double* x, |
| double* y, |
| ContextImpl* context, |
| int num_threads) const { |
| CHECK(x != nullptr); |
| CHECK(y != nullptr); |
| |
| const auto values = values_; |
| const auto block_structure = block_structure_.get(); |
| const auto num_row_blocks = block_structure->rows.size(); |
| |
| ParallelFor(context, |
| 0, |
| num_row_blocks, |
| num_threads, |
| [values, block_structure, x, y](int row_block_id) { |
| const int row_block_pos = |
| block_structure->rows[row_block_id].block.position; |
| const int row_block_size = |
| block_structure->rows[row_block_id].block.size; |
| const auto& cells = block_structure->rows[row_block_id].cells; |
| for (const auto& cell : cells) { |
| const int col_block_id = cell.block_id; |
| const int col_block_size = |
| block_structure->cols[col_block_id].size; |
| const int col_block_pos = |
| block_structure->cols[col_block_id].position; |
| MatrixVectorMultiply<Eigen::Dynamic, Eigen::Dynamic, 1>( |
| values + cell.position, |
| row_block_size, |
| col_block_size, |
| x + col_block_pos, |
| y + row_block_pos); |
| } |
| }); |
| } |
| |
| // TODO(https://github.com/ceres-solver/ceres-solver/issues/933): This method |
| // might benefit from caching column-block partition |
| void BlockSparseMatrix::LeftMultiplyAndAccumulate(const double* x, |
| double* y, |
| ContextImpl* context, |
| int num_threads) const { |
| // While utilizing transposed structure allows to perform parallel |
| // left-multiplication by dense vector, it makes access patterns to matrix |
| // elements scattered. Thus, multiplication using transposed structure |
| // is only useful for parallel execution |
| CHECK(x != nullptr); |
| CHECK(y != nullptr); |
| if (transpose_block_structure_ == nullptr || num_threads == 1) { |
| LeftMultiplyAndAccumulate(x, y); |
| return; |
| } |
| |
| auto transpose_bs = transpose_block_structure_.get(); |
| const auto values = values_; |
| const int num_col_blocks = transpose_bs->rows.size(); |
| if (!num_col_blocks) { |
| return; |
| } |
| |
| // Use non-zero count as iteration cost for guided parallel-for loop |
| ParallelFor( |
| context, |
| 0, |
| num_col_blocks, |
| num_threads, |
| [values, transpose_bs, x, y](int row_block_id) { |
| int row_block_pos = transpose_bs->rows[row_block_id].block.position; |
| int row_block_size = transpose_bs->rows[row_block_id].block.size; |
| auto& cells = transpose_bs->rows[row_block_id].cells; |
| |
| for (auto& cell : cells) { |
| const int col_block_id = cell.block_id; |
| const int col_block_size = transpose_bs->cols[col_block_id].size; |
| const int col_block_pos = transpose_bs->cols[col_block_id].position; |
| MatrixTransposeVectorMultiply<Eigen::Dynamic, Eigen::Dynamic, 1>( |
| values + cell.position, |
| col_block_size, |
| row_block_size, |
| x + col_block_pos, |
| y + row_block_pos); |
| } |
| }, |
| transpose_bs->rows.data(), |
| [](const CompressedRow& row) { return row.cumulative_nnz; }); |
| } |
| |
| void BlockSparseMatrix::LeftMultiplyAndAccumulate(const double* x, |
| double* y) const { |
| CHECK(x != nullptr); |
| CHECK(y != nullptr); |
| // Single-threaded left products are always computed using a non-transpose |
| // block structure, because it has linear acess pattern to matrix elements |
| for (int i = 0; i < block_structure_->rows.size(); ++i) { |
| int row_block_pos = block_structure_->rows[i].block.position; |
| int row_block_size = block_structure_->rows[i].block.size; |
| const auto& cells = block_structure_->rows[i].cells; |
| for (const auto& cell : cells) { |
| int col_block_id = cell.block_id; |
| int col_block_size = block_structure_->cols[col_block_id].size; |
| int col_block_pos = block_structure_->cols[col_block_id].position; |
| MatrixTransposeVectorMultiply<Eigen::Dynamic, Eigen::Dynamic, 1>( |
| values_ + cell.position, |
| row_block_size, |
| col_block_size, |
| x + row_block_pos, |
| y + col_block_pos); |
| } |
| } |
| } |
| |
| void BlockSparseMatrix::SquaredColumnNorm(double* x) const { |
| CHECK(x != nullptr); |
| VectorRef(x, num_cols_).setZero(); |
| for (int i = 0; i < block_structure_->rows.size(); ++i) { |
| int row_block_size = block_structure_->rows[i].block.size; |
| auto& cells = block_structure_->rows[i].cells; |
| for (const auto& cell : cells) { |
| int col_block_id = cell.block_id; |
| int col_block_size = block_structure_->cols[col_block_id].size; |
| int col_block_pos = block_structure_->cols[col_block_id].position; |
| const MatrixRef m( |
| values_ + cell.position, row_block_size, col_block_size); |
| VectorRef(x + col_block_pos, col_block_size) += m.colwise().squaredNorm(); |
| } |
| } |
| } |
| |
| // TODO(https://github.com/ceres-solver/ceres-solver/issues/933): This method |
| // might benefit from caching column-block partition |
| void BlockSparseMatrix::SquaredColumnNorm(double* x, |
| ContextImpl* context, |
| int num_threads) const { |
| if (transpose_block_structure_ == nullptr || num_threads == 1) { |
| SquaredColumnNorm(x); |
| return; |
| } |
| |
| CHECK(x != nullptr); |
| ParallelSetZero(context, num_threads, x, num_cols_); |
| |
| auto transpose_bs = transpose_block_structure_.get(); |
| const auto values = values_; |
| const int num_col_blocks = transpose_bs->rows.size(); |
| ParallelFor( |
| context, |
| 0, |
| num_col_blocks, |
| num_threads, |
| [values, transpose_bs, x](int row_block_id) { |
| const auto& row = transpose_bs->rows[row_block_id]; |
| |
| for (auto& cell : row.cells) { |
| const auto& col = transpose_bs->cols[cell.block_id]; |
| const MatrixRef m(values + cell.position, col.size, row.block.size); |
| VectorRef(x + row.block.position, row.block.size) += |
| m.colwise().squaredNorm(); |
| } |
| }, |
| transpose_bs->rows.data(), |
| [](const CompressedRow& row) { return row.cumulative_nnz; }); |
| } |
| |
| void BlockSparseMatrix::ScaleColumns(const double* scale) { |
| CHECK(scale != nullptr); |
| |
| for (int i = 0; i < block_structure_->rows.size(); ++i) { |
| int row_block_size = block_structure_->rows[i].block.size; |
| auto& cells = block_structure_->rows[i].cells; |
| for (const auto& cell : cells) { |
| int col_block_id = cell.block_id; |
| int col_block_size = block_structure_->cols[col_block_id].size; |
| int col_block_pos = block_structure_->cols[col_block_id].position; |
| MatrixRef m(values_ + cell.position, row_block_size, col_block_size); |
| m *= ConstVectorRef(scale + col_block_pos, col_block_size).asDiagonal(); |
| } |
| } |
| } |
| |
| // TODO(https://github.com/ceres-solver/ceres-solver/issues/933): This method |
| // might benefit from caching column-block partition |
| void BlockSparseMatrix::ScaleColumns(const double* scale, |
| ContextImpl* context, |
| int num_threads) { |
| if (transpose_block_structure_ == nullptr || num_threads == 1) { |
| ScaleColumns(scale); |
| return; |
| } |
| |
| CHECK(scale != nullptr); |
| auto transpose_bs = transpose_block_structure_.get(); |
| auto values = values_; |
| const int num_col_blocks = transpose_bs->rows.size(); |
| ParallelFor( |
| context, |
| 0, |
| num_col_blocks, |
| num_threads, |
| [values, transpose_bs, scale](int row_block_id) { |
| const auto& row = transpose_bs->rows[row_block_id]; |
| |
| for (auto& cell : row.cells) { |
| const auto& col = transpose_bs->cols[cell.block_id]; |
| MatrixRef m(values + cell.position, col.size, row.block.size); |
| m *= ConstVectorRef(scale + row.block.position, row.block.size) |
| .asDiagonal(); |
| } |
| }, |
| transpose_bs->rows.data(), |
| [](const CompressedRow& row) { return row.cumulative_nnz; }); |
| } |
| std::unique_ptr<CompressedRowSparseMatrix> |
| BlockSparseMatrix::ToCompressedRowSparseMatrixTranspose() const { |
| auto bs = transpose_block_structure_.get(); |
| auto crs_matrix = CreateStructureOfCompressedRowSparseMatrix<true>( |
| values(), num_cols_, num_rows_, num_nonzeros_, bs); |
| |
| SetBlockStructureOfCompressedRowSparseMatrix(crs_matrix.get(), bs); |
| |
| UpdateCompressedRowSparseMatrixTranspose(crs_matrix.get()); |
| return crs_matrix; |
| } |
| |
| std::unique_ptr<CompressedRowSparseMatrix> |
| BlockSparseMatrix::ToCompressedRowSparseMatrix() const { |
| auto crs_matrix = CreateStructureOfCompressedRowSparseMatrix<false>( |
| values(), num_rows_, num_cols_, num_nonzeros_, block_structure_.get()); |
| |
| SetBlockStructureOfCompressedRowSparseMatrix(crs_matrix.get(), |
| block_structure_.get()); |
| |
| UpdateCompressedRowSparseMatrix(crs_matrix.get()); |
| return crs_matrix; |
| } |
| |
| void BlockSparseMatrix::UpdateCompressedRowSparseMatrixTranspose( |
| CompressedRowSparseMatrix* crs_matrix) const { |
| CHECK(crs_matrix != nullptr); |
| CHECK_EQ(crs_matrix->num_rows(), num_cols_); |
| CHECK_EQ(crs_matrix->num_cols(), num_rows_); |
| CHECK_EQ(crs_matrix->num_nonzeros(), num_nonzeros_); |
| UpdateCompressedRowSparseMatrixImpl<true>( |
| crs_matrix, values(), transpose_block_structure_.get()); |
| } |
| void BlockSparseMatrix::UpdateCompressedRowSparseMatrix( |
| CompressedRowSparseMatrix* crs_matrix) const { |
| CHECK(crs_matrix != nullptr); |
| CHECK_EQ(crs_matrix->num_rows(), num_rows_); |
| CHECK_EQ(crs_matrix->num_cols(), num_cols_); |
| CHECK_EQ(crs_matrix->num_nonzeros(), num_nonzeros_); |
| UpdateCompressedRowSparseMatrixImpl<false>( |
| crs_matrix, values(), block_structure_.get()); |
| } |
| |
| void BlockSparseMatrix::ToDenseMatrix(Matrix* dense_matrix) const { |
| CHECK(dense_matrix != nullptr); |
| |
| dense_matrix->resize(num_rows_, num_cols_); |
| dense_matrix->setZero(); |
| Matrix& m = *dense_matrix; |
| |
| for (int i = 0; i < block_structure_->rows.size(); ++i) { |
| int row_block_pos = block_structure_->rows[i].block.position; |
| int row_block_size = block_structure_->rows[i].block.size; |
| auto& cells = block_structure_->rows[i].cells; |
| for (const auto& cell : cells) { |
| int col_block_id = cell.block_id; |
| int col_block_size = block_structure_->cols[col_block_id].size; |
| int col_block_pos = block_structure_->cols[col_block_id].position; |
| int jac_pos = cell.position; |
| m.block(row_block_pos, col_block_pos, row_block_size, col_block_size) += |
| MatrixRef(values_ + jac_pos, row_block_size, col_block_size); |
| } |
| } |
| } |
| |
| void BlockSparseMatrix::ToTripletSparseMatrix( |
| TripletSparseMatrix* matrix) const { |
| CHECK(matrix != nullptr); |
| |
| matrix->Reserve(num_nonzeros_); |
| matrix->Resize(num_rows_, num_cols_); |
| matrix->SetZero(); |
| |
| for (int i = 0; i < block_structure_->rows.size(); ++i) { |
| int row_block_pos = block_structure_->rows[i].block.position; |
| int row_block_size = block_structure_->rows[i].block.size; |
| const auto& cells = block_structure_->rows[i].cells; |
| for (const auto& cell : cells) { |
| int col_block_id = cell.block_id; |
| int col_block_size = block_structure_->cols[col_block_id].size; |
| int col_block_pos = block_structure_->cols[col_block_id].position; |
| int jac_pos = cell.position; |
| for (int r = 0; r < row_block_size; ++r) { |
| for (int c = 0; c < col_block_size; ++c, ++jac_pos) { |
| matrix->mutable_rows()[jac_pos] = row_block_pos + r; |
| matrix->mutable_cols()[jac_pos] = col_block_pos + c; |
| matrix->mutable_values()[jac_pos] = values_[jac_pos]; |
| } |
| } |
| } |
| } |
| matrix->set_num_nonzeros(num_nonzeros_); |
| } |
| |
| // Return a pointer to the block structure. We continue to hold |
| // ownership of the object though. |
| const CompressedRowBlockStructure* BlockSparseMatrix::block_structure() const { |
| return block_structure_.get(); |
| } |
| |
| // Return a pointer to the block structure of matrix transpose. We continue to |
| // hold ownership of the object though. |
| const CompressedRowBlockStructure* |
| BlockSparseMatrix::transpose_block_structure() const { |
| return transpose_block_structure_.get(); |
| } |
| |
| void BlockSparseMatrix::ToTextFile(FILE* file) const { |
| CHECK(file != nullptr); |
| for (int i = 0; i < block_structure_->rows.size(); ++i) { |
| const int row_block_pos = block_structure_->rows[i].block.position; |
| const int row_block_size = block_structure_->rows[i].block.size; |
| const auto& cells = block_structure_->rows[i].cells; |
| for (const auto& cell : cells) { |
| const int col_block_id = cell.block_id; |
| const int col_block_size = block_structure_->cols[col_block_id].size; |
| const int col_block_pos = block_structure_->cols[col_block_id].position; |
| int jac_pos = cell.position; |
| for (int r = 0; r < row_block_size; ++r) { |
| for (int c = 0; c < col_block_size; ++c) { |
| fprintf(file, |
| "% 10d % 10d %17f\n", |
| row_block_pos + r, |
| col_block_pos + c, |
| values_[jac_pos++]); |
| } |
| } |
| } |
| } |
| } |
| |
| std::unique_ptr<BlockSparseMatrix> BlockSparseMatrix::CreateDiagonalMatrix( |
| const double* diagonal, const std::vector<Block>& column_blocks) { |
| // Create the block structure for the diagonal matrix. |
| auto* bs = new CompressedRowBlockStructure(); |
| bs->cols = column_blocks; |
| int position = 0; |
| bs->rows.resize(column_blocks.size(), CompressedRow(1)); |
| for (int i = 0; i < column_blocks.size(); ++i) { |
| CompressedRow& row = bs->rows[i]; |
| row.block = column_blocks[i]; |
| Cell& cell = row.cells[0]; |
| cell.block_id = i; |
| cell.position = position; |
| position += row.block.size * row.block.size; |
| } |
| |
| // Create the BlockSparseMatrix with the given block structure. |
| auto matrix = std::make_unique<BlockSparseMatrix>(bs); |
| matrix->SetZero(); |
| |
| // Fill the values array of the block sparse matrix. |
| double* values = matrix->mutable_values(); |
| for (const auto& column_block : column_blocks) { |
| const int size = column_block.size; |
| for (int j = 0; j < size; ++j) { |
| // (j + 1) * size is compact way of accessing the (j,j) entry. |
| values[j * (size + 1)] = diagonal[j]; |
| } |
| diagonal += size; |
| values += size * size; |
| } |
| |
| return matrix; |
| } |
| |
| void BlockSparseMatrix::AppendRows(const BlockSparseMatrix& m) { |
| CHECK_EQ(m.num_cols(), num_cols()); |
| const CompressedRowBlockStructure* m_bs = m.block_structure(); |
| CHECK_EQ(m_bs->cols.size(), block_structure_->cols.size()); |
| |
| const int old_num_nonzeros = num_nonzeros_; |
| const int old_num_row_blocks = block_structure_->rows.size(); |
| block_structure_->rows.resize(old_num_row_blocks + m_bs->rows.size()); |
| |
| for (int i = 0; i < m_bs->rows.size(); ++i) { |
| const CompressedRow& m_row = m_bs->rows[i]; |
| const int row_block_id = old_num_row_blocks + i; |
| CompressedRow& row = block_structure_->rows[row_block_id]; |
| row.block.size = m_row.block.size; |
| row.block.position = num_rows_; |
| num_rows_ += m_row.block.size; |
| row.cells.resize(m_row.cells.size()); |
| if (transpose_block_structure_) { |
| transpose_block_structure_->cols.emplace_back(row.block); |
| } |
| for (int c = 0; c < m_row.cells.size(); ++c) { |
| const int block_id = m_row.cells[c].block_id; |
| row.cells[c].block_id = block_id; |
| row.cells[c].position = num_nonzeros_; |
| |
| const int cell_nnz = m_row.block.size * m_bs->cols[block_id].size; |
| if (transpose_block_structure_) { |
| transpose_block_structure_->rows[block_id].cells.emplace_back( |
| row_block_id, num_nonzeros_); |
| transpose_block_structure_->rows[block_id].nnz += cell_nnz; |
| } |
| |
| num_nonzeros_ += cell_nnz; |
| } |
| } |
| |
| if (num_nonzeros_ > max_num_nonzeros_) { |
| double* old_values = values_; |
| values_ = AllocateValues(num_nonzeros_); |
| std::copy_n(old_values, old_num_nonzeros, values_); |
| max_num_nonzeros_ = num_nonzeros_; |
| FreeValues(old_values); |
| } |
| |
| std::copy( |
| m.values(), m.values() + m.num_nonzeros(), values_ + old_num_nonzeros); |
| |
| if (transpose_block_structure_ == nullptr) { |
| return; |
| } |
| ComputeCumulativeNumberOfNonZeros(transpose_block_structure_->rows); |
| } |
| |
| void BlockSparseMatrix::DeleteRowBlocks(const int delta_row_blocks) { |
| const int num_row_blocks = block_structure_->rows.size(); |
| const int new_num_row_blocks = num_row_blocks - delta_row_blocks; |
| int delta_num_nonzeros = 0; |
| int delta_num_rows = 0; |
| const std::vector<Block>& column_blocks = block_structure_->cols; |
| for (int i = 0; i < delta_row_blocks; ++i) { |
| const CompressedRow& row = block_structure_->rows[num_row_blocks - i - 1]; |
| delta_num_rows += row.block.size; |
| for (int c = 0; c < row.cells.size(); ++c) { |
| const Cell& cell = row.cells[c]; |
| delta_num_nonzeros += row.block.size * column_blocks[cell.block_id].size; |
| |
| if (transpose_block_structure_) { |
| auto& col_cells = transpose_block_structure_->rows[cell.block_id].cells; |
| while (!col_cells.empty() && |
| col_cells.back().block_id >= new_num_row_blocks) { |
| const int del_block_id = col_cells.back().block_id; |
| const int del_block_rows = |
| block_structure_->rows[del_block_id].block.size; |
| const int del_block_cols = column_blocks[cell.block_id].size; |
| const int del_cell_nnz = del_block_rows * del_block_cols; |
| transpose_block_structure_->rows[cell.block_id].nnz -= del_cell_nnz; |
| col_cells.pop_back(); |
| } |
| } |
| } |
| } |
| num_nonzeros_ -= delta_num_nonzeros; |
| num_rows_ -= delta_num_rows; |
| block_structure_->rows.resize(new_num_row_blocks); |
| |
| if (transpose_block_structure_ == nullptr) { |
| return; |
| } |
| for (int i = 0; i < delta_row_blocks; ++i) { |
| transpose_block_structure_->cols.pop_back(); |
| } |
| |
| ComputeCumulativeNumberOfNonZeros(transpose_block_structure_->rows); |
| } |
| |
| std::unique_ptr<BlockSparseMatrix> BlockSparseMatrix::CreateRandomMatrix( |
| const BlockSparseMatrix::RandomMatrixOptions& options, |
| std::mt19937& prng, |
| bool use_page_locked_memory) { |
| CHECK_GT(options.num_row_blocks, 0); |
| CHECK_GT(options.min_row_block_size, 0); |
| CHECK_GT(options.max_row_block_size, 0); |
| CHECK_LE(options.min_row_block_size, options.max_row_block_size); |
| CHECK_GT(options.block_density, 0.0); |
| CHECK_LE(options.block_density, 1.0); |
| |
| std::uniform_int_distribution<int> col_distribution( |
| options.min_col_block_size, options.max_col_block_size); |
| std::uniform_int_distribution<int> row_distribution( |
| options.min_row_block_size, options.max_row_block_size); |
| auto bs = std::make_unique<CompressedRowBlockStructure>(); |
| if (options.col_blocks.empty()) { |
| CHECK_GT(options.num_col_blocks, 0); |
| CHECK_GT(options.min_col_block_size, 0); |
| CHECK_GT(options.max_col_block_size, 0); |
| CHECK_LE(options.min_col_block_size, options.max_col_block_size); |
| |
| // Generate the col block structure. |
| int col_block_position = 0; |
| for (int i = 0; i < options.num_col_blocks; ++i) { |
| const int col_block_size = col_distribution(prng); |
| bs->cols.emplace_back(col_block_size, col_block_position); |
| col_block_position += col_block_size; |
| } |
| } else { |
| bs->cols = options.col_blocks; |
| } |
| |
| bool matrix_has_blocks = false; |
| std::uniform_real_distribution<double> uniform01(0.0, 1.0); |
| while (!matrix_has_blocks) { |
| VLOG(1) << "Clearing"; |
| bs->rows.clear(); |
| int row_block_position = 0; |
| int value_position = 0; |
| for (int r = 0; r < options.num_row_blocks; ++r) { |
| const int row_block_size = row_distribution(prng); |
| bs->rows.emplace_back(); |
| CompressedRow& row = bs->rows.back(); |
| row.block.size = row_block_size; |
| row.block.position = row_block_position; |
| row_block_position += row_block_size; |
| for (int c = 0; c < bs->cols.size(); ++c) { |
| if (uniform01(prng) > options.block_density) continue; |
| |
| row.cells.emplace_back(); |
| Cell& cell = row.cells.back(); |
| cell.block_id = c; |
| cell.position = value_position; |
| value_position += row_block_size * bs->cols[c].size; |
| matrix_has_blocks = true; |
| } |
| } |
| } |
| |
| auto matrix = |
| std::make_unique<BlockSparseMatrix>(bs.release(), use_page_locked_memory); |
| double* values = matrix->mutable_values(); |
| std::normal_distribution<double> standard_normal_distribution; |
| std::generate_n( |
| values, matrix->num_nonzeros(), [&standard_normal_distribution, &prng] { |
| return standard_normal_distribution(prng); |
| }); |
| |
| return matrix; |
| } |
| |
| std::unique_ptr<CompressedRowBlockStructure> CreateTranspose( |
| const CompressedRowBlockStructure& bs) { |
| auto transpose = std::make_unique<CompressedRowBlockStructure>(); |
| |
| transpose->rows.resize(bs.cols.size()); |
| for (int i = 0; i < bs.cols.size(); ++i) { |
| transpose->rows[i].block = bs.cols[i]; |
| transpose->rows[i].nnz = 0; |
| } |
| |
| transpose->cols.resize(bs.rows.size()); |
| for (int i = 0; i < bs.rows.size(); ++i) { |
| auto& row = bs.rows[i]; |
| transpose->cols[i] = row.block; |
| |
| const int nrows = row.block.size; |
| for (auto& cell : row.cells) { |
| transpose->rows[cell.block_id].cells.emplace_back(i, cell.position); |
| const int ncols = transpose->rows[cell.block_id].block.size; |
| transpose->rows[cell.block_id].nnz += nrows * ncols; |
| } |
| } |
| ComputeCumulativeNumberOfNonZeros(transpose->rows); |
| return transpose; |
| } |
| |
| double* BlockSparseMatrix::AllocateValues(int size) { |
| if (!use_page_locked_memory_) { |
| return new double[size]; |
| } |
| |
| #ifndef CERES_NO_CUDA |
| |
| double* values = nullptr; |
| CHECK_EQ(cudaSuccess, |
| cudaHostAlloc(&values, sizeof(double) * size, cudaHostAllocDefault)); |
| return values; |
| #else |
| LOG(FATAL) << "Page locked memory requested when CUDA is not available. " |
| << "This is a Ceres bug; please contact the developers!"; |
| return nullptr; |
| #endif |
| }; |
| |
| void BlockSparseMatrix::FreeValues(double*& values) { |
| if (!use_page_locked_memory_) { |
| delete[] values; |
| values = nullptr; |
| return; |
| } |
| |
| #ifndef CERES_NO_CUDA |
| CHECK_EQ(cudaSuccess, cudaFreeHost(values)); |
| #else |
| LOG(FATAL) << "Page locked memory requested when CUDA is not available. " |
| << "This is a Ceres bug; please contact the developers!"; |
| #endif |
| |
| values = nullptr; |
| }; |
| |
| } // namespace ceres::internal |