| // Ceres Solver - A fast non-linear least squares minimizer |
| // Copyright 2023 Google Inc. All rights reserved. |
| // http://ceres-solver.org/ |
| // |
| // Redistribution and use in source and binary forms, with or without |
| // modification, are permitted provided that the following conditions are met: |
| // |
| // * Redistributions of source code must retain the above copyright notice, |
| // this list of conditions and the following disclaimer. |
| // * Redistributions in binary form must reproduce the above copyright notice, |
| // this list of conditions and the following disclaimer in the documentation |
| // and/or other materials provided with the distribution. |
| // * Neither the name of Google Inc. nor the names of its contributors may be |
| // used to endorse or promote products derived from this software without |
| // specific prior written permission. |
| // |
| // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
| // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
| // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
| // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
| // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
| // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
| // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
| // POSSIBILITY OF SUCH DAMAGE. |
| // |
| // Author: sameeragarwal@google.com (Sameer Agarwal) |
| |
| #include <algorithm> |
| #include <cstring> |
| #include <memory> |
| #include <vector> |
| |
| #include "absl/log/check.h" |
| #include "ceres/block_sparse_matrix.h" |
| #include "ceres/block_structure.h" |
| #include "ceres/internal/eigen.h" |
| #include "ceres/parallel_for.h" |
| #include "ceres/partition_range_for_parallel_for.h" |
| #include "ceres/partitioned_matrix_view.h" |
| #include "ceres/small_blas.h" |
| |
| namespace ceres::internal { |
| |
| template <int kRowBlockSize, int kEBlockSize, int kFBlockSize> |
| PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>:: |
| PartitionedMatrixView(const LinearSolver::Options& options, |
| const BlockSparseMatrix& matrix) |
| |
| : options_(options), matrix_(matrix) { |
| const CompressedRowBlockStructure* bs = matrix_.block_structure(); |
| CHECK(bs != nullptr); |
| |
| num_col_blocks_e_ = options_.elimination_groups[0]; |
| num_col_blocks_f_ = bs->cols.size() - num_col_blocks_e_; |
| |
| // Compute the number of row blocks in E. The number of row blocks |
| // in E maybe less than the number of row blocks in the input matrix |
| // as some of the row blocks at the bottom may not have any |
| // e_blocks. For a definition of what an e_block is, please see |
| // schur_complement_solver.h |
| num_row_blocks_e_ = 0; |
| for (const auto& row : bs->rows) { |
| const std::vector<Cell>& cells = row.cells; |
| if (cells[0].block_id < num_col_blocks_e_) { |
| ++num_row_blocks_e_; |
| } |
| } |
| |
| // Compute the number of columns in E and F. |
| num_cols_e_ = 0; |
| num_cols_f_ = 0; |
| |
| for (int c = 0; c < bs->cols.size(); ++c) { |
| const Block& block = bs->cols[c]; |
| if (c < num_col_blocks_e_) { |
| num_cols_e_ += block.size; |
| } else { |
| num_cols_f_ += block.size; |
| } |
| } |
| |
| CHECK_EQ(num_cols_e_ + num_cols_f_, matrix_.num_cols()); |
| |
| auto transpose_bs = matrix_.transpose_block_structure(); |
| const int num_threads = options_.num_threads; |
| if (transpose_bs != nullptr && num_threads > 1) { |
| int kMaxPartitions = num_threads * 4; |
| e_cols_partition_ = PartitionRangeForParallelFor( |
| 0, |
| num_col_blocks_e_, |
| kMaxPartitions, |
| transpose_bs->rows.data(), |
| [](const CompressedRow& row) { return row.cumulative_nnz; }); |
| |
| f_cols_partition_ = PartitionRangeForParallelFor( |
| num_col_blocks_e_, |
| num_col_blocks_e_ + num_col_blocks_f_, |
| kMaxPartitions, |
| transpose_bs->rows.data(), |
| [](const CompressedRow& row) { return row.cumulative_nnz; }); |
| } |
| } |
| |
| // The next four methods don't seem to be particularly cache |
| // friendly. This is an artifact of how the BlockStructure of the |
| // input matrix is constructed. These methods will benefit from |
| // multithreading as well as improved data layout. |
| |
| template <int kRowBlockSize, int kEBlockSize, int kFBlockSize> |
| void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>:: |
| RightMultiplyAndAccumulateE(const double* x, double* y) const { |
| // Iterate over the first num_row_blocks_e_ row blocks, and multiply |
| // by the first cell in each row block. |
| auto bs = matrix_.block_structure(); |
| const double* values = matrix_.values(); |
| ParallelFor(options_.context, |
| 0, |
| num_row_blocks_e_, |
| options_.num_threads, |
| [values, bs, x, y](int row_block_id) { |
| const Cell& cell = bs->rows[row_block_id].cells[0]; |
| const int row_block_pos = bs->rows[row_block_id].block.position; |
| const int row_block_size = bs->rows[row_block_id].block.size; |
| const int col_block_id = cell.block_id; |
| const int col_block_pos = bs->cols[col_block_id].position; |
| const int col_block_size = bs->cols[col_block_id].size; |
| // clang-format off |
| MatrixVectorMultiply<kRowBlockSize, kEBlockSize, 1>( |
| values + cell.position, row_block_size, col_block_size, |
| x + col_block_pos, |
| y + row_block_pos); |
| // clang-format on |
| }); |
| } |
| |
| template <int kRowBlockSize, int kEBlockSize, int kFBlockSize> |
| void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>:: |
| RightMultiplyAndAccumulateF(const double* x, double* y) const { |
| // Iterate over row blocks, and if the row block is in E, then |
| // multiply by all the cells except the first one which is of type |
| // E. If the row block is not in E (i.e its in the bottom |
| // num_row_blocks - num_row_blocks_e row blocks), then all the cells |
| // are of type F and multiply by them all. |
| const CompressedRowBlockStructure* bs = matrix_.block_structure(); |
| const int num_row_blocks = bs->rows.size(); |
| const int num_cols_e = num_cols_e_; |
| const double* values = matrix_.values(); |
| ParallelFor(options_.context, |
| 0, |
| num_row_blocks_e_, |
| options_.num_threads, |
| [values, bs, num_cols_e, x, y](int row_block_id) { |
| const int row_block_pos = bs->rows[row_block_id].block.position; |
| const int row_block_size = bs->rows[row_block_id].block.size; |
| const auto& cells = bs->rows[row_block_id].cells; |
| for (int c = 1; c < cells.size(); ++c) { |
| const int col_block_id = cells[c].block_id; |
| const int col_block_pos = bs->cols[col_block_id].position; |
| const int col_block_size = bs->cols[col_block_id].size; |
| // clang-format off |
| MatrixVectorMultiply<kRowBlockSize, kFBlockSize, 1>( |
| values + cells[c].position, row_block_size, col_block_size, |
| x + col_block_pos - num_cols_e, |
| y + row_block_pos); |
| // clang-format on |
| } |
| }); |
| ParallelFor(options_.context, |
| num_row_blocks_e_, |
| num_row_blocks, |
| options_.num_threads, |
| [values, bs, num_cols_e, x, y](int row_block_id) { |
| const int row_block_pos = bs->rows[row_block_id].block.position; |
| const int row_block_size = bs->rows[row_block_id].block.size; |
| const auto& cells = bs->rows[row_block_id].cells; |
| for (const auto& cell : cells) { |
| const int col_block_id = cell.block_id; |
| const int col_block_pos = bs->cols[col_block_id].position; |
| const int col_block_size = bs->cols[col_block_id].size; |
| // clang-format off |
| MatrixVectorMultiply<Eigen::Dynamic, Eigen::Dynamic, 1>( |
| values + cell.position, row_block_size, col_block_size, |
| x + col_block_pos - num_cols_e, |
| y + row_block_pos); |
| // clang-format on |
| } |
| }); |
| } |
| |
| template <int kRowBlockSize, int kEBlockSize, int kFBlockSize> |
| void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>:: |
| LeftMultiplyAndAccumulateE(const double* x, double* y) const { |
| if (!num_col_blocks_e_) return; |
| if (!num_row_blocks_e_) return; |
| if (options_.num_threads == 1) { |
| LeftMultiplyAndAccumulateESingleThreaded(x, y); |
| } else { |
| CHECK(options_.context != nullptr); |
| LeftMultiplyAndAccumulateEMultiThreaded(x, y); |
| } |
| } |
| |
| template <int kRowBlockSize, int kEBlockSize, int kFBlockSize> |
| void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>:: |
| LeftMultiplyAndAccumulateESingleThreaded(const double* x, double* y) const { |
| const CompressedRowBlockStructure* bs = matrix_.block_structure(); |
| |
| // Iterate over the first num_row_blocks_e_ row blocks, and multiply |
| // by the first cell in each row block. |
| const double* values = matrix_.values(); |
| for (int r = 0; r < num_row_blocks_e_; ++r) { |
| const Cell& cell = bs->rows[r].cells[0]; |
| const int row_block_pos = bs->rows[r].block.position; |
| const int row_block_size = bs->rows[r].block.size; |
| const int col_block_id = cell.block_id; |
| const int col_block_pos = bs->cols[col_block_id].position; |
| const int col_block_size = bs->cols[col_block_id].size; |
| // clang-format off |
| MatrixTransposeVectorMultiply<kRowBlockSize, kEBlockSize, 1>( |
| values + cell.position, row_block_size, col_block_size, |
| x + row_block_pos, |
| y + col_block_pos); |
| // clang-format on |
| } |
| } |
| |
| template <int kRowBlockSize, int kEBlockSize, int kFBlockSize> |
| void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>:: |
| LeftMultiplyAndAccumulateEMultiThreaded(const double* x, double* y) const { |
| auto transpose_bs = matrix_.transpose_block_structure(); |
| CHECK(transpose_bs != nullptr); |
| |
| // Local copies of class members in order to avoid capturing pointer to the |
| // whole object in lambda function |
| auto values = matrix_.values(); |
| const int num_row_blocks_e = num_row_blocks_e_; |
| ParallelFor( |
| options_.context, |
| 0, |
| num_col_blocks_e_, |
| options_.num_threads, |
| [values, transpose_bs, num_row_blocks_e, x, y](int row_block_id) { |
| int row_block_pos = transpose_bs->rows[row_block_id].block.position; |
| int row_block_size = transpose_bs->rows[row_block_id].block.size; |
| auto& cells = transpose_bs->rows[row_block_id].cells; |
| |
| for (auto& cell : cells) { |
| const int col_block_id = cell.block_id; |
| const int col_block_size = transpose_bs->cols[col_block_id].size; |
| const int col_block_pos = transpose_bs->cols[col_block_id].position; |
| if (col_block_id >= num_row_blocks_e) break; |
| MatrixTransposeVectorMultiply<kRowBlockSize, kEBlockSize, 1>( |
| values + cell.position, |
| col_block_size, |
| row_block_size, |
| x + col_block_pos, |
| y + row_block_pos); |
| } |
| }, |
| e_cols_partition()); |
| } |
| |
| template <int kRowBlockSize, int kEBlockSize, int kFBlockSize> |
| void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>:: |
| LeftMultiplyAndAccumulateF(const double* x, double* y) const { |
| if (!num_col_blocks_f_) return; |
| if (options_.num_threads == 1) { |
| LeftMultiplyAndAccumulateFSingleThreaded(x, y); |
| } else { |
| CHECK(options_.context != nullptr); |
| LeftMultiplyAndAccumulateFMultiThreaded(x, y); |
| } |
| } |
| |
| template <int kRowBlockSize, int kEBlockSize, int kFBlockSize> |
| void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>:: |
| LeftMultiplyAndAccumulateFSingleThreaded(const double* x, double* y) const { |
| const CompressedRowBlockStructure* bs = matrix_.block_structure(); |
| |
| // Iterate over row blocks, and if the row block is in E, then |
| // multiply by all the cells except the first one which is of type |
| // E. If the row block is not in E (i.e its in the bottom |
| // num_row_blocks - num_row_blocks_e row blocks), then all the cells |
| // are of type F and multiply by them all. |
| const double* values = matrix_.values(); |
| for (int r = 0; r < num_row_blocks_e_; ++r) { |
| const int row_block_pos = bs->rows[r].block.position; |
| const int row_block_size = bs->rows[r].block.size; |
| const std::vector<Cell>& cells = bs->rows[r].cells; |
| for (int c = 1; c < cells.size(); ++c) { |
| const int col_block_id = cells[c].block_id; |
| const int col_block_pos = bs->cols[col_block_id].position; |
| const int col_block_size = bs->cols[col_block_id].size; |
| // clang-format off |
| MatrixTransposeVectorMultiply<kRowBlockSize, kFBlockSize, 1>( |
| values + cells[c].position, row_block_size, col_block_size, |
| x + row_block_pos, |
| y + col_block_pos - num_cols_e_); |
| // clang-format on |
| } |
| } |
| |
| for (int r = num_row_blocks_e_; r < bs->rows.size(); ++r) { |
| const int row_block_pos = bs->rows[r].block.position; |
| const int row_block_size = bs->rows[r].block.size; |
| const std::vector<Cell>& cells = bs->rows[r].cells; |
| for (const auto& cell : cells) { |
| const int col_block_id = cell.block_id; |
| const int col_block_pos = bs->cols[col_block_id].position; |
| const int col_block_size = bs->cols[col_block_id].size; |
| // clang-format off |
| MatrixTransposeVectorMultiply<Eigen::Dynamic, Eigen::Dynamic, 1>( |
| values + cell.position, row_block_size, col_block_size, |
| x + row_block_pos, |
| y + col_block_pos - num_cols_e_); |
| // clang-format on |
| } |
| } |
| } |
| |
| template <int kRowBlockSize, int kEBlockSize, int kFBlockSize> |
| void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>:: |
| LeftMultiplyAndAccumulateFMultiThreaded(const double* x, double* y) const { |
| auto transpose_bs = matrix_.transpose_block_structure(); |
| CHECK(transpose_bs != nullptr); |
| // Local copies of class members in order to avoid capturing pointer to the |
| // whole object in lambda function |
| auto values = matrix_.values(); |
| const int num_row_blocks_e = num_row_blocks_e_; |
| const int num_cols_e = num_cols_e_; |
| ParallelFor( |
| options_.context, |
| num_col_blocks_e_, |
| num_col_blocks_e_ + num_col_blocks_f_, |
| options_.num_threads, |
| [values, transpose_bs, num_row_blocks_e, num_cols_e, x, y]( |
| int row_block_id) { |
| int row_block_pos = transpose_bs->rows[row_block_id].block.position; |
| int row_block_size = transpose_bs->rows[row_block_id].block.size; |
| auto& cells = transpose_bs->rows[row_block_id].cells; |
| |
| const int num_cells = cells.size(); |
| int cell_idx = 0; |
| for (; cell_idx < num_cells; ++cell_idx) { |
| auto& cell = cells[cell_idx]; |
| const int col_block_id = cell.block_id; |
| const int col_block_size = transpose_bs->cols[col_block_id].size; |
| const int col_block_pos = transpose_bs->cols[col_block_id].position; |
| if (col_block_id >= num_row_blocks_e) break; |
| |
| MatrixTransposeVectorMultiply<kRowBlockSize, kFBlockSize, 1>( |
| values + cell.position, |
| col_block_size, |
| row_block_size, |
| x + col_block_pos, |
| y + row_block_pos - num_cols_e); |
| } |
| for (; cell_idx < num_cells; ++cell_idx) { |
| auto& cell = cells[cell_idx]; |
| const int col_block_id = cell.block_id; |
| const int col_block_size = transpose_bs->cols[col_block_id].size; |
| const int col_block_pos = transpose_bs->cols[col_block_id].position; |
| MatrixTransposeVectorMultiply<Eigen::Dynamic, Eigen::Dynamic, 1>( |
| values + cell.position, |
| col_block_size, |
| row_block_size, |
| x + col_block_pos, |
| y + row_block_pos - num_cols_e); |
| } |
| }, |
| f_cols_partition()); |
| } |
| |
| // Given a range of columns blocks of a matrix m, compute the block |
| // structure of the block diagonal of the matrix m(:, |
| // start_col_block:end_col_block)'m(:, start_col_block:end_col_block) |
// and return a BlockSparseMatrix with this block structure. Ownership
// of the result is transferred to the caller via std::unique_ptr.
template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
std::unique_ptr<BlockSparseMatrix>
PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
    CreateBlockDiagonalMatrixLayout(int start_col_block,
                                    int end_col_block) const {
  const CompressedRowBlockStructure* bs = matrix_.block_structure();
  // NOTE(review): the raw `new` looks intentional — the BlockSparseMatrix
  // constructor below appears to take ownership of this pointer; confirm
  // in block_sparse_matrix.h.
  auto* block_diagonal_structure = new CompressedRowBlockStructure;

  // Running offsets: block_position is the row/column offset of the next
  // diagonal block; diagonal_cell_position is the offset of its values
  // (size x size doubles per block) in the values array.
  int block_position = 0;
  int diagonal_cell_position = 0;

  // Iterate over the column blocks, creating a new diagonal block for
  // each column block.
  for (int c = start_col_block; c < end_col_block; ++c) {
    const Block& block = bs->cols[c];
    block_diagonal_structure->cols.emplace_back();
    Block& diagonal_block = block_diagonal_structure->cols.back();
    diagonal_block.size = block.size;
    diagonal_block.position = block_position;

    block_diagonal_structure->rows.emplace_back();
    CompressedRow& row = block_diagonal_structure->rows.back();
    row.block = diagonal_block;

    // Each row of the block diagonal matrix has exactly one cell, on the
    // diagonal; block ids are re-based so the first block in the range
    // becomes block 0.
    row.cells.emplace_back();
    Cell& cell = row.cells.back();
    cell.block_id = c - start_col_block;
    cell.position = diagonal_cell_position;

    block_position += block.size;
    diagonal_cell_position += block.size * block.size;
  }

  // Build a BlockSparseMatrix with the just computed block
  // structure.
  return std::make_unique<BlockSparseMatrix>(block_diagonal_structure);
}
| |
| template <int kRowBlockSize, int kEBlockSize, int kFBlockSize> |
| std::unique_ptr<BlockSparseMatrix> |
| PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>:: |
| CreateBlockDiagonalEtE() const { |
| std::unique_ptr<BlockSparseMatrix> block_diagonal = |
| CreateBlockDiagonalMatrixLayout(0, num_col_blocks_e_); |
| UpdateBlockDiagonalEtE(block_diagonal.get()); |
| return block_diagonal; |
| } |
| |
| template <int kRowBlockSize, int kEBlockSize, int kFBlockSize> |
| std::unique_ptr<BlockSparseMatrix> |
| PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>:: |
| CreateBlockDiagonalFtF() const { |
| std::unique_ptr<BlockSparseMatrix> block_diagonal = |
| CreateBlockDiagonalMatrixLayout(num_col_blocks_e_, |
| num_col_blocks_e_ + num_col_blocks_f_); |
| UpdateBlockDiagonalFtF(block_diagonal.get()); |
| return block_diagonal; |
| } |
| |
| // Similar to the code in RightMultiplyAndAccumulateE, except instead of the |
| // matrix vector multiply it's an outer product. |
| // |
| // block_diagonal = block_diagonal(E'E) |
| // |
| template <int kRowBlockSize, int kEBlockSize, int kFBlockSize> |
| void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>:: |
| UpdateBlockDiagonalEtESingleThreaded( |
| BlockSparseMatrix* block_diagonal) const { |
| auto bs = matrix_.block_structure(); |
| auto block_diagonal_structure = block_diagonal->block_structure(); |
| |
| block_diagonal->SetZero(); |
| const double* values = matrix_.values(); |
| for (int r = 0; r < num_row_blocks_e_; ++r) { |
| const Cell& cell = bs->rows[r].cells[0]; |
| const int row_block_size = bs->rows[r].block.size; |
| const int block_id = cell.block_id; |
| const int col_block_size = bs->cols[block_id].size; |
| const int cell_position = |
| block_diagonal_structure->rows[block_id].cells[0].position; |
| |
| // clang-format off |
| MatrixTransposeMatrixMultiply |
| <kRowBlockSize, kEBlockSize, kRowBlockSize, kEBlockSize, 1>( |
| values + cell.position, row_block_size, col_block_size, |
| values + cell.position, row_block_size, col_block_size, |
| block_diagonal->mutable_values() + cell_position, |
| 0, 0, col_block_size, col_block_size); |
| // clang-format on |
| } |
| } |
| |
template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
    UpdateBlockDiagonalEtEMultiThreaded(
        BlockSparseMatrix* block_diagonal) const {
  // Computes block_diagonal = block_diagonal(E'E), one parallel task per
  // E column block. Each task writes only its own diagonal block, so no
  // synchronization between tasks is needed.
  auto transpose_block_structure = matrix_.transpose_block_structure();
  CHECK(transpose_block_structure != nullptr);
  auto block_diagonal_structure = block_diagonal->block_structure();

  const double* values = matrix_.values();
  double* values_diagonal = block_diagonal->mutable_values();
  ParallelFor(
      options_.context,
      0,
      num_col_blocks_e_,
      options_.num_threads,
      [values,
       transpose_block_structure,
       values_diagonal,
       block_diagonal_structure](int col_block_id) {
        // Destination: the single (diagonal) cell of row col_block_id of
        // the block diagonal matrix.
        int cell_position =
            block_diagonal_structure->rows[col_block_id].cells[0].position;
        double* cell_values = values_diagonal + cell_position;
        // In the transpose block structure, row col_block_id corresponds
        // to column block col_block_id of the original matrix; its cells
        // enumerate every row block where this column has a cell.
        int col_block_size =
            transpose_block_structure->rows[col_block_id].block.size;
        auto& cells = transpose_block_structure->rows[col_block_id].cells;
        // Zero this diagonal block before accumulating; unlike the
        // single-threaded path, SetZero() on the whole matrix is not
        // called here.
        MatrixRef(cell_values, col_block_size, col_block_size).setZero();

        // Accumulate cell' * cell over every cell in this column.
        // NOTE(review): no e-row cutoff is applied, which assumes an E
        // column has cells only in the E rows — consistent with the
        // single-threaded path, which reads only cells[0] of the first
        // num_row_blocks_e_ rows; confirm against the block structure
        // invariants.
        for (auto& c : cells) {
          int row_block_size = transpose_block_structure->cols[c.block_id].size;
          // clang-format off
          MatrixTransposeMatrixMultiply<kRowBlockSize, kEBlockSize, kRowBlockSize, kEBlockSize, 1>(
              values + c.position, row_block_size, col_block_size,
              values + c.position, row_block_size, col_block_size,
              cell_values, 0, 0, col_block_size, col_block_size);
          // clang-format on
        }
      },
      e_cols_partition_);
}
| |
| template <int kRowBlockSize, int kEBlockSize, int kFBlockSize> |
| void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>:: |
| UpdateBlockDiagonalEtE(BlockSparseMatrix* block_diagonal) const { |
| if (options_.num_threads == 1) { |
| UpdateBlockDiagonalEtESingleThreaded(block_diagonal); |
| } else { |
| CHECK(options_.context != nullptr); |
| UpdateBlockDiagonalEtEMultiThreaded(block_diagonal); |
| } |
| } |
| |
| // Similar to the code in RightMultiplyAndAccumulateF, except instead of the |
| // matrix vector multiply it's an outer product. |
| // |
| // block_diagonal = block_diagonal(F'F) |
| // |
| template <int kRowBlockSize, int kEBlockSize, int kFBlockSize> |
| void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>:: |
| UpdateBlockDiagonalFtFSingleThreaded( |
| BlockSparseMatrix* block_diagonal) const { |
| auto bs = matrix_.block_structure(); |
| auto block_diagonal_structure = block_diagonal->block_structure(); |
| |
| block_diagonal->SetZero(); |
| const double* values = matrix_.values(); |
| for (int r = 0; r < num_row_blocks_e_; ++r) { |
| const int row_block_size = bs->rows[r].block.size; |
| const std::vector<Cell>& cells = bs->rows[r].cells; |
| for (int c = 1; c < cells.size(); ++c) { |
| const int col_block_id = cells[c].block_id; |
| const int col_block_size = bs->cols[col_block_id].size; |
| const int diagonal_block_id = col_block_id - num_col_blocks_e_; |
| const int cell_position = |
| block_diagonal_structure->rows[diagonal_block_id].cells[0].position; |
| |
| // clang-format off |
| MatrixTransposeMatrixMultiply |
| <kRowBlockSize, kFBlockSize, kRowBlockSize, kFBlockSize, 1>( |
| values + cells[c].position, row_block_size, col_block_size, |
| values + cells[c].position, row_block_size, col_block_size, |
| block_diagonal->mutable_values() + cell_position, |
| 0, 0, col_block_size, col_block_size); |
| // clang-format on |
| } |
| } |
| |
| for (int r = num_row_blocks_e_; r < bs->rows.size(); ++r) { |
| const int row_block_size = bs->rows[r].block.size; |
| const std::vector<Cell>& cells = bs->rows[r].cells; |
| for (const auto& cell : cells) { |
| const int col_block_id = cell.block_id; |
| const int col_block_size = bs->cols[col_block_id].size; |
| const int diagonal_block_id = col_block_id - num_col_blocks_e_; |
| const int cell_position = |
| block_diagonal_structure->rows[diagonal_block_id].cells[0].position; |
| |
| // clang-format off |
| MatrixTransposeMatrixMultiply |
| <Eigen::Dynamic, Eigen::Dynamic, Eigen::Dynamic, Eigen::Dynamic, 1>( |
| values + cell.position, row_block_size, col_block_size, |
| values + cell.position, row_block_size, col_block_size, |
| block_diagonal->mutable_values() + cell_position, |
| 0, 0, col_block_size, col_block_size); |
| // clang-format on |
| } |
| } |
| } |
| |
template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
    UpdateBlockDiagonalFtFMultiThreaded(
        BlockSparseMatrix* block_diagonal) const {
  // Computes block_diagonal = block_diagonal(F'F), one parallel task per
  // F column block. Each task writes only its own diagonal block, so no
  // synchronization between tasks is needed.
  auto transpose_block_structure = matrix_.transpose_block_structure();
  CHECK(transpose_block_structure != nullptr);
  auto block_diagonal_structure = block_diagonal->block_structure();

  const double* values = matrix_.values();
  double* values_diagonal = block_diagonal->mutable_values();

  // Local copies of class members in order to avoid capturing a pointer
  // to the whole object in the lambda function.
  const int num_col_blocks_e = num_col_blocks_e_;
  const int num_row_blocks_e = num_row_blocks_e_;
  ParallelFor(
      options_.context,
      num_col_blocks_e_,
      num_col_blocks_e + num_col_blocks_f_,
      options_.num_threads,
      [transpose_block_structure,
       block_diagonal_structure,
       num_col_blocks_e,
       num_row_blocks_e,
       values,
       values_diagonal](int col_block_id) {
        // In the transpose block structure, row col_block_id corresponds
        // to column block col_block_id of the original matrix.
        const int col_block_size =
            transpose_block_structure->rows[col_block_id].block.size;
        // Diagonal block ids are re-based so the first F column becomes
        // block 0 of the block diagonal matrix.
        const int diagonal_block_id = col_block_id - num_col_blocks_e;
        const int cell_position =
            block_diagonal_structure->rows[diagonal_block_id].cells[0].position;
        double* cell_values = values_diagonal + cell_position;

        // Zero this diagonal block before accumulating; unlike the
        // single-threaded path, SetZero() on the whole matrix is not
        // called here.
        MatrixRef(cell_values, col_block_size, col_block_size).setZero();

        auto& cells = transpose_block_structure->rows[col_block_id].cells;
        const int num_cells = cells.size();
        int i = 0;
        // Cells are ordered by row block id (the break relies on this):
        // cells in the E rows come first and use the statically sized
        // multiply kernel ...
        for (; i < num_cells; ++i) {
          auto& cell = cells[i];
          const int row_block_id = cell.block_id;
          if (row_block_id >= num_row_blocks_e) break;
          const int row_block_size =
              transpose_block_structure->cols[row_block_id].size;
          // clang-format off
          MatrixTransposeMatrixMultiply
              <kRowBlockSize, kFBlockSize, kRowBlockSize, kFBlockSize, 1>(
              values + cell.position, row_block_size, col_block_size,
              values + cell.position, row_block_size, col_block_size,
              cell_values, 0, 0, col_block_size, col_block_size);
          // clang-format on
        }
        // ... and the remaining cells, whose row block sizes are not
        // known at compile time, use the dynamically sized kernel.
        for (; i < num_cells; ++i) {
          auto& cell = cells[i];
          const int row_block_id = cell.block_id;
          const int row_block_size =
              transpose_block_structure->cols[row_block_id].size;
          // clang-format off
          MatrixTransposeMatrixMultiply
              <Eigen::Dynamic, Eigen::Dynamic, Eigen::Dynamic, Eigen::Dynamic, 1>(
              values + cell.position, row_block_size, col_block_size,
              values + cell.position, row_block_size, col_block_size,
              cell_values, 0, 0, col_block_size, col_block_size);
          // clang-format on
        }
      },
      f_cols_partition_);
}
| |
| template <int kRowBlockSize, int kEBlockSize, int kFBlockSize> |
| void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>:: |
| UpdateBlockDiagonalFtF(BlockSparseMatrix* block_diagonal) const { |
| if (options_.num_threads == 1) { |
| UpdateBlockDiagonalFtFSingleThreaded(block_diagonal); |
| } else { |
| CHECK(options_.context != nullptr); |
| UpdateBlockDiagonalFtFMultiThreaded(block_diagonal); |
| } |
| } |
| |
| } // namespace ceres::internal |