blob: b2802ad76fca17c4385748995192ca6a12df080b [file] [log] [blame]
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * Neither the name of Google Inc. nor the names of its contributors may be
// used to endorse or promote products derived from this software without
// specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
// Author: sameeragarwal@google.com (Sameer Agarwal)
#include <algorithm>
#include <cstring>
#include <memory>
#include <vector>
#include "absl/log/check.h"
#include "ceres/block_sparse_matrix.h"
#include "ceres/block_structure.h"
#include "ceres/internal/eigen.h"
#include "ceres/parallel_for.h"
#include "ceres/partition_range_for_parallel_for.h"
#include "ceres/partitioned_matrix_view.h"
#include "ceres/small_blas.h"
namespace ceres::internal {
// Builds a view of `matrix` partitioned column-wise into [E F]: E consists of
// the first options.elimination_groups[0] column blocks and F of all the
// remaining column blocks.
//
// The constructor caches the number of column blocks, row blocks and scalar
// columns of each part. If the matrix exposes a transposed block structure
// and more than one thread is requested, it also precomputes partitions of
// the E and F column blocks that the multi-threaded methods hand to
// ParallelFor.
template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
    PartitionedMatrixView(const LinearSolver::Options& options,
                          const BlockSparseMatrix& matrix)
    : options_(options), matrix_(matrix) {
  const CompressedRowBlockStructure* bs = matrix_.block_structure();
  CHECK(bs != nullptr);

  // The size of the first elimination group defines the E/F split. Indexing
  // an empty vector is undefined behaviour, so fail with a diagnostic
  // instead.
  CHECK(!options_.elimination_groups.empty())
      << "PartitionedMatrixView requires a non-empty elimination_groups.";
  num_col_blocks_e_ = options_.elimination_groups[0];
  num_col_blocks_f_ = bs->cols.size() - num_col_blocks_e_;

  // Compute the number of row blocks in E. The number of row blocks
  // in E may be less than the number of row blocks in the input matrix
  // as some of the row blocks at the bottom may not have any
  // e_blocks. For a definition of what an e_block is, please see
  // schur_complement_solver.h
  num_row_blocks_e_ = 0;
  for (const auto& row : bs->rows) {
    const std::vector<Cell>& cells = row.cells;
    if (cells[0].block_id < num_col_blocks_e_) {
      ++num_row_blocks_e_;
    }
  }

  // Compute the number of columns in E and F.
  num_cols_e_ = 0;
  num_cols_f_ = 0;
  const int num_col_blocks = bs->cols.size();
  for (int c = 0; c < num_col_blocks; ++c) {
    const Block& block = bs->cols[c];
    if (c < num_col_blocks_e_) {
      num_cols_e_ += block.size;
    } else {
      num_cols_f_ += block.size;
    }
  }
  CHECK_EQ(num_cols_e_ + num_cols_f_, matrix_.num_cols());

  // Partitions are only needed (and only computable) when running with
  // several threads on a matrix that carries its transposed structure.
  auto transpose_bs = matrix_.transpose_block_structure();
  const int num_threads = options_.num_threads;
  if (transpose_bs != nullptr && num_threads > 1) {
    // Allow up to four partitions per thread.
    const int max_partitions = num_threads * 4;
    e_cols_partition_ = PartitionRangeForParallelFor(
        0,
        num_col_blocks_e_,
        max_partitions,
        transpose_bs->rows.data(),
        [](const CompressedRow& row) { return row.cumulative_nnz; });
    f_cols_partition_ = PartitionRangeForParallelFor(
        num_col_blocks_e_,
        num_col_blocks_e_ + num_col_blocks_f_,
        max_partitions,
        transpose_bs->rows.data(),
        [](const CompressedRow& row) { return row.cumulative_nnz; });
  }
}
// The next four methods don't seem to be particularly cache
// friendly. This is an artifact of how the BlockStructure of the
// input matrix is constructed. These methods will benefit from
// multithreading as well as improved data layout.
// Multiplies the E part of the matrix with x and accumulates the result
// into y.
//
// Only the first num_row_blocks_e_ row blocks contribute, and in each of
// them only the leading cell is used. x is offset by the cell's column block
// position and y by the row block position.
template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
    RightMultiplyAndAccumulateE(const double* x, double* y) const {
  const double* values = matrix_.values();
  auto bs = matrix_.block_structure();
  ParallelFor(
      options_.context,
      0,
      num_row_blocks_e_,
      options_.num_threads,
      [values, bs, x, y](int row_block_id) {
        const CompressedRow& row = bs->rows[row_block_id];
        // The leading cell of an E row block is its E cell.
        const Cell& e_cell = row.cells[0];
        const Block& col_block = bs->cols[e_cell.block_id];
        MatrixVectorMultiply<kRowBlockSize, kEBlockSize, 1>(
            values + e_cell.position,
            row.block.size,
            col_block.size,
            x + col_block.position,
            y + row.block.position);
      });
}
// Multiplies the F part of the matrix with x and accumulates the result
// into y.
//
// x is addressed relative to the first column of F, so every column block
// position is reduced by the number of columns in E before it is used as an
// offset into x. y is offset by the row block position.
template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
    RightMultiplyAndAccumulateF(const double* x, double* y) const {
  const double* values = matrix_.values();
  const CompressedRowBlockStructure* bs = matrix_.block_structure();
  const int num_cols_e = num_cols_e_;
  const int num_row_blocks = bs->rows.size();

  // Row blocks belonging to E: the leading cell is of type E and is
  // skipped, every following cell is of type F and is multiplied with the
  // statically sized kernel.
  ParallelFor(
      options_.context,
      0,
      num_row_blocks_e_,
      options_.num_threads,
      [values, bs, num_cols_e, x, y](int row_block_id) {
        const CompressedRow& row = bs->rows[row_block_id];
        for (int c = 1; c < row.cells.size(); ++c) {
          const Cell& f_cell = row.cells[c];
          const Block& col_block = bs->cols[f_cell.block_id];
          MatrixVectorMultiply<kRowBlockSize, kFBlockSize, 1>(
              values + f_cell.position,
              row.block.size,
              col_block.size,
              x + (col_block.position - num_cols_e),
              y + row.block.position);
        }
      });

  // Remaining row blocks (the bottom num_row_blocks - num_row_blocks_e_
  // ones): all cells are of type F, and the dynamically sized kernel is
  // used.
  ParallelFor(
      options_.context,
      num_row_blocks_e_,
      num_row_blocks,
      options_.num_threads,
      [values, bs, num_cols_e, x, y](int row_block_id) {
        const CompressedRow& row = bs->rows[row_block_id];
        for (const Cell& f_cell : row.cells) {
          const Block& col_block = bs->cols[f_cell.block_id];
          MatrixVectorMultiply<Eigen::Dynamic, Eigen::Dynamic, 1>(
              values + f_cell.position,
              row.block.size,
              col_block.size,
              x + (col_block.position - num_cols_e),
              y + row.block.position);
        }
      });
}
// Dispatches the left product with E to the single- or multi-threaded
// implementation, depending on options_.num_threads.
//
// Nothing is done when E has no column blocks or no row blocks. The
// multi-threaded path requires a non-null options_.context.
template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
    LeftMultiplyAndAccumulateE(const double* x, double* y) const {
  if (num_col_blocks_e_ == 0 || num_row_blocks_e_ == 0) {
    return;
  }
  if (options_.num_threads != 1) {
    CHECK(options_.context != nullptr);
    LeftMultiplyAndAccumulateEMultiThreaded(x, y);
    return;
  }
  LeftMultiplyAndAccumulateESingleThreaded(x, y);
}
// Serial left product with E: for each of the first num_row_blocks_e_ row
// blocks, multiplies the transpose of the leading (E) cell with the
// matching slice of x and accumulates into the slice of y that starts at the
// cell's column block position.
template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
    LeftMultiplyAndAccumulateESingleThreaded(const double* x,
                                             double* y) const {
  const double* values = matrix_.values();
  const CompressedRowBlockStructure* bs = matrix_.block_structure();
  for (int r = 0; r < num_row_blocks_e_; ++r) {
    const CompressedRow& row = bs->rows[r];
    const Cell& e_cell = row.cells[0];
    const Block& col_block = bs->cols[e_cell.block_id];
    MatrixTransposeVectorMultiply<kRowBlockSize, kEBlockSize, 1>(
        values + e_cell.position,
        row.block.size,
        col_block.size,
        x + row.block.position,
        y + col_block.position);
  }
}
// Parallel left product with E, driven by the transposed block structure.
//
// Work is distributed over the first num_col_blocks_e_ rows of the
// transposed structure using the precomputed E partition. Each task walks
// the cells of one such row and stops at the first cell whose block id is
// not below num_row_blocks_e_.
// NOTE(review): the early exit presumably relies on the cells being ordered
// by block id - confirm against the transposed structure's construction.
template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
    LeftMultiplyAndAccumulateEMultiThreaded(const double* x,
                                            double* y) const {
  auto transpose_bs = matrix_.transpose_block_structure();
  CHECK(transpose_bs != nullptr);

  // Copy what the tasks need into locals so that the lambda does not have
  // to capture a pointer to the whole object.
  auto values = matrix_.values();
  const int num_row_blocks_e = num_row_blocks_e_;
  ParallelFor(
      options_.context,
      0,
      num_col_blocks_e_,
      options_.num_threads,
      [values, transpose_bs, num_row_blocks_e, x, y](int transpose_row_id) {
        const auto& transpose_row = transpose_bs->rows[transpose_row_id];
        const int out_pos = transpose_row.block.position;
        const int out_size = transpose_row.block.size;
        for (const auto& cell : transpose_row.cells) {
          const int in_block_id = cell.block_id;
          if (in_block_id >= num_row_blocks_e) {
            break;
          }
          const auto& in_block = transpose_bs->cols[in_block_id];
          MatrixTransposeVectorMultiply<kRowBlockSize, kEBlockSize, 1>(
              values + cell.position,
              in_block.size,
              out_size,
              x + in_block.position,
              y + out_pos);
        }
      },
      e_cols_partition());
}
// Dispatches the left product with F to the single- or multi-threaded
// implementation, depending on options_.num_threads.
//
// Nothing is done when F has no column blocks. The multi-threaded path
// requires a non-null options_.context.
template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
    LeftMultiplyAndAccumulateF(const double* x, double* y) const {
  if (num_col_blocks_f_ == 0) {
    return;
  }
  if (options_.num_threads != 1) {
    CHECK(options_.context != nullptr);
    LeftMultiplyAndAccumulateFMultiThreaded(x, y);
    return;
  }
  LeftMultiplyAndAccumulateFSingleThreaded(x, y);
}
// Serial left product with F.
//
// y is addressed relative to the first column of F, so each column block
// position is reduced by num_cols_e_ before it is used as an offset into y.
// x is offset by the row block position.
template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
    LeftMultiplyAndAccumulateFSingleThreaded(const double* x,
                                             double* y) const {
  const double* values = matrix_.values();
  const CompressedRowBlockStructure* bs = matrix_.block_structure();
  const int num_row_blocks = bs->rows.size();

  // Row blocks belonging to E: skip the leading cell, which is of type E,
  // and multiply by the remaining cells with the statically sized kernel.
  int r = 0;
  for (; r < num_row_blocks_e_; ++r) {
    const CompressedRow& row = bs->rows[r];
    for (int c = 1; c < row.cells.size(); ++c) {
      const Cell& f_cell = row.cells[c];
      const Block& col_block = bs->cols[f_cell.block_id];
      MatrixTransposeVectorMultiply<kRowBlockSize, kFBlockSize, 1>(
          values + f_cell.position,
          row.block.size,
          col_block.size,
          x + row.block.position,
          y + (col_block.position - num_cols_e_));
    }
  }

  // Row blocks below E: every cell is of type F and the dynamically sized
  // kernel is used.
  for (; r < num_row_blocks; ++r) {
    const CompressedRow& row = bs->rows[r];
    for (const Cell& f_cell : row.cells) {
      const Block& col_block = bs->cols[f_cell.block_id];
      MatrixTransposeVectorMultiply<Eigen::Dynamic, Eigen::Dynamic, 1>(
          values + f_cell.position,
          row.block.size,
          col_block.size,
          x + row.block.position,
          y + (col_block.position - num_cols_e_));
    }
  }
}
// Parallel left product with F, driven by the transposed block structure.
//
// Work is distributed over rows [num_col_blocks_e_, num_col_blocks_e_ +
// num_col_blocks_f_) of the transposed structure using the precomputed F
// partition. Within a row, the leading run of cells whose block id is below
// num_row_blocks_e_ is handled by the statically sized kernel and all
// remaining cells by the dynamically sized one. The output offset into y is
// reduced by the number of columns in E.
template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
    LeftMultiplyAndAccumulateFMultiThreaded(const double* x,
                                            double* y) const {
  auto transpose_bs = matrix_.transpose_block_structure();
  CHECK(transpose_bs != nullptr);

  // Copy what the tasks need into locals so that the lambda does not have
  // to capture a pointer to the whole object.
  auto values = matrix_.values();
  const int num_row_blocks_e = num_row_blocks_e_;
  const int num_cols_e = num_cols_e_;
  ParallelFor(
      options_.context,
      num_col_blocks_e_,
      num_col_blocks_e_ + num_col_blocks_f_,
      options_.num_threads,
      [values, transpose_bs, num_row_blocks_e, num_cols_e, x, y](
          int transpose_row_id) {
        const auto& transpose_row = transpose_bs->rows[transpose_row_id];
        const int out_offset = transpose_row.block.position - num_cols_e;
        const int out_size = transpose_row.block.size;
        const auto& cells = transpose_row.cells;

        auto it = cells.begin();
        const auto last = cells.end();

        // Cells coming from row blocks of E: fixed-size kernel.
        for (; it != last && it->block_id < num_row_blocks_e; ++it) {
          const auto& in_block = transpose_bs->cols[it->block_id];
          MatrixTransposeVectorMultiply<kRowBlockSize, kFBlockSize, 1>(
              values + it->position,
              in_block.size,
              out_size,
              x + in_block.position,
              y + out_offset);
        }

        // Everything after that: dynamically sized kernel.
        for (; it != last; ++it) {
          const auto& in_block = transpose_bs->cols[it->block_id];
          MatrixTransposeVectorMultiply<Eigen::Dynamic, Eigen::Dynamic, 1>(
              values + it->position,
              in_block.size,
              out_size,
              x + in_block.position,
              y + out_offset);
        }
      },
      f_cols_partition());
}
// Given a range of column blocks of a matrix m, compute the block
// structure of the block diagonal of the matrix m(:,
// start_col_block:end_col_block)'m(:, start_col_block:end_col_block)
// and return a BlockSparseMatrix with this block structure. The
// caller owns the result.
// Builds the block structure of a block-diagonal matrix with one square
// diagonal block per column block in [start_col_block, end_col_block) of the
// viewed matrix, and returns a BlockSparseMatrix constructed from it.
//
// Diagonal block i has the size of column block start_col_block + i; the
// blocks are laid out back to back, both in row/column position and in the
// position of their cells.
template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
std::unique_ptr<BlockSparseMatrix>
PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
    CreateBlockDiagonalMatrixLayout(int start_col_block,
                                    int end_col_block) const {
  const CompressedRowBlockStructure* bs = matrix_.block_structure();

  // Hold the structure in a unique_ptr while it is being filled in, so that
  // it is not leaked should any of the insertions below throw.
  auto block_diagonal_structure =
      std::make_unique<CompressedRowBlockStructure>();

  // The final number of rows and columns is known up front; reserve once
  // instead of growing repeatedly.
  const int num_diagonal_blocks = std::max(0, end_col_block - start_col_block);
  block_diagonal_structure->cols.reserve(num_diagonal_blocks);
  block_diagonal_structure->rows.reserve(num_diagonal_blocks);

  int block_position = 0;
  int diagonal_cell_position = 0;
  // Iterate over the column blocks, creating a new diagonal block for
  // each column block.
  for (int c = start_col_block; c < end_col_block; ++c) {
    const Block& block = bs->cols[c];

    block_diagonal_structure->cols.emplace_back();
    Block& diagonal_block = block_diagonal_structure->cols.back();
    diagonal_block.size = block.size;
    diagonal_block.position = block_position;

    block_diagonal_structure->rows.emplace_back();
    CompressedRow& row = block_diagonal_structure->rows.back();
    row.block = diagonal_block;

    // Exactly one cell per row: the diagonal block itself.
    row.cells.emplace_back();
    Cell& cell = row.cells.back();
    cell.block_id = c - start_col_block;
    cell.position = diagonal_cell_position;

    block_position += block.size;
    diagonal_cell_position += block.size * block.size;
  }

  // Build a BlockSparseMatrix with the just computed block structure. As in
  // the original code, the raw structure pointer is handed to the
  // BlockSparseMatrix constructor.
  return std::make_unique<BlockSparseMatrix>(
      block_diagonal_structure.release());
}
// Allocates a block-diagonal matrix with one block per E column block, has
// UpdateBlockDiagonalEtE fill it in, and returns it.
template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
std::unique_ptr<BlockSparseMatrix>
PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
    CreateBlockDiagonalEtE() const {
  const int first_e_block = 0;
  const int end_e_block = num_col_blocks_e_;
  auto ete = CreateBlockDiagonalMatrixLayout(first_e_block, end_e_block);
  UpdateBlockDiagonalEtE(ete.get());
  return ete;
}
// Allocates a block-diagonal matrix with one block per F column block, has
// UpdateBlockDiagonalFtF fill it in, and returns it.
template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
std::unique_ptr<BlockSparseMatrix>
PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
    CreateBlockDiagonalFtF() const {
  // The F column blocks directly follow the E column blocks.
  const int first_f_block = num_col_blocks_e_;
  const int end_f_block = num_col_blocks_e_ + num_col_blocks_f_;
  auto ftf = CreateBlockDiagonalMatrixLayout(first_f_block, end_f_block);
  UpdateBlockDiagonalFtF(ftf.get());
  return ftf;
}
// Similar to the code in RightMultiplyAndAccumulateE, except instead of the
// matrix vector multiply it's an outer product.
//
// block_diagonal = block_diagonal(E'E)
//
// Serial computation of block_diagonal = block_diagonal(E'E).
//
// The output is zeroed first. Then, for each of the first num_row_blocks_e_
// row blocks, the product of the transposed leading (E) cell with itself is
// accumulated into the diagonal block of the cell's column block.
template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
    UpdateBlockDiagonalEtESingleThreaded(
        BlockSparseMatrix* block_diagonal) const {
  const auto bs = matrix_.block_structure();
  const auto diagonal_bs = block_diagonal->block_structure();
  block_diagonal->SetZero();

  const double* values = matrix_.values();
  double* diagonal_values = block_diagonal->mutable_values();
  for (int r = 0; r < num_row_blocks_e_; ++r) {
    const CompressedRow& row = bs->rows[r];
    const Cell& e_cell = row.cells[0];
    const int e_block_id = e_cell.block_id;
    const int e_block_size = bs->cols[e_block_id].size;
    const double* e_block = values + e_cell.position;
    double* diagonal_block =
        diagonal_values + diagonal_bs->rows[e_block_id].cells[0].position;
    MatrixTransposeMatrixMultiply<kRowBlockSize,
                                  kEBlockSize,
                                  kRowBlockSize,
                                  kEBlockSize,
                                  1>(e_block,
                                     row.block.size,
                                     e_block_size,
                                     e_block,
                                     row.block.size,
                                     e_block_size,
                                     diagonal_block,
                                     0,
                                     0,
                                     e_block_size,
                                     e_block_size);
  }
}
// Parallel computation of block_diagonal = block_diagonal(E'E), driven by
// the transposed block structure.
//
// One task per E column block: the task zeroes that column block's diagonal
// block and then accumulates into it the product of each of the column's
// cells, transposed, with itself. Each task writes only to its own diagonal
// block.
template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
    UpdateBlockDiagonalEtEMultiThreaded(
        BlockSparseMatrix* block_diagonal) const {
  auto transpose_block_structure = matrix_.transpose_block_structure();
  CHECK(transpose_block_structure != nullptr);
  auto block_diagonal_structure = block_diagonal->block_structure();

  const double* values = matrix_.values();
  double* values_diagonal = block_diagonal->mutable_values();
  ParallelFor(
      options_.context,
      0,
      num_col_blocks_e_,
      options_.num_threads,
      [values,
       transpose_block_structure,
       values_diagonal,
       block_diagonal_structure](int col_block_id) {
        const auto& transpose_row =
            transpose_block_structure->rows[col_block_id];
        const int e_block_size = transpose_row.block.size;
        double* diagonal_block =
            values_diagonal +
            block_diagonal_structure->rows[col_block_id].cells[0].position;

        // Start from a clean diagonal block before accumulating.
        MatrixRef(diagonal_block, e_block_size, e_block_size).setZero();

        for (const auto& cell : transpose_row.cells) {
          const int row_block_size =
              transpose_block_structure->cols[cell.block_id].size;
          const double* e_block = values + cell.position;
          MatrixTransposeMatrixMultiply<kRowBlockSize,
                                        kEBlockSize,
                                        kRowBlockSize,
                                        kEBlockSize,
                                        1>(e_block,
                                           row_block_size,
                                           e_block_size,
                                           e_block,
                                           row_block_size,
                                           e_block_size,
                                           diagonal_block,
                                           0,
                                           0,
                                           e_block_size,
                                           e_block_size);
        }
      },
      e_cols_partition_);
}
// Recomputes the block diagonal of E'E into block_diagonal, using the
// serial implementation when options_.num_threads is 1 and the parallel one
// (which requires a non-null options_.context) otherwise.
template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
    UpdateBlockDiagonalEtE(BlockSparseMatrix* block_diagonal) const {
  if (options_.num_threads != 1) {
    CHECK(options_.context != nullptr);
    UpdateBlockDiagonalEtEMultiThreaded(block_diagonal);
    return;
  }
  UpdateBlockDiagonalEtESingleThreaded(block_diagonal);
}
// Similar to the code in RightMultiplyAndAccumulateF, except instead of the
// matrix vector multiply it's an outer product.
//
// block_diagonal = block_diagonal(F'F)
//
// Serial computation of block_diagonal = block_diagonal(F'F).
//
// The output is zeroed first. Every F cell then contributes the product of
// its transpose with itself to the diagonal block of its column block; the
// diagonal block index is the column block id minus num_col_blocks_e_.
template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
    UpdateBlockDiagonalFtFSingleThreaded(
        BlockSparseMatrix* block_diagonal) const {
  const auto bs = matrix_.block_structure();
  const auto diagonal_bs = block_diagonal->block_structure();
  block_diagonal->SetZero();

  const double* values = matrix_.values();
  double* diagonal_values = block_diagonal->mutable_values();
  const int num_row_blocks = bs->rows.size();

  // Row blocks belonging to E: the leading cell is of type E and is
  // skipped; the remaining cells use the statically sized kernel.
  int r = 0;
  for (; r < num_row_blocks_e_; ++r) {
    const CompressedRow& row = bs->rows[r];
    for (int c = 1; c < row.cells.size(); ++c) {
      const Cell& f_cell = row.cells[c];
      const int f_block_size = bs->cols[f_cell.block_id].size;
      const int diagonal_block_id = f_cell.block_id - num_col_blocks_e_;
      const double* f_block = values + f_cell.position;
      double* diagonal_block =
          diagonal_values +
          diagonal_bs->rows[diagonal_block_id].cells[0].position;
      MatrixTransposeMatrixMultiply<kRowBlockSize,
                                    kFBlockSize,
                                    kRowBlockSize,
                                    kFBlockSize,
                                    1>(f_block,
                                       row.block.size,
                                       f_block_size,
                                       f_block,
                                       row.block.size,
                                       f_block_size,
                                       diagonal_block,
                                       0,
                                       0,
                                       f_block_size,
                                       f_block_size);
    }
  }

  // Row blocks below E: every cell is of type F and the dynamically sized
  // kernel is used.
  for (; r < num_row_blocks; ++r) {
    const CompressedRow& row = bs->rows[r];
    for (const Cell& f_cell : row.cells) {
      const int f_block_size = bs->cols[f_cell.block_id].size;
      const int diagonal_block_id = f_cell.block_id - num_col_blocks_e_;
      const double* f_block = values + f_cell.position;
      double* diagonal_block =
          diagonal_values +
          diagonal_bs->rows[diagonal_block_id].cells[0].position;
      MatrixTransposeMatrixMultiply<Eigen::Dynamic,
                                    Eigen::Dynamic,
                                    Eigen::Dynamic,
                                    Eigen::Dynamic,
                                    1>(f_block,
                                       row.block.size,
                                       f_block_size,
                                       f_block,
                                       row.block.size,
                                       f_block_size,
                                       diagonal_block,
                                       0,
                                       0,
                                       f_block_size,
                                       f_block_size);
    }
  }
}
// Parallel computation of block_diagonal = block_diagonal(F'F), driven by
// the transposed block structure.
//
// One task per F column block: the task zeroes that column block's diagonal
// block and then accumulates into it the product of each of the column's
// cells, transposed, with itself. The leading run of cells whose row block
// id is below num_row_blocks_e_ uses the statically sized kernel, all
// remaining cells the dynamically sized one.
template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
    UpdateBlockDiagonalFtFMultiThreaded(
        BlockSparseMatrix* block_diagonal) const {
  auto transpose_block_structure = matrix_.transpose_block_structure();
  CHECK(transpose_block_structure != nullptr);
  auto block_diagonal_structure = block_diagonal->block_structure();

  const double* values = matrix_.values();
  double* values_diagonal = block_diagonal->mutable_values();
  // Local copies so that the lambda does not capture the whole object.
  const int num_col_blocks_e = num_col_blocks_e_;
  const int num_row_blocks_e = num_row_blocks_e_;
  ParallelFor(
      options_.context,
      num_col_blocks_e_,
      num_col_blocks_e + num_col_blocks_f_,
      options_.num_threads,
      [transpose_block_structure,
       block_diagonal_structure,
       num_col_blocks_e,
       num_row_blocks_e,
       values,
       values_diagonal](int col_block_id) {
        const auto& transpose_row =
            transpose_block_structure->rows[col_block_id];
        const int f_block_size = transpose_row.block.size;
        const int diagonal_block_id = col_block_id - num_col_blocks_e;
        double* diagonal_block =
            values_diagonal +
            block_diagonal_structure->rows[diagonal_block_id]
                .cells[0]
                .position;

        // Start from a clean diagonal block before accumulating.
        MatrixRef(diagonal_block, f_block_size, f_block_size).setZero();

        const auto& cells = transpose_row.cells;
        auto it = cells.begin();
        const auto last = cells.end();

        // Cells coming from row blocks of E: fixed-size kernel.
        for (; it != last && it->block_id < num_row_blocks_e; ++it) {
          const int row_block_size =
              transpose_block_structure->cols[it->block_id].size;
          const double* f_block = values + it->position;
          MatrixTransposeMatrixMultiply<kRowBlockSize,
                                        kFBlockSize,
                                        kRowBlockSize,
                                        kFBlockSize,
                                        1>(f_block,
                                           row_block_size,
                                           f_block_size,
                                           f_block,
                                           row_block_size,
                                           f_block_size,
                                           diagonal_block,
                                           0,
                                           0,
                                           f_block_size,
                                           f_block_size);
        }

        // Everything after that: dynamically sized kernel.
        for (; it != last; ++it) {
          const int row_block_size =
              transpose_block_structure->cols[it->block_id].size;
          const double* f_block = values + it->position;
          MatrixTransposeMatrixMultiply<Eigen::Dynamic,
                                        Eigen::Dynamic,
                                        Eigen::Dynamic,
                                        Eigen::Dynamic,
                                        1>(f_block,
                                           row_block_size,
                                           f_block_size,
                                           f_block,
                                           row_block_size,
                                           f_block_size,
                                           diagonal_block,
                                           0,
                                           0,
                                           f_block_size,
                                           f_block_size);
        }
      },
      f_cols_partition_);
}
// Recomputes the block diagonal of F'F into block_diagonal, using the
// serial implementation when options_.num_threads is 1 and the parallel one
// (which requires a non-null options_.context) otherwise.
template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
    UpdateBlockDiagonalFtF(BlockSparseMatrix* block_diagonal) const {
  if (options_.num_threads != 1) {
    CHECK(options_.context != nullptr);
    UpdateBlockDiagonalFtFMultiThreaded(block_diagonal);
    return;
  }
  UpdateBlockDiagonalFtFSingleThreaded(block_diagonal);
}
} // namespace ceres::internal