// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
//   this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
//   this list of conditions and the following disclaimer in the documentation
//   and/or other materials provided with the distribution.
// * Neither the name of Google Inc. nor the names of its contributors may be
//   used to endorse or promote products derived from this software without
//   specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
// Author: sameeragarwal@google.com (Sameer Agarwal)

#include "ceres/inner_product_computer.h"

#include <algorithm>
#include <memory>

#include "absl/log/check.h"
#include "ceres/small_blas.h"

namespace ceres::internal {

// Create the CompressedRowSparseMatrix matrix that will contain the
// inner product.
//
// storage_type controls whether the result matrix contains the upper
// or the lower triangular part of the product.
//
// num_nonzeros is the number of non-zeros in the result matrix.
std::unique_ptr<CompressedRowSparseMatrix>
InnerProductComputer::CreateResultMatrix(
    const CompressedRowSparseMatrix::StorageType storage_type,
    const int num_nonzeros) {
  auto matrix = std::make_unique<CompressedRowSparseMatrix>(
      m_.num_cols(), m_.num_cols(), num_nonzeros);
  matrix->set_storage_type(storage_type);
  const CompressedRowBlockStructure* bs = m_.block_structure();
  *matrix->mutable_row_blocks() = bs->cols;
  *matrix->mutable_col_blocks() = bs->cols;
  return matrix;
}

// Given the set of product terms in the inner product, return the
// total number of non-zeros in the result and for each row block of
// the result matrix, compute the number of non-zeros in any one row
// of the row block.
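//
// As an illustrative example (not part of the original comment): if the
// column blocks have sizes {2, 3} and the sorted product terms are
// (0, 0), (0, 0), (1, 0), (1, 1), then row_nnz becomes {2, 5} and the
// returned count is 2 * 2 + 3 * 2 + 3 * 3 = 19, i.e. 2 rows with 2
// nonzeros each plus 3 rows with 5 nonzeros each.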
int InnerProductComputer::ComputeNonzeros(
    const std::vector<InnerProductComputer::ProductTerm>& product_terms,
    std::vector<int>* row_nnz) {
  const CompressedRowBlockStructure* bs = m_.block_structure();
  const std::vector<Block>& blocks = bs->cols;

  row_nnz->resize(blocks.size());
  std::fill(row_nnz->begin(), row_nnz->end(), 0);

  if (product_terms.empty()) {
    return 0;
  }

  // First product term.
  (*row_nnz)[product_terms[0].row] = blocks[product_terms[0].col].size;
  int num_nonzeros =
      blocks[product_terms[0].row].size * blocks[product_terms[0].col].size;

  // Remaining product terms.
  for (int i = 1; i < product_terms.size(); ++i) {
    const ProductTerm& previous = product_terms[i - 1];
    const ProductTerm& current = product_terms[i];

    // Each (row, col) block counts only once. This check depends on
    // product_terms being sorted on (row, col).
    if (current.row != previous.row || current.col != previous.col) {
      (*row_nnz)[current.row] += blocks[current.col].size;
      num_nonzeros += blocks[current.row].size * blocks[current.col].size;
    }
  }

  return num_nonzeros;
}

InnerProductComputer::InnerProductComputer(const BlockSparseMatrix& m,
                                           const int start_row_block,
                                           const int end_row_block)
    : m_(m), start_row_block_(start_row_block), end_row_block_(end_row_block) {}

// Compute the sparsity structure of the product m.transpose() * m
// and create a CompressedRowSparseMatrix corresponding to it.
//
// Also compute the "program" vector, which for every term in the
// block outer product provides the information for the entry in the
// values array of the result matrix where it should be accumulated.
//
// Since the entries of the program are the same for rows with the
// same sparsity structure, the program only stores the result for one
// row per row block. The Compute function reuses this information for
// each row in the row block.
//
// product_storage_type controls the form of the output matrix. It
// can be LOWER_TRIANGULAR or UPPER_TRIANGULAR.
std::unique_ptr<InnerProductComputer> InnerProductComputer::Create(
    const BlockSparseMatrix& m,
    CompressedRowSparseMatrix::StorageType product_storage_type) {
  return InnerProductComputer::Create(
      m, 0, m.block_structure()->rows.size(), product_storage_type);
}

std::unique_ptr<InnerProductComputer> InnerProductComputer::Create(
    const BlockSparseMatrix& m,
    const int start_row_block,
    const int end_row_block,
    CompressedRowSparseMatrix::StorageType product_storage_type) {
  CHECK(product_storage_type ==
            CompressedRowSparseMatrix::StorageType::LOWER_TRIANGULAR ||
        product_storage_type ==
            CompressedRowSparseMatrix::StorageType::UPPER_TRIANGULAR);
  CHECK_GT(m.num_nonzeros(), 0)
      << "Congratulations, you found a bug in Ceres. Please report it.";
  std::unique_ptr<InnerProductComputer> inner_product_computer(
      new InnerProductComputer(m, start_row_block, end_row_block));
  inner_product_computer->Init(product_storage_type);
  return inner_product_computer;
}
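
// A minimal usage sketch (illustrative only; it assumes the result()
// accessor declared in inner_product_computer.h):
//
//   auto inner_product_computer = InnerProductComputer::Create(
//       m, CompressedRowSparseMatrix::StorageType::LOWER_TRIANGULAR);
//   inner_product_computer->Compute();
//   const CompressedRowSparseMatrix& mtm = inner_product_computer->result();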

void InnerProductComputer::Init(
    const CompressedRowSparseMatrix::StorageType product_storage_type) {
  std::vector<InnerProductComputer::ProductTerm> product_terms;
  const CompressedRowBlockStructure* bs = m_.block_structure();

  // Given the input matrix m in Block Sparse format
  //     (row_block, col_block)
  // represent each block multiplication
  //     (row_block, col_block1)' X (row_block, col_block2)
  // by its product term:
  //     (col_block1, col_block2, index)
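  //
  // For example (illustrative): a row block whose cells lie in column
  // blocks {1, 3} contributes the terms (1, 1), (3, 1), (3, 3) when the
  // product is stored LOWER_TRIANGULAR, and (1, 1), (1, 3), (3, 3) when
  // it is stored UPPER_TRIANGULAR.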
  for (int row_block = start_row_block_; row_block < end_row_block_;
       ++row_block) {
    const CompressedRow& row = bs->rows[row_block];
    for (int c1 = 0; c1 < row.cells.size(); ++c1) {
      const Cell& cell1 = row.cells[c1];
      int c2_begin, c2_end;
      if (product_storage_type ==
          CompressedRowSparseMatrix::StorageType::LOWER_TRIANGULAR) {
        c2_begin = 0;
        c2_end = c1 + 1;
      } else {
        c2_begin = c1;
        c2_end = row.cells.size();
      }

      for (int c2 = c2_begin; c2 < c2_end; ++c2) {
        const Cell& cell2 = row.cells[c2];
        product_terms.emplace_back(
            cell1.block_id, cell2.block_id, product_terms.size());
      }
    }
  }

  std::sort(product_terms.begin(), product_terms.end());
  ComputeOffsetsAndCreateResultMatrix(product_storage_type, product_terms);
}

void InnerProductComputer::ComputeOffsetsAndCreateResultMatrix(
    const CompressedRowSparseMatrix::StorageType product_storage_type,
    const std::vector<InnerProductComputer::ProductTerm>& product_terms) {
  const std::vector<Block>& col_blocks = m_.block_structure()->cols;

  std::vector<int> row_block_nnz;
  const int num_nonzeros = ComputeNonzeros(product_terms, &row_block_nnz);

  result_ = CreateResultMatrix(product_storage_type, num_nonzeros);

  // Populate the row non-zero counts in the result matrix.
  int* crsm_rows = result_->mutable_rows();
  crsm_rows[0] = 0;
  for (int i = 0; i < col_blocks.size(); ++i) {
    for (int j = 0; j < col_blocks[i].size; ++j, ++crsm_rows) {
      *(crsm_rows + 1) = *crsm_rows + row_block_nnz[i];
    }
  }
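  // Continuing the illustrative example from ComputeNonzeros above: with
  // column block sizes {2, 3} and row_block_nnz = {2, 5}, the rows array
  // becomes {0, 2, 4, 9, 14, 19}, whose last entry equals num_nonzeros.
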
  result_offsets_.resize(product_terms.size());
  if (num_nonzeros == 0) {
    return;
  }

  // The following macro FILL_CRSM_COL_BLOCK is key to understanding
  // how this class works.
  //
  // It does two things.
  //
  // Sets the value for the current term in the result_offsets_ array
  // and populates the cols array of the result matrix.
  //
  // row_block and col_block, as the names imply, refer to the row and
  // column blocks of the current term.
  //
  // nnz is the number of nonzeros in the result matrix at the
  // beginning of the first row of row_block.
  //
  // col_nnz is the number of nonzeros in the first row of the row
  // block that occur before the current column block, i.e. this is the
  // sum of the sizes of all the column blocks in this row block that
  // came before this column block.
  //
  // Given these two numbers and the total number of nonzeros in this
  // row (nnz_in_row), we can now populate the cols array as follows:
  //
  // nnz + j * nnz_in_row is the beginning of the j^th row.
  //
  // nnz + j * nnz_in_row + col_nnz is the beginning of the column
  // block in the j^th row.
  //
  // nnz + j * nnz_in_row + col_nnz + k is then the j^th row and the
  // k^th column of the product block, whose value is
  //
  // col_blocks[col_block].position + k, which is the column number of
  // the k^th column of the current column block.
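  //
  // A worked instance (illustrative, continuing the {2, 3} column-block
  // example): for the term (row_block = 1, col_block = 1), the row block
  // starts at nnz = 4, the preceding column block of size 2 gives
  // col_nnz = 2, and nnz_in_row = 5; the offset stored for this term is
  // therefore 4 + 2 = 6, and the cols entries 4 + j * 5 + 2 + k for
  // j, k in {0, 1, 2} are set to col_blocks[1].position + k.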
#define FILL_CRSM_COL_BLOCK                                \
  const int row_block = current->row;                      \
  const int col_block = current->col;                      \
  const int nnz_in_row = row_block_nnz[row_block];         \
  int* crsm_cols = result_->mutable_cols();                \
  result_offsets_[current->index] = nnz + col_nnz;         \
  for (int j = 0; j < col_blocks[row_block].size; ++j) {   \
    for (int k = 0; k < col_blocks[col_block].size; ++k) { \
      crsm_cols[nnz + j * nnz_in_row + col_nnz + k] =      \
          col_blocks[col_block].position + k;              \
    }                                                      \
  }

  int col_nnz = 0;
  int nnz = 0;

  // Process the first term.
  const InnerProductComputer::ProductTerm* current = product_terms.data();
  FILL_CRSM_COL_BLOCK;

  // Process the rest of the terms.
  for (int i = 1; i < product_terms.size(); ++i) {
    current = &product_terms[i];
    const InnerProductComputer::ProductTerm* previous = &product_terms[i - 1];

    // If the current term is the same as the previous term, then it
    // stores its product at the same location as the previous term.
    if (previous->row == current->row && previous->col == current->col) {
      result_offsets_[current->index] = result_offsets_[previous->index];
      continue;
    }

    if (previous->row == current->row) {
      // If the current and previous terms are in the same row block,
      // then they differ in the column block, in which case advance
      // col_nnz by the column size of the previous term.
      col_nnz += col_blocks[previous->col].size;
    } else {
      // If we have moved to a new row block, then col_nnz is zero,
      // and nnz is set to the beginning of the row block.
      col_nnz = 0;
      nnz += row_block_nnz[previous->row] * col_blocks[previous->row].size;
    }

    FILL_CRSM_COL_BLOCK;
  }
}

// Use the result_offsets_ array to numerically compute the product
// m' * m and store it in result_.
//
// TODO(sameeragarwal): Multithreading support.
void InnerProductComputer::Compute() {
  const double* m_values = m_.values();
  const CompressedRowBlockStructure* bs = m_.block_structure();

  const CompressedRowSparseMatrix::StorageType storage_type =
      result_->storage_type();
  result_->SetZero();
  double* values = result_->mutable_values();
  const int* rows = result_->rows();
  int cursor = 0;

  // Iterate row blocks.
  for (int r = start_row_block_; r < end_row_block_; ++r) {
    const CompressedRow& m_row = bs->rows[r];
    for (int c1 = 0; c1 < m_row.cells.size(); ++c1) {
      const Cell& cell1 = m_row.cells[c1];
      const int c1_size = bs->cols[cell1.block_id].size;
      const int row_nnz = rows[bs->cols[cell1.block_id].position + 1] -
                          rows[bs->cols[cell1.block_id].position];

      int c2_begin, c2_end;
      if (storage_type ==
          CompressedRowSparseMatrix::StorageType::LOWER_TRIANGULAR) {
        c2_begin = 0;
        c2_end = c1 + 1;
      } else {
        c2_begin = c1;
        c2_end = m_row.cells.size();
      }

      for (int c2 = c2_begin; c2 < c2_end; ++c2, ++cursor) {
        const Cell& cell2 = m_row.cells[c2];
        const int c2_size = bs->cols[cell2.block_id].size;
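        // Accumulate cell1' * cell2 into the result block starting at
        // values + result_offsets_[cursor]; consecutive rows of that block
        // are row_nnz entries apart in the values array. (The final
        // template argument of 1 selects the accumulating variant of
        // MatrixTransposeMatrixMultiply in small_blas.h.)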
        // clang-format off
        MatrixTransposeMatrixMultiply<Eigen::Dynamic, Eigen::Dynamic,
                                      Eigen::Dynamic, Eigen::Dynamic, 1>(
            m_values + cell1.position,
            m_row.block.size, c1_size,
            m_values + cell2.position,
            m_row.block.size, c2_size,
            values + result_offsets_[cursor],
            0, 0, c1_size, row_nnz);
        // clang-format on
      }
    }
  }

  CHECK_EQ(cursor, result_offsets_.size());
}

}  // namespace ceres::internal