| // Ceres Solver - A fast non-linear least squares minimizer |
| // Copyright 2017 Google Inc. All rights reserved. |
| // http://ceres-solver.org/ |
| // |
| // Redistribution and use in source and binary forms, with or without |
| // modification, are permitted provided that the following conditions are met: |
| // |
| // * Redistributions of source code must retain the above copyright notice, |
| // this list of conditions and the following disclaimer. |
| // * Redistributions in binary form must reproduce the above copyright notice, |
| // this list of conditions and the following disclaimer in the documentation |
| // and/or other materials provided with the distribution. |
| // * Neither the name of Google Inc. nor the names of its contributors may be |
| // used to endorse or promote products derived from this software without |
| // specific prior written permission. |
| // |
| // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
| // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
| // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
| // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
| // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
| // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
| // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
| // POSSIBILITY OF SUCH DAMAGE. |
| // |
| // Author: sameeragarwal@google.com (Sameer Agarwal) |
| |
| #include "ceres/inner_product_computer.h" |
| |
| #include <algorithm> |
| |
| namespace ceres { |
| namespace internal { |
| namespace { |
| |
| // Compute the product (in MATLAB notation) |
| // |
| // c(0:a_cols, 0:b_cols) = a' * b |
| // |
| // Where: |
| // |
| // a is ab_rows x a_cols |
| // b is ab_rows x b_cols |
| // c is a_cos x c_col_stride |
| // |
| // a, b and c are row-major matrices. |
| // |
| // Performance note: |
| // ---------------- |
| // |
| // Technically this function is a repeat of a similarly named function |
| // in small_blas.h but its performance is considerably better than |
| // that of the version there due to the way it accesses memory. |
| // |
| // TODO(sameeragarwal): Measure and tune the performance of |
| // small_blas.h based on the insights gained here. |
| EIGEN_STRONG_INLINE void MatrixTransposeMatrixMultiply(const int ab_rows, |
| const double* a, |
| const int a_cols, |
| const double* b, |
| const int b_cols, |
| double* c, |
| int c_cols) { |
| // Compute c as the sum of ab_rows, rank 1 outer products of the |
| // corresponding rows of a and b. |
| for (int r = 0; r < ab_rows; ++r) { |
| double* c_r = c; |
| for (int i1 = 0; i1 < a_cols; ++i1) { |
| const double a_v = a[i1]; |
| for (int i2 = 0; i2 < b_cols; ++i2) { |
| c_r[i2] += a_v * b[i2]; |
| } |
| c_r += c_cols; |
| } |
| a += a_cols; |
| b += b_cols; |
| } |
| } |
| |
| } // namespace |
| |
| // Create the CompressedRowSparseMatrix matrix that will contain the |
| // inner product. |
| // |
| // storage_type controls whether the result matrix contains the upper |
| // or the lower triangular part of the product. |
| // |
| // num_nonzeros is the number of non-zeros in the result matrix. |
| CompressedRowSparseMatrix* InnerProductComputer::CreateResultMatrix( |
| const CompressedRowSparseMatrix::StorageType storage_type, |
| const int num_nonzeros) { |
| CompressedRowSparseMatrix* matrix = |
| new CompressedRowSparseMatrix(m_.num_cols(), m_.num_cols(), num_nonzeros); |
| matrix->set_storage_type(storage_type); |
| |
| const CompressedRowBlockStructure* bs = m_.block_structure(); |
| const std::vector<Block>& blocks = bs->cols; |
| matrix->mutable_row_blocks()->resize(blocks.size()); |
| matrix->mutable_col_blocks()->resize(blocks.size()); |
| for (int i = 0; i < blocks.size(); ++i) { |
| (*(matrix->mutable_row_blocks()))[i] = blocks[i].size; |
| (*(matrix->mutable_col_blocks()))[i] = blocks[i].size; |
| } |
| |
| return matrix; |
| } |
| |
| // Given the set of product terms in the inner product, return the |
| // total number of non-zeros in the result and for each row block of |
| // the result matrix, compute the number of non-zeros in any one row |
| // of the row block. |
| int InnerProductComputer::ComputeNonzeros( |
| const std::vector<InnerProductComputer::ProductTerm>& product_terms, |
| std::vector<int>* row_nnz) { |
| const CompressedRowBlockStructure* bs = m_.block_structure(); |
| const std::vector<Block>& blocks = bs->cols; |
| |
| row_nnz->resize(blocks.size()); |
| std::fill(row_nnz->begin(), row_nnz->end(), 0); |
| |
| // First product term. |
| (*row_nnz)[product_terms[0].row] = blocks[product_terms[0].col].size; |
| int num_nonzeros = |
| blocks[product_terms[0].row].size * blocks[product_terms[0].col].size; |
| |
| // Remaining product terms. |
| for (int i = 1; i < product_terms.size(); ++i) { |
| const ProductTerm& previous = product_terms[i - 1]; |
| const ProductTerm& current = product_terms[i]; |
| |
| // Each (row, col) block counts only once. |
| // This check depends on product sorted on (row, col). |
| if (current.row != previous.row || current.col != previous.col) { |
| (*row_nnz)[current.row] += blocks[current.col].size; |
| num_nonzeros += blocks[current.row].size * blocks[current.col].size; |
| } |
| } |
| |
| return num_nonzeros; |
| } |
| |
| InnerProductComputer::InnerProductComputer(const BlockSparseMatrix& m, |
| const int start_row_block, |
| const int end_row_block) |
| : m_(m), start_row_block_(start_row_block), end_row_block_(end_row_block) {} |
| |
| // Compute the sparsity structure of the product m.transpose() * m |
| // and create a CompressedRowSparseMatrix corresponding to it. |
| // |
| // Also compute the "program" vector, which for every term in the |
| // block outer product provides the information for the entry in the |
| // values array of the result matrix where it should be accumulated. |
| // |
| // Since the entries of the program are the same for rows with the |
| // same sparsity structure, the program only stores the result for one |
| // row per row block. The Compute function reuses this information for |
| // each row in the row block. |
| // |
| // product_storage_type controls the form of the output matrix. It |
| // can be LOWER_TRIANGULAR or UPPER_TRIANGULAR. |
| InnerProductComputer* InnerProductComputer::Create( |
| const BlockSparseMatrix& m, |
| CompressedRowSparseMatrix::StorageType product_storage_type) { |
| return InnerProductComputer::Create( |
| m, 0, m.block_structure()->rows.size(), product_storage_type); |
| } |
| |
| InnerProductComputer* InnerProductComputer::Create( |
| const BlockSparseMatrix& m, |
| const int start_row_block, |
| const int end_row_block, |
| CompressedRowSparseMatrix::StorageType product_storage_type) { |
| CHECK(product_storage_type == CompressedRowSparseMatrix::LOWER_TRIANGULAR || |
| product_storage_type == CompressedRowSparseMatrix::UPPER_TRIANGULAR); |
| CHECK_GT(m.num_nonzeros(), 0) |
| << "Congratulations, you found a bug in Ceres. Please report it."; |
| InnerProductComputer* inner_product_computer = |
| new InnerProductComputer(m, start_row_block, end_row_block); |
| inner_product_computer->Init(product_storage_type); |
| return inner_product_computer; |
| } |
| |
| void InnerProductComputer::Init( |
| const CompressedRowSparseMatrix::StorageType product_storage_type) { |
| std::vector<InnerProductComputer::ProductTerm> product_terms; |
| const CompressedRowBlockStructure* bs = m_.block_structure(); |
| |
| // Give input matrix m in Block Sparse format |
| // (row_block, col_block) |
| // represent each block multiplication |
| // (row_block, col_block1)' X (row_block, col_block2) |
| // by its product term: |
| // (col_block1, col_block2, index) |
| for (int row_block = start_row_block_; row_block < end_row_block_; |
| ++row_block) { |
| const CompressedRow& row = bs->rows[row_block]; |
| for (int c1 = 0; c1 < row.cells.size(); ++c1) { |
| const Cell& cell1 = row.cells[c1]; |
| int c2_begin, c2_end; |
| if (product_storage_type == CompressedRowSparseMatrix::LOWER_TRIANGULAR) { |
| c2_begin = 0; |
| c2_end = c1 + 1; |
| } else { |
| c2_begin = c1; |
| c2_end = row.cells.size(); |
| } |
| |
| for (int c2 = c2_begin; c2 < c2_end; ++c2) { |
| const Cell& cell2 = row.cells[c2]; |
| product_terms.push_back(InnerProductComputer::ProductTerm( |
| cell1.block_id, cell2.block_id, product_terms.size())); |
| } |
| } |
| } |
| |
| std::sort(product_terms.begin(), product_terms.end()); |
| ComputeOffsetsAndCreateResultMatrix(product_storage_type, product_terms); |
| } |
| |
| void InnerProductComputer::ComputeOffsetsAndCreateResultMatrix( |
| const CompressedRowSparseMatrix::StorageType product_storage_type, |
| const std::vector<InnerProductComputer::ProductTerm>& product_terms) { |
| const std::vector<Block>& col_blocks = m_.block_structure()->cols; |
| |
| std::vector<int> row_block_nnz; |
| const int num_nonzeros = ComputeNonzeros(product_terms, &row_block_nnz); |
| |
| result_.reset(CreateResultMatrix(product_storage_type, num_nonzeros)); |
| |
| // Populate the row non-zero counts in the result matrix. |
| int* crsm_rows = result_->mutable_rows(); |
| crsm_rows[0] = 0; |
| for (int i = 0; i < col_blocks.size(); ++i) { |
| for (int j = 0; j < col_blocks[i].size; ++j, ++crsm_rows) { |
| *(crsm_rows + 1) = *crsm_rows + row_block_nnz[i]; |
| } |
| } |
| |
| // The following macro FILL_CRSM_COL_BLOCK is key to understanding |
| // how this class works. |
| // |
| // It does two things. |
| // |
| // Sets the value for the current term in the result_offsets_ array |
| // and populates the cols array of the result matrix. |
| // |
| // row_block and col_block as the names imply, refer to the row and |
| // column blocks of the current term. |
| // |
| // row_nnz is the number of nonzeros in the result_matrix at the |
| // beginning of the first row of row_block. |
| // |
| // col_nnz is the number of nonzeros in the first row of the row |
| // block that occur before the current column block, i.e. this is |
| // sum of the sizes of all the column blocks in this row block that |
| // came before this column block. |
| // |
| // Given these two numbers and the total number of nonzeros in this |
| // row (nnz_in_row), we can now populate the cols array as follows: |
| // |
| // nnz + j * nnz_in_row is the beginning of the j^th row. |
| // |
| // nnz + j * nnz_in_row + col_nnz is the beginning of the column |
| // block in the j^th row. |
| // |
| // nnz + j * nnz_in_row + col_nnz + k is then the j^th row and the |
| // k^th column of the product block, whose value is |
| // |
| // col_blocks[col_block].position + k, which is the column number of |
| // the k^th column of the current column block. |
| #define FILL_CRSM_COL_BLOCK \ |
| const int row_block = current->row; \ |
| const int col_block = current->col; \ |
| const int nnz_in_row = row_block_nnz[row_block]; \ |
| int* crsm_cols = result_->mutable_cols(); \ |
| result_offsets_[current->index] = nnz + col_nnz; \ |
| for (int j = 0; j < col_blocks[row_block].size; ++j) { \ |
| for (int k = 0; k < col_blocks[col_block].size; ++k) { \ |
| crsm_cols[nnz + j * nnz_in_row + col_nnz + k] = \ |
| col_blocks[col_block].position + k; \ |
| } \ |
| } |
| |
| result_offsets_.resize(product_terms.size()); |
| int col_nnz = 0; |
| int nnz = 0; |
| |
| // Process the first term. |
| const InnerProductComputer::ProductTerm* current = &product_terms[0]; |
| FILL_CRSM_COL_BLOCK; |
| |
| // Process the rest of the terms. |
| for (int i = 1; i < product_terms.size(); ++i) { |
| current = &product_terms[i]; |
| const InnerProductComputer::ProductTerm* previous = &product_terms[i - 1]; |
| |
| // If the current term is the same as the previous term, then it |
| // stores its product at the same location as the previous term. |
| if (previous->row == current->row && previous->col == current->col) { |
| result_offsets_[current->index] = result_offsets_[previous->index]; |
| continue; |
| } |
| |
| if (previous->row == current->row) { |
| // if the current and previous terms are in the same row block, |
| // then they differ in the column block, in which case advance |
| // col_nnz by the column size of the prevous term. |
| col_nnz += col_blocks[previous->col].size; |
| } else { |
| // If we have moved to a new row-block , then col_nnz is zero, |
| // and nnz is set to the beginning of the row block. |
| col_nnz = 0; |
| nnz += row_block_nnz[previous->row] * col_blocks[previous->row].size; |
| } |
| |
| FILL_CRSM_COL_BLOCK; |
| } |
| } |
| |
| // Use the results_offsets_ array to numerically compute the product |
| // m' * m and store it in result_. |
| // |
| // TODO(sameeragarwal): Multithreading support. |
| void InnerProductComputer::Compute() { |
| const double* m_values = m_.values(); |
| const CompressedRowBlockStructure* bs = m_.block_structure(); |
| |
| const CompressedRowSparseMatrix::StorageType storage_type = |
| result_->storage_type(); |
| result_->SetZero(); |
| double* values = result_->mutable_values(); |
| const int* rows = result_->rows(); |
| int cursor = 0; |
| |
| // Iterate row blocks. |
| for (int r = start_row_block_; r < end_row_block_; ++r) { |
| const CompressedRow& m_row = bs->rows[r]; |
| for (int c1 = 0; c1 < m_row.cells.size(); ++c1) { |
| const Cell& cell1 = m_row.cells[c1]; |
| const int c1_size = bs->cols[cell1.block_id].size; |
| const int row_nnz = rows[bs->cols[cell1.block_id].position + 1] - |
| rows[bs->cols[cell1.block_id].position]; |
| |
| int c2_begin, c2_end; |
| if (storage_type == CompressedRowSparseMatrix::LOWER_TRIANGULAR) { |
| c2_begin = 0; |
| c2_end = c1 + 1; |
| } else { |
| c2_begin = c1; |
| c2_end = m_row.cells.size(); |
| } |
| |
| for (int c2 = c2_begin; c2 < c2_end; ++c2, ++cursor) { |
| const Cell& cell2 = m_row.cells[c2]; |
| const int c2_size = bs->cols[cell2.block_id].size; |
| MatrixTransposeMatrixMultiply(m_row.block.size, |
| m_values + cell1.position, c1_size, |
| m_values + cell2.position, c2_size, |
| values + result_offsets_[cursor], |
| row_nnz); |
| } |
| } |
| } |
| |
| CHECK_EQ(cursor, result_offsets_.size()); |
| } |
| |
| } // namespace internal |
| } // namespace ceres |