|  | // Ceres Solver - A fast non-linear least squares minimizer | 
|  | // Copyright 2023 Google Inc. All rights reserved. | 
|  | // http://ceres-solver.org/ | 
|  | // | 
|  | // Redistribution and use in source and binary forms, with or without | 
|  | // modification, are permitted provided that the following conditions are met: | 
|  | // | 
|  | // * Redistributions of source code must retain the above copyright notice, | 
|  | //   this list of conditions and the following disclaimer. | 
|  | // * Redistributions in binary form must reproduce the above copyright notice, | 
|  | //   this list of conditions and the following disclaimer in the documentation | 
|  | //   and/or other materials provided with the distribution. | 
|  | // * Neither the name of Google Inc. nor the names of its contributors may be | 
|  | //   used to endorse or promote products derived from this software without | 
|  | //   specific prior written permission. | 
|  | // | 
|  | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | 
|  | // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | 
|  | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | 
|  | // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE | 
|  | // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | 
|  | // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | 
|  | // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | 
|  | // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | 
|  | // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | 
|  | // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | 
|  | // POSSIBILITY OF SUCH DAMAGE. | 
|  | // | 
|  | // Author: sameeragarwal@google.com (Sameer Agarwal) | 
|  |  | 
|  | #include "ceres/implicit_schur_complement.h" | 
|  |  | 
|  | #include "Eigen/Dense" | 
|  | #include "ceres/block_sparse_matrix.h" | 
|  | #include "ceres/block_structure.h" | 
|  | #include "ceres/internal/eigen.h" | 
|  | #include "ceres/linear_solver.h" | 
|  | #include "ceres/parallel_for.h" | 
|  | #include "ceres/parallel_vector_ops.h" | 
|  | #include "ceres/types.h" | 
|  | #include "glog/logging.h" | 
|  |  | 
|  | namespace ceres::internal { | 
|  |  | 
|  | ImplicitSchurComplement::ImplicitSchurComplement( | 
|  | const LinearSolver::Options& options) | 
|  | : options_(options) {} | 
|  |  | 
|  | void ImplicitSchurComplement::Init(const BlockSparseMatrix& A, | 
|  | const double* D, | 
|  | const double* b) { | 
|  | // Since initialization is reasonably heavy, perhaps we can save on | 
|  | // constructing a new object everytime. | 
|  | if (A_ == nullptr) { | 
|  | A_ = PartitionedMatrixViewBase::Create(options_, A); | 
|  | } | 
|  |  | 
|  | D_ = D; | 
|  | b_ = b; | 
|  |  | 
|  | compute_ftf_inverse_ = | 
|  | options_.use_spse_initialization || | 
|  | options_.preconditioner_type == JACOBI || | 
|  | options_.preconditioner_type == SCHUR_POWER_SERIES_EXPANSION; | 
|  |  | 
|  | // Initialize temporary storage and compute the block diagonals of | 
|  | // E'E and F'E. | 
|  | if (block_diagonal_EtE_inverse_ == nullptr) { | 
|  | block_diagonal_EtE_inverse_ = A_->CreateBlockDiagonalEtE(); | 
|  | if (compute_ftf_inverse_) { | 
|  | block_diagonal_FtF_inverse_ = A_->CreateBlockDiagonalFtF(); | 
|  | } | 
|  | rhs_.resize(A_->num_cols_f()); | 
|  | rhs_.setZero(); | 
|  | tmp_rows_.resize(A_->num_rows()); | 
|  | tmp_e_cols_.resize(A_->num_cols_e()); | 
|  | tmp_e_cols_2_.resize(A_->num_cols_e()); | 
|  | tmp_f_cols_.resize(A_->num_cols_f()); | 
|  | } else { | 
|  | A_->UpdateBlockDiagonalEtE(block_diagonal_EtE_inverse_.get()); | 
|  | if (compute_ftf_inverse_) { | 
|  | A_->UpdateBlockDiagonalFtF(block_diagonal_FtF_inverse_.get()); | 
|  | } | 
|  | } | 
|  |  | 
|  | // The block diagonals of the augmented linear system contain | 
|  | // contributions from the diagonal D if it is non-null. Add that to | 
|  | // the block diagonals and invert them. | 
|  | AddDiagonalAndInvert(D_, block_diagonal_EtE_inverse_.get()); | 
|  | if (compute_ftf_inverse_) { | 
|  | AddDiagonalAndInvert((D_ == nullptr) ? nullptr : D_ + A_->num_cols_e(), | 
|  | block_diagonal_FtF_inverse_.get()); | 
|  | } | 
|  |  | 
|  | // Compute the RHS of the Schur complement system. | 
|  | UpdateRhs(); | 
|  | } | 
|  |  | 
|  | // Evaluate the product | 
|  | // | 
|  | //   Sx = [F'F - F'E (E'E)^-1 E'F]x | 
|  | // | 
|  | // By breaking it down into individual matrix vector products | 
|  | // involving the matrices E and F. This is implemented using a | 
|  | // PartitionedMatrixView of the input matrix A. | 
|  | void ImplicitSchurComplement::RightMultiplyAndAccumulate(const double* x, | 
|  | double* y) const { | 
|  | // y1 = F x | 
|  | ParallelSetZero(options_.context, options_.num_threads, tmp_rows_); | 
|  | A_->RightMultiplyAndAccumulateF(x, tmp_rows_.data()); | 
|  |  | 
|  | // y2 = E' y1 | 
|  | ParallelSetZero(options_.context, options_.num_threads, tmp_e_cols_); | 
|  | A_->LeftMultiplyAndAccumulateE(tmp_rows_.data(), tmp_e_cols_.data()); | 
|  |  | 
|  | // y3 = -(E'E)^-1 y2 | 
|  | ParallelSetZero(options_.context, options_.num_threads, tmp_e_cols_2_); | 
|  | block_diagonal_EtE_inverse_->RightMultiplyAndAccumulate(tmp_e_cols_.data(), | 
|  | tmp_e_cols_2_.data(), | 
|  | options_.context, | 
|  | options_.num_threads); | 
|  |  | 
|  | ParallelAssign( | 
|  | options_.context, options_.num_threads, tmp_e_cols_2_, -tmp_e_cols_2_); | 
|  |  | 
|  | // y1 = y1 + E y3 | 
|  | A_->RightMultiplyAndAccumulateE(tmp_e_cols_2_.data(), tmp_rows_.data()); | 
|  |  | 
|  | // y5 = D * x | 
|  | if (D_ != nullptr) { | 
|  | ConstVectorRef Dref(D_ + A_->num_cols_e(), num_cols()); | 
|  | VectorRef y_cols(y, num_cols()); | 
|  | ParallelAssign( | 
|  | options_.context, | 
|  | options_.num_threads, | 
|  | y_cols, | 
|  | (Dref.array().square() * ConstVectorRef(x, num_cols()).array())); | 
|  | } else { | 
|  | ParallelSetZero(options_.context, options_.num_threads, y, num_cols()); | 
|  | } | 
|  |  | 
|  | // y = y5 + F' y1 | 
|  | A_->LeftMultiplyAndAccumulateF(tmp_rows_.data(), y); | 
|  | } | 
|  |  | 
|  | void ImplicitSchurComplement::InversePowerSeriesOperatorRightMultiplyAccumulate( | 
|  | const double* x, double* y) const { | 
|  | CHECK(compute_ftf_inverse_); | 
|  | // y1 = F x | 
|  | ParallelSetZero(options_.context, options_.num_threads, tmp_rows_); | 
|  | A_->RightMultiplyAndAccumulateF(x, tmp_rows_.data()); | 
|  |  | 
|  | // y2 = E' y1 | 
|  | ParallelSetZero(options_.context, options_.num_threads, tmp_e_cols_); | 
|  | A_->LeftMultiplyAndAccumulateE(tmp_rows_.data(), tmp_e_cols_.data()); | 
|  |  | 
|  | // y3 = (E'E)^-1 y2 | 
|  | ParallelSetZero(options_.context, options_.num_threads, tmp_e_cols_2_); | 
|  | block_diagonal_EtE_inverse_->RightMultiplyAndAccumulate(tmp_e_cols_.data(), | 
|  | tmp_e_cols_2_.data(), | 
|  | options_.context, | 
|  | options_.num_threads); | 
|  | // y1 = E y3 | 
|  | ParallelSetZero(options_.context, options_.num_threads, tmp_rows_); | 
|  | A_->RightMultiplyAndAccumulateE(tmp_e_cols_2_.data(), tmp_rows_.data()); | 
|  |  | 
|  | // y4 = F' y1 | 
|  | ParallelSetZero(options_.context, options_.num_threads, tmp_f_cols_); | 
|  | A_->LeftMultiplyAndAccumulateF(tmp_rows_.data(), tmp_f_cols_.data()); | 
|  |  | 
|  | // y += (F'F)^-1 y4 | 
|  | block_diagonal_FtF_inverse_->RightMultiplyAndAccumulate( | 
|  | tmp_f_cols_.data(), y, options_.context, options_.num_threads); | 
|  | } | 
|  |  | 
|  | // Given a block diagonal matrix and an optional array of diagonal | 
|  | // entries D, add them to the diagonal of the matrix and compute the | 
|  | // inverse of each diagonal block. | 
|  | void ImplicitSchurComplement::AddDiagonalAndInvert( | 
|  | const double* D, BlockSparseMatrix* block_diagonal) { | 
|  | const CompressedRowBlockStructure* block_diagonal_structure = | 
|  | block_diagonal->block_structure(); | 
|  | ParallelFor(options_.context, | 
|  | 0, | 
|  | block_diagonal_structure->rows.size(), | 
|  | options_.num_threads, | 
|  | [block_diagonal_structure, D, block_diagonal](int row_block_id) { | 
|  | auto& row = block_diagonal_structure->rows[row_block_id]; | 
|  | const int row_block_pos = row.block.position; | 
|  | const int row_block_size = row.block.size; | 
|  | const Cell& cell = row.cells[0]; | 
|  | MatrixRef m(block_diagonal->mutable_values() + cell.position, | 
|  | row_block_size, | 
|  | row_block_size); | 
|  |  | 
|  | if (D != nullptr) { | 
|  | ConstVectorRef d(D + row_block_pos, row_block_size); | 
|  | m += d.array().square().matrix().asDiagonal(); | 
|  | } | 
|  |  | 
|  | m = m.selfadjointView<Eigen::Upper>().llt().solve( | 
|  | Matrix::Identity(row_block_size, row_block_size)); | 
|  | }); | 
|  | } | 
|  |  | 
|  | // Similar to RightMultiplyAndAccumulate, use the block structure of the matrix | 
|  | // A to compute y = (E'E)^-1 (E'b - E'F x). | 
|  | void ImplicitSchurComplement::BackSubstitute(const double* x, double* y) { | 
|  | const int num_cols_e = A_->num_cols_e(); | 
|  | const int num_cols_f = A_->num_cols_f(); | 
|  | const int num_cols = A_->num_cols(); | 
|  | const int num_rows = A_->num_rows(); | 
|  |  | 
|  | // y1 = F x | 
|  | ParallelSetZero(options_.context, options_.num_threads, tmp_rows_); | 
|  | A_->RightMultiplyAndAccumulateF(x, tmp_rows_.data()); | 
|  |  | 
|  | // y2 = b - y1 | 
|  | ParallelAssign(options_.context, | 
|  | options_.num_threads, | 
|  | tmp_rows_, | 
|  | ConstVectorRef(b_, num_rows) - tmp_rows_); | 
|  |  | 
|  | // y3 = E' y2 | 
|  | ParallelSetZero(options_.context, options_.num_threads, tmp_e_cols_); | 
|  | A_->LeftMultiplyAndAccumulateE(tmp_rows_.data(), tmp_e_cols_.data()); | 
|  |  | 
|  | // y = (E'E)^-1 y3 | 
|  | ParallelSetZero(options_.context, options_.num_threads, y, num_cols); | 
|  | block_diagonal_EtE_inverse_->RightMultiplyAndAccumulate( | 
|  | tmp_e_cols_.data(), y, options_.context, options_.num_threads); | 
|  |  | 
|  | // The full solution vector y has two blocks. The first block of | 
|  | // variables corresponds to the eliminated variables, which we just | 
|  | // computed via back substitution. The second block of variables | 
|  | // corresponds to the Schur complement system, so we just copy those | 
|  | // values from the solution to the Schur complement. | 
|  | VectorRef y_cols_f(y + num_cols_e, num_cols_f); | 
|  | ParallelAssign(options_.context, | 
|  | options_.num_threads, | 
|  | y_cols_f, | 
|  | ConstVectorRef(x, num_cols_f)); | 
|  | } | 
|  |  | 
|  | // Compute the RHS of the Schur complement system. | 
|  | // | 
|  | // rhs = F'b - F'E (E'E)^-1 E'b | 
|  | // | 
|  | // Like BackSubstitute, we use the block structure of A to implement | 
|  | // this using a series of matrix vector products. | 
|  | void ImplicitSchurComplement::UpdateRhs() { | 
|  | // y1 = E'b | 
|  | ParallelSetZero(options_.context, options_.num_threads, tmp_e_cols_); | 
|  | A_->LeftMultiplyAndAccumulateE(b_, tmp_e_cols_.data()); | 
|  |  | 
|  | // y2 = (E'E)^-1 y1 | 
|  | ParallelSetZero(options_.context, options_.num_threads, tmp_e_cols_2_); | 
|  | block_diagonal_EtE_inverse_->RightMultiplyAndAccumulate(tmp_e_cols_.data(), | 
|  | tmp_e_cols_2_.data(), | 
|  | options_.context, | 
|  | options_.num_threads); | 
|  |  | 
|  | // y3 = E y2 | 
|  | ParallelSetZero(options_.context, options_.num_threads, tmp_rows_); | 
|  | A_->RightMultiplyAndAccumulateE(tmp_e_cols_2_.data(), tmp_rows_.data()); | 
|  |  | 
|  | // y3 = b - y3 | 
|  | ParallelAssign(options_.context, | 
|  | options_.num_threads, | 
|  | tmp_rows_, | 
|  | ConstVectorRef(b_, A_->num_rows()) - tmp_rows_); | 
|  |  | 
|  | // rhs = F' y3 | 
|  | ParallelSetZero(options_.context, options_.num_threads, rhs_); | 
|  | A_->LeftMultiplyAndAccumulateF(tmp_rows_.data(), rhs_.data()); | 
|  | } | 
|  |  | 
|  | }  // namespace ceres::internal |