// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * Neither the name of Google Inc. nor the names of its contributors may be
// used to endorse or promote products derived from this software without
// specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
// Author: sameeragarwal@google.com (Sameer Agarwal)
#include "ceres/implicit_schur_complement.h"
#include "Eigen/Dense"
#include "absl/log/check.h"
#include "ceres/block_sparse_matrix.h"
#include "ceres/block_structure.h"
#include "ceres/internal/eigen.h"
#include "ceres/linear_solver.h"
#include "ceres/parallel_for.h"
#include "ceres/parallel_vector_ops.h"
#include "ceres/types.h"
namespace ceres::internal {
ImplicitSchurComplement::ImplicitSchurComplement(
const LinearSolver::Options& options)
: options_(options) {}
void ImplicitSchurComplement::Init(const BlockSparseMatrix& A,
                                   const double* D,
                                   const double* b) {
  // Since initialization is reasonably heavy, perhaps we can save on
  // constructing a new object every time.
  if (A_ == nullptr) {
    A_ = PartitionedMatrixViewBase::Create(options_, A);
  }

  D_ = D;
  b_ = b;
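
  // The inverse of the block diagonal of F'F is only needed when it will
  // actually be used, i.e. when the power series expansion based
  // initialization of the reduced system solution is enabled, or when the
  // preconditioner is JACOBI or SCHUR_POWER_SERIES_EXPANSION.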
  compute_ftf_inverse_ =
      options_.use_spse_initialization ||
      options_.preconditioner_type == JACOBI ||
      options_.preconditioner_type == SCHUR_POWER_SERIES_EXPANSION;

  // Initialize temporary storage and compute the block diagonals of
  // E'E and F'F.
  if (block_diagonal_EtE_inverse_ == nullptr) {
    block_diagonal_EtE_inverse_ = A_->CreateBlockDiagonalEtE();
    if (compute_ftf_inverse_) {
      block_diagonal_FtF_inverse_ = A_->CreateBlockDiagonalFtF();
    }
    rhs_.resize(A_->num_cols_f());
    rhs_.setZero();
    tmp_rows_.resize(A_->num_rows());
    tmp_e_cols_.resize(A_->num_cols_e());
    tmp_e_cols_2_.resize(A_->num_cols_e());
    tmp_f_cols_.resize(A_->num_cols_f());
  } else {
    A_->UpdateBlockDiagonalEtE(block_diagonal_EtE_inverse_.get());
    if (compute_ftf_inverse_) {
      A_->UpdateBlockDiagonalFtF(block_diagonal_FtF_inverse_.get());
    }
  }

  // The block diagonals of the augmented linear system contain
  // contributions from the diagonal D if it is non-null. Add that to
  // the block diagonals and invert them.
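  //
  // Concretely, for a non-null D the blocks stored in
  // block_diagonal_EtE_inverse_ and block_diagonal_FtF_inverse_ become
  //
  //   (E'E + D_e'D_e)^-1  and  (F'F + D_f'D_f)^-1,
  //
  // where D_e and D_f are the slices of D corresponding to the E and F
  // columns respectively.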
  AddDiagonalAndInvert(D_, block_diagonal_EtE_inverse_.get());
  if (compute_ftf_inverse_) {
    AddDiagonalAndInvert((D_ == nullptr) ? nullptr : D_ + A_->num_cols_e(),
                         block_diagonal_FtF_inverse_.get());
  }

  // Compute the RHS of the Schur complement system.
  UpdateRhs();
}

// Evaluate the product
//
//   Sx = [F'F - F'E (E'E)^-1 E'F]x
//
// By breaking it down into individual matrix vector products
// involving the matrices E and F. This is implemented using a
// PartitionedMatrixView of the input matrix A.
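//
// When a non-null diagonal D is passed to Init, the operator applied here is
// the Schur complement of the augmented system [A; diag(D)], i.e.
//
//   Sx = [F'F + D_f'D_f - F'E (E'E + D_e'D_e)^-1 E'F] x,
//
// with D_e and D_f the slices of D for the E and F columns; this is what the
// D dependent code below, together with the diagonals added in Init,
// implements.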
void ImplicitSchurComplement::RightMultiplyAndAccumulate(const double* x,
                                                         double* y) const {
  // y1 = F x
  ParallelSetZero(options_.context, options_.num_threads, tmp_rows_);
  A_->RightMultiplyAndAccumulateF(x, tmp_rows_.data());

  // y2 = E' y1
  ParallelSetZero(options_.context, options_.num_threads, tmp_e_cols_);
  A_->LeftMultiplyAndAccumulateE(tmp_rows_.data(), tmp_e_cols_.data());

  // y3 = -(E'E)^-1 y2
  ParallelSetZero(options_.context, options_.num_threads, tmp_e_cols_2_);
  block_diagonal_EtE_inverse_->RightMultiplyAndAccumulate(tmp_e_cols_.data(),
                                                          tmp_e_cols_2_.data(),
                                                          options_.context,
                                                          options_.num_threads);
  ParallelAssign(
      options_.context, options_.num_threads, tmp_e_cols_2_, -tmp_e_cols_2_);

  // y1 = y1 + E y3
  A_->RightMultiplyAndAccumulateE(tmp_e_cols_2_.data(), tmp_rows_.data());

  // y5 = D'D x (the entries of D are the square roots of the diagonal)
  if (D_ != nullptr) {
    ConstVectorRef Dref(D_ + A_->num_cols_e(), num_cols());
    VectorRef y_cols(y, num_cols());
    ParallelAssign(
        options_.context,
        options_.num_threads,
        y_cols,
        (Dref.array().square() * ConstVectorRef(x, num_cols()).array()));
  } else {
    ParallelSetZero(options_.context, options_.num_threads, y, num_cols());
  }

  // y = y5 + F' y1
  A_->LeftMultiplyAndAccumulateF(tmp_rows_.data(), y);
}

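// Accumulate the product
//
//   y += (F'F + D_f'D_f)^-1 F'E (E'E + D_e'D_e)^-1 E'F x
//
// using the same sequence of partitioned matrix-vector products as
// RightMultiplyAndAccumulate. Repeated application of this operator is the
// building block of the power series expansion approximation to the inverse
// of the Schur complement, so it is only available when compute_ftf_inverse_
// is true.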
void ImplicitSchurComplement::InversePowerSeriesOperatorRightMultiplyAccumulate(
    const double* x, double* y) const {
  CHECK(compute_ftf_inverse_);

  // y1 = F x
  ParallelSetZero(options_.context, options_.num_threads, tmp_rows_);
  A_->RightMultiplyAndAccumulateF(x, tmp_rows_.data());

  // y2 = E' y1
  ParallelSetZero(options_.context, options_.num_threads, tmp_e_cols_);
  A_->LeftMultiplyAndAccumulateE(tmp_rows_.data(), tmp_e_cols_.data());

  // y3 = (E'E)^-1 y2
  ParallelSetZero(options_.context, options_.num_threads, tmp_e_cols_2_);
  block_diagonal_EtE_inverse_->RightMultiplyAndAccumulate(tmp_e_cols_.data(),
                                                          tmp_e_cols_2_.data(),
                                                          options_.context,
                                                          options_.num_threads);

  // y1 = E y3
  ParallelSetZero(options_.context, options_.num_threads, tmp_rows_);
  A_->RightMultiplyAndAccumulateE(tmp_e_cols_2_.data(), tmp_rows_.data());

  // y4 = F' y1
  ParallelSetZero(options_.context, options_.num_threads, tmp_f_cols_);
  A_->LeftMultiplyAndAccumulateF(tmp_rows_.data(), tmp_f_cols_.data());

  // y += (F'F)^-1 y4
  block_diagonal_FtF_inverse_->RightMultiplyAndAccumulate(
      tmp_f_cols_.data(), y, options_.context, options_.num_threads);
}

// Given a block diagonal matrix and an optional array of diagonal
// entries D, add them to the diagonal of the matrix and compute the
// inverse of each diagonal block.
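//
// In other words, each diagonal block M_i of block_diagonal is replaced by
//
//   (M_i + diag(d_i)^2)^-1,
//
// where d_i is the slice of D covering that block, and the inverse is
// computed via a Cholesky (LLT) solve against the identity matrix.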
void ImplicitSchurComplement::AddDiagonalAndInvert(
    const double* D, BlockSparseMatrix* block_diagonal) {
  const CompressedRowBlockStructure* block_diagonal_structure =
      block_diagonal->block_structure();
  ParallelFor(options_.context,
              0,
              block_diagonal_structure->rows.size(),
              options_.num_threads,
              [block_diagonal_structure, D, block_diagonal](int row_block_id) {
                auto& row = block_diagonal_structure->rows[row_block_id];
                const int row_block_pos = row.block.position;
                const int row_block_size = row.block.size;
                const Cell& cell = row.cells[0];
                MatrixRef m(block_diagonal->mutable_values() + cell.position,
                            row_block_size,
                            row_block_size);

                if (D != nullptr) {
                  ConstVectorRef d(D + row_block_pos, row_block_size);
                  m += d.array().square().matrix().asDiagonal();
                }

                m = m.selfadjointView<Eigen::Upper>().llt().solve(
                    Matrix::Identity(row_block_size, row_block_size));
              });
}

// Similar to RightMultiplyAndAccumulate, use the block structure of the matrix
// A to compute y = (E'E)^-1 (E'b - E'F x).
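//
// Here x is the solution of the reduced system S x = rhs (i.e. the values of
// the F block of columns), and y receives the solution of the full system.
// A rough sketch of how this class is typically driven (the actual wiring
// lives in Ceres' iterative Schur complement solver):
//
//   ImplicitSchurComplement isc(options);
//   isc.Init(A, D, b);  // also computes the reduced system right hand side
//   Vector x = Vector::Zero(isc.num_cols());
//   // ... solve S x = rhs with conjugate gradients, using
//   // isc.RightMultiplyAndAccumulate() as the operator ...
//   Vector full_solution(A.num_cols());
//   isc.BackSubstitute(x.data(), full_solution.data());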
void ImplicitSchurComplement::BackSubstitute(const double* x, double* y) {
  const int num_cols_e = A_->num_cols_e();
  const int num_cols_f = A_->num_cols_f();
  const int num_cols = A_->num_cols();
  const int num_rows = A_->num_rows();

  // y1 = F x
  ParallelSetZero(options_.context, options_.num_threads, tmp_rows_);
  A_->RightMultiplyAndAccumulateF(x, tmp_rows_.data());

  // y2 = b - y1
  ParallelAssign(options_.context,
                 options_.num_threads,
                 tmp_rows_,
                 ConstVectorRef(b_, num_rows) - tmp_rows_);

  // y3 = E' y2
  ParallelSetZero(options_.context, options_.num_threads, tmp_e_cols_);
  A_->LeftMultiplyAndAccumulateE(tmp_rows_.data(), tmp_e_cols_.data());

  // y = (E'E)^-1 y3
  ParallelSetZero(options_.context, options_.num_threads, y, num_cols);
  block_diagonal_EtE_inverse_->RightMultiplyAndAccumulate(
      tmp_e_cols_.data(), y, options_.context, options_.num_threads);

  // The full solution vector y has two blocks. The first block of
  // variables corresponds to the eliminated variables, which we just
  // computed via back substitution. The second block of variables
  // corresponds to the Schur complement system, so we just copy those
  // values from the solution to the Schur complement.
  VectorRef y_cols_f(y + num_cols_e, num_cols_f);
  ParallelAssign(options_.context,
                 options_.num_threads,
                 y_cols_f,
                 ConstVectorRef(x, num_cols_f));
}

// Compute the RHS of the Schur complement system.
//
//   rhs = F'b - F'E (E'E)^-1 E'b
//
// Like BackSubstitute, we use the block structure of A to implement
// this using a series of matrix vector products.
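//
// rhs is the right hand side of the reduced system S x = rhs; its solution,
// followed by a call to BackSubstitute, yields the solution of the original
// linear system.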
void ImplicitSchurComplement::UpdateRhs() {
  // y1 = E'b
  ParallelSetZero(options_.context, options_.num_threads, tmp_e_cols_);
  A_->LeftMultiplyAndAccumulateE(b_, tmp_e_cols_.data());

  // y2 = (E'E)^-1 y1
  ParallelSetZero(options_.context, options_.num_threads, tmp_e_cols_2_);
  block_diagonal_EtE_inverse_->RightMultiplyAndAccumulate(tmp_e_cols_.data(),
                                                          tmp_e_cols_2_.data(),
                                                          options_.context,
                                                          options_.num_threads);

  // y3 = E y2
  ParallelSetZero(options_.context, options_.num_threads, tmp_rows_);
  A_->RightMultiplyAndAccumulateE(tmp_e_cols_2_.data(), tmp_rows_.data());

  // y3 = b - y3
  ParallelAssign(options_.context,
                 options_.num_threads,
                 tmp_rows_,
                 ConstVectorRef(b_, A_->num_rows()) - tmp_rows_);

  // rhs = F' y3
  ParallelSetZero(options_.context, options_.num_threads, rhs_);
  A_->LeftMultiplyAndAccumulateF(tmp_rows_.data(), rhs_.data());
}

} // namespace ceres::internal