internal/ceres/implicit_schur_complement.cc - ceres-solver - Git at Google

 // Ceres Solver - A fast non-linear least squares minimizer
 // Copyright 2023 Google Inc. All rights reserved.
 // http://ceres-solver.org/
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are met:
 //
 // * Redistributions of source code must retain the above copyright notice,
 //   this list of conditions and the following disclaimer.
 // * Redistributions in binary form must reproduce the above copyright notice,
 //   this list of conditions and the following disclaimer in the documentation
 //   and/or other materials provided with the distribution.
 // * Neither the name of Google Inc. nor the names of its contributors may be
 //   used to endorse or promote products derived from this software without
 //   specific prior written permission.
 //
 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 // POSSIBILITY OF SUCH DAMAGE.
 //
 // Author: sameeragarwal@google.com (Sameer Agarwal)

 #include "ceres/implicit_schur_complement.h"

 #include "Eigen/Dense"
 #include "ceres/block_sparse_matrix.h"
 #include "ceres/block_structure.h"
 #include "ceres/internal/eigen.h"
 #include "ceres/linear_solver.h"
 #include "ceres/parallel_for.h"
 #include "ceres/parallel_vector_ops.h"
 #include "ceres/types.h"
 #include "glog/logging.h"

 namespace ceres::internal {

 ImplicitSchurComplement::ImplicitSchurComplement(
     const LinearSolver::Options& options)
     : options_(options) {}

 void ImplicitSchurComplement::Init(const BlockSparseMatrix& A,
                                    const double* D,
                                    const double* b) {
   // Since initialization is reasonably heavy, perhaps we can save on
   // constructing a new object everytime.
   if (A_ == nullptr) {
     A_ = PartitionedMatrixViewBase::Create(options_, A);
   }

   D_ = D;
   b_ = b;

   compute_ftf_inverse_ =
       options_.use_spse_initialization ||
       options_.preconditioner_type == JACOBI ||
       options_.preconditioner_type == SCHUR_POWER_SERIES_EXPANSION;

   // Initialize temporary storage and compute the block diagonals of
   // E'E and F'E.
   if (block_diagonal_EtE_inverse_ == nullptr) {
     block_diagonal_EtE_inverse_ = A_->CreateBlockDiagonalEtE();
     if (compute_ftf_inverse_) {
       block_diagonal_FtF_inverse_ = A_->CreateBlockDiagonalFtF();
     }
     rhs_.resize(A_->num_cols_f());
     rhs_.setZero();
     tmp_rows_.resize(A_->num_rows());
     tmp_e_cols_.resize(A_->num_cols_e());
     tmp_e_cols_2_.resize(A_->num_cols_e());
     tmp_f_cols_.resize(A_->num_cols_f());
   } else {
     A_->UpdateBlockDiagonalEtE(block_diagonal_EtE_inverse_.get());
     if (compute_ftf_inverse_) {
       A_->UpdateBlockDiagonalFtF(block_diagonal_FtF_inverse_.get());
     }
   }

   // The block diagonals of the augmented linear system contain
   // contributions from the diagonal D if it is non-null. Add that to
   // the block diagonals and invert them.
   AddDiagonalAndInvert(D_, block_diagonal_EtE_inverse_.get());
   if (compute_ftf_inverse_) {
     AddDiagonalAndInvert((D_ == nullptr) ? nullptr : D_ + A_->num_cols_e(),
                          block_diagonal_FtF_inverse_.get());
   }

   // Compute the RHS of the Schur complement system.
   UpdateRhs();
 }

 // Evaluate the product
 //
 //   Sx = [F'F - F'E (E'E)^-1 E'F]x
 //
 // By breaking it down into individual matrix vector products
 // involving the matrices E and F. This is implemented using a
 // PartitionedMatrixView of the input matrix A.
 void ImplicitSchurComplement::RightMultiplyAndAccumulate(const double* x,
                                                          double* y) const {
   // y1 = F x
   ParallelSetZero(options_.context, options_.num_threads, tmp_rows_);
   A_->RightMultiplyAndAccumulateF(x, tmp_rows_.data());

   // y2 = E' y1
   ParallelSetZero(options_.context, options_.num_threads, tmp_e_cols_);
   A_->LeftMultiplyAndAccumulateE(tmp_rows_.data(), tmp_e_cols_.data());

   // y3 = -(E'E)^-1 y2
   ParallelSetZero(options_.context, options_.num_threads, tmp_e_cols_2_);
   block_diagonal_EtE_inverse_->RightMultiplyAndAccumulate(tmp_e_cols_.data(),
                                                           tmp_e_cols_2_.data(),
                                                           options_.context,
                                                           options_.num_threads);

   ParallelAssign(
       options_.context, options_.num_threads, tmp_e_cols_2_, -tmp_e_cols_2_);

   // y1 = y1 + E y3
   A_->RightMultiplyAndAccumulateE(tmp_e_cols_2_.data(), tmp_rows_.data());

   // y5 = D * x
   if (D_ != nullptr) {
     ConstVectorRef Dref(D_ + A_->num_cols_e(), num_cols());
     VectorRef y_cols(y, num_cols());
     ParallelAssign(
         options_.context,
         options_.num_threads,
         y_cols,
         (Dref.array().square() * ConstVectorRef(x, num_cols()).array()));
   } else {
     ParallelSetZero(options_.context, options_.num_threads, y, num_cols());
   }

   // y = y5 + F' y1
   A_->LeftMultiplyAndAccumulateF(tmp_rows_.data(), y);
 }

 void ImplicitSchurComplement::InversePowerSeriesOperatorRightMultiplyAccumulate(
     const double* x, double* y) const {
   CHECK(compute_ftf_inverse_);
   // y1 = F x
   ParallelSetZero(options_.context, options_.num_threads, tmp_rows_);
   A_->RightMultiplyAndAccumulateF(x, tmp_rows_.data());

   // y2 = E' y1
   ParallelSetZero(options_.context, options_.num_threads, tmp_e_cols_);
   A_->LeftMultiplyAndAccumulateE(tmp_rows_.data(), tmp_e_cols_.data());

   // y3 = (E'E)^-1 y2
   ParallelSetZero(options_.context, options_.num_threads, tmp_e_cols_2_);
   block_diagonal_EtE_inverse_->RightMultiplyAndAccumulate(tmp_e_cols_.data(),
                                                           tmp_e_cols_2_.data(),
                                                           options_.context,
                                                           options_.num_threads);
   // y1 = E y3
   ParallelSetZero(options_.context, options_.num_threads, tmp_rows_);
   A_->RightMultiplyAndAccumulateE(tmp_e_cols_2_.data(), tmp_rows_.data());

   // y4 = F' y1
   ParallelSetZero(options_.context, options_.num_threads, tmp_f_cols_);
   A_->LeftMultiplyAndAccumulateF(tmp_rows_.data(), tmp_f_cols_.data());

   // y += (F'F)^-1 y4
   block_diagonal_FtF_inverse_->RightMultiplyAndAccumulate(
       tmp_f_cols_.data(), y, options_.context, options_.num_threads);
 }

 // Given a block diagonal matrix and an optional array of diagonal
 // entries D, add them to the diagonal of the matrix and compute the
 // inverse of each diagonal block.
 void ImplicitSchurComplement::AddDiagonalAndInvert(
     const double* D, BlockSparseMatrix* block_diagonal) {
   const CompressedRowBlockStructure* block_diagonal_structure =
       block_diagonal->block_structure();
   ParallelFor(options_.context,
               0,
               block_diagonal_structure->rows.size(),
               options_.num_threads,
               [block_diagonal_structure, D, block_diagonal](int row_block_id) {
                 auto& row = block_diagonal_structure->rows[row_block_id];
                 const int row_block_pos = row.block.position;
                 const int row_block_size = row.block.size;
                 const Cell& cell = row.cells[0];
                 MatrixRef m(block_diagonal->mutable_values() + cell.position,
                             row_block_size,
                             row_block_size);

                 if (D != nullptr) {
                   ConstVectorRef d(D + row_block_pos, row_block_size);
                   m += d.array().square().matrix().asDiagonal();
                 }

                 m = m.selfadjointView<Eigen::Upper>().llt().solve(
                     Matrix::Identity(row_block_size, row_block_size));
               });
 }

 // Similar to RightMultiplyAndAccumulate, use the block structure of the matrix
 // A to compute y = (E'E)^-1 (E'b - E'F x).
 void ImplicitSchurComplement::BackSubstitute(const double* x, double* y) {
   const int num_cols_e = A_->num_cols_e();
   const int num_cols_f = A_->num_cols_f();
   const int num_cols = A_->num_cols();
   const int num_rows = A_->num_rows();

   // y1 = F x
   ParallelSetZero(options_.context, options_.num_threads, tmp_rows_);
   A_->RightMultiplyAndAccumulateF(x, tmp_rows_.data());

   // y2 = b - y1
   ParallelAssign(options_.context,
                  options_.num_threads,
                  tmp_rows_,
                  ConstVectorRef(b_, num_rows) - tmp_rows_);

   // y3 = E' y2
   ParallelSetZero(options_.context, options_.num_threads, tmp_e_cols_);
   A_->LeftMultiplyAndAccumulateE(tmp_rows_.data(), tmp_e_cols_.data());

   // y = (E'E)^-1 y3
   ParallelSetZero(options_.context, options_.num_threads, y, num_cols);
   block_diagonal_EtE_inverse_->RightMultiplyAndAccumulate(
       tmp_e_cols_.data(), y, options_.context, options_.num_threads);

   // The full solution vector y has two blocks. The first block of
   // variables corresponds to the eliminated variables, which we just
   // computed via back substitution. The second block of variables
   // corresponds to the Schur complement system, so we just copy those
   // values from the solution to the Schur complement.
   VectorRef y_cols_f(y + num_cols_e, num_cols_f);
   ParallelAssign(options_.context,
                  options_.num_threads,
                  y_cols_f,
                  ConstVectorRef(x, num_cols_f));
 }

 // Compute the RHS of the Schur complement system.
 //
 // rhs = F'b - F'E (E'E)^-1 E'b
 //
 // Like BackSubstitute, we use the block structure of A to implement
 // this using a series of matrix vector products.
 void ImplicitSchurComplement::UpdateRhs() {
   // y1 = E'b
   ParallelSetZero(options_.context, options_.num_threads, tmp_e_cols_);
   A_->LeftMultiplyAndAccumulateE(b_, tmp_e_cols_.data());

   // y2 = (E'E)^-1 y1
   ParallelSetZero(options_.context, options_.num_threads, tmp_e_cols_2_);
   block_diagonal_EtE_inverse_->RightMultiplyAndAccumulate(tmp_e_cols_.data(),
                                                           tmp_e_cols_2_.data(),
                                                           options_.context,
                                                           options_.num_threads);

   // y3 = E y2
   ParallelSetZero(options_.context, options_.num_threads, tmp_rows_);
   A_->RightMultiplyAndAccumulateE(tmp_e_cols_2_.data(), tmp_rows_.data());

   // y3 = b - y3
   ParallelAssign(options_.context,
                  options_.num_threads,
                  tmp_rows_,
                  ConstVectorRef(b_, A_->num_rows()) - tmp_rows_);

   // rhs = F' y3
   ParallelSetZero(options_.context, options_.num_threads, rhs_);
   A_->LeftMultiplyAndAccumulateF(tmp_rows_.data(), rhs_.data());
 }

 }  // namespace ceres::internal
	// Ceres Solver - A fast non-linear least squares minimizer
	// Copyright 2023 Google Inc. All rights reserved.
	// http://ceres-solver.org/
	//
	// Redistribution and use in source and binary forms, with or without
	// modification, are permitted provided that the following conditions are met:
	//
	// * Redistributions of source code must retain the above copyright notice,
	// this list of conditions and the following disclaimer.
	// * Redistributions in binary form must reproduce the above copyright notice,
	// this list of conditions and the following disclaimer in the documentation
	// and/or other materials provided with the distribution.
	// * Neither the name of Google Inc. nor the names of its contributors may be
	// used to endorse or promote products derived from this software without
	// specific prior written permission.
	//
	// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
	// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
	// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
	// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
	// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
	// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
	// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
	// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
	// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
	// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
	// POSSIBILITY OF SUCH DAMAGE.
	//
	// Author: sameeragarwal@google.com (Sameer Agarwal)

	#include "ceres/implicit_schur_complement.h"

	#include "Eigen/Dense"
	#include "ceres/block_sparse_matrix.h"
	#include "ceres/block_structure.h"
	#include "ceres/internal/eigen.h"
	#include "ceres/linear_solver.h"
	#include "ceres/parallel_for.h"
	#include "ceres/parallel_vector_ops.h"
	#include "ceres/types.h"
	#include "glog/logging.h"

	namespace ceres::internal {

	ImplicitSchurComplement::ImplicitSchurComplement(
	const LinearSolver::Options& options)
	: options_(options) {}

	void ImplicitSchurComplement::Init(const BlockSparseMatrix& A,
	const double* D,
	const double* b) {
	// Since initialization is reasonably heavy, perhaps we can save on
	// constructing a new object everytime.
	if (A_ == nullptr) {
	A_ = PartitionedMatrixViewBase::Create(options_, A);
	}

	D_ = D;
	b_ = b;

	compute_ftf_inverse_ =
	options_.use_spse_initialization \|\|
	options_.preconditioner_type == JACOBI \|\|
	options_.preconditioner_type == SCHUR_POWER_SERIES_EXPANSION;

	// Initialize temporary storage and compute the block diagonals of
	// E'E and F'E.
	if (block_diagonal_EtE_inverse_ == nullptr) {
	block_diagonal_EtE_inverse_ = A_->CreateBlockDiagonalEtE();
	if (compute_ftf_inverse_) {
	block_diagonal_FtF_inverse_ = A_->CreateBlockDiagonalFtF();
	}
	rhs_.resize(A_->num_cols_f());
	rhs_.setZero();
	tmp_rows_.resize(A_->num_rows());
	tmp_e_cols_.resize(A_->num_cols_e());
	tmp_e_cols_2_.resize(A_->num_cols_e());
	tmp_f_cols_.resize(A_->num_cols_f());
	} else {
	A_->UpdateBlockDiagonalEtE(block_diagonal_EtE_inverse_.get());
	if (compute_ftf_inverse_) {
	A_->UpdateBlockDiagonalFtF(block_diagonal_FtF_inverse_.get());
	}
	}

	// The block diagonals of the augmented linear system contain
	// contributions from the diagonal D if it is non-null. Add that to
	// the block diagonals and invert them.
	AddDiagonalAndInvert(D_, block_diagonal_EtE_inverse_.get());
	if (compute_ftf_inverse_) {
	AddDiagonalAndInvert((D_ == nullptr) ? nullptr : D_ + A_->num_cols_e(),
	block_diagonal_FtF_inverse_.get());
	}

	// Compute the RHS of the Schur complement system.
	UpdateRhs();
	}

	// Evaluate the product
	//
	// Sx = [F'F - F'E (E'E)^-1 E'F]x
	//
	// By breaking it down into individual matrix vector products
	// involving the matrices E and F. This is implemented using a
	// PartitionedMatrixView of the input matrix A.
	void ImplicitSchurComplement::RightMultiplyAndAccumulate(const double* x,
	double* y) const {
	// y1 = F x
	ParallelSetZero(options_.context, options_.num_threads, tmp_rows_);
	A_->RightMultiplyAndAccumulateF(x, tmp_rows_.data());

	// y2 = E' y1
	ParallelSetZero(options_.context, options_.num_threads, tmp_e_cols_);
	A_->LeftMultiplyAndAccumulateE(tmp_rows_.data(), tmp_e_cols_.data());

	// y3 = -(E'E)^-1 y2
	ParallelSetZero(options_.context, options_.num_threads, tmp_e_cols_2_);
	block_diagonal_EtE_inverse_->RightMultiplyAndAccumulate(tmp_e_cols_.data(),
	tmp_e_cols_2_.data(),
	options_.context,
	options_.num_threads);

	ParallelAssign(
	options_.context, options_.num_threads, tmp_e_cols_2_, -tmp_e_cols_2_);

	// y1 = y1 + E y3
	A_->RightMultiplyAndAccumulateE(tmp_e_cols_2_.data(), tmp_rows_.data());

	// y5 = D * x
	if (D_ != nullptr) {
	ConstVectorRef Dref(D_ + A_->num_cols_e(), num_cols());
	VectorRef y_cols(y, num_cols());
	ParallelAssign(
	options_.context,
	options_.num_threads,
	y_cols,
	(Dref.array().square() * ConstVectorRef(x, num_cols()).array()));
	} else {
	ParallelSetZero(options_.context, options_.num_threads, y, num_cols());
	}

	// y = y5 + F' y1
	A_->LeftMultiplyAndAccumulateF(tmp_rows_.data(), y);
	}

	void ImplicitSchurComplement::InversePowerSeriesOperatorRightMultiplyAccumulate(
	const double* x, double* y) const {
	CHECK(compute_ftf_inverse_);
	// y1 = F x
	ParallelSetZero(options_.context, options_.num_threads, tmp_rows_);
	A_->RightMultiplyAndAccumulateF(x, tmp_rows_.data());

	// y2 = E' y1
	ParallelSetZero(options_.context, options_.num_threads, tmp_e_cols_);
	A_->LeftMultiplyAndAccumulateE(tmp_rows_.data(), tmp_e_cols_.data());

	// y3 = (E'E)^-1 y2
	ParallelSetZero(options_.context, options_.num_threads, tmp_e_cols_2_);
	block_diagonal_EtE_inverse_->RightMultiplyAndAccumulate(tmp_e_cols_.data(),
	tmp_e_cols_2_.data(),
	options_.context,
	options_.num_threads);
	// y1 = E y3
	ParallelSetZero(options_.context, options_.num_threads, tmp_rows_);
	A_->RightMultiplyAndAccumulateE(tmp_e_cols_2_.data(), tmp_rows_.data());

	// y4 = F' y1
	ParallelSetZero(options_.context, options_.num_threads, tmp_f_cols_);
	A_->LeftMultiplyAndAccumulateF(tmp_rows_.data(), tmp_f_cols_.data());

	// y += (F'F)^-1 y4
	block_diagonal_FtF_inverse_->RightMultiplyAndAccumulate(
	tmp_f_cols_.data(), y, options_.context, options_.num_threads);
	}

	// Given a block diagonal matrix and an optional array of diagonal
	// entries D, add them to the diagonal of the matrix and compute the
	// inverse of each diagonal block.
	void ImplicitSchurComplement::AddDiagonalAndInvert(
	const double* D, BlockSparseMatrix* block_diagonal) {
	const CompressedRowBlockStructure* block_diagonal_structure =
	block_diagonal->block_structure();
	ParallelFor(options_.context,
	0,
	block_diagonal_structure->rows.size(),
	options_.num_threads,
	[block_diagonal_structure, D, block_diagonal](int row_block_id) {
	auto& row = block_diagonal_structure->rows[row_block_id];
	const int row_block_pos = row.block.position;
	const int row_block_size = row.block.size;
	const Cell& cell = row.cells[0];
	MatrixRef m(block_diagonal->mutable_values() + cell.position,
	row_block_size,
	row_block_size);

	if (D != nullptr) {
	ConstVectorRef d(D + row_block_pos, row_block_size);
	m += d.array().square().matrix().asDiagonal();
	}

	m = m.selfadjointView<Eigen::Upper>().llt().solve(
	Matrix::Identity(row_block_size, row_block_size));
	});
	}

	// Similar to RightMultiplyAndAccumulate, use the block structure of the matrix
	// A to compute y = (E'E)^-1 (E'b - E'F x).
	void ImplicitSchurComplement::BackSubstitute(const double* x, double* y) {
	const int num_cols_e = A_->num_cols_e();
	const int num_cols_f = A_->num_cols_f();
	const int num_cols = A_->num_cols();
	const int num_rows = A_->num_rows();

	// y1 = F x
	ParallelSetZero(options_.context, options_.num_threads, tmp_rows_);
	A_->RightMultiplyAndAccumulateF(x, tmp_rows_.data());

	// y2 = b - y1
	ParallelAssign(options_.context,
	options_.num_threads,
	tmp_rows_,
	ConstVectorRef(b_, num_rows) - tmp_rows_);

	// y3 = E' y2
	ParallelSetZero(options_.context, options_.num_threads, tmp_e_cols_);
	A_->LeftMultiplyAndAccumulateE(tmp_rows_.data(), tmp_e_cols_.data());

	// y = (E'E)^-1 y3
	ParallelSetZero(options_.context, options_.num_threads, y, num_cols);
	block_diagonal_EtE_inverse_->RightMultiplyAndAccumulate(
	tmp_e_cols_.data(), y, options_.context, options_.num_threads);

	// The full solution vector y has two blocks. The first block of
	// variables corresponds to the eliminated variables, which we just
	// computed via back substitution. The second block of variables
	// corresponds to the Schur complement system, so we just copy those
	// values from the solution to the Schur complement.
	VectorRef y_cols_f(y + num_cols_e, num_cols_f);
	ParallelAssign(options_.context,
	options_.num_threads,
	y_cols_f,
	ConstVectorRef(x, num_cols_f));
	}

	// Compute the RHS of the Schur complement system.
	//
	// rhs = F'b - F'E (E'E)^-1 E'b
	//
	// Like BackSubstitute, we use the block structure of A to implement
	// this using a series of matrix vector products.
	void ImplicitSchurComplement::UpdateRhs() {
	// y1 = E'b
	ParallelSetZero(options_.context, options_.num_threads, tmp_e_cols_);
	A_->LeftMultiplyAndAccumulateE(b_, tmp_e_cols_.data());

	// y2 = (E'E)^-1 y1
	ParallelSetZero(options_.context, options_.num_threads, tmp_e_cols_2_);
	block_diagonal_EtE_inverse_->RightMultiplyAndAccumulate(tmp_e_cols_.data(),
	tmp_e_cols_2_.data(),
	options_.context,
	options_.num_threads);

	// y3 = E y2
	ParallelSetZero(options_.context, options_.num_threads, tmp_rows_);
	A_->RightMultiplyAndAccumulateE(tmp_e_cols_2_.data(), tmp_rows_.data());

	// y3 = b - y3
	ParallelAssign(options_.context,
	options_.num_threads,
	tmp_rows_,
	ConstVectorRef(b_, A_->num_rows()) - tmp_rows_);

	// rhs = F' y3
	ParallelSetZero(options_.context, options_.num_threads, rhs_);
	A_->LeftMultiplyAndAccumulateF(tmp_rows_.data(), rhs_.data());
	}

	} // namespace ceres::internal