// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * Neither the name of Google Inc. nor the names of its contributors may be
// used to endorse or promote products derived from this software without
// specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
// Author: keir@google.com (Keir Mierle)
#include "ceres/cgnr_solver.h"
#include <memory>
#include <utility>
#include "ceres/block_jacobi_preconditioner.h"
#include "ceres/conjugate_gradients_solver.h"
#include "ceres/cuda_sparse_matrix.h"
#include "ceres/cuda_vector.h"
#include "ceres/internal/eigen.h"
#include "ceres/linear_solver.h"
#include "ceres/subset_preconditioner.h"
#include "ceres/wall_time.h"
#include "glog/logging.h"
namespace ceres::internal {
// A linear operator which takes a matrix A and a diagonal vector D and
// performs products of the form
//
// (A^T A + D^T D)x
//
// This is used to implement iterative general sparse linear solving with
// conjugate gradients, where A is the Jacobian and D is a regularizing
// parameter. A brief proof that D^T D is the correct regularizer:
//
// Given a regularized least squares problem:
//
// min ||Ax - b||^2 + ||Dx||^2
// x
//
// First expand into matrix notation:
//
// (Ax - b)^T (Ax - b) + x^T D^T D x
//
// Then multiply out to get:
//
// = x^T A^T A x - 2 b^T A x + b^T b + x^T D^T D x
//
// Take the derivative:
//
// 0 = 2A^TAx - 2A^T b + 2 D^TDx
// 0 = A^TAx - A^T b + D^TDx
// 0 = (A^TA + D^TD)x - A^T b
//
// Thus, the symmetric system we need to solve for CGNR is
//
// Sx = z
//
// with S = A^TA + D^TD
// and z = A^T b
//
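// The operator applies A and then A^T in sequence, so the matrix A^T A is
// never formed explicitly.
//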
// Note: This class is not thread safe, since it uses some temporary storage.
class CERES_NO_EXPORT CgnrLinearOperator final
: public ConjugateGradientsLinearOperator<Vector> {
public:
CgnrLinearOperator(const LinearOperator& A,
const double* D,
ContextImpl* context,
int num_threads)
: A_(A),
D_(D),
z_(Vector::Zero(A.num_rows())),
context_(context),
num_threads_(num_threads) {}
void RightMultiplyAndAccumulate(const Vector& x, Vector& y) final {
// z = Ax
// y = y + Atz
z_.setZero();
A_.RightMultiplyAndAccumulate(x, z_, context_, num_threads_);
A_.LeftMultiplyAndAccumulate(z_, y, context_, num_threads_);
// y = y + DtDx
if (D_ != nullptr) {
int n = A_.num_cols();
ParallelAssign(
context_,
num_threads_,
y,
y.array() + ConstVectorRef(D_, n).array().square() * x.array());
}
}
private:
const LinearOperator& A_;
const double* D_;
Vector z_;
ContextImpl* context_;
int num_threads_;
};
CgnrSolver::CgnrSolver(LinearSolver::Options options)
: options_(std::move(options)) {
if (options_.preconditioner_type != JACOBI &&
options_.preconditioner_type != IDENTITY &&
options_.preconditioner_type != SUBSET) {
LOG(FATAL)
<< "Preconditioner = "
<< PreconditionerTypeToString(options_.preconditioner_type) << ". "
<< "Congratulations, you found a bug in Ceres. Please report it.";
}
}
CgnrSolver::~CgnrSolver() {
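// Free the scratch vectors that were allocated lazily in SolveImpl.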
for (int i = 0; i < 4; ++i) {
if (scratch_[i]) {
delete scratch_[i];
scratch_[i] = nullptr;
}
}
}
LinearSolver::Summary CgnrSolver::SolveImpl(
BlockSparseMatrix* A,
const double* b,
const LinearSolver::PerSolveOptions& per_solve_options,
double* x) {
EventLogger event_logger("CgnrSolver::Solve");
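// The preconditioner is created lazily on the first solve and reused (with
// refreshed values) on subsequent solves.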
if (!preconditioner_) {
Preconditioner::Options preconditioner_options;
preconditioner_options.type = options_.preconditioner_type;
preconditioner_options.subset_preconditioner_start_row_block =
options_.subset_preconditioner_start_row_block;
preconditioner_options.sparse_linear_algebra_library_type =
options_.sparse_linear_algebra_library_type;
preconditioner_options.ordering_type = options_.ordering_type;
preconditioner_options.num_threads = options_.num_threads;
preconditioner_options.context = options_.context;
if (options_.preconditioner_type == JACOBI) {
preconditioner_ = std::make_unique<BlockSparseJacobiPreconditioner>(
preconditioner_options, *A);
} else if (options_.preconditioner_type == SUBSET) {
preconditioner_ =
std::make_unique<SubsetPreconditioner>(preconditioner_options, *A);
} else {
preconditioner_ = std::make_unique<IdentityPreconditioner>(A->num_cols());
}
}
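// Refresh the preconditioner values for the current Jacobian and D.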
preconditioner_->Update(*A, per_solve_options.D);
ConjugateGradientsSolverOptions cg_options;
cg_options.min_num_iterations = options_.min_num_iterations;
cg_options.max_num_iterations = options_.max_num_iterations;
cg_options.residual_reset_period = options_.residual_reset_period;
cg_options.q_tolerance = per_solve_options.q_tolerance;
cg_options.r_tolerance = per_solve_options.r_tolerance;
cg_options.context = options_.context;
cg_options.num_threads = options_.num_threads;
// lhs = AtA + DtD
CgnrLinearOperator lhs(
*A, per_solve_options.D, options_.context, options_.num_threads);
// rhs = Atb.
Vector rhs(A->num_cols());
rhs.setZero();
A->LeftMultiplyAndAccumulate(
b, rhs.data(), options_.context, options_.num_threads);
cg_solution_ = Vector::Zero(A->num_cols());
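// The scratch vectors used by the conjugate gradients solver are allocated
// lazily, reused across solves, and freed in the destructor.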
for (int i = 0; i < 4; ++i) {
if (scratch_[i] == nullptr) {
scratch_[i] = new Vector(A->num_cols());
}
}
event_logger.AddEvent("Setup");
LinearOperatorAdapter preconditioner(*preconditioner_);
auto summary = ConjugateGradientsSolver(
cg_options, lhs, rhs, preconditioner, scratch_, cg_solution_);
VectorRef(x, A->num_cols()) = cg_solution_;
event_logger.AddEvent("Solve");
return summary;
}
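// A minimal usage sketch (not part of the implementation): assuming a
// BlockSparseMatrix `jacobian`, a right hand side vector `b`, and a
// ContextImpl `context` have been set up elsewhere, the solver is driven
// through the LinearSolver::Solve entry point declared in linear_solver.h:
//
//   LinearSolver::Options options;
//   options.preconditioner_type = JACOBI;
//   options.context = &context;
//   options.num_threads = 1;
//   CgnrSolver solver(options);
//   LinearSolver::PerSolveOptions per_solve_options;
//   per_solve_options.D = nullptr;  // No regularization in this sketch.
//   Vector x = Vector::Zero(jacobian.num_cols());
//   auto summary =
//       solver.Solve(&jacobian, b.data(), per_solve_options, x.data());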
#ifndef CERES_NO_CUDA
// A linear operator which takes a matrix A and a diagonal vector D and
// performs products of the form
//
// (A^T A + D^T D)x
//
// This is used to implement iterative general sparse linear solving with
// conjugate gradients, where A is the Jacobian and D is a regularizing
// parameter. A brief proof is included above, in the documentation of
// CgnrLinearOperator.
class CERES_NO_EXPORT CudaCgnrLinearOperator final
: public ConjugateGradientsLinearOperator<CudaVector> {
public:
CudaCgnrLinearOperator(CudaSparseMatrix& A,
const CudaVector& D,
CudaVector* z)
: A_(A), D_(D), z_(z) {}
void RightMultiplyAndAccumulate(const CudaVector& x, CudaVector& y) final {
// z = Ax
z_->SetZero();
A_.RightMultiplyAndAccumulate(x, z_);
// y = y + Atz
// = y + AtAx
A_.LeftMultiplyAndAccumulate(*z_, &y);
// y = y + DtDx
y.DtDxpy(D_, x);
}
private:
CudaSparseMatrix& A_;
const CudaVector& D_;
CudaVector* z_ = nullptr;
};
class CERES_NO_EXPORT CudaIdentityPreconditioner final
: public CudaPreconditioner {
public:
void Update(const CompressedRowSparseMatrix& A, const double* D) final {}
void RightMultiplyAndAccumulate(const CudaVector& x, CudaVector& y) final {
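// y = 1.0 * x + 1.0 * y, i.e. the identity preconditioner just adds x to y.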
y.Axpby(1.0, x, 1.0);
}
};
// This class wraps the existing CPU Jacobi preconditioner, caches the structure
// of the block diagonal, and for each CGNR solve updates the values on the CPU
// and then copies them over to the GPU.
class CERES_NO_EXPORT CudaJacobiPreconditioner final
: public CudaPreconditioner {
public:
explicit CudaJacobiPreconditioner(Preconditioner::Options options,
const CompressedRowSparseMatrix& A)
: options_(std::move(options)),
cpu_preconditioner_(options_, A),
m_(options_.context, cpu_preconditioner_.matrix()) {}
~CudaJacobiPreconditioner() = default;
void Update(const CompressedRowSparseMatrix& A, const double* D) final {
cpu_preconditioner_.Update(A, D);
m_.CopyValuesFromCpu(cpu_preconditioner_.matrix());
}
void RightMultiplyAndAccumulate(const CudaVector& x, CudaVector& y) final {
m_.RightMultiplyAndAccumulate(x, &y);
}
private:
Preconditioner::Options options_;
BlockCRSJacobiPreconditioner cpu_preconditioner_;
CudaSparseMatrix m_;
};
CudaCgnrSolver::CudaCgnrSolver(LinearSolver::Options options)
: options_(std::move(options)) {}
CudaCgnrSolver::~CudaCgnrSolver() {
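// Free the scratch vectors that were allocated in CpuToGpuTransfer.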
for (int i = 0; i < 4; ++i) {
if (scratch_[i]) {
delete scratch_[i];
scratch_[i] = nullptr;
}
}
}
std::unique_ptr<CudaCgnrSolver> CudaCgnrSolver::Create(
LinearSolver::Options options, std::string* error) {
CHECK(error != nullptr);
if (options.preconditioner_type != IDENTITY &&
options.preconditioner_type != JACOBI) {
*error =
"CudaCgnrSolver does not support preconditioner type " +
std::string(PreconditionerTypeToString(options.preconditioner_type)) +
". ";
return nullptr;
}
CHECK(options.context->IsCudaInitialized())
<< "CudaCgnrSolver requires CUDA initialization.";
auto solver = std::make_unique<CudaCgnrSolver>(options);
return solver;
}
void CudaCgnrSolver::CpuToGpuTransfer(const CompressedRowSparseMatrix& A,
const double* b,
const double* D) {
if (A_ == nullptr) {
// The structure is not cached yet; do an initialization and structural copy.
A_ = std::make_unique<CudaSparseMatrix>(options_.context, A);
b_ = std::make_unique<CudaVector>(options_.context, A.num_rows());
x_ = std::make_unique<CudaVector>(options_.context, A.num_cols());
Atb_ = std::make_unique<CudaVector>(options_.context, A.num_cols());
Ax_ = std::make_unique<CudaVector>(options_.context, A.num_rows());
D_ = std::make_unique<CudaVector>(options_.context, A.num_cols());
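// The preconditioner is created once here; later solves only refresh its
// values via Update().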
Preconditioner::Options preconditioner_options;
preconditioner_options.type = options_.preconditioner_type;
preconditioner_options.subset_preconditioner_start_row_block =
options_.subset_preconditioner_start_row_block;
preconditioner_options.sparse_linear_algebra_library_type =
options_.sparse_linear_algebra_library_type;
preconditioner_options.ordering_type = options_.ordering_type;
preconditioner_options.num_threads = options_.num_threads;
preconditioner_options.context = options_.context;
if (options_.preconditioner_type == JACOBI) {
preconditioner_ =
std::make_unique<CudaJacobiPreconditioner>(preconditioner_options, A);
} else {
preconditioner_ = std::make_unique<CudaIdentityPreconditioner>();
}
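// Allocate the scratch vectors used by the conjugate gradients solver; they
// are freed in the destructor.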
for (int i = 0; i < 4; ++i) {
scratch_[i] = new CudaVector(options_.context, A.num_cols());
}
} else {
// The structure is already cached; only copy the values.
A_->CopyValuesFromCpu(A);
}
b_->CopyFromCpu(ConstVectorRef(b, A.num_rows()));
D_->CopyFromCpu(ConstVectorRef(D, A.num_cols()));
}
LinearSolver::Summary CudaCgnrSolver::SolveImpl(
CompressedRowSparseMatrix* A,
const double* b,
const LinearSolver::PerSolveOptions& per_solve_options,
double* x) {
EventLogger event_logger("CudaCgnrSolver::Solve");
LinearSolver::Summary summary;
summary.num_iterations = 0;
summary.termination_type = LinearSolverTerminationType::FATAL_ERROR;
CpuToGpuTransfer(*A, b, per_solve_options.D);
event_logger.AddEvent("CPU to GPU Transfer");
preconditioner_->Update(*A, per_solve_options.D);
event_logger.AddEvent("Preconditioner Update");
// Form z = Atb.
Atb_->SetZero();
A_->LeftMultiplyAndAccumulate(*b_, Atb_.get());
// Solve (AtA + DtD)x = z (= Atb).
x_->SetZero();
CudaCgnrLinearOperator lhs(*A_, *D_, Ax_.get());
event_logger.AddEvent("Setup");
ConjugateGradientsSolverOptions cg_options;
cg_options.min_num_iterations = options_.min_num_iterations;
cg_options.max_num_iterations = options_.max_num_iterations;
cg_options.residual_reset_period = options_.residual_reset_period;
cg_options.q_tolerance = per_solve_options.q_tolerance;
cg_options.r_tolerance = per_solve_options.r_tolerance;
summary = ConjugateGradientsSolver(
cg_options, lhs, *Atb_, *preconditioner_, scratch_, *x_);
x_->CopyTo(x);
event_logger.AddEvent("Solve");
return summary;
}
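// A minimal usage sketch for the CUDA path (not part of the implementation):
// assuming a ContextImpl `context` with CUDA already initialized and a
// CompressedRowSparseMatrix `jacobian`:
//
//   LinearSolver::Options options;
//   options.preconditioner_type = JACOBI;
//   options.context = &context;
//   std::string error;
//   auto solver = CudaCgnrSolver::Create(options, &error);
//   CHECK(solver != nullptr) << error;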
#endif // CERES_NO_CUDA
} // namespace ceres::internal