| // Ceres Solver - A fast non-linear least squares minimizer |
| // Copyright 2023 Google Inc. All rights reserved. |
| // http://ceres-solver.org/ |
| // |
| // Redistribution and use in source and binary forms, with or without |
| // modification, are permitted provided that the following conditions are met: |
| // |
| // * Redistributions of source code must retain the above copyright notice, |
| // this list of conditions and the following disclaimer. |
| // * Redistributions in binary form must reproduce the above copyright notice, |
| // this list of conditions and the following disclaimer in the documentation |
| // and/or other materials provided with the distribution. |
| // * Neither the name of Google Inc. nor the names of its contributors may be |
| // used to endorse or promote products derived from this software without |
| // specific prior written permission. |
| // |
| // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
| // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
| // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
| // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
| // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
| // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
| // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
| // POSSIBILITY OF SUCH DAMAGE. |
| // |
| // Author: keir@google.com (Keir Mierle) |
| |
| #include "ceres/cgnr_solver.h" |
| |
| #include <memory> |
| #include <utility> |
| |
| #include "ceres/block_jacobi_preconditioner.h" |
| #include "ceres/conjugate_gradients_solver.h" |
| #include "ceres/cuda_sparse_matrix.h" |
| #include "ceres/cuda_vector.h" |
| #include "ceres/internal/eigen.h" |
| #include "ceres/linear_solver.h" |
| #include "ceres/subset_preconditioner.h" |
| #include "ceres/wall_time.h" |
| #include "glog/logging.h" |
| |
| namespace ceres::internal { |
| |
| // A linear operator which takes a matrix A and a diagonal vector D and |
| // performs products of the form |
| // |
| // (A^T A + D^T D)x |
| // |
| // This is used to implement iterative general sparse linear solving with |
| // conjugate gradients, where A is the Jacobian and D is a regularizing |
| // parameter. A brief proof that D^T D is the correct regularizer: |
| // |
| // Given a regularized least squares problem: |
| // |
| // min ||Ax - b||^2 + ||Dx||^2 |
| // x |
| // |
| // First expand into matrix notation: |
| // |
| // (Ax - b)^T (Ax - b) + xD^TDx |
| // |
| // Then multiply out to get: |
| // |
| // = xA^TAx - 2b^T Ax + b^Tb + xD^TDx |
| // |
| // Take the derivative: |
| // |
| // 0 = 2A^TAx - 2A^T b + 2 D^TDx |
| // 0 = A^TAx - A^T b + D^TDx |
| // 0 = (A^TA + D^TD)x - A^T b |
| // |
| // Thus, the symmetric system we need to solve for CGNR is |
| // |
| // Sx = z |
| // |
| // with S = A^TA + D^TD |
| // and z = A^T b |
| // |
| // Note: This class is not thread safe, since it uses some temporary storage. |
| class CERES_NO_EXPORT CgnrLinearOperator final |
| : public ConjugateGradientsLinearOperator<Vector> { |
| public: |
| CgnrLinearOperator(const LinearOperator& A, |
| const double* D, |
| ContextImpl* context, |
| int num_threads) |
| : A_(A), |
| D_(D), |
| z_(Vector::Zero(A.num_rows())), |
| context_(context), |
| num_threads_(num_threads) {} |
| |
| void RightMultiplyAndAccumulate(const Vector& x, Vector& y) final { |
| // z = Ax |
| // y = y + Atz |
| z_.setZero(); |
| A_.RightMultiplyAndAccumulate(x, z_, context_, num_threads_); |
| A_.LeftMultiplyAndAccumulate(z_, y, context_, num_threads_); |
| |
| // y = y + DtDx |
| if (D_ != nullptr) { |
| int n = A_.num_cols(); |
| ParallelAssign( |
| context_, |
| num_threads_, |
| y, |
| y.array() + ConstVectorRef(D_, n).array().square() * x.array()); |
| } |
| } |
| |
| private: |
| const LinearOperator& A_; |
| const double* D_; |
| Vector z_; |
| |
| ContextImpl* context_; |
| int num_threads_; |
| }; |
| |
| CgnrSolver::CgnrSolver(LinearSolver::Options options) |
| : options_(std::move(options)) { |
| if (options_.preconditioner_type != JACOBI && |
| options_.preconditioner_type != IDENTITY && |
| options_.preconditioner_type != SUBSET) { |
| LOG(FATAL) |
| << "Preconditioner = " |
| << PreconditionerTypeToString(options_.preconditioner_type) << ". " |
| << "Congratulations, you found a bug in Ceres. Please report it."; |
| } |
| } |
| |
| CgnrSolver::~CgnrSolver() { |
| for (int i = 0; i < 4; ++i) { |
| if (scratch_[i]) { |
| delete scratch_[i]; |
| scratch_[i] = nullptr; |
| } |
| } |
| } |
| |
| LinearSolver::Summary CgnrSolver::SolveImpl( |
| BlockSparseMatrix* A, |
| const double* b, |
| const LinearSolver::PerSolveOptions& per_solve_options, |
| double* x) { |
| EventLogger event_logger("CgnrSolver::Solve"); |
| if (!preconditioner_) { |
| Preconditioner::Options preconditioner_options; |
| preconditioner_options.type = options_.preconditioner_type; |
| preconditioner_options.subset_preconditioner_start_row_block = |
| options_.subset_preconditioner_start_row_block; |
| preconditioner_options.sparse_linear_algebra_library_type = |
| options_.sparse_linear_algebra_library_type; |
| preconditioner_options.ordering_type = options_.ordering_type; |
| preconditioner_options.num_threads = options_.num_threads; |
| preconditioner_options.context = options_.context; |
| |
| if (options_.preconditioner_type == JACOBI) { |
| preconditioner_ = std::make_unique<BlockSparseJacobiPreconditioner>( |
| preconditioner_options, *A); |
| } else if (options_.preconditioner_type == SUBSET) { |
| preconditioner_ = |
| std::make_unique<SubsetPreconditioner>(preconditioner_options, *A); |
| } else { |
| preconditioner_ = std::make_unique<IdentityPreconditioner>(A->num_cols()); |
| } |
| } |
| preconditioner_->Update(*A, per_solve_options.D); |
| |
| ConjugateGradientsSolverOptions cg_options; |
| cg_options.min_num_iterations = options_.min_num_iterations; |
| cg_options.max_num_iterations = options_.max_num_iterations; |
| cg_options.residual_reset_period = options_.residual_reset_period; |
| cg_options.q_tolerance = per_solve_options.q_tolerance; |
| cg_options.r_tolerance = per_solve_options.r_tolerance; |
| cg_options.context = options_.context; |
| cg_options.num_threads = options_.num_threads; |
| |
| // lhs = AtA + DtD |
| CgnrLinearOperator lhs( |
| *A, per_solve_options.D, options_.context, options_.num_threads); |
| // rhs = Atb. |
| Vector rhs(A->num_cols()); |
| rhs.setZero(); |
| A->LeftMultiplyAndAccumulate( |
| b, rhs.data(), options_.context, options_.num_threads); |
| |
| cg_solution_ = Vector::Zero(A->num_cols()); |
| for (int i = 0; i < 4; ++i) { |
| if (scratch_[i] == nullptr) { |
| scratch_[i] = new Vector(A->num_cols()); |
| } |
| } |
| event_logger.AddEvent("Setup"); |
| |
| LinearOperatorAdapter preconditioner(*preconditioner_); |
| auto summary = ConjugateGradientsSolver( |
| cg_options, lhs, rhs, preconditioner, scratch_, cg_solution_); |
| VectorRef(x, A->num_cols()) = cg_solution_; |
| event_logger.AddEvent("Solve"); |
| return summary; |
| } |
| |
| #ifndef CERES_NO_CUDA |
| |
| // A linear operator which takes a matrix A and a diagonal vector D and |
| // performs products of the form |
| // |
| // (A^T A + D^T D)x |
| // |
| // This is used to implement iterative general sparse linear solving with |
| // conjugate gradients, where A is the Jacobian and D is a regularizing |
| // parameter. A brief proof is included in cgnr_linear_operator.h. |
| class CERES_NO_EXPORT CudaCgnrLinearOperator final |
| : public ConjugateGradientsLinearOperator<CudaVector> { |
| public: |
| CudaCgnrLinearOperator(CudaSparseMatrix& A, |
| const CudaVector& D, |
| CudaVector* z) |
| : A_(A), D_(D), z_(z) {} |
| |
| void RightMultiplyAndAccumulate(const CudaVector& x, CudaVector& y) final { |
| // z = Ax |
| z_->SetZero(); |
| A_.RightMultiplyAndAccumulate(x, z_); |
| |
| // y = y + Atz |
| // = y + AtAx |
| A_.LeftMultiplyAndAccumulate(*z_, &y); |
| |
| // y = y + DtDx |
| y.DtDxpy(D_, x); |
| } |
| |
| private: |
| CudaSparseMatrix& A_; |
| const CudaVector& D_; |
| CudaVector* z_ = nullptr; |
| }; |
| |
| class CERES_NO_EXPORT CudaIdentityPreconditioner final |
| : public CudaPreconditioner { |
| public: |
| void Update(const CompressedRowSparseMatrix& A, const double* D) final {} |
| void RightMultiplyAndAccumulate(const CudaVector& x, CudaVector& y) final { |
| y.Axpby(1.0, x, 1.0); |
| } |
| }; |
| |
| // This class wraps the existing CPU Jacobi preconditioner, caches the structure |
| // of the block diagonal, and for each CGNR solve updates the values on the CPU |
| // and then copies them over to the GPU. |
| class CERES_NO_EXPORT CudaJacobiPreconditioner final |
| : public CudaPreconditioner { |
| public: |
| explicit CudaJacobiPreconditioner(Preconditioner::Options options, |
| const CompressedRowSparseMatrix& A) |
| : options_(std::move(options)), |
| cpu_preconditioner_(options_, A), |
| m_(options_.context, cpu_preconditioner_.matrix()) {} |
| ~CudaJacobiPreconditioner() = default; |
| |
| void Update(const CompressedRowSparseMatrix& A, const double* D) final { |
| cpu_preconditioner_.Update(A, D); |
| m_.CopyValuesFromCpu(cpu_preconditioner_.matrix()); |
| } |
| |
| void RightMultiplyAndAccumulate(const CudaVector& x, CudaVector& y) final { |
| m_.RightMultiplyAndAccumulate(x, &y); |
| } |
| |
| private: |
| Preconditioner::Options options_; |
| BlockCRSJacobiPreconditioner cpu_preconditioner_; |
| CudaSparseMatrix m_; |
| }; |
| |
| CudaCgnrSolver::CudaCgnrSolver(LinearSolver::Options options) |
| : options_(std::move(options)) {} |
| |
| CudaCgnrSolver::~CudaCgnrSolver() { |
| for (int i = 0; i < 4; ++i) { |
| if (scratch_[i]) { |
| delete scratch_[i]; |
| scratch_[i] = nullptr; |
| } |
| } |
| } |
| |
| std::unique_ptr<CudaCgnrSolver> CudaCgnrSolver::Create( |
| LinearSolver::Options options, std::string* error) { |
| CHECK(error != nullptr); |
| if (options.preconditioner_type != IDENTITY && |
| options.preconditioner_type != JACOBI) { |
| *error = |
| "CudaCgnrSolver does not support preconditioner type " + |
| std::string(PreconditionerTypeToString(options.preconditioner_type)) + |
| ". "; |
| return nullptr; |
| } |
| CHECK(options.context->IsCudaInitialized()) |
| << "CudaCgnrSolver requires CUDA initialization."; |
| auto solver = std::make_unique<CudaCgnrSolver>(options); |
| return solver; |
| } |
| |
| void CudaCgnrSolver::CpuToGpuTransfer(const CompressedRowSparseMatrix& A, |
| const double* b, |
| const double* D) { |
| if (A_ == nullptr) { |
| // Assume structure is not cached, do an initialization and structural copy. |
| A_ = std::make_unique<CudaSparseMatrix>(options_.context, A); |
| b_ = std::make_unique<CudaVector>(options_.context, A.num_rows()); |
| x_ = std::make_unique<CudaVector>(options_.context, A.num_cols()); |
| Atb_ = std::make_unique<CudaVector>(options_.context, A.num_cols()); |
| Ax_ = std::make_unique<CudaVector>(options_.context, A.num_rows()); |
| D_ = std::make_unique<CudaVector>(options_.context, A.num_cols()); |
| |
| Preconditioner::Options preconditioner_options; |
| preconditioner_options.type = options_.preconditioner_type; |
| preconditioner_options.subset_preconditioner_start_row_block = |
| options_.subset_preconditioner_start_row_block; |
| preconditioner_options.sparse_linear_algebra_library_type = |
| options_.sparse_linear_algebra_library_type; |
| preconditioner_options.ordering_type = options_.ordering_type; |
| preconditioner_options.num_threads = options_.num_threads; |
| preconditioner_options.context = options_.context; |
| |
| if (options_.preconditioner_type == JACOBI) { |
| preconditioner_ = |
| std::make_unique<CudaJacobiPreconditioner>(preconditioner_options, A); |
| } else { |
| preconditioner_ = std::make_unique<CudaIdentityPreconditioner>(); |
| } |
| for (int i = 0; i < 4; ++i) { |
| scratch_[i] = new CudaVector(options_.context, A.num_cols()); |
| } |
| } else { |
| // Assume structure is cached, do a value copy. |
| A_->CopyValuesFromCpu(A); |
| } |
| b_->CopyFromCpu(ConstVectorRef(b, A.num_rows())); |
| D_->CopyFromCpu(ConstVectorRef(D, A.num_cols())); |
| } |
| |
| LinearSolver::Summary CudaCgnrSolver::SolveImpl( |
| CompressedRowSparseMatrix* A, |
| const double* b, |
| const LinearSolver::PerSolveOptions& per_solve_options, |
| double* x) { |
| EventLogger event_logger("CudaCgnrSolver::Solve"); |
| LinearSolver::Summary summary; |
| summary.num_iterations = 0; |
| summary.termination_type = LinearSolverTerminationType::FATAL_ERROR; |
| |
| CpuToGpuTransfer(*A, b, per_solve_options.D); |
| event_logger.AddEvent("CPU to GPU Transfer"); |
| preconditioner_->Update(*A, per_solve_options.D); |
| event_logger.AddEvent("Preconditioner Update"); |
| |
| // Form z = Atb. |
| Atb_->SetZero(); |
| A_->LeftMultiplyAndAccumulate(*b_, Atb_.get()); |
| |
| // Solve (AtA + DtD)x = z (= Atb). |
| x_->SetZero(); |
| CudaCgnrLinearOperator lhs(*A_, *D_, Ax_.get()); |
| |
| event_logger.AddEvent("Setup"); |
| |
| ConjugateGradientsSolverOptions cg_options; |
| cg_options.min_num_iterations = options_.min_num_iterations; |
| cg_options.max_num_iterations = options_.max_num_iterations; |
| cg_options.residual_reset_period = options_.residual_reset_period; |
| cg_options.q_tolerance = per_solve_options.q_tolerance; |
| cg_options.r_tolerance = per_solve_options.r_tolerance; |
| |
| summary = ConjugateGradientsSolver( |
| cg_options, lhs, *Atb_, *preconditioner_, scratch_, *x_); |
| x_->CopyTo(x); |
| event_logger.AddEvent("Solve"); |
| return summary; |
| } |
| |
| #endif // CERES_NO_CUDA |
| |
| } // namespace ceres::internal |