| // Ceres Solver - A fast non-linear least squares minimizer | 
 | // Copyright 2015 Google Inc. All rights reserved. | 
 | // http://ceres-solver.org/ | 
 | // | 
 | // Redistribution and use in source and binary forms, with or without | 
 | // modification, are permitted provided that the following conditions are met: | 
 | // | 
 | // * Redistributions of source code must retain the above copyright notice, | 
 | //   this list of conditions and the following disclaimer. | 
 | // * Redistributions in binary form must reproduce the above copyright notice, | 
 | //   this list of conditions and the following disclaimer in the documentation | 
 | //   and/or other materials provided with the distribution. | 
 | // * Neither the name of Google Inc. nor the names of its contributors may be | 
 | //   used to endorse or promote products derived from this software without | 
 | //   specific prior written permission. | 
 | // | 
 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | 
 | // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | 
 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | 
 | // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE | 
 | // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | 
 | // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | 
 | // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | 
 | // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | 
 | // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | 
 | // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | 
 | // POSSIBILITY OF SUCH DAMAGE. | 
 | // | 
 | // Author: keir@google.com (Keir Mierle) | 
 |  | 
 | #include "ceres/cgnr_solver.h" | 
 |  | 
 | #include <memory> | 
 | #include <utility> | 
 |  | 
 | #include "ceres/block_jacobi_preconditioner.h" | 
 | #include "ceres/conjugate_gradients_solver.h" | 
 | #include "ceres/cuda_sparse_matrix.h" | 
 | #include "ceres/cuda_vector.h" | 
 | #include "ceres/internal/eigen.h" | 
 | #include "ceres/linear_solver.h" | 
 | #include "ceres/subset_preconditioner.h" | 
 | #include "ceres/wall_time.h" | 
 | #include "glog/logging.h" | 
 |  | 
 | namespace ceres::internal { | 
 |  | 
 | // A linear operator which takes a matrix A and a diagonal vector D and | 
 | // performs products of the form | 
 | // | 
 | //   (A^T A + D^T D)x | 
 | // | 
 | // This is used to implement iterative general sparse linear solving with | 
 | // conjugate gradients, where A is the Jacobian and D is a regularizing | 
 | // parameter. A brief proof that D^T D is the correct regularizer: | 
 | // | 
 | // Given a regularized least squares problem: | 
 | // | 
 | //   min  ||Ax - b||^2 + ||Dx||^2 | 
 | //    x | 
 | // | 
 | // First expand into matrix notation: | 
 | // | 
//   (Ax - b)^T (Ax - b) + x^T D^T D x
 | // | 
 | // Then multiply out to get: | 
 | // | 
//   = x^T A^T A x - 2b^T Ax + b^T b + x^T D^T D x
 | // | 
 | // Take the derivative: | 
 | // | 
 | //   0 = 2A^TAx - 2A^T b + 2 D^TDx | 
 | //   0 = A^TAx - A^T b + D^TDx | 
 | //   0 = (A^TA + D^TD)x - A^T b | 
 | // | 
 | // Thus, the symmetric system we need to solve for CGNR is | 
 | // | 
 | //   Sx = z | 
 | // | 
 | // with S = A^TA + D^TD | 
 | //  and z = A^T b | 
 | // | 
 | // Note: This class is not thread safe, since it uses some temporary storage. | 
 | class CERES_NO_EXPORT CgnrLinearOperator final | 
 |     : public ConjugateGradientsLinearOperator<Vector> { | 
 |  public: | 
 |   CgnrLinearOperator(const LinearOperator& A, const double* D) | 
 |       : A_(A), D_(D), z_(Vector::Zero(A.num_rows())) {} | 
 |  | 
 |   void RightMultiplyAndAccumulate(const Vector& x, Vector& y) final { | 
 |     // z = Ax | 
 |     // y = y + Atz | 
 |     z_.setZero(); | 
 |     A_.RightMultiplyAndAccumulate(x, z_); | 
 |     A_.LeftMultiplyAndAccumulate(z_, y); | 
 |  | 
 |     // y = y + DtDx | 
 |     if (D_ != nullptr) { | 
 |       int n = A_.num_cols(); | 
 |       y.array() += ConstVectorRef(D_, n).array().square() * x.array(); | 
 |     } | 
 |   } | 
 |  | 
 |  private: | 
 |   const LinearOperator& A_; | 
 |   const double* D_; | 
 |   Vector z_; | 
 | }; | 
 |  | 
 | CgnrSolver::CgnrSolver(LinearSolver::Options options) | 
 |     : options_(std::move(options)) { | 
 |   if (options_.preconditioner_type != JACOBI && | 
 |       options_.preconditioner_type != IDENTITY && | 
 |       options_.preconditioner_type != SUBSET) { | 
 |     LOG(FATAL) | 
 |         << "Preconditioner = " | 
 |         << PreconditionerTypeToString(options_.preconditioner_type) << ". " | 
 |         << "Congratulations, you found a bug in Ceres. Please report it."; | 
 |   } | 
 | } | 
 |  | 
 | CgnrSolver::~CgnrSolver() { | 
 |   for (int i = 0; i < 4; ++i) { | 
 |     if (scratch_[i]) { | 
 |       delete scratch_[i]; | 
 |       scratch_[i] = nullptr; | 
 |     } | 
 |   } | 
 | } | 
 |  | 
// Solves the regularized normal equations (A^T A + D^T D) x = A^T b using
// preconditioned conjugate gradients.
//
// A is the Jacobian, b the residual vector, per_solve_options.D the
// (possibly null) regularization diagonal, and x receives the solution
// (A->num_cols() entries).
LinearSolver::Summary CgnrSolver::SolveImpl(
    BlockSparseMatrix* A,
    const double* b,
    const LinearSolver::PerSolveOptions& per_solve_options,
    double* x) {
  EventLogger event_logger("CgnrSolver::Solve");
  // The preconditioner is constructed lazily on the first solve and reused
  // afterwards; only its values are refreshed via Update() below.
  if (!preconditioner_) {
    Preconditioner::Options preconditioner_options;
    preconditioner_options.type = options_.preconditioner_type;
    preconditioner_options.subset_preconditioner_start_row_block =
        options_.subset_preconditioner_start_row_block;
    preconditioner_options.sparse_linear_algebra_library_type =
        options_.sparse_linear_algebra_library_type;
    preconditioner_options.ordering_type = options_.ordering_type;
    preconditioner_options.num_threads = options_.num_threads;
    preconditioner_options.context = options_.context;

    // The constructor has already LOG(FATAL)ed on any type other than
    // JACOBI, SUBSET or IDENTITY, so the else branch is IDENTITY.
    if (options_.preconditioner_type == JACOBI) {
      preconditioner_ = std::make_unique<BlockSparseJacobiPreconditioner>(
          preconditioner_options, *A);
    } else if (options_.preconditioner_type == SUBSET) {
      preconditioner_ =
          std::make_unique<SubsetPreconditioner>(preconditioner_options, *A);
    } else {
      preconditioner_ = std::make_unique<IdentityPreconditioner>(A->num_cols());
    }
  }

  preconditioner_->Update(*A, per_solve_options.D);

  ConjugateGradientsSolverOptions cg_options;
  cg_options.min_num_iterations = options_.min_num_iterations;
  cg_options.max_num_iterations = options_.max_num_iterations;
  cg_options.residual_reset_period = options_.residual_reset_period;
  cg_options.q_tolerance = per_solve_options.q_tolerance;
  cg_options.r_tolerance = per_solve_options.r_tolerance;

  // lhs = AtA + DtD
  CgnrLinearOperator lhs(*A, per_solve_options.D);
  // rhs = Atb.
  Vector rhs(A->num_cols());
  rhs.setZero();
  A->LeftMultiplyAndAccumulate(b, rhs.data());

  // Scratch vectors for the CG iterations are allocated once on the first
  // solve and reused on subsequent ones; they are freed in the destructor.
  cg_solution_ = Vector::Zero(A->num_cols());
  for (int i = 0; i < 4; ++i) {
    if (scratch_[i] == nullptr) {
      scratch_[i] = new Vector(A->num_cols());
    }
  }
  event_logger.AddEvent("Setup");

  LinearOperatorAdapter preconditioner(*preconditioner_);
  auto summary = ConjugateGradientsSolver(
      cg_options, lhs, rhs, preconditioner, scratch_, cg_solution_);
  VectorRef(x, A->num_cols()) = cg_solution_;
  event_logger.AddEvent("Solve");
  return summary;
}
 |  | 
 | #ifndef CERES_NO_CUDA | 
 |  | 
 | // A linear operator which takes a matrix A and a diagonal vector D and | 
 | // performs products of the form | 
 | // | 
 | //   (A^T A + D^T D)x | 
 | // | 
 | // This is used to implement iterative general sparse linear solving with | 
 | // conjugate gradients, where A is the Jacobian and D is a regularizing | 
 | // parameter. A brief proof is included in cgnr_linear_operator.h. | 
 | class CERES_NO_EXPORT CudaCgnrLinearOperator final | 
 |     : public ConjugateGradientsLinearOperator<CudaVector> { | 
 |  public: | 
 |   CudaCgnrLinearOperator(CudaSparseMatrix& A, | 
 |                          const CudaVector& D, | 
 |                          CudaVector* z) | 
 |       : A_(A), D_(D), z_(z) {} | 
 |  | 
 |   void RightMultiplyAndAccumulate(const CudaVector& x, CudaVector& y) final { | 
 |     // z = Ax | 
 |     z_->SetZero(); | 
 |     A_.RightMultiplyAndAccumulate(x, z_); | 
 |  | 
 |     // y = y + Atz | 
 |     //   = y + AtAx | 
 |     A_.LeftMultiplyAndAccumulate(*z_, &y); | 
 |  | 
 |     // y = y + DtDx | 
 |     y.DtDxpy(D_, x); | 
 |   } | 
 |  | 
 |  private: | 
 |   CudaSparseMatrix& A_; | 
 |   const CudaVector& D_; | 
 |   CudaVector* z_ = nullptr; | 
 | }; | 
 |  | 
 | class CERES_NO_EXPORT CudaIdentityPreconditioner final | 
 |     : public CudaPreconditioner { | 
 |  public: | 
 |   void Update(const CompressedRowSparseMatrix& A, const double* D) final {} | 
 |   void RightMultiplyAndAccumulate(const CudaVector& x, CudaVector& y) final { | 
 |     y.Axpby(1.0, x, 1.0); | 
 |   } | 
 | }; | 
 |  | 
 | // This class wraps the existing CPU Jacobi preconditioner, caches the structure | 
 | // of the block diagonal, and for each CGNR solve updates the values on the CPU | 
 | // and then copies them over to the GPU. | 
class CERES_NO_EXPORT CudaJacobiPreconditioner final
    : public CudaPreconditioner {
 public:
  // NOTE: the mem-initializer list depends on member declaration order:
  // options_ is initialized (moved into) first, so it is safe for
  // cpu_preconditioner_ and m_ to read options_ rather than the moved-from
  // constructor argument.
  explicit CudaJacobiPreconditioner(Preconditioner::Options options,
                                    const CompressedRowSparseMatrix& A)
      : options_(std::move(options)),
        cpu_preconditioner_(options_, A),
        m_(options_.context, cpu_preconditioner_.matrix()) {}
  ~CudaJacobiPreconditioner() = default;

  // Recomputes the block Jacobi preconditioner on the CPU, then copies only
  // the values to the GPU; the sparsity structure of m_ was cached at
  // construction time.
  void Update(const CompressedRowSparseMatrix& A, const double* D) final {
    cpu_preconditioner_.Update(A, D);
    m_.CopyValuesFromCpu(cpu_preconditioner_.matrix());
  }

  // Applies the preconditioner on the GPU: y += M^-1 x.
  void RightMultiplyAndAccumulate(const CudaVector& x, CudaVector& y) final {
    m_.RightMultiplyAndAccumulate(x, &y);
  }

 private:
  Preconditioner::Options options_;
  // CPU-side Jacobi preconditioner whose values are mirrored into m_.
  BlockCRSJacobiPreconditioner cpu_preconditioner_;
  // GPU copy of the block diagonal preconditioner matrix.
  CudaSparseMatrix m_;
};
 |  | 
// Takes ownership of the options; GPU objects are allocated lazily by
// CpuToGpuTransfer() on the first solve. Use Create() to construct with
// validation of the preconditioner type and CUDA initialization.
CudaCgnrSolver::CudaCgnrSolver(LinearSolver::Options options)
    : options_(std::move(options)) {}
 |  | 
 | CudaCgnrSolver::~CudaCgnrSolver() { | 
 |   for (int i = 0; i < 4; ++i) { | 
 |     if (scratch_[i]) { | 
 |       delete scratch_[i]; | 
 |       scratch_[i] = nullptr; | 
 |     } | 
 |   } | 
 | } | 
 |  | 
 | std::unique_ptr<CudaCgnrSolver> CudaCgnrSolver::Create( | 
 |     LinearSolver::Options options, std::string* error) { | 
 |   CHECK(error != nullptr); | 
 |   if (options.preconditioner_type != IDENTITY && | 
 |       options.preconditioner_type != JACOBI) { | 
 |     *error = | 
 |         "CudaCgnrSolver does not support preconditioner type " + | 
 |         std::string(PreconditionerTypeToString(options.preconditioner_type)) + | 
 |         ". "; | 
 |     return nullptr; | 
 |   } | 
 |   CHECK(options.context->IsCudaInitialized()) | 
 |       << "CudaCgnrSolver requires CUDA initialization."; | 
 |   auto solver = std::make_unique<CudaCgnrSolver>(options); | 
 |   return solver; | 
 | } | 
 |  | 
// Transfers the problem data (A, b, D) from the CPU to the GPU.
//
// On the first call (A_ == nullptr) all GPU objects — the sparse matrix, the
// working vectors, the preconditioner and the CG scratch vectors — are
// allocated and the sparsity structure of A is copied. Subsequent calls
// assume the structure of A is unchanged and copy only the values.
void CudaCgnrSolver::CpuToGpuTransfer(const CompressedRowSparseMatrix& A,
                                      const double* b,
                                      const double* D) {
  if (A_ == nullptr) {
    // Assume structure is not cached, do an initialization and structural copy.
    A_ = std::make_unique<CudaSparseMatrix>(options_.context, A);
    b_ = std::make_unique<CudaVector>(options_.context, A.num_rows());
    x_ = std::make_unique<CudaVector>(options_.context, A.num_cols());
    Atb_ = std::make_unique<CudaVector>(options_.context, A.num_cols());
    Ax_ = std::make_unique<CudaVector>(options_.context, A.num_rows());
    D_ = std::make_unique<CudaVector>(options_.context, A.num_cols());

    Preconditioner::Options preconditioner_options;
    preconditioner_options.type = options_.preconditioner_type;
    preconditioner_options.subset_preconditioner_start_row_block =
        options_.subset_preconditioner_start_row_block;
    preconditioner_options.sparse_linear_algebra_library_type =
        options_.sparse_linear_algebra_library_type;
    preconditioner_options.ordering_type = options_.ordering_type;
    preconditioner_options.num_threads = options_.num_threads;
    preconditioner_options.context = options_.context;

    // Create() has already rejected any type other than JACOBI or IDENTITY.
    if (options_.preconditioner_type == JACOBI) {
      preconditioner_ =
          std::make_unique<CudaJacobiPreconditioner>(preconditioner_options, A);
    } else {
      preconditioner_ = std::make_unique<CudaIdentityPreconditioner>();
    }
    // Scratch vectors for the CG iterations; freed in the destructor.
    for (int i = 0; i < 4; ++i) {
      scratch_[i] = new CudaVector(options_.context, A.num_cols());
    }
  } else {
    // Assume structure is cached, do a value copy.
    A_->CopyValuesFromCpu(A);
  }
  // b and D change every solve, so they are always copied.
  b_->CopyFromCpu(ConstVectorRef(b, A.num_rows()));
  D_->CopyFromCpu(ConstVectorRef(D, A.num_cols()));
}
 |  | 
// Solves the regularized normal equations (A^T A + D^T D) x = A^T b on the
// GPU using preconditioned conjugate gradients.
//
// The data is first transferred to (or refreshed on) the GPU via
// CpuToGpuTransfer(); the solution is copied back into x on exit.
LinearSolver::Summary CudaCgnrSolver::SolveImpl(
    CompressedRowSparseMatrix* A,
    const double* b,
    const LinearSolver::PerSolveOptions& per_solve_options,
    double* x) {
  EventLogger event_logger("CudaCgnrSolver::Solve");
  LinearSolver::Summary summary;
  summary.num_iterations = 0;
  summary.termination_type = LinearSolverTerminationType::FATAL_ERROR;

  CpuToGpuTransfer(*A, b, per_solve_options.D);
  event_logger.AddEvent("CPU to GPU Transfer");
  preconditioner_->Update(*A, per_solve_options.D);
  event_logger.AddEvent("Preconditioner Update");

  // Form z = Atb.
  Atb_->SetZero();
  A_->LeftMultiplyAndAccumulate(*b_, Atb_.get());

  // Solve (AtA + DtD)x = z (= Atb). Ax_ is lent to the operator as scratch
  // storage for the intermediate product A x.
  x_->SetZero();
  CudaCgnrLinearOperator lhs(*A_, *D_, Ax_.get());

  event_logger.AddEvent("Setup");

  ConjugateGradientsSolverOptions cg_options;
  cg_options.min_num_iterations = options_.min_num_iterations;
  cg_options.max_num_iterations = options_.max_num_iterations;
  cg_options.residual_reset_period = options_.residual_reset_period;
  cg_options.q_tolerance = per_solve_options.q_tolerance;
  cg_options.r_tolerance = per_solve_options.r_tolerance;

  summary = ConjugateGradientsSolver(
      cg_options, lhs, *Atb_, *preconditioner_, scratch_, *x_);
  // Copy the GPU solution back to the caller-provided CPU buffer.
  x_->CopyTo(x);
  event_logger.AddEvent("Solve");
  return summary;
}
 |  | 
 | #endif  // CERES_NO_CUDA | 
 |  | 
 | }  // namespace ceres::internal |