| // Ceres Solver - A fast non-linear least squares minimizer |
| // Copyright 2022 Google Inc. All rights reserved. |
| // http://ceres-solver.org/ |
| // |
| // Redistribution and use in source and binary forms, with or without |
| // modification, are permitted provided that the following conditions are met: |
| // |
| // * Redistributions of source code must retain the above copyright notice, |
| // this list of conditions and the following disclaimer. |
| // * Redistributions in binary form must reproduce the above copyright notice, |
| // this list of conditions and the following disclaimer in the documentation |
| // and/or other materials provided with the distribution. |
| // * Neither the name of Google Inc. nor the names of its contributors may be |
| // used to endorse or promote products derived from this software without |
| // specific prior written permission. |
| // |
| // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
| // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
| // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
| // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
| // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
| // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
| // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
| // POSSIBILITY OF SUCH DAMAGE. |
| // |
| // Author: sameeragarwal@google.com (Sameer Agarwal) |
| |
| #include "ceres/dense_cholesky.h" |
| |
| #include <algorithm> |
| #include <memory> |
| #include <string> |
| #include <utility> |
| #include <vector> |
| |
| #include "ceres/internal/config.h" |
| #include "ceres/iterative_refiner.h" |
| |
| #ifndef CERES_NO_CUDA |
| #include "ceres/context_impl.h" |
| #include "ceres/cuda_kernels.h" |
| #include "cuda_runtime.h" |
| #include "cusolverDn.h" |
| #endif // CERES_NO_CUDA |
| |
| #ifndef CERES_NO_LAPACK |
| |
| // C interface to the LAPACK Cholesky factorization and triangular solve. |
| extern "C" void dpotrf_( |
| const char* uplo, const int* n, double* a, const int* lda, int* info); |
| |
| extern "C" void dpotrs_(const char* uplo, |
| const int* n, |
| const int* nrhs, |
| const double* a, |
| const int* lda, |
| double* b, |
| const int* ldb, |
| int* info); |
| |
| extern "C" void spotrf_( |
| const char* uplo, const int* n, float* a, const int* lda, int* info); |
| |
| extern "C" void spotrs_(const char* uplo, |
| const int* n, |
| const int* nrhs, |
| const float* a, |
| const int* lda, |
| float* b, |
| const int* ldb, |
| int* info); |
| #endif |
| |
| namespace ceres::internal { |
| |
| DenseCholesky::~DenseCholesky() = default; |
| |
| std::unique_ptr<DenseCholesky> DenseCholesky::Create( |
| const LinearSolver::Options& options) { |
| std::unique_ptr<DenseCholesky> dense_cholesky; |
| |
| switch (options.dense_linear_algebra_library_type) { |
| case EIGEN: |
| // Eigen mixed precision solver not yet implemented. |
| if (options.use_mixed_precision_solves) { |
| dense_cholesky = std::make_unique<FloatEigenDenseCholesky>(); |
| } else { |
| dense_cholesky = std::make_unique<EigenDenseCholesky>(); |
| } |
| break; |
| |
| case LAPACK: |
| #ifndef CERES_NO_LAPACK |
| // LAPACK mixed precision solver not yet implemented. |
| if (options.use_mixed_precision_solves) { |
| dense_cholesky = std::make_unique<FloatLAPACKDenseCholesky>(); |
| } else { |
| dense_cholesky = std::make_unique<LAPACKDenseCholesky>(); |
| } |
| break; |
| #else |
| LOG(FATAL) << "Ceres was compiled without support for LAPACK."; |
| #endif |
| |
| case CUDA: |
| #ifndef CERES_NO_CUDA |
| if (options.use_mixed_precision_solves) { |
| dense_cholesky = CUDADenseCholeskyMixedPrecision::Create(options); |
| } else { |
| dense_cholesky = CUDADenseCholesky::Create(options); |
| } |
| break; |
| #else |
| LOG(FATAL) << "Ceres was compiled without support for CUDA."; |
| #endif |
| |
| default: |
| LOG(FATAL) << "Unknown dense linear algebra library type : " |
| << DenseLinearAlgebraLibraryTypeToString( |
| options.dense_linear_algebra_library_type); |
| } |
| |
| if (options.max_num_refinement_iterations > 0) { |
| auto refiner = std::make_unique<DenseIterativeRefiner>( |
| options.max_num_refinement_iterations); |
| dense_cholesky = std::make_unique<RefinedDenseCholesky>( |
| std::move(dense_cholesky), std::move(refiner)); |
| } |
| |
| return dense_cholesky; |
| } |
| |
| LinearSolverTerminationType DenseCholesky::FactorAndSolve( |
| int num_cols, |
| double* lhs, |
| const double* rhs, |
| double* solution, |
| std::string* message) { |
| LinearSolverTerminationType termination_type = |
| Factorize(num_cols, lhs, message); |
| if (termination_type == LinearSolverTerminationType::SUCCESS) { |
| termination_type = Solve(rhs, solution, message); |
| } |
| return termination_type; |
| } |
| |
| LinearSolverTerminationType EigenDenseCholesky::Factorize( |
| int num_cols, double* lhs, std::string* message) { |
| Eigen::Map<Eigen::MatrixXd> m(lhs, num_cols, num_cols); |
| llt_ = std::make_unique<LLTType>(m); |
| if (llt_->info() != Eigen::Success) { |
| *message = "Eigen failure. Unable to perform dense Cholesky factorization."; |
| return LinearSolverTerminationType::FAILURE; |
| } |
| |
| *message = "Success."; |
| return LinearSolverTerminationType::SUCCESS; |
| } |
| |
| LinearSolverTerminationType EigenDenseCholesky::Solve(const double* rhs, |
| double* solution, |
| std::string* message) { |
| if (llt_->info() != Eigen::Success) { |
| *message = "Eigen failure. Unable to perform dense Cholesky factorization."; |
| return LinearSolverTerminationType::FAILURE; |
| } |
| |
| VectorRef(solution, llt_->cols()) = |
| llt_->solve(ConstVectorRef(rhs, llt_->cols())); |
| *message = "Success."; |
| return LinearSolverTerminationType::SUCCESS; |
| } |
| |
| LinearSolverTerminationType FloatEigenDenseCholesky::Factorize( |
| int num_cols, double* lhs, std::string* message) { |
| // TODO(sameeragarwal): Check if this causes a double allocation. |
| lhs_ = Eigen::Map<Eigen::MatrixXd>(lhs, num_cols, num_cols).cast<float>(); |
| llt_ = std::make_unique<LLTType>(lhs_); |
| if (llt_->info() != Eigen::Success) { |
| *message = "Eigen failure. Unable to perform dense Cholesky factorization."; |
| return LinearSolverTerminationType::FAILURE; |
| } |
| |
| *message = "Success."; |
| return LinearSolverTerminationType::SUCCESS; |
| } |
| |
| LinearSolverTerminationType FloatEigenDenseCholesky::Solve( |
| const double* rhs, double* solution, std::string* message) { |
| if (llt_->info() != Eigen::Success) { |
| *message = "Eigen failure. Unable to perform dense Cholesky factorization."; |
| return LinearSolverTerminationType::FAILURE; |
| } |
| |
| rhs_ = ConstVectorRef(rhs, llt_->cols()).cast<float>(); |
| solution_ = llt_->solve(rhs_); |
| VectorRef(solution, llt_->cols()) = solution_.cast<double>(); |
| *message = "Success."; |
| return LinearSolverTerminationType::SUCCESS; |
| } |
| |
| #ifndef CERES_NO_LAPACK |
| LinearSolverTerminationType LAPACKDenseCholesky::Factorize( |
| int num_cols, double* lhs, std::string* message) { |
| lhs_ = lhs; |
| num_cols_ = num_cols; |
| |
| const char uplo = 'L'; |
| int info = 0; |
| dpotrf_(&uplo, &num_cols_, lhs_, &num_cols_, &info); |
| |
| if (info < 0) { |
| termination_type_ = LinearSolverTerminationType::FATAL_ERROR; |
| LOG(FATAL) << "Congratulations, you found a bug in Ceres. " |
| << "Please report it. " |
| << "LAPACK::dpotrf fatal error. " |
| << "Argument: " << -info << " is invalid."; |
| } else if (info > 0) { |
| termination_type_ = LinearSolverTerminationType::FAILURE; |
| *message = StringPrintf( |
| "LAPACK::dpotrf numerical failure. " |
| "The leading minor of order %d is not positive definite.", |
| info); |
| } else { |
| termination_type_ = LinearSolverTerminationType::SUCCESS; |
| *message = "Success."; |
| } |
| return termination_type_; |
| } |
| |
| LinearSolverTerminationType LAPACKDenseCholesky::Solve(const double* rhs, |
| double* solution, |
| std::string* message) { |
| const char uplo = 'L'; |
| const int nrhs = 1; |
| int info = 0; |
| |
| VectorRef(solution, num_cols_) = ConstVectorRef(rhs, num_cols_); |
| dpotrs_( |
| &uplo, &num_cols_, &nrhs, lhs_, &num_cols_, solution, &num_cols_, &info); |
| |
| if (info < 0) { |
| termination_type_ = LinearSolverTerminationType::FATAL_ERROR; |
| LOG(FATAL) << "Congratulations, you found a bug in Ceres. " |
| << "Please report it. " |
| << "LAPACK::dpotrs fatal error. " |
| << "Argument: " << -info << " is invalid."; |
| } |
| |
| *message = "Success"; |
| termination_type_ = LinearSolverTerminationType::SUCCESS; |
| |
| return termination_type_; |
| } |
| |
| LinearSolverTerminationType FloatLAPACKDenseCholesky::Factorize( |
| int num_cols, double* lhs, std::string* message) { |
| num_cols_ = num_cols; |
| lhs_ = Eigen::Map<Eigen::MatrixXd>(lhs, num_cols, num_cols).cast<float>(); |
| |
| const char uplo = 'L'; |
| int info = 0; |
| spotrf_(&uplo, &num_cols_, lhs_.data(), &num_cols_, &info); |
| |
| if (info < 0) { |
| termination_type_ = LinearSolverTerminationType::FATAL_ERROR; |
| LOG(FATAL) << "Congratulations, you found a bug in Ceres. " |
| << "Please report it. " |
| << "LAPACK::spotrf fatal error. " |
| << "Argument: " << -info << " is invalid."; |
| } else if (info > 0) { |
| termination_type_ = LinearSolverTerminationType::FAILURE; |
| *message = StringPrintf( |
| "LAPACK::spotrf numerical failure. " |
| "The leading minor of order %d is not positive definite.", |
| info); |
| } else { |
| termination_type_ = LinearSolverTerminationType::SUCCESS; |
| *message = "Success."; |
| } |
| return termination_type_; |
| } |
| |
| LinearSolverTerminationType FloatLAPACKDenseCholesky::Solve( |
| const double* rhs, double* solution, std::string* message) { |
| const char uplo = 'L'; |
| const int nrhs = 1; |
| int info = 0; |
| rhs_and_solution_ = ConstVectorRef(rhs, num_cols_).cast<float>(); |
| spotrs_(&uplo, |
| &num_cols_, |
| &nrhs, |
| lhs_.data(), |
| &num_cols_, |
| rhs_and_solution_.data(), |
| &num_cols_, |
| &info); |
| |
| if (info < 0) { |
| termination_type_ = LinearSolverTerminationType::FATAL_ERROR; |
| LOG(FATAL) << "Congratulations, you found a bug in Ceres. " |
| << "Please report it. " |
| << "LAPACK::dpotrs fatal error. " |
| << "Argument: " << -info << " is invalid."; |
| } |
| |
| *message = "Success"; |
| termination_type_ = LinearSolverTerminationType::SUCCESS; |
| VectorRef(solution, num_cols_) = |
| rhs_and_solution_.head(num_cols_).cast<double>(); |
| return termination_type_; |
| } |
| |
| #endif // CERES_NO_LAPACK |
| |
| RefinedDenseCholesky::RefinedDenseCholesky( |
| std::unique_ptr<DenseCholesky> dense_cholesky, |
| std::unique_ptr<DenseIterativeRefiner> iterative_refiner) |
| : dense_cholesky_(std::move(dense_cholesky)), |
| iterative_refiner_(std::move(iterative_refiner)) {} |
| |
| RefinedDenseCholesky::~RefinedDenseCholesky() = default; |
| |
| LinearSolverTerminationType RefinedDenseCholesky::Factorize( |
| const int num_cols, double* lhs, std::string* message) { |
| lhs_ = lhs; |
| num_cols_ = num_cols; |
| return dense_cholesky_->Factorize(num_cols, lhs, message); |
| } |
| |
| LinearSolverTerminationType RefinedDenseCholesky::Solve(const double* rhs, |
| double* solution, |
| std::string* message) { |
| CHECK(lhs_ != nullptr); |
| auto termination_type = dense_cholesky_->Solve(rhs, solution, message); |
| if (termination_type != LinearSolverTerminationType::SUCCESS) { |
| return termination_type; |
| } |
| |
| iterative_refiner_->Refine( |
| num_cols_, lhs_, rhs, dense_cholesky_.get(), solution); |
| return LinearSolverTerminationType::SUCCESS; |
| } |
| |
| #ifndef CERES_NO_CUDA |
| |
| CUDADenseCholesky::CUDADenseCholesky(ContextImpl* context) |
| : context_(context), |
| lhs_{context}, |
| rhs_{context}, |
| device_workspace_{context}, |
| error_(context, 1) {} |
| |
| LinearSolverTerminationType CUDADenseCholesky::Factorize(int num_cols, |
| double* lhs, |
| std::string* message) { |
| factorize_result_ = LinearSolverTerminationType::FATAL_ERROR; |
| lhs_.Reserve(num_cols * num_cols); |
| num_cols_ = num_cols; |
| lhs_.CopyFromCpu(lhs, num_cols * num_cols); |
| int device_workspace_size = 0; |
| if (cusolverDnDpotrf_bufferSize(context_->cusolver_handle_, |
| CUBLAS_FILL_MODE_LOWER, |
| num_cols, |
| lhs_.data(), |
| num_cols, |
| &device_workspace_size) != |
| CUSOLVER_STATUS_SUCCESS) { |
| *message = "cuSolverDN::cusolverDnDpotrf_bufferSize failed."; |
| return LinearSolverTerminationType::FATAL_ERROR; |
| } |
| device_workspace_.Reserve(device_workspace_size); |
| if (cusolverDnDpotrf(context_->cusolver_handle_, |
| CUBLAS_FILL_MODE_LOWER, |
| num_cols, |
| lhs_.data(), |
| num_cols, |
| reinterpret_cast<double*>(device_workspace_.data()), |
| device_workspace_.size(), |
| error_.data()) != CUSOLVER_STATUS_SUCCESS) { |
| *message = "cuSolverDN::cusolverDnDpotrf failed."; |
| return LinearSolverTerminationType::FATAL_ERROR; |
| } |
| int error = 0; |
| error_.CopyToCpu(&error, 1); |
| if (error < 0) { |
| LOG(FATAL) << "Congratulations, you found a bug in Ceres - " |
| << "please report it. " |
| << "cuSolverDN::cusolverDnXpotrf fatal error. " |
| << "Argument: " << -error << " is invalid."; |
| // The following line is unreachable, but return failure just to be |
| // pedantic, since the compiler does not know that. |
| return LinearSolverTerminationType::FATAL_ERROR; |
| } else if (error > 0) { |
| *message = StringPrintf( |
| "cuSolverDN::cusolverDnDpotrf numerical failure. " |
| "The leading minor of order %d is not positive definite.", |
| error); |
| factorize_result_ = LinearSolverTerminationType::FAILURE; |
| return LinearSolverTerminationType::FAILURE; |
| } |
| *message = "Success"; |
| factorize_result_ = LinearSolverTerminationType::SUCCESS; |
| return LinearSolverTerminationType::SUCCESS; |
| } |
| |
| LinearSolverTerminationType CUDADenseCholesky::Solve(const double* rhs, |
| double* solution, |
| std::string* message) { |
| if (factorize_result_ != LinearSolverTerminationType::SUCCESS) { |
| *message = "Factorize did not complete successfully previously."; |
| return factorize_result_; |
| } |
| rhs_.CopyFromCpu(rhs, num_cols_); |
| if (cusolverDnDpotrs(context_->cusolver_handle_, |
| CUBLAS_FILL_MODE_LOWER, |
| num_cols_, |
| 1, |
| lhs_.data(), |
| num_cols_, |
| rhs_.data(), |
| num_cols_, |
| error_.data()) != CUSOLVER_STATUS_SUCCESS) { |
| *message = "cuSolverDN::cusolverDnDpotrs failed."; |
| return LinearSolverTerminationType::FATAL_ERROR; |
| } |
| int error = 0; |
| error_.CopyToCpu(&error, 1); |
| if (error != 0) { |
| LOG(FATAL) << "Congratulations, you found a bug in Ceres. " |
| << "Please report it." |
| << "cuSolverDN::cusolverDnDpotrs fatal error. " |
| << "Argument: " << -error << " is invalid."; |
| } |
| rhs_.CopyToCpu(solution, num_cols_); |
| *message = "Success"; |
| return LinearSolverTerminationType::SUCCESS; |
| } |
| |
| std::unique_ptr<CUDADenseCholesky> CUDADenseCholesky::Create( |
| const LinearSolver::Options& options) { |
| if (options.dense_linear_algebra_library_type != CUDA || |
| options.context == nullptr || !options.context->IsCudaInitialized()) { |
| return nullptr; |
| } |
| return std::unique_ptr<CUDADenseCholesky>( |
| new CUDADenseCholesky(options.context)); |
| } |
| |
| std::unique_ptr<CUDADenseCholeskyMixedPrecision> |
| CUDADenseCholeskyMixedPrecision::Create(const LinearSolver::Options& options) { |
| if (options.dense_linear_algebra_library_type != CUDA || |
| !options.use_mixed_precision_solves || options.context == nullptr || |
| !options.context->IsCudaInitialized()) { |
| return nullptr; |
| } |
| return std::unique_ptr<CUDADenseCholeskyMixedPrecision>( |
| new CUDADenseCholeskyMixedPrecision( |
| options.context, options.max_num_refinement_iterations)); |
| } |
| |
| LinearSolverTerminationType |
| CUDADenseCholeskyMixedPrecision::CudaCholeskyFactorize(std::string* message) { |
| int device_workspace_size = 0; |
| if (cusolverDnSpotrf_bufferSize(context_->cusolver_handle_, |
| CUBLAS_FILL_MODE_LOWER, |
| num_cols_, |
| lhs_fp32_.data(), |
| num_cols_, |
| &device_workspace_size) != |
| CUSOLVER_STATUS_SUCCESS) { |
| *message = "cuSolverDN::cusolverDnSpotrf_bufferSize failed."; |
| return LinearSolverTerminationType::FATAL_ERROR; |
| } |
| device_workspace_.Reserve(device_workspace_size); |
| if (cusolverDnSpotrf(context_->cusolver_handle_, |
| CUBLAS_FILL_MODE_LOWER, |
| num_cols_, |
| lhs_fp32_.data(), |
| num_cols_, |
| device_workspace_.data(), |
| device_workspace_.size(), |
| error_.data()) != CUSOLVER_STATUS_SUCCESS) { |
| *message = "cuSolverDN::cusolverDnSpotrf failed."; |
| return LinearSolverTerminationType::FATAL_ERROR; |
| } |
| int error = 0; |
| error_.CopyToCpu(&error, 1); |
| if (error < 0) { |
| LOG(FATAL) << "Congratulations, you found a bug in Ceres - " |
| << "please report it. " |
| << "cuSolverDN::cusolverDnSpotrf fatal error. " |
| << "Argument: " << -error << " is invalid."; |
| // The following line is unreachable, but return failure just to be |
| // pedantic, since the compiler does not know that. |
| return LinearSolverTerminationType::FATAL_ERROR; |
| } |
| if (error > 0) { |
| *message = StringPrintf( |
| "cuSolverDN::cusolverDnSpotrf numerical failure. " |
| "The leading minor of order %d is not positive definite.", |
| error); |
| factorize_result_ = LinearSolverTerminationType::FAILURE; |
| return LinearSolverTerminationType::FAILURE; |
| } |
| *message = "Success"; |
| return LinearSolverTerminationType::SUCCESS; |
| } |
| |
| LinearSolverTerminationType CUDADenseCholeskyMixedPrecision::CudaCholeskySolve( |
| std::string* message) { |
| CHECK_EQ(cudaMemcpyAsync(correction_fp32_.data(), |
| residual_fp32_.data(), |
| num_cols_ * sizeof(float), |
| cudaMemcpyDeviceToDevice, |
| context_->DefaultStream()), |
| cudaSuccess); |
| if (cusolverDnSpotrs(context_->cusolver_handle_, |
| CUBLAS_FILL_MODE_LOWER, |
| num_cols_, |
| 1, |
| lhs_fp32_.data(), |
| num_cols_, |
| correction_fp32_.data(), |
| num_cols_, |
| error_.data()) != CUSOLVER_STATUS_SUCCESS) { |
| *message = "cuSolverDN::cusolverDnDpotrs failed."; |
| return LinearSolverTerminationType::FATAL_ERROR; |
| } |
| int error = 0; |
| error_.CopyToCpu(&error, 1); |
| if (error != 0) { |
| LOG(FATAL) << "Congratulations, you found a bug in Ceres. " |
| << "Please report it." |
| << "cuSolverDN::cusolverDnDpotrs fatal error. " |
| << "Argument: " << -error << " is invalid."; |
| } |
| *message = "Success"; |
| return LinearSolverTerminationType::SUCCESS; |
| } |
| |
| CUDADenseCholeskyMixedPrecision::CUDADenseCholeskyMixedPrecision( |
| ContextImpl* context, int max_num_refinement_iterations) |
| : context_(context), |
| lhs_fp64_{context}, |
| rhs_fp64_{context}, |
| lhs_fp32_{context}, |
| device_workspace_{context}, |
| error_(context, 1), |
| x_fp64_{context}, |
| correction_fp32_{context}, |
| residual_fp32_{context}, |
| residual_fp64_{context}, |
| max_num_refinement_iterations_(max_num_refinement_iterations) {} |
| |
| LinearSolverTerminationType CUDADenseCholeskyMixedPrecision::Factorize( |
| int num_cols, double* lhs, std::string* message) { |
| num_cols_ = num_cols; |
| |
| // Copy fp64 version of lhs to GPU. |
| lhs_fp64_.Reserve(num_cols * num_cols); |
| lhs_fp64_.CopyFromCpu(lhs, num_cols * num_cols); |
| |
| // Create an fp32 copy of lhs, lhs_fp32. |
| lhs_fp32_.Reserve(num_cols * num_cols); |
| CudaFP64ToFP32(lhs_fp64_.data(), |
| lhs_fp32_.data(), |
| num_cols * num_cols, |
| context_->DefaultStream()); |
| |
| // Factorize lhs_fp32. |
| factorize_result_ = CudaCholeskyFactorize(message); |
| return factorize_result_; |
| } |
| |
| LinearSolverTerminationType CUDADenseCholeskyMixedPrecision::Solve( |
| const double* rhs, double* solution, std::string* message) { |
| // If factorization failed, return failure. |
| if (factorize_result_ != LinearSolverTerminationType::SUCCESS) { |
| *message = "Factorize did not complete successfully previously."; |
| return factorize_result_; |
| } |
| |
| // Reserve memory for all arrays. |
| rhs_fp64_.Reserve(num_cols_); |
| x_fp64_.Reserve(num_cols_); |
| correction_fp32_.Reserve(num_cols_); |
| residual_fp32_.Reserve(num_cols_); |
| residual_fp64_.Reserve(num_cols_); |
| |
| // Initialize x = 0. |
| CudaSetZeroFP64(x_fp64_.data(), num_cols_, context_->DefaultStream()); |
| |
| // Initialize residual = rhs. |
| rhs_fp64_.CopyFromCpu(rhs, num_cols_); |
| residual_fp64_.CopyFromGPUArray(rhs_fp64_.data(), num_cols_); |
| |
| for (int i = 0; i <= max_num_refinement_iterations_; ++i) { |
| // Cast residual from fp64 to fp32. |
| CudaFP64ToFP32(residual_fp64_.data(), |
| residual_fp32_.data(), |
| num_cols_, |
| context_->DefaultStream()); |
| // [fp32] c = lhs^-1 * residual. |
| auto result = CudaCholeskySolve(message); |
| if (result != LinearSolverTerminationType::SUCCESS) { |
| return result; |
| } |
| // [fp64] x += c. |
| CudaDsxpy(x_fp64_.data(), |
| correction_fp32_.data(), |
| num_cols_, |
| context_->DefaultStream()); |
| if (i < max_num_refinement_iterations_) { |
| // [fp64] residual = rhs - lhs * x |
| // This is done in two steps: |
| // 1. [fp64] residual = rhs |
| residual_fp64_.CopyFromGPUArray(rhs_fp64_.data(), num_cols_); |
| // 2. [fp64] residual = residual - lhs * x |
| double alpha = -1.0; |
| double beta = 1.0; |
| cublasDsymv(context_->cublas_handle_, |
| CUBLAS_FILL_MODE_LOWER, |
| num_cols_, |
| &alpha, |
| lhs_fp64_.data(), |
| num_cols_, |
| x_fp64_.data(), |
| 1, |
| &beta, |
| residual_fp64_.data(), |
| 1); |
| } |
| } |
| x_fp64_.CopyToCpu(solution, num_cols_); |
| *message = "Success."; |
| return LinearSolverTerminationType::SUCCESS; |
| } |
| |
| #endif // CERES_NO_CUDA |
| |
| } // namespace ceres::internal |