|  | // Ceres Solver - A fast non-linear least squares minimizer | 
|  | // Copyright 2018 Google Inc. All rights reserved. | 
|  | // http://ceres-solver.org/ | 
|  | // | 
|  | // Redistribution and use in source and binary forms, with or without | 
|  | // modification, are permitted provided that the following conditions are met: | 
|  | // | 
|  | // * Redistributions of source code must retain the above copyright notice, | 
|  | //   this list of conditions and the following disclaimer. | 
|  | // * Redistributions in binary form must reproduce the above copyright notice, | 
|  | //   this list of conditions and the following disclaimer in the documentation | 
|  | //   and/or other materials provided with the distribution. | 
|  | // * Neither the name of Google Inc. nor the names of its contributors may be | 
|  | //   used to endorse or promote products derived from this software without | 
|  | //   specific prior written permission. | 
|  | // | 
|  | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | 
|  | // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | 
|  | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | 
|  | // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE | 
|  | // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | 
|  | // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | 
|  | // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | 
|  | // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | 
|  | // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | 
|  | // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | 
|  | // POSSIBILITY OF SUCH DAMAGE. | 
|  | // | 
|  | // Author: alexs.mac@gmail.com (Alex Stewart) | 
|  |  | 
|  | // This include must come before any #ifndef check on Ceres compile options. | 
|  | #include "ceres/internal/port.h" | 
|  |  | 
|  | #ifndef CERES_NO_ACCELERATE_SPARSE | 
|  |  | 
|  | #include "ceres/accelerate_sparse.h" | 
|  |  | 
|  | #include <algorithm> | 
|  | #include <string> | 
|  | #include <vector> | 
|  |  | 
|  | #include "ceres/compressed_col_sparse_matrix_utils.h" | 
|  | #include "ceres/compressed_row_sparse_matrix.h" | 
|  | #include "ceres/triplet_sparse_matrix.h" | 
|  | #include "glog/logging.h" | 
|  |  | 
|  | #define CASESTR(x) case x: return #x | 
|  |  | 
|  | namespace ceres { | 
|  | namespace internal { | 
|  |  | 
|  | namespace { | 
|  | const char* SparseStatusToString(SparseStatus_t status) { | 
|  | switch (status) { | 
|  | CASESTR(SparseStatusOK); | 
|  | CASESTR(SparseFactorizationFailed); | 
|  | CASESTR(SparseMatrixIsSingular); | 
|  | CASESTR(SparseInternalError); | 
|  | CASESTR(SparseParameterError); | 
|  | CASESTR(SparseStatusReleased); | 
|  | default: | 
|  | return "UKNOWN"; | 
|  | } | 
|  | } | 
|  | }  // namespace. | 
|  |  | 
|  | // Resizes workspace as required to contain at least required_size bytes | 
|  | // aligned to kAccelerateRequiredAlignment and returns a pointer to the | 
|  | // aligned start. | 
|  | void* ResizeForAccelerateAlignment(const size_t required_size, | 
|  | std::vector<uint8_t> *workspace) { | 
|  | // As per the Accelerate documentation, all workspace memory passed to the | 
|  | // sparse solver functions must be 16-byte aligned. | 
|  | constexpr int kAccelerateRequiredAlignment = 16; | 
|  | // Although malloc() on macOS should always be 16-byte aligned, it is unclear | 
|  | // if this holds for new(), or on other Apple OSs (phoneOS, watchOS etc). | 
|  | // As such we assume it is not and use std::align() to create a (potentially | 
|  | // offset) 16-byte aligned sub-buffer of the specified size within workspace. | 
|  | workspace->resize(required_size + kAccelerateRequiredAlignment); | 
|  | size_t size_from_aligned_start = workspace->size(); | 
|  | void* aligned_solve_workspace_start = | 
|  | reinterpret_cast<void*>(workspace->data()); | 
|  | aligned_solve_workspace_start = | 
|  | std::align(kAccelerateRequiredAlignment, | 
|  | required_size, | 
|  | aligned_solve_workspace_start, | 
|  | size_from_aligned_start); | 
|  | CHECK(aligned_solve_workspace_start != nullptr) | 
|  | << "required_size: " << required_size | 
|  | << ", workspace size: " << workspace->size(); | 
|  | return aligned_solve_workspace_start; | 
|  | } | 
|  |  | 
|  | template<typename Scalar> | 
|  | void AccelerateSparse<Scalar>::Solve(NumericFactorization* numeric_factor, | 
|  | DenseVector* rhs_and_solution) { | 
|  | // From SparseSolve() documentation in Solve.h | 
|  | const int required_size = | 
|  | numeric_factor->solveWorkspaceRequiredStatic + | 
|  | numeric_factor->solveWorkspaceRequiredPerRHS; | 
|  | SparseSolve(*numeric_factor, *rhs_and_solution, | 
|  | ResizeForAccelerateAlignment(required_size, &solve_workspace_)); | 
|  | } | 
|  |  | 
|  | template<typename Scalar> | 
|  | typename AccelerateSparse<Scalar>::ASSparseMatrix | 
|  | AccelerateSparse<Scalar>::CreateSparseMatrixTransposeView( | 
|  | CompressedRowSparseMatrix* A) { | 
|  | // Accelerate uses CSC as its sparse storage format whereas Ceres uses CSR. | 
|  | // As this method returns the transpose view we can flip rows/cols to map | 
|  | // from CSR to CSC^T. | 
|  | // | 
|  | // Accelerate's columnStarts is a long*, not an int*.  These types might be | 
|  | // different (e.g. ARM on iOS) so always make a copy. | 
|  | column_starts_.resize(A->num_rows() +1); // +1 for final column length. | 
|  | std::copy_n(A->rows(), column_starts_.size(), &column_starts_[0]); | 
|  |  | 
|  | ASSparseMatrix At; | 
|  | At.structure.rowCount = A->num_cols(); | 
|  | At.structure.columnCount = A->num_rows(); | 
|  | At.structure.columnStarts = &column_starts_[0]; | 
|  | At.structure.rowIndices = A->mutable_cols(); | 
|  | At.structure.attributes.transpose = false; | 
|  | At.structure.attributes.triangle = SparseUpperTriangle; | 
|  | At.structure.attributes.kind = SparseSymmetric; | 
|  | At.structure.attributes._reserved = 0; | 
|  | At.structure.attributes._allocatedBySparse = 0; | 
|  | At.structure.blockSize = 1; | 
|  | if (std::is_same<Scalar, double>::value) { | 
|  | At.data = reinterpret_cast<Scalar*>(A->mutable_values()); | 
|  | } else { | 
|  | values_ = | 
|  | ConstVectorRef(A->values(), A->num_nonzeros()).template cast<Scalar>(); | 
|  | At.data = values_.data(); | 
|  | } | 
|  | return At; | 
|  | } | 
|  |  | 
|  | template<typename Scalar> | 
|  | typename AccelerateSparse<Scalar>::SymbolicFactorization | 
|  | AccelerateSparse<Scalar>::AnalyzeCholesky(ASSparseMatrix* A) { | 
|  | return SparseFactor(SparseFactorizationCholesky, A->structure); | 
|  | } | 
|  |  | 
|  | template<typename Scalar> | 
|  | typename AccelerateSparse<Scalar>::NumericFactorization | 
|  | AccelerateSparse<Scalar>::Cholesky(ASSparseMatrix* A, | 
|  | SymbolicFactorization* symbolic_factor) { | 
|  | return SparseFactor(*symbolic_factor, *A); | 
|  | } | 
|  |  | 
|  | template<typename Scalar> | 
|  | void AccelerateSparse<Scalar>::Cholesky(ASSparseMatrix* A, | 
|  | NumericFactorization* numeric_factor) { | 
|  | // From SparseRefactor() documentation in Solve.h | 
|  | const int required_size = std::is_same<Scalar, double>::value | 
|  | ? numeric_factor->symbolicFactorization.workspaceSize_Double | 
|  | : numeric_factor->symbolicFactorization.workspaceSize_Float; | 
|  | return SparseRefactor(*A, numeric_factor, | 
|  | ResizeForAccelerateAlignment(required_size, | 
|  | &factorization_workspace_)); | 
|  | } | 
|  |  | 
|  | // Instantiate only for the specific template types required/supported s/t the | 
|  | // definition can be in the .cc file. | 
|  | template class AccelerateSparse<double>; | 
|  | template class AccelerateSparse<float>; | 
|  |  | 
|  | template<typename Scalar> | 
|  | std::unique_ptr<SparseCholesky> | 
|  | AppleAccelerateCholesky<Scalar>::Create(OrderingType ordering_type) { | 
|  | return std::unique_ptr<SparseCholesky>( | 
|  | new AppleAccelerateCholesky<Scalar>(ordering_type)); | 
|  | } | 
|  |  | 
|  | template<typename Scalar> | 
|  | AppleAccelerateCholesky<Scalar>::AppleAccelerateCholesky( | 
|  | const OrderingType ordering_type) | 
|  | : ordering_type_(ordering_type) {} | 
|  |  | 
|  | template<typename Scalar> | 
|  | AppleAccelerateCholesky<Scalar>::~AppleAccelerateCholesky() { | 
|  | FreeSymbolicFactorization(); | 
|  | FreeNumericFactorization(); | 
|  | } | 
|  |  | 
|  | template<typename Scalar> | 
|  | CompressedRowSparseMatrix::StorageType | 
|  | AppleAccelerateCholesky<Scalar>::StorageType() const { | 
|  | return CompressedRowSparseMatrix::LOWER_TRIANGULAR; | 
|  | } | 
|  |  | 
|  | template<typename Scalar> | 
|  | LinearSolverTerminationType | 
|  | AppleAccelerateCholesky<Scalar>::Factorize(CompressedRowSparseMatrix* lhs, | 
|  | std::string* message) { | 
|  | CHECK_EQ(lhs->storage_type(), StorageType()); | 
|  | if (lhs == NULL) { | 
|  | *message = "Failure: Input lhs is NULL."; | 
|  | return LINEAR_SOLVER_FATAL_ERROR; | 
|  | } | 
|  | typename SparseTypesTrait<Scalar>::SparseMatrix as_lhs = | 
|  | as_.CreateSparseMatrixTransposeView(lhs); | 
|  |  | 
|  | if (!symbolic_factor_) { | 
|  | symbolic_factor_.reset( | 
|  | new typename SparseTypesTrait<Scalar>::SymbolicFactorization( | 
|  | as_.AnalyzeCholesky(&as_lhs))); | 
|  | if (symbolic_factor_->status != SparseStatusOK) { | 
|  | *message = StringPrintf( | 
|  | "Apple Accelerate Failure : Symbolic factorisation failed: %s", | 
|  | SparseStatusToString(symbolic_factor_->status)); | 
|  | FreeSymbolicFactorization(); | 
|  | return LINEAR_SOLVER_FATAL_ERROR; | 
|  | } | 
|  | } | 
|  |  | 
|  | if (!numeric_factor_) { | 
|  | numeric_factor_.reset( | 
|  | new typename SparseTypesTrait<Scalar>::NumericFactorization( | 
|  | as_.Cholesky(&as_lhs, symbolic_factor_.get()))); | 
|  | } else { | 
|  | // Recycle memory from previous numeric factorization. | 
|  | as_.Cholesky(&as_lhs, numeric_factor_.get()); | 
|  | } | 
|  | if (numeric_factor_->status != SparseStatusOK) { | 
|  | *message = StringPrintf( | 
|  | "Apple Accelerate Failure : Numeric factorisation failed: %s", | 
|  | SparseStatusToString(numeric_factor_->status)); | 
|  | FreeNumericFactorization(); | 
|  | return LINEAR_SOLVER_FAILURE; | 
|  | } | 
|  |  | 
|  | return LINEAR_SOLVER_SUCCESS; | 
|  | } | 
|  |  | 
|  | template<typename Scalar> | 
|  | LinearSolverTerminationType | 
|  | AppleAccelerateCholesky<Scalar>::Solve(const double* rhs, | 
|  | double* solution, | 
|  | std::string* message) { | 
|  | CHECK_EQ(numeric_factor_->status, SparseStatusOK) | 
|  | << "Solve called without a call to Factorize first (" | 
|  | << SparseStatusToString(numeric_factor_->status) << ")."; | 
|  | const int num_cols = numeric_factor_->symbolicFactorization.columnCount; | 
|  |  | 
|  | typename SparseTypesTrait<Scalar>::DenseVector as_rhs_and_solution; | 
|  | as_rhs_and_solution.count = num_cols; | 
|  | if (std::is_same<Scalar, double>::value) { | 
|  | as_rhs_and_solution.data = reinterpret_cast<Scalar*>(solution); | 
|  | std::copy_n(rhs, num_cols, solution); | 
|  | } else { | 
|  | scalar_rhs_and_solution_ = | 
|  | ConstVectorRef(rhs, num_cols).template cast<Scalar>(); | 
|  | as_rhs_and_solution.data = scalar_rhs_and_solution_.data(); | 
|  | } | 
|  | as_.Solve(numeric_factor_.get(), &as_rhs_and_solution); | 
|  | if (!std::is_same<Scalar, double>::value) { | 
|  | VectorRef(solution, num_cols) = | 
|  | scalar_rhs_and_solution_.template cast<double>(); | 
|  | } | 
|  | return LINEAR_SOLVER_SUCCESS; | 
|  | } | 
|  |  | 
|  | template<typename Scalar> | 
|  | void AppleAccelerateCholesky<Scalar>::FreeSymbolicFactorization() { | 
|  | if (symbolic_factor_) { | 
|  | SparseCleanup(*symbolic_factor_); | 
|  | symbolic_factor_.reset(); | 
|  | } | 
|  | } | 
|  |  | 
|  | template<typename Scalar> | 
|  | void AppleAccelerateCholesky<Scalar>::FreeNumericFactorization() { | 
|  | if (numeric_factor_) { | 
|  | SparseCleanup(*numeric_factor_); | 
|  | numeric_factor_.reset(); | 
|  | } | 
|  | } | 
|  |  | 
|  | // Instantiate only for the specific template types required/supported s/t the | 
|  | // definition can be in the .cc file. | 
|  | template class AppleAccelerateCholesky<double>; | 
|  | template class AppleAccelerateCholesky<float>; | 
|  |  | 
|  | } | 
|  | } | 
|  |  | 
|  | #endif  // CERES_NO_ACCELERATE_SPARSE |