Avoid memory allocations in Accelerate Sparse[Refactor/Solve](). - Maintain scratch workspaces for solving/refactoring instead of forcing Sparse[Refactor/Solve]() to reallocate space on each call. Change-Id: Idb9eda9c9c6cc7570cfdf851e59e5bf6b39521ce
diff --git a/internal/ceres/accelerate_sparse.cc b/internal/ceres/accelerate_sparse.cc index dc02986..eb04e71 100644 --- a/internal/ceres/accelerate_sparse.cc +++ b/internal/ceres/accelerate_sparse.cc
@@ -49,6 +49,7 @@ namespace ceres { namespace internal { +namespace { const char* SparseStatusToString(SparseStatus_t status) { switch (status) { CASESTR(SparseStatusOK); @@ -61,11 +62,44 @@ return "UKNOWN"; } } +} // namespace. + +// Resizes workspace as required to contain at least required_size bytes +// aligned to kAccelerateRequiredAlignment and returns a pointer to the +// aligned start. +void* ResizeForAccelerateAlignment(const size_t required_size, + std::vector<uint8_t> *workspace) { + // As per the Accelerate documentation, all workspace memory passed to the + // sparse solver functions must be 16-byte aligned. + constexpr int kAccelerateRequiredAlignment = 16; + // Although malloc() on macOS should always be 16-byte aligned, it is unclear + // if this holds for new(), or on other Apple OSs (phoneOS, watchOS etc). + // As such we assume it is not and use std::align() to create a (potentially + // offset) 16-byte aligned sub-buffer of the specified size within workspace. + workspace->resize(required_size + kAccelerateRequiredAlignment); + size_t size_from_aligned_start = workspace->size(); + void* aligned_solve_workspace_start = + reinterpret_cast<void*>(workspace->data()); + aligned_solve_workspace_start = + std::align(kAccelerateRequiredAlignment, + required_size, + aligned_solve_workspace_start, + size_from_aligned_start); + CHECK(aligned_solve_workspace_start != nullptr) + << "required_size: " << required_size + << ", workspace size: " << workspace->size(); + return aligned_solve_workspace_start; +} template<typename Scalar> void AccelerateSparse<Scalar>::Solve(NumericFactorization* numeric_factor, DenseVector* rhs_and_solution) { - SparseSolve(*numeric_factor, *rhs_and_solution); + // From SparseSolve() documentation in Solve.h + const int required_size = + numeric_factor->solveWorkspaceRequiredStatic + + numeric_factor->solveWorkspaceRequiredPerRHS; + SparseSolve(*numeric_factor, *rhs_and_solution, + ResizeForAccelerateAlignment(required_size, + &solve_workspace_)); } template<typename Scalar> @@ -118,7 +152,13 @@ template<typename Scalar> void AccelerateSparse<Scalar>::Cholesky(ASSparseMatrix* A, NumericFactorization* numeric_factor) { - return SparseRefactor(*A, numeric_factor); + // From SparseRefactor() documentation in Solve.h + const int required_size = std::is_same<Scalar, double>::value + ? numeric_factor->symbolicFactorization.workspaceSize_Double + : numeric_factor->symbolicFactorization.workspaceSize_Float; + return SparseRefactor(*A, numeric_factor, + ResizeForAccelerateAlignment(required_size, + &factorization_workspace_)); } // Instantiate only for the specific template types required/supported s/t the
diff --git a/internal/ceres/accelerate_sparse.h b/internal/ceres/accelerate_sparse.h index b849a80..068834c 100644 --- a/internal/ceres/accelerate_sparse.h +++ b/internal/ceres/accelerate_sparse.h
@@ -101,6 +101,8 @@ private: std::vector<long> column_starts_; + std::vector<uint8_t> solve_workspace_; + std::vector<uint8_t> factorization_workspace_; // Storage for the values of A if Scalar != double (necessitating a copy). Eigen::Matrix<Scalar, Eigen::Dynamic, 1> values_; };