Avoid memory allocations in Accelerate Sparse[Refactor/Solve]().

- Maintain scratch workspaces for solving/refactoring instead of
  letting Sparse[Refactor/Solve]() allocate and free workspace
  memory on each call.

Change-Id: Idb9eda9c9c6cc7570cfdf851e59e5bf6b39521ce
diff --git a/internal/ceres/accelerate_sparse.cc b/internal/ceres/accelerate_sparse.cc
index dc02986..eb04e71 100644
--- a/internal/ceres/accelerate_sparse.cc
+++ b/internal/ceres/accelerate_sparse.cc
@@ -49,6 +49,7 @@
 namespace ceres {
 namespace internal {
 
+namespace {
 const char* SparseStatusToString(SparseStatus_t status) {
   switch (status) {
     CASESTR(SparseStatusOK);
@@ -61,11 +62,44 @@
       return "UKNOWN";
   }
 }
+}  // namespace.
+
+// Resizes workspace as required to contain at least required_size bytes
+// aligned to kAccelerateRequiredAlignment and returns a pointer to the
+// aligned start.
+void* ResizeForAccelerateAlignment(const size_t required_size,
+                                   std::vector<uint8_t>* workspace) {
+  // As per the Accelerate documentation, all workspace memory passed to the
+  // sparse solver functions must be 16-byte aligned.
+  constexpr int kAccelerateRequiredAlignment = 16;
+  // Although malloc() on macOS should always be 16-byte aligned, it is unclear
+  // if this holds for new(), or on other Apple OSs (iOS, watchOS etc.).
+  // As such we assume it is not and use std::align() to create a (potentially
+  // offset) 16-byte aligned sub-buffer of the specified size within workspace.
+  workspace->resize(required_size + kAccelerateRequiredAlignment);
+  size_t size_from_aligned_start = workspace->size();
+  void* aligned_workspace_start = workspace->data();
+  aligned_workspace_start =
+      std::align(kAccelerateRequiredAlignment,
+                 required_size,
+                 aligned_workspace_start,
+                 size_from_aligned_start);
+  CHECK(aligned_workspace_start != nullptr)
+      << "required_size: " << required_size
+      << ", workspace size: " << workspace->size();
+  return aligned_workspace_start;
+}
 
 template<typename Scalar>
 void AccelerateSparse<Scalar>::Solve(NumericFactorization* numeric_factor,
                                      DenseVector* rhs_and_solution) {
-  SparseSolve(*numeric_factor, *rhs_and_solution);
+  // From the SparseSolve() documentation in Solve.h: the workspace must hold
+  // the static requirement plus the per-RHS requirement for each right-hand
+  // side (here a single RHS).
+  const size_t required_size =
+      numeric_factor->solveWorkspaceRequiredStatic +
+      numeric_factor->solveWorkspaceRequiredPerRHS;
+  SparseSolve(*numeric_factor, *rhs_and_solution,
+              ResizeForAccelerateAlignment(required_size, &solve_workspace_));
 }
 
 template<typename Scalar>
@@ -118,7 +152,13 @@
 template<typename Scalar>
 void AccelerateSparse<Scalar>::Cholesky(ASSparseMatrix* A,
                                         NumericFactorization* numeric_factor) {
-  return SparseRefactor(*A, numeric_factor);
+  // From the SparseRefactor() documentation in Solve.h: the required
+  // workspace size depends on the scalar type of the factorization.
+  const size_t required_size = std::is_same<Scalar, double>::value
+      ? numeric_factor->symbolicFactorization.workspaceSize_Double
+      : numeric_factor->symbolicFactorization.workspaceSize_Float;
+  return SparseRefactor(*A, numeric_factor,
+                        ResizeForAccelerateAlignment(required_size,
+                                                     &factorization_workspace_));
 }
 
 // Instantiate only for the specific template types required/supported s/t the
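A hedged sketch (not part of the patch) of the compile-time scalar selection
used in Cholesky() above. SymbolicSizes is a hypothetical stand-in for the
two size fields Accelerate exposes on the symbolic factorization; no runtime
type dispatch is involved, since the condition is a compile-time constant:

  #include <cstddef>
  #include <type_traits>

  // Hypothetical stand-in for the workspace-size fields on Accelerate's
  // symbolic factorization object.
  struct SymbolicSizes {
    std::size_t workspaceSize_Double;
    std::size_t workspaceSize_Float;
  };

  // Mirrors the ternary in Cholesky(): std::is_same<Scalar, double>::value
  // is a compile-time constant, so the untaken branch is dead code the
  // compiler can discard.
  template <typename Scalar>
  std::size_t RefactorWorkspaceSize(const SymbolicSizes& sizes) {
    return std::is_same<Scalar, double>::value ? sizes.workspaceSize_Double
                                               : sizes.workspaceSize_Float;
  }

  int main() {
    const SymbolicSizes sizes = {1024, 512};
    return (RefactorWorkspaceSize<double>(sizes) == 1024 &&
            RefactorWorkspaceSize<float>(sizes) == 512) ? 0 : 1;
  }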
diff --git a/internal/ceres/accelerate_sparse.h b/internal/ceres/accelerate_sparse.h
index b849a80..068834c 100644
--- a/internal/ceres/accelerate_sparse.h
+++ b/internal/ceres/accelerate_sparse.h
@@ -101,6 +101,8 @@
 
  private:
   std::vector<long> column_starts_;
+  std::vector<uint8_t> solve_workspace_;
+  std::vector<uint8_t> factorization_workspace_;
   // Storage for the values of A if Scalar != double (necessitating a copy).
   Eigen::Matrix<Scalar, Eigen::Dynamic, 1> values_;
 };
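
Why holding the workspaces as members removes the steady-state allocations:
std::vector::resize() reallocates only when the requested size exceeds the
current capacity, and capacity never shrinks on resize, so once a workspace
reaches its high-water mark subsequent Solve()/Cholesky() calls perform no
heap traffic. A minimal sketch of that behaviour, with illustrative sizes:

  #include <cstddef>
  #include <cstdint>
  #include <cstdio>
  #include <vector>

  int main() {
    // Persists across iterations, like the workspace members above.
    std::vector<std::uint8_t> workspace;
    const std::size_t required_sizes[] = {256, 128, 256, 512, 512};
    for (const std::size_t required : required_sizes) {
      const std::size_t old_capacity = workspace.capacity();
      workspace.resize(required);  // Reallocates only if required > capacity.
      std::printf("required=%4zu  capacity %4zu -> %4zu%s\n",
                  required, old_capacity, workspace.capacity(),
                  workspace.capacity() != old_capacity ? "  (allocation)" : "");
    }
    return 0;
  }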