Fix Eigen alignment issues.

For proper alignment on the heap Eigen needs to have a custom
allocator. There are two forms, new and in-place new. To make sure
that memory is aligned using new, one needs to overload new by
adding EIGEN_MAKE_ALIGNED_OPERATOR_NEW to any struct which contains a
fixed size Eigen type either through inheritance or as a direct or
indirect member. For the in-place new one need to use the
Eigen::aligned_allocator (e.g. for std::vector, std::list,
FixedArray, etc.). For more details see:
https://eigen.tuxfamily.org/dox/group__DenseMatrixManipulation__Alignement.html

This CL adds EIGEN_MAKE_ALIGNED_OPERATOR_NEW to all structs, which
contain fixed-size Eigen types and uses the Eigen::aligned_allocator
for containers which stores structs of fixed-size Eigen types.

Change-Id: I06c6c4fc74a6835918d5d1c571b7814a14c029d8
diff --git a/include/ceres/dynamic_autodiff_cost_function.h b/include/ceres/dynamic_autodiff_cost_function.h
index 1bfb7a5..4c1517a 100644
--- a/include/ceres/dynamic_autodiff_cost_function.h
+++ b/include/ceres/dynamic_autodiff_cost_function.h
@@ -33,11 +33,12 @@
 #define CERES_PUBLIC_DYNAMIC_AUTODIFF_COST_FUNCTION_H_
 
 #include <cmath>
+#include <memory>
 #include <numeric>
 #include <vector>
 
-#include <memory>
 #include "ceres/dynamic_cost_function.h"
+#include "ceres/internal/fixed_array.h"
 #include "ceres/jet.h"
 #include "glog/logging.h"
 
@@ -112,12 +113,15 @@
                                                0);
 
     // Allocate scratch space for the strided evaluation.
-    std::vector<Jet<double, Stride>> input_jets(num_parameters);
-    std::vector<Jet<double, Stride>> output_jets(num_residuals());
+    using JetT = Jet<double, Stride>;
+    internal::FixedArray<JetT, (256 * 7) / sizeof(JetT)> input_jets(
+        num_parameters);
+    internal::FixedArray<JetT, (256 * 7) / sizeof(JetT)> output_jets(
+        num_residuals());
 
     // Make the parameter pack that is sent to the functor (reused).
-    std::vector<Jet<double, Stride>* > jet_parameters(num_parameter_blocks,
-        static_cast<Jet<double, Stride>* >(NULL));
+    internal::FixedArray<Jet<double, Stride>*> jet_parameters(
+        num_parameter_blocks, nullptr);
     int num_active_parameters = 0;
 
     // To handle constant parameters between non-constant parameter blocks, the
diff --git a/include/ceres/internal/fixed_array.h b/include/ceres/internal/fixed_array.h
index 69af2ea..c107dfc 100644
--- a/include/ceres/internal/fixed_array.h
+++ b/include/ceres/internal/fixed_array.h
@@ -37,6 +37,8 @@
 #include <tuple>
 #include <type_traits>
 
+#include <Eigen/Core> // For Eigen::aligned_allocator
+
 #include "ceres/internal/algorithm.h"
 #include "ceres/internal/memory.h"
 #include "glog/logging.h"
@@ -46,6 +48,18 @@
 
 constexpr static auto kFixedArrayUseDefault = static_cast<size_t>(-1);
 
+// The default fixed array allocator.
+//
+// As one can not easily detect if a struct contains or inherits from a fixed
+// size Eigen type, to be safe the Eigen::aligned_allocator is used by default.
+// But trivial types can never contain Eigen types, so std::allocator is used to
+// safe some heap memory.
+template <typename T>
+using FixedArrayDefaultAllocator =
+    typename std::conditional<std::is_trivial<T>::value,
+                              std::allocator<T>,
+                              Eigen::aligned_allocator<T>>::type;
+
 // -----------------------------------------------------------------------------
 // FixedArray
 // -----------------------------------------------------------------------------
@@ -71,7 +85,7 @@
 // operators.
 template <typename T,
           size_t N = kFixedArrayUseDefault,
-          typename A = std::allocator<T>>
+          typename A = FixedArrayDefaultAllocator<T>>
 class FixedArray {
   static_assert(!std::is_array<T>::value || std::extent<T>::value > 0,
                 "Arrays with unknown bounds cannot be used with FixedArray.");
diff --git a/include/ceres/jet.h b/include/ceres/jet.h
index 2b54064..c5e4ac7 100644
--- a/include/ceres/jet.h
+++ b/include/ceres/jet.h
@@ -250,61 +250,11 @@
   T a;
 
   // The infinitesimal part.
-  //
-  // We allocate Jets on the stack and other places they might not be aligned
-  // to X(=16 [SSE], 32 [AVX] etc)-byte boundaries, which would prevent the safe
-  // use of vectorisation.  If we have C++11, we can specify the alignment.
-  // However, the standard gives wide latitude as to what alignments are valid,
-  // and it might be that the maximum supported alignment *guaranteed* to be
-  // supported is < 16, in which case we do not specify an alignment, as this
-  // implies the host is not a modern x86 machine.  If using < C++11, we cannot
-  // specify alignment.
+  Eigen::Matrix<T, N, 1> v;
 
-#if defined(EIGEN_DONT_VECTORIZE)
-  Eigen::Matrix<T, N, 1, Eigen::DontAlign> v;
-#else
-  // Enable vectorisation iff the maximum supported scalar alignment is >=
-  // 16 bytes, as this is the minimum required by Eigen for any vectorisation.
-  //
-  // NOTE: It might be the case that we could get >= 16-byte alignment even if
-  //       max_align_t < 16.  However we can't guarantee that this
-  //       would happen (and it should not for any modern x86 machine) and if it
-  //       didn't, we could get misaligned Jets.
-  static constexpr int kAlignOrNot =
-      // Work around a GCC 4.8 bug
-      // (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=56019) where
-      // std::max_align_t is misplaced.
-#if defined (__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ == 8
-      alignof(::max_align_t) >= 16
-#else
-      alignof(std::max_align_t) >= 16
-#endif
-      ? Eigen::AutoAlign : Eigen::DontAlign;
-
-#if defined(EIGEN_MAX_ALIGN_BYTES)
-  // Eigen >= 3.3 supports AVX & FMA instructions that require 32-byte alignment
-  // (greater for AVX512).  Rather than duplicating the detection logic, use
-  // Eigen's macro for the alignment size.
-  //
-  // NOTE: EIGEN_MAX_ALIGN_BYTES can be > 16 (e.g. 32 for AVX), even though
-  //       kMaxAlignBytes will max out at 16.  We are therefore relying on
-  //       Eigen's detection logic to ensure that this does not result in
-  //       misaligned Jets.
-#define CERES_JET_ALIGN_BYTES EIGEN_MAX_ALIGN_BYTES
-#else
-  // Eigen < 3.3 only supported 16-byte alignment.
-#define CERES_JET_ALIGN_BYTES 16
-#endif
-
-  // Default to the native alignment if 16-byte alignment is not guaranteed to
-  // be supported.  We cannot use alignof(T) as if we do, GCC 4.8 complains that
-  // the alignment 'is not an integer constant', although Clang accepts it.
-  static constexpr size_t kAlignment = kAlignOrNot == Eigen::AutoAlign
-            ? CERES_JET_ALIGN_BYTES : alignof(double);
-
-#undef CERES_JET_ALIGN_BYTES
-  alignas(kAlignment) Eigen::Matrix<T, N, 1, kAlignOrNot> v;
-#endif
+  // This struct needs to have an Eigen aligned operator new as it contains
+  // fixed-size Eigen types.
+  EIGEN_MAKE_ALIGNED_OPERATOR_NEW
 };
 
 // Unary +
diff --git a/include/ceres/tiny_solver.h b/include/ceres/tiny_solver.h
index b6484fe..e5e70b3 100644
--- a/include/ceres/tiny_solver.h
+++ b/include/ceres/tiny_solver.h
@@ -131,6 +131,10 @@
                          Function::NUM_PARAMETERS>>>
 class TinySolver {
  public:
+  // This class needs to have an Eigen aligned operator new as it contains
+  // fixed-size Eigen types.
+  EIGEN_MAKE_ALIGNED_OPERATOR_NEW
+
   enum {
     NUM_RESIDUALS = Function::NUM_RESIDUALS,
     NUM_PARAMETERS = Function::NUM_PARAMETERS
diff --git a/include/ceres/tiny_solver_autodiff_function.h b/include/ceres/tiny_solver_autodiff_function.h
index cc93d73..7ed752a 100644
--- a/include/ceres/tiny_solver_autodiff_function.h
+++ b/include/ceres/tiny_solver_autodiff_function.h
@@ -109,6 +109,10 @@
          typename T = double>
 class TinySolverAutoDiffFunction {
  public:
+  // This class needs to have an Eigen aligned operator new as it contains
+  // as a member a Jet type, which itself has a fixed-size Eigen type as member.
+  EIGEN_MAKE_ALIGNED_OPERATOR_NEW
+
   TinySolverAutoDiffFunction(const CostFunctor& cost_functor)
       : cost_functor_(cost_functor) {
     Initialize<kNumResiduals>(cost_functor);
diff --git a/include/ceres/tiny_solver_cost_function_adapter.h b/include/ceres/tiny_solver_cost_function_adapter.h
index d44bdeb..63ac6c6 100644
--- a/include/ceres/tiny_solver_cost_function_adapter.h
+++ b/include/ceres/tiny_solver_cost_function_adapter.h
@@ -72,7 +72,8 @@
 //
 //   TinySolverCostFunctionAdapter cost_function_adapter(*cost_function);
 //
-template <int kNumResiduals = Eigen::Dynamic, int kNumParameters = Eigen::Dynamic>
+template <int kNumResiduals = Eigen::Dynamic,
+          int kNumParameters = Eigen::Dynamic>
 class TinySolverCostFunctionAdapter {
  public:
   typedef double Scalar;
@@ -81,6 +82,10 @@
     NUM_RESIDUALS = kNumResiduals
   };
 
+  // This struct needs to have an Eigen aligned operator new as it contains
+  // fixed-size Eigen types.
+  EIGEN_MAKE_ALIGNED_OPERATOR_NEW
+
   TinySolverCostFunctionAdapter(const CostFunction& cost_function)
       : cost_function_(cost_function) {
     CHECK_EQ(cost_function_.parameter_block_sizes().size(), 1)
@@ -127,6 +132,7 @@
     return cost_function_.parameter_block_sizes()[0];
   }
 
+ private:
   const CostFunction& cost_function_;
   mutable Eigen::Matrix<double, NUM_RESIDUALS, NUM_PARAMETERS, Eigen::RowMajor>
       row_major_jacobian_;
diff --git a/internal/ceres/cubic_interpolation_test.cc b/internal/ceres/cubic_interpolation_test.cc
index d68af22..e1abb0f 100644
--- a/internal/ceres/cubic_interpolation_test.cc
+++ b/internal/ceres/cubic_interpolation_test.cc
@@ -313,6 +313,10 @@
 
 class BiCubicInterpolatorTest : public ::testing::Test {
  public:
+  // This class needs to have an Eigen aligned operator new as it contains
+  // fixed-size Eigen types.
+  EIGEN_MAKE_ALIGNED_OPERATOR_NEW
+
   template <int kDataDimension>
   void RunPolynomialInterpolationTest(const Eigen::Matrix3d& coeff) {
     values_.reset(new double[kNumRows * kNumCols * kDataDimension]);
diff --git a/internal/ceres/dynamic_sparsity_test.cc b/internal/ceres/dynamic_sparsity_test.cc
index 5fe60f4..7c2e4ac 100644
--- a/internal/ceres/dynamic_sparsity_test.cc
+++ b/internal/ceres/dynamic_sparsity_test.cc
@@ -272,6 +272,10 @@
 
 class PointToLineSegmentContourCostFunction : public CostFunction {
  public:
+  // This class needs to have an Eigen aligned operator new as it contains
+  // fixed-size Eigen types.
+  EIGEN_MAKE_ALIGNED_OPERATOR_NEW
+
   PointToLineSegmentContourCostFunction(const int num_segments,
                                         const Eigen::Vector2d& y)
       : num_segments_(num_segments), y_(y) {
diff --git a/internal/ceres/fixed_array_test.cc b/internal/ceres/fixed_array_test.cc
index 2909707..79ae511 100644
--- a/internal/ceres/fixed_array_test.cc
+++ b/internal/ceres/fixed_array_test.cc
@@ -834,4 +834,27 @@
   }
 }
 
+struct EigenStruct {
+  Eigen::Vector4d data;
+};
+
+static_assert(
+    std::is_same<ceres::internal::FixedArrayDefaultAllocator<double>,
+                 std::allocator<double>>::value,
+    "Double is a trivial type, so std::allocator should be used here.");
+static_assert(
+    std::is_same<ceres::internal::FixedArrayDefaultAllocator<double*>,
+                 std::allocator<double*>>::value,
+    "A pointer is a trivial type, so std::allocator should be used here.");
+static_assert(
+    std::is_same<ceres::internal::FixedArrayDefaultAllocator<Eigen::Matrix4d>,
+                 Eigen::aligned_allocator<Eigen::Matrix4d>>::value,
+    "An Eigen::Matrix4d needs the Eigen::aligned_allocator for proper "
+    "alignment.");
+static_assert(
+    std::is_same<ceres::internal::FixedArrayDefaultAllocator<EigenStruct>,
+                 Eigen::aligned_allocator<EigenStruct>>::value,
+    "A struct containing fixed size Eigen types needs Eigen::aligned_allocator "
+    "for proper alignment.");
+
 }  // namespace