Fix Eigen alignment issues. For proper alignment on the heap Eigen needs to have a custom allocator. There are two forms, new and in-place new. To make sure that memory is aligned using new, one needs to overload new by adding EIGEN_MAKE_ALIGNED_OPERATOR_NEW to any struct which contains a fixed size Eigen type either through inheritance or as a direct or indirect member. For the in-place new one need to use the Eigen::aligned_allocator (e.g. for std::vector, std::list, FixedArray, etc.). For more details see: https://eigen.tuxfamily.org/dox/group__DenseMatrixManipulation__Alignement.html This CL adds EIGEN_MAKE_ALIGNED_OPERATOR_NEW to all structs, which contain fixed-size Eigen types and uses the Eigen::aligned_allocator for containers which stores structs of fixed-size Eigen types. Change-Id: I06c6c4fc74a6835918d5d1c571b7814a14c029d8
diff --git a/include/ceres/dynamic_autodiff_cost_function.h b/include/ceres/dynamic_autodiff_cost_function.h index 1bfb7a5..4c1517a 100644 --- a/include/ceres/dynamic_autodiff_cost_function.h +++ b/include/ceres/dynamic_autodiff_cost_function.h
@@ -33,11 +33,12 @@ #define CERES_PUBLIC_DYNAMIC_AUTODIFF_COST_FUNCTION_H_ #include <cmath> +#include <memory> #include <numeric> #include <vector> -#include <memory> #include "ceres/dynamic_cost_function.h" +#include "ceres/internal/fixed_array.h" #include "ceres/jet.h" #include "glog/logging.h" @@ -112,12 +113,15 @@ 0); // Allocate scratch space for the strided evaluation. - std::vector<Jet<double, Stride>> input_jets(num_parameters); - std::vector<Jet<double, Stride>> output_jets(num_residuals()); + using JetT = Jet<double, Stride>; + internal::FixedArray<JetT, (256 * 7) / sizeof(JetT)> input_jets( + num_parameters); + internal::FixedArray<JetT, (256 * 7) / sizeof(JetT)> output_jets( + num_residuals()); // Make the parameter pack that is sent to the functor (reused). - std::vector<Jet<double, Stride>* > jet_parameters(num_parameter_blocks, - static_cast<Jet<double, Stride>* >(NULL)); + internal::FixedArray<Jet<double, Stride>*> jet_parameters( + num_parameter_blocks, nullptr); int num_active_parameters = 0; // To handle constant parameters between non-constant parameter blocks, the
diff --git a/include/ceres/internal/fixed_array.h b/include/ceres/internal/fixed_array.h index 69af2ea..c107dfc 100644 --- a/include/ceres/internal/fixed_array.h +++ b/include/ceres/internal/fixed_array.h
@@ -37,6 +37,8 @@ #include <tuple> #include <type_traits> +#include <Eigen/Core> // For Eigen::aligned_allocator + #include "ceres/internal/algorithm.h" #include "ceres/internal/memory.h" #include "glog/logging.h" @@ -46,6 +48,18 @@ constexpr static auto kFixedArrayUseDefault = static_cast<size_t>(-1); +// The default fixed array allocator. +// +// As one can not easily detect if a struct contains or inherits from a fixed +// size Eigen type, to be safe the Eigen::aligned_allocator is used by default. +// But trivial types can never contain Eigen types, so std::allocator is used to +// safe some heap memory. +template <typename T> +using FixedArrayDefaultAllocator = + typename std::conditional<std::is_trivial<T>::value, + std::allocator<T>, + Eigen::aligned_allocator<T>>::type; + // ----------------------------------------------------------------------------- // FixedArray // ----------------------------------------------------------------------------- @@ -71,7 +85,7 @@ // operators. template <typename T, size_t N = kFixedArrayUseDefault, - typename A = std::allocator<T>> + typename A = FixedArrayDefaultAllocator<T>> class FixedArray { static_assert(!std::is_array<T>::value || std::extent<T>::value > 0, "Arrays with unknown bounds cannot be used with FixedArray.");
diff --git a/include/ceres/jet.h b/include/ceres/jet.h index 2b54064..c5e4ac7 100644 --- a/include/ceres/jet.h +++ b/include/ceres/jet.h
@@ -250,61 +250,11 @@ T a; // The infinitesimal part. - // - // We allocate Jets on the stack and other places they might not be aligned - // to X(=16 [SSE], 32 [AVX] etc)-byte boundaries, which would prevent the safe - // use of vectorisation. If we have C++11, we can specify the alignment. - // However, the standard gives wide latitude as to what alignments are valid, - // and it might be that the maximum supported alignment *guaranteed* to be - // supported is < 16, in which case we do not specify an alignment, as this - // implies the host is not a modern x86 machine. If using < C++11, we cannot - // specify alignment. + Eigen::Matrix<T, N, 1> v; -#if defined(EIGEN_DONT_VECTORIZE) - Eigen::Matrix<T, N, 1, Eigen::DontAlign> v; -#else - // Enable vectorisation iff the maximum supported scalar alignment is >= - // 16 bytes, as this is the minimum required by Eigen for any vectorisation. - // - // NOTE: It might be the case that we could get >= 16-byte alignment even if - // max_align_t < 16. However we can't guarantee that this - // would happen (and it should not for any modern x86 machine) and if it - // didn't, we could get misaligned Jets. - static constexpr int kAlignOrNot = - // Work around a GCC 4.8 bug - // (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=56019) where - // std::max_align_t is misplaced. -#if defined (__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ == 8 - alignof(::max_align_t) >= 16 -#else - alignof(std::max_align_t) >= 16 -#endif - ? Eigen::AutoAlign : Eigen::DontAlign; - -#if defined(EIGEN_MAX_ALIGN_BYTES) - // Eigen >= 3.3 supports AVX & FMA instructions that require 32-byte alignment - // (greater for AVX512). Rather than duplicating the detection logic, use - // Eigen's macro for the alignment size. - // - // NOTE: EIGEN_MAX_ALIGN_BYTES can be > 16 (e.g. 32 for AVX), even though - // kMaxAlignBytes will max out at 16. We are therefore relying on - // Eigen's detection logic to ensure that this does not result in - // misaligned Jets. -#define CERES_JET_ALIGN_BYTES EIGEN_MAX_ALIGN_BYTES -#else - // Eigen < 3.3 only supported 16-byte alignment. -#define CERES_JET_ALIGN_BYTES 16 -#endif - - // Default to the native alignment if 16-byte alignment is not guaranteed to - // be supported. We cannot use alignof(T) as if we do, GCC 4.8 complains that - // the alignment 'is not an integer constant', although Clang accepts it. - static constexpr size_t kAlignment = kAlignOrNot == Eigen::AutoAlign - ? CERES_JET_ALIGN_BYTES : alignof(double); - -#undef CERES_JET_ALIGN_BYTES - alignas(kAlignment) Eigen::Matrix<T, N, 1, kAlignOrNot> v; -#endif + // This struct needs to have an Eigen aligned operator new as it contains + // fixed-size Eigen types. + EIGEN_MAKE_ALIGNED_OPERATOR_NEW }; // Unary +
diff --git a/include/ceres/tiny_solver.h b/include/ceres/tiny_solver.h index b6484fe..e5e70b3 100644 --- a/include/ceres/tiny_solver.h +++ b/include/ceres/tiny_solver.h
@@ -131,6 +131,10 @@ Function::NUM_PARAMETERS>>> class TinySolver { public: + // This class needs to have an Eigen aligned operator new as it contains + // fixed-size Eigen types. + EIGEN_MAKE_ALIGNED_OPERATOR_NEW + enum { NUM_RESIDUALS = Function::NUM_RESIDUALS, NUM_PARAMETERS = Function::NUM_PARAMETERS
diff --git a/include/ceres/tiny_solver_autodiff_function.h b/include/ceres/tiny_solver_autodiff_function.h index cc93d73..7ed752a 100644 --- a/include/ceres/tiny_solver_autodiff_function.h +++ b/include/ceres/tiny_solver_autodiff_function.h
@@ -109,6 +109,10 @@ typename T = double> class TinySolverAutoDiffFunction { public: + // This class needs to have an Eigen aligned operator new as it contains + // as a member a Jet type, which itself has a fixed-size Eigen type as member. + EIGEN_MAKE_ALIGNED_OPERATOR_NEW + TinySolverAutoDiffFunction(const CostFunctor& cost_functor) : cost_functor_(cost_functor) { Initialize<kNumResiduals>(cost_functor);
diff --git a/include/ceres/tiny_solver_cost_function_adapter.h b/include/ceres/tiny_solver_cost_function_adapter.h index d44bdeb..63ac6c6 100644 --- a/include/ceres/tiny_solver_cost_function_adapter.h +++ b/include/ceres/tiny_solver_cost_function_adapter.h
@@ -72,7 +72,8 @@ // // TinySolverCostFunctionAdapter cost_function_adapter(*cost_function); // -template <int kNumResiduals = Eigen::Dynamic, int kNumParameters = Eigen::Dynamic> +template <int kNumResiduals = Eigen::Dynamic, + int kNumParameters = Eigen::Dynamic> class TinySolverCostFunctionAdapter { public: typedef double Scalar; @@ -81,6 +82,10 @@ NUM_RESIDUALS = kNumResiduals }; + // This struct needs to have an Eigen aligned operator new as it contains + // fixed-size Eigen types. + EIGEN_MAKE_ALIGNED_OPERATOR_NEW + TinySolverCostFunctionAdapter(const CostFunction& cost_function) : cost_function_(cost_function) { CHECK_EQ(cost_function_.parameter_block_sizes().size(), 1) @@ -127,6 +132,7 @@ return cost_function_.parameter_block_sizes()[0]; } + private: const CostFunction& cost_function_; mutable Eigen::Matrix<double, NUM_RESIDUALS, NUM_PARAMETERS, Eigen::RowMajor> row_major_jacobian_;
diff --git a/internal/ceres/cubic_interpolation_test.cc b/internal/ceres/cubic_interpolation_test.cc index d68af22..e1abb0f 100644 --- a/internal/ceres/cubic_interpolation_test.cc +++ b/internal/ceres/cubic_interpolation_test.cc
@@ -313,6 +313,10 @@ class BiCubicInterpolatorTest : public ::testing::Test { public: + // This class needs to have an Eigen aligned operator new as it contains + // fixed-size Eigen types. + EIGEN_MAKE_ALIGNED_OPERATOR_NEW + template <int kDataDimension> void RunPolynomialInterpolationTest(const Eigen::Matrix3d& coeff) { values_.reset(new double[kNumRows * kNumCols * kDataDimension]);
diff --git a/internal/ceres/dynamic_sparsity_test.cc b/internal/ceres/dynamic_sparsity_test.cc index 5fe60f4..7c2e4ac 100644 --- a/internal/ceres/dynamic_sparsity_test.cc +++ b/internal/ceres/dynamic_sparsity_test.cc
@@ -272,6 +272,10 @@ class PointToLineSegmentContourCostFunction : public CostFunction { public: + // This class needs to have an Eigen aligned operator new as it contains + // fixed-size Eigen types. + EIGEN_MAKE_ALIGNED_OPERATOR_NEW + PointToLineSegmentContourCostFunction(const int num_segments, const Eigen::Vector2d& y) : num_segments_(num_segments), y_(y) {
diff --git a/internal/ceres/fixed_array_test.cc b/internal/ceres/fixed_array_test.cc index 2909707..79ae511 100644 --- a/internal/ceres/fixed_array_test.cc +++ b/internal/ceres/fixed_array_test.cc
@@ -834,4 +834,27 @@ } } +struct EigenStruct { + Eigen::Vector4d data; +}; + +static_assert( + std::is_same<ceres::internal::FixedArrayDefaultAllocator<double>, + std::allocator<double>>::value, + "Double is a trivial type, so std::allocator should be used here."); +static_assert( + std::is_same<ceres::internal::FixedArrayDefaultAllocator<double*>, + std::allocator<double*>>::value, + "A pointer is a trivial type, so std::allocator should be used here."); +static_assert( + std::is_same<ceres::internal::FixedArrayDefaultAllocator<Eigen::Matrix4d>, + Eigen::aligned_allocator<Eigen::Matrix4d>>::value, + "An Eigen::Matrix4d needs the Eigen::aligned_allocator for proper " + "alignment."); +static_assert( + std::is_same<ceres::internal::FixedArrayDefaultAllocator<EigenStruct>, + Eigen::aligned_allocator<EigenStruct>>::value, + "A struct containing fixed size Eigen types needs Eigen::aligned_allocator " + "for proper alignment."); + } // namespace