Fix alignment issues with Jets. 1. Test that reproduces the failure on macos. 2. Move the alignment macros from manual_constructor.h to macros.h and rename them to prevent conflicts. 3. The inline array used by FixedArray is now aligned. 4. Jet has been modified to be eigen friendly. Change-Id: I4563847a767a92156dabab1ab420f0cdddb8ba77
diff --git a/include/ceres/internal/fixed_array.h b/include/ceres/internal/fixed_array.h index 29ef157..02d3b71 100644 --- a/include/ceres/internal/fixed_array.h +++ b/include/ceres/internal/fixed_array.h
@@ -34,6 +34,7 @@ #include <cstddef> #include <glog/logging.h> +#include "ceres/internal/macros.h" #include "ceres/internal/manual_constructor.h" namespace ceres { @@ -149,7 +150,7 @@ // Allocate some space, not an array of elements of type T, so that we can // skip calling the T constructors and destructors for space we never use. - ManualConstructor<InnerContainer> inline_space_[kInlineElements]; + ManualConstructor<InnerContainer> inline_space_[kInlineElements] CERES_ALIGN_ATTRIBUTE(16); }; // Implementation details follow
diff --git a/include/ceres/internal/macros.h b/include/ceres/internal/macros.h index 7dd4e3e..0e84e9c 100644 --- a/include/ceres/internal/macros.h +++ b/include/ceres/internal/macros.h
@@ -152,4 +152,19 @@ #define MUST_USE_RESULT #endif +// Platform independent macros to get aligned memory allocations. +// For example +// +// MyFoo my_foo CERES_ALIGN_ATTRIBUTE(16); +// +// Gives us an instance of MyFoo which is aligned at a 16 byte +// boundary. +#if defined(_MSC_VER) +#define CERES_ALIGN_ATTRIBUTE(n) __declspec(align(n)) +#define CERES_ALIGN_OF(T) __alignof(T) +#elif defined(__GNUC__) +#define CERES_ALIGN_ATTRIBUTE(n) __attribute__((aligned(n))) +#define CERES_ALIGN_OF(T) __alignof(T) +#endif + #endif // CERES_PUBLIC_INTERNAL_MACROS_H_
diff --git a/include/ceres/internal/manual_constructor.h b/include/ceres/internal/manual_constructor.h index a1d1f44..174d35e 100644 --- a/include/ceres/internal/manual_constructor.h +++ b/include/ceres/internal/manual_constructor.h
@@ -45,60 +45,49 @@ namespace ceres { namespace internal { -// ------- Define ALIGNED_CHAR_ARRAY -------------------------------- +// ------- Define CERES_ALIGNED_CHAR_ARRAY -------------------------------- -#ifndef ALIGNED_CHAR_ARRAY +#ifndef CERES_ALIGNED_CHAR_ARRAY // Because MSVC and older GCCs require that the argument to their alignment // construct to be a literal constant integer, we use a template instantiated // at all the possible powers of two. template<int alignment, int size> struct AlignType { }; template<int size> struct AlignType<0, size> { typedef char result[size]; }; -#if defined(_MSC_VER) -#define BASE_PORT_H_ALIGN_ATTRIBUTE(X) __declspec(align(X)) -#define BASE_PORT_H_ALIGN_OF(T) __alignof(T) -#elif defined(__GNUC__) -#define BASE_PORT_H_ALIGN_ATTRIBUTE(X) __attribute__((aligned(X))) -#define BASE_PORT_H_ALIGN_OF(T) __alignof__(T) -#endif -#if defined(BASE_PORT_H_ALIGN_ATTRIBUTE) +#if !defined(CERES_ALIGN_ATTRIBUTE) +#define CERES_ALIGNED_CHAR_ARRAY you_must_define_CERES_ALIGNED_CHAR_ARRAY_for_your_compiler +#else // !defined(CERES_ALIGN_ATTRIBUTE) -#define BASE_PORT_H_ALIGNTYPE_TEMPLATE(X) \ +#define CERES_ALIGN_TYPE_TEMPLATE(X) \ template<int size> struct AlignType<X, size> { \ - typedef BASE_PORT_H_ALIGN_ATTRIBUTE(X) char result[size]; \ + typedef CERES_ALIGN_ATTRIBUTE(X) char result[size]; \ } -BASE_PORT_H_ALIGNTYPE_TEMPLATE(1); -BASE_PORT_H_ALIGNTYPE_TEMPLATE(2); -BASE_PORT_H_ALIGNTYPE_TEMPLATE(4); -BASE_PORT_H_ALIGNTYPE_TEMPLATE(8); -BASE_PORT_H_ALIGNTYPE_TEMPLATE(16); -BASE_PORT_H_ALIGNTYPE_TEMPLATE(32); -BASE_PORT_H_ALIGNTYPE_TEMPLATE(64); -BASE_PORT_H_ALIGNTYPE_TEMPLATE(128); -BASE_PORT_H_ALIGNTYPE_TEMPLATE(256); -BASE_PORT_H_ALIGNTYPE_TEMPLATE(512); -BASE_PORT_H_ALIGNTYPE_TEMPLATE(1024); -BASE_PORT_H_ALIGNTYPE_TEMPLATE(2048); -BASE_PORT_H_ALIGNTYPE_TEMPLATE(4096); -BASE_PORT_H_ALIGNTYPE_TEMPLATE(8192); +CERES_ALIGN_TYPE_TEMPLATE(1); +CERES_ALIGN_TYPE_TEMPLATE(2); +CERES_ALIGN_TYPE_TEMPLATE(4); +CERES_ALIGN_TYPE_TEMPLATE(8); +CERES_ALIGN_TYPE_TEMPLATE(16); +CERES_ALIGN_TYPE_TEMPLATE(32); +CERES_ALIGN_TYPE_TEMPLATE(64); +CERES_ALIGN_TYPE_TEMPLATE(128); +CERES_ALIGN_TYPE_TEMPLATE(256); +CERES_ALIGN_TYPE_TEMPLATE(512); +CERES_ALIGN_TYPE_TEMPLATE(1024); +CERES_ALIGN_TYPE_TEMPLATE(2048); +CERES_ALIGN_TYPE_TEMPLATE(4096); +CERES_ALIGN_TYPE_TEMPLATE(8192); // Any larger and MSVC++ will complain. -#define ALIGNED_CHAR_ARRAY(T, Size) \ - typename AlignType<BASE_PORT_H_ALIGN_OF(T), sizeof(T) * Size>::result +#undef CERES_ALIGN_TYPE_TEMPLATE -#undef BASE_PORT_H_ALIGNTYPE_TEMPLATE -#undef BASE_PORT_H_ALIGN_ATTRIBUTE +#define CERES_ALIGNED_CHAR_ARRAY(T, Size) \ + typename AlignType<CERES_ALIGN_OF(T), sizeof(T) * Size>::result -#else // defined(BASE_PORT_H_ALIGN_ATTRIBUTE) -#define ALIGNED_CHAR_ARRAY you_must_define_ALIGNED_CHAR_ARRAY_for_your_compiler -#endif // defined(BASE_PORT_H_ALIGN_ATTRIBUTE) +#endif // !defined(CERES_ALIGN_ATTRIBUTE) -#undef BASE_PORT_H_ALIGNTYPE_TEMPLATE -#undef BASE_PORT_H_ALIGN_ATTRIBUTE - -#endif // ALIGNED_CHAR_ARRAY +#endif // CERES_ALIGNED_CHAR_ARRAY template <typename Type> class ManualConstructor { @@ -203,10 +192,10 @@ } private: - ALIGNED_CHAR_ARRAY(Type, 1) space_; + CERES_ALIGNED_CHAR_ARRAY(Type, 1) space_; }; -#undef ALIGNED_CHAR_ARRAY +#undef CERES_ALIGNED_CHAR_ARRAY } // namespace internal } // namespace ceres
diff --git a/include/ceres/jet.h b/include/ceres/jet.h index e42abb5..50b5555 100644 --- a/include/ceres/jet.h +++ b/include/ceres/jet.h
@@ -210,8 +210,20 @@ return *this; } - T a; // The scalar part. + // The infinitesimal part comes before the scalar part to ensure + // alignment. Jets get allocated as an array of type FixedArray + // which can allocate memory on the stack or on the heap depending + // upon the size of the array. We force the memory allocated on the + // stack to be 16 byte boundary aligned, but we also need to ensure + // that the elements of the struct are themselves aligned. Eigen::Matrix<T, N, 1> v; // The infinitesimal part. + T a; // The scalar part. + + // Needed to make sure that new instances of Jets are properly + // aligned. For more details see + // + // http://eigen.tuxfamily.org/dox/TopicStructHavingEigenMembers.html + EIGEN_MAKE_ALIGNED_OPERATOR_NEW }; // Unary +
diff --git a/internal/ceres/autodiff_test.cc b/internal/ceres/autodiff_test.cc index a9fb833..d0bf97a 100644 --- a/internal/ceres/autodiff_test.cc +++ b/internal/ceres/autodiff_test.cc
@@ -377,5 +377,30 @@ } } +// This is fragile test that triggers the alignment bug on +// i686-apple-darwin10-llvm-g++-4.2 (GCC) 4.2.1. It is quite possible, +// that other combinations of operating system + compiler will +// re-arrange the operations in this test. +// +// But this is the best (and only) way we know of to trigger this +// problem for now. A more robust solution that guarantees the +// alignment of Eigen types used for automatic differentiation would +// be nice. +TEST(AutoDiff, AlignedAllocationTest) { + // This int is needed to allocate 16 bits on the stack, so that the + // next allocation is not aligned by default. + char y = 0; + + // This is needed to prevent the compiler from optimizing y out of + // this function. + y += 1; + + typedef Jet<double, 2> JetT; + FixedArray<JetT, (256 * 7) / sizeof(JetT)> x(3); + + // Need this to makes sure that x does not get optimized out. + x[0] = x[0] + JetT(1.0); +} + } // namespace internal } // namespace ceres