Enable support for AVX instructions for Jets. - Eigen versions < 3.3 only supported SIMD instructions that required 16-byte alignment (SSE), whereas Eigen >= 3.3 also supports AVX instructions which require 32+-byte alignment. - Previously we only ever requested 16-byte alignment for Jets (if >= C++11 was enabled) which resulted in Eigen assertions being triggered on some compilers as reported as Issue #251: https://github.com/ceres-solver/ceres-solver/issues/251. - Now we use Eigen’s EIGEN_MAX_ALIGN_BYTES macro, defined in Eigen >= 3.3 to specify the byte alignment for Jets when C++11 is enabled for Eigen >= 3.3. For Eigen versions < 3.3, we maintain the previous behaviour of 16. Change-Id: I749af7a70ae794e0c2a59301128db781e338422c

commit: 3fa415bd0c7624f5371073f8bf6fd23aa18bdb27 [log] [tgz]
author: Alex Stewart <alexs.mac@gmail.com> Wed Jan 04 19:50:00 2017 +0000
committer: Alex Stewart <alexs.mac@gmail.com> Thu Jan 05 19:12:45 2017 +0000
tree: 0f69697b46c798f0fe61db4e8a96508c8563abff
parent: adb5f8e92c9f2df2746e94bca20060a62f9e64ba [diff]
diff --git a/include/ceres/internal/port.h b/include/ceres/internal/port.h
index 8660a66..652f6fb 100644
--- a/include/ceres/internal/port.h
+++ b/include/ceres/internal/port.h

@@ -50,13 +50,13 @@
 #endif
 
 // We allocate some Eigen objects on the stack and other places they
-// might not be aligned to 16-byte boundaries.  If we have C++11, we
-// can specify their alignment (which is desirable, as it means we can safely
-// enable vectorisation on matrices).  However, the standard gives wide
-// lattitude as to what alignments are legal.  It must be the case that
+// might not be aligned to X(=16 [SSE], 32 [AVX] etc)-byte boundaries.  If we
+// have C++11, we can specify their alignment (which is desirable, as it means
+// we can safely enable vectorisation on matrices).  However, the standard gives
+// wide lattitude as to what alignments are legal.  It must be the case that
 // alignments up to alignof(std::max_align_t) are valid, but this might be < 16
 // on some platforms, in which case even if using C++11, on these platforms
-// we should not attempt to align to 16-byte boundaries.  If using < C++11,
+// we should not attempt to align to X-byte boundaries.  If using < C++11,
 // we cannot specify the alignment.
 #ifdef CERES_USE_CXX11
 namespace port_constants {

diff --git a/include/ceres/jet.h b/include/ceres/jet.h
index 4f517f7..a0505a2 100644
--- a/include/ceres/jet.h
+++ b/include/ceres/jet.h

@@ -228,27 +228,51 @@
   T a;
 
   // The infinitesimal part.
-
+  //
   // We allocate Jets on the stack and other places they might not be aligned
-  // to 16-byte boundaries, which would prevent the safe use of vectorisation.
-  // If we have C++11, we can specify the alignment.  However, the standard
-  // gives wide lattitude as to what alignments are valid, and it might be that
-  // the maximum supported alignment is < 16, in which case even with C++11, we
-  // cannot specify 16-byte alignment.  If using < C++11, we cannot specify
-  // alignment.
+  // to X(=16 [SSE], 32 [AVX] etc)-byte boundaries, which would prevent the safe
+  // use of vectorisation.  If we have C++11, we can specify the alignment.
+  // However, the standard gives wide lattitude as to what alignments are valid,
+  // and it might be that the maximum supported alignment *guaranteed* to be
+  // supported is < 16, in which case we do not specify an alignment, as this
+  // implies the host is not a modern x86 machine.  If using < C++11, we cannot
+  // specify alignment.
 #ifndef CERES_USE_CXX11
-  // fall back to safe version:
+  // Without >= C++11, we cannot specify the alignment so fall back to safe,
+  // unvectorised version.
   Eigen::Matrix<T, N, 1, Eigen::DontAlign> v;
 #else
-  static constexpr bool kShouldAlignMatrix =
-      16 <= ::ceres::port_constants::kMaxAlignBytes;
-  static constexpr int kAlignHint = kShouldAlignMatrix ?
-      Eigen::AutoAlign : Eigen::DontAlign;
-  // Default to the native alignment of double if 16-byte alignment is not
-  // supported.  We cannot use alignof(T) as if we do, GCC complains that the
-  // alignment 'is not an integer constant', although Clang accepts it.
-  static constexpr size_t kAlignment = kShouldAlignMatrix ? 16 : alignof(double);
-  alignas(kAlignment) Eigen::Matrix<T, N, 1, kAlignHint> v;
+  // Enable vectorisation iff the maximum supported scalar alignment is >=
+  // 16 bytes, as this is the minimum required by Eigen for any vectorisation.
+  //
+  // NOTE: It might be the case that we could get >= 16-byte alignment even if
+  //       kMaxAlignBytes < 16.  However we can't guarantee that this
+  //       would happen (and it should not for any modern x86 machine) and if it
+  //       didn't, we could get misaligned Jets.
+  static constexpr int kAlignOrNot =
+      16 <= ::ceres::port_constants::kMaxAlignBytes
+            ? Eigen::AutoAlign : Eigen::DontAlign;
+#if defined(EIGEN_MAX_ALIGN_BYTES)
+  // Eigen >= 3.3 supports AVX & FMA instructions that require 32-byte alignment
+  // (greater for AVX512).  Rather than duplicating the detection logic, use
+  // Eigen's macro for the alignment size.
+  //
+  // NOTE: EIGEN_MAX_ALIGN_BYTES can be > 16 (e.g. 32 for AVX), even though
+  //       kMaxAlignBytes will max out at 16.  We are therefore relying on
+  //       Eigen's detection logic to ensure that this does not result in
+  //       misaligned Jets.
+#define CERES_JET_ALIGN_BYTES EIGEN_MAX_ALIGN_BYTES
+#else
+  // Eigen < 3.3 only supported 16-byte alignment.
+#define CERES_JET_ALIGN_BYTES 16
+#endif
+  // Default to the native alignment if 16-byte alignment is not guaranteed to
+  // be supported.  We cannot use alignof(T) as if we do, GCC 4.8 complains that
+  // the alignment 'is not an integer constant', although Clang accepts it.
+  static constexpr size_t kAlignment = kAlignOrNot == Eigen::AutoAlign
+            ? CERES_JET_ALIGN_BYTES : alignof(double);
+#undef CERES_JET_ALIGN_BYTES
+  alignas(kAlignment) Eigen::Matrix<T, N, 1, kAlignOrNot> v;
 #endif
 };
commit	3fa415bd0c7624f5371073f8bf6fd23aa18bdb27	[log] [tgz]
author	Alex Stewart <alexs.mac@gmail.com>	Wed Jan 04 19:50:00 2017 +0000
committer	Alex Stewart <alexs.mac@gmail.com>	Thu Jan 05 19:12:45 2017 +0000
tree	0f69697b46c798f0fe61db4e8a96508c8563abff
parent	adb5f8e92c9f2df2746e94bca20060a62f9e64ba [diff]