Use inline & -inlinehint-threshold in auto-diff benchmarks

- This results in the same performance as the original solution of increasing -inline-threshold, but this approach is more viable to incorporate in a large code base as its effects are more targeted.

Change-Id: Id798dbca7d3050de0ea847a5ecc69484ac78a2cf

commit: 8c36bcc81fbd4f78a2faa2c914ef40af264f4c31 [log] [tgz]
author: Alex Stewart <alexs.mac@gmail.com> Mon Apr 27 18:33:25 2020 +0100
committer: Alex Stewart <alexs.mac@gmail.com> Mon Apr 27 18:47:22 2020 +0100
tree: 712bd1c6d5b4b07555688991374805d8b539789d
parent: 57cf20aa5d3c1b2f25d255814f4fff5260db81c6 [diff]
diff --git a/include/ceres/internal/autodiff.h b/include/ceres/internal/autodiff.h
index 3505c85..0603321 100644
--- a/include/ceres/internal/autodiff.h
+++ b/include/ceres/internal/autodiff.h

@@ -184,7 +184,7 @@
 template <int j, int N, int Offset, typename T, typename JetT>
 struct Make1stOrderPerturbation {
  public:
-  static void Apply(const T* src, JetT* dst) {
+  inline static void Apply(const T* src, JetT* dst) {
     if (j == 0) {
       DCHECK(src);
       DCHECK(dst);
@@ -217,7 +217,7 @@
                                  ParameterIdx,
                                  Offset> {
   template <typename T, typename JetT>
-  static void Apply(T const* const* parameters, JetT* x) {
+  inline static void Apply(T const* const* parameters, JetT* x) {
     Make1stOrderPerturbation<0, N, Offset, T, JetT>::Apply(
         parameters[ParameterIdx], x + Offset);
     Make1stOrderPerturbations<integer_sequence<int, Ns...>,
@@ -280,7 +280,7 @@
                          ParameterIdx,
                          Offset> {
   template <typename JetT, typename T>
-  static void Apply(int num_outputs, JetT* output, T** jacobians) {
+  inline static void Apply(int num_outputs, JetT* output, T** jacobians) {
     if (jacobians[ParameterIdx]) {
       Take1stOrderPart<Offset, N>(num_outputs, output, jacobians[ParameterIdx]);
     }

diff --git a/internal/ceres/autodiff_benchmarks/CMakeLists.txt b/internal/ceres/autodiff_benchmarks/CMakeLists.txt
index 06b3cf6..610ebc3 100644
--- a/internal/ceres/autodiff_benchmarks/CMakeLists.txt
+++ b/internal/ceres/autodiff_benchmarks/CMakeLists.txt

@@ -1,6 +1,9 @@
 # TODO: Add support for other compilers
 if(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
-  list(APPEND CERES_BENCHMARK_FLAGS "-mllvm" "-inline-threshold=1000000")
+  # Increase the inlining threshold only for those functions marked with an
+  # inline hint. This is typically far more realistic to significantly increase
+  # in a large code-base than -inline-threshold as that has a larger scope.
+  list(APPEND CERES_BENCHMARK_FLAGS "-mllvm" "-inlinehint-threshold=1000000")
 endif()
 
 add_executable(autodiff_benchmarks autodiff_benchmarks.cc)

diff --git a/internal/ceres/autodiff_benchmarks/autodiff_benchmarks.cc b/internal/ceres/autodiff_benchmarks/autodiff_benchmarks.cc
index 2ab1598..f8b2a21 100644
--- a/internal/ceres/autodiff_benchmarks/autodiff_benchmarks.cc
+++ b/internal/ceres/autodiff_benchmarks/autodiff_benchmarks.cc

@@ -57,7 +57,7 @@
         : cost_function(std::forward<_Args>(__args)...) {}
 
     template <typename... _Args>
-    bool operator()(_Args&&... __args) const {
+    inline bool operator()(_Args&&... __args) const {
         return cost_function(std::forward<_Args>(__args)...);
     }
 
@@ -171,7 +171,7 @@
   Rat43CostFunctor(const double x, const double y) : x_(x), y_(y) {}
 
   template <typename T>
-  bool operator()(const T* parameters, T* residuals) const {
+  inline bool operator()(const T* parameters, T* residuals) const {
     const T& b1 = parameters[0];
     const T& b2 = parameters[1];
     const T& b3 = parameters[2];

diff --git a/internal/ceres/autodiff_benchmarks/brdf_cost_function.h b/internal/ceres/autodiff_benchmarks/brdf_cost_function.h
index 4134181..eba0932 100644
--- a/internal/ceres/autodiff_benchmarks/brdf_cost_function.h
+++ b/internal/ceres/autodiff_benchmarks/brdf_cost_function.h

@@ -48,14 +48,14 @@
   Brdf() {}
 
   template <typename T>
-  bool operator()(const T* const material,
-                  const T* const c_ptr,
-                  const T* const n_ptr,
-                  const T* const v_ptr,
-                  const T* const l_ptr,
-                  const T* const x_ptr,
-                  const T* const y_ptr,
-                  T* residual) const {
+  inline bool operator()(const T* const material,
+                         const T* const c_ptr,
+                         const T* const n_ptr,
+                         const T* const v_ptr,
+                         const T* const l_ptr,
+                         const T* const x_ptr,
+                         const T* const y_ptr,
+                         T* residual) const {
     using Vec3 = Eigen::Matrix<T, 3, 1>;
 
     T metallic = material[0];
@@ -154,19 +154,19 @@
   }
 
   template <typename T>
-  T SchlickFresnel(const T& u) const {
+  inline T SchlickFresnel(const T& u) const {
     T m = T(1) - u;
     const T m2 = m * m;
     return m2 * m2 * m;  // (1-u)^5
   }
 
   template <typename T>
-  T Aspect(const T& anisotropic) const {
+  inline T Aspect(const T& anisotropic) const {
     return T(sqrt(T(1) - anisotropic * T(0.9)));
   }
 
   template <typename T>
-  T SmithG_GGX(const T& n_dot_v, const T& alpha_g) const {
+  inline T SmithG_GGX(const T& n_dot_v, const T& alpha_g) const {
     const T a = alpha_g * alpha_g;
     const T b = n_dot_v * n_dot_v;
     return T(1) / (n_dot_v + T(sqrt(a + b - a * b)));
@@ -175,7 +175,7 @@
   // Generalized-Trowbridge-Reitz (GTR) Microfacet Distribution
   // See paper, Appendix B
   template <typename T>
-  T GTR1(const T& n_dot_h, const T& a) const {
+  inline T GTR1(const T& n_dot_h, const T& a) const {
     T result = T(0);
 
     if (a >= T(1)) {
@@ -189,7 +189,7 @@
   }
 
   template <typename T>
-  T GTR2Aniso(const T& n_dot_h,
+  inline T GTR2Aniso(const T& n_dot_h,
               const T& h_dot_x,
               const T& h_dot_y,
               const T& ax,
@@ -205,9 +205,10 @@
   }
 
   template <typename Derived1, typename Derived2>
-  typename Derived1::PlainObject Lerp(const Eigen::MatrixBase<Derived1>& a,
-                                      const Eigen::MatrixBase<Derived2>& b,
-                                      typename Derived1::Scalar alpha) const {
+  inline typename Derived1::PlainObject
+  Lerp(const Eigen::MatrixBase<Derived1>& a,
+       const Eigen::MatrixBase<Derived2>& b,
+       typename Derived1::Scalar alpha) const {
     return (typename Derived1::Scalar(1) - alpha) * a + alpha * b;
   }
 

diff --git a/internal/ceres/autodiff_benchmarks/constant_cost_function.h b/internal/ceres/autodiff_benchmarks/constant_cost_function.h
index 00f39d6..caa0431 100644
--- a/internal/ceres/autodiff_benchmarks/constant_cost_function.h
+++ b/internal/ceres/autodiff_benchmarks/constant_cost_function.h

@@ -40,7 +40,7 @@
 struct ConstantCostFunction
     : public ceres::SizedCostFunction<1, kParameterBlockSize> {
   template <typename T>
-  bool operator()(const T* const x, T* residuals) const {
+  inline bool operator()(const T* const x, T* residuals) const {
     residuals[0] = T(5);
     return true;
   }

diff --git a/internal/ceres/autodiff_benchmarks/linear_cost_functions.h b/internal/ceres/autodiff_benchmarks/linear_cost_functions.h
index 4ead04a..2f2552f 100644
--- a/internal/ceres/autodiff_benchmarks/linear_cost_functions.h
+++ b/internal/ceres/autodiff_benchmarks/linear_cost_functions.h

@@ -38,7 +38,7 @@
 
 struct Linear1CostFunction {
   template <typename T>
-  bool operator()(const T* const x, T* residuals) const {
+  inline bool operator()(const T* const x, T* residuals) const {
     residuals[0] = x[0] + T(10);
     return true;
   }
@@ -46,7 +46,7 @@
 
 struct Linear10CostFunction {
   template <typename T>
-  bool operator()(const T* const x, T* residuals) const {
+  inline bool operator()(const T* const x, T* residuals) const {
     for (int i = 0; i < 10; ++i) {
       residuals[i] = x[i] + T(i);
     }

diff --git a/internal/ceres/autodiff_benchmarks/photometric_error.h b/internal/ceres/autodiff_benchmarks/photometric_error.h
index 9107ca0..8ed278d 100644
--- a/internal/ceres/autodiff_benchmarks/photometric_error.h
+++ b/internal/ceres/autodiff_benchmarks/photometric_error.h

@@ -102,8 +102,8 @@
         intrinsics_(intrinsics) {}
 
   template <typename T>
-  bool Project(Eigen::Matrix<T, 2, 1>& proj,
-               const Eigen::Matrix<T, 3, 1>& p) const {
+  inline bool Project(Eigen::Matrix<T, 2, 1>& proj,
+                      const Eigen::Matrix<T, 3, 1>& p) const {
     const double& fx = intrinsics_[0];
     const double& fy = intrinsics_[1];
     const double& cx = intrinsics_[2];
@@ -136,10 +136,10 @@
   }
 
   template <typename T>
-  bool operator()(const T* const pose_host_ptr,
-                  const T* const pose_target_ptr,
-                  const T* const idist_ptr,
-                  T* residuals_ptr) const {
+  inline bool operator()(const T* const pose_host_ptr,
+                         const T* const pose_target_ptr,
+                         const T* const idist_ptr,
+                         T* residuals_ptr) const {
     Eigen::Map<const Eigen::Quaternion<T>> q_w_h(pose_host_ptr);
     Eigen::Map<const Eigen::Matrix<T, 3, 1>> t_w_h(pose_host_ptr + 4);
     Eigen::Map<const Eigen::Quaternion<T>> q_w_t(pose_target_ptr);

diff --git a/internal/ceres/autodiff_benchmarks/relative_pose_error.h b/internal/ceres/autodiff_benchmarks/relative_pose_error.h
index 93b28c7..b5c1a93 100644
--- a/internal/ceres/autodiff_benchmarks/relative_pose_error.h
+++ b/internal/ceres/autodiff_benchmarks/relative_pose_error.h

@@ -48,9 +48,9 @@
       : meas_q_i_j_(q_i_j), meas_t_i_j_(t_i_j) {}
 
   template <typename T>
-  bool operator()(const T* const pose_i_ptr,
-                  const T* const pose_j_ptr,
-                  T* residuals_ptr) const {
+  inline bool operator()(const T* const pose_i_ptr,
+                         const T* const pose_j_ptr,
+                         T* residuals_ptr) const {
     Eigen::Map<const Eigen::Quaternion<T>> q_w_i(pose_i_ptr);
     Eigen::Map<const Eigen::Matrix<T, 3, 1>> t_w_i(pose_i_ptr + 4);
     Eigen::Map<const Eigen::Quaternion<T>> q_w_j(pose_j_ptr);

diff --git a/internal/ceres/autodiff_benchmarks/snavely_reprojection_error.h b/internal/ceres/autodiff_benchmarks/snavely_reprojection_error.h
index 8da8bea..795342f 100644
--- a/internal/ceres/autodiff_benchmarks/snavely_reprojection_error.h
+++ b/internal/ceres/autodiff_benchmarks/snavely_reprojection_error.h

@@ -42,9 +42,9 @@
 
   SnavelyReprojectionError() = default;
   template <typename T>
-  bool operator()(const T* const camera,
-                  const T* const point,
-                  T* residuals) const {
+  inline bool operator()(const T* const camera,
+                         const T* const point,
+                         T* residuals) const {
     T ox = T(observed_x);
     T oy = T(observed_y);
commit	8c36bcc81fbd4f78a2faa2c914ef40af264f4c31	[log] [tgz]
author	Alex Stewart <alexs.mac@gmail.com>	Mon Apr 27 18:33:25 2020 +0100
committer	Alex Stewart <alexs.mac@gmail.com>	Mon Apr 27 18:47:22 2020 +0100
tree	712bd1c6d5b4b07555688991374805d8b539789d
parent	57cf20aa5d3c1b2f25d255814f4fff5260db81c6 [diff]