Use inline & -inlinehint-threshold in auto-diff benchmarks

- This yields the same performance as the original approach of raising
  -inline-threshold, but is more viable to adopt in a large code base
  because it only affects functions marked with an inline hint.

Change-Id: Id798dbca7d3050de0ea847a5ecc69484ac78a2cf
diff --git a/include/ceres/internal/autodiff.h b/include/ceres/internal/autodiff.h
index 3505c85..0603321 100644
--- a/include/ceres/internal/autodiff.h
+++ b/include/ceres/internal/autodiff.h
@@ -184,7 +184,7 @@
 template <int j, int N, int Offset, typename T, typename JetT>
 struct Make1stOrderPerturbation {
  public:
-  static void Apply(const T* src, JetT* dst) {
+  inline static void Apply(const T* src, JetT* dst) {
     if (j == 0) {
       DCHECK(src);
       DCHECK(dst);
@@ -217,7 +217,7 @@
                                  ParameterIdx,
                                  Offset> {
   template <typename T, typename JetT>
-  static void Apply(T const* const* parameters, JetT* x) {
+  inline static void Apply(T const* const* parameters, JetT* x) {
     Make1stOrderPerturbation<0, N, Offset, T, JetT>::Apply(
         parameters[ParameterIdx], x + Offset);
     Make1stOrderPerturbations<integer_sequence<int, Ns...>,
@@ -280,7 +280,7 @@
                          ParameterIdx,
                          Offset> {
   template <typename JetT, typename T>
-  static void Apply(int num_outputs, JetT* output, T** jacobians) {
+  inline static void Apply(int num_outputs, JetT* output, T** jacobians) {
     if (jacobians[ParameterIdx]) {
       Take1stOrderPart<Offset, N>(num_outputs, output, jacobians[ParameterIdx]);
     }
diff --git a/internal/ceres/autodiff_benchmarks/CMakeLists.txt b/internal/ceres/autodiff_benchmarks/CMakeLists.txt
index 06b3cf6..610ebc3 100644
--- a/internal/ceres/autodiff_benchmarks/CMakeLists.txt
+++ b/internal/ceres/autodiff_benchmarks/CMakeLists.txt
@@ -1,6 +1,9 @@
 # TODO: Add support for other compilers
 if(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
-  list(APPEND CERES_BENCHMARK_FLAGS "-mllvm" "-inline-threshold=1000000")
+  # Increase the inlining threshold only for functions marked with an inline
+  # hint. Raising this significantly is far more realistic in a large code
+  # base than raising -inline-threshold, which has a larger scope.
+  list(APPEND CERES_BENCHMARK_FLAGS "-mllvm" "-inlinehint-threshold=1000000")
 endif()
 
 add_executable(autodiff_benchmarks autodiff_benchmarks.cc)
diff --git a/internal/ceres/autodiff_benchmarks/autodiff_benchmarks.cc b/internal/ceres/autodiff_benchmarks/autodiff_benchmarks.cc
index 2ab1598..f8b2a21 100644
--- a/internal/ceres/autodiff_benchmarks/autodiff_benchmarks.cc
+++ b/internal/ceres/autodiff_benchmarks/autodiff_benchmarks.cc
@@ -57,7 +57,7 @@
         : cost_function(std::forward<_Args>(__args)...) {}
 
     template <typename... _Args>
-    bool operator()(_Args&&... __args) const {
+    inline bool operator()(_Args&&... __args) const {
         return cost_function(std::forward<_Args>(__args)...);
     }
 
@@ -171,7 +171,7 @@
   Rat43CostFunctor(const double x, const double y) : x_(x), y_(y) {}
 
   template <typename T>
-  bool operator()(const T* parameters, T* residuals) const {
+  inline bool operator()(const T* parameters, T* residuals) const {
     const T& b1 = parameters[0];
     const T& b2 = parameters[1];
     const T& b3 = parameters[2];
diff --git a/internal/ceres/autodiff_benchmarks/brdf_cost_function.h b/internal/ceres/autodiff_benchmarks/brdf_cost_function.h
index 4134181..eba0932 100644
--- a/internal/ceres/autodiff_benchmarks/brdf_cost_function.h
+++ b/internal/ceres/autodiff_benchmarks/brdf_cost_function.h
@@ -48,14 +48,14 @@
   Brdf() {}
 
   template <typename T>
-  bool operator()(const T* const material,
-                  const T* const c_ptr,
-                  const T* const n_ptr,
-                  const T* const v_ptr,
-                  const T* const l_ptr,
-                  const T* const x_ptr,
-                  const T* const y_ptr,
-                  T* residual) const {
+  inline bool operator()(const T* const material,
+                         const T* const c_ptr,
+                         const T* const n_ptr,
+                         const T* const v_ptr,
+                         const T* const l_ptr,
+                         const T* const x_ptr,
+                         const T* const y_ptr,
+                         T* residual) const {
     using Vec3 = Eigen::Matrix<T, 3, 1>;
 
     T metallic = material[0];
@@ -154,19 +154,19 @@
   }
 
   template <typename T>
-  T SchlickFresnel(const T& u) const {
+  inline T SchlickFresnel(const T& u) const {
     T m = T(1) - u;
     const T m2 = m * m;
     return m2 * m2 * m;  // (1-u)^5
   }
 
   template <typename T>
-  T Aspect(const T& anisotropic) const {
+  inline T Aspect(const T& anisotropic) const {
     return T(sqrt(T(1) - anisotropic * T(0.9)));
   }
 
   template <typename T>
-  T SmithG_GGX(const T& n_dot_v, const T& alpha_g) const {
+  inline T SmithG_GGX(const T& n_dot_v, const T& alpha_g) const {
     const T a = alpha_g * alpha_g;
     const T b = n_dot_v * n_dot_v;
     return T(1) / (n_dot_v + T(sqrt(a + b - a * b)));
@@ -175,7 +175,7 @@
   // Generalized-Trowbridge-Reitz (GTR) Microfacet Distribution
   // See paper, Appendix B
   template <typename T>
-  T GTR1(const T& n_dot_h, const T& a) const {
+  inline T GTR1(const T& n_dot_h, const T& a) const {
     T result = T(0);
 
     if (a >= T(1)) {
@@ -189,7 +189,7 @@
   }
 
   template <typename T>
-  T GTR2Aniso(const T& n_dot_h,
+  inline T GTR2Aniso(const T& n_dot_h,
               const T& h_dot_x,
               const T& h_dot_y,
               const T& ax,
@@ -205,9 +205,10 @@
   }
 
   template <typename Derived1, typename Derived2>
-  typename Derived1::PlainObject Lerp(const Eigen::MatrixBase<Derived1>& a,
-                                      const Eigen::MatrixBase<Derived2>& b,
-                                      typename Derived1::Scalar alpha) const {
+  inline typename Derived1::PlainObject
+  Lerp(const Eigen::MatrixBase<Derived1>& a,
+       const Eigen::MatrixBase<Derived2>& b,
+       typename Derived1::Scalar alpha) const {
     return (typename Derived1::Scalar(1) - alpha) * a + alpha * b;
   }
 
diff --git a/internal/ceres/autodiff_benchmarks/constant_cost_function.h b/internal/ceres/autodiff_benchmarks/constant_cost_function.h
index 00f39d6..caa0431 100644
--- a/internal/ceres/autodiff_benchmarks/constant_cost_function.h
+++ b/internal/ceres/autodiff_benchmarks/constant_cost_function.h
@@ -40,7 +40,7 @@
 struct ConstantCostFunction
     : public ceres::SizedCostFunction<1, kParameterBlockSize> {
   template <typename T>
-  bool operator()(const T* const x, T* residuals) const {
+  inline bool operator()(const T* const x, T* residuals) const {
     residuals[0] = T(5);
     return true;
   }
diff --git a/internal/ceres/autodiff_benchmarks/linear_cost_functions.h b/internal/ceres/autodiff_benchmarks/linear_cost_functions.h
index 4ead04a..2f2552f 100644
--- a/internal/ceres/autodiff_benchmarks/linear_cost_functions.h
+++ b/internal/ceres/autodiff_benchmarks/linear_cost_functions.h
@@ -38,7 +38,7 @@
 
 struct Linear1CostFunction {
   template <typename T>
-  bool operator()(const T* const x, T* residuals) const {
+  inline bool operator()(const T* const x, T* residuals) const {
     residuals[0] = x[0] + T(10);
     return true;
   }
@@ -46,7 +46,7 @@
 
 struct Linear10CostFunction {
   template <typename T>
-  bool operator()(const T* const x, T* residuals) const {
+  inline bool operator()(const T* const x, T* residuals) const {
     for (int i = 0; i < 10; ++i) {
       residuals[i] = x[i] + T(i);
     }
diff --git a/internal/ceres/autodiff_benchmarks/photometric_error.h b/internal/ceres/autodiff_benchmarks/photometric_error.h
index 9107ca0..8ed278d 100644
--- a/internal/ceres/autodiff_benchmarks/photometric_error.h
+++ b/internal/ceres/autodiff_benchmarks/photometric_error.h
@@ -102,8 +102,8 @@
         intrinsics_(intrinsics) {}
 
   template <typename T>
-  bool Project(Eigen::Matrix<T, 2, 1>& proj,
-               const Eigen::Matrix<T, 3, 1>& p) const {
+  inline bool Project(Eigen::Matrix<T, 2, 1>& proj,
+                      const Eigen::Matrix<T, 3, 1>& p) const {
     const double& fx = intrinsics_[0];
     const double& fy = intrinsics_[1];
     const double& cx = intrinsics_[2];
@@ -136,10 +136,10 @@
   }
 
   template <typename T>
-  bool operator()(const T* const pose_host_ptr,
-                  const T* const pose_target_ptr,
-                  const T* const idist_ptr,
-                  T* residuals_ptr) const {
+  inline bool operator()(const T* const pose_host_ptr,
+                         const T* const pose_target_ptr,
+                         const T* const idist_ptr,
+                         T* residuals_ptr) const {
     Eigen::Map<const Eigen::Quaternion<T>> q_w_h(pose_host_ptr);
     Eigen::Map<const Eigen::Matrix<T, 3, 1>> t_w_h(pose_host_ptr + 4);
     Eigen::Map<const Eigen::Quaternion<T>> q_w_t(pose_target_ptr);
diff --git a/internal/ceres/autodiff_benchmarks/relative_pose_error.h b/internal/ceres/autodiff_benchmarks/relative_pose_error.h
index 93b28c7..b5c1a93 100644
--- a/internal/ceres/autodiff_benchmarks/relative_pose_error.h
+++ b/internal/ceres/autodiff_benchmarks/relative_pose_error.h
@@ -48,9 +48,9 @@
       : meas_q_i_j_(q_i_j), meas_t_i_j_(t_i_j) {}
 
   template <typename T>
-  bool operator()(const T* const pose_i_ptr,
-                  const T* const pose_j_ptr,
-                  T* residuals_ptr) const {
+  inline bool operator()(const T* const pose_i_ptr,
+                         const T* const pose_j_ptr,
+                         T* residuals_ptr) const {
     Eigen::Map<const Eigen::Quaternion<T>> q_w_i(pose_i_ptr);
     Eigen::Map<const Eigen::Matrix<T, 3, 1>> t_w_i(pose_i_ptr + 4);
     Eigen::Map<const Eigen::Quaternion<T>> q_w_j(pose_j_ptr);
diff --git a/internal/ceres/autodiff_benchmarks/snavely_reprojection_error.h b/internal/ceres/autodiff_benchmarks/snavely_reprojection_error.h
index 8da8bea..795342f 100644
--- a/internal/ceres/autodiff_benchmarks/snavely_reprojection_error.h
+++ b/internal/ceres/autodiff_benchmarks/snavely_reprojection_error.h
@@ -42,9 +42,9 @@
 
   SnavelyReprojectionError() = default;
   template <typename T>
-  bool operator()(const T* const camera,
-                  const T* const point,
-                  T* residuals) const {
+  inline bool operator()(const T* const camera,
+                         const T* const point,
+                         T* residuals) const {
     T ox = T(observed_x);
     T oy = T(observed_y);