Use inline & -inlinehint-threshold in autodiff benchmarks - Marking the hot functions inline and raising -inlinehint-threshold results in the same performance as the original solution of increasing -inline-threshold, but this approach is more viable to incorporate in a large code base because its effects are more targeted. Change-Id: Id798dbca7d3050de0ea847a5ecc69484ac78a2cf
diff --git a/include/ceres/internal/autodiff.h b/include/ceres/internal/autodiff.h index 3505c85..0603321 100644 --- a/include/ceres/internal/autodiff.h +++ b/include/ceres/internal/autodiff.h
@@ -184,7 +184,7 @@ template <int j, int N, int Offset, typename T, typename JetT> struct Make1stOrderPerturbation { public: - static void Apply(const T* src, JetT* dst) { + inline static void Apply(const T* src, JetT* dst) { if (j == 0) { DCHECK(src); DCHECK(dst); @@ -217,7 +217,7 @@ ParameterIdx, Offset> { template <typename T, typename JetT> - static void Apply(T const* const* parameters, JetT* x) { + inline static void Apply(T const* const* parameters, JetT* x) { Make1stOrderPerturbation<0, N, Offset, T, JetT>::Apply( parameters[ParameterIdx], x + Offset); Make1stOrderPerturbations<integer_sequence<int, Ns...>, @@ -280,7 +280,7 @@ ParameterIdx, Offset> { template <typename JetT, typename T> - static void Apply(int num_outputs, JetT* output, T** jacobians) { + inline static void Apply(int num_outputs, JetT* output, T** jacobians) { if (jacobians[ParameterIdx]) { Take1stOrderPart<Offset, N>(num_outputs, output, jacobians[ParameterIdx]); }
diff --git a/internal/ceres/autodiff_benchmarks/CMakeLists.txt b/internal/ceres/autodiff_benchmarks/CMakeLists.txt index 06b3cf6..610ebc3 100644 --- a/internal/ceres/autodiff_benchmarks/CMakeLists.txt +++ b/internal/ceres/autodiff_benchmarks/CMakeLists.txt
@@ -1,6 +1,9 @@ # TODO: Add support for other compilers if(CMAKE_CXX_COMPILER_ID MATCHES "Clang") - list(APPEND CERES_BENCHMARK_FLAGS "-mllvm" "-inline-threshold=1000000") + # Increase the inlining threshold only for those functions marked with an + # inline hint. Raising it significantly is typically more realistic in a + # large code-base than raising -inline-threshold, which has a wider scope. + list(APPEND CERES_BENCHMARK_FLAGS "-mllvm" "-inlinehint-threshold=1000000") endif() add_executable(autodiff_benchmarks autodiff_benchmarks.cc)
diff --git a/internal/ceres/autodiff_benchmarks/autodiff_benchmarks.cc b/internal/ceres/autodiff_benchmarks/autodiff_benchmarks.cc index 2ab1598..f8b2a21 100644 --- a/internal/ceres/autodiff_benchmarks/autodiff_benchmarks.cc +++ b/internal/ceres/autodiff_benchmarks/autodiff_benchmarks.cc
@@ -57,7 +57,7 @@ : cost_function(std::forward<_Args>(__args)...) {} template <typename... _Args> - bool operator()(_Args&&... __args) const { + inline bool operator()(_Args&&... __args) const { return cost_function(std::forward<_Args>(__args)...); } @@ -171,7 +171,7 @@ Rat43CostFunctor(const double x, const double y) : x_(x), y_(y) {} template <typename T> - bool operator()(const T* parameters, T* residuals) const { + inline bool operator()(const T* parameters, T* residuals) const { const T& b1 = parameters[0]; const T& b2 = parameters[1]; const T& b3 = parameters[2];
diff --git a/internal/ceres/autodiff_benchmarks/brdf_cost_function.h b/internal/ceres/autodiff_benchmarks/brdf_cost_function.h index 4134181..eba0932 100644 --- a/internal/ceres/autodiff_benchmarks/brdf_cost_function.h +++ b/internal/ceres/autodiff_benchmarks/brdf_cost_function.h
@@ -48,14 +48,14 @@ Brdf() {} template <typename T> - bool operator()(const T* const material, - const T* const c_ptr, - const T* const n_ptr, - const T* const v_ptr, - const T* const l_ptr, - const T* const x_ptr, - const T* const y_ptr, - T* residual) const { + inline bool operator()(const T* const material, + const T* const c_ptr, + const T* const n_ptr, + const T* const v_ptr, + const T* const l_ptr, + const T* const x_ptr, + const T* const y_ptr, + T* residual) const { using Vec3 = Eigen::Matrix<T, 3, 1>; T metallic = material[0]; @@ -154,19 +154,19 @@ } template <typename T> - T SchlickFresnel(const T& u) const { + inline T SchlickFresnel(const T& u) const { T m = T(1) - u; const T m2 = m * m; return m2 * m2 * m; // (1-u)^5 } template <typename T> - T Aspect(const T& anisotropic) const { + inline T Aspect(const T& anisotropic) const { return T(sqrt(T(1) - anisotropic * T(0.9))); } template <typename T> - T SmithG_GGX(const T& n_dot_v, const T& alpha_g) const { + inline T SmithG_GGX(const T& n_dot_v, const T& alpha_g) const { const T a = alpha_g * alpha_g; const T b = n_dot_v * n_dot_v; return T(1) / (n_dot_v + T(sqrt(a + b - a * b))); @@ -175,7 +175,7 @@ // Generalized-Trowbridge-Reitz (GTR) Microfacet Distribution // See paper, Appendix B template <typename T> - T GTR1(const T& n_dot_h, const T& a) const { + inline T GTR1(const T& n_dot_h, const T& a) const { T result = T(0); if (a >= T(1)) { @@ -189,7 +189,7 @@ } template <typename T> - T GTR2Aniso(const T& n_dot_h, + inline T GTR2Aniso(const T& n_dot_h, const T& h_dot_x, const T& h_dot_y, const T& ax, @@ -205,9 +205,10 @@ } template <typename Derived1, typename Derived2> - typename Derived1::PlainObject Lerp(const Eigen::MatrixBase<Derived1>& a, - const Eigen::MatrixBase<Derived2>& b, - typename Derived1::Scalar alpha) const { + inline typename Derived1::PlainObject + Lerp(const Eigen::MatrixBase<Derived1>& a, + const Eigen::MatrixBase<Derived2>& b, + typename Derived1::Scalar alpha) const { return 
(typename Derived1::Scalar(1) - alpha) * a + alpha * b; }
diff --git a/internal/ceres/autodiff_benchmarks/constant_cost_function.h b/internal/ceres/autodiff_benchmarks/constant_cost_function.h index 00f39d6..caa0431 100644 --- a/internal/ceres/autodiff_benchmarks/constant_cost_function.h +++ b/internal/ceres/autodiff_benchmarks/constant_cost_function.h
@@ -40,7 +40,7 @@ struct ConstantCostFunction : public ceres::SizedCostFunction<1, kParameterBlockSize> { template <typename T> - bool operator()(const T* const x, T* residuals) const { + inline bool operator()(const T* const x, T* residuals) const { residuals[0] = T(5); return true; }
diff --git a/internal/ceres/autodiff_benchmarks/linear_cost_functions.h b/internal/ceres/autodiff_benchmarks/linear_cost_functions.h index 4ead04a..2f2552f 100644 --- a/internal/ceres/autodiff_benchmarks/linear_cost_functions.h +++ b/internal/ceres/autodiff_benchmarks/linear_cost_functions.h
@@ -38,7 +38,7 @@ struct Linear1CostFunction { template <typename T> - bool operator()(const T* const x, T* residuals) const { + inline bool operator()(const T* const x, T* residuals) const { residuals[0] = x[0] + T(10); return true; } @@ -46,7 +46,7 @@ struct Linear10CostFunction { template <typename T> - bool operator()(const T* const x, T* residuals) const { + inline bool operator()(const T* const x, T* residuals) const { for (int i = 0; i < 10; ++i) { residuals[i] = x[i] + T(i); }
diff --git a/internal/ceres/autodiff_benchmarks/photometric_error.h b/internal/ceres/autodiff_benchmarks/photometric_error.h index 9107ca0..8ed278d 100644 --- a/internal/ceres/autodiff_benchmarks/photometric_error.h +++ b/internal/ceres/autodiff_benchmarks/photometric_error.h
@@ -102,8 +102,8 @@ intrinsics_(intrinsics) {} template <typename T> - bool Project(Eigen::Matrix<T, 2, 1>& proj, - const Eigen::Matrix<T, 3, 1>& p) const { + inline bool Project(Eigen::Matrix<T, 2, 1>& proj, + const Eigen::Matrix<T, 3, 1>& p) const { const double& fx = intrinsics_[0]; const double& fy = intrinsics_[1]; const double& cx = intrinsics_[2]; @@ -136,10 +136,10 @@ } template <typename T> - bool operator()(const T* const pose_host_ptr, - const T* const pose_target_ptr, - const T* const idist_ptr, - T* residuals_ptr) const { + inline bool operator()(const T* const pose_host_ptr, + const T* const pose_target_ptr, + const T* const idist_ptr, + T* residuals_ptr) const { Eigen::Map<const Eigen::Quaternion<T>> q_w_h(pose_host_ptr); Eigen::Map<const Eigen::Matrix<T, 3, 1>> t_w_h(pose_host_ptr + 4); Eigen::Map<const Eigen::Quaternion<T>> q_w_t(pose_target_ptr);
diff --git a/internal/ceres/autodiff_benchmarks/relative_pose_error.h b/internal/ceres/autodiff_benchmarks/relative_pose_error.h index 93b28c7..b5c1a93 100644 --- a/internal/ceres/autodiff_benchmarks/relative_pose_error.h +++ b/internal/ceres/autodiff_benchmarks/relative_pose_error.h
@@ -48,9 +48,9 @@ : meas_q_i_j_(q_i_j), meas_t_i_j_(t_i_j) {} template <typename T> - bool operator()(const T* const pose_i_ptr, - const T* const pose_j_ptr, - T* residuals_ptr) const { + inline bool operator()(const T* const pose_i_ptr, + const T* const pose_j_ptr, + T* residuals_ptr) const { Eigen::Map<const Eigen::Quaternion<T>> q_w_i(pose_i_ptr); Eigen::Map<const Eigen::Matrix<T, 3, 1>> t_w_i(pose_i_ptr + 4); Eigen::Map<const Eigen::Quaternion<T>> q_w_j(pose_j_ptr);
diff --git a/internal/ceres/autodiff_benchmarks/snavely_reprojection_error.h b/internal/ceres/autodiff_benchmarks/snavely_reprojection_error.h index 8da8bea..795342f 100644 --- a/internal/ceres/autodiff_benchmarks/snavely_reprojection_error.h +++ b/internal/ceres/autodiff_benchmarks/snavely_reprojection_error.h
@@ -42,9 +42,9 @@ SnavelyReprojectionError() = default; template <typename T> - bool operator()(const T* const camera, - const T* const point, - T* residuals) const { + inline bool operator()(const T* const camera, + const T* const point, + T* residuals) const { T ox = T(observed_x); T oy = T(observed_y);