Use inline & -inlinehint-threshold in auto-diff benchmarks
- This yields the same performance as the original approach of
increasing -inline-threshold, but is more viable to adopt in a large
code base because its effect is limited to functions marked inline.
Change-Id: Id798dbca7d3050de0ea847a5ecc69484ac78a2cf
diff --git a/include/ceres/internal/autodiff.h b/include/ceres/internal/autodiff.h
index 3505c85..0603321 100644
--- a/include/ceres/internal/autodiff.h
+++ b/include/ceres/internal/autodiff.h
@@ -184,7 +184,7 @@
template <int j, int N, int Offset, typename T, typename JetT>
struct Make1stOrderPerturbation {
public:
- static void Apply(const T* src, JetT* dst) {
+ inline static void Apply(const T* src, JetT* dst) {
if (j == 0) {
DCHECK(src);
DCHECK(dst);
@@ -217,7 +217,7 @@
ParameterIdx,
Offset> {
template <typename T, typename JetT>
- static void Apply(T const* const* parameters, JetT* x) {
+ inline static void Apply(T const* const* parameters, JetT* x) {
Make1stOrderPerturbation<0, N, Offset, T, JetT>::Apply(
parameters[ParameterIdx], x + Offset);
Make1stOrderPerturbations<integer_sequence<int, Ns...>,
@@ -280,7 +280,7 @@
ParameterIdx,
Offset> {
template <typename JetT, typename T>
- static void Apply(int num_outputs, JetT* output, T** jacobians) {
+ inline static void Apply(int num_outputs, JetT* output, T** jacobians) {
if (jacobians[ParameterIdx]) {
Take1stOrderPart<Offset, N>(num_outputs, output, jacobians[ParameterIdx]);
}
diff --git a/internal/ceres/autodiff_benchmarks/CMakeLists.txt b/internal/ceres/autodiff_benchmarks/CMakeLists.txt
index 06b3cf6..610ebc3 100644
--- a/internal/ceres/autodiff_benchmarks/CMakeLists.txt
+++ b/internal/ceres/autodiff_benchmarks/CMakeLists.txt
@@ -1,6 +1,9 @@
# TODO: Add support for other compilers
if(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
- list(APPEND CERES_BENCHMARK_FLAGS "-mllvm" "-inline-threshold=1000000")
+ # Raise the inlining threshold only for functions marked with an inline
+ # hint. In a large code base, raising this threshold significantly is far
+ # more realistic than raising -inline-threshold, which has a broader scope.
+ list(APPEND CERES_BENCHMARK_FLAGS "-mllvm" "-inlinehint-threshold=1000000")
endif()
add_executable(autodiff_benchmarks autodiff_benchmarks.cc)
diff --git a/internal/ceres/autodiff_benchmarks/autodiff_benchmarks.cc b/internal/ceres/autodiff_benchmarks/autodiff_benchmarks.cc
index 2ab1598..f8b2a21 100644
--- a/internal/ceres/autodiff_benchmarks/autodiff_benchmarks.cc
+++ b/internal/ceres/autodiff_benchmarks/autodiff_benchmarks.cc
@@ -57,7 +57,7 @@
: cost_function(std::forward<_Args>(__args)...) {}
template <typename... _Args>
- bool operator()(_Args&&... __args) const {
+ inline bool operator()(_Args&&... __args) const {
return cost_function(std::forward<_Args>(__args)...);
}
@@ -171,7 +171,7 @@
Rat43CostFunctor(const double x, const double y) : x_(x), y_(y) {}
template <typename T>
- bool operator()(const T* parameters, T* residuals) const {
+ inline bool operator()(const T* parameters, T* residuals) const {
const T& b1 = parameters[0];
const T& b2 = parameters[1];
const T& b3 = parameters[2];
diff --git a/internal/ceres/autodiff_benchmarks/brdf_cost_function.h b/internal/ceres/autodiff_benchmarks/brdf_cost_function.h
index 4134181..eba0932 100644
--- a/internal/ceres/autodiff_benchmarks/brdf_cost_function.h
+++ b/internal/ceres/autodiff_benchmarks/brdf_cost_function.h
@@ -48,14 +48,14 @@
Brdf() {}
template <typename T>
- bool operator()(const T* const material,
- const T* const c_ptr,
- const T* const n_ptr,
- const T* const v_ptr,
- const T* const l_ptr,
- const T* const x_ptr,
- const T* const y_ptr,
- T* residual) const {
+ inline bool operator()(const T* const material,
+ const T* const c_ptr,
+ const T* const n_ptr,
+ const T* const v_ptr,
+ const T* const l_ptr,
+ const T* const x_ptr,
+ const T* const y_ptr,
+ T* residual) const {
using Vec3 = Eigen::Matrix<T, 3, 1>;
T metallic = material[0];
@@ -154,19 +154,19 @@
}
template <typename T>
- T SchlickFresnel(const T& u) const {
+ inline T SchlickFresnel(const T& u) const {
T m = T(1) - u;
const T m2 = m * m;
return m2 * m2 * m; // (1-u)^5
}
template <typename T>
- T Aspect(const T& anisotropic) const {
+ inline T Aspect(const T& anisotropic) const {
return T(sqrt(T(1) - anisotropic * T(0.9)));
}
template <typename T>
- T SmithG_GGX(const T& n_dot_v, const T& alpha_g) const {
+ inline T SmithG_GGX(const T& n_dot_v, const T& alpha_g) const {
const T a = alpha_g * alpha_g;
const T b = n_dot_v * n_dot_v;
return T(1) / (n_dot_v + T(sqrt(a + b - a * b)));
@@ -175,7 +175,7 @@
// Generalized-Trowbridge-Reitz (GTR) Microfacet Distribution
// See paper, Appendix B
template <typename T>
- T GTR1(const T& n_dot_h, const T& a) const {
+ inline T GTR1(const T& n_dot_h, const T& a) const {
T result = T(0);
if (a >= T(1)) {
@@ -189,7 +189,7 @@
}
template <typename T>
- T GTR2Aniso(const T& n_dot_h,
+ inline T GTR2Aniso(const T& n_dot_h,
const T& h_dot_x,
const T& h_dot_y,
const T& ax,
@@ -205,9 +205,10 @@
}
template <typename Derived1, typename Derived2>
- typename Derived1::PlainObject Lerp(const Eigen::MatrixBase<Derived1>& a,
- const Eigen::MatrixBase<Derived2>& b,
- typename Derived1::Scalar alpha) const {
+ inline typename Derived1::PlainObject
+ Lerp(const Eigen::MatrixBase<Derived1>& a,
+ const Eigen::MatrixBase<Derived2>& b,
+ typename Derived1::Scalar alpha) const {
return (typename Derived1::Scalar(1) - alpha) * a + alpha * b;
}
diff --git a/internal/ceres/autodiff_benchmarks/constant_cost_function.h b/internal/ceres/autodiff_benchmarks/constant_cost_function.h
index 00f39d6..caa0431 100644
--- a/internal/ceres/autodiff_benchmarks/constant_cost_function.h
+++ b/internal/ceres/autodiff_benchmarks/constant_cost_function.h
@@ -40,7 +40,7 @@
struct ConstantCostFunction
: public ceres::SizedCostFunction<1, kParameterBlockSize> {
template <typename T>
- bool operator()(const T* const x, T* residuals) const {
+ inline bool operator()(const T* const x, T* residuals) const {
residuals[0] = T(5);
return true;
}
diff --git a/internal/ceres/autodiff_benchmarks/linear_cost_functions.h b/internal/ceres/autodiff_benchmarks/linear_cost_functions.h
index 4ead04a..2f2552f 100644
--- a/internal/ceres/autodiff_benchmarks/linear_cost_functions.h
+++ b/internal/ceres/autodiff_benchmarks/linear_cost_functions.h
@@ -38,7 +38,7 @@
struct Linear1CostFunction {
template <typename T>
- bool operator()(const T* const x, T* residuals) const {
+ inline bool operator()(const T* const x, T* residuals) const {
residuals[0] = x[0] + T(10);
return true;
}
@@ -46,7 +46,7 @@
struct Linear10CostFunction {
template <typename T>
- bool operator()(const T* const x, T* residuals) const {
+ inline bool operator()(const T* const x, T* residuals) const {
for (int i = 0; i < 10; ++i) {
residuals[i] = x[i] + T(i);
}
diff --git a/internal/ceres/autodiff_benchmarks/photometric_error.h b/internal/ceres/autodiff_benchmarks/photometric_error.h
index 9107ca0..8ed278d 100644
--- a/internal/ceres/autodiff_benchmarks/photometric_error.h
+++ b/internal/ceres/autodiff_benchmarks/photometric_error.h
@@ -102,8 +102,8 @@
intrinsics_(intrinsics) {}
template <typename T>
- bool Project(Eigen::Matrix<T, 2, 1>& proj,
- const Eigen::Matrix<T, 3, 1>& p) const {
+ inline bool Project(Eigen::Matrix<T, 2, 1>& proj,
+ const Eigen::Matrix<T, 3, 1>& p) const {
const double& fx = intrinsics_[0];
const double& fy = intrinsics_[1];
const double& cx = intrinsics_[2];
@@ -136,10 +136,10 @@
}
template <typename T>
- bool operator()(const T* const pose_host_ptr,
- const T* const pose_target_ptr,
- const T* const idist_ptr,
- T* residuals_ptr) const {
+ inline bool operator()(const T* const pose_host_ptr,
+ const T* const pose_target_ptr,
+ const T* const idist_ptr,
+ T* residuals_ptr) const {
Eigen::Map<const Eigen::Quaternion<T>> q_w_h(pose_host_ptr);
Eigen::Map<const Eigen::Matrix<T, 3, 1>> t_w_h(pose_host_ptr + 4);
Eigen::Map<const Eigen::Quaternion<T>> q_w_t(pose_target_ptr);
diff --git a/internal/ceres/autodiff_benchmarks/relative_pose_error.h b/internal/ceres/autodiff_benchmarks/relative_pose_error.h
index 93b28c7..b5c1a93 100644
--- a/internal/ceres/autodiff_benchmarks/relative_pose_error.h
+++ b/internal/ceres/autodiff_benchmarks/relative_pose_error.h
@@ -48,9 +48,9 @@
: meas_q_i_j_(q_i_j), meas_t_i_j_(t_i_j) {}
template <typename T>
- bool operator()(const T* const pose_i_ptr,
- const T* const pose_j_ptr,
- T* residuals_ptr) const {
+ inline bool operator()(const T* const pose_i_ptr,
+ const T* const pose_j_ptr,
+ T* residuals_ptr) const {
Eigen::Map<const Eigen::Quaternion<T>> q_w_i(pose_i_ptr);
Eigen::Map<const Eigen::Matrix<T, 3, 1>> t_w_i(pose_i_ptr + 4);
Eigen::Map<const Eigen::Quaternion<T>> q_w_j(pose_j_ptr);
diff --git a/internal/ceres/autodiff_benchmarks/snavely_reprojection_error.h b/internal/ceres/autodiff_benchmarks/snavely_reprojection_error.h
index 8da8bea..795342f 100644
--- a/internal/ceres/autodiff_benchmarks/snavely_reprojection_error.h
+++ b/internal/ceres/autodiff_benchmarks/snavely_reprojection_error.h
@@ -42,9 +42,9 @@
SnavelyReprojectionError() = default;
template <typename T>
- bool operator()(const T* const camera,
- const T* const point,
- T* residuals) const {
+ inline bool operator()(const T* const camera,
+ const T* const point,
+ T* residuals) const {
T ox = T(observed_x);
T oy = T(observed_y);