Use ArraySelector in Autodiff The class ArraySelector is now used in autodiff to store the parameters and residuals. This reduces overhead of FixedArray for fixed-sized residuals and allows more optimizations due to inlining and unrolling. Change-Id: Ibadc5644e64d672f7a555e250fb1f8da262f9d4f
diff --git a/include/ceres/internal/autodiff.h b/include/ceres/internal/autodiff.h index 72b8e37..0bc41f2 100644 --- a/include/ceres/internal/autodiff.h +++ b/include/ceres/internal/autodiff.h
@@ -144,6 +144,7 @@ #include <array> +#include "ceres/internal/array_selector.h" #include "ceres/internal/eigen.h" #include "ceres/internal/fixed_array.h" #include "ceres/internal/parameter_dims.h" @@ -152,6 +153,17 @@ #include "ceres/types.h" #include "glog/logging.h" +// If the number of parameters exceeds this value, the corresponding jets are +// placed on the heap. This will reduce performance by a factor of 2-5 on +// current compilers. +#ifndef CERES_AUTODIFF_MAX_PARAMETERS_ON_STACK +#define CERES_AUTODIFF_MAX_PARAMETERS_ON_STACK 50 +#endif + +#ifndef CERES_AUTODIFF_MAX_RESIDUALS_ON_STACK +#define CERES_AUTODIFF_MAX_RESIDUALS_ON_STACK 20 +#endif + namespace ceres { namespace internal { @@ -174,9 +186,7 @@ DCHECK(src); DCHECK(dst); for (int j = 0; j < N; ++j) { - dst[j].a = src[j]; - dst[j].v.setZero(); - dst[j].v[Offset + j] = T(1.0); + dst[j] = JetT(src[j], Offset + j); } } @@ -284,38 +294,54 @@ typename T> inline bool AutoDifferentiate(const Functor& functor, T const* const* parameters, - int num_outputs, + int dynamic_num_outputs, T* function_value, T** jacobians) { - DCHECK_GT(num_outputs, 0); - typedef Jet<T, ParameterDims::kNumParameters> JetT; - FixedArray<JetT, (256 * 7) / sizeof(JetT)> x(ParameterDims::kNumParameters + - num_outputs); - using Parameters = typename ParameterDims::Parameters; - // These are the positions of the respective jets in the fixed array x. 
+ if (kNumResiduals != DYNAMIC) { + DCHECK_EQ(kNumResiduals, dynamic_num_outputs); + } + + ArraySelector<JetT, + ParameterDims::kNumParameters, + CERES_AUTODIFF_MAX_PARAMETERS_ON_STACK> + parameters_as_jets(ParameterDims::kNumParameters); + + // Pointers to the beginning of each parameter block std::array<JetT*, ParameterDims::kNumParameterBlocks> unpacked_parameters = - ParameterDims::GetUnpackedParameters(x.data()); - JetT* output = x.data() + ParameterDims::kNumParameters; + ParameterDims::GetUnpackedParameters(parameters_as_jets.data()); + + // If the number of residuals is fixed, we use the template argument as the + // number of outputs. Otherwise we use the dynamic_num_outputs parameter. + // Note: The ?-operator here is compile-time evaluated, therefore num_outputs + // is also a compile-time constant for functors with fixed residuals. + const int num_outputs = + kNumResiduals == DYNAMIC ? dynamic_num_outputs : kNumResiduals; + DCHECK_GT(num_outputs, 0); + + ArraySelector<JetT, kNumResiduals, CERES_AUTODIFF_MAX_RESIDUALS_ON_STACK> + residuals_as_jets(num_outputs); // Invalidate the output Jets, so that we can detect if the user // did not assign values to all of them. 
for (int i = 0; i < num_outputs; ++i) { - output[i].a = kImpossibleValue; - output[i].v.setConstant(kImpossibleValue); + residuals_as_jets[i].a = kImpossibleValue; + residuals_as_jets[i].v.setConstant(kImpossibleValue); } - Make1stOrderPerturbations<Parameters>::Apply(parameters, x.data()); + Make1stOrderPerturbations<Parameters>::Apply(parameters, + parameters_as_jets.data()); if (!VariadicEvaluate<ParameterDims>( - functor, unpacked_parameters.data(), output)) { + functor, unpacked_parameters.data(), residuals_as_jets.data())) { return false; } - Take0thOrderPart(num_outputs, output, function_value); - Take1stOrderParts<Parameters>::Apply(num_outputs, output, jacobians); + Take0thOrderPart(num_outputs, residuals_as_jets.data(), function_value); + Take1stOrderParts<Parameters>::Apply( + num_outputs, residuals_as_jets.data(), jacobians); return true; }
diff --git a/internal/ceres/autodiff_benchmarks/autodiff_benchmarks.cc b/internal/ceres/autodiff_benchmarks/autodiff_benchmarks.cc index 89f56df..252cb52 100644 --- a/internal/ceres/autodiff_benchmarks/autodiff_benchmarks.cc +++ b/internal/ceres/autodiff_benchmarks/autodiff_benchmarks.cc
@@ -78,7 +78,6 @@ } } BENCHMARK(BM_Linear1AutoDiff)->Arg(0)->Arg(1); -; #ifdef WITH_CODE_GENERATION static void BM_Linear10CodeGen(benchmark::State& state) { @@ -98,7 +97,6 @@ } } BENCHMARK(BM_Linear10CodeGen)->Arg(0)->Arg(1); -; #endif static void BM_Linear10AutoDiff(benchmark::State& state) { @@ -121,7 +119,6 @@ } } BENCHMARK(BM_Linear10AutoDiff)->Arg(0)->Arg(1); -; // From the NIST problem collection. struct Rat43CostFunctor { @@ -185,7 +182,6 @@ } } BENCHMARK(BM_SnavelyReprojectionCodeGen)->Arg(0)->Arg(1); -; #endif static void BM_SnavelyReprojectionAutoDiff(benchmark::State& state) { @@ -214,7 +210,6 @@ } BENCHMARK(BM_SnavelyReprojectionAutoDiff)->Arg(0)->Arg(1); -; #ifdef WITH_CODE_GENERATION static void BM_BrdfCodeGen(benchmark::State& state) { @@ -252,7 +247,6 @@ } BENCHMARK(BM_BrdfCodeGen)->Arg(0)->Arg(1); -; #endif static void BM_BrdfAutoDiff(benchmark::State& state) { @@ -292,7 +286,6 @@ } BENCHMARK(BM_BrdfAutoDiff)->Arg(0)->Arg(1); -; } // namespace ceres