Speed up InvertPSDMatrix For matrices of size <= 4, Eigen implements an optimized matrix inverse, which is orders of magnitude faster than computing the Cholesky factorization and inverting it. This can have a significant impact on the performance of the SchurEliminator. This change implements this optimization, adds tests and benchmarks. Before this change on my MacBook Pro ----------------------------------------------------------------------------------- Benchmark Time CPU Iterations ----------------------------------------------------------------------------------- BenchmarkFixedSizedInvertPSDMatrix<1> 0 ns 0 ns 1000000000 BenchmarkFixedSizedInvertPSDMatrix<2> 102 ns 101 ns 6504851 BenchmarkFixedSizedInvertPSDMatrix<3> 164 ns 164 ns 4297669 BenchmarkFixedSizedInvertPSDMatrix<4> 200 ns 200 ns 3933623 BenchmarkFixedSizedInvertPSDMatrix<5> 353 ns 353 ns 1930454 BenchmarkFixedSizedInvertPSDMatrix<6> 428 ns 427 ns 1629074 BenchmarkFixedSizedInvertPSDMatrix<7> 559 ns 558 ns 1211639 BenchmarkFixedSizedInvertPSDMatrix<8> 527 ns 527 ns 1000000 BenchmarkFixedSizedInvertPSDMatrix<9> 873 ns 873 ns 902713 BenchmarkFixedSizedInvertPSDMatrix<10> 892 ns 892 ns 787410 BenchmarkFixedSizedInvertPSDMatrix<11> 1201 ns 1201 ns 564334 BenchmarkFixedSizedInvertPSDMatrix<12> 1081 ns 1080 ns 588359 BenchmarkDynamicallyInvertPSDMatrix/1 322 ns 322 ns 2244892 BenchmarkDynamicallyInvertPSDMatrix/2 362 ns 362 ns 1869693 BenchmarkDynamicallyInvertPSDMatrix/3 455 ns 455 ns 1604092 BenchmarkDynamicallyInvertPSDMatrix/4 443 ns 443 ns 1578062 BenchmarkDynamicallyInvertPSDMatrix/5 648 ns 647 ns 963232 BenchmarkDynamicallyInvertPSDMatrix/6 756 ns 755 ns 899766 BenchmarkDynamicallyInvertPSDMatrix/7 906 ns 905 ns 740506 BenchmarkDynamicallyInvertPSDMatrix/8 885 ns 885 ns 790657 BenchmarkDynamicallyInvertPSDMatrix/9 1219 ns 1219 ns 600503 BenchmarkDynamicallyInvertPSDMatrix/10 1267 ns 1266 ns 534555 BenchmarkDynamicallyInvertPSDMatrix/11 1580 ns 1579 ns 469591 BenchmarkDynamicallyInvertPSDMatrix/12 1366 ns 1365 ns 514513 after this change: ----------------------------------------------------------------------------------- Benchmark Time CPU Iterations ----------------------------------------------------------------------------------- BenchmarkFixedSizedInvertPSDMatrix<1> 0 ns 0 ns 1000000000 BenchmarkFixedSizedInvertPSDMatrix<2> 1 ns 1 ns 1000000000 BenchmarkFixedSizedInvertPSDMatrix<3> 1 ns 1 ns 514399512 BenchmarkFixedSizedInvertPSDMatrix<4> 2 ns 2 ns 320587683 BenchmarkFixedSizedInvertPSDMatrix<5> 372 ns 372 ns 1856986 BenchmarkFixedSizedInvertPSDMatrix<6> 446 ns 446 ns 1552502 BenchmarkFixedSizedInvertPSDMatrix<7> 571 ns 570 ns 1208021 BenchmarkFixedSizedInvertPSDMatrix<8> 586 ns 584 ns 1090988 BenchmarkFixedSizedInvertPSDMatrix<9> 1003 ns 1001 ns 753279 BenchmarkFixedSizedInvertPSDMatrix<10> 1074 ns 1070 ns 689974 BenchmarkFixedSizedInvertPSDMatrix<11> 1361 ns 1351 ns 545388 BenchmarkFixedSizedInvertPSDMatrix<12> 1160 ns 1158 ns 615742 BenchmarkDynamicallyInvertPSDMatrix/1 326 ns 326 ns 2206552 BenchmarkDynamicallyInvertPSDMatrix/2 362 ns 361 ns 1820982 BenchmarkDynamicallyInvertPSDMatrix/3 432 ns 431 ns 1696361 BenchmarkDynamicallyInvertPSDMatrix/4 473 ns 472 ns 1294115 BenchmarkDynamicallyInvertPSDMatrix/5 657 ns 656 ns 917888 BenchmarkDynamicallyInvertPSDMatrix/6 804 ns 802 ns 884050 BenchmarkDynamicallyInvertPSDMatrix/7 936 ns 935 ns 679565 BenchmarkDynamicallyInvertPSDMatrix/8 915 ns 915 ns 695548 BenchmarkDynamicallyInvertPSDMatrix/9 1299 ns 1293 ns 583256 BenchmarkDynamicallyInvertPSDMatrix/10 1300 ns 1296 ns 562959 BenchmarkDynamicallyInvertPSDMatrix/11 1617 ns 1610 ns 480393 BenchmarkDynamicallyInvertPSDMatrix/12 1380 ns 1379 ns 503934 Change-Id: Id0d3bbe6d610ee7a3004bb8e2b657b90307b9805
diff --git a/internal/ceres/CMakeLists.txt b/internal/ceres/CMakeLists.txt index 3670aa4..22b5d07 100644 --- a/internal/ceres/CMakeLists.txt +++ b/internal/ceres/CMakeLists.txt
@@ -511,4 +511,7 @@ add_executable(small_blas_gemm_benchmark small_blas_gemm_benchmark.cc) add_dependencies_to_benchmark(small_blas_gemm_benchmark) + + add_executable(invert_psd_matrix_benchmark invert_psd_matrix_benchmark.cc) + add_dependencies_to_benchmark(invert_psd_matrix_benchmark) endif (BUILD_BENCHMARKS)
diff --git a/internal/ceres/invert_psd_matrix.h b/internal/ceres/invert_psd_matrix.h index 2319fea..60f6a57 100644 --- a/internal/ceres/invert_psd_matrix.h +++ b/internal/ceres/invert_psd_matrix.h
@@ -51,16 +51,22 @@ typename EigenTypes<kSize, kSize>::Matrix InvertPSDMatrix( const bool assume_full_rank, const typename EigenTypes<kSize, kSize>::Matrix& m) { + using MType = typename EigenTypes<kSize, kSize>::Matrix; const int size = m.rows(); - // If the matrix can be assumed to be full rank, then just use the - // Cholesky factorization to invert it. + // If the matrix can be assumed to be full rank, then if its small + // (< 5) and fixed size, use Eigen's optimized inverse() + // implementation. + // + // https://eigen.tuxfamily.org/dox/group__TutorialLinearAlgebra.html#title3 if (assume_full_rank) { - return m.template selfadjointView<Eigen::Upper>().llt().solve( - Matrix::Identity(size, size)); + if (kSize > 0 && kSize < 5) { + return m.inverse(); + } + return m.llt().solve(MType::Identity(size, size)); } - Eigen::JacobiSVD<Matrix> svd(m, Eigen::ComputeThinU | Eigen::ComputeThinV); + Eigen::JacobiSVD<MType> svd(m, Eigen::ComputeThinU | Eigen::ComputeThinV); const double tolerance = std::numeric_limits<double>::epsilon() * size * svd.singularValues()(0);
diff --git a/internal/ceres/invert_psd_matrix_benchmark.cc b/internal/ceres/invert_psd_matrix_benchmark.cc new file mode 100644 index 0000000..5aab6b6 --- /dev/null +++ b/internal/ceres/invert_psd_matrix_benchmark.cc
@@ -0,0 +1,90 @@ +// Ceres Solver - A fast non-linear least squares minimizer +// Copyright 2019 Google Inc. All rights reserved. +// http://ceres-solver.org/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materils provided with the distribution. +// * Neither the name of Google Inc. nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +// Authors: sameeragarwal@google.com (Sameer Agarwal) + +#include "Eigen/Dense" +#include "benchmark/benchmark.h" +#include "ceres/invert_psd_matrix.h" + +namespace ceres { +namespace internal { + +template <int kSize> +void BenchmarkFixedSizedInvertPSDMatrix(benchmark::State& state) { + using MatrixType = typename EigenTypes<kSize, kSize>::Matrix; + MatrixType input = MatrixType::Random(); + input += input.transpose() + MatrixType::Identity(); + + MatrixType output; + constexpr bool kAssumeFullRank = true; + for (auto _ : state) { + benchmark::DoNotOptimize( + output = InvertPSDMatrix<kSize>(kAssumeFullRank, input)); + } +} + +BENCHMARK_TEMPLATE(BenchmarkFixedSizedInvertPSDMatrix, 1); +BENCHMARK_TEMPLATE(BenchmarkFixedSizedInvertPSDMatrix, 2); +BENCHMARK_TEMPLATE(BenchmarkFixedSizedInvertPSDMatrix, 3); +BENCHMARK_TEMPLATE(BenchmarkFixedSizedInvertPSDMatrix, 4); +BENCHMARK_TEMPLATE(BenchmarkFixedSizedInvertPSDMatrix, 5); +BENCHMARK_TEMPLATE(BenchmarkFixedSizedInvertPSDMatrix, 6); +BENCHMARK_TEMPLATE(BenchmarkFixedSizedInvertPSDMatrix, 7); +BENCHMARK_TEMPLATE(BenchmarkFixedSizedInvertPSDMatrix, 8); +BENCHMARK_TEMPLATE(BenchmarkFixedSizedInvertPSDMatrix, 9); +BENCHMARK_TEMPLATE(BenchmarkFixedSizedInvertPSDMatrix, 10); +BENCHMARK_TEMPLATE(BenchmarkFixedSizedInvertPSDMatrix, 11); +BENCHMARK_TEMPLATE(BenchmarkFixedSizedInvertPSDMatrix, 12); + + +void BenchmarkDynamicallyInvertPSDMatrix(benchmark::State& state) { + using MatrixType = typename EigenTypes<Eigen::Dynamic, Eigen::Dynamic>::Matrix; + const int size = state.range(0); + MatrixType input = MatrixType::Random(size, size); + input += input.transpose() + MatrixType::Identity(size, size); + + MatrixType output; + constexpr bool kAssumeFullRank = true; + for (auto _ : state) { + benchmark::DoNotOptimize( + output = InvertPSDMatrix<Eigen::Dynamic>(kAssumeFullRank, input)); + } +} + +BENCHMARK(BenchmarkDynamicallyInvertPSDMatrix) + ->Apply([](benchmark::internal::Benchmark* benchmark) { + for (int i = 1; i < 13; ++i) { + benchmark->Args({i}); + } + }); + +} // namespace internal +} // namespace ceres + +BENCHMARK_MAIN();
diff --git a/internal/ceres/invert_psd_matrix_test.cc b/internal/ceres/invert_psd_matrix_test.cc index 5da9c11..0078e21 100644 --- a/internal/ceres/invert_psd_matrix_test.cc +++ b/internal/ceres/invert_psd_matrix_test.cc
@@ -42,7 +42,7 @@ template <int kSize> typename EigenTypes<kSize, kSize>::Matrix RandomPSDMatrixWithEigenValues( const typename EigenTypes<kSize>::Vector& eigenvalues) { - typename EigenTypes<kSize, kSize>::Matrix m; + typename EigenTypes<kSize, kSize>::Matrix m(eigenvalues.rows(), eigenvalues.rows()); m.setRandom(); Eigen::SelfAdjointEigenSolver<typename EigenTypes<kSize, kSize>::Matrix> es( m); @@ -65,7 +65,7 @@ const Matrix m = RandomPSDMatrixWithEigenValues<5>(eigenvalues); const Matrix inverse_m = InvertPSDMatrix<5>(kFullRank, m); EXPECT_NEAR((m * inverse_m - Matrix::Identity(5,5)).norm() / 5.0, 0.0, - std::numeric_limits<double>::epsilon()); + 10 * std::numeric_limits<double>::epsilon()); } TEST(InvertPSDMatrix, RankDeficient5x5) { @@ -82,5 +82,29 @@ 10 * std::numeric_limits<double>::epsilon()); } +TEST(InvertPSDMatrix, DynamicFullRank5x5) { + EigenTypes<Eigen::Dynamic>::Vector eigenvalues(5); + eigenvalues.setRandom(); + eigenvalues = eigenvalues.array().abs().matrix(); + const Matrix m = RandomPSDMatrixWithEigenValues<Eigen::Dynamic>(eigenvalues); + const Matrix inverse_m = InvertPSDMatrix<Eigen::Dynamic>(kFullRank, m); + EXPECT_NEAR((m * inverse_m - Matrix::Identity(5,5)).norm() / 5.0, 0.0, + 10 * std::numeric_limits<double>::epsilon()); +} + +TEST(InvertPSDMatrix, DynamicRankDeficient5x5) { + EigenTypes<Eigen::Dynamic>::Vector eigenvalues(5); + eigenvalues.setRandom(); + eigenvalues = eigenvalues.array().abs().matrix(); + eigenvalues(3) = 0.0; + const Matrix m = RandomPSDMatrixWithEigenValues<Eigen::Dynamic>(eigenvalues); + const Matrix inverse_m = InvertPSDMatrix<Eigen::Dynamic>(kRankDeficient, m); + Matrix pseudo_identity = Matrix::Identity(5, 5); + pseudo_identity(3, 3) = 0.0; + EXPECT_NEAR((m * inverse_m * m - m).norm() / m.norm(), + 0.0, + 10 * std::numeric_limits<double>::epsilon()); +} + } // namespace internal } // namespace ceres