Speed up InvertPSDMatrix For matrices of size <= 4, Eigen implements an optimized matrix inverse, which is orders of magnitude faster than computing the Cholesky factorization and inverting it. This can have a significant impact on the performance of the SchurEliminator. This change implements this optimization, adds tests and benchmarks. Before this change on my MacBook Pro ----------------------------------------------------------------------------------- Benchmark Time CPU Iterations ----------------------------------------------------------------------------------- BenchmarkFixedSizedInvertPSDMatrix<1> 0 ns 0 ns 1000000000 BenchmarkFixedSizedInvertPSDMatrix<2> 102 ns 101 ns 6504851 BenchmarkFixedSizedInvertPSDMatrix<3> 164 ns 164 ns 4297669 BenchmarkFixedSizedInvertPSDMatrix<4> 200 ns 200 ns 3933623 BenchmarkFixedSizedInvertPSDMatrix<5> 353 ns 353 ns 1930454 BenchmarkFixedSizedInvertPSDMatrix<6> 428 ns 427 ns 1629074 BenchmarkFixedSizedInvertPSDMatrix<7> 559 ns 558 ns 1211639 BenchmarkFixedSizedInvertPSDMatrix<8> 527 ns 527 ns 1000000 BenchmarkFixedSizedInvertPSDMatrix<9> 873 ns 873 ns 902713 BenchmarkFixedSizedInvertPSDMatrix<10> 892 ns 892 ns 787410 BenchmarkFixedSizedInvertPSDMatrix<11> 1201 ns 1201 ns 564334 BenchmarkFixedSizedInvertPSDMatrix<12> 1081 ns 1080 ns 588359 BenchmarkDynamicallyInvertPSDMatrix/1 322 ns 322 ns 2244892 BenchmarkDynamicallyInvertPSDMatrix/2 362 ns 362 ns 1869693 BenchmarkDynamicallyInvertPSDMatrix/3 455 ns 455 ns 1604092 BenchmarkDynamicallyInvertPSDMatrix/4 443 ns 443 ns 1578062 BenchmarkDynamicallyInvertPSDMatrix/5 648 ns 647 ns 963232 BenchmarkDynamicallyInvertPSDMatrix/6 756 ns 755 ns 899766 BenchmarkDynamicallyInvertPSDMatrix/7 906 ns 905 ns 740506 BenchmarkDynamicallyInvertPSDMatrix/8 885 ns 885 ns 790657 BenchmarkDynamicallyInvertPSDMatrix/9 1219 ns 1219 ns 600503 BenchmarkDynamicallyInvertPSDMatrix/10 1267 ns 1266 ns 534555 BenchmarkDynamicallyInvertPSDMatrix/11 1580 ns 1579 ns 469591 BenchmarkDynamicallyInvertPSDMatrix/12 1366 ns 1365 ns 514513 after this change: ----------------------------------------------------------------------------------- Benchmark Time CPU Iterations ----------------------------------------------------------------------------------- BenchmarkFixedSizedInvertPSDMatrix<1> 0 ns 0 ns 1000000000 BenchmarkFixedSizedInvertPSDMatrix<2> 1 ns 1 ns 1000000000 BenchmarkFixedSizedInvertPSDMatrix<3> 1 ns 1 ns 514399512 BenchmarkFixedSizedInvertPSDMatrix<4> 2 ns 2 ns 320587683 BenchmarkFixedSizedInvertPSDMatrix<5> 372 ns 372 ns 1856986 BenchmarkFixedSizedInvertPSDMatrix<6> 446 ns 446 ns 1552502 BenchmarkFixedSizedInvertPSDMatrix<7> 571 ns 570 ns 1208021 BenchmarkFixedSizedInvertPSDMatrix<8> 586 ns 584 ns 1090988 BenchmarkFixedSizedInvertPSDMatrix<9> 1003 ns 1001 ns 753279 BenchmarkFixedSizedInvertPSDMatrix<10> 1074 ns 1070 ns 689974 BenchmarkFixedSizedInvertPSDMatrix<11> 1361 ns 1351 ns 545388 BenchmarkFixedSizedInvertPSDMatrix<12> 1160 ns 1158 ns 615742 BenchmarkDynamicallyInvertPSDMatrix/1 326 ns 326 ns 2206552 BenchmarkDynamicallyInvertPSDMatrix/2 362 ns 361 ns 1820982 BenchmarkDynamicallyInvertPSDMatrix/3 432 ns 431 ns 1696361 BenchmarkDynamicallyInvertPSDMatrix/4 473 ns 472 ns 1294115 BenchmarkDynamicallyInvertPSDMatrix/5 657 ns 656 ns 917888 BenchmarkDynamicallyInvertPSDMatrix/6 804 ns 802 ns 884050 BenchmarkDynamicallyInvertPSDMatrix/7 936 ns 935 ns 679565 BenchmarkDynamicallyInvertPSDMatrix/8 915 ns 915 ns 695548 BenchmarkDynamicallyInvertPSDMatrix/9 1299 ns 1293 ns 583256 BenchmarkDynamicallyInvertPSDMatrix/10 1300 ns 1296 ns 562959 BenchmarkDynamicallyInvertPSDMatrix/11 1617 ns 1610 ns 480393 BenchmarkDynamicallyInvertPSDMatrix/12 1380 ns 1379 ns 503934 Change-Id: Id0d3bbe6d610ee7a3004bb8e2b657b90307b9805

commit: a0cd0854aad4241f5c998195bdf7ca2abb59b493 [log] [tgz]
author: Sameer Agarwal <sameeragarwal@google.com> Thu May 30 17:38:15 2019 -0700
committer: Sameer Agarwal <sameeragarwal@google.com> Thu May 30 18:15:01 2019 -0700
tree: df5b32b0b716046b37661fa6e6cca6663c4dadb8
parent: 7b53262b7fe113847ca186236b7a8e6a61ff415c [diff]
diff --git a/internal/ceres/CMakeLists.txt b/internal/ceres/CMakeLists.txt
index 3670aa4..22b5d07 100644
--- a/internal/ceres/CMakeLists.txt
+++ b/internal/ceres/CMakeLists.txt

@@ -511,4 +511,7 @@
 
   add_executable(small_blas_gemm_benchmark small_blas_gemm_benchmark.cc)
   add_dependencies_to_benchmark(small_blas_gemm_benchmark)
+
+  add_executable(invert_psd_matrix_benchmark invert_psd_matrix_benchmark.cc)
+  add_dependencies_to_benchmark(invert_psd_matrix_benchmark)
 endif (BUILD_BENCHMARKS)

diff --git a/internal/ceres/invert_psd_matrix.h b/internal/ceres/invert_psd_matrix.h
index 2319fea..60f6a57 100644
--- a/internal/ceres/invert_psd_matrix.h
+++ b/internal/ceres/invert_psd_matrix.h

@@ -51,16 +51,22 @@
 typename EigenTypes<kSize, kSize>::Matrix InvertPSDMatrix(
     const bool assume_full_rank,
     const typename EigenTypes<kSize, kSize>::Matrix& m) {
+  using MType = typename EigenTypes<kSize, kSize>::Matrix;
   const int size = m.rows();
 
-  // If the matrix can be assumed to be full rank, then just use the
-  // Cholesky factorization to invert it.
+  // If the matrix can be assumed to be full rank, then if its small
+  // (< 5) and fixed size, use Eigen's optimized inverse()
+  // implementation.
+  //
+  // https://eigen.tuxfamily.org/dox/group__TutorialLinearAlgebra.html#title3
   if (assume_full_rank) {
-    return m.template selfadjointView<Eigen::Upper>().llt().solve(
-        Matrix::Identity(size, size));
+    if (kSize > 0 && kSize < 5) {
+        return m.inverse();
+    }
+    return m.llt().solve(MType::Identity(size, size));
   }
 
-  Eigen::JacobiSVD<Matrix> svd(m, Eigen::ComputeThinU | Eigen::ComputeThinV);
+  Eigen::JacobiSVD<MType> svd(m, Eigen::ComputeThinU | Eigen::ComputeThinV);
   const double tolerance =
       std::numeric_limits<double>::epsilon() * size * svd.singularValues()(0);
 

diff --git a/internal/ceres/invert_psd_matrix_benchmark.cc b/internal/ceres/invert_psd_matrix_benchmark.cc
new file mode 100644
index 0000000..5aab6b6
--- /dev/null
+++ b/internal/ceres/invert_psd_matrix_benchmark.cc

@@ -0,0 +1,90 @@
+// Ceres Solver - A fast non-linear least squares minimizer
+// Copyright 2019 Google Inc. All rights reserved.
+// http://ceres-solver.org/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materils provided with the distribution.
+// * Neither the name of Google Inc. nor the names of its contributors may be
+//   used to endorse or promote products derived from this software without
+//   specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+// Authors: sameeragarwal@google.com (Sameer Agarwal)
+
+#include "Eigen/Dense"
+#include "benchmark/benchmark.h"
+#include "ceres/invert_psd_matrix.h"
+
+namespace ceres {
+namespace internal {
+
+template <int kSize>
+void BenchmarkFixedSizedInvertPSDMatrix(benchmark::State& state) {
+  using MatrixType = typename EigenTypes<kSize, kSize>::Matrix;
+  MatrixType input = MatrixType::Random();
+  input += input.transpose() + MatrixType::Identity();
+
+  MatrixType output;
+  constexpr bool kAssumeFullRank = true;
+  for (auto _ : state) {
+    benchmark::DoNotOptimize(
+        output = InvertPSDMatrix<kSize>(kAssumeFullRank, input));
+  }
+}
+
+BENCHMARK_TEMPLATE(BenchmarkFixedSizedInvertPSDMatrix, 1);
+BENCHMARK_TEMPLATE(BenchmarkFixedSizedInvertPSDMatrix, 2);
+BENCHMARK_TEMPLATE(BenchmarkFixedSizedInvertPSDMatrix, 3);
+BENCHMARK_TEMPLATE(BenchmarkFixedSizedInvertPSDMatrix, 4);
+BENCHMARK_TEMPLATE(BenchmarkFixedSizedInvertPSDMatrix, 5);
+BENCHMARK_TEMPLATE(BenchmarkFixedSizedInvertPSDMatrix, 6);
+BENCHMARK_TEMPLATE(BenchmarkFixedSizedInvertPSDMatrix, 7);
+BENCHMARK_TEMPLATE(BenchmarkFixedSizedInvertPSDMatrix, 8);
+BENCHMARK_TEMPLATE(BenchmarkFixedSizedInvertPSDMatrix, 9);
+BENCHMARK_TEMPLATE(BenchmarkFixedSizedInvertPSDMatrix, 10);
+BENCHMARK_TEMPLATE(BenchmarkFixedSizedInvertPSDMatrix, 11);
+BENCHMARK_TEMPLATE(BenchmarkFixedSizedInvertPSDMatrix, 12);
+
+
+void BenchmarkDynamicallyInvertPSDMatrix(benchmark::State& state) {
+  using MatrixType = typename EigenTypes<Eigen::Dynamic, Eigen::Dynamic>::Matrix;
+  const int size = state.range(0);
+  MatrixType input = MatrixType::Random(size, size);
+  input += input.transpose() + MatrixType::Identity(size, size);
+
+  MatrixType output;
+  constexpr bool kAssumeFullRank = true;
+  for (auto _ : state) {
+    benchmark::DoNotOptimize(
+        output = InvertPSDMatrix<Eigen::Dynamic>(kAssumeFullRank, input));
+  }
+}
+
+BENCHMARK(BenchmarkDynamicallyInvertPSDMatrix)
+    ->Apply([](benchmark::internal::Benchmark* benchmark) {
+      for (int i = 1; i < 13; ++i) {
+        benchmark->Args({i});
+      }
+    });
+
+}  // namespace internal
+}  // namespace ceres
+
+BENCHMARK_MAIN();

diff --git a/internal/ceres/invert_psd_matrix_test.cc b/internal/ceres/invert_psd_matrix_test.cc
index 5da9c11..0078e21 100644
--- a/internal/ceres/invert_psd_matrix_test.cc
+++ b/internal/ceres/invert_psd_matrix_test.cc

@@ -42,7 +42,7 @@
 template <int kSize>
 typename EigenTypes<kSize, kSize>::Matrix RandomPSDMatrixWithEigenValues(
     const typename EigenTypes<kSize>::Vector& eigenvalues) {
-  typename EigenTypes<kSize, kSize>::Matrix m;
+  typename EigenTypes<kSize, kSize>::Matrix m(eigenvalues.rows(), eigenvalues.rows());
   m.setRandom();
   Eigen::SelfAdjointEigenSolver<typename EigenTypes<kSize, kSize>::Matrix> es(
       m);
@@ -65,7 +65,7 @@
   const Matrix m = RandomPSDMatrixWithEigenValues<5>(eigenvalues);
   const Matrix inverse_m = InvertPSDMatrix<5>(kFullRank, m);
   EXPECT_NEAR((m * inverse_m - Matrix::Identity(5,5)).norm() / 5.0,  0.0,
-              std::numeric_limits<double>::epsilon());
+              10 * std::numeric_limits<double>::epsilon());
 }
 
 TEST(InvertPSDMatrix, RankDeficient5x5) {
@@ -82,5 +82,29 @@
               10 * std::numeric_limits<double>::epsilon());
 }
 
+TEST(InvertPSDMatrix, DynamicFullRank5x5) {
+  EigenTypes<Eigen::Dynamic>::Vector eigenvalues(5);
+  eigenvalues.setRandom();
+  eigenvalues = eigenvalues.array().abs().matrix();
+  const Matrix m = RandomPSDMatrixWithEigenValues<Eigen::Dynamic>(eigenvalues);
+  const Matrix inverse_m = InvertPSDMatrix<Eigen::Dynamic>(kFullRank, m);
+  EXPECT_NEAR((m * inverse_m - Matrix::Identity(5,5)).norm() / 5.0,  0.0,
+              10 * std::numeric_limits<double>::epsilon());
+}
+
+TEST(InvertPSDMatrix, DynamicRankDeficient5x5) {
+  EigenTypes<Eigen::Dynamic>::Vector eigenvalues(5);
+  eigenvalues.setRandom();
+  eigenvalues = eigenvalues.array().abs().matrix();
+  eigenvalues(3) = 0.0;
+  const Matrix m = RandomPSDMatrixWithEigenValues<Eigen::Dynamic>(eigenvalues);
+  const Matrix inverse_m = InvertPSDMatrix<Eigen::Dynamic>(kRankDeficient, m);
+  Matrix pseudo_identity = Matrix::Identity(5, 5);
+  pseudo_identity(3, 3) = 0.0;
+  EXPECT_NEAR((m * inverse_m * m - m).norm() / m.norm(),
+              0.0,
+              10 * std::numeric_limits<double>::epsilon());
+}
+
 }  // namespace internal
 }  // namespace ceres
commit	a0cd0854aad4241f5c998195bdf7ca2abb59b493	[log] [tgz]
author	Sameer Agarwal <sameeragarwal@google.com>	Thu May 30 17:38:15 2019 -0700
committer	Sameer Agarwal <sameeragarwal@google.com>	Thu May 30 18:15:01 2019 -0700
tree	df5b32b0b716046b37661fa6e6cca6663c4dadb8
parent	7b53262b7fe113847ca186236b7a8e6a61ff415c [diff]