Speed up InvertPSDMatrix

For matrices of size <= 4, Eigen implements an optimized matrix
inverse, which is orders of magnitude faster than computing the
Cholesky factorization and inverting it. This can have a significant
impact on the performance of the SchurEliminator.

This change implements this optimization, adds tests and benchmarks.

Before this change on my MacBook Pro

-----------------------------------------------------------------------------------
Benchmark                                            Time           CPU Iterations
-----------------------------------------------------------------------------------
BenchmarkFixedSizedInvertPSDMatrix<1>                0 ns          0 ns 1000000000
BenchmarkFixedSizedInvertPSDMatrix<2>              102 ns        101 ns    6504851
BenchmarkFixedSizedInvertPSDMatrix<3>     	   164 ns        164 ns    4297669
BenchmarkFixedSizedInvertPSDMatrix<4>              200 ns        200 ns    3933623
BenchmarkFixedSizedInvertPSDMatrix<5>              353 ns        353 ns    1930454
BenchmarkFixedSizedInvertPSDMatrix<6>              428 ns        427 ns    1629074
BenchmarkFixedSizedInvertPSDMatrix<7>              559 ns        558 ns    1211639
BenchmarkFixedSizedInvertPSDMatrix<8>              527 ns        527 ns    1000000
BenchmarkFixedSizedInvertPSDMatrix<9>              873 ns        873 ns     902713
BenchmarkFixedSizedInvertPSDMatrix<10>             892 ns        892 ns     787410
BenchmarkFixedSizedInvertPSDMatrix<11>            1201 ns       1201 ns     564334
BenchmarkFixedSizedInvertPSDMatrix<12>            1081 ns       1080 ns     588359
BenchmarkDynamicallyInvertPSDMatrix/1              322 ns        322 ns    2244892
BenchmarkDynamicallyInvertPSDMatrix/2              362 ns        362 ns    1869693
BenchmarkDynamicallyInvertPSDMatrix/3              455 ns        455 ns    1604092
BenchmarkDynamicallyInvertPSDMatrix/4              443 ns        443 ns    1578062
BenchmarkDynamicallyInvertPSDMatrix/5              648 ns        647 ns     963232
BenchmarkDynamicallyInvertPSDMatrix/6              756 ns        755 ns     899766
BenchmarkDynamicallyInvertPSDMatrix/7              906 ns        905 ns     740506
BenchmarkDynamicallyInvertPSDMatrix/8              885 ns        885 ns     790657
BenchmarkDynamicallyInvertPSDMatrix/9             1219 ns       1219 ns     600503
BenchmarkDynamicallyInvertPSDMatrix/10            1267 ns       1266 ns     534555
BenchmarkDynamicallyInvertPSDMatrix/11            1580 ns       1579 ns     469591
BenchmarkDynamicallyInvertPSDMatrix/12            1366 ns       1365 ns     514513

after this change:

-----------------------------------------------------------------------------------
Benchmark                                            Time           CPU Iterations
-----------------------------------------------------------------------------------
BenchmarkFixedSizedInvertPSDMatrix<1>                0 ns          0 ns 1000000000
BenchmarkFixedSizedInvertPSDMatrix<2>                1 ns          1 ns 1000000000
BenchmarkFixedSizedInvertPSDMatrix<3>                1 ns          1 ns  514399512
BenchmarkFixedSizedInvertPSDMatrix<4>                2 ns          2 ns  320587683
BenchmarkFixedSizedInvertPSDMatrix<5>              372 ns        372 ns    1856986
BenchmarkFixedSizedInvertPSDMatrix<6>              446 ns        446 ns    1552502
BenchmarkFixedSizedInvertPSDMatrix<7>              571 ns        570 ns    1208021
BenchmarkFixedSizedInvertPSDMatrix<8>              586 ns        584 ns    1090988
BenchmarkFixedSizedInvertPSDMatrix<9>             1003 ns       1001 ns     753279
BenchmarkFixedSizedInvertPSDMatrix<10>            1074 ns       1070 ns     689974
BenchmarkFixedSizedInvertPSDMatrix<11>            1361 ns       1351 ns     545388
BenchmarkFixedSizedInvertPSDMatrix<12>            1160 ns       1158 ns     615742
BenchmarkDynamicallyInvertPSDMatrix/1              326 ns        326 ns    2206552
BenchmarkDynamicallyInvertPSDMatrix/2              362 ns        361 ns    1820982
BenchmarkDynamicallyInvertPSDMatrix/3              432 ns        431 ns    1696361
BenchmarkDynamicallyInvertPSDMatrix/4              473 ns        472 ns    1294115
BenchmarkDynamicallyInvertPSDMatrix/5              657 ns        656 ns     917888
BenchmarkDynamicallyInvertPSDMatrix/6              804 ns        802 ns     884050
BenchmarkDynamicallyInvertPSDMatrix/7              936 ns        935 ns     679565
BenchmarkDynamicallyInvertPSDMatrix/8              915 ns        915 ns     695548
BenchmarkDynamicallyInvertPSDMatrix/9             1299 ns       1293 ns     583256
BenchmarkDynamicallyInvertPSDMatrix/10            1300 ns       1296 ns     562959
BenchmarkDynamicallyInvertPSDMatrix/11            1617 ns       1610 ns     480393
BenchmarkDynamicallyInvertPSDMatrix/12            1380 ns       1379 ns     503934

Change-Id: Id0d3bbe6d610ee7a3004bb8e2b657b90307b9805
diff --git a/internal/ceres/CMakeLists.txt b/internal/ceres/CMakeLists.txt
index 3670aa4..22b5d07 100644
--- a/internal/ceres/CMakeLists.txt
+++ b/internal/ceres/CMakeLists.txt
@@ -511,4 +511,7 @@
 
   add_executable(small_blas_gemm_benchmark small_blas_gemm_benchmark.cc)
   add_dependencies_to_benchmark(small_blas_gemm_benchmark)
+
+  add_executable(invert_psd_matrix_benchmark invert_psd_matrix_benchmark.cc)
+  add_dependencies_to_benchmark(invert_psd_matrix_benchmark)
 endif (BUILD_BENCHMARKS)
diff --git a/internal/ceres/invert_psd_matrix.h b/internal/ceres/invert_psd_matrix.h
index 2319fea..60f6a57 100644
--- a/internal/ceres/invert_psd_matrix.h
+++ b/internal/ceres/invert_psd_matrix.h
@@ -51,16 +51,22 @@
 typename EigenTypes<kSize, kSize>::Matrix InvertPSDMatrix(
     const bool assume_full_rank,
     const typename EigenTypes<kSize, kSize>::Matrix& m) {
+  using MType = typename EigenTypes<kSize, kSize>::Matrix;
   const int size = m.rows();
 
-  // If the matrix can be assumed to be full rank, then just use the
-  // Cholesky factorization to invert it.
+  // If the matrix can be assumed to be full rank, then if its small
+  // (< 5) and fixed size, use Eigen's optimized inverse()
+  // implementation.
+  //
+  // https://eigen.tuxfamily.org/dox/group__TutorialLinearAlgebra.html#title3
   if (assume_full_rank) {
-    return m.template selfadjointView<Eigen::Upper>().llt().solve(
-        Matrix::Identity(size, size));
+    if (kSize > 0 && kSize < 5) {
+        return m.inverse();
+    }
+    return m.llt().solve(MType::Identity(size, size));
   }
 
-  Eigen::JacobiSVD<Matrix> svd(m, Eigen::ComputeThinU | Eigen::ComputeThinV);
+  Eigen::JacobiSVD<MType> svd(m, Eigen::ComputeThinU | Eigen::ComputeThinV);
   const double tolerance =
       std::numeric_limits<double>::epsilon() * size * svd.singularValues()(0);
 
diff --git a/internal/ceres/invert_psd_matrix_benchmark.cc b/internal/ceres/invert_psd_matrix_benchmark.cc
new file mode 100644
index 0000000..5aab6b6
--- /dev/null
+++ b/internal/ceres/invert_psd_matrix_benchmark.cc
@@ -0,0 +1,90 @@
+// Ceres Solver - A fast non-linear least squares minimizer
+// Copyright 2019 Google Inc. All rights reserved.
+// http://ceres-solver.org/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materils provided with the distribution.
+// * Neither the name of Google Inc. nor the names of its contributors may be
+//   used to endorse or promote products derived from this software without
+//   specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+// Authors: sameeragarwal@google.com (Sameer Agarwal)
+
+#include "Eigen/Dense"
+#include "benchmark/benchmark.h"
+#include "ceres/invert_psd_matrix.h"
+
+namespace ceres {
+namespace internal {
+
+template <int kSize>
+void BenchmarkFixedSizedInvertPSDMatrix(benchmark::State& state) {
+  using MatrixType = typename EigenTypes<kSize, kSize>::Matrix;
+  MatrixType input = MatrixType::Random();
+  input += input.transpose() + MatrixType::Identity();
+
+  MatrixType output;
+  constexpr bool kAssumeFullRank = true;
+  for (auto _ : state) {
+    benchmark::DoNotOptimize(
+        output = InvertPSDMatrix<kSize>(kAssumeFullRank, input));
+  }
+}
+
+BENCHMARK_TEMPLATE(BenchmarkFixedSizedInvertPSDMatrix, 1);
+BENCHMARK_TEMPLATE(BenchmarkFixedSizedInvertPSDMatrix, 2);
+BENCHMARK_TEMPLATE(BenchmarkFixedSizedInvertPSDMatrix, 3);
+BENCHMARK_TEMPLATE(BenchmarkFixedSizedInvertPSDMatrix, 4);
+BENCHMARK_TEMPLATE(BenchmarkFixedSizedInvertPSDMatrix, 5);
+BENCHMARK_TEMPLATE(BenchmarkFixedSizedInvertPSDMatrix, 6);
+BENCHMARK_TEMPLATE(BenchmarkFixedSizedInvertPSDMatrix, 7);
+BENCHMARK_TEMPLATE(BenchmarkFixedSizedInvertPSDMatrix, 8);
+BENCHMARK_TEMPLATE(BenchmarkFixedSizedInvertPSDMatrix, 9);
+BENCHMARK_TEMPLATE(BenchmarkFixedSizedInvertPSDMatrix, 10);
+BENCHMARK_TEMPLATE(BenchmarkFixedSizedInvertPSDMatrix, 11);
+BENCHMARK_TEMPLATE(BenchmarkFixedSizedInvertPSDMatrix, 12);
+
+
+void BenchmarkDynamicallyInvertPSDMatrix(benchmark::State& state) {
+  using MatrixType = typename EigenTypes<Eigen::Dynamic, Eigen::Dynamic>::Matrix;
+  const int size = state.range(0);
+  MatrixType input = MatrixType::Random(size, size);
+  input += input.transpose() + MatrixType::Identity(size, size);
+
+  MatrixType output;
+  constexpr bool kAssumeFullRank = true;
+  for (auto _ : state) {
+    benchmark::DoNotOptimize(
+        output = InvertPSDMatrix<Eigen::Dynamic>(kAssumeFullRank, input));
+  }
+}
+
+BENCHMARK(BenchmarkDynamicallyInvertPSDMatrix)
+    ->Apply([](benchmark::internal::Benchmark* benchmark) {
+      for (int i = 1; i < 13; ++i) {
+        benchmark->Args({i});
+      }
+    });
+
+}  // namespace internal
+}  // namespace ceres
+
+BENCHMARK_MAIN();
diff --git a/internal/ceres/invert_psd_matrix_test.cc b/internal/ceres/invert_psd_matrix_test.cc
index 5da9c11..0078e21 100644
--- a/internal/ceres/invert_psd_matrix_test.cc
+++ b/internal/ceres/invert_psd_matrix_test.cc
@@ -42,7 +42,7 @@
 template <int kSize>
 typename EigenTypes<kSize, kSize>::Matrix RandomPSDMatrixWithEigenValues(
     const typename EigenTypes<kSize>::Vector& eigenvalues) {
-  typename EigenTypes<kSize, kSize>::Matrix m;
+  typename EigenTypes<kSize, kSize>::Matrix m(eigenvalues.rows(), eigenvalues.rows());
   m.setRandom();
   Eigen::SelfAdjointEigenSolver<typename EigenTypes<kSize, kSize>::Matrix> es(
       m);
@@ -65,7 +65,7 @@
   const Matrix m = RandomPSDMatrixWithEigenValues<5>(eigenvalues);
   const Matrix inverse_m = InvertPSDMatrix<5>(kFullRank, m);
   EXPECT_NEAR((m * inverse_m - Matrix::Identity(5,5)).norm() / 5.0,  0.0,
-              std::numeric_limits<double>::epsilon());
+              10 * std::numeric_limits<double>::epsilon());
 }
 
 TEST(InvertPSDMatrix, RankDeficient5x5) {
@@ -82,5 +82,29 @@
               10 * std::numeric_limits<double>::epsilon());
 }
 
+TEST(InvertPSDMatrix, DynamicFullRank5x5) {
+  EigenTypes<Eigen::Dynamic>::Vector eigenvalues(5);
+  eigenvalues.setRandom();
+  eigenvalues = eigenvalues.array().abs().matrix();
+  const Matrix m = RandomPSDMatrixWithEigenValues<Eigen::Dynamic>(eigenvalues);
+  const Matrix inverse_m = InvertPSDMatrix<Eigen::Dynamic>(kFullRank, m);
+  EXPECT_NEAR((m * inverse_m - Matrix::Identity(5,5)).norm() / 5.0,  0.0,
+              10 * std::numeric_limits<double>::epsilon());
+}
+
+TEST(InvertPSDMatrix, DynamicRankDeficient5x5) {
+  EigenTypes<Eigen::Dynamic>::Vector eigenvalues(5);
+  eigenvalues.setRandom();
+  eigenvalues = eigenvalues.array().abs().matrix();
+  eigenvalues(3) = 0.0;
+  const Matrix m = RandomPSDMatrixWithEigenValues<Eigen::Dynamic>(eigenvalues);
+  const Matrix inverse_m = InvertPSDMatrix<Eigen::Dynamic>(kRankDeficient, m);
+  Matrix pseudo_identity = Matrix::Identity(5, 5);
+  pseudo_identity(3, 3) = 0.0;
+  EXPECT_NEAR((m * inverse_m * m - m).norm() / m.norm(),
+              0.0,
+              10 * std::numeric_limits<double>::epsilon());
+}
+
 }  // namespace internal
 }  // namespace ceres