Speed up InvertPSDMatrix
For matrices of size <= 4, Eigen implements an optimized matrix
inverse, which is orders of magnitude faster than computing the
Cholesky factorization and inverting it. This can have a significant
impact on the performance of the SchurEliminator.
This change implements this optimization, adds tests and benchmarks.
Before this change on my MacBook Pro
-----------------------------------------------------------------------------------
Benchmark Time CPU Iterations
-----------------------------------------------------------------------------------
BenchmarkFixedSizedInvertPSDMatrix<1> 0 ns 0 ns 1000000000
BenchmarkFixedSizedInvertPSDMatrix<2> 102 ns 101 ns 6504851
BenchmarkFixedSizedInvertPSDMatrix<3> 164 ns 164 ns 4297669
BenchmarkFixedSizedInvertPSDMatrix<4> 200 ns 200 ns 3933623
BenchmarkFixedSizedInvertPSDMatrix<5> 353 ns 353 ns 1930454
BenchmarkFixedSizedInvertPSDMatrix<6> 428 ns 427 ns 1629074
BenchmarkFixedSizedInvertPSDMatrix<7> 559 ns 558 ns 1211639
BenchmarkFixedSizedInvertPSDMatrix<8> 527 ns 527 ns 1000000
BenchmarkFixedSizedInvertPSDMatrix<9> 873 ns 873 ns 902713
BenchmarkFixedSizedInvertPSDMatrix<10> 892 ns 892 ns 787410
BenchmarkFixedSizedInvertPSDMatrix<11> 1201 ns 1201 ns 564334
BenchmarkFixedSizedInvertPSDMatrix<12> 1081 ns 1080 ns 588359
BenchmarkDynamicallyInvertPSDMatrix/1 322 ns 322 ns 2244892
BenchmarkDynamicallyInvertPSDMatrix/2 362 ns 362 ns 1869693
BenchmarkDynamicallyInvertPSDMatrix/3 455 ns 455 ns 1604092
BenchmarkDynamicallyInvertPSDMatrix/4 443 ns 443 ns 1578062
BenchmarkDynamicallyInvertPSDMatrix/5 648 ns 647 ns 963232
BenchmarkDynamicallyInvertPSDMatrix/6 756 ns 755 ns 899766
BenchmarkDynamicallyInvertPSDMatrix/7 906 ns 905 ns 740506
BenchmarkDynamicallyInvertPSDMatrix/8 885 ns 885 ns 790657
BenchmarkDynamicallyInvertPSDMatrix/9 1219 ns 1219 ns 600503
BenchmarkDynamicallyInvertPSDMatrix/10 1267 ns 1266 ns 534555
BenchmarkDynamicallyInvertPSDMatrix/11 1580 ns 1579 ns 469591
BenchmarkDynamicallyInvertPSDMatrix/12 1366 ns 1365 ns 514513
after this change:
-----------------------------------------------------------------------------------
Benchmark Time CPU Iterations
-----------------------------------------------------------------------------------
BenchmarkFixedSizedInvertPSDMatrix<1> 0 ns 0 ns 1000000000
BenchmarkFixedSizedInvertPSDMatrix<2> 1 ns 1 ns 1000000000
BenchmarkFixedSizedInvertPSDMatrix<3> 1 ns 1 ns 514399512
BenchmarkFixedSizedInvertPSDMatrix<4> 2 ns 2 ns 320587683
BenchmarkFixedSizedInvertPSDMatrix<5> 372 ns 372 ns 1856986
BenchmarkFixedSizedInvertPSDMatrix<6> 446 ns 446 ns 1552502
BenchmarkFixedSizedInvertPSDMatrix<7> 571 ns 570 ns 1208021
BenchmarkFixedSizedInvertPSDMatrix<8> 586 ns 584 ns 1090988
BenchmarkFixedSizedInvertPSDMatrix<9> 1003 ns 1001 ns 753279
BenchmarkFixedSizedInvertPSDMatrix<10> 1074 ns 1070 ns 689974
BenchmarkFixedSizedInvertPSDMatrix<11> 1361 ns 1351 ns 545388
BenchmarkFixedSizedInvertPSDMatrix<12> 1160 ns 1158 ns 615742
BenchmarkDynamicallyInvertPSDMatrix/1 326 ns 326 ns 2206552
BenchmarkDynamicallyInvertPSDMatrix/2 362 ns 361 ns 1820982
BenchmarkDynamicallyInvertPSDMatrix/3 432 ns 431 ns 1696361
BenchmarkDynamicallyInvertPSDMatrix/4 473 ns 472 ns 1294115
BenchmarkDynamicallyInvertPSDMatrix/5 657 ns 656 ns 917888
BenchmarkDynamicallyInvertPSDMatrix/6 804 ns 802 ns 884050
BenchmarkDynamicallyInvertPSDMatrix/7 936 ns 935 ns 679565
BenchmarkDynamicallyInvertPSDMatrix/8 915 ns 915 ns 695548
BenchmarkDynamicallyInvertPSDMatrix/9 1299 ns 1293 ns 583256
BenchmarkDynamicallyInvertPSDMatrix/10 1300 ns 1296 ns 562959
BenchmarkDynamicallyInvertPSDMatrix/11 1617 ns 1610 ns 480393
BenchmarkDynamicallyInvertPSDMatrix/12 1380 ns 1379 ns 503934
Change-Id: Id0d3bbe6d610ee7a3004bb8e2b657b90307b9805
diff --git a/internal/ceres/CMakeLists.txt b/internal/ceres/CMakeLists.txt
index 3670aa4..22b5d07 100644
--- a/internal/ceres/CMakeLists.txt
+++ b/internal/ceres/CMakeLists.txt
@@ -511,4 +511,7 @@
add_executable(small_blas_gemm_benchmark small_blas_gemm_benchmark.cc)
add_dependencies_to_benchmark(small_blas_gemm_benchmark)
+
+ add_executable(invert_psd_matrix_benchmark invert_psd_matrix_benchmark.cc)
+ add_dependencies_to_benchmark(invert_psd_matrix_benchmark)
endif (BUILD_BENCHMARKS)
diff --git a/internal/ceres/invert_psd_matrix.h b/internal/ceres/invert_psd_matrix.h
index 2319fea..60f6a57 100644
--- a/internal/ceres/invert_psd_matrix.h
+++ b/internal/ceres/invert_psd_matrix.h
@@ -51,16 +51,22 @@
typename EigenTypes<kSize, kSize>::Matrix InvertPSDMatrix(
const bool assume_full_rank,
const typename EigenTypes<kSize, kSize>::Matrix& m) {
+ using MType = typename EigenTypes<kSize, kSize>::Matrix;
const int size = m.rows();
- // If the matrix can be assumed to be full rank, then just use the
- // Cholesky factorization to invert it.
+ // If the matrix can be assumed to be full rank, then if its small
+ // (< 5) and fixed size, use Eigen's optimized inverse()
+ // implementation.
+ //
+ // https://eigen.tuxfamily.org/dox/group__TutorialLinearAlgebra.html#title3
if (assume_full_rank) {
- return m.template selfadjointView<Eigen::Upper>().llt().solve(
- Matrix::Identity(size, size));
+ if (kSize > 0 && kSize < 5) {
+ return m.inverse();
+ }
+ return m.llt().solve(MType::Identity(size, size));
}
- Eigen::JacobiSVD<Matrix> svd(m, Eigen::ComputeThinU | Eigen::ComputeThinV);
+ Eigen::JacobiSVD<MType> svd(m, Eigen::ComputeThinU | Eigen::ComputeThinV);
const double tolerance =
std::numeric_limits<double>::epsilon() * size * svd.singularValues()(0);
diff --git a/internal/ceres/invert_psd_matrix_benchmark.cc b/internal/ceres/invert_psd_matrix_benchmark.cc
new file mode 100644
index 0000000..5aab6b6
--- /dev/null
+++ b/internal/ceres/invert_psd_matrix_benchmark.cc
@@ -0,0 +1,90 @@
+// Ceres Solver - A fast non-linear least squares minimizer
+// Copyright 2019 Google Inc. All rights reserved.
+// http://ceres-solver.org/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materils provided with the distribution.
+// * Neither the name of Google Inc. nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+// Authors: sameeragarwal@google.com (Sameer Agarwal)
+
+#include "Eigen/Dense"
+#include "benchmark/benchmark.h"
+#include "ceres/invert_psd_matrix.h"
+
+namespace ceres {
+namespace internal {
+
+template <int kSize>
+void BenchmarkFixedSizedInvertPSDMatrix(benchmark::State& state) {
+ using MatrixType = typename EigenTypes<kSize, kSize>::Matrix;
+ MatrixType input = MatrixType::Random();
+ input += input.transpose() + MatrixType::Identity();
+
+ MatrixType output;
+ constexpr bool kAssumeFullRank = true;
+ for (auto _ : state) {
+ benchmark::DoNotOptimize(
+ output = InvertPSDMatrix<kSize>(kAssumeFullRank, input));
+ }
+}
+
+BENCHMARK_TEMPLATE(BenchmarkFixedSizedInvertPSDMatrix, 1);
+BENCHMARK_TEMPLATE(BenchmarkFixedSizedInvertPSDMatrix, 2);
+BENCHMARK_TEMPLATE(BenchmarkFixedSizedInvertPSDMatrix, 3);
+BENCHMARK_TEMPLATE(BenchmarkFixedSizedInvertPSDMatrix, 4);
+BENCHMARK_TEMPLATE(BenchmarkFixedSizedInvertPSDMatrix, 5);
+BENCHMARK_TEMPLATE(BenchmarkFixedSizedInvertPSDMatrix, 6);
+BENCHMARK_TEMPLATE(BenchmarkFixedSizedInvertPSDMatrix, 7);
+BENCHMARK_TEMPLATE(BenchmarkFixedSizedInvertPSDMatrix, 8);
+BENCHMARK_TEMPLATE(BenchmarkFixedSizedInvertPSDMatrix, 9);
+BENCHMARK_TEMPLATE(BenchmarkFixedSizedInvertPSDMatrix, 10);
+BENCHMARK_TEMPLATE(BenchmarkFixedSizedInvertPSDMatrix, 11);
+BENCHMARK_TEMPLATE(BenchmarkFixedSizedInvertPSDMatrix, 12);
+
+
+void BenchmarkDynamicallyInvertPSDMatrix(benchmark::State& state) {
+ using MatrixType = typename EigenTypes<Eigen::Dynamic, Eigen::Dynamic>::Matrix;
+ const int size = state.range(0);
+ MatrixType input = MatrixType::Random(size, size);
+ input += input.transpose() + MatrixType::Identity(size, size);
+
+ MatrixType output;
+ constexpr bool kAssumeFullRank = true;
+ for (auto _ : state) {
+ benchmark::DoNotOptimize(
+ output = InvertPSDMatrix<Eigen::Dynamic>(kAssumeFullRank, input));
+ }
+}
+
+BENCHMARK(BenchmarkDynamicallyInvertPSDMatrix)
+ ->Apply([](benchmark::internal::Benchmark* benchmark) {
+ for (int i = 1; i < 13; ++i) {
+ benchmark->Args({i});
+ }
+ });
+
+} // namespace internal
+} // namespace ceres
+
+BENCHMARK_MAIN();
diff --git a/internal/ceres/invert_psd_matrix_test.cc b/internal/ceres/invert_psd_matrix_test.cc
index 5da9c11..0078e21 100644
--- a/internal/ceres/invert_psd_matrix_test.cc
+++ b/internal/ceres/invert_psd_matrix_test.cc
@@ -42,7 +42,7 @@
template <int kSize>
typename EigenTypes<kSize, kSize>::Matrix RandomPSDMatrixWithEigenValues(
const typename EigenTypes<kSize>::Vector& eigenvalues) {
- typename EigenTypes<kSize, kSize>::Matrix m;
+ typename EigenTypes<kSize, kSize>::Matrix m(eigenvalues.rows(), eigenvalues.rows());
m.setRandom();
Eigen::SelfAdjointEigenSolver<typename EigenTypes<kSize, kSize>::Matrix> es(
m);
@@ -65,7 +65,7 @@
const Matrix m = RandomPSDMatrixWithEigenValues<5>(eigenvalues);
const Matrix inverse_m = InvertPSDMatrix<5>(kFullRank, m);
EXPECT_NEAR((m * inverse_m - Matrix::Identity(5,5)).norm() / 5.0, 0.0,
- std::numeric_limits<double>::epsilon());
+ 10 * std::numeric_limits<double>::epsilon());
}
TEST(InvertPSDMatrix, RankDeficient5x5) {
@@ -82,5 +82,29 @@
10 * std::numeric_limits<double>::epsilon());
}
+TEST(InvertPSDMatrix, DynamicFullRank5x5) {
+ EigenTypes<Eigen::Dynamic>::Vector eigenvalues(5);
+ eigenvalues.setRandom();
+ eigenvalues = eigenvalues.array().abs().matrix();
+ const Matrix m = RandomPSDMatrixWithEigenValues<Eigen::Dynamic>(eigenvalues);
+ const Matrix inverse_m = InvertPSDMatrix<Eigen::Dynamic>(kFullRank, m);
+ EXPECT_NEAR((m * inverse_m - Matrix::Identity(5,5)).norm() / 5.0, 0.0,
+ 10 * std::numeric_limits<double>::epsilon());
+}
+
+TEST(InvertPSDMatrix, DynamicRankDeficient5x5) {
+ EigenTypes<Eigen::Dynamic>::Vector eigenvalues(5);
+ eigenvalues.setRandom();
+ eigenvalues = eigenvalues.array().abs().matrix();
+ eigenvalues(3) = 0.0;
+ const Matrix m = RandomPSDMatrixWithEigenValues<Eigen::Dynamic>(eigenvalues);
+ const Matrix inverse_m = InvertPSDMatrix<Eigen::Dynamic>(kRankDeficient, m);
+ Matrix pseudo_identity = Matrix::Identity(5, 5);
+ pseudo_identity(3, 3) = 0.0;
+ EXPECT_NEAR((m * inverse_m * m - m).norm() / m.norm(),
+ 0.0,
+ 10 * std::numeric_limits<double>::epsilon());
+}
+
} // namespace internal
} // namespace ceres