Allow using Eigen's LDLT factorization instead of LLT factorization

It seems that Eigen's LLT factorization is broken on ARM.
This patch enables the use of LDLT factorization instead of LLT
factorization. The switch is controlled at compile time using a
preprocessor define - CERES_USE_EIGEN_LDLT.

By default we continue to use LLT factorization though.

To make the switching easier without introducing the Cholesky factorization
based inversion and linear system solve routines have been abstracted into
two new functions.

Android.mk has been updated to enable the LDLT factorization, but
the cmake file has not been updated as I will leave it to Alex's
capable hands to do proper detection of ARM as a target platform.

Change-Id: Iffe3abd2ce894de2a388b454df3da909b482d5e5
diff --git a/cmake/config.h.in b/cmake/config.h.in
index aea7921..41d629e 100644
--- a/cmake/config.h.in
+++ b/cmake/config.h.in
@@ -44,6 +44,10 @@
 // If defined, use the LGPL code in Eigen.
 @CERES_USE_EIGEN_SPARSE@
 
+// If defined, use Eigen's LDLT factorization instead of LLT
+// factorization.
+@CERES_USE_LDLT_FOR_EIGEN_CHOLESKY@
+
 // If defined, Ceres was compiled without LAPACK.
 @CERES_NO_LAPACK@
 
diff --git a/internal/ceres/CMakeLists.txt b/internal/ceres/CMakeLists.txt
index f754381..962d231 100644
--- a/internal/ceres/CMakeLists.txt
+++ b/internal/ceres/CMakeLists.txt
@@ -61,6 +61,7 @@
     dogleg_strategy.cc
     dynamic_compressed_row_jacobian_writer.cc
     dynamic_compressed_row_sparse_matrix.cc
+    eigen_dense_cholesky.cc
     evaluator.cc
     file.cc
     gradient_checking_cost_function.cc
diff --git a/internal/ceres/block_random_access_diagonal_matrix.cc b/internal/ceres/block_random_access_diagonal_matrix.cc
index 052690d..afa10de 100644
--- a/internal/ceres/block_random_access_diagonal_matrix.cc
+++ b/internal/ceres/block_random_access_diagonal_matrix.cc
@@ -34,7 +34,7 @@
 #include <set>
 #include <utility>
 #include <vector>
-#include "Eigen/Dense"
+#include "ceres/eigen_dense_cholesky.h"
 #include "ceres/internal/port.h"
 #include "ceres/internal/scoped_ptr.h"
 #include "ceres/stl_util.h"
@@ -125,12 +125,7 @@
   double* values = tsm_->mutable_values();
   for (int i = 0; i < blocks_.size(); ++i) {
     const int block_size = blocks_[i];
-    MatrixRef block(values, block_size, block_size);
-    block =
-        block
-        .selfadjointView<Eigen::Upper>()
-        .llt()
-        .solve(Matrix::Identity(block_size, block_size));
+    InvertUpperTriangularUsingCholesky(block_size, values, values);
     values += block_size * block_size;
   }
 }
diff --git a/internal/ceres/block_random_access_diagonal_matrix_test.cc b/internal/ceres/block_random_access_diagonal_matrix_test.cc
index 8fa3798..0bf9b79 100644
--- a/internal/ceres/block_random_access_diagonal_matrix_test.cc
+++ b/internal/ceres/block_random_access_diagonal_matrix_test.cc
@@ -32,10 +32,10 @@
 #include <vector>
 
 #include "ceres/block_random_access_diagonal_matrix.h"
+#include "ceres/eigen_dense_cholesky.h"
 #include "ceres/internal/eigen.h"
 #include "glog/logging.h"
 #include "gtest/gtest.h"
-#include "Eigen/Cholesky"
 
 namespace ceres {
 namespace internal {
@@ -147,9 +147,10 @@
   const TripletSparseMatrix* tsm = m_->matrix();
   Matrix dense;
   tsm->ToDenseMatrix(&dense);
-  Matrix expected_inverse =
-      dense.llt().solve(Matrix::Identity(dense.rows(), dense.rows()));
-
+  Matrix expected_inverse(dense.rows(), dense.rows());
+  InvertUpperTriangularUsingCholesky(dense.rows(),
+                                     dense.data(),
+                                     expected_inverse.data());
   m_->Invert();
   tsm->ToDenseMatrix(&dense);
 
diff --git a/internal/ceres/dense_normal_cholesky_solver.cc b/internal/ceres/dense_normal_cholesky_solver.cc
index b13cf3f..8682ae7 100644
--- a/internal/ceres/dense_normal_cholesky_solver.cc
+++ b/internal/ceres/dense_normal_cholesky_solver.cc
@@ -32,9 +32,9 @@
 
 #include <cstddef>
 
-#include "Eigen/Dense"
 #include "ceres/blas.h"
 #include "ceres/dense_sparse_matrix.h"
+#include "ceres/eigen_dense_cholesky.h"
 #include "ceres/internal/eigen.h"
 #include "ceres/internal/scoped_ptr.h"
 #include "ceres/lapack.h"
@@ -95,11 +95,8 @@
 
   LinearSolver::Summary summary;
   summary.num_iterations = 1;
-  summary.termination_type = LINEAR_SOLVER_SUCCESS;
-  Eigen::LLT<Matrix, Eigen::Upper> llt =
-      lhs.selfadjointView<Eigen::Upper>().llt();
-
-  if (llt.info() != Eigen::Success) {
+  if (SolveUpperTriangularUsingCholesky(num_cols, lhs.data(), rhs.data(), x)
+      != Eigen::Success) {
     summary.termination_type = LINEAR_SOLVER_FAILURE;
     summary.message = "Eigen LLT decomposition failed.";
   } else {
@@ -107,7 +104,6 @@
     summary.message = "Success.";
   }
 
-  VectorRef(x, num_cols) = llt.solve(rhs);
   event_logger.AddEvent("Solve");
   return summary;
 }
diff --git a/internal/ceres/eigen_dense_cholesky.cc b/internal/ceres/eigen_dense_cholesky.cc
new file mode 100644
index 0000000..1b7f87f
--- /dev/null
+++ b/internal/ceres/eigen_dense_cholesky.cc
@@ -0,0 +1,62 @@
+#include "ceres/eigen_dense_cholesky.h"
+#include "ceres/internal/eigen.h"
+#include "ceres/internal/port.h"
+#include "Eigen/Dense"
+
+namespace ceres {
+namespace internal {
+
+using Eigen::Upper;
+
+Eigen::ComputationInfo
+InvertUpperTriangularUsingCholesky(const int size,
+                                   const double* values,
+                                   double* inverse_values) {
+  ConstMatrixRef m(values, size, size);
+  MatrixRef inverse(inverse_values, size, size);
+
+  // On ARM we have experienced significant numerical problems with
+  // Eigen's LLT implementation. Defining
+  // CERES_USE_LDLT_FOR_EIGEN_CHOLESKY switches to using the slightly
+  // more expensive but much more numerically well behaved LDLT
+  // factorization algorithm.
+
+#ifdef CERES_USE_LDLT_FOR_EIGEN_CHOLESKY
+  Eigen::LDLT<Matrix, Upper> cholesky = m.selfadjointView<Upper>().ldlt();
+#else
+  Eigen::LLT<Matrix, Upper> cholesky = m.selfadjointView<Upper>().llt();
+#endif
+
+  inverse = cholesky.solve(Matrix::Identity(size, size));
+  return cholesky.info();
+}
+
+Eigen::ComputationInfo
+SolveUpperTriangularUsingCholesky(int size,
+                                  const double* lhs_values,
+                                  const double* rhs_values,
+                                  double* solution) {
+  ConstMatrixRef lhs(lhs_values, size, size);
+
+  // On ARM we have experienced significant numerical problems with
+  // Eigen's LLT implementation. Defining
+  // CERES_USE_LDLT_FOR_EIGEN_CHOLESKY switches to using the slightly
+  // more expensive but much more numerically well behaved LDLT
+  // factorization algorithm.
+
+#ifdef CERES_USE_LDLT_FOR_EIGEN_CHOLESKY
+  Eigen::LDLT<Matrix, Upper> cholesky = lhs.selfadjointView<Upper>().ldlt();
+#else
+  Eigen::LLT<Matrix, Upper> cholesky = lhs.selfadjointView<Upper>().llt();
+#endif
+
+  if (cholesky.info() == Eigen::Success) {
+    ConstVectorRef rhs(rhs_values, size);
+    VectorRef(solution, size) = cholesky.solve(rhs);
+  }
+
+  return cholesky.info();
+}
+
+}  // namespace internal
+}  // namespace ceres
diff --git a/internal/ceres/eigen_dense_cholesky.h b/internal/ceres/eigen_dense_cholesky.h
new file mode 100644
index 0000000..79a091e
--- /dev/null
+++ b/internal/ceres/eigen_dense_cholesky.h
@@ -0,0 +1,67 @@
+// Ceres Solver - A fast non-linear least squares minimizer
+// Copyright 2015 Google Inc. All rights reserved.
+// http://ceres-solver.org/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+// * Neither the name of Google Inc. nor the names of its contributors may be
+//   used to endorse or promote products derived from this software without
+//   specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+// Author: sameeragarwal@google.com (Sameer Agarwal)
+//
+// Wrappers around Eigen's dense Cholesky factorization
+// routines. Normally, if CERES_USE_LDLT_FOR_EIGEN_CHOLESKY is not
+// defined Eigen's LLT factorization is used, which is faster than the
+// LDLT factorization, however on ARM the LLT factorization seems to
+// give significantly worse results than LDLT and we are forced to use
+// LDLT instead.
+//
+// These wrapper functions provide a level of indirection to deal with
+// this switching and hide it from the rest of the code base.
+
+#ifndef CERES_INTERNAL_ARRAY_UTILS_H_
+#define CERES_INTERNAL_ARRAY_UTILS_H_
+
+#include "ceres/internal/eigen.h"
+
+namespace ceres {
+namespace internal {
+
+// Invert a matrix using Eigen's dense Cholesky factorization. values
+// and inverse_values can point to the same array.
+Eigen::ComputationInfo
+InvertUpperTriangularUsingCholesky(int size,
+                                   const double* values,
+                                   double* inverse_values);
+
+// Solve a linear system using Eigen's dense Cholesky
+// factorization. rhs_values and solution can point to the same array.
+Eigen::ComputationInfo
+SolveUpperTriangularUsingCholesky(int size,
+                                  const double* lhs_values,
+                                  const double* rhs_values,
+                                  double* solution);
+
+}  // namespace internal
+}  // namespace ceres
+
+#endif  // CERES_INTERNAL_ARRAY_UTILS_H_
diff --git a/internal/ceres/implicit_schur_complement.cc b/internal/ceres/implicit_schur_complement.cc
index d05f038..b943510 100644
--- a/internal/ceres/implicit_schur_complement.cc
+++ b/internal/ceres/implicit_schur_complement.cc
@@ -30,9 +30,9 @@
 
 #include "ceres/implicit_schur_complement.h"
 
-#include "Eigen/Dense"
 #include "ceres/block_sparse_matrix.h"
 #include "ceres/block_structure.h"
+#include "ceres/eigen_dense_cholesky.h"
 #include "ceres/internal/eigen.h"
 #include "ceres/internal/scoped_ptr.h"
 #include "ceres/linear_solver.h"
@@ -148,18 +148,16 @@
     const int row_block_pos = block_diagonal_structure->rows[r].block.position;
     const int row_block_size = block_diagonal_structure->rows[r].block.size;
     const Cell& cell = block_diagonal_structure->rows[r].cells[0];
-    MatrixRef m(block_diagonal->mutable_values() + cell.position,
-                row_block_size, row_block_size);
-
+    double* block_values = block_diagonal->mutable_values() + cell.position;
+    MatrixRef m(block_values, row_block_size, row_block_size);
     if (D != NULL) {
       ConstVectorRef d(D + row_block_pos, row_block_size);
       m += d.array().square().matrix().asDiagonal();
     }
 
-    m = m
-        .selfadjointView<Eigen::Upper>()
-        .llt()
-        .solve(Matrix::Identity(row_block_size, row_block_size));
+    InvertUpperTriangularUsingCholesky(row_block_size,
+                                       block_values,
+                                       block_values);
   }
 }
 
diff --git a/internal/ceres/implicit_schur_complement_test.cc b/internal/ceres/implicit_schur_complement_test.cc
index e586ea1..9bec91f 100644
--- a/internal/ceres/implicit_schur_complement_test.cc
+++ b/internal/ceres/implicit_schur_complement_test.cc
@@ -31,10 +31,10 @@
 #include "ceres/implicit_schur_complement.h"
 
 #include <cstddef>
-#include "Eigen/Dense"
 #include "ceres/block_random_access_dense_matrix.h"
 #include "ceres/block_sparse_matrix.h"
 #include "ceres/casts.h"
+#include "ceres/eigen_dense_cholesky.h"
 #include "ceres/internal/eigen.h"
 #include "ceres/internal/scoped_ptr.h"
 #include "ceres/linear_least_squares_problems.h"
@@ -107,11 +107,16 @@
 
     solution->resize(num_cols_);
     solution->setZero();
-    VectorRef schur_solution(solution->data() + num_cols_ - num_schur_rows,
-                             num_schur_rows);
-    schur_solution = lhs->selfadjointView<Eigen::Upper>().llt().solve(*rhs);
-    eliminator->BackSubstitute(A_.get(), b_.get(), D,
-                               schur_solution.data(), solution->data());
+    double* schur_solution =  solution->data() + num_cols_ - num_schur_rows;
+    SolveUpperTriangularUsingCholesky(num_schur_rows,
+                                      lhs->data(),
+                                      rhs->data(),
+                                      schur_solution);
+    eliminator->BackSubstitute(A_.get(),
+                               b_.get(),
+                               D,
+                               schur_solution,
+                               solution->data());
   }
 
   AssertionResult TestImplicitSchurComplement(double* D) {
@@ -158,8 +163,11 @@
     }
 
     // Reference solution to the f_block.
-    const Vector reference_f_sol =
-        lhs.selfadjointView<Eigen::Upper>().llt().solve(rhs);
+    Vector reference_f_sol(rhs.rows());
+    SolveUpperTriangularUsingCholesky(lhs.rows(),
+                                      lhs.data(),
+                                      rhs.data(),
+                                      reference_f_sol.data());
 
     // Backsubstituted solution from the implicit schur solver using the
     // reference solution to the f_block.
diff --git a/internal/ceres/schur_complement_solver.cc b/internal/ceres/schur_complement_solver.cc
index 2491060..6b1a458 100644
--- a/internal/ceres/schur_complement_solver.cc
+++ b/internal/ceres/schur_complement_solver.cc
@@ -43,6 +43,7 @@
 #include "ceres/conjugate_gradients_solver.h"
 #include "ceres/cxsparse.h"
 #include "ceres/detect_structure.h"
+#include "ceres/eigen_dense_cholesky.h"
 #include "ceres/internal/eigen.h"
 #include "ceres/internal/scoped_ptr.h"
 #include "ceres/lapack.h"
@@ -52,7 +53,6 @@
 #include "ceres/triplet_sparse_matrix.h"
 #include "ceres/types.h"
 #include "ceres/wall_time.h"
-#include "Eigen/Dense"
 #include "Eigen/SparseCore"
 
 namespace ceres {
@@ -198,18 +198,13 @@
   summary.num_iterations = 1;
 
   if (options().dense_linear_algebra_library_type == EIGEN) {
-    Eigen::LLT<Matrix, Eigen::Upper> llt =
-        ConstMatrixRef(m->values(), num_rows, num_rows)
-        .selfadjointView<Eigen::Upper>()
-        .llt();
-    if (llt.info() != Eigen::Success) {
+    if (SolveUpperTriangularUsingCholesky(num_rows, m->values(), rhs(), solution)
+        != Eigen::Success) {
       summary.termination_type = LINEAR_SOLVER_FAILURE;
       summary.message =
           "Eigen failure. Unable to perform dense Cholesky factorization.";
       return summary;
     }
-
-    VectorRef(solution, num_rows) = llt.solve(ConstVectorRef(rhs(), num_rows));
   } else {
     VectorRef(solution, num_rows) = ConstVectorRef(rhs(), num_rows);
     summary.termination_type =
diff --git a/internal/ceres/schur_eliminator_impl.h b/internal/ceres/schur_eliminator_impl.h
index 7e06806..e736e4e 100644
--- a/internal/ceres/schur_eliminator_impl.h
+++ b/internal/ceres/schur_eliminator_impl.h
@@ -57,6 +57,7 @@
 #include "ceres/block_random_access_matrix.h"
 #include "ceres/block_sparse_matrix.h"
 #include "ceres/block_structure.h"
+#include "ceres/eigen_dense_cholesky.h"
 #include "ceres/internal/eigen.h"
 #include "ceres/internal/fixed_array.h"
 #include "ceres/internal/scoped_ptr.h"
@@ -268,11 +269,9 @@
     // which case its much faster to compute the inverse once and
     // use it to multiply other matrices/vectors instead of doing a
     // Solve call over and over again.
-    typename EigenTypes<kEBlockSize, kEBlockSize>::Matrix inverse_ete =
-        ete
-        .template selfadjointView<Eigen::Upper>()
-        .llt()
-        .solve(Matrix::Identity(e_block_size, e_block_size));
+    typename EigenTypes<kEBlockSize, kEBlockSize>::Matrix
+        inverse_ete(e_block_size, e_block_size);
+    InvertUpperTriangularUsingCholesky(e_block_size, ete.data(), inverse_ete.data());
 
     // For the current chunk compute and update the rhs of the reduced
     // linear system.
@@ -361,7 +360,7 @@
               ete.data(), 0, 0, e_block_size, e_block_size);
     }
 
-    ete.llt().solveInPlace(y_block);
+    SolveUpperTriangularUsingCholesky(e_block_size, ete.data(), y_ptr, y_ptr);
   }
 }
 
diff --git a/internal/ceres/schur_eliminator_test.cc b/internal/ceres/schur_eliminator_test.cc
index 40bc904..8217e8c 100644
--- a/internal/ceres/schur_eliminator_test.cc
+++ b/internal/ceres/schur_eliminator_test.cc
@@ -35,6 +35,7 @@
 #include "ceres/block_sparse_matrix.h"
 #include "ceres/casts.h"
 #include "ceres/detect_structure.h"
+#include "ceres/eigen_dense_cholesky.h"
 #include "ceres/internal/eigen.h"
 #include "ceres/internal/scoped_ptr.h"
 #include "ceres/linear_least_squares_problems.h"
@@ -121,7 +122,10 @@
         .triangularView<Eigen::Upper>() = R - Q.transpose() * P * Q;
     rhs_expected =
         g.tail(schur_size) - Q.transpose() * P * g.head(num_eliminate_cols);
-    sol_expected = H.llt().solve(g);
+    SolveUpperTriangularUsingCholesky(H.rows(),
+                                      H.data(),
+                                      g.data(),
+                                      sol_expected.data());
   }
 
   void EliminateSolveAndCompare(const VectorRef& diagonal,
@@ -157,11 +161,11 @@
     eliminator->Eliminate(A.get(), b.get(), diagonal.data(), &lhs, rhs.data());
 
     MatrixRef lhs_ref(lhs.mutable_values(), lhs.num_rows(), lhs.num_cols());
-    Vector reduced_sol  =
-        lhs_ref
-        .selfadjointView<Eigen::Upper>()
-        .llt()
-        .solve(rhs);
+    Vector reduced_sol(lhs.num_rows());
+    SolveUpperTriangularUsingCholesky(lhs.num_cols(),
+                                      lhs.values(),
+                                      rhs.data(),
+                                      reduced_sol.data());
 
     // Solution to the linear least squares problem.
     Vector sol(num_cols);
diff --git a/jni/Android.mk b/jni/Android.mk
index d49ab4a..2a9d337 100644
--- a/jni/Android.mk
+++ b/jni/Android.mk
@@ -105,6 +105,12 @@
                 -DCERES_NO_CXSPARSE \
                 -DCERES_STD_UNORDERED_MAP
 
+# On ARM we have experienced significant numerical problems with
+# Eigen's LLT implementation. Defining
+# CERES_USE_LDLT_FOR_EIGEN_CHOLESKY switches to using the slightly
+# more expensive but much more numerically well behaved LDLT
+# factorization algorithm.
+LOCAL_CFLAGS += -DCERES_USE_LDLT_FOR_EIGEN_CHOLESKY
 
 # If the user did not enable threads in CERES_EXTRA_DEFINES, then add
 # CERES_NO_THREADS.
@@ -144,6 +150,7 @@
                    $(CERES_SRC_PATH)/dogleg_strategy.cc \
                    $(CERES_SRC_PATH)/dynamic_compressed_row_jacobian_writer.cc \
                    $(CERES_SRC_PATH)/dynamic_compressed_row_sparse_matrix.cc \
+                   $(CERES_SRC_PATH)/eigen_dense_cholesky.cc \
                    $(CERES_SRC_PATH)/evaluator.cc \
                    $(CERES_SRC_PATH)/file.cc \
                    $(CERES_SRC_PATH)/gradient_checking_cost_function.cc \