Add Nested Dissection based fill reducing ordering

With this change, the user can now choose between Approximate Minimum
Degree and Nested Dissection as a fill reducing algorithm when using
a sparse direct factorization based linear solver like SPARSE_NORMAL_CHOLESKY
or SPARSE_SCHUR.

Currenly only SUITE_SPARSE is supported. It requires that
SuiteSparse be compiled with Metis support enabled.

On most problems AMD is still the better choice, but in some cases
like the grid3D dataset from https://lucacarlone.mit.edu/datasets/
the solution time with AMD is 57s and with NESDIS 38 on my M1 Mac.

On some other problems at Google we have observed speedups of 10x,
there is also a corresponding decrease in the total amount of memory
used.

This patch is based on the original work done by NeroBurner in
https://ceres-solver-review.googlesource.com/c/ceres-solver/+/20580

1. Add a new enum to the public api LinearSolverOrderingType and
   a setting Solver::Options::linear_solver_ordering_type.
2. TrustRegionPreprocessor had some complicated logic which determined
   when linear solvers should reorder their matrices on their own and not
   this has been refactored into a more readable function that lives
   inside reorder_program.h/cc.
3. Plumbing in reorder_program.cc and trust_region_processor.cc to use
   nested dissection.
4. Update bundle_adjuster.cc to use nested dissection.

Change-Id: I388b027934f86c58b4da2b65a4fa5204ea73bf40
diff --git a/examples/bundle_adjuster.cc b/examples/bundle_adjuster.cc
index d03c2ff..87e18e1 100644
--- a/examples/bundle_adjuster.cc
+++ b/examples/bundle_adjuster.cc
@@ -95,7 +95,7 @@
               "Options are: suite_sparse and cx_sparse.");
 DEFINE_string(dense_linear_algebra_library, "eigen",
               "Options are: eigen, lapack, and cuda");
-DEFINE_string(ordering, "automatic", "Options are: automatic, user.");
+DEFINE_string(ordering, "amd", "Options are: amd, nesdis and user");
 
 DEFINE_bool(use_quaternions, false, "If true, uses quaternions to represent "
             "rotations. If false, angle axis is used.");
@@ -128,7 +128,6 @@
 DEFINE_string(initial_ply, "", "Export the BAL file data as a PLY file.");
 DEFINE_string(final_ply, "", "Export the refined BAL file data as a PLY "
               "file.");
-
 // clang-format on
 
 namespace ceres::examples {
@@ -228,24 +227,30 @@
   // the right ParameterBlock ordering, or by manually specifying a
   // suitable ordering vector and defining
   // Options::num_eliminate_blocks.
-  if (CERES_GET_FLAG(FLAGS_ordering) == "automatic") {
-    return;
+  if (CERES_GET_FLAG(FLAGS_ordering) == "user") {
+    auto* ordering = new ceres::ParameterBlockOrdering;
+
+    // The points come before the cameras.
+    for (int i = 0; i < num_points; ++i) {
+      ordering->AddElementToGroup(points + point_block_size * i, 0);
+    }
+
+    for (int i = 0; i < num_cameras; ++i) {
+      // When using axis-angle, there is a single parameter block for
+      // the entire camera.
+      ordering->AddElementToGroup(cameras + camera_block_size * i, 1);
+    }
+
+    options->linear_solver_ordering.reset(ordering);
+  } else {
+    if (CERES_GET_FLAG(FLAGS_ordering) == "amd") {
+      options->linear_solver_ordering_type = AMD;
+    } else if (CERES_GET_FLAG(FLAGS_ordering) == "nesdis") {
+      options->linear_solver_ordering_type = NESDIS;
+    } else {
+      LOG(FATAL) << "Unknown ordering type: " << CERES_GET_FLAG(FLAGS_ordering);
+    }
   }
-
-  auto* ordering = new ceres::ParameterBlockOrdering;
-
-  // The points come before the cameras.
-  for (int i = 0; i < num_points; ++i) {
-    ordering->AddElementToGroup(points + point_block_size * i, 0);
-  }
-
-  for (int i = 0; i < num_cameras; ++i) {
-    // When using axis-angle, there is a single parameter block for
-    // the entire camera.
-    ordering->AddElementToGroup(cameras + camera_block_size * i, 1);
-  }
-
-  options->linear_solver_ordering.reset(ordering);
 }
 
 void SetMinimizerOptions(Solver::Options* options) {
diff --git a/include/ceres/solver.h b/include/ceres/solver.h
index 026fc1c0..43146b1 100644
--- a/include/ceres/solver.h
+++ b/include/ceres/solver.h
@@ -395,12 +395,33 @@
 #endif
 
     // The order in which variables are eliminated in a linear solver
-    // can have a significant of impact on the efficiency and accuracy
-    // of the method. e.g., when doing sparse Cholesky factorization,
+    // can have a significant impact on the efficiency and accuracy of
+    // the method. e.g., when doing sparse Cholesky factorization,
     // there are matrices for which a good ordering will give a
     // Cholesky factor with O(n) storage, where as a bad ordering will
     // result in an completely dense factor.
     //
+    // Sparse direct solvers like SPARSE_NORMAL_CHOLESKY and
+    // SPARSE_SCHUR use a fill reducing ordering of the columns and
+    // rows of the matrix being factorized before computing the
+    // numeric factorization.
+    //
+    // This enum controls the type of algorithm used to compute
+    // this fill reducing ordering. There is no single algorithm
+    // that works on all matrices, so determining which algorithm
+    // works better is a matter of empirical experimentation.
+    //
+    // Implementation status:
+    //
+    // AMD works for SUITE_SPARSE, CX_SPARSE, EIGEN_SPARSE &
+    // ACCELERATE_SPARSE.
+    //
+    // NESDIS currently works for SUITE_SPARSE when using
+    // SPARSE_NORMAL_CHOLESKY, SPARSE_SCHUR, CGNR + SUBSET,
+    // ITERATIVE_SCHUR + CLUSTER_JACOBI, ITERATIVE_SCHUR +
+    // CLUSTER_TRIDIAGONAL linear solvers.
+    LinearSolverOrderingType linear_solver_ordering_type = AMD;
+
     // Ceres allows the user to provide varying amounts of hints to
     // the solver about the variable elimination ordering to use. This
     // can range from no hints, where the solver is free to decide the
@@ -418,15 +439,47 @@
     // associated with it, that determines its order in the set of
     // groups.
     //
-    // Given such an ordering, Ceres ensures that the parameter blocks in
-    // the lowest numbered group are eliminated first, and then the
-    // parameter blocks in the next lowest numbered group and so on. Within
-    // each group, Ceres is free to order the parameter blocks as it
-    // chooses.
+    // The exact interpretation of this information depends on the
+    // values of linear_solver_ordering_type and
+    // linear_solver_type/preconditioner_type and
+    // sparse_linear_algebra_type.
     //
-    // If nullptr, then all parameter blocks are assumed to be in the
-    // same group and the solver is free to decide the best
-    // ordering.
+    // sparse_linear_algebra_library_type != SUITE_SPARSE
+    // ==================================================
+    //
+    // The value of linear_solver_ordering_type does not matter and:
+    //
+    // a. linear_solver_type = SPARSE_NORMAL_CHOLESKY or
+    //    linear_solver_type = CGNR and preconditioner_type = SUBSET
+    //
+    // then the value of linear_solver_ordering is ignored.
+    //
+    // b. linear_solver_type = SPARSE_SCHUR/DENSE_SCHUR/ITERATIVE_SCHUR
+    //
+    // then if linear_solver_ordering is non-null, then the lowest
+    // number group is used as the first elimination group to compute
+    // the Schur complement. All other groups are ignored.
+    //
+    // sparse_linear_algebra_library_type == SUITE_SPARSE
+    // ==================================================
+    //
+    // linear_solver_ordering_type = AMD
+    // ---------------------------------
+    //
+    // linear_solver_type = SPARSE_NORMAL_CHOLESKY or
+    // linear_solver_type = CGNR and preconditioner_type = SUBSET
+    //
+    // if linear_solver_ordering = nullptr, then Ceres assumes that
+    // all parameter blocks are in the same elimination group and uses
+    // the Approximate Minimum Degree algorithm to compute a fill
+    // reducing ordering.
+    //
+    // If linear_solver_order is not null, then a Constrained
+    // Approximate Minimum Degree (CAMD) ordering used where the
+    // parameter blocks in the lowest numbered group are eliminated
+    // first, and then the parameter blocks in the next lowest
+    // numbered group and so on. Within each group, CAMD free to order
+    // the parameter blocks as it chooses.
     //
     // e.g. Consider the linear system
     //
@@ -442,18 +495,53 @@
     //   {0: y}, {1: x} - eliminate y first.
     //   {0: x, y}      - Solver gets to decide the elimination order.
     //
-    // Thus, to have Ceres determine the ordering automatically using
-    // heuristics, put all the variables in group 0 and to control the
-    // ordering for every variable, create groups 0..N-1, one per
-    // variable, in the desired order.
+    // Thus, to have Ceres determine the ordering automatically, put
+    // all the variables in group 0 and to control the ordering for
+    // every variable, create groups 0..N-1, one per variable, in the
+    // desired order.
+    //
+    // b. linear_solver_type = SPARSE_SCHUR/DENSE_SCHUR/ITERATIVE_SCHUR
+    //
+    // If linear_solver_ordering is null then an greedy maximum
+    // independent set algorithm is used to compute the first
+    // elimination group, and AMD is applied to the corresponding
+    // Schur complement matrix or a subset thereof used that is used
+    // as a preconditioner.
+    //
+    // If linear_solver_ordering is not null, then CAMD is used to
+    // compute a fill reducing ordering.
+    //
+    // linear_solver_ordering = NESDIS
+    // -------------------------------
+    //
+    // a. linear_solver_type = SPARSE_NORMAL_CHOLESKY or
+    //    linear_solver_type = CGNR and preconditioner_type = SUBSET
+    //
+    // then the value of linear_solver_ordering is ignored and a
+    // Nested Dissection algorithm is used to compute a fill reducing
+    // ordering.
+    //
+    // b. linear_solver_type = SPARSE_SCHUR/DENSE_SCHUR/ITERATIVE_SCHUR
+    //
+    // If linear_solver_ordering is null then an greedy maximum
+    // independent set algorithm is used to compute the first
+    // elimination group, and Nested Dissection is applied to the
+    // corresponding Schur complement matrix or a subset thereof used
+    // that is used as a preconditioner.
+    //
+    // If linear_solver_ordering is not null, the parameter blocks in
+    // the lowest numbered group are eliminated to compute the Schur
+    // complement. All other groups are ignored and Nested Dissection
+    // is applied to the corresponding Schur complement matrix or a
+    // subset thereof used that is used as a preconditioner.
     //
     // Bundle Adjustment
     // -----------------
     //
-    // A particular case of interest is bundle adjustment, where the user
-    // has two options. The default is to not specify an ordering at all,
-    // the solver will see that the user wants to use a Schur type solver
-    // and figure out the right elimination ordering.
+    // A particular case of interest is bundle adjustment, where the
+    // user has two options. The default is not specifying ordering at
+    // all; in this case the solver will see that the user wants to
+    // use a Schur type solver and figure out an elimination ordering.
     //
     // But if the user already knows what parameter blocks are points and
     // what are cameras, they can save preprocessing time by partitioning
@@ -500,6 +588,10 @@
     // Jacobian matrix and generally speaking, there is no performance
     // penalty for doing so.
 
+    // TODO(sameeragarwal): Remove this option. It is too obscure and
+    // there is no clear way of figuring out when this is a useful
+    // thing to do.
+    //
     // In some rare cases, it is worth using a more complicated
     // reordering algorithm which has slightly better runtime
     // performance at the expense of an extra copy of the Jacobian
@@ -946,6 +1038,8 @@
         SPARSE_NORMAL_CHOLESKY;
 #endif
 
+    LinearSolverOrderingType linear_solver_ordering_type;
+
     // Size of the elimination groups given by the user as hints to
     // the linear solver.
     std::vector<int> linear_solver_ordering_given;
diff --git a/include/ceres/types.h b/include/ceres/types.h
index e522423..2fc5abc 100644
--- a/include/ceres/types.h
+++ b/include/ceres/types.h
@@ -183,6 +183,30 @@
   NO_SPARSE
 };
 
+// The order in which variables are eliminated in a linear solver
+// can have a significant of impact on the efficiency and accuracy
+// of the method. e.g., when doing sparse Cholesky factorization,
+// there are matrices for which a good ordering will give a
+// Cholesky factor with O(n) storage, where as a bad ordering will
+// result in an completely dense factor.
+//
+// So sparse direct solvers like SPARSE_NORMAL_CHOLESKY and
+// SPARSE_SCHUR and preconditioners like SUBSET, CLUSTER_JACOBI &
+// CLUSTER_TRIDIAGONAL use a fill reducing ordering of the columns and
+// rows of the matrix being factorized before actually the numeric
+// factorization.
+//
+// This enum controls the class of algorithm used to compute this
+// fill reducing ordering. There is no single algorithm that works
+// on all matrices, so determining which algorithm works better is a
+// matter of empirical experimentation.
+enum LinearSolverOrderingType {
+  // Approximate Minimum Degree.
+  AMD,
+  // Nested Dissection.
+  NESDIS
+};
+
 enum DenseLinearAlgebraLibraryType {
   EIGEN,
   LAPACK,
@@ -467,6 +491,11 @@
 CERES_EXPORT bool StringToSparseLinearAlgebraLibraryType(
     std::string value, SparseLinearAlgebraLibraryType* type);
 
+CERES_EXPORT const char* LinearSolverOrderingTypeToString(
+    LinearSolverOrderingType type);
+CERES_EXPORT bool StringToLinearSolverOrderingType(
+    std::string value, LinearSolverOrderingType* type);
+
 CERES_EXPORT const char* DenseLinearAlgebraLibraryTypeToString(
     DenseLinearAlgebraLibraryType type);
 CERES_EXPORT bool StringToDenseLinearAlgebraLibraryType(
diff --git a/internal/ceres/reorder_program.cc b/internal/ceres/reorder_program.cc
index c6765fe..1f50a39 100644
--- a/internal/ceres/reorder_program.cc
+++ b/internal/ceres/reorder_program.cc
@@ -85,7 +85,6 @@
   return min_parameter_block_position;
 }
 
-#if defined(CERES_USE_EIGEN_SPARSE)
 Eigen::SparseMatrix<int> CreateBlockJacobian(
     const TripletSparseMatrix& block_jacobian_transpose) {
   using SparseMatrix = Eigen::SparseMatrix<int>;
@@ -105,9 +104,9 @@
   block_jacobian.setFromTriplets(triplets.begin(), triplets.end());
   return block_jacobian;
 }
-#endif
 
 void OrderingForSparseNormalCholeskyUsingSuiteSparse(
+    const LinearSolverOrderingType linear_solver_ordering_type,
     const TripletSparseMatrix& tsm_block_jacobian_transpose,
     const vector<ParameterBlock*>& parameter_blocks,
     const ParameterBlockOrdering& parameter_block_ordering,
@@ -120,40 +119,55 @@
   cholmod_sparse* block_jacobian_transpose = ss.CreateSparseMatrix(
       const_cast<TripletSparseMatrix*>(&tsm_block_jacobian_transpose));
 
-  // If the user did not supply a useful ordering, then just use
-  // regular AMD.
-  if (parameter_block_ordering.NumGroups() <= 1) {
-    ss.Ordering(block_jacobian_transpose, OrderingType::AMD, &ordering[0]);
-  } else {
-    vector<int> constraints;
-    for (auto* parameter_block : parameter_blocks) {
-      constraints.push_back(parameter_block_ordering.GroupId(
-          parameter_block->mutable_user_state()));
+  if (linear_solver_ordering_type == ceres::AMD) {
+    if (parameter_block_ordering.NumGroups() <= 1) {
+      // The user did not supply a useful ordering so just go ahead
+      // and use AMD.
+      ss.Ordering(block_jacobian_transpose, OrderingType::AMD, &ordering[0]);
+    } else {
+      // The user supplied an ordering, so use CAMD.
+      vector<int> constraints;
+      constraints.reserve(parameter_blocks.size());
+      for (auto* parameter_block : parameter_blocks) {
+        constraints.push_back(parameter_block_ordering.GroupId(
+            parameter_block->mutable_user_state()));
+      }
+
+      // Renumber the entries of constraints to be contiguous integers
+      // as CAMD requires that the group ids be in the range [0,
+      // parameter_blocks.size() - 1].
+      MapValuesToContiguousRange(constraints.size(), &constraints[0]);
+      ss.ConstrainedApproximateMinimumDegreeOrdering(
+          block_jacobian_transpose, &constraints[0], ordering);
     }
-
-    // Renumber the entries of constraints to be contiguous integers
-    // as CAMD requires that the group ids be in the range [0,
-    // parameter_blocks.size() - 1].
-    MapValuesToContiguousRange(constraints.size(), &constraints[0]);
-    ss.ConstrainedApproximateMinimumDegreeOrdering(
-        block_jacobian_transpose, &constraints[0], ordering);
+  } else if (linear_solver_ordering_type == ceres::NESDIS) {
+    // If nested dissection is chosen as an ordering algorithm, then
+    // ignore any user provided linear_solver_ordering.
+    CHECK(SuiteSparse::IsNestedDissectionAvailable())
+        << "Congratulations, you found a Ceres bug! "
+        << "Please report this error to the developers.";
+    ss.Ordering(block_jacobian_transpose, OrderingType::NESDIS, &ordering[0]);
+  } else {
+    LOG(FATAL) << "Congratulations, you found a Ceres bug! "
+               << "Please report this error to the developers.";
   }
 
-  VLOG(2) << "Block ordering stats: "
-          << " flops: " << ss.mutable_cc()->fl
-          << " lnz  : " << ss.mutable_cc()->lnz
-          << " anz  : " << ss.mutable_cc()->anz;
-
   ss.Free(block_jacobian_transpose);
 #endif  // CERES_NO_SUITESPARSE
 }
 
 void OrderingForSparseNormalCholeskyUsingCXSparse(
-    const TripletSparseMatrix& tsm_block_jacobian_transpose, int* ordering) {
+    const LinearSolverOrderingType linear_solver_ordering_type,
+    const TripletSparseMatrix& tsm_block_jacobian_transpose,
+    int* ordering) {
 #ifdef CERES_NO_CXSPARSE
   LOG(FATAL) << "Congratulations, you found a Ceres bug! "
              << "Please report this error to the developers.";
 #else
+  CHECK_NE(linear_solver_ordering_type, NESDIS)
+      << "Congratulations, you found a Ceres bug! "
+      << "Please report this error to the developers.";
+
   // CXSparse works with J'J instead of J'. So compute the block
   // sparsity for J'J and compute an approximate minimum degree
   // ordering.
@@ -173,13 +187,18 @@
 }
 
 void OrderingForSparseNormalCholeskyUsingEigenSparse(
-    const TripletSparseMatrix& tsm_block_jacobian_transpose, int* ordering) {
+    const LinearSolverOrderingType linear_solver_ordering_type,
+    const TripletSparseMatrix& tsm_block_jacobian_transpose,
+    int* ordering) {
 #ifndef CERES_USE_EIGEN_SPARSE
   LOG(FATAL) << "SPARSE_NORMAL_CHOLESKY cannot be used with EIGEN_SPARSE "
                 "because Ceres was not built with support for "
                 "Eigen's SimplicialLDLT decomposition. "
                 "This requires enabling building with -DEIGENSPARSE=ON.";
 #else
+  CHECK_NE(linear_solver_ordering_type, NESDIS)
+      << "Congratulations, you found a Ceres bug! "
+      << "Please report this error to the developers.";
 
   // This conversion from a TripletSparseMatrix to a Eigen::Triplet
   // matrix is unfortunate, but unavoidable for now. It is not a
@@ -326,7 +345,7 @@
 
 // Pre-order the columns corresponding to the Schur complement if
 // possible.
-static void MaybeReorderSchurComplementColumnsUsingSuiteSparse(
+static void ReorderSchurComplementColumnsUsingSuiteSparse(
     const ParameterBlockOrdering& parameter_block_ordering, Program* program) {
 #ifndef CERES_NO_SUITESPARSE
   SuiteSparse ss;
@@ -365,7 +384,7 @@
 #endif
 }
 
-static void MaybeReorderSchurComplementColumnsUsingEigen(
+static void ReorderSchurComplementColumnsUsingEigen(
     const int size_of_first_elimination_group,
     const ProblemImpl::ParameterMap& parameter_map,
     Program* program) {
@@ -420,6 +439,7 @@
 bool ReorderProgramForSchurTypeLinearSolver(
     const LinearSolverType linear_solver_type,
     const SparseLinearAlgebraLibraryType sparse_linear_algebra_library_type,
+    const LinearSolverOrderingType linear_solver_ordering_type,
     const ProblemImpl::ParameterMap& parameter_map,
     ParameterBlockOrdering* parameter_block_ordering,
     Program* program,
@@ -485,12 +505,16 @@
   const int size_of_first_elimination_group =
       parameter_block_ordering->group_to_elements().begin()->second.size();
 
-  if (linear_solver_type == SPARSE_SCHUR) {
+  // Pre-ordering of the columns of the Schur complement only works if
+  // we are using approximate mininmum degree based ordering and
+  // SUITE_SPARSE or EIGEN_SPARSE.
+  if (linear_solver_type == SPARSE_SCHUR &&
+      linear_solver_ordering_type == ceres::AMD) {
     if (sparse_linear_algebra_library_type == SUITE_SPARSE) {
-      MaybeReorderSchurComplementColumnsUsingSuiteSparse(
-          *parameter_block_ordering, program);
+      ReorderSchurComplementColumnsUsingSuiteSparse(*parameter_block_ordering,
+                                                    program);
     } else if (sparse_linear_algebra_library_type == EIGEN_SPARSE) {
-      MaybeReorderSchurComplementColumnsUsingEigen(
+      ReorderSchurComplementColumnsUsingEigen(
           size_of_first_elimination_group, parameter_map, program);
     }
   }
@@ -503,6 +527,7 @@
 
 bool ReorderProgramForSparseCholesky(
     const SparseLinearAlgebraLibraryType sparse_linear_algebra_library_type,
+    const LinearSolverOrderingType linear_solver_ordering_type,
     const ParameterBlockOrdering& parameter_block_ordering,
     int start_row_block,
     Program* program,
@@ -526,12 +551,14 @@
 
   if (sparse_linear_algebra_library_type == SUITE_SPARSE) {
     OrderingForSparseNormalCholeskyUsingSuiteSparse(
+        linear_solver_ordering_type,
         *tsm_block_jacobian_transpose,
         parameter_blocks,
         parameter_block_ordering,
         &ordering[0]);
   } else if (sparse_linear_algebra_library_type == CX_SPARSE) {
-    OrderingForSparseNormalCholeskyUsingCXSparse(*tsm_block_jacobian_transpose,
+    OrderingForSparseNormalCholeskyUsingCXSparse(linear_solver_ordering_type,
+                                                 *tsm_block_jacobian_transpose,
                                                  &ordering[0]);
   } else if (sparse_linear_algebra_library_type == ACCELERATE_SPARSE) {
     // Accelerate does not provide a function to perform reordering without
@@ -544,7 +571,9 @@
 
   } else if (sparse_linear_algebra_library_type == EIGEN_SPARSE) {
     OrderingForSparseNormalCholeskyUsingEigenSparse(
-        *tsm_block_jacobian_transpose, &ordering[0]);
+        linear_solver_ordering_type,
+        *tsm_block_jacobian_transpose,
+        &ordering[0]);
   }
 
   // Apply ordering.
@@ -569,4 +598,54 @@
   return it - residual_blocks->begin();
 }
 
+bool AreJacobianColumnsOrdered(
+    const LinearSolverType linear_solver_type,
+    const PreconditionerType preconditioner_type,
+    const SparseLinearAlgebraLibraryType sparse_linear_algebra_library_type,
+    const LinearSolverOrderingType linear_solver_ordering_type,
+    const bool use_postordering,
+    const bool dynamic_sparsity) {
+  if (use_postordering || dynamic_sparsity) {
+    return false;
+  }
+
+  if (sparse_linear_algebra_library_type == SUITE_SPARSE) {
+    if (linear_solver_type == SPARSE_NORMAL_CHOLESKY ||
+        (linear_solver_type == CGNR && preconditioner_type == SUBSET)) {
+      return true;
+    }
+    if (linear_solver_type == SPARSE_SCHUR &&
+        linear_solver_ordering_type == ceres::AMD) {
+      return true;
+    }
+  }
+
+  // For all sparse linear algebra libraries other than SuiteSparse,
+  // nested dissection is not used for pre-ordering.
+  if (linear_solver_ordering_type == ceres::NESDIS) {
+    return false;
+  }
+
+  if (sparse_linear_algebra_library_type == ceres::EIGEN_SPARSE) {
+    if (linear_solver_type == SPARSE_NORMAL_CHOLESKY ||
+        linear_solver_type == SPARSE_SCHUR ||
+        (linear_solver_type == CGNR && preconditioner_type == SUBSET)) {
+      return true;
+    }
+  }
+
+  if (sparse_linear_algebra_library_type == ceres::CX_SPARSE) {
+    if (linear_solver_type == SPARSE_NORMAL_CHOLESKY ||
+        (linear_solver_type == CGNR && preconditioner_type == SUBSET)) {
+      return true;
+    }
+  }
+
+  if (sparse_linear_algebra_library_type == ceres::ACCELERATE_SPARSE) {
+    return false;
+  }
+
+  return false;
+}
+
 }  // namespace ceres::internal
diff --git a/internal/ceres/reorder_program.h b/internal/ceres/reorder_program.h
index 64f4095..8ae456e 100644
--- a/internal/ceres/reorder_program.h
+++ b/internal/ceres/reorder_program.h
@@ -35,6 +35,7 @@
 
 #include "ceres/internal/disable_warnings.h"
 #include "ceres/internal/export.h"
+#include "ceres/linear_solver.h"
 #include "ceres/parameter_block_ordering.h"
 #include "ceres/problem_impl.h"
 #include "ceres/types.h"
@@ -75,6 +76,7 @@
 CERES_NO_EXPORT bool ReorderProgramForSchurTypeLinearSolver(
     LinearSolverType linear_solver_type,
     SparseLinearAlgebraLibraryType sparse_linear_algebra_library_type,
+    LinearSolverOrderingType linear_solver_ordering_type,
     const ProblemImpl::ParameterMap& parameter_map,
     ParameterBlockOrdering* parameter_block_ordering,
     Program* program,
@@ -92,6 +94,7 @@
 // ordering will take it into account, otherwise it will be ignored.
 CERES_NO_EXPORT bool ReorderProgramForSparseCholesky(
     SparseLinearAlgebraLibraryType sparse_linear_algebra_library_type,
+    LinearSolverOrderingType linear_solver_ordering_type,
     const ParameterBlockOrdering& parameter_block_ordering,
     int start_row_block,
     Program* program,
@@ -111,6 +114,16 @@
     const std::unordered_set<ResidualBlockId>& bottom_residual_blocks,
     Program* program);
 
+// The return value of this function indicates whether the columns of
+// the Jacobian can be reordered using a fill reducing ordering.
+CERES_NO_EXPORT bool AreJacobianColumnsOrdered(
+    LinearSolverType linear_solver_type,
+    PreconditionerType preconditioner_type,
+    SparseLinearAlgebraLibraryType sparse_linear_algebra_library_type,
+    LinearSolverOrderingType linear_solver_ordering_type,
+    bool use_postordering,
+    bool dynamic_sparsity);
+
 }  // namespace ceres::internal
 
 #include "ceres/internal/reenable_warnings.h"
diff --git a/internal/ceres/reorder_program_test.cc b/internal/ceres/reorder_program_test.cc
index 0fbab4e..872f381 100644
--- a/internal/ceres/reorder_program_test.cc
+++ b/internal/ceres/reorder_program_test.cc
@@ -185,6 +185,7 @@
 
     std::string error;
     EXPECT_TRUE(ReorderProgramForSparseCholesky(ceres::SUITE_SPARSE,
+                                                ceres::AMD,
                                                 linear_solver_ordering,
                                                 0, /* use all rows */
                                                 program,
diff --git a/internal/ceres/solver.cc b/internal/ceres/solver.cc
index 150c555..dbaba3e 100644
--- a/internal/ceres/solver.cc
+++ b/internal/ceres/solver.cc
@@ -50,6 +50,7 @@
 #include "ceres/schur_templates.h"
 #include "ceres/solver_utils.h"
 #include "ceres/stringprintf.h"
+#include "ceres/suitesparse.h"
 #include "ceres/types.h"
 #include "ceres/wall_time.h"
 
@@ -107,6 +108,11 @@
   return true;
 }
 
+bool IsNestedDissectionAvailable(SparseLinearAlgebraLibraryType type) {
+  return (type == SUITE_SPARSE) &&
+         internal::SuiteSparse::IsNestedDissectionAvailable();
+}
+
 bool TrustRegionOptionsAreValid(const Solver::Options& options, string* error) {
   OPTION_GT(initial_trust_region_radius, 0.0);
   OPTION_GT(min_trust_region_radius, 0.0);
@@ -182,8 +188,10 @@
             name,
             sparse_linear_algebra_library_name);
         return false;
-      } else if (!IsSparseLinearAlgebraLibraryTypeAvailable(
-                     options.sparse_linear_algebra_library_type)) {
+      }
+
+      if (!IsSparseLinearAlgebraLibraryTypeAvailable(
+              options.sparse_linear_algebra_library_type)) {
         *error = StringPrintf(
             "Can't use %s with "
             "Solver::Options::sparse_linear_algebra_library_type = %s, "
@@ -192,6 +200,22 @@
             sparse_linear_algebra_library_name);
         return false;
       }
+
+      if (options.linear_solver_ordering_type == ceres::NESDIS &&
+          !IsNestedDissectionAvailable(
+              options.sparse_linear_algebra_library_type)) {
+        if (options.sparse_linear_algebra_library_type == SUITE_SPARSE) {
+          *error = StringPrintf(
+              "Can't use NESDIS with SUITE_SPARSE because SuiteSparse was "
+              "compiled without support for Metis.");
+        } else {
+          *error = StringPrintf(
+              "Can't use NESDIS with "
+              "Solver::Options::sparse_linear_algebra_library_type = %s.",
+              sparse_linear_algebra_library_name);
+        }
+        return false;
+      }
     }
   }
 
@@ -339,7 +363,7 @@
                                  &(summary->inner_iteration_ordering_given));
 
   // clang-format off
-  summary->dense_linear_algebra_library_type  = options.dense_linear_algebra_library_type;  //  NOLINT
+  summary->dense_linear_algebra_library_type  = options.dense_linear_algebra_library_type;  
   summary->dogleg_type                        = options.dogleg_type;
   summary->inner_iteration_time_in_seconds    = 0.0;
   summary->num_line_search_steps              = 0;
@@ -348,18 +372,19 @@
   summary->line_search_polynomial_minimization_time_in_seconds = 0.0;
   summary->line_search_total_time_in_seconds  = 0.0;
   summary->inner_iterations_given             = options.use_inner_iterations;
-  summary->line_search_direction_type         = options.line_search_direction_type;         //  NOLINT
-  summary->line_search_interpolation_type     = options.line_search_interpolation_type;     //  NOLINT
+  summary->line_search_direction_type         = options.line_search_direction_type;       
+  summary->line_search_interpolation_type     = options.line_search_interpolation_type;   
   summary->line_search_type                   = options.line_search_type;
   summary->linear_solver_type_given           = options.linear_solver_type;
   summary->max_lbfgs_rank                     = options.max_lbfgs_rank;
   summary->minimizer_type                     = options.minimizer_type;
-  summary->nonlinear_conjugate_gradient_type  = options.nonlinear_conjugate_gradient_type;  //  NOLINT
+  summary->nonlinear_conjugate_gradient_type  = options.nonlinear_conjugate_gradient_type; 
   summary->num_threads_given                  = options.num_threads;
   summary->preconditioner_type_given          = options.preconditioner_type;
-  summary->sparse_linear_algebra_library_type = options.sparse_linear_algebra_library_type; //  NOLINT
-  summary->trust_region_strategy_type         = options.trust_region_strategy_type;         //  NOLINT
-  summary->visibility_clustering_type         = options.visibility_clustering_type;         //  NOLINT
+  summary->sparse_linear_algebra_library_type = options.sparse_linear_algebra_library_type;
+  summary->linear_solver_ordering_type        = options.linear_solver_ordering_type;       
+  summary->trust_region_strategy_type         = options.trust_region_strategy_type;        
+  summary->visibility_clustering_type         = options.visibility_clustering_type;
   // clang-format on
 }
 
@@ -367,11 +392,14 @@
                         Solver::Summary* summary) {
   internal::OrderingToGroupSizes(pp.options.linear_solver_ordering.get(),
                                  &(summary->linear_solver_ordering_used));
+  // TODO(sameeragarwal): Update the preprocessor to collapse the
+  // second and higher groups into one group when nested dissection is
+  // used.
   internal::OrderingToGroupSizes(pp.options.inner_iteration_ordering.get(),
                                  &(summary->inner_iteration_ordering_used));
 
   // clang-format off
-  summary->inner_iterations_used          = pp.inner_iteration_minimizer.get() != nullptr;     // NOLINT
+  summary->inner_iterations_used          = pp.inner_iteration_minimizer != nullptr;   
   summary->linear_solver_type_used        = pp.linear_solver_options.type;
   summary->num_threads_used               = pp.options.num_threads;
   summary->preconditioner_type_used       = pp.options.preconditioner_type;
@@ -379,7 +407,7 @@
 
   internal::SetSummaryFinalCost(summary);
 
-  if (pp.reduced_program.get() != nullptr) {
+  if (pp.reduced_program != nullptr) {
     SummarizeReducedProgram(*pp.reduced_program, summary);
   }
 
@@ -389,7 +417,7 @@
   // case if the preprocessor failed, or if the reduced problem did
   // not contain any parameter blocks. Thus, only extract the
   // evaluator statistics if one exists.
-  if (pp.evaluator.get() != nullptr) {
+  if (pp.evaluator != nullptr) {
     const map<string, CallStatistics>& evaluator_statistics =
         pp.evaluator->Statistics();
     {
@@ -411,7 +439,7 @@
   // Again, like the evaluator, there may or may not be a linear
   // solver from which we can extract run time statistics. In
   // particular the line search solver does not use a linear solver.
-  if (pp.linear_solver.get() != nullptr) {
+  if (pp.linear_solver != nullptr) {
     const map<string, CallStatistics>& linear_solver_statistics =
         pp.linear_solver->Statistics();
     const CallStatistics& call_stats = FindWithDefault(
@@ -666,17 +694,6 @@
                         dense_linear_algebra_library_type));
     }
 
-    if (linear_solver_type_used == SPARSE_NORMAL_CHOLESKY ||
-        linear_solver_type_used == SPARSE_SCHUR ||
-        (linear_solver_type_used == ITERATIVE_SCHUR &&
-         (preconditioner_type_used == CLUSTER_JACOBI ||
-          preconditioner_type_used == CLUSTER_TRIDIAGONAL))) {
-      StringAppendF(&report,
-                    "\nSparse linear algebra library %15s\n",
-                    SparseLinearAlgebraLibraryTypeToString(
-                        sparse_linear_algebra_library_type));
-    }
-
     StringAppendF(&report,
                   "Trust region strategy     %19s",
                   TrustRegionStrategyTypeToString(trust_region_strategy_type));
@@ -687,9 +704,23 @@
         StringAppendF(&report, " (SUBSPACE)");
       }
     }
-    StringAppendF(&report, "\n");
-    StringAppendF(&report, "\n");
 
+    if (linear_solver_type_used == SPARSE_NORMAL_CHOLESKY ||
+        linear_solver_type_used == SPARSE_SCHUR ||
+        (linear_solver_type_used == CGNR &&
+         preconditioner_type_used == SUBSET) ||
+        (linear_solver_type_used == ITERATIVE_SCHUR &&
+         (preconditioner_type_used == CLUSTER_JACOBI ||
+          preconditioner_type_used == CLUSTER_TRIDIAGONAL))) {
+      StringAppendF(
+          &report,
+          "\nSparse linear algebra library %15s + %s\n",
+          SparseLinearAlgebraLibraryTypeToString(
+              sparse_linear_algebra_library_type),
+          LinearSolverOrderingTypeToString(linear_solver_ordering_type));
+    }
+
+    StringAppendF(&report, "\n");
     StringAppendF(&report, "%45s    %21s\n", "Given", "Used");
     StringAppendF(&report,
                   "Linear solver       %25s%25s\n",
diff --git a/internal/ceres/trust_region_preprocessor.cc b/internal/ceres/trust_region_preprocessor.cc
index 9467b5a..ed62aa9 100644
--- a/internal/ceres/trust_region_preprocessor.cc
+++ b/internal/ceres/trust_region_preprocessor.cc
@@ -113,6 +113,7 @@
     return ReorderProgramForSchurTypeLinearSolver(
         options.linear_solver_type,
         options.sparse_linear_algebra_library_type,
+        options.linear_solver_ordering_type,
         pp->problem->parameter_map(),
         options.linear_solver_ordering.get(),
         pp->reduced_program.get(),
@@ -123,6 +124,7 @@
       !options.dynamic_sparsity) {
     return ReorderProgramForSparseCholesky(
         options.sparse_linear_algebra_library_type,
+        options.linear_solver_ordering_type,
         *options.linear_solver_ordering,
         0, /* use all the rows of the jacobian */
         pp->reduced_program.get(),
@@ -138,6 +140,7 @@
 
     return ReorderProgramForSparseCholesky(
         options.sparse_linear_algebra_library_type,
+        options.linear_solver_ordering_type,
         *options.linear_solver_ordering,
         pp->linear_solver_options.subset_preconditioner_start_row_block,
         pp->reduced_program.get(),
@@ -200,6 +203,7 @@
       options.visibility_clustering_type;
   pp->linear_solver_options.sparse_linear_algebra_library_type =
       options.sparse_linear_algebra_library_type;
+
   pp->linear_solver_options.dense_linear_algebra_library_type =
       options.dense_linear_algebra_library_type;
   pp->linear_solver_options.use_explicit_schur_complement =
@@ -210,10 +214,6 @@
   pp->linear_solver_options.max_num_refinement_iterations =
       options.max_num_refinement_iterations;
   pp->linear_solver_options.num_threads = options.num_threads;
-  pp->linear_solver_options.ordering_type = OrderingType::NATURAL;
-  if (options.use_postordering) {
-    pp->linear_solver_options.ordering_type = OrderingType::AMD;
-  }
   pp->linear_solver_options.context = pp->problem->context();
 
   if (IsSchurType(pp->linear_solver_options.type)) {
@@ -227,20 +227,24 @@
     if (pp->linear_solver_options.elimination_groups.size() == 1) {
       pp->linear_solver_options.elimination_groups.push_back(0);
     }
+  }
 
-    if (options.linear_solver_type == SPARSE_SCHUR) {
-      // When using SPARSE_SCHUR, we ignore the user's postordering
-      // preferences if CX_SPARSE is the sparse linear algebra
-      // backend.
-      //
-      // This ensures that the linear solver does not assume that a
-      // fill-reducing pre-ordering has been done.
-      //
-      // TODO(sameeragarwal): Implement the reordering of parameter
-      // blocks for CX_SPARSE.
-      if (options.sparse_linear_algebra_library_type == CX_SPARSE) {
-        pp->linear_solver_options.ordering_type = OrderingType::AMD;
-      }
+  if (AreJacobianColumnsOrdered(options.linear_solver_type,
+                                options.preconditioner_type,
+                                options.sparse_linear_algebra_library_type,
+                                options.linear_solver_ordering_type,
+                                options.use_postordering,
+                                options.dynamic_sparsity)) {
+    pp->linear_solver_options.ordering_type = OrderingType::NATURAL;
+  } else {
+    if (options.linear_solver_ordering_type == ceres::AMD) {
+      pp->linear_solver_options.ordering_type = OrderingType::AMD;
+    } else if (options.linear_solver_ordering_type == ceres::NESDIS) {
+      pp->linear_solver_options.ordering_type = OrderingType::NESDIS;
+    } else {
+      LOG(FATAL) << "Congratulations you have found a bug in Ceres Solver."
+                 << " Please report this to the maintainers. : "
+                 << options.linear_solver_ordering_type;
     }
   }
 
diff --git a/internal/ceres/types.cc b/internal/ceres/types.cc
index 4824267..eea0312 100644
--- a/internal/ceres/types.cc
+++ b/internal/ceres/types.cc
@@ -124,6 +124,23 @@
   return false;
 }
 
+const char* LinearSolverOrderingTypeToString(LinearSolverOrderingType type) {
+  switch (type) {
+    CASESTR(AMD);
+    CASESTR(NESDIS);
+    default:
+      return "UNKNOWN";
+  }
+}
+
+bool StringToLinearSolverOrderingType(string value,
+                                      LinearSolverOrderingType* type) {
+  UpperCase(&value);
+  STRENUM(AMD);
+  STRENUM(NESDIS);
+  return false;
+}
+
 const char* DenseLinearAlgebraLibraryTypeToString(
     DenseLinearAlgebraLibraryType type) {
   switch (type) {
diff --git a/internal/ceres/visibility_based_preconditioner.cc b/internal/ceres/visibility_based_preconditioner.cc
index bde2814..0a7b19f 100644
--- a/internal/ceres/visibility_based_preconditioner.cc
+++ b/internal/ceres/visibility_based_preconditioner.cc
@@ -106,7 +106,7 @@
   LinearSolver::Options sparse_cholesky_options;
   sparse_cholesky_options.sparse_linear_algebra_library_type =
       options_.sparse_linear_algebra_library_type;
-  sparse_cholesky_options.ordering_type = options.ordering_type;
+  sparse_cholesky_options.ordering_type = options_.ordering_type;
   sparse_cholesky_ = SparseCholesky::Create(sparse_cholesky_options);
 
   const time_t init_time = time(nullptr);