Multiple dense linear algebra backends.
1. When a LAPACK implementation is present, DENSE_QR,
DENSE_NORMAL_CHOLESKY and DENSE_SCHUR can use it for their
dense linear algebra operations.
2. The user can switch dense linear algebra libraries by setting
Solver::Options::dense_linear_algebra_library_type (see the sketch
below the list).
3. Solver::Options::sparse_linear_algebra_library has been renamed to
Solver::Options::sparse_linear_algebra_library_type, for consistency
with all the other enums in Solver::Options.
4. Updated documentation as well as Solver::Summary::FullReport
to reflect these changes.
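
A minimal sketch of the intended usage, assuming LAPACK was found at
build time (problem construction elided):

  ceres::Solver::Options options;
  options.linear_solver_type = ceres::DENSE_NORMAL_CHOLESKY;
  // Use the system BLAS + LAPACK for the dense factorization instead
  // of Eigen, which remains the default backend.
  options.dense_linear_algebra_library_type = ceres::LAPACK;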
Change-Id: I5ab930bc15e90906b648bc399b551e6bd5d6498f
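
Since the LAPACK backend may not be compiled in (CERES_NO_LAPACK), a
caller can guard the selection with the availability check introduced
in types.h; a sketch, assuming the same options object as above:

  if (ceres::IsDenseLinearAlgebraLibraryTypeAvailable(ceres::LAPACK)) {
    options.dense_linear_algebra_library_type = ceres::LAPACK;
  } else {
    // EIGEN is always available and is the default backend.
    options.dense_linear_algebra_library_type = ceres::EIGEN;
  }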
diff --git a/CMakeLists.txt b/CMakeLists.txt
index c14cc2c..a85bd14 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -131,6 +131,40 @@
SET(CXSPARSE_SEARCH_HEADERS ${SEARCH_HEADERS})
LIST(APPEND CXSPARSE_SEARCH_HEADERS /usr/include/suitesparse) # Ubuntu
+SET(BLAS_AND_LAPACK_FOUND TRUE)
+IF ((NOT DEFINED LAPACK) OR (DEFINED LAPACK AND LAPACK))
+ MESSAGE("${LAPACK}")
+
+ IF (APPLE)
+ # Mac OS X has LAPACK/BLAS bundled in a framework called
+ # "vecLib". Search for that instead of for the normal "lapack"
+ # library.
+ FIND_LIBRARY(LAPACK_LIB NAMES vecLib)
+ ELSE (APPLE)
+ FIND_LIBRARY(BLAS_LIB NAMES blas)
+ IF (EXISTS ${BLAS_LIB})
+ MESSAGE("-- Found BLAS library: ${BLAS_LIB}")
+ ELSE (EXISTS ${BLAS_LIB})
+ MESSAGE("-- Did not find BLAS library")
+ SET(BLAS_AND_LAPACK_FOUND FALSE)
+ ENDIF (EXISTS ${BLAS_LIB})
+ FIND_LIBRARY(LAPACK_LIB NAMES lapack)
+ ENDIF (APPLE)
+ELSE ((NOT DEFINED LAPACK) OR (DEFINED LAPACK AND LAPACK))
+ SET(BLAS_AND_LAPACK_FOUND FALSE)
+ENDIF ((NOT DEFINED LAPACK) OR (DEFINED LAPACK AND LAPACK))
+
+IF (EXISTS ${LAPACK_LIB})
+ MESSAGE("-- Found LAPACK library: ${LAPACK_LIB}")
+ELSE (EXISTS ${LAPACK_LIB})
+ SET(BLAS_AND_LAPACK_FOUND FALSE)
+ MESSAGE("-- Did not find LAPACK library")
+ENDIF (EXISTS ${LAPACK_LIB})
+
+IF (NOT BLAS_AND_LAPACK_FOUND)
+ ADD_DEFINITIONS(-DCERES_NO_LAPACK)
+ENDIF (NOT BLAS_AND_LAPACK_FOUND)
+
IF ((NOT DEFINED SUITESPARSE) OR (DEFINED SUITESPARSE AND SUITESPARSE))
# Check for SuiteSparse dependencies
@@ -298,32 +332,8 @@
SET(TBB_FOUND FALSE)
ENDIF (EXISTS ${TBB_MALLOC_LIB})
-SET(BLAS_AND_LAPACK_FOUND TRUE)
-IF (APPLE)
- # Mac OS X has LAPACK/BLAS bundled in a framework called
- # "vecLib". Search for that instead of for the normal "lapack"
- # library.
- FIND_LIBRARY(LAPACK_LIB NAMES vecLib)
-ELSE (APPLE)
- FIND_LIBRARY(BLAS_LIB NAMES blas)
- IF (EXISTS ${BLAS_LIB})
- MESSAGE("-- Found BLAS library: ${BLAS_LIB}")
- ELSE (EXISTS ${BLAS_LIB})
- MESSAGE("-- Did not find BLAS library")
- SET(BLAS_AND_LAPACK_FOUND FALSE)
- ENDIF (EXISTS ${BLAS_LIB})
- FIND_LIBRARY(LAPACK_LIB NAMES lapack)
-ENDIF (APPLE)
-
-IF (EXISTS ${LAPACK_LIB})
- MESSAGE("-- Found LAPACK library: ${LAPACK_LIB}")
-ELSE (EXISTS ${LAPACK_LIB})
- SET(BLAS_AND_LAPACK_FOUND FALSE)
- MESSAGE("-- Did not find LAPACK library")
-ENDIF (EXISTS ${LAPACK_LIB})
-
-# We don't use SET(SUITESPARSE_FOUND ${AMD_FOUND} ...) in order to
-# be able to check whether SuiteSparse is available withou expanding
+# We don't use SET(SUITESPARSE_FOUND ${AMD_FOUND} ...) in order to be
+# able to check whether SuiteSparse is available without expanding
# SUITESPARSE_FOUND with ${}. This means further checks could be:
#
# IF (SUITESPARSE_FOUND)
diff --git a/docs/source/solving.rst b/docs/source/solving.rst
index d8b9f4a..f574ec3 100644
--- a/docs/source/solving.rst
+++ b/docs/source/solving.rst
@@ -1147,7 +1147,23 @@
``CLUSTER_JACOBI`` and ``CLUSTER_TRIDIAGONAL``. See
:ref:`section-preconditioner` for more details.
-.. member:: SparseLinearAlgebraLibrary Solver::Options::sparse_linear_algebra_library
+.. member:: DenseLinearAlgebraLibrary Solver::Options::dense_linear_algebra_library_type
+
+ Default:``EIGEN``
+
+ Ceres supports using multiple dense linear algebra libraries for
+ dense matrix factorizations. Currently ``EIGEN`` and ``LAPACK`` are
+ the valid choices. ``EIGEN`` is always available, ``LAPACK`` refers
+ to the system ``BLAS + LAPACK`` library which may or may not be
+ available.
+
+ This setting affects the ``DENSE_QR``, ``DENSE_NORMAL_CHOLESKY``
+ and ``DENSE_SCHUR`` solvers. For small to moderate sized problems,
+ ``EIGEN`` is a fine choice but for large problems, an optimized
+ ``LAPACK + BLAS`` implementation can make a substantial difference
+ in performance.
+
+.. member:: SparseLinearAlgebraLibrary Solver::Options::sparse_linear_algebra_library_type
Default:``SUITE_SPARSE``
@@ -1837,7 +1853,7 @@
TrustRegionStrategyType trust_region_strategy_type;
DoglegType dogleg_type;
- SparseLinearAlgebraLibraryType sparse_linear_algebra_library;
+ SparseLinearAlgebraLibraryType sparse_linear_algebra_library_type;
LineSearchDirectionType line_search_direction_type;
LineSearchType line_search_type;
diff --git a/examples/bundle_adjuster.cc b/examples/bundle_adjuster.cc
index c060aed..3586501 100644
--- a/examples/bundle_adjuster.cc
+++ b/examples/bundle_adjuster.cc
@@ -84,6 +84,8 @@
"cluster_tridiagonal.");
DEFINE_string(sparse_linear_algebra_library, "suite_sparse",
"Options are: suite_sparse and cx_sparse.");
+DEFINE_string(dense_linear_algebra_library, "eigen",
+ "Options are: eigen and lapack.");
DEFINE_string(ordering, "automatic", "Options are: automatic, user.");
DEFINE_bool(use_quaternions, false, "If true, uses quaternions to represent "
@@ -125,8 +127,12 @@
&options->preconditioner_type));
CHECK(StringToSparseLinearAlgebraLibraryType(
FLAGS_sparse_linear_algebra_library,
- &options->sparse_linear_algebra_library));
+ &options->sparse_linear_algebra_library_type));
+ CHECK(StringToDenseLinearAlgebraLibraryType(
+ FLAGS_dense_linear_algebra_library,
+ &options->dense_linear_algebra_library_type));
options->num_linear_solver_threads = FLAGS_num_threads;
+
}
void SetOrdering(BALProblem* bal_problem, Solver::Options* options) {
diff --git a/include/ceres/solver.h b/include/ceres/solver.h
index e7b8d09..ab1d350 100644
--- a/include/ceres/solver.h
+++ b/include/ceres/solver.h
@@ -73,7 +73,6 @@
max_num_line_search_direction_restarts = 5;
line_search_sufficient_curvature_decrease = 0.9;
max_line_search_step_expansion = 10.0;
-
trust_region_strategy_type = LEVENBERG_MARQUARDT;
dogleg_type = TRADITIONAL_DOGLEG;
use_nonmonotonic_steps = false;
@@ -100,11 +99,12 @@
preconditioner_type = JACOBI;
- sparse_linear_algebra_library = SUITE_SPARSE;
+ sparse_linear_algebra_library_type = SUITE_SPARSE;
#if defined(CERES_NO_SUITESPARSE) && !defined(CERES_NO_CXSPARSE)
- sparse_linear_algebra_library = CX_SPARSE;
+ sparse_linear_algebra_library_type = CX_SPARSE;
#endif
+ dense_linear_algebra_library_type = EIGEN;
num_linear_solver_threads = 1;
linear_solver_ordering = NULL;
use_postordering = false;
@@ -388,7 +388,20 @@
// for sparse matrix ordering and factorizations. Currently,
// SUITE_SPARSE and CX_SPARSE are the valid choices, depending on
// whether they are linked into Ceres at build time.
- SparseLinearAlgebraLibraryType sparse_linear_algebra_library;
+ SparseLinearAlgebraLibraryType sparse_linear_algebra_library_type;
+
+ // Ceres supports using multiple dense linear algebra libraries
+ // for dense matrix factorizations. Currently EIGEN and LAPACK are
+ // the valid choices. EIGEN is always available, LAPACK refers to
+ // the system BLAS + LAPACK library which may or may not be
+ // available.
+ //
+ // This setting affects the DENSE_QR, DENSE_NORMAL_CHOLESKY and
+ // DENSE_SCHUR solvers. For small to moderate sized problems, EIGEN
+ // is a fine choice but for large problems, an optimized LAPACK +
+ // BLAS implementation can make a substantial difference in
+ // performance.
+ DenseLinearAlgebraLibraryType dense_linear_algebra_library_type;
// Number of threads used by Ceres to solve the Newton
// step. Currently only the SPARSE_SCHUR solver is capable of
@@ -783,7 +796,8 @@
TrustRegionStrategyType trust_region_strategy_type;
DoglegType dogleg_type;
- SparseLinearAlgebraLibraryType sparse_linear_algebra_library;
+ DenseLinearAlgebraLibraryType dense_linear_algebra_library_type;
+ SparseLinearAlgebraLibraryType sparse_linear_algebra_library_type;
LineSearchDirectionType line_search_direction_type;
LineSearchType line_search_type;
diff --git a/include/ceres/types.h b/include/ceres/types.h
index a967541..ffa743a 100644
--- a/include/ceres/types.h
+++ b/include/ceres/types.h
@@ -126,6 +126,11 @@
CX_SPARSE
};
+enum DenseLinearAlgebraLibraryType {
+ EIGEN,
+ LAPACK
+};
+
enum LinearSolverTerminationType {
// Termination criterion was met. For factorization based solvers
// the tolerance is assumed to be zero. Any user provided values are
@@ -401,6 +406,12 @@
string value,
SparseLinearAlgebraLibraryType* type);
+const char* DenseLinearAlgebraLibraryTypeToString(
+ DenseLinearAlgebraLibraryType type);
+bool StringToDenseLinearAlgebraLibraryType(
+ string value,
+ DenseLinearAlgebraLibraryType* type);
+
const char* TrustRegionStrategyTypeToString(TrustRegionStrategyType type);
bool StringToTrustRegionStrategyType(string value,
TrustRegionStrategyType* type);
@@ -444,7 +455,8 @@
bool IsSchurType(LinearSolverType type);
bool IsSparseLinearAlgebraLibraryTypeAvailable(
SparseLinearAlgebraLibraryType type);
-
+bool IsDenseLinearAlgebraLibraryTypeAvailable(
+ DenseLinearAlgebraLibraryType type);
} // namespace ceres
diff --git a/internal/ceres/CMakeLists.txt b/internal/ceres/CMakeLists.txt
index 9e2e1ae..9138c0d 100644
--- a/internal/ceres/CMakeLists.txt
+++ b/internal/ceres/CMakeLists.txt
@@ -30,6 +30,7 @@
SET(CERES_INTERNAL_SRC
array_utils.cc
+ blas.cc
block_evaluate_preparer.cc
block_jacobi_preconditioner.cc
block_jacobian_writer.cc
@@ -64,6 +65,7 @@
incomplete_lq_factorization.cc
iterative_schur_complement_solver.cc
levenberg_marquardt_strategy.cc
+ lapack.cc
line_search.cc
line_search_direction.cc
line_search_minimizer.cc
@@ -166,18 +168,23 @@
LIST(APPEND CERES_LIBRARY_DEPENDENCIES ${METIS_LIB})
ENDIF (EXISTS ${METIS_LIB})
- LIST(APPEND CERES_LIBRARY_DEPENDENCIES ${LAPACK_LIB})
-
- IF (EXISTS ${BLAS_LIB})
- LIST(APPEND CERES_LIBRARY_DEPENDENCIES ${BLAS_LIB})
- ENDIF (EXISTS ${BLAS_LIB})
-
IF (TBB_FOUND)
LIST(APPEND CERES_LIBRARY_DEPENDENCIES ${TBB_LIB})
LIST(APPEND CERES_LIBRARY_DEPENDENCIES ${TBB_MALLOC_LIB})
ENDIF (TBB_FOUND)
ENDIF (SUITESPARSE_FOUND)
+IF (CXSPARSE_FOUND)
+ LIST(APPEND CERES_LIBRARY_DEPENDENCIES ${CXSPARSE_LIB})
+ENDIF (CXSPARSE_FOUND)
+
+IF (BLAS_AND_LAPACK_FOUND)
+ LIST(APPEND CERES_LIBRARY_DEPENDENCIES ${LAPACK_LIB})
+
+ IF (EXISTS ${BLAS_LIB})
+ LIST(APPEND CERES_LIBRARY_DEPENDENCIES ${BLAS_LIB})
+ ENDIF (EXISTS ${BLAS_LIB})
+ENDIF (BLAS_AND_LAPACK_FOUND)
IF (CXSPARSE_FOUND)
LIST(APPEND CERES_LIBRARY_DEPENDENCIES ${CXSPARSE_LIB})
@@ -242,7 +249,6 @@
CERES_TEST(autodiff)
CERES_TEST(autodiff_cost_function)
CERES_TEST(autodiff_local_parameterization)
- CERES_TEST(blas)
CERES_TEST(block_random_access_crs_matrix)
CERES_TEST(block_random_access_dense_matrix)
CERES_TEST(block_random_access_sparse_matrix)
@@ -285,6 +291,7 @@
CERES_TEST(runtime_numeric_diff_cost_function)
CERES_TEST(schur_complement_solver)
CERES_TEST(schur_eliminator)
+ CERES_TEST(small_blas)
CERES_TEST(solver_impl)
# TODO(sameeragarwal): This test should ultimately be made
diff --git a/internal/ceres/blas.cc b/internal/ceres/blas.cc
new file mode 100644
index 0000000..d63ca51
--- /dev/null
+++ b/internal/ceres/blas.cc
@@ -0,0 +1,78 @@
+// Ceres Solver - A fast non-linear least squares minimizer
+// Copyright 2013 Google Inc. All rights reserved.
+// http://code.google.com/p/ceres-solver/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+// * Neither the name of Google Inc. nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+// Author: sameeragarwal@google.com (Sameer Agarwal)
+
+#include "ceres/blas.h"
+#include "glog/logging.h"
+
+extern "C" void dsyrk_(char* uplo,
+ char* trans,
+ int* n,
+ int* k,
+ double* alpha,
+ double* a,
+ int* lda,
+ double* beta,
+ double* c,
+ int* ldc);
+
+namespace ceres {
+namespace internal {
+
+void BLAS::SymmetricRankKUpdate(int num_rows,
+ int num_cols,
+ const double* a,
+ bool transpose,
+ double alpha,
+ double beta,
+ double* c) {
+#ifdef CERES_NO_LAPACK
+ LOG(FATAL) << "Ceres was built without a BLAS library.";
+#else
+ char uplo = 'L';
+ char trans = transpose ? 'T' : 'N';
+ int n = transpose ? num_cols : num_rows;
+ int k = transpose ? num_rows : num_cols;
+ int lda = k;
+ int ldc = n;
+ dsyrk_(&uplo,
+ &trans,
+ &n,
+ &k,
+ &alpha,
+ const_cast<double*>(a),
+ &lda,
+ &beta,
+ c,
+ &ldc);
+#endif
+};
+
+} // namespace internal
+} // namespace ceres
diff --git a/internal/ceres/blas.h b/internal/ceres/blas.h
index 9629b3d..a5c6862 100644
--- a/internal/ceres/blas.h
+++ b/internal/ceres/blas.h
@@ -28,377 +28,29 @@
//
// Author: sameeragarwal@google.com (Sameer Agarwal)
//
-// Simple blas functions for use in the Schur Eliminator. These are
-// fairly basic implementations which already yield a significant
-// speedup in the eliminator performance.
+// Wrapper functions around BLAS functions.
#ifndef CERES_INTERNAL_BLAS_H_
#define CERES_INTERNAL_BLAS_H_
-#include "ceres/internal/eigen.h"
-#include "glog/logging.h"
-
namespace ceres {
namespace internal {
-// Remove the ".noalias()" annotation from the matrix matrix
-// mutliplies to produce a correct build with the Android NDK,
-// including versions 6, 7, 8, and 8b, when built with STLPort and the
-// non-standalone toolchain (i.e. ndk-build). This appears to be a
-// compiler bug; if the workaround is not in place, the line
-//
-// block.noalias() -= A * B;
-//
-// gets compiled to
-//
-// block.noalias() += A * B;
-//
-// which breaks schur elimination. Introducing a temporary by removing the
-// .noalias() annotation causes the issue to disappear. Tracking this
-// issue down was tricky, since the test suite doesn't run when built with
-// the non-standalone toolchain.
-//
-// TODO(keir): Make a reproduction case for this and send it upstream.
-#ifdef CERES_WORK_AROUND_ANDROID_NDK_COMPILER_BUG
-#define CERES_MAYBE_NOALIAS
-#else
-#define CERES_MAYBE_NOALIAS .noalias()
-#endif
+class BLAS {
+ public:
-// The following three macros are used to share code and reduce
-// template junk across the various GEMM variants.
-#define CERES_GEMM_BEGIN(name) \
- template<int kRowA, int kColA, int kRowB, int kColB, int kOperation> \
- inline void name(const double* A, \
- const int num_row_a, \
- const int num_col_a, \
- const double* B, \
- const int num_row_b, \
- const int num_col_b, \
- double* C, \
- const int start_row_c, \
- const int start_col_c, \
- const int row_stride_c, \
- const int col_stride_c)
-
-#define CERES_GEMM_NAIVE_HEADER \
- DCHECK_GT(num_row_a, 0); \
- DCHECK_GT(num_col_a, 0); \
- DCHECK_GT(num_row_b, 0); \
- DCHECK_GT(num_col_b, 0); \
- DCHECK_GE(start_row_c, 0); \
- DCHECK_GE(start_col_c, 0); \
- DCHECK_GT(row_stride_c, 0); \
- DCHECK_GT(col_stride_c, 0); \
- DCHECK((kRowA == Eigen::Dynamic) || (kRowA == num_row_a)); \
- DCHECK((kColA == Eigen::Dynamic) || (kColA == num_col_a)); \
- DCHECK((kRowB == Eigen::Dynamic) || (kRowB == num_row_b)); \
- DCHECK((kColB == Eigen::Dynamic) || (kColB == num_col_b)); \
- const int NUM_ROW_A = (kRowA != Eigen::Dynamic ? kRowA : num_row_a); \
- const int NUM_COL_A = (kColA != Eigen::Dynamic ? kColA : num_col_a); \
- const int NUM_ROW_B = (kColB != Eigen::Dynamic ? kRowB : num_row_b); \
- const int NUM_COL_B = (kColB != Eigen::Dynamic ? kColB : num_col_b);
-
-#define CERES_GEMM_EIGEN_HEADER \
- const typename EigenTypes<kRowA, kColA>::ConstMatrixRef \
- Aref(A, num_row_a, num_col_a); \
- const typename EigenTypes<kRowB, kColB>::ConstMatrixRef \
- Bref(B, num_row_b, num_col_b); \
- MatrixRef Cref(C, row_stride_c, col_stride_c); \
-
-#define CERES_CALL_GEMM(name) \
- name<kRowA, kColA, kRowB, kColB, kOperation>( \
- A, num_row_a, num_col_a, \
- B, num_row_b, num_col_b, \
- C, start_row_c, start_col_c, row_stride_c, col_stride_c);
-
-
-// For the matrix-matrix functions below, there are three variants for
-// each functionality. Foo, FooNaive and FooEigen. Foo is the one to
-// be called by the user. FooNaive is a basic loop based
-// implementation and FooEigen uses Eigen's implementation. Foo
-// chooses between FooNaive and FooEigen depending on how many of the
-// template arguments are fixed at compile time. Currently, FooEigen
-// is called if all matrix dimensions are compile time
-// constants. FooNaive is called otherwise. This leads to the best
-// performance currently.
-//
-// The MatrixMatrixMultiply variants compute:
-//
-// C op A * B;
-//
-// The MatrixTransposeMatrixMultiply variants compute:
-//
-// C op A' * B
-//
-// where op can be +=, -=, or =.
-//
-// The template parameters (kRowA, kColA, kRowB, kColB) allow
-// specialization of the loop at compile time. If this information is
-// not available, then Eigen::Dynamic should be used as the template
-// argument.
-//
-// kOperation = 1 -> C += A * B
-// kOperation = -1 -> C -= A * B
-// kOperation = 0 -> C = A * B
-//
-// The functions can write into matrices C which are larger than the
-// matrix A * B. This is done by specifying the true size of C via
-// row_stride_c and col_stride_c, and then indicating where A * B
-// should be written into by start_row_c and start_col_c.
-//
-// Graphically if row_stride_c = 10, col_stride_c = 12, start_row_c =
-// 4 and start_col_c = 5, then if A = 3x2 and B = 2x4, we get
-//
-// ------------
-// ------------
-// ------------
-// ------------
-// -----xxxx---
-// -----xxxx---
-// -----xxxx---
-// ------------
-// ------------
-// ------------
-//
-CERES_GEMM_BEGIN(MatrixMatrixMultiplyEigen) {
- CERES_GEMM_EIGEN_HEADER
- Eigen::Block<MatrixRef, kRowA, kColB>
- block(Cref, start_row_c, start_col_c, num_row_a, num_col_b);
-
- if (kOperation > 0) {
- block CERES_MAYBE_NOALIAS += Aref * Bref;
- } else if (kOperation < 0) {
- block CERES_MAYBE_NOALIAS -= Aref * Bref;
- } else {
- block CERES_MAYBE_NOALIAS = Aref * Bref;
- }
-}
-
-CERES_GEMM_BEGIN(MatrixMatrixMultiplyNaive) {
- CERES_GEMM_NAIVE_HEADER
- DCHECK_EQ(NUM_COL_A, NUM_ROW_B);
-
- const int NUM_ROW_C = NUM_ROW_A;
- const int NUM_COL_C = NUM_COL_B;
- DCHECK_LE(start_row_c + NUM_ROW_C, row_stride_c);
- DCHECK_LE(start_col_c + NUM_COL_C, col_stride_c);
-
- for (int row = 0; row < NUM_ROW_C; ++row) {
- for (int col = 0; col < NUM_COL_C; ++col) {
- double tmp = 0.0;
- for (int k = 0; k < NUM_COL_A; ++k) {
- tmp += A[row * NUM_COL_A + k] * B[k * NUM_COL_B + col];
- }
-
- const int index = (row + start_row_c) * col_stride_c + start_col_c + col;
- if (kOperation > 0) {
- C[index] += tmp;
- } else if (kOperation < 0) {
- C[index] -= tmp;
- } else {
- C[index] = tmp;
- }
- }
- }
-}
-
-CERES_GEMM_BEGIN(MatrixMatrixMultiply) {
-#ifdef CERES_NO_CUSTOM_BLAS
-
- CERES_CALL_GEMM(MatrixMatrixMultiplyEigen)
- return;
-
-#else
-
- if (kRowA != Eigen::Dynamic && kColA != Eigen::Dynamic &&
- kRowB != Eigen::Dynamic && kColB != Eigen::Dynamic) {
- CERES_CALL_GEMM(MatrixMatrixMultiplyEigen)
- } else {
- CERES_CALL_GEMM(MatrixMatrixMultiplyNaive)
- }
-
-#endif
-}
-
-CERES_GEMM_BEGIN(MatrixTransposeMatrixMultiplyEigen) {
- CERES_GEMM_EIGEN_HEADER
- Eigen::Block<MatrixRef, kColA, kColB> block(Cref,
- start_row_c, start_col_c,
- num_col_a, num_col_b);
- if (kOperation > 0) {
- block CERES_MAYBE_NOALIAS += Aref.transpose() * Bref;
- } else if (kOperation < 0) {
- block CERES_MAYBE_NOALIAS -= Aref.transpose() * Bref;
- } else {
- block CERES_MAYBE_NOALIAS = Aref.transpose() * Bref;
- }
-}
-
-CERES_GEMM_BEGIN(MatrixTransposeMatrixMultiplyNaive) {
- CERES_GEMM_NAIVE_HEADER
- DCHECK_EQ(NUM_ROW_A, NUM_ROW_B);
-
- const int NUM_ROW_C = NUM_COL_A;
- const int NUM_COL_C = NUM_COL_B;
- DCHECK_LE(start_row_c + NUM_ROW_C, row_stride_c);
- DCHECK_LE(start_col_c + NUM_COL_C, col_stride_c);
-
- for (int row = 0; row < NUM_ROW_C; ++row) {
- for (int col = 0; col < NUM_COL_C; ++col) {
- double tmp = 0.0;
- for (int k = 0; k < NUM_ROW_A; ++k) {
- tmp += A[k * NUM_COL_A + row] * B[k * NUM_COL_B + col];
- }
-
- const int index = (row + start_row_c) * col_stride_c + start_col_c + col;
- if (kOperation > 0) {
- C[index]+= tmp;
- } else if (kOperation < 0) {
- C[index]-= tmp;
- } else {
- C[index]= tmp;
- }
- }
- }
-}
-
-CERES_GEMM_BEGIN(MatrixTransposeMatrixMultiply) {
-#ifdef CERES_NO_CUSTOM_BLAS
-
- CERES_CALL_GEMM(MatrixTransposeMatrixMultiplyEigen)
- return;
-
-#else
-
- if (kRowA != Eigen::Dynamic && kColA != Eigen::Dynamic &&
- kRowB != Eigen::Dynamic && kColB != Eigen::Dynamic) {
- CERES_CALL_GEMM(MatrixTransposeMatrixMultiplyEigen)
- } else {
- CERES_CALL_GEMM(MatrixTransposeMatrixMultiplyNaive)
- }
-
-#endif
-}
-
-// Matrix-Vector multiplication
-//
-// c op A * b;
-//
-// where op can be +=, -=, or =.
-//
-// The template parameters (kRowA, kColA) allow specialization of the
-// loop at compile time. If this information is not available, then
-// Eigen::Dynamic should be used as the template argument.
-//
-// kOperation = 1 -> c += A' * b
-// kOperation = -1 -> c -= A' * b
-// kOperation = 0 -> c = A' * b
-template<int kRowA, int kColA, int kOperation>
-inline void MatrixVectorMultiply(const double* A,
- const int num_row_a,
- const int num_col_a,
- const double* b,
- double* c) {
-#ifdef CERES_NO_CUSTOM_BLAS
- const typename EigenTypes<kRowA, kColA>::ConstMatrixRef
- Aref(A, num_row_a, num_col_a);
- const typename EigenTypes<kColA>::ConstVectorRef bref(b, num_col_a);
- typename EigenTypes<kRowA>::VectorRef cref(c, num_row_a);
-
- // lazyProduct works better than .noalias() for matrix-vector
- // products.
- if (kOperation > 0) {
- cref += Aref.lazyProduct(bref);
- } else if (kOperation < 0) {
- cref -= Aref.lazyProduct(bref);
- } else {
- cref = Aref.lazyProduct(bref);
- }
-#else
-
- DCHECK_GT(num_row_a, 0);
- DCHECK_GT(num_col_a, 0);
- DCHECK((kRowA == Eigen::Dynamic) || (kRowA == num_row_a));
- DCHECK((kColA == Eigen::Dynamic) || (kColA == num_col_a));
-
- const int NUM_ROW_A = (kRowA != Eigen::Dynamic ? kRowA : num_row_a);
- const int NUM_COL_A = (kColA != Eigen::Dynamic ? kColA : num_col_a);
-
- for (int row = 0; row < NUM_ROW_A; ++row) {
- double tmp = 0.0;
- for (int col = 0; col < NUM_COL_A; ++col) {
- tmp += A[row * NUM_COL_A + col] * b[col];
- }
-
- if (kOperation > 0) {
- c[row] += tmp;
- } else if (kOperation < 0) {
- c[row] -= tmp;
- } else {
- c[row] = tmp;
- }
- }
-#endif // CERES_NO_CUSTOM_BLAS
-}
-
-// Similar to MatrixVectorMultiply, except that A is transposed, i.e.,
-//
-// c op A' * b;
-template<int kRowA, int kColA, int kOperation>
-inline void MatrixTransposeVectorMultiply(const double* A,
- const int num_row_a,
- const int num_col_a,
- const double* b,
- double* c) {
-#ifdef CERES_NO_CUSTOM_BLAS
- const typename EigenTypes<kRowA, kColA>::ConstMatrixRef
- Aref(A, num_row_a, num_col_a);
- const typename EigenTypes<kRowA>::ConstVectorRef bref(b, num_row_a);
- typename EigenTypes<kColA>::VectorRef cref(c, num_col_a);
-
- // lazyProduct works better than .noalias() for matrix-vector
- // products.
- if (kOperation > 0) {
- cref += Aref.transpose().lazyProduct(bref);
- } else if (kOperation < 0) {
- cref -= Aref.transpose().lazyProduct(bref);
- } else {
- cref = Aref.transpose().lazyProduct(bref);
- }
-#else
-
- DCHECK_GT(num_row_a, 0);
- DCHECK_GT(num_col_a, 0);
- DCHECK((kRowA == Eigen::Dynamic) || (kRowA == num_row_a));
- DCHECK((kColA == Eigen::Dynamic) || (kColA == num_col_a));
-
- const int NUM_ROW_A = (kRowA != Eigen::Dynamic ? kRowA : num_row_a);
- const int NUM_COL_A = (kColA != Eigen::Dynamic ? kColA : num_col_a);
-
- for (int row = 0; row < NUM_COL_A; ++row) {
- double tmp = 0.0;
- for (int col = 0; col < NUM_ROW_A; ++col) {
- tmp += A[col * NUM_COL_A + row] * b[col];
- }
-
- if (kOperation > 0) {
- c[row] += tmp;
- } else if (kOperation < 0) {
- c[row] -= tmp;
- } else {
- c[row] = tmp;
- }
- }
-#endif // CERES_NO_CUSTOM_BLAS
-}
-
-
-#undef CERES_MAYBE_NOALIAS
-#undef CERES_GEMM_BEGIN
-#undef CERES_GEMM_EIGEN_HEADER
-#undef CERES_GEMM_NAIVE_HEADER
-#undef CERES_CALL_GEMM
+ // transpose = true : c = alpha * a'a + beta * c;
+ // transpose = false : c = alpha * aa' + beta * c;
+ //
+ // Assumes column major matrices.
+ static void SymmetricRankKUpdate(int num_rows,
+ int num_cols,
+ const double* a,
+ bool transpose,
+ double alpha,
+ double beta,
+ double* c);
+};
} // namespace internal
} // namespace ceres
diff --git a/internal/ceres/block_sparse_matrix.cc b/internal/ceres/block_sparse_matrix.cc
index fdd762c..a487262 100644
--- a/internal/ceres/block_sparse_matrix.cc
+++ b/internal/ceres/block_sparse_matrix.cc
@@ -33,9 +33,9 @@
#include <cstddef>
#include <algorithm>
#include <vector>
-#include "ceres/blas.h"
#include "ceres/block_structure.h"
#include "ceres/internal/eigen.h"
+#include "ceres/small_blas.h"
#include "ceres/triplet_sparse_matrix.h"
#include "glog/logging.h"
diff --git a/internal/ceres/dense_normal_cholesky_solver.cc b/internal/ceres/dense_normal_cholesky_solver.cc
index 8e05dcc..677987e 100644
--- a/internal/ceres/dense_normal_cholesky_solver.cc
+++ b/internal/ceres/dense_normal_cholesky_solver.cc
@@ -33,9 +33,11 @@
#include <cstddef>
#include "Eigen/Dense"
+#include "ceres/blas.h"
#include "ceres/dense_sparse_matrix.h"
#include "ceres/internal/eigen.h"
#include "ceres/internal/scoped_ptr.h"
+#include "ceres/lapack.h"
#include "ceres/linear_solver.h"
#include "ceres/types.h"
#include "ceres/wall_time.h"
@@ -52,6 +54,18 @@
const double* b,
const LinearSolver::PerSolveOptions& per_solve_options,
double* x) {
+ if (options_.dense_linear_algebra_library_type == EIGEN) {
+ return SolveUsingEigen(A, b, per_solve_options, x);
+ } else {
+ return SolveUsingLAPACK(A, b, per_solve_options, x);
+ }
+}
+
+LinearSolver::Summary DenseNormalCholeskySolver::SolveUsingEigen(
+ DenseSparseMatrix* A,
+ const double* b,
+ const LinearSolver::PerSolveOptions& per_solve_options,
+ double* x) {
EventLogger event_logger("DenseNormalCholeskySolver::Solve");
const int num_rows = A->num_rows();
@@ -62,6 +76,7 @@
lhs.setZero();
event_logger.AddEvent("Setup");
+
// lhs += A'A
//
// Using rankUpdate instead of GEMM, exposes the fact that its the
@@ -76,16 +91,66 @@
ConstVectorRef D(per_solve_options.D, num_cols);
lhs += D.array().square().matrix().asDiagonal();
}
+ event_logger.AddEvent("Product");
+ // Use dsyrk instead for the product.
LinearSolver::Summary summary;
summary.num_iterations = 1;
summary.termination_type = TOLERANCE;
VectorRef(x, num_cols) =
lhs.selfadjointView<Eigen::Upper>().llt().solve(rhs);
event_logger.AddEvent("Solve");
-
return summary;
}
+LinearSolver::Summary DenseNormalCholeskySolver::SolveUsingLAPACK(
+ DenseSparseMatrix* A,
+ const double* b,
+ const LinearSolver::PerSolveOptions& per_solve_options,
+ double* x) {
+ EventLogger event_logger("DenseNormalCholeskySolver::Solve");
+
+ if (per_solve_options.D != NULL) {
+ // Temporarily append a diagonal block to the A matrix, but undo
+ // it before returning the matrix to the user.
+ A->AppendDiagonal(per_solve_options.D);
+ }
+
+ const int num_cols = A->num_cols();
+ Matrix lhs(num_cols, num_cols);
+ event_logger.AddEvent("Setup");
+
+ // lhs = A'A
+ //
+ // Note: This is a bit delicate, it assumes that the stride on this
+ // matrix is the same as the number of rows.
+ BLAS::SymmetricRankKUpdate(A->num_rows(),
+ num_cols,
+ A->values(),
+ true,
+ 1.0,
+ 0.0,
+ lhs.data());
+
+ if (per_solve_options.D != NULL) {
+ // Undo the modifications to the matrix A.
+ A->RemoveDiagonal();
+ }
+
+ // TODO(sameeragarwal): Replace this with a gemv call for true blasness.
+ // rhs = A'b
+ VectorRef(x, num_cols) = A->matrix().transpose() * ConstVectorRef(b, A->num_rows());
+ event_logger.AddEvent("Product");
+
+ const int info = LAPACK::SolveInPlaceUsingCholesky(num_cols, lhs.data(), x);
+ event_logger.AddEvent("Solve");
+
+ LinearSolver::Summary summary;
+ summary.num_iterations = 1;
+ summary.termination_type = info == 0 ? TOLERANCE : FAILURE;
+
+ event_logger.AddEvent("TearDown");
+ return summary;
+}
} // namespace internal
} // namespace ceres
diff --git a/internal/ceres/dense_normal_cholesky_solver.h b/internal/ceres/dense_normal_cholesky_solver.h
index de47740..e35053f 100644
--- a/internal/ceres/dense_normal_cholesky_solver.h
+++ b/internal/ceres/dense_normal_cholesky_solver.h
@@ -85,6 +85,18 @@
const LinearSolver::PerSolveOptions& per_solve_options,
double* x);
+ LinearSolver::Summary SolveUsingLAPACK(
+ DenseSparseMatrix* A,
+ const double* b,
+ const LinearSolver::PerSolveOptions& per_solve_options,
+ double* x);
+
+ LinearSolver::Summary SolveUsingEigen(
+ DenseSparseMatrix* A,
+ const double* b,
+ const LinearSolver::PerSolveOptions& per_solve_options,
+ double* x);
+
const LinearSolver::Options options_;
CERES_DISALLOW_COPY_AND_ASSIGN(DenseNormalCholeskySolver);
};
diff --git a/internal/ceres/dense_qr_solver.cc b/internal/ceres/dense_qr_solver.cc
index 4ab75ab..c61a96f 100644
--- a/internal/ceres/dense_qr_solver.cc
+++ b/internal/ceres/dense_qr_solver.cc
@@ -30,12 +30,13 @@
#include "ceres/dense_qr_solver.h"
-#include <cstddef>
+#include <cstddef>
#include "Eigen/Dense"
#include "ceres/dense_sparse_matrix.h"
#include "ceres/internal/eigen.h"
#include "ceres/internal/scoped_ptr.h"
+#include "ceres/lapack.h"
#include "ceres/linear_solver.h"
#include "ceres/types.h"
#include "ceres/wall_time.h"
@@ -44,13 +45,26 @@
namespace internal {
DenseQRSolver::DenseQRSolver(const LinearSolver::Options& options)
- : options_(options) {}
+ : options_(options) {
+ work_.resize(1);
+}
LinearSolver::Summary DenseQRSolver::SolveImpl(
DenseSparseMatrix* A,
const double* b,
const LinearSolver::PerSolveOptions& per_solve_options,
double* x) {
+ if (options_.dense_linear_algebra_library_type == EIGEN) {
+ return SolveUsingEigen(A, b, per_solve_options, x);
+ } else {
+ return SolveUsingLAPACK(A, b, per_solve_options, x);
+ }
+}
+LinearSolver::Summary DenseQRSolver::SolveUsingLAPACK(
+ DenseSparseMatrix* A,
+ const double* b,
+ const LinearSolver::PerSolveOptions& per_solve_options,
+ double* x) {
EventLogger event_logger("DenseQRSolver::Solve");
const int num_rows = A->num_rows();
@@ -62,6 +76,65 @@
A->AppendDiagonal(per_solve_options.D);
}
+ lhs_ = A->matrix();
+
+ if (per_solve_options.D != NULL) {
+ // Undo the modifications to the matrix A.
+ A->RemoveDiagonal();
+ }
+
+ // rhs = [b;0] to account for the additional rows in the lhs.
+ if (rhs_.rows() != lhs_.rows()) {
+ rhs_.resize(lhs_.rows());
+ }
+ rhs_.setZero();
+ rhs_.head(num_rows) = ConstVectorRef(b, num_rows);
+
+ if (work_.rows() == 1) {
+ const int work_size = LAPACK::EstimateWorkSizeForQR(lhs_.rows(), lhs_.cols());
+ VLOG(3) << "Working memory for Dense QR factorization: "
+ << work_size * sizeof(double);
+ work_.resize(work_size);
+ }
+
+ const int info = LAPACK::SolveUsingQR(lhs_.rows(),
+ lhs_.cols(),
+ lhs_.data(),
+ work_.rows(),
+ work_.data(),
+ rhs_.data());
+ event_logger.AddEvent("Solve");
+
+ LinearSolver::Summary summary;
+ summary.num_iterations = 1;
+ if (info == 0) {
+ VectorRef(x, num_cols) = rhs_.head(num_cols);
+ summary.termination_type = TOLERANCE;
+ } else {
+ summary.termination_type = FAILURE;
+ }
+
+ event_logger.AddEvent("TearDown");
+ return summary;
+}
+
+LinearSolver::Summary DenseQRSolver::SolveUsingEigen(
+ DenseSparseMatrix* A,
+ const double* b,
+ const LinearSolver::PerSolveOptions& per_solve_options,
+ double* x) {
+ EventLogger event_logger("DenseQRSolver::Solve");
+
+ const int num_rows = A->num_rows();
+ const int num_cols = A->num_cols();
+
+
+ if (per_solve_options.D != NULL) {
+ // Temporarily append a diagonal block to the A matrix, but undo
+ // it before returning the matrix to the user.
+ A->AppendDiagonal(per_solve_options.D);
+ }
+
// rhs = [b;0] to account for the additional rows in the lhs.
const int augmented_num_rows =
num_rows + ((per_solve_options.D != NULL) ? num_cols : 0);
diff --git a/internal/ceres/dense_qr_solver.h b/internal/ceres/dense_qr_solver.h
index f78fa72..e745c63 100644
--- a/internal/ceres/dense_qr_solver.h
+++ b/internal/ceres/dense_qr_solver.h
@@ -90,8 +90,22 @@
const LinearSolver::PerSolveOptions& per_solve_options,
double* x);
+ LinearSolver::Summary SolveUsingEigen(
+ DenseSparseMatrix* A,
+ const double* b,
+ const LinearSolver::PerSolveOptions& per_solve_options,
+ double* x);
+
+ LinearSolver::Summary SolveUsingLAPACK(
+ DenseSparseMatrix* A,
+ const double* b,
+ const LinearSolver::PerSolveOptions& per_solve_options,
+ double* x);
+
const LinearSolver::Options options_;
+ ColMajorMatrix lhs_;
Vector rhs_;
+ Vector work_;
CERES_DISALLOW_COPY_AND_ASSIGN(DenseQRSolver);
};
diff --git a/internal/ceres/iterative_schur_complement_solver.cc b/internal/ceres/iterative_schur_complement_solver.cc
index d39d7db..0b1cb89 100644
--- a/internal/ceres/iterative_schur_complement_solver.cc
+++ b/internal/ceres/iterative_schur_complement_solver.cc
@@ -97,8 +97,8 @@
Preconditioner::Options preconditioner_options;
preconditioner_options.type = options_.preconditioner_type;
- preconditioner_options.sparse_linear_algebra_library =
- options_.sparse_linear_algebra_library;
+ preconditioner_options.sparse_linear_algebra_library_type =
+ options_.sparse_linear_algebra_library_type;
preconditioner_options.num_threads = options_.num_threads;
preconditioner_options.row_block_size = options_.row_block_size;
preconditioner_options.e_block_size = options_.e_block_size;
diff --git a/internal/ceres/lapack.cc b/internal/ceres/lapack.cc
new file mode 100644
index 0000000..6e7d21b
--- /dev/null
+++ b/internal/ceres/lapack.cc
@@ -0,0 +1,135 @@
+// Ceres Solver - A fast non-linear least squares minimizer
+// Copyright 2013 Google Inc. All rights reserved.
+// http://code.google.com/p/ceres-solver/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+// * Neither the name of Google Inc. nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+// Author: sameeragarwal@google.com (Sameer Agarwal)
+
+#include "ceres/lapack.h"
+#include "glog/logging.h"
+
+// C interface to the LAPACK Cholesky factorization and triangular solve.
+extern "C" void dpotrf_(char* uplo,
+ int* n,
+ double* a,
+ int* lda,
+ int* info);
+
+extern "C" void dpotrs_(char* uplo,
+ int* n,
+ int* nrhs,
+ double* a,
+ int* lda,
+ double* b,
+ int* ldb,
+ int* info);
+
+extern "C" void dgels_(char* uplo,
+ int* m,
+ int* n,
+ int* nrhs,
+ double* a,
+ int* lda,
+ double* b,
+ int* ldb,
+ double* work,
+ int* lwork,
+ int* info);
+
+
+namespace ceres {
+namespace internal {
+
+int LAPACK::SolveInPlaceUsingCholesky(int num_rows,
+ const double* in_lhs,
+ double* rhs_and_solution) {
+#ifdef CERES_NO_LAPACK
+ LOG(FATAL) << "Ceres was built without a LAPACK library.";
+ return -1;
+#else
+ char uplo = 'L';
+ int n = num_rows;
+ int info = 0;
+ int nrhs = 1;
+ double* lhs = const_cast<double*>(in_lhs);
+
+ dpotrf_(&uplo, &n, lhs, &n, &info);
+ if (info != 0) {
+ LOG(INFO) << "Cholesky factorization (dpotrf) failed: " << info;
+ return info;
+ }
+
+ dpotrs_(&uplo, &n, &nrhs, lhs, &n, rhs_and_solution, &n, &info);
+ if (info != 0) {
+ LOG(INFO) << "Triangular solve (dpotrs) failed: " << info;
+ }
+
+ return info;
+#endif
+};
+
+int LAPACK::EstimateWorkSizeForQR(int num_rows, int num_cols) {
+#ifdef CERES_NO_LAPACK
+ LOG(FATAL) << "Ceres was built without a LAPACK library.";
+ return -1;
+#else
+ char trans = 'N';
+ int nrhs = 1;
+ int lwork = -1;
+ double work;
+ int info = 0;
+ dgels_(&trans, &num_rows, &num_cols, &nrhs, NULL, &num_rows, NULL, &num_rows, &work, &lwork, &info);
+ CHECK_EQ(info, 0);
+ return work;
+#endif
+};
+
+int LAPACK::SolveUsingQR(int num_rows,
+ int num_cols,
+ const double* in_lhs,
+ int work_size,
+ double* work,
+ double* rhs_and_solution) {
+#ifdef CERES_NO_LAPACK
+ LOG(FATAL) << "Ceres was built without a LAPACK library.";
+ return -1;
+#else
+ char trans = 'N';
+ int m = num_rows;
+ int n = num_cols;
+ int nrhs = 1;
+ int lda = num_rows;
+ int ldb = num_rows;
+ int info = 0;
+ double* lhs = const_cast<double*>(in_lhs);
+
+ dgels_(&trans, &m, &n, &nrhs, lhs, &lda, rhs_and_solution, &ldb, work, &work_size, &info);
+ return info;
+#endif
+};
+
+} // namespace internal
+} // namespace ceres
diff --git a/internal/ceres/lapack.h b/internal/ceres/lapack.h
new file mode 100644
index 0000000..4f3a88c
--- /dev/null
+++ b/internal/ceres/lapack.h
@@ -0,0 +1,88 @@
+// Ceres Solver - A fast non-linear least squares minimizer
+// Copyright 2013 Google Inc. All rights reserved.
+// http://code.google.com/p/ceres-solver/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+// * Neither the name of Google Inc. nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+// Author: sameeragarwal@google.com (Sameer Agarwal)
+
+#ifndef CERES_INTERNAL_LAPACK_H_
+#define CERES_INTERNAL_LAPACK_H_
+
+namespace ceres {
+namespace internal {
+
+class LAPACK {
+ public:
+ // Solve
+ //
+ // lhs * solution = rhs
+ //
+ // using a Cholesky factorization. Here
+ // lhs is a symmetric positive definite matrix. It is assumed to be
+ // column major and only the lower triangular part of the matrix is
+ // referenced.
+ //
+ // This function uses the LAPACK dpotrf and dpotrs routines.
+ //
+ // The return value is zero if the solve is successful.
+ static int SolveInPlaceUsingCholesky(int num_rows,
+ const double* lhs,
+ double* rhs_and_solution);
+
+ // The SolveUsingQR function requires a buffer for its temporary
+ // computation. Given the size of the lhs matrix, this function will
+ // return the size of the buffer needed.
+ static int EstimateWorkSizeForQR(int num_rows, int num_cols);
+
+ // Solve
+ //
+ // lhs * solution = rhs
+ //
+ // using a dense QR factorization. lhs is an arbitrary (possibly
+ // rectangular) matrix with full column rank.
+ //
+ // work is an array of size work_size that this routine uses for its
+ // temporary storage. The optimal size of this array can be obtained
+ // by calling EstimateWorkSizeForQR.
+ //
+ // When calling, rhs_and_solution contains the rhs, and upon return
+ // the first num_cols entries are the solution.
+ //
+ // This function uses the LAPACK dgels routine.
+ //
+ // The return value is zero if the solve is successful.
+ static int SolveUsingQR(int num_rows,
+ int num_cols,
+ const double* lhs,
+ int work_size,
+ double* work,
+ double* rhs_and_solution);
+};
+
+} // namespace internal
+} // namespace ceres
+
+#endif // CERES_INTERNAL_LAPACK_H_
diff --git a/internal/ceres/linear_solver.h b/internal/ceres/linear_solver.h
index 67bebe0..22691b3 100644
--- a/internal/ceres/linear_solver.h
+++ b/internal/ceres/linear_solver.h
@@ -74,7 +74,8 @@
Options()
: type(SPARSE_NORMAL_CHOLESKY),
preconditioner_type(JACOBI),
- sparse_linear_algebra_library(SUITE_SPARSE),
+ dense_linear_algebra_library_type(EIGEN),
+ sparse_linear_algebra_library_type(SUITE_SPARSE),
use_postordering(false),
min_num_iterations(1),
max_num_iterations(1),
@@ -89,7 +90,8 @@
PreconditionerType preconditioner_type;
- SparseLinearAlgebraLibraryType sparse_linear_algebra_library;
+ DenseLinearAlgebraLibraryType dense_linear_algebra_library_type;
+ SparseLinearAlgebraLibraryType sparse_linear_algebra_library_type;
// See solver.h for information about this flag.
bool use_postordering;
diff --git a/internal/ceres/partitioned_matrix_view.cc b/internal/ceres/partitioned_matrix_view.cc
index 5dad438..59eaff8 100644
--- a/internal/ceres/partitioned_matrix_view.cc
+++ b/internal/ceres/partitioned_matrix_view.cc
@@ -35,10 +35,10 @@
#include <algorithm>
#include <cstring>
#include <vector>
-#include "ceres/blas.h"
#include "ceres/block_sparse_matrix.h"
#include "ceres/block_structure.h"
#include "ceres/internal/eigen.h"
+#include "ceres/small_blas.h"
#include "glog/logging.h"
namespace ceres {
diff --git a/internal/ceres/preconditioner.h b/internal/ceres/preconditioner.h
index cb0a381..af64e3c 100644
--- a/internal/ceres/preconditioner.h
+++ b/internal/ceres/preconditioner.h
@@ -48,7 +48,7 @@
struct Options {
Options()
: type(JACOBI),
- sparse_linear_algebra_library(SUITE_SPARSE),
+ sparse_linear_algebra_library_type(SUITE_SPARSE),
num_threads(1),
row_block_size(Eigen::Dynamic),
e_block_size(Eigen::Dynamic),
@@ -57,7 +57,7 @@
PreconditionerType type;
- SparseLinearAlgebraLibraryType sparse_linear_algebra_library;
+ SparseLinearAlgebraLibraryType sparse_linear_algebra_library_type;
// If possible, how many threads the preconditioner can use.
int num_threads;
diff --git a/internal/ceres/program_evaluator.h b/internal/ceres/program_evaluator.h
index 19c7541..8aa2a39 100644
--- a/internal/ceres/program_evaluator.h
+++ b/internal/ceres/program_evaluator.h
@@ -86,13 +86,13 @@
#include <map>
#include <string>
#include <vector>
-#include "ceres/blas.h"
#include "ceres/execution_summary.h"
#include "ceres/internal/eigen.h"
#include "ceres/internal/scoped_ptr.h"
#include "ceres/parameter_block.h"
#include "ceres/program.h"
#include "ceres/residual_block.h"
+#include "ceres/small_blas.h"
namespace ceres {
namespace internal {
diff --git a/internal/ceres/residual_block.cc b/internal/ceres/residual_block.cc
index 649f3f7..621082a 100644
--- a/internal/ceres/residual_block.cc
+++ b/internal/ceres/residual_block.cc
@@ -34,8 +34,6 @@
#include <algorithm>
#include <cstddef>
#include <vector>
-
-#include "ceres/blas.h"
#include "ceres/corrector.h"
#include "ceres/parameter_block.h"
#include "ceres/residual_block_utils.h"
@@ -44,6 +42,7 @@
#include "ceres/internal/fixed_array.h"
#include "ceres/local_parameterization.h"
#include "ceres/loss_function.h"
+#include "ceres/small_blas.h"
using Eigen::Dynamic;
diff --git a/internal/ceres/schur_complement_solver.cc b/internal/ceres/schur_complement_solver.cc
index 0df9304..c21d3b5 100644
--- a/internal/ceres/schur_complement_solver.cc
+++ b/internal/ceres/schur_complement_solver.cc
@@ -47,6 +47,7 @@
#include "ceres/internal/eigen.h"
#include "ceres/internal/port.h"
#include "ceres/internal/scoped_ptr.h"
+#include "ceres/lapack.h"
#include "ceres/linear_solver.h"
#include "ceres/schur_complement_solver.h"
#include "ceres/suitesparse.h"
@@ -130,15 +131,22 @@
return true;
}
- // TODO(sameeragarwal): Add proper error handling; this completely ignores
- // the quality of the solution to the solve.
- VectorRef(solution, num_rows) =
- ConstMatrixRef(m->values(), num_rows, num_rows)
- .selfadjointView<Eigen::Upper>()
- .llt()
- .solve(ConstVectorRef(rhs(), num_rows));
+ if (options().dense_linear_algebra_library_type == EIGEN) {
+ // TODO(sameeragarwal): Add proper error handling; this completely ignores
+ // the quality of the solution to the solve.
+ VectorRef(solution, num_rows) =
+ ConstMatrixRef(m->values(), num_rows, num_rows)
+ .selfadjointView<Eigen::Upper>()
+ .llt()
+ .solve(ConstVectorRef(rhs(), num_rows));
+ return true;
+ }
- return true;
+ VectorRef(solution, num_rows) = ConstVectorRef(rhs(), num_rows);
+ const int info = LAPACK::SolveInPlaceUsingCholesky(num_rows,
+ m->values(),
+ solution);
+ return (info == 0);
}
#if !defined(CERES_NO_SUITESPARSE) || !defined(CERES_NO_CXSPARE)
@@ -243,18 +251,18 @@
}
bool SparseSchurComplementSolver::SolveReducedLinearSystem(double* solution) {
- switch (options().sparse_linear_algebra_library) {
+ switch (options().sparse_linear_algebra_library_type) {
case SUITE_SPARSE:
return SolveReducedLinearSystemUsingSuiteSparse(solution);
case CX_SPARSE:
return SolveReducedLinearSystemUsingCXSparse(solution);
default:
LOG(FATAL) << "Unknown sparse linear algebra library : "
- << options().sparse_linear_algebra_library;
+ << options().sparse_linear_algebra_library_type;
}
LOG(FATAL) << "Unknown sparse linear algebra library : "
- << options().sparse_linear_algebra_library;
+ << options().sparse_linear_algebra_library_type;
return false;
}
diff --git a/internal/ceres/schur_complement_solver_test.cc b/internal/ceres/schur_complement_solver_test.cc
index 206d4b5..745ea8e 100644
--- a/internal/ceres/schur_complement_solver_test.cc
+++ b/internal/ceres/schur_complement_solver_test.cc
@@ -87,7 +87,8 @@
int problem_id,
bool regularization,
ceres::LinearSolverType linear_solver_type,
- ceres::SparseLinearAlgebraLibraryType sparse_linear_algebra_library,
+ ceres::DenseLinearAlgebraLibraryType dense_linear_algebra_library_type,
+ ceres::SparseLinearAlgebraLibraryType sparse_linear_algebra_library_type,
bool use_postordering) {
SetUpFromProblemId(problem_id);
LinearSolver::Options options;
@@ -95,7 +96,10 @@
options.elimination_groups.push_back(
A->block_structure()->cols.size() - num_eliminate_blocks);
options.type = linear_solver_type;
- options.sparse_linear_algebra_library = sparse_linear_algebra_library;
+ options.dense_linear_algebra_library_type =
+ dense_linear_algebra_library_type;
+ options.sparse_linear_algebra_library_type =
+ sparse_linear_algebra_library_type;
options.use_postordering = use_postordering;
scoped_ptr<LinearSolver> solver(LinearSolver::Create(options));
@@ -131,53 +135,65 @@
scoped_array<double> sol_d;
};
-TEST_F(SchurComplementSolverTest, DenseSchurWithSmallProblem) {
- ComputeAndCompareSolutions(2, false, DENSE_SCHUR, SUITE_SPARSE, true);
- ComputeAndCompareSolutions(2, true, DENSE_SCHUR, SUITE_SPARSE, true);
+TEST_F(SchurComplementSolverTest, EigenBasedDenseSchurWithSmallProblem) {
+ ComputeAndCompareSolutions(2, false, DENSE_SCHUR, EIGEN, SUITE_SPARSE, true);
+ ComputeAndCompareSolutions(2, true, DENSE_SCHUR, EIGEN, SUITE_SPARSE, true);
}
-TEST_F(SchurComplementSolverTest, DenseSchurWithLargeProblem) {
- ComputeAndCompareSolutions(3, false, DENSE_SCHUR, SUITE_SPARSE, true);
- ComputeAndCompareSolutions(3, true, DENSE_SCHUR, SUITE_SPARSE, true);
+TEST_F(SchurComplementSolverTest, EigenBasedDenseSchurWithLargeProblem) {
+ ComputeAndCompareSolutions(3, false, DENSE_SCHUR, EIGEN, SUITE_SPARSE, true);
+ ComputeAndCompareSolutions(3, true, DENSE_SCHUR, EIGEN, SUITE_SPARSE, true);
}
+#ifndef CERES_NO_LAPACK
+TEST_F(SchurComplementSolverTest, LAPACKBasedDenseSchurWithSmallProblem) {
+ ComputeAndCompareSolutions(2, false, DENSE_SCHUR, LAPACK, SUITE_SPARSE, true);
+ ComputeAndCompareSolutions(2, true, DENSE_SCHUR, LAPACK, SUITE_SPARSE, true);
+}
+
+TEST_F(SchurComplementSolverTest, LAPACKBasedDenseSchurWithLargeProblem) {
+ ComputeAndCompareSolutions(3, false, DENSE_SCHUR, LAPACK, SUITE_SPARSE, true);
+ ComputeAndCompareSolutions(3, true, DENSE_SCHUR, LAPACK, SUITE_SPARSE, true);
+}
+#endif
+
#ifndef CERES_NO_SUITESPARSE
TEST_F(SchurComplementSolverTest,
SparseSchurWithSuiteSparseSmallProblemNoPostOrdering) {
- ComputeAndCompareSolutions(2, false, SPARSE_SCHUR, SUITE_SPARSE, false);
- ComputeAndCompareSolutions(2, true, SPARSE_SCHUR, SUITE_SPARSE, false);
+ ComputeAndCompareSolutions(2, false, SPARSE_SCHUR, EIGEN, SUITE_SPARSE, false);
+ ComputeAndCompareSolutions(2, true, SPARSE_SCHUR, EIGEN, SUITE_SPARSE, false);
}
TEST_F(SchurComplementSolverTest,
SparseSchurWithSuiteSparseSmallProblemPostOrdering) {
- ComputeAndCompareSolutions(2, false, SPARSE_SCHUR, SUITE_SPARSE, true);
- ComputeAndCompareSolutions(2, true, SPARSE_SCHUR, SUITE_SPARSE, true);
+ ComputeAndCompareSolutions(2, false, SPARSE_SCHUR, EIGEN, SUITE_SPARSE, true);
+ ComputeAndCompareSolutions(2, true, SPARSE_SCHUR, EIGEN, SUITE_SPARSE, true);
}
TEST_F(SchurComplementSolverTest,
SparseSchurWithSuiteSparseLargeProblemNoPostOrdering) {
- ComputeAndCompareSolutions(3, false, SPARSE_SCHUR, SUITE_SPARSE, false);
- ComputeAndCompareSolutions(3, true, SPARSE_SCHUR, SUITE_SPARSE, false);
+ ComputeAndCompareSolutions(3, false, SPARSE_SCHUR, EIGEN, SUITE_SPARSE, false);
+ ComputeAndCompareSolutions(3, true, SPARSE_SCHUR, EIGEN, SUITE_SPARSE, false);
}
TEST_F(SchurComplementSolverTest,
SparseSchurWithSuiteSparseLargeProblemPostOrdering) {
- ComputeAndCompareSolutions(3, false, SPARSE_SCHUR, SUITE_SPARSE, true);
- ComputeAndCompareSolutions(3, true, SPARSE_SCHUR, SUITE_SPARSE, true);
+ ComputeAndCompareSolutions(3, false, SPARSE_SCHUR, EIGEN, SUITE_SPARSE, true);
+ ComputeAndCompareSolutions(3, true, SPARSE_SCHUR, EIGEN, SUITE_SPARSE, true);
}
#endif // CERES_NO_SUITESPARSE
#ifndef CERES_NO_CXSPARSE
TEST_F(SchurComplementSolverTest,
SparseSchurWithSuiteSparseSmallProblem) {
- ComputeAndCompareSolutions(2, false, SPARSE_SCHUR, SUITE_SPARSE, true);
- ComputeAndCompareSolutions(2, true, SPARSE_SCHUR, SUITE_SPARSE, true);
+ ComputeAndCompareSolutions(2, false, SPARSE_SCHUR, EIGEN, CX_SPARSE, true);
+ ComputeAndCompareSolutions(2, true, SPARSE_SCHUR, EIGEN, CX_SPARSE, true);
}
TEST_F(SchurComplementSolverTest,
SparseSchurWithSuiteSparseLargeProblem) {
- ComputeAndCompareSolutions(3, false, SPARSE_SCHUR, SUITE_SPARSE, true);
- ComputeAndCompareSolutions(3, true, SPARSE_SCHUR, SUITE_SPARSE, true);
+ ComputeAndCompareSolutions(3, false, SPARSE_SCHUR, EIGEN, CX_SPARSE, true);
+ ComputeAndCompareSolutions(3, true, SPARSE_SCHUR, EIGEN, CX_SPARSE, true);
}
#endif // CERES_NO_CXSPARSE
diff --git a/internal/ceres/schur_eliminator_impl.h b/internal/ceres/schur_eliminator_impl.h
index f072c88..c09b7fb 100644
--- a/internal/ceres/schur_eliminator_impl.h
+++ b/internal/ceres/schur_eliminator_impl.h
@@ -51,8 +51,6 @@
#include <algorithm>
#include <map>
-
-#include "ceres/blas.h"
#include "ceres/block_random_access_matrix.h"
#include "ceres/block_sparse_matrix.h"
#include "ceres/block_structure.h"
@@ -61,6 +59,7 @@
#include "ceres/internal/scoped_ptr.h"
#include "ceres/map_util.h"
#include "ceres/schur_eliminator.h"
+#include "ceres/small_blas.h"
#include "ceres/stl_util.h"
#include "Eigen/Dense"
#include "glog/logging.h"
diff --git a/internal/ceres/small_blas.h b/internal/ceres/small_blas.h
new file mode 100644
index 0000000..e14e664
--- /dev/null
+++ b/internal/ceres/small_blas.h
@@ -0,0 +1,406 @@
+// Ceres Solver - A fast non-linear least squares minimizer
+// Copyright 2013 Google Inc. All rights reserved.
+// http://code.google.com/p/ceres-solver/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+// * Neither the name of Google Inc. nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+// Author: sameeragarwal@google.com (Sameer Agarwal)
+//
+// Simple blas functions for use in the Schur Eliminator. These are
+// fairly basic implementations which already yield a significant
+// speedup in the eliminator performance.
+
+#ifndef CERES_INTERNAL_SMALL_BLAS_H_
+#define CERES_INTERNAL_SMALL_BLAS_H_
+
+#include "ceres/internal/eigen.h"
+#include "glog/logging.h"
+
+namespace ceres {
+namespace internal {
+
+// Remove the ".noalias()" annotation from the matrix matrix
+// mutliplies to produce a correct build with the Android NDK,
+// including versions 6, 7, 8, and 8b, when built with STLPort and the
+// non-standalone toolchain (i.e. ndk-build). This appears to be a
+// compiler bug; if the workaround is not in place, the line
+//
+// block.noalias() -= A * B;
+//
+// gets compiled to
+//
+// block.noalias() += A * B;
+//
+// which breaks schur elimination. Introducing a temporary by removing the
+// .noalias() annotation causes the issue to disappear. Tracking this
+// issue down was tricky, since the test suite doesn't run when built with
+// the non-standalone toolchain.
+//
+// TODO(keir): Make a reproduction case for this and send it upstream.
+#ifdef CERES_WORK_AROUND_ANDROID_NDK_COMPILER_BUG
+#define CERES_MAYBE_NOALIAS
+#else
+#define CERES_MAYBE_NOALIAS .noalias()
+#endif
+
+// The following three macros are used to share code and reduce
+// template junk across the various GEMM variants.
+#define CERES_GEMM_BEGIN(name) \
+ template<int kRowA, int kColA, int kRowB, int kColB, int kOperation> \
+ inline void name(const double* A, \
+ const int num_row_a, \
+ const int num_col_a, \
+ const double* B, \
+ const int num_row_b, \
+ const int num_col_b, \
+ double* C, \
+ const int start_row_c, \
+ const int start_col_c, \
+ const int row_stride_c, \
+ const int col_stride_c)
+
+#define CERES_GEMM_NAIVE_HEADER \
+ DCHECK_GT(num_row_a, 0); \
+ DCHECK_GT(num_col_a, 0); \
+ DCHECK_GT(num_row_b, 0); \
+ DCHECK_GT(num_col_b, 0); \
+ DCHECK_GE(start_row_c, 0); \
+ DCHECK_GE(start_col_c, 0); \
+ DCHECK_GT(row_stride_c, 0); \
+ DCHECK_GT(col_stride_c, 0); \
+ DCHECK((kRowA == Eigen::Dynamic) || (kRowA == num_row_a)); \
+ DCHECK((kColA == Eigen::Dynamic) || (kColA == num_col_a)); \
+ DCHECK((kRowB == Eigen::Dynamic) || (kRowB == num_row_b)); \
+ DCHECK((kColB == Eigen::Dynamic) || (kColB == num_col_b)); \
+ const int NUM_ROW_A = (kRowA != Eigen::Dynamic ? kRowA : num_row_a); \
+ const int NUM_COL_A = (kColA != Eigen::Dynamic ? kColA : num_col_a); \
+  const int NUM_ROW_B = (kRowB != Eigen::Dynamic ? kRowB : num_row_b);    \
+ const int NUM_COL_B = (kColB != Eigen::Dynamic ? kColB : num_col_b);
+
+#define CERES_GEMM_EIGEN_HEADER \
+ const typename EigenTypes<kRowA, kColA>::ConstMatrixRef \
+ Aref(A, num_row_a, num_col_a); \
+ const typename EigenTypes<kRowB, kColB>::ConstMatrixRef \
+ Bref(B, num_row_b, num_col_b); \
+ MatrixRef Cref(C, row_stride_c, col_stride_c); \
+
+#define CERES_CALL_GEMM(name) \
+ name<kRowA, kColA, kRowB, kColB, kOperation>( \
+ A, num_row_a, num_col_a, \
+ B, num_row_b, num_col_b, \
+ C, start_row_c, start_col_c, row_stride_c, col_stride_c);
+
+
+// For each of the matrix-matrix functions below there are three
+// variants: Foo, FooNaive and FooEigen. Foo is the one to be called
+// by the user. FooNaive is a basic loop based implementation and
+// FooEigen uses Eigen's implementation. Foo chooses between FooNaive
+// and FooEigen depending on how many of the template arguments are
+// fixed at compile time: FooEigen is called only when all matrix
+// dimensions are compile-time constants, and FooNaive is called
+// otherwise. This split currently gives the best performance of the
+// two options.
+//
+// The MatrixMatrixMultiply variants compute:
+//
+// C op A * B;
+//
+// The MatrixTransposeMatrixMultiply variants compute:
+//
+// C op A' * B
+//
+// where op can be +=, -=, or =.
+//
+// The template parameters (kRowA, kColA, kRowB, kColB) allow
+// specialization of the loop at compile time. If this information is
+// not available, then Eigen::Dynamic should be used as the template
+// argument.
+//
+// kOperation = 1 -> C += A * B
+// kOperation = -1 -> C -= A * B
+// kOperation = 0 -> C = A * B
+//
+// The functions can write into matrices C which are larger than the
+// matrix A * B. This is done by specifying the true size of C via
+// row_stride_c and col_stride_c, and then indicating where A * B
+// should be written into by start_row_c and start_col_c.
+//
+// Graphically if row_stride_c = 10, col_stride_c = 12, start_row_c =
+// 4 and start_col_c = 5, then if A = 3x2 and B = 2x4, we get
+//
+// ------------
+// ------------
+// ------------
+// ------------
+// -----xxxx---
+// -----xxxx---
+// -----xxxx---
+// ------------
+// ------------
+// ------------
+//
+CERES_GEMM_BEGIN(MatrixMatrixMultiplyEigen) {
+ CERES_GEMM_EIGEN_HEADER
+ Eigen::Block<MatrixRef, kRowA, kColB>
+ block(Cref, start_row_c, start_col_c, num_row_a, num_col_b);
+
+ if (kOperation > 0) {
+ block CERES_MAYBE_NOALIAS += Aref * Bref;
+ } else if (kOperation < 0) {
+ block CERES_MAYBE_NOALIAS -= Aref * Bref;
+ } else {
+ block CERES_MAYBE_NOALIAS = Aref * Bref;
+ }
+}
+
+CERES_GEMM_BEGIN(MatrixMatrixMultiplyNaive) {
+ CERES_GEMM_NAIVE_HEADER
+ DCHECK_EQ(NUM_COL_A, NUM_ROW_B);
+
+ const int NUM_ROW_C = NUM_ROW_A;
+ const int NUM_COL_C = NUM_COL_B;
+ DCHECK_LE(start_row_c + NUM_ROW_C, row_stride_c);
+ DCHECK_LE(start_col_c + NUM_COL_C, col_stride_c);
+
+ for (int row = 0; row < NUM_ROW_C; ++row) {
+ for (int col = 0; col < NUM_COL_C; ++col) {
+ double tmp = 0.0;
+ for (int k = 0; k < NUM_COL_A; ++k) {
+ tmp += A[row * NUM_COL_A + k] * B[k * NUM_COL_B + col];
+ }
+
+ const int index = (row + start_row_c) * col_stride_c + start_col_c + col;
+ if (kOperation > 0) {
+ C[index] += tmp;
+ } else if (kOperation < 0) {
+ C[index] -= tmp;
+ } else {
+ C[index] = tmp;
+ }
+ }
+ }
+}
+
+CERES_GEMM_BEGIN(MatrixMatrixMultiply) {
+#ifdef CERES_NO_CUSTOM_BLAS
+
+ CERES_CALL_GEMM(MatrixMatrixMultiplyEigen)
+ return;
+
+#else
+
+ if (kRowA != Eigen::Dynamic && kColA != Eigen::Dynamic &&
+ kRowB != Eigen::Dynamic && kColB != Eigen::Dynamic) {
+ CERES_CALL_GEMM(MatrixMatrixMultiplyEigen)
+ } else {
+ CERES_CALL_GEMM(MatrixMatrixMultiplyNaive)
+ }
+
+#endif
+}
+
+CERES_GEMM_BEGIN(MatrixTransposeMatrixMultiplyEigen) {
+ CERES_GEMM_EIGEN_HEADER
+ Eigen::Block<MatrixRef, kColA, kColB> block(Cref,
+ start_row_c, start_col_c,
+ num_col_a, num_col_b);
+ if (kOperation > 0) {
+ block CERES_MAYBE_NOALIAS += Aref.transpose() * Bref;
+ } else if (kOperation < 0) {
+ block CERES_MAYBE_NOALIAS -= Aref.transpose() * Bref;
+ } else {
+ block CERES_MAYBE_NOALIAS = Aref.transpose() * Bref;
+ }
+}
+
+CERES_GEMM_BEGIN(MatrixTransposeMatrixMultiplyNaive) {
+ CERES_GEMM_NAIVE_HEADER
+ DCHECK_EQ(NUM_ROW_A, NUM_ROW_B);
+
+ const int NUM_ROW_C = NUM_COL_A;
+ const int NUM_COL_C = NUM_COL_B;
+ DCHECK_LE(start_row_c + NUM_ROW_C, row_stride_c);
+ DCHECK_LE(start_col_c + NUM_COL_C, col_stride_c);
+
+ for (int row = 0; row < NUM_ROW_C; ++row) {
+ for (int col = 0; col < NUM_COL_C; ++col) {
+ double tmp = 0.0;
+ for (int k = 0; k < NUM_ROW_A; ++k) {
+ tmp += A[k * NUM_COL_A + row] * B[k * NUM_COL_B + col];
+ }
+
+ const int index = (row + start_row_c) * col_stride_c + start_col_c + col;
+ if (kOperation > 0) {
+        C[index] += tmp;
+      } else if (kOperation < 0) {
+        C[index] -= tmp;
+      } else {
+        C[index] = tmp;
+ }
+ }
+ }
+}
+
+CERES_GEMM_BEGIN(MatrixTransposeMatrixMultiply) {
+#ifdef CERES_NO_CUSTOM_BLAS
+
+ CERES_CALL_GEMM(MatrixTransposeMatrixMultiplyEigen)
+ return;
+
+#else
+
+ if (kRowA != Eigen::Dynamic && kColA != Eigen::Dynamic &&
+ kRowB != Eigen::Dynamic && kColB != Eigen::Dynamic) {
+ CERES_CALL_GEMM(MatrixTransposeMatrixMultiplyEigen)
+ } else {
+ CERES_CALL_GEMM(MatrixTransposeMatrixMultiplyNaive)
+ }
+
+#endif
+}
+
+// Matrix-Vector multiplication
+//
+// c op A * b;
+//
+// where op can be +=, -=, or =.
+//
+// The template parameters (kRowA, kColA) allow specialization of the
+// loop at compile time. If this information is not available, then
+// Eigen::Dynamic should be used as the template argument.
+//
+// kOperation = 1  -> c += A * b
+// kOperation = -1 -> c -= A * b
+// kOperation = 0  -> c = A * b
+template<int kRowA, int kColA, int kOperation>
+inline void MatrixVectorMultiply(const double* A,
+ const int num_row_a,
+ const int num_col_a,
+ const double* b,
+ double* c) {
+#ifdef CERES_NO_CUSTOM_BLAS
+ const typename EigenTypes<kRowA, kColA>::ConstMatrixRef
+ Aref(A, num_row_a, num_col_a);
+ const typename EigenTypes<kColA>::ConstVectorRef bref(b, num_col_a);
+ typename EigenTypes<kRowA>::VectorRef cref(c, num_row_a);
+
+ // lazyProduct works better than .noalias() for matrix-vector
+ // products.
+ if (kOperation > 0) {
+ cref += Aref.lazyProduct(bref);
+ } else if (kOperation < 0) {
+ cref -= Aref.lazyProduct(bref);
+ } else {
+ cref = Aref.lazyProduct(bref);
+ }
+#else
+
+ DCHECK_GT(num_row_a, 0);
+ DCHECK_GT(num_col_a, 0);
+ DCHECK((kRowA == Eigen::Dynamic) || (kRowA == num_row_a));
+ DCHECK((kColA == Eigen::Dynamic) || (kColA == num_col_a));
+
+ const int NUM_ROW_A = (kRowA != Eigen::Dynamic ? kRowA : num_row_a);
+ const int NUM_COL_A = (kColA != Eigen::Dynamic ? kColA : num_col_a);
+
+ for (int row = 0; row < NUM_ROW_A; ++row) {
+ double tmp = 0.0;
+ for (int col = 0; col < NUM_COL_A; ++col) {
+ tmp += A[row * NUM_COL_A + col] * b[col];
+ }
+
+ if (kOperation > 0) {
+ c[row] += tmp;
+ } else if (kOperation < 0) {
+ c[row] -= tmp;
+ } else {
+ c[row] = tmp;
+ }
+ }
+#endif // CERES_NO_CUSTOM_BLAS
+}
+
+// Similar to MatrixVectorMultiply, except that A is transposed, i.e.,
+//
+// c op A' * b;
+template<int kRowA, int kColA, int kOperation>
+inline void MatrixTransposeVectorMultiply(const double* A,
+ const int num_row_a,
+ const int num_col_a,
+ const double* b,
+ double* c) {
+#ifdef CERES_NO_CUSTOM_BLAS
+ const typename EigenTypes<kRowA, kColA>::ConstMatrixRef
+ Aref(A, num_row_a, num_col_a);
+ const typename EigenTypes<kRowA>::ConstVectorRef bref(b, num_row_a);
+ typename EigenTypes<kColA>::VectorRef cref(c, num_col_a);
+
+ // lazyProduct works better than .noalias() for matrix-vector
+ // products.
+ if (kOperation > 0) {
+ cref += Aref.transpose().lazyProduct(bref);
+ } else if (kOperation < 0) {
+ cref -= Aref.transpose().lazyProduct(bref);
+ } else {
+ cref = Aref.transpose().lazyProduct(bref);
+ }
+#else
+
+ DCHECK_GT(num_row_a, 0);
+ DCHECK_GT(num_col_a, 0);
+ DCHECK((kRowA == Eigen::Dynamic) || (kRowA == num_row_a));
+ DCHECK((kColA == Eigen::Dynamic) || (kColA == num_col_a));
+
+ const int NUM_ROW_A = (kRowA != Eigen::Dynamic ? kRowA : num_row_a);
+ const int NUM_COL_A = (kColA != Eigen::Dynamic ? kColA : num_col_a);
+
+ for (int row = 0; row < NUM_COL_A; ++row) {
+ double tmp = 0.0;
+ for (int col = 0; col < NUM_ROW_A; ++col) {
+ tmp += A[col * NUM_COL_A + row] * b[col];
+ }
+
+ if (kOperation > 0) {
+ c[row] += tmp;
+ } else if (kOperation < 0) {
+ c[row] -= tmp;
+ } else {
+ c[row] = tmp;
+ }
+ }
+#endif // CERES_NO_CUSTOM_BLAS
+}
+
+
+#undef CERES_MAYBE_NOALIAS
+#undef CERES_GEMM_BEGIN
+#undef CERES_GEMM_EIGEN_HEADER
+#undef CERES_GEMM_NAIVE_HEADER
+#undef CERES_CALL_GEMM
+
+} // namespace internal
+} // namespace ceres
+
+#endif // CERES_INTERNAL_SMALL_BLAS_H_
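
[Note, not part of the patch] A minimal sketch of how the new MatrixMatrixMultiply
helper above is meant to be called, mirroring the striding picture in the header
comment: A is 3x2, B is 2x4, and A * B is written into a 10x12 row-major buffer C
starting at (4, 5). The function name ExampleSmallBlasCall is hypothetical, and the
header is internal to Ceres, so this only compiles inside the Ceres source tree.

// Assumes a build that can see internal/ceres/small_blas.h.
#include <vector>
#include "ceres/small_blas.h"

void ExampleSmallBlasCall() {
  const double A[6] = {1, 2,
                       3, 4,
                       5, 6};            // 3x2, row-major.
  const double B[8] = {1, 0, 0, 1,
                       0, 1, 1, 0};      // 2x4, row-major.
  std::vector<double> C(10 * 12, 0.0);   // 10x12 output buffer.

  // kOperation = 1, i.e. C(4:6, 5:8) += A * B. All dimensions are
  // Eigen::Dynamic, so the naive loop variant is selected.
  ceres::internal::MatrixMatrixMultiply<
      Eigen::Dynamic, Eigen::Dynamic, Eigen::Dynamic, Eigen::Dynamic, 1>(
          A, 3, 2, B, 2, 4, C.data(), 4, 5, 10, 12);
}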
diff --git a/internal/ceres/blas_test.cc b/internal/ceres/small_blas_test.cc
similarity index 99%
rename from internal/ceres/blas_test.cc
rename to internal/ceres/small_blas_test.cc
index efa7e7b..b8b5bc5 100644
--- a/internal/ceres/blas_test.cc
+++ b/internal/ceres/small_blas_test.cc
@@ -28,7 +28,7 @@
//
// Author: keir@google.com (Keir Mierle)
-#include "ceres/blas.h"
+#include "ceres/small_blas.h"
#include "gtest/gtest.h"
#include "ceres/internal/eigen.h"
diff --git a/internal/ceres/solver.cc b/internal/ceres/solver.cc
index 5d8447d..3b67746 100644
--- a/internal/ceres/solver.cc
+++ b/internal/ceres/solver.cc
@@ -120,7 +120,8 @@
inner_iterations_used(false),
preconditioner_type(IDENTITY),
trust_region_strategy_type(LEVENBERG_MARQUARDT),
- sparse_linear_algebra_library(SUITE_SPARSE),
+ dense_linear_algebra_library_type(EIGEN),
+ sparse_linear_algebra_library_type(SUITE_SPARSE),
line_search_direction_type(LBFGS),
line_search_type(ARMIJO) {
}
@@ -192,6 +193,15 @@
// TRUST_SEARCH HEADER
StringAppendF(&report, "\nMinimizer %19s\n",
"TRUST_REGION");
+
+ if (linear_solver_type_used == DENSE_NORMAL_CHOLESKY ||
+ linear_solver_type_used == DENSE_SCHUR ||
+ linear_solver_type_used == DENSE_QR) {
+ StringAppendF(&report, "\nDense linear algebra library %15s\n",
+ DenseLinearAlgebraLibraryTypeToString(
+ dense_linear_algebra_library_type));
+ }
+
if (linear_solver_type_used == SPARSE_NORMAL_CHOLESKY ||
linear_solver_type_used == SPARSE_SCHUR ||
(linear_solver_type_used == ITERATIVE_SCHUR &&
@@ -199,7 +209,7 @@
preconditioner_type == CLUSTER_TRIDIAGONAL))) {
StringAppendF(&report, "\nSparse linear algebra library %15s\n",
SparseLinearAlgebraLibraryTypeToString(
- sparse_linear_algebra_library));
+ sparse_linear_algebra_library_type));
}
StringAppendF(&report, "Trust region strategy %19s",
diff --git a/internal/ceres/solver_impl.cc b/internal/ceres/solver_impl.cc
index d6ef731..bd731fe 100644
--- a/internal/ceres/solver_impl.cc
+++ b/internal/ceres/solver_impl.cc
@@ -506,8 +506,10 @@
original_options.num_linear_solver_threads;
summary->num_linear_solver_threads_used = options.num_linear_solver_threads;
- summary->sparse_linear_algebra_library =
- options.sparse_linear_algebra_library;
+ summary->dense_linear_algebra_library_type =
+ options.dense_linear_algebra_library_type;
+ summary->sparse_linear_algebra_library_type =
+ options.sparse_linear_algebra_library_type;
summary->trust_region_strategy_type = options.trust_region_strategy_type;
summary->dogleg_type = options.dogleg_type;
@@ -1092,7 +1094,7 @@
if (IsSchurType(options->linear_solver_type)) {
if (!ReorderProgramForSchurTypeLinearSolver(
options->linear_solver_type,
- options->sparse_linear_algebra_library,
+ options->sparse_linear_algebra_library_type,
problem_impl->parameter_map(),
linear_solver_ordering,
transformed_program.get(),
@@ -1104,7 +1106,7 @@
if (options->linear_solver_type == SPARSE_NORMAL_CHOLESKY) {
if (!ReorderProgramForSparseNormalCholesky(
- options->sparse_linear_algebra_library,
+ options->sparse_linear_algebra_library_type,
linear_solver_ordering,
transformed_program.get(),
error)) {
@@ -1134,9 +1136,32 @@
}
}
+#ifdef CERES_NO_LAPACK
+ if (options->linear_solver_type == DENSE_NORMAL_CHOLESKY &&
+ options->dense_linear_algebra_library_type == LAPACK) {
+ *error = "Can't use DENSE_NORMAL_CHOLESKY with LAPACK because "
+ "LAPACK was not enabled when Ceres was built.";
+ return NULL;
+ }
+
+ if (options->linear_solver_type == DENSE_QR &&
+ options->dense_linear_algebra_library_type == LAPACK) {
+ *error = "Can't use DENSE_QR with LAPACK because "
+ "LAPACK was not enabled when Ceres was built.";
+ return NULL;
+ }
+
+ if (options->linear_solver_type == DENSE_SCHUR &&
+ options->dense_linear_algebra_library_type == LAPACK) {
+ *error = "Can't use DENSE_SCHUR with LAPACK because "
+ "LAPACK was not enabled when Ceres was built.";
+ return NULL;
+ }
+#endif
+
#ifdef CERES_NO_SUITESPARSE
if (options->linear_solver_type == SPARSE_NORMAL_CHOLESKY &&
- options->sparse_linear_algebra_library == SUITE_SPARSE) {
+ options->sparse_linear_algebra_library_type == SUITE_SPARSE) {
*error = "Can't use SPARSE_NORMAL_CHOLESKY with SUITESPARSE because "
"SuiteSparse was not enabled when Ceres was built.";
return NULL;
@@ -1157,7 +1182,7 @@
#ifdef CERES_NO_CXSPARSE
if (options->linear_solver_type == SPARSE_NORMAL_CHOLESKY &&
- options->sparse_linear_algebra_library == CX_SPARSE) {
+ options->sparse_linear_algebra_library_type == CX_SPARSE) {
*error = "Can't use SPARSE_NORMAL_CHOLESKY with CXSPARSE because "
"CXSparse was not enabled when Ceres was built.";
return NULL;
@@ -1194,8 +1219,10 @@
options->max_linear_solver_iterations;
linear_solver_options.type = options->linear_solver_type;
linear_solver_options.preconditioner_type = options->preconditioner_type;
- linear_solver_options.sparse_linear_algebra_library =
- options->sparse_linear_algebra_library;
+ linear_solver_options.sparse_linear_algebra_library_type =
+ options->sparse_linear_algebra_library_type;
+ linear_solver_options.dense_linear_algebra_library_type =
+ options->dense_linear_algebra_library_type;
linear_solver_options.use_postordering = options->use_postordering;
// Ignore user's postordering preferences and force it to be true if
@@ -1204,7 +1231,7 @@
// done.
#if !defined(CERES_NO_SUITESPARSE) && defined(CERES_NO_CAMD)
if (IsSchurType(linear_solver_options.type) &&
- linear_solver_options.sparse_linear_algebra_library == SUITE_SPARSE) {
+ linear_solver_options.sparse_linear_algebra_library_type == SUITE_SPARSE) {
linear_solver_options.use_postordering = true;
}
#endif
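
[Note, not part of the patch] A minimal sketch of the user-facing switch these
checks guard: selecting LAPACK for a dense solver via the new
Solver::Options::dense_linear_algebra_library_type field. In a build where
CERES_NO_LAPACK is defined, Solve() reports the error message added above rather
than crashing. The function name SolveWithLapackDenseQR is hypothetical.

#include <iostream>
#include "ceres/ceres.h"

void SolveWithLapackDenseQR(ceres::Problem* problem) {
  ceres::Solver::Options options;
  options.linear_solver_type = ceres::DENSE_QR;
  options.dense_linear_algebra_library_type = ceres::LAPACK;  // or ceres::EIGEN

  ceres::Solver::Summary summary;
  ceres::Solve(options, problem, &summary);
  std::cout << summary.FullReport() << "\n";
}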
diff --git a/internal/ceres/solver_impl_test.cc b/internal/ceres/solver_impl_test.cc
index d81858c..4f1e549 100644
--- a/internal/ceres/solver_impl_test.cc
+++ b/internal/ceres/solver_impl_test.cc
@@ -592,7 +592,7 @@
#ifndef CERES_NO_SUITESPARSE
options.linear_solver_type = SPARSE_NORMAL_CHOLESKY;
- options.sparse_linear_algebra_library = SUITE_SPARSE;
+ options.sparse_linear_algebra_library_type = SUITE_SPARSE;
solver.reset(SolverImpl::CreateLinearSolver(&options, &error));
EXPECT_EQ(options.linear_solver_type, SPARSE_NORMAL_CHOLESKY);
EXPECT_TRUE(solver.get() != NULL);
@@ -600,7 +600,7 @@
#ifndef CERES_NO_CXSPARSE
options.linear_solver_type = SPARSE_NORMAL_CHOLESKY;
- options.sparse_linear_algebra_library = CX_SPARSE;
+ options.sparse_linear_algebra_library_type = CX_SPARSE;
solver.reset(SolverImpl::CreateLinearSolver(&options, &error));
EXPECT_EQ(options.linear_solver_type, SPARSE_NORMAL_CHOLESKY);
EXPECT_TRUE(solver.get() != NULL);
diff --git a/internal/ceres/sparse_normal_cholesky_solver.cc b/internal/ceres/sparse_normal_cholesky_solver.cc
index 9601142..cfb0f17 100644
--- a/internal/ceres/sparse_normal_cholesky_solver.cc
+++ b/internal/ceres/sparse_normal_cholesky_solver.cc
@@ -85,18 +85,18 @@
const double* b,
const LinearSolver::PerSolveOptions& per_solve_options,
double * x) {
- switch (options_.sparse_linear_algebra_library) {
+ switch (options_.sparse_linear_algebra_library_type) {
case SUITE_SPARSE:
return SolveImplUsingSuiteSparse(A, b, per_solve_options, x);
case CX_SPARSE:
return SolveImplUsingCXSparse(A, b, per_solve_options, x);
default:
LOG(FATAL) << "Unknown sparse linear algebra library : "
- << options_.sparse_linear_algebra_library;
+ << options_.sparse_linear_algebra_library_type;
}
LOG(FATAL) << "Unknown sparse linear algebra library : "
- << options_.sparse_linear_algebra_library;
+ << options_.sparse_linear_algebra_library_type;
return LinearSolver::Summary();
}
diff --git a/internal/ceres/system_test.cc b/internal/ceres/system_test.cc
index 095b51e..7b0e02d 100644
--- a/internal/ceres/system_test.cc
+++ b/internal/ceres/system_test.cc
@@ -64,21 +64,21 @@
// Struct used for configuring the solver.
struct SolverConfig {
SolverConfig(LinearSolverType linear_solver_type,
- SparseLinearAlgebraLibraryType sparse_linear_algebra_library,
+ SparseLinearAlgebraLibraryType sparse_linear_algebra_library_type,
bool use_automatic_ordering)
: linear_solver_type(linear_solver_type),
- sparse_linear_algebra_library(sparse_linear_algebra_library),
+ sparse_linear_algebra_library_type(sparse_linear_algebra_library_type),
use_automatic_ordering(use_automatic_ordering),
preconditioner_type(IDENTITY),
num_threads(1) {
}
SolverConfig(LinearSolverType linear_solver_type,
- SparseLinearAlgebraLibraryType sparse_linear_algebra_library,
+ SparseLinearAlgebraLibraryType sparse_linear_algebra_library_type,
bool use_automatic_ordering,
PreconditionerType preconditioner_type)
: linear_solver_type(linear_solver_type),
- sparse_linear_algebra_library(sparse_linear_algebra_library),
+ sparse_linear_algebra_library_type(sparse_linear_algebra_library_type),
use_automatic_ordering(use_automatic_ordering),
preconditioner_type(preconditioner_type),
num_threads(1) {
@@ -88,14 +88,14 @@
return StringPrintf(
"(%s, %s, %s, %s, %d)",
LinearSolverTypeToString(linear_solver_type),
- SparseLinearAlgebraLibraryTypeToString(sparse_linear_algebra_library),
+ SparseLinearAlgebraLibraryTypeToString(sparse_linear_algebra_library_type),
use_automatic_ordering ? "AUTOMATIC" : "USER",
PreconditionerTypeToString(preconditioner_type),
num_threads);
}
LinearSolverType linear_solver_type;
- SparseLinearAlgebraLibraryType sparse_linear_algebra_library;
+ SparseLinearAlgebraLibraryType sparse_linear_algebra_library_type;
bool use_automatic_ordering;
PreconditionerType preconditioner_type;
int num_threads;
@@ -130,8 +130,8 @@
Solver::Options& options = *(system_test_problem->mutable_solver_options());
options.linear_solver_type = config.linear_solver_type;
- options.sparse_linear_algebra_library =
- config.sparse_linear_algebra_library;
+ options.sparse_linear_algebra_library_type =
+ config.sparse_linear_algebra_library_type;
options.preconditioner_type = config.preconditioner_type;
options.num_threads = config.num_threads;
options.num_linear_solver_threads = config.num_threads;
@@ -281,9 +281,9 @@
TEST(SystemTest, PowellsFunction) {
vector<SolverConfig> configs;
-#define CONFIGURE(linear_solver, sparse_linear_algebra_library, ordering) \
- configs.push_back(SolverConfig(linear_solver, \
- sparse_linear_algebra_library, \
+#define CONFIGURE(linear_solver, sparse_linear_algebra_library_type, ordering) \
+ configs.push_back(SolverConfig(linear_solver, \
+ sparse_linear_algebra_library_type, \
ordering))
CONFIGURE(DENSE_QR, SUITE_SPARSE, kAutomaticOrdering);
@@ -485,9 +485,9 @@
TEST(SystemTest, BundleAdjustmentProblem) {
vector<SolverConfig> configs;
-#define CONFIGURE(linear_solver, sparse_linear_algebra_library, ordering, preconditioner) \
+#define CONFIGURE(linear_solver, sparse_linear_algebra_library_type, ordering, preconditioner) \
configs.push_back(SolverConfig(linear_solver, \
- sparse_linear_algebra_library, \
+ sparse_linear_algebra_library_type, \
ordering, \
preconditioner))
diff --git a/internal/ceres/types.cc b/internal/ceres/types.cc
index 164185e..a97f1a5 100644
--- a/internal/ceres/types.cc
+++ b/internal/ceres/types.cc
@@ -101,7 +101,6 @@
}
}
-
bool StringToSparseLinearAlgebraLibraryType(
string value,
SparseLinearAlgebraLibraryType* type) {
@@ -111,6 +110,25 @@
return false;
}
+const char* DenseLinearAlgebraLibraryTypeToString(
+ DenseLinearAlgebraLibraryType type) {
+ switch (type) {
+ CASESTR(EIGEN);
+ CASESTR(LAPACK);
+ default:
+ return "UNKNOWN";
+ }
+}
+
+bool StringToDenseLinearAlgebraLibraryType(
+ string value,
+ DenseLinearAlgebraLibraryType* type) {
+ UpperCase(&value);
+ STRENUM(EIGEN);
+ STRENUM(LAPACK);
+ return false;
+}
+
const char* TrustRegionStrategyTypeToString(TrustRegionStrategyType type) {
switch (type) {
CASESTR(LEVENBERG_MARQUARDT);
@@ -318,4 +336,21 @@
return false;
}
+bool IsDenseLinearAlgebraLibraryTypeAvailable(
+ DenseLinearAlgebraLibraryType type) {
+ if (type == EIGEN) {
+ return true;
+ }
+ if (type == LAPACK) {
+#ifdef CERES_NO_LAPACK
+ return false;
+#else
+ return true;
+#endif
+ }
+
+ LOG(WARNING) << "Unknown dense linear algebra library " << type;
+ return false;
+}
+
} // namespace ceres
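
[Note, not part of the patch] A minimal sketch of how the helpers added to
types.cc can be combined: parse a library name from a string (for example a
command-line flag) and fall back to EIGEN when the requested backend was
compiled out. The function name ChooseDenseBackend is hypothetical; the
declarations are assumed to live in the public ceres/types.h header.

#include <string>
#include "ceres/types.h"
#include "glog/logging.h"

ceres::DenseLinearAlgebraLibraryType ChooseDenseBackend(
    const std::string& flag_value) {
  ceres::DenseLinearAlgebraLibraryType type = ceres::EIGEN;
  if (!ceres::StringToDenseLinearAlgebraLibraryType(flag_value, &type)) {
    LOG(WARNING) << "Unknown dense linear algebra library '" << flag_value
                 << "', using EIGEN.";
    return ceres::EIGEN;
  }
  if (!ceres::IsDenseLinearAlgebraLibraryTypeAvailable(type)) {
    LOG(WARNING) << flag_value << " is not available in this build, "
                 << "using EIGEN.";
    return ceres::EIGEN;
  }
  return type;
}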
diff --git a/internal/ceres/unsymmetric_linear_solver_test.cc b/internal/ceres/unsymmetric_linear_solver_test.cc
index 232f34c..af9dffe 100644
--- a/internal/ceres/unsymmetric_linear_solver_test.cc
+++ b/internal/ceres/unsymmetric_linear_solver_test.cc
@@ -118,23 +118,41 @@
scoped_array<double> sol_regularized_;
};
-TEST_F(UnsymmetricLinearSolverTest, DenseQR) {
+TEST_F(UnsymmetricLinearSolverTest, EigenDenseQR) {
LinearSolver::Options options;
options.type = DENSE_QR;
+ options.dense_linear_algebra_library_type = EIGEN;
TestSolver(options);
}
-TEST_F(UnsymmetricLinearSolverTest, DenseNormalCholesky) {
+TEST_F(UnsymmetricLinearSolverTest, EigenDenseNormalCholesky) {
LinearSolver::Options options;
+ options.dense_linear_algebra_library_type = EIGEN;
options.type = DENSE_NORMAL_CHOLESKY;
TestSolver(options);
}
+#ifndef CERES_NO_LAPACK
+TEST_F(UnsymmetricLinearSolverTest, LAPACKDenseQR) {
+ LinearSolver::Options options;
+ options.type = DENSE_QR;
+ options.dense_linear_algebra_library_type = LAPACK;
+ TestSolver(options);
+}
+
+TEST_F(UnsymmetricLinearSolverTest, LAPACKDenseNormalCholesky) {
+ LinearSolver::Options options;
+ options.dense_linear_algebra_library_type = LAPACK;
+ options.type = DENSE_NORMAL_CHOLESKY;
+ TestSolver(options);
+}
+#endif
+
#ifndef CERES_NO_SUITESPARSE
TEST_F(UnsymmetricLinearSolverTest,
SparseNormalCholeskyUsingSuiteSparsePreOrdering) {
LinearSolver::Options options;
- options.sparse_linear_algebra_library = SUITE_SPARSE;
+ options.sparse_linear_algebra_library_type = SUITE_SPARSE;
options.type = SPARSE_NORMAL_CHOLESKY;
options.use_postordering = false;
TestSolver(options);
@@ -143,7 +161,7 @@
TEST_F(UnsymmetricLinearSolverTest,
SparseNormalCholeskyUsingSuiteSparsePostOrdering) {
LinearSolver::Options options;
- options.sparse_linear_algebra_library = SUITE_SPARSE;
+ options.sparse_linear_algebra_library_type = SUITE_SPARSE;
options.type = SPARSE_NORMAL_CHOLESKY;
options.use_postordering = true;
TestSolver(options);
@@ -154,7 +172,7 @@
TEST_F(UnsymmetricLinearSolverTest,
SparseNormalCholeskyUsingCXSparsePreOrdering) {
LinearSolver::Options options;
- options.sparse_linear_algebra_library = CX_SPARSE;
+ options.sparse_linear_algebra_library_type = CX_SPARSE;
options.type = SPARSE_NORMAL_CHOLESKY;
options.use_postordering = false;
TestSolver(options);
@@ -163,7 +181,7 @@
TEST_F(UnsymmetricLinearSolverTest,
SparseNormalCholeskyUsingCXSparsePostOrdering) {
LinearSolver::Options options;
- options.sparse_linear_algebra_library = CX_SPARSE;
+ options.sparse_linear_algebra_library_type = CX_SPARSE;
options.type = SPARSE_NORMAL_CHOLESKY;
options.use_postordering = true;
TestSolver(options);
diff --git a/jni/Android.mk b/jni/Android.mk
index b881d88..d628177 100644
--- a/jni/Android.mk
+++ b/jni/Android.mk
@@ -99,6 +99,7 @@
LOCAL_CPP_EXTENSION := .cc
LOCAL_CFLAGS := $(CERES_EXTRA_DEFINES) \
+ -DCERES_NO_LAPACK \
-DCERES_NO_SUITESPARSE \
-DCERES_NO_GFLAGS \
-DCERES_NO_THREADS \