Multiple dense linear algebra backends.

1. When a LAPACK implementation is present, DENSE_QR,
   DENSE_NORMAL_CHOLESKY and DENSE_SCHUR can use it for doing dense
   linear algebra operations.
2. The user can switch dense linear algebra libraries by setting
   Solver::Options::dense_linear_algebra_library_type.
3. Solver::Options::sparse_linear_algebra_library is now
   Solver::Options::sparse_linear_algebra_library_type, to be
   consistent with all the other enums in Solver::Options.
4. Updated documentation as well as Solver::Summary::FullReport to
   reflect these changes.

Change-Id: I5ab930bc15e90906b648bc399b551e6bd5d6498f
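For reference, switching the new backend from user code looks like the sketch below. The option and enum names (dense_linear_algebra_library_type, EIGEN, LAPACK) come from this change; the pre-built ceres::Problem, the helper name and the choice of DENSE_NORMAL_CHOLESKY are illustrative only, and LAPACK is only usable in a build where a system BLAS + LAPACK was found (i.e. CERES_NO_LAPACK is not defined).

    #include <iostream>
    #include "ceres/ceres.h"

    void SolveWithLapack(ceres::Problem* problem) {
      ceres::Solver::Options options;
      // The new option affects the DENSE_QR, DENSE_NORMAL_CHOLESKY and
      // DENSE_SCHUR solvers.
      options.linear_solver_type = ceres::DENSE_NORMAL_CHOLESKY;
      // EIGEN is the default; LAPACK uses the system BLAS + LAPACK.
      options.dense_linear_algebra_library_type = ceres::LAPACK;

      ceres::Solver::Summary summary;
      ceres::Solve(options, problem, &summary);
      // FullReport() now also shows which dense backend was used.
      std::cout << summary.FullReport() << "\n";
    }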
diff --git a/CMakeLists.txt b/CMakeLists.txt index c14cc2c..a85bd14 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt
@@ -131,6 +131,40 @@ SET(CXSPARSE_SEARCH_HEADERS ${SEARCH_HEADERS}) LIST(APPEND CXSPARSE_SEARCH_HEADERS /usr/include/suitesparse) # Ubuntu +SET(BLAS_AND_LAPACK_FOUND TRUE) +IF ((NOT DEFINED LAPACK) OR (DEFINED LAPACK AND LAPACK)) + MESSAGE("${LAPACK}") + + IF (APPLE) + # Mac OS X has LAPACK/BLAS bundled in a framework called + # "vecLib". Search for that instead of for the normal "lapack" + # library. + FIND_LIBRARY(LAPACK_LIB NAMES vecLib) + ELSE (APPLE) + FIND_LIBRARY(BLAS_LIB NAMES blas) + IF (EXISTS ${BLAS_LIB}) + MESSAGE("-- Found BLAS library: ${BLAS_LIB}") + ELSE (EXISTS ${BLAS_LIB}) + MESSAGE("-- Did not find BLAS library") + SET(BLAS_AND_LAPACK_FOUND FALSE) + ENDIF (EXISTS ${BLAS_LIB}) + FIND_LIBRARY(LAPACK_LIB NAMES lapack) + ENDIF (APPLE) +ELSE ((NOT DEFINED LAPACK) OR (DEFINED LAPACK AND LAPACK)) + SET(BLAS_AND_LAPACK_FOUND FALSE) +ENDIF ((NOT DEFINED LAPACK) OR (DEFINED LAPACK AND LAPACK)) + +IF (EXISTS ${LAPACK_LIB}) + MESSAGE("-- Found LAPACK library: ${LAPACK_LIB}") +ELSE (EXISTS ${LAPACK_LIB}) + SET(BLAS_AND_LAPACK_FOUND FALSE) + MESSAGE("-- Did not find LAPACK library") +ENDIF (EXISTS ${LAPACK_LIB}) + +IF (NOT BLAS_AND_LAPACK_FOUND) + ADD_DEFINITIONS(-DCERES_NO_LAPACK) +ENDIF (NOT BLAS_AND_LAPACK_FOUND) + IF ((NOT DEFINED SUITESPARSE) OR (DEFINED SUITESPARSE AND SUITESPARSE)) # Check for SuiteSparse dependencies @@ -298,32 +332,8 @@ SET(TBB_FOUND FALSE) ENDIF (EXISTS ${TBB_MALLOC_LIB}) -SET(BLAS_AND_LAPACK_FOUND TRUE) -IF (APPLE) - # Mac OS X has LAPACK/BLAS bundled in a framework called - # "vecLib". Search for that instead of for the normal "lapack" - # library. - FIND_LIBRARY(LAPACK_LIB NAMES vecLib) -ELSE (APPLE) - FIND_LIBRARY(BLAS_LIB NAMES blas) - IF (EXISTS ${BLAS_LIB}) - MESSAGE("-- Found BLAS library: ${BLAS_LIB}") - ELSE (EXISTS ${BLAS_LIB}) - MESSAGE("-- Did not find BLAS library") - SET(BLAS_AND_LAPACK_FOUND FALSE) - ENDIF (EXISTS ${BLAS_LIB}) - FIND_LIBRARY(LAPACK_LIB NAMES lapack) -ENDIF (APPLE) - -IF (EXISTS ${LAPACK_LIB}) - MESSAGE("-- Found LAPACK library: ${LAPACK_LIB}") -ELSE (EXISTS ${LAPACK_LIB}) - SET(BLAS_AND_LAPACK_FOUND FALSE) - MESSAGE("-- Did not find LAPACK library") -ENDIF (EXISTS ${LAPACK_LIB}) - -# We don't use SET(SUITESPARSE_FOUND ${AMD_FOUND} ...) in order to -# be able to check whether SuiteSparse is available withou expanding +# We don't use SET(SUITESPARSE_FOUND ${AMD_FOUND} ...) in order to be +# able to check whether SuiteSparse is available without expanding # SUITESPARSE_FOUND with ${}. This means further checks could be: # # IF (SUITESPARSE_FOUND)
diff --git a/docs/source/solving.rst b/docs/source/solving.rst index d8b9f4a..f574ec3 100644 --- a/docs/source/solving.rst +++ b/docs/source/solving.rst
@@ -1147,7 +1147,23 @@ ``CLUSTER_JACOBI`` and ``CLUSTER_TRIDIAGONAL``. See :ref:`section-preconditioner` for more details. -.. member:: SparseLinearAlgebraLibrary Solver::Options::sparse_linear_algebra_library +.. member:: DenseLinearAlgebraLibrary Solver::Options::dense_linear_algebra_library_type + + Default:``EIGEN`` + + Ceres supports using multiple dense linear algebra libraries for + dense matrix factorizations. Currently ``EIGEN`` and ``LAPACK`` are + the valid choices. ``EIGEN`` is always available, ``LAPACK`` refers + to the system ``BLAS + LAPACK`` library which may or may not be + available. + + This setting affects the ``DENSE_QR``, ``DENSE_NORMAL_CHOLESKY`` + and ``DENSE_SCHUR`` solvers. For small to moderate sized problems + ``EIGEN`` is a fine choice but for large problems, an optimized + ``LAPACK + BLAS`` implementation can make a substantial difference + in performance. + +.. member:: SparseLinearAlgebraLibrary Solver::Options::sparse_linear_algebra_library_type Default:``SUITE_SPARSE`` @@ -1837,7 +1853,7 @@ TrustRegionStrategyType trust_region_strategy_type; DoglegType dogleg_type; - SparseLinearAlgebraLibraryType sparse_linear_algebra_library; + SparseLinearAlgebraLibraryType sparse_linear_algebra_library_type; LineSearchDirectionType line_search_direction_type; LineSearchType line_search_type;
diff --git a/examples/bundle_adjuster.cc b/examples/bundle_adjuster.cc index c060aed..3586501 100644 --- a/examples/bundle_adjuster.cc +++ b/examples/bundle_adjuster.cc
@@ -84,6 +84,8 @@ "cluster_tridiagonal."); DEFINE_string(sparse_linear_algebra_library, "suite_sparse", "Options are: suite_sparse and cx_sparse."); +DEFINE_string(dense_linear_algebra_library, "eigen", + "Options are: eigen and lapack."); DEFINE_string(ordering, "automatic", "Options are: automatic, user."); DEFINE_bool(use_quaternions, false, "If true, uses quaternions to represent " @@ -125,8 +127,12 @@ &options->preconditioner_type)); CHECK(StringToSparseLinearAlgebraLibraryType( FLAGS_sparse_linear_algebra_library, - &options->sparse_linear_algebra_library)); + &options->sparse_linear_algebra_library_type)); + CHECK(StringToDenseLinearAlgebraLibraryType( + FLAGS_dense_linear_algebra_library, + &options->dense_linear_algebra_library_type)); options->num_linear_solver_threads = FLAGS_num_threads; + } void SetOrdering(BALProblem* bal_problem, Solver::Options* options) {
diff --git a/include/ceres/solver.h b/include/ceres/solver.h index e7b8d09..ab1d350 100644 --- a/include/ceres/solver.h +++ b/include/ceres/solver.h
@@ -73,7 +73,6 @@ max_num_line_search_direction_restarts = 5; line_search_sufficient_curvature_decrease = 0.9; max_line_search_step_expansion = 10.0; - trust_region_strategy_type = LEVENBERG_MARQUARDT; dogleg_type = TRADITIONAL_DOGLEG; use_nonmonotonic_steps = false; @@ -100,11 +99,12 @@ preconditioner_type = JACOBI; - sparse_linear_algebra_library = SUITE_SPARSE; + sparse_linear_algebra_library_type = SUITE_SPARSE; #if defined(CERES_NO_SUITESPARSE) && !defined(CERES_NO_CXSPARSE) - sparse_linear_algebra_library = CX_SPARSE; + sparse_linear_algebra_library_type = CX_SPARSE; #endif + dense_linear_algebra_library_type = EIGEN; num_linear_solver_threads = 1; linear_solver_ordering = NULL; use_postordering = false; @@ -388,7 +388,20 @@ // for sparse matrix ordering and factorizations. Currently, // SUITE_SPARSE and CX_SPARSE are the valid choices, depending on // whether they are linked into Ceres at build time. - SparseLinearAlgebraLibraryType sparse_linear_algebra_library; + SparseLinearAlgebraLibraryType sparse_linear_algebra_library_type; + + // Ceres supports using multiple dense linear algebra libraries + // for dense matrix factorizations. Currently EIGEN and LAPACK are + // the valid choices. EIGEN is always available, LAPACK refers to + // the system BLAS + LAPACK library which may or may not be + // available. + // + // This setting affects the DENSE_QR, DENSE_NORMAL_CHOLESKY and + // DENSE_SCHUR solvers. For small to moderate sized problems EIGEN + // is a fine choice but for large problems, an optimized LAPACK + + // BLAS implementation can make a substantial difference in + // performance. + DenseLinearAlgebraLibraryType dense_linear_algebra_library_type; // Number of threads used by Ceres to solve the Newton // step. Currently only the SPARSE_SCHUR solver is capable of @@ -783,7 +796,8 @@ TrustRegionStrategyType trust_region_strategy_type; DoglegType dogleg_type; - SparseLinearAlgebraLibraryType sparse_linear_algebra_library; + DenseLinearAlgebraLibraryType dense_linear_algebra_library_type; + SparseLinearAlgebraLibraryType sparse_linear_algebra_library_type; LineSearchDirectionType line_search_direction_type; LineSearchType line_search_type;
diff --git a/include/ceres/types.h b/include/ceres/types.h index a967541..ffa743a 100644 --- a/include/ceres/types.h +++ b/include/ceres/types.h
@@ -126,6 +126,11 @@ CX_SPARSE }; +enum DenseLinearAlgebraLibraryType { + EIGEN, + LAPACK +}; + enum LinearSolverTerminationType { // Termination criterion was met. For factorization based solvers // the tolerance is assumed to be zero. Any user provided values are @@ -401,6 +406,12 @@ string value, SparseLinearAlgebraLibraryType* type); +const char* DenseLinearAlgebraLibraryTypeToString( + DenseLinearAlgebraLibraryType type); +bool StringToDenseLinearAlgebraLibraryType( + string value, + DenseLinearAlgebraLibraryType* type); + const char* TrustRegionStrategyTypeToString(TrustRegionStrategyType type); bool StringToTrustRegionStrategyType(string value, TrustRegionStrategyType* type); @@ -444,7 +455,8 @@ bool IsSchurType(LinearSolverType type); bool IsSparseLinearAlgebraLibraryTypeAvailable( SparseLinearAlgebraLibraryType type); - +bool IsDenseLinearAlgebraLibraryTypeAvailable( + DenseLinearAlgebraLibraryType type); } // namespace ceres
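A small sketch of how the new enum plumbing in types.h is meant to be used; the lowercase spelling "lapack"/"eigen" follows the bundle_adjuster flags above, CHECK/LOG come from glog, and SetDenseBackendFromFlag is a hypothetical helper, not part of this patch.

    #include <string>
    #include "ceres/ceres.h"
    #include "glog/logging.h"

    void SetDenseBackendFromFlag(const std::string& flag_value,
                                 ceres::Solver::Options* options) {
      ceres::DenseLinearAlgebraLibraryType dense_type;
      CHECK(ceres::StringToDenseLinearAlgebraLibraryType(flag_value,
                                                         &dense_type))
          << "Unknown dense linear algebra library: " << flag_value;
      if (!ceres::IsDenseLinearAlgebraLibraryTypeAvailable(dense_type)) {
        LOG(WARNING) << flag_value << " is not built in; using EIGEN.";
        dense_type = ceres::EIGEN;
      }
      options->dense_linear_algebra_library_type = dense_type;
    }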
diff --git a/internal/ceres/CMakeLists.txt b/internal/ceres/CMakeLists.txt index 9e2e1ae..9138c0d 100644 --- a/internal/ceres/CMakeLists.txt +++ b/internal/ceres/CMakeLists.txt
@@ -30,6 +30,7 @@ SET(CERES_INTERNAL_SRC array_utils.cc + blas.cc block_evaluate_preparer.cc block_jacobi_preconditioner.cc block_jacobian_writer.cc @@ -64,6 +65,7 @@ incomplete_lq_factorization.cc iterative_schur_complement_solver.cc levenberg_marquardt_strategy.cc + lapack.cc line_search.cc line_search_direction.cc line_search_minimizer.cc @@ -166,18 +168,23 @@ LIST(APPEND CERES_LIBRARY_DEPENDENCIES ${METIS_LIB}) ENDIF (EXISTS ${METIS_LIB}) - LIST(APPEND CERES_LIBRARY_DEPENDENCIES ${LAPACK_LIB}) - - IF (EXISTS ${BLAS_LIB}) - LIST(APPEND CERES_LIBRARY_DEPENDENCIES ${BLAS_LIB}) - ENDIF (EXISTS ${BLAS_LIB}) - IF (TBB_FOUND) LIST(APPEND CERES_LIBRARY_DEPENDENCIES ${TBB_LIB}) LIST(APPEND CERES_LIBRARY_DEPENDENCIES ${TBB_MALLOC_LIB}) ENDIF (TBB_FOUND) ENDIF (SUITESPARSE_FOUND) +IF (CXSPARSE_FOUND) + LIST(APPEND CERES_LIBRARY_DEPENDENCIES ${CXSPARSE_LIB}) +ENDIF (CXSPARSE_FOUND) + +IF (BLAS_AND_LAPACK_FOUND) + LIST(APPEND CERES_LIBRARY_DEPENDENCIES ${LAPACK_LIB}) + + IF (EXISTS ${BLAS_LIB}) + LIST(APPEND CERES_LIBRARY_DEPENDENCIES ${BLAS_LIB}) + ENDIF (EXISTS ${BLAS_LIB}) +ENDIF (BLAS_AND_LAPACK_FOUND) IF (CXSPARSE_FOUND) LIST(APPEND CERES_LIBRARY_DEPENDENCIES ${CXSPARSE_LIB}) @@ -242,7 +249,6 @@ CERES_TEST(autodiff) CERES_TEST(autodiff_cost_function) CERES_TEST(autodiff_local_parameterization) - CERES_TEST(blas) CERES_TEST(block_random_access_crs_matrix) CERES_TEST(block_random_access_dense_matrix) CERES_TEST(block_random_access_sparse_matrix) @@ -285,6 +291,7 @@ CERES_TEST(runtime_numeric_diff_cost_function) CERES_TEST(schur_complement_solver) CERES_TEST(schur_eliminator) + CERES_TEST(small_blas) CERES_TEST(solver_impl) # TODO(sameeragarwal): This test should ultimately be made
diff --git a/internal/ceres/blas.cc b/internal/ceres/blas.cc new file mode 100644 index 0000000..d63ca51 --- /dev/null +++ b/internal/ceres/blas.cc
@@ -0,0 +1,78 @@ +// Ceres Solver - A fast non-linear least squares minimizer +// Copyright 2013 Google Inc. All rights reserved. +// http://code.google.com/p/ceres-solver/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of Google Inc. nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +// Author: sameeragarwal@google.com (Sameer Agarwal) + +#include "ceres/blas.h" +#include "glog/logging.h" + +extern "C" void dsyrk_(char* uplo, + char* trans, + int* n, + int* k, + double* alpha, + double* a, + int* lda, + double* beta, + double* c, + int* ldc); + +namespace ceres { +namespace internal { + +void BLAS::SymmetricRankKUpdate(int num_rows, + int num_cols, + const double* a, + bool transpose, + double alpha, + double beta, + double* c) { +#ifdef CERES_NO_LAPACK + LOG(FATAL) << "Ceres was built without a BLAS library."; +#else + char uplo = 'L'; + char trans = transpose ? 'T' : 'N'; + int n = transpose ? num_cols : num_rows; + int k = transpose ? num_rows : num_cols; + int lda = k; + int ldc = n; + dsyrk_(&uplo, + &trans, + &n, + &k, + &alpha, + const_cast<double*>(a), + &lda, + &beta, + c, + &ldc); +#endif +}; + +} // namespace internal +} // namespace ceres
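A minimal sketch of calling the new wrapper (Example() and the values are illustrative; it assumes a LAPACK-enabled build, since otherwise the wrapper LOG(FATAL)s). The matrix is column major with stride equal to the number of rows, which is what the dense normal Cholesky path below assumes.

    #include <algorithm>
    #include "ceres/blas.h"

    void Example() {
      // C = 1.0 * A'A + 0.0 * C for a column-major 5x3 matrix A.
      // dsyrk is called with uplo = 'L', so only the lower triangle of
      // C is written; the caller must treat C as symmetric.
      double A[5 * 3];
      std::fill(A, A + 5 * 3, 1.0);
      double C[3 * 3] = {};
      ceres::internal::BLAS::SymmetricRankKUpdate(5, 3, A, true, 1.0, 0.0, C);
    }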
diff --git a/internal/ceres/blas.h b/internal/ceres/blas.h index 9629b3d..a5c6862 100644 --- a/internal/ceres/blas.h +++ b/internal/ceres/blas.h
@@ -28,377 +28,29 @@ // // Author: sameeragarwal@google.com (Sameer Agarwal) // -// Simple blas functions for use in the Schur Eliminator. These are -// fairly basic implementations which already yield a significant -// speedup in the eliminator performance. +// Wrapper functions around BLAS functions. #ifndef CERES_INTERNAL_BLAS_H_ #define CERES_INTERNAL_BLAS_H_ -#include "ceres/internal/eigen.h" -#include "glog/logging.h" - namespace ceres { namespace internal { -// Remove the ".noalias()" annotation from the matrix matrix -// mutliplies to produce a correct build with the Android NDK, -// including versions 6, 7, 8, and 8b, when built with STLPort and the -// non-standalone toolchain (i.e. ndk-build). This appears to be a -// compiler bug; if the workaround is not in place, the line -// -// block.noalias() -= A * B; -// -// gets compiled to -// -// block.noalias() += A * B; -// -// which breaks schur elimination. Introducing a temporary by removing the -// .noalias() annotation causes the issue to disappear. Tracking this -// issue down was tricky, since the test suite doesn't run when built with -// the non-standalone toolchain. -// -// TODO(keir): Make a reproduction case for this and send it upstream. -#ifdef CERES_WORK_AROUND_ANDROID_NDK_COMPILER_BUG -#define CERES_MAYBE_NOALIAS -#else -#define CERES_MAYBE_NOALIAS .noalias() -#endif +class BLAS { + public: -// The following three macros are used to share code and reduce -// template junk across the various GEMM variants. -#define CERES_GEMM_BEGIN(name) \ - template<int kRowA, int kColA, int kRowB, int kColB, int kOperation> \ - inline void name(const double* A, \ - const int num_row_a, \ - const int num_col_a, \ - const double* B, \ - const int num_row_b, \ - const int num_col_b, \ - double* C, \ - const int start_row_c, \ - const int start_col_c, \ - const int row_stride_c, \ - const int col_stride_c) - -#define CERES_GEMM_NAIVE_HEADER \ - DCHECK_GT(num_row_a, 0); \ - DCHECK_GT(num_col_a, 0); \ - DCHECK_GT(num_row_b, 0); \ - DCHECK_GT(num_col_b, 0); \ - DCHECK_GE(start_row_c, 0); \ - DCHECK_GE(start_col_c, 0); \ - DCHECK_GT(row_stride_c, 0); \ - DCHECK_GT(col_stride_c, 0); \ - DCHECK((kRowA == Eigen::Dynamic) || (kRowA == num_row_a)); \ - DCHECK((kColA == Eigen::Dynamic) || (kColA == num_col_a)); \ - DCHECK((kRowB == Eigen::Dynamic) || (kRowB == num_row_b)); \ - DCHECK((kColB == Eigen::Dynamic) || (kColB == num_col_b)); \ - const int NUM_ROW_A = (kRowA != Eigen::Dynamic ? kRowA : num_row_a); \ - const int NUM_COL_A = (kColA != Eigen::Dynamic ? kColA : num_col_a); \ - const int NUM_ROW_B = (kColB != Eigen::Dynamic ? kRowB : num_row_b); \ - const int NUM_COL_B = (kColB != Eigen::Dynamic ? kColB : num_col_b); - -#define CERES_GEMM_EIGEN_HEADER \ - const typename EigenTypes<kRowA, kColA>::ConstMatrixRef \ - Aref(A, num_row_a, num_col_a); \ - const typename EigenTypes<kRowB, kColB>::ConstMatrixRef \ - Bref(B, num_row_b, num_col_b); \ - MatrixRef Cref(C, row_stride_c, col_stride_c); \ - -#define CERES_CALL_GEMM(name) \ - name<kRowA, kColA, kRowB, kColB, kOperation>( \ - A, num_row_a, num_col_a, \ - B, num_row_b, num_col_b, \ - C, start_row_c, start_col_c, row_stride_c, col_stride_c); - - -// For the matrix-matrix functions below, there are three variants for -// each functionality. Foo, FooNaive and FooEigen. Foo is the one to -// be called by the user. FooNaive is a basic loop based -// implementation and FooEigen uses Eigen's implementation. 
Foo -// chooses between FooNaive and FooEigen depending on how many of the -// template arguments are fixed at compile time. Currently, FooEigen -// is called if all matrix dimensions are compile time -// constants. FooNaive is called otherwise. This leads to the best -// performance currently. -// -// The MatrixMatrixMultiply variants compute: -// -// C op A * B; -// -// The MatrixTransposeMatrixMultiply variants compute: -// -// C op A' * B -// -// where op can be +=, -=, or =. -// -// The template parameters (kRowA, kColA, kRowB, kColB) allow -// specialization of the loop at compile time. If this information is -// not available, then Eigen::Dynamic should be used as the template -// argument. -// -// kOperation = 1 -> C += A * B -// kOperation = -1 -> C -= A * B -// kOperation = 0 -> C = A * B -// -// The functions can write into matrices C which are larger than the -// matrix A * B. This is done by specifying the true size of C via -// row_stride_c and col_stride_c, and then indicating where A * B -// should be written into by start_row_c and start_col_c. -// -// Graphically if row_stride_c = 10, col_stride_c = 12, start_row_c = -// 4 and start_col_c = 5, then if A = 3x2 and B = 2x4, we get -// -// ------------ -// ------------ -// ------------ -// ------------ -// -----xxxx--- -// -----xxxx--- -// -----xxxx--- -// ------------ -// ------------ -// ------------ -// -CERES_GEMM_BEGIN(MatrixMatrixMultiplyEigen) { - CERES_GEMM_EIGEN_HEADER - Eigen::Block<MatrixRef, kRowA, kColB> - block(Cref, start_row_c, start_col_c, num_row_a, num_col_b); - - if (kOperation > 0) { - block CERES_MAYBE_NOALIAS += Aref * Bref; - } else if (kOperation < 0) { - block CERES_MAYBE_NOALIAS -= Aref * Bref; - } else { - block CERES_MAYBE_NOALIAS = Aref * Bref; - } -} - -CERES_GEMM_BEGIN(MatrixMatrixMultiplyNaive) { - CERES_GEMM_NAIVE_HEADER - DCHECK_EQ(NUM_COL_A, NUM_ROW_B); - - const int NUM_ROW_C = NUM_ROW_A; - const int NUM_COL_C = NUM_COL_B; - DCHECK_LE(start_row_c + NUM_ROW_C, row_stride_c); - DCHECK_LE(start_col_c + NUM_COL_C, col_stride_c); - - for (int row = 0; row < NUM_ROW_C; ++row) { - for (int col = 0; col < NUM_COL_C; ++col) { - double tmp = 0.0; - for (int k = 0; k < NUM_COL_A; ++k) { - tmp += A[row * NUM_COL_A + k] * B[k * NUM_COL_B + col]; - } - - const int index = (row + start_row_c) * col_stride_c + start_col_c + col; - if (kOperation > 0) { - C[index] += tmp; - } else if (kOperation < 0) { - C[index] -= tmp; - } else { - C[index] = tmp; - } - } - } -} - -CERES_GEMM_BEGIN(MatrixMatrixMultiply) { -#ifdef CERES_NO_CUSTOM_BLAS - - CERES_CALL_GEMM(MatrixMatrixMultiplyEigen) - return; - -#else - - if (kRowA != Eigen::Dynamic && kColA != Eigen::Dynamic && - kRowB != Eigen::Dynamic && kColB != Eigen::Dynamic) { - CERES_CALL_GEMM(MatrixMatrixMultiplyEigen) - } else { - CERES_CALL_GEMM(MatrixMatrixMultiplyNaive) - } - -#endif -} - -CERES_GEMM_BEGIN(MatrixTransposeMatrixMultiplyEigen) { - CERES_GEMM_EIGEN_HEADER - Eigen::Block<MatrixRef, kColA, kColB> block(Cref, - start_row_c, start_col_c, - num_col_a, num_col_b); - if (kOperation > 0) { - block CERES_MAYBE_NOALIAS += Aref.transpose() * Bref; - } else if (kOperation < 0) { - block CERES_MAYBE_NOALIAS -= Aref.transpose() * Bref; - } else { - block CERES_MAYBE_NOALIAS = Aref.transpose() * Bref; - } -} - -CERES_GEMM_BEGIN(MatrixTransposeMatrixMultiplyNaive) { - CERES_GEMM_NAIVE_HEADER - DCHECK_EQ(NUM_ROW_A, NUM_ROW_B); - - const int NUM_ROW_C = NUM_COL_A; - const int NUM_COL_C = NUM_COL_B; - DCHECK_LE(start_row_c + NUM_ROW_C, row_stride_c); - 
DCHECK_LE(start_col_c + NUM_COL_C, col_stride_c); - - for (int row = 0; row < NUM_ROW_C; ++row) { - for (int col = 0; col < NUM_COL_C; ++col) { - double tmp = 0.0; - for (int k = 0; k < NUM_ROW_A; ++k) { - tmp += A[k * NUM_COL_A + row] * B[k * NUM_COL_B + col]; - } - - const int index = (row + start_row_c) * col_stride_c + start_col_c + col; - if (kOperation > 0) { - C[index]+= tmp; - } else if (kOperation < 0) { - C[index]-= tmp; - } else { - C[index]= tmp; - } - } - } -} - -CERES_GEMM_BEGIN(MatrixTransposeMatrixMultiply) { -#ifdef CERES_NO_CUSTOM_BLAS - - CERES_CALL_GEMM(MatrixTransposeMatrixMultiplyEigen) - return; - -#else - - if (kRowA != Eigen::Dynamic && kColA != Eigen::Dynamic && - kRowB != Eigen::Dynamic && kColB != Eigen::Dynamic) { - CERES_CALL_GEMM(MatrixTransposeMatrixMultiplyEigen) - } else { - CERES_CALL_GEMM(MatrixTransposeMatrixMultiplyNaive) - } - -#endif -} - -// Matrix-Vector multiplication -// -// c op A * b; -// -// where op can be +=, -=, or =. -// -// The template parameters (kRowA, kColA) allow specialization of the -// loop at compile time. If this information is not available, then -// Eigen::Dynamic should be used as the template argument. -// -// kOperation = 1 -> c += A' * b -// kOperation = -1 -> c -= A' * b -// kOperation = 0 -> c = A' * b -template<int kRowA, int kColA, int kOperation> -inline void MatrixVectorMultiply(const double* A, - const int num_row_a, - const int num_col_a, - const double* b, - double* c) { -#ifdef CERES_NO_CUSTOM_BLAS - const typename EigenTypes<kRowA, kColA>::ConstMatrixRef - Aref(A, num_row_a, num_col_a); - const typename EigenTypes<kColA>::ConstVectorRef bref(b, num_col_a); - typename EigenTypes<kRowA>::VectorRef cref(c, num_row_a); - - // lazyProduct works better than .noalias() for matrix-vector - // products. - if (kOperation > 0) { - cref += Aref.lazyProduct(bref); - } else if (kOperation < 0) { - cref -= Aref.lazyProduct(bref); - } else { - cref = Aref.lazyProduct(bref); - } -#else - - DCHECK_GT(num_row_a, 0); - DCHECK_GT(num_col_a, 0); - DCHECK((kRowA == Eigen::Dynamic) || (kRowA == num_row_a)); - DCHECK((kColA == Eigen::Dynamic) || (kColA == num_col_a)); - - const int NUM_ROW_A = (kRowA != Eigen::Dynamic ? kRowA : num_row_a); - const int NUM_COL_A = (kColA != Eigen::Dynamic ? kColA : num_col_a); - - for (int row = 0; row < NUM_ROW_A; ++row) { - double tmp = 0.0; - for (int col = 0; col < NUM_COL_A; ++col) { - tmp += A[row * NUM_COL_A + col] * b[col]; - } - - if (kOperation > 0) { - c[row] += tmp; - } else if (kOperation < 0) { - c[row] -= tmp; - } else { - c[row] = tmp; - } - } -#endif // CERES_NO_CUSTOM_BLAS -} - -// Similar to MatrixVectorMultiply, except that A is transposed, i.e., -// -// c op A' * b; -template<int kRowA, int kColA, int kOperation> -inline void MatrixTransposeVectorMultiply(const double* A, - const int num_row_a, - const int num_col_a, - const double* b, - double* c) { -#ifdef CERES_NO_CUSTOM_BLAS - const typename EigenTypes<kRowA, kColA>::ConstMatrixRef - Aref(A, num_row_a, num_col_a); - const typename EigenTypes<kRowA>::ConstVectorRef bref(b, num_row_a); - typename EigenTypes<kColA>::VectorRef cref(c, num_col_a); - - // lazyProduct works better than .noalias() for matrix-vector - // products. 
- if (kOperation > 0) { - cref += Aref.transpose().lazyProduct(bref); - } else if (kOperation < 0) { - cref -= Aref.transpose().lazyProduct(bref); - } else { - cref = Aref.transpose().lazyProduct(bref); - } -#else - - DCHECK_GT(num_row_a, 0); - DCHECK_GT(num_col_a, 0); - DCHECK((kRowA == Eigen::Dynamic) || (kRowA == num_row_a)); - DCHECK((kColA == Eigen::Dynamic) || (kColA == num_col_a)); - - const int NUM_ROW_A = (kRowA != Eigen::Dynamic ? kRowA : num_row_a); - const int NUM_COL_A = (kColA != Eigen::Dynamic ? kColA : num_col_a); - - for (int row = 0; row < NUM_COL_A; ++row) { - double tmp = 0.0; - for (int col = 0; col < NUM_ROW_A; ++col) { - tmp += A[col * NUM_COL_A + row] * b[col]; - } - - if (kOperation > 0) { - c[row] += tmp; - } else if (kOperation < 0) { - c[row] -= tmp; - } else { - c[row] = tmp; - } - } -#endif // CERES_NO_CUSTOM_BLAS -} - - -#undef CERES_MAYBE_NOALIAS -#undef CERES_GEMM_BEGIN -#undef CERES_GEMM_EIGEN_HEADER -#undef CERES_GEMM_NAIVE_HEADER -#undef CERES_CALL_GEMM + // transpose = true : c = alpha * a'a + beta * c; + // transpose = false : c = alpha * aa' + beta * c; + // + // Assumes column major matrices. + static void SymmetricRankKUpdate(int num_rows, + int num_cols, + const double* a, + bool transpose, + double alpha, + double beta, + double* c); +}; } // namespace internal } // namespace ceres
diff --git a/internal/ceres/block_sparse_matrix.cc b/internal/ceres/block_sparse_matrix.cc index fdd762c..a487262 100644 --- a/internal/ceres/block_sparse_matrix.cc +++ b/internal/ceres/block_sparse_matrix.cc
@@ -33,9 +33,9 @@ #include <cstddef> #include <algorithm> #include <vector> -#include "ceres/blas.h" #include "ceres/block_structure.h" #include "ceres/internal/eigen.h" +#include "ceres/small_blas.h" #include "ceres/triplet_sparse_matrix.h" #include "glog/logging.h"
diff --git a/internal/ceres/dense_normal_cholesky_solver.cc b/internal/ceres/dense_normal_cholesky_solver.cc index 8e05dcc..677987e 100644 --- a/internal/ceres/dense_normal_cholesky_solver.cc +++ b/internal/ceres/dense_normal_cholesky_solver.cc
@@ -33,9 +33,11 @@ #include <cstddef> #include "Eigen/Dense" +#include "ceres/blas.h" #include "ceres/dense_sparse_matrix.h" #include "ceres/internal/eigen.h" #include "ceres/internal/scoped_ptr.h" +#include "ceres/lapack.h" #include "ceres/linear_solver.h" #include "ceres/types.h" #include "ceres/wall_time.h" @@ -52,6 +54,18 @@ const double* b, const LinearSolver::PerSolveOptions& per_solve_options, double* x) { + if (options_.dense_linear_algebra_library_type == EIGEN) { + return SolveUsingEigen(A, b, per_solve_options, x); + } else { + return SolveUsingLAPACK(A, b, per_solve_options, x); + } +} + +LinearSolver::Summary DenseNormalCholeskySolver::SolveUsingEigen( + DenseSparseMatrix* A, + const double* b, + const LinearSolver::PerSolveOptions& per_solve_options, + double* x) { EventLogger event_logger("DenseNormalCholeskySolver::Solve"); const int num_rows = A->num_rows(); @@ -62,6 +76,7 @@ lhs.setZero(); event_logger.AddEvent("Setup"); + // lhs += A'A // // Using rankUpdate instead of GEMM, exposes the fact that its the @@ -76,16 +91,66 @@ ConstVectorRef D(per_solve_options.D, num_cols); lhs += D.array().square().matrix().asDiagonal(); } + event_logger.AddEvent("Product"); + // Use dsyrk instead for the product. LinearSolver::Summary summary; summary.num_iterations = 1; summary.termination_type = TOLERANCE; VectorRef(x, num_cols) = lhs.selfadjointView<Eigen::Upper>().llt().solve(rhs); event_logger.AddEvent("Solve"); - return summary; } +LinearSolver::Summary DenseNormalCholeskySolver::SolveUsingLAPACK( + DenseSparseMatrix* A, + const double* b, + const LinearSolver::PerSolveOptions& per_solve_options, + double* x) { + EventLogger event_logger("DenseNormalCholeskySolver::Solve"); + + if (per_solve_options.D != NULL) { + // Temporarily append a diagonal block to the A matrix, but undo + // it before returning the matrix to the user. + A->AppendDiagonal(per_solve_options.D); + } + + const int num_cols = A->num_cols(); + Matrix lhs(num_cols, num_cols); + event_logger.AddEvent("Setup"); + + // lhs = A'A + // + // Note: This is a bit delicate, it assumes that the stride on this + // matrix is the same as the number of rows. + BLAS::SymmetricRankKUpdate(A->num_rows(), + num_cols, + A->values(), + true, + 1.0, + 0.0, + lhs.data()); + + if (per_solve_options.D != NULL) { + // Undo the modifications to the matrix A. + A->RemoveDiagonal(); + } + + // TODO(sameeragarwal): Replace this with a gemv call for true blasness. + // rhs = A'b + VectorRef(x, num_cols) = A->matrix().transpose() * ConstVectorRef(b, A->num_rows()); + event_logger.AddEvent("Product"); + + const int info = LAPACK::SolveInPlaceUsingCholesky(num_cols, lhs.data(), x); + event_logger.AddEvent("Solve"); + + LinearSolver::Summary summary; + summary.num_iterations = 1; + summary.termination_type = info == 0 ? TOLERANCE : FAILURE; + + event_logger.AddEvent("TearDown"); + return summary; +} } // namespace internal } // namespace ceres
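For clarity, the LAPACK path above solves the same normal equations as the Eigen path; the following is a standalone Eigen sketch of that computation (purely illustrative, not the solver code).

    #include <iostream>
    #include "Eigen/Dense"

    int main() {
      // Dense least squares problem: minimize ||Ax - b||^2.
      Eigen::MatrixXd A = Eigen::MatrixXd::Random(6, 3);
      Eigen::VectorXd b = Eigen::VectorXd::Random(6);

      // Normal equations (A'A) x = A'b solved by Cholesky. The LAPACK
      // path forms A'A with dsyrk and solves with dpotrf/dpotrs; the
      // Eigen path uses rankUpdate() and llt().
      Eigen::MatrixXd lhs = A.transpose() * A;
      Eigen::VectorXd rhs = A.transpose() * b;
      Eigen::VectorXd x =
          lhs.selfadjointView<Eigen::Lower>().llt().solve(rhs);
      std::cout << "x = " << x.transpose() << "\n";
      return 0;
    }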
diff --git a/internal/ceres/dense_normal_cholesky_solver.h b/internal/ceres/dense_normal_cholesky_solver.h index de47740..e35053f 100644 --- a/internal/ceres/dense_normal_cholesky_solver.h +++ b/internal/ceres/dense_normal_cholesky_solver.h
@@ -85,6 +85,18 @@ const LinearSolver::PerSolveOptions& per_solve_options, double* x); + LinearSolver::Summary SolveUsingLAPACK( + DenseSparseMatrix* A, + const double* b, + const LinearSolver::PerSolveOptions& per_solve_options, + double* x); + + LinearSolver::Summary SolveUsingEigen( + DenseSparseMatrix* A, + const double* b, + const LinearSolver::PerSolveOptions& per_solve_options, + double* x); + const LinearSolver::Options options_; CERES_DISALLOW_COPY_AND_ASSIGN(DenseNormalCholeskySolver); };
diff --git a/internal/ceres/dense_qr_solver.cc b/internal/ceres/dense_qr_solver.cc index 4ab75ab..c61a96f 100644 --- a/internal/ceres/dense_qr_solver.cc +++ b/internal/ceres/dense_qr_solver.cc
@@ -30,12 +30,13 @@ #include "ceres/dense_qr_solver.h" -#include <cstddef> +#include <cstddef> #include "Eigen/Dense" #include "ceres/dense_sparse_matrix.h" #include "ceres/internal/eigen.h" #include "ceres/internal/scoped_ptr.h" +#include "ceres/lapack.h" #include "ceres/linear_solver.h" #include "ceres/types.h" #include "ceres/wall_time.h" @@ -44,13 +45,26 @@ namespace internal { DenseQRSolver::DenseQRSolver(const LinearSolver::Options& options) - : options_(options) {} + : options_(options) { + work_.resize(1); +} LinearSolver::Summary DenseQRSolver::SolveImpl( DenseSparseMatrix* A, const double* b, const LinearSolver::PerSolveOptions& per_solve_options, double* x) { + if (options_.dense_linear_algebra_library_type == EIGEN) { + return SolveUsingEigen(A, b, per_solve_options, x); + } else { + return SolveUsingLAPACK(A, b, per_solve_options, x); + } +} +LinearSolver::Summary DenseQRSolver::SolveUsingLAPACK( + DenseSparseMatrix* A, + const double* b, + const LinearSolver::PerSolveOptions& per_solve_options, + double* x) { EventLogger event_logger("DenseQRSolver::Solve"); const int num_rows = A->num_rows(); @@ -62,6 +76,65 @@ A->AppendDiagonal(per_solve_options.D); } + lhs_ = A->matrix(); + + if (per_solve_options.D != NULL) { + // Undo the modifications to the matrix A. + A->RemoveDiagonal(); + } + + // rhs = [b;0] to account for the additional rows in the lhs. + if (rhs_.rows() != lhs_.rows()) { + rhs_.resize(lhs_.rows()); + } + rhs_.setZero(); + rhs_.head(num_rows) = ConstVectorRef(b, num_rows); + + if (work_.rows() == 1) { + const int work_size = LAPACK::EstimateWorkSizeForQR(lhs_.rows(), lhs_.cols()); + VLOG(3) << "Working memory for Dense QR factorization: " + << work_size * sizeof(double); + work_.resize(work_size); + } + + const int info = LAPACK::SolveUsingQR(lhs_.rows(), + lhs_.cols(), + lhs_.data(), + work_.rows(), + work_.data(), + rhs_.data()); + event_logger.AddEvent("Solve"); + + LinearSolver::Summary summary; + summary.num_iterations = 1; + if (info == 0) { + VectorRef(x, num_cols) = rhs_.head(num_cols); + summary.termination_type = TOLERANCE; + } else { + summary.termination_type = FAILURE; + } + + event_logger.AddEvent("TearDown"); + return summary; +} + +LinearSolver::Summary DenseQRSolver::SolveUsingEigen( + DenseSparseMatrix* A, + const double* b, + const LinearSolver::PerSolveOptions& per_solve_options, + double* x) { + EventLogger event_logger("DenseQRSolver::Solve"); + + const int num_rows = A->num_rows(); + const int num_cols = A->num_cols(); + + + if (per_solve_options.D != NULL) { + // Temporarily append a diagonal block to the A matrix, but undo + // it before returning the matrix to the user. + A->AppendDiagonal(per_solve_options.D); + } + // rhs = [b;0] to account for the additional rows in the lhs. const int augmented_num_rows = num_rows + ((per_solve_options.D != NULL) ? num_cols : 0);
diff --git a/internal/ceres/dense_qr_solver.h b/internal/ceres/dense_qr_solver.h index f78fa72..e745c63 100644 --- a/internal/ceres/dense_qr_solver.h +++ b/internal/ceres/dense_qr_solver.h
@@ -90,8 +90,22 @@ const LinearSolver::PerSolveOptions& per_solve_options, double* x); + LinearSolver::Summary SolveUsingEigen( + DenseSparseMatrix* A, + const double* b, + const LinearSolver::PerSolveOptions& per_solve_options, + double* x); + + LinearSolver::Summary SolveUsingLAPACK( + DenseSparseMatrix* A, + const double* b, + const LinearSolver::PerSolveOptions& per_solve_options, + double* x); + const LinearSolver::Options options_; + ColMajorMatrix lhs_; Vector rhs_; + Vector work_; CERES_DISALLOW_COPY_AND_ASSIGN(DenseQRSolver); };
diff --git a/internal/ceres/iterative_schur_complement_solver.cc b/internal/ceres/iterative_schur_complement_solver.cc index d39d7db..0b1cb89 100644 --- a/internal/ceres/iterative_schur_complement_solver.cc +++ b/internal/ceres/iterative_schur_complement_solver.cc
@@ -97,8 +97,8 @@ Preconditioner::Options preconditioner_options; preconditioner_options.type = options_.preconditioner_type; - preconditioner_options.sparse_linear_algebra_library = - options_.sparse_linear_algebra_library; + preconditioner_options.sparse_linear_algebra_library_type = + options_.sparse_linear_algebra_library_type; preconditioner_options.num_threads = options_.num_threads; preconditioner_options.row_block_size = options_.row_block_size; preconditioner_options.e_block_size = options_.e_block_size;
diff --git a/internal/ceres/lapack.cc b/internal/ceres/lapack.cc new file mode 100644 index 0000000..6e7d21b --- /dev/null +++ b/internal/ceres/lapack.cc
@@ -0,0 +1,135 @@ +// Ceres Solver - A fast non-linear least squares minimizer +// Copyright 2013 Google Inc. All rights reserved. +// http://code.google.com/p/ceres-solver/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of Google Inc. nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +// Author: sameeragarwal@google.com (Sameer Agarwal) + +#include "ceres/lapack.h" +#include "glog/logging.h" + +// C interface to the LAPACK Cholesky factorization and triangular solve. 
+extern "C" void dpotrf_(char* uplo, + int* n, + double* a, + int* lda, + int* info); + +extern "C" void dpotrs_(char* uplo, + int* n, + int* nrhs, + double* a, + int* lda, + double* b, + int* ldb, + int* info); + +extern "C" void dgels_(char* uplo, + int* m, + int* n, + int* nrhs, + double* a, + int* lda, + double* b, + int* ldb, + double* work, + int* lwork, + int* info); + + +namespace ceres { +namespace internal { + +int LAPACK::SolveInPlaceUsingCholesky(int num_rows, + const double* in_lhs, + double* rhs_and_solution) { +#ifdef CERES_NO_LAPACK + LOG(FATAL) << "Ceres was built without a BLAS library."; + return -1; +#else + char uplo = 'L'; + int n = num_rows; + int info = 0; + int nrhs = 1; + double* lhs = const_cast<double*>(in_lhs); + + dpotrf_(&uplo, &n, lhs, &n, &info); + if (info != 0) { + LOG(INFO) << "Cholesky factorization (dpotrf) failed: " << info; + return info; + } + + dpotrs_(&uplo, &n, &nrhs, lhs, &n, rhs_and_solution, &n, &info); + if (info != 0) { + LOG(INFO) << "Triangular solve (dpotrs) failed: " << info; + } + + return info; +#endif +}; + +int LAPACK::EstimateWorkSizeForQR(int num_rows, int num_cols) { +#ifdef CERES_NO_LAPACK + LOG(FATAL) << "Ceres was built without a LAPACK library."; + return -1; +#else + char trans = 'N'; + int nrhs = 1; + int lwork = -1; + double work; + int info = 0; + dgels_(&trans, &num_rows, &num_cols, &nrhs, NULL, &num_rows, NULL, &num_rows, &work, &lwork, &info); + CHECK_EQ(info, 0); + return work; +#endif +}; + +int LAPACK::SolveUsingQR(int num_rows, + int num_cols, + const double* in_lhs, + int work_size, + double* work, + double* rhs_and_solution) { +#ifdef CERES_NO_LAPACK + LOG(FATAL) << "Ceres was built without a LAPACK library."; + return -1; +#else + char trans = 'N'; + int m = num_rows; + int n = num_cols; + int nrhs = 1; + int lda = num_rows; + int ldb = num_rows; + int info = 0; + double* lhs = const_cast<double*>(in_lhs); + + dgels_(&trans, &m, &n, &nrhs, lhs, &lda, rhs_and_solution, &ldb, work, &work_size, &info); + return info; +#endif +}; + +} // namespace internal +} // namespace ceres
diff --git a/internal/ceres/lapack.h b/internal/ceres/lapack.h new file mode 100644 index 0000000..4f3a88c --- /dev/null +++ b/internal/ceres/lapack.h
@@ -0,0 +1,88 @@ +// Ceres Solver - A fast non-linear least squares minimizer +// Copyright 2013 Google Inc. All rights reserved. +// http://code.google.com/p/ceres-solver/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of Google Inc. nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +// Author: sameeragarwal@google.com (Sameer Agarwal) + +#ifndef CERES_INTERNAL_LAPACK_H_ +#define CERES_INTERNAL_LAPACK_H_ + +namespace ceres { +namespace internal { + +class LAPACK { + public: + // Solve + // + // lhs * solution = rhs + // + // using a Cholesky factorization. Here + // lhs is a symmetric positive definite matrix. It is assumed to be + // column major and only the lower triangular part of the matrix is + // referenced. + // + // This function uses the LAPACK dpotrf and dpotrs routines. + // + // The return value is zero if the solve is successful. + static int SolveInPlaceUsingCholesky(int num_rows, + const double* lhs, + double* rhs_and_solution); + + // The SolveUsingQR function requires a buffer for its temporary + // computation. This function given the size of the lhs matrix will + // return the size of the buffer needed. + static int EstimateWorkSizeForQR(int num_rows, int num_cols); + + // Solve + // + // lhs * solution = rhs + // + // using a dense QR factorization. lhs is an arbitrary (possibly + // rectangular) matrix with full column rank. + // + // work is an array of size work_size that this routine uses for its + // temporary storage. The optimal size of this array can be obtained + // by calling EstimateWorkSizeForQR. + // + // When calling, rhs_and_solution contains the rhs, and upon return + // the first num_col entries are the solution. + // + // This function uses the LAPACK dgels routine. + // + // The return value is zero if the solve is successful. + static int SolveUsingQR(int num_rows, + int num_cols, + const double* lhs, + int work_size, + double* work, + double* rhs_and_solution); +}; + +} // namespace internal +} // namespace ceres + +#endif // CERES_INTERNAL_LAPACK_H_
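A usage sketch for the QR half of the wrapper (internal API; Example() and the data are illustrative, and a LAPACK-enabled build is assumed). dgels works on column-major data and overwrites both lhs and rhs_and_solution, which is why DenseQRSolver keeps its own lhs_, rhs_ and work_ members.

    #include <vector>
    #include "ceres/lapack.h"

    void Example() {
      // Least-squares fit of y = c0 + c1 * t to four points, i.e.
      // minimize ||lhs * c - rhs||. lhs is column major: the first
      // column is all ones, the second holds t = 1, 2, 3, 4.
      double lhs[4 * 2] = {1, 1, 1, 1,
                           1, 2, 3, 4};
      double rhs_and_solution[4] = {6, 5, 7, 10};

      std::vector<double> work(
          ceres::internal::LAPACK::EstimateWorkSizeForQR(4, 2));
      const int info = ceres::internal::LAPACK::SolveUsingQR(
          4, 2, lhs, static_cast<int>(work.size()), &work[0],
          rhs_and_solution);
      // info == 0 on success; rhs_and_solution[0] and [1] then hold
      // (c0, c1).
      (void)info;
    }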
diff --git a/internal/ceres/linear_solver.h b/internal/ceres/linear_solver.h index 67bebe0..22691b3 100644 --- a/internal/ceres/linear_solver.h +++ b/internal/ceres/linear_solver.h
@@ -74,7 +74,8 @@ Options() : type(SPARSE_NORMAL_CHOLESKY), preconditioner_type(JACOBI), - sparse_linear_algebra_library(SUITE_SPARSE), + dense_linear_algebra_library_type(EIGEN), + sparse_linear_algebra_library_type(SUITE_SPARSE), use_postordering(false), min_num_iterations(1), max_num_iterations(1), @@ -89,7 +90,8 @@ PreconditionerType preconditioner_type; - SparseLinearAlgebraLibraryType sparse_linear_algebra_library; + DenseLinearAlgebraLibraryType dense_linear_algebra_library_type; + SparseLinearAlgebraLibraryType sparse_linear_algebra_library_type; // See solver.h for information about this flag. bool use_postordering;
diff --git a/internal/ceres/partitioned_matrix_view.cc b/internal/ceres/partitioned_matrix_view.cc index 5dad438..59eaff8 100644 --- a/internal/ceres/partitioned_matrix_view.cc +++ b/internal/ceres/partitioned_matrix_view.cc
@@ -35,10 +35,10 @@ #include <algorithm> #include <cstring> #include <vector> -#include "ceres/blas.h" #include "ceres/block_sparse_matrix.h" #include "ceres/block_structure.h" #include "ceres/internal/eigen.h" +#include "ceres/small_blas.h" #include "glog/logging.h" namespace ceres {
diff --git a/internal/ceres/preconditioner.h b/internal/ceres/preconditioner.h index cb0a381..af64e3c 100644 --- a/internal/ceres/preconditioner.h +++ b/internal/ceres/preconditioner.h
@@ -48,7 +48,7 @@ struct Options { Options() : type(JACOBI), - sparse_linear_algebra_library(SUITE_SPARSE), + sparse_linear_algebra_library_type(SUITE_SPARSE), num_threads(1), row_block_size(Eigen::Dynamic), e_block_size(Eigen::Dynamic), @@ -57,7 +57,7 @@ PreconditionerType type; - SparseLinearAlgebraLibraryType sparse_linear_algebra_library; + SparseLinearAlgebraLibraryType sparse_linear_algebra_library_type; // If possible, how many threads the preconditioner can use. int num_threads;
diff --git a/internal/ceres/program_evaluator.h b/internal/ceres/program_evaluator.h index 19c7541..8aa2a39 100644 --- a/internal/ceres/program_evaluator.h +++ b/internal/ceres/program_evaluator.h
@@ -86,13 +86,13 @@ #include <map> #include <string> #include <vector> -#include "ceres/blas.h" #include "ceres/execution_summary.h" #include "ceres/internal/eigen.h" #include "ceres/internal/scoped_ptr.h" #include "ceres/parameter_block.h" #include "ceres/program.h" #include "ceres/residual_block.h" +#include "ceres/small_blas.h" namespace ceres { namespace internal {
diff --git a/internal/ceres/residual_block.cc b/internal/ceres/residual_block.cc index 649f3f7..621082a 100644 --- a/internal/ceres/residual_block.cc +++ b/internal/ceres/residual_block.cc
@@ -34,8 +34,6 @@ #include <algorithm> #include <cstddef> #include <vector> - -#include "ceres/blas.h" #include "ceres/corrector.h" #include "ceres/parameter_block.h" #include "ceres/residual_block_utils.h" @@ -44,6 +42,7 @@ #include "ceres/internal/fixed_array.h" #include "ceres/local_parameterization.h" #include "ceres/loss_function.h" +#include "ceres/small_blas.h" using Eigen::Dynamic;
diff --git a/internal/ceres/schur_complement_solver.cc b/internal/ceres/schur_complement_solver.cc index 0df9304..c21d3b5 100644 --- a/internal/ceres/schur_complement_solver.cc +++ b/internal/ceres/schur_complement_solver.cc
@@ -47,6 +47,7 @@ #include "ceres/internal/eigen.h" #include "ceres/internal/port.h" #include "ceres/internal/scoped_ptr.h" +#include "ceres/lapack.h" #include "ceres/linear_solver.h" #include "ceres/schur_complement_solver.h" #include "ceres/suitesparse.h" @@ -130,15 +131,22 @@ return true; } - // TODO(sameeragarwal): Add proper error handling; this completely ignores - // the quality of the solution to the solve. - VectorRef(solution, num_rows) = - ConstMatrixRef(m->values(), num_rows, num_rows) - .selfadjointView<Eigen::Upper>() - .llt() - .solve(ConstVectorRef(rhs(), num_rows)); + if (options().dense_linear_algebra_library_type == EIGEN) { + // TODO(sameeragarwal): Add proper error handling; this completely ignores + // the quality of the solution to the solve. + VectorRef(solution, num_rows) = + ConstMatrixRef(m->values(), num_rows, num_rows) + .selfadjointView<Eigen::Upper>() + .llt() + .solve(ConstVectorRef(rhs(), num_rows)); + return true; + } - return true; + VectorRef(solution, num_rows) = ConstVectorRef(rhs(), num_rows); + const int info = LAPACK::SolveInPlaceUsingCholesky(num_rows, + m->values(), + solution); + return (info == 0); } #if !defined(CERES_NO_SUITESPARSE) || !defined(CERES_NO_CXSPARE) @@ -243,18 +251,18 @@ } bool SparseSchurComplementSolver::SolveReducedLinearSystem(double* solution) { - switch (options().sparse_linear_algebra_library) { + switch (options().sparse_linear_algebra_library_type) { case SUITE_SPARSE: return SolveReducedLinearSystemUsingSuiteSparse(solution); case CX_SPARSE: return SolveReducedLinearSystemUsingCXSparse(solution); default: LOG(FATAL) << "Unknown sparse linear algebra library : " - << options().sparse_linear_algebra_library; + << options().sparse_linear_algebra_library_type; } LOG(FATAL) << "Unknown sparse linear algebra library : " - << options().sparse_linear_algebra_library; + << options().sparse_linear_algebra_library_type; return false; }
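The LAPACK branch above hands the assembled Schur complement straight to the new Cholesky wrapper after copying rhs() into solution. A minimal standalone sketch of that call (Example() and the values are illustrative):

    #include "ceres/lapack.h"

    void Example() {
      // Solve lhs * x = rhs for the SPD matrix [[4, 1], [1, 3]], stored
      // column major; only the lower triangle is referenced and the rhs
      // is overwritten with the solution.
      double lhs[2 * 2] = {4, 1,   // first column
                           0, 3};  // second column; entry (0, 1) unused
      double rhs_and_solution[2] = {1, 2};
      const int info = ceres::internal::LAPACK::SolveInPlaceUsingCholesky(
          2, lhs, rhs_and_solution);
      // info == 0 on success; rhs_and_solution now contains x.
      (void)info;
    }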
diff --git a/internal/ceres/schur_complement_solver_test.cc b/internal/ceres/schur_complement_solver_test.cc index 206d4b5..745ea8e 100644 --- a/internal/ceres/schur_complement_solver_test.cc +++ b/internal/ceres/schur_complement_solver_test.cc
@@ -87,7 +87,8 @@ int problem_id, bool regularization, ceres::LinearSolverType linear_solver_type, - ceres::SparseLinearAlgebraLibraryType sparse_linear_algebra_library, + ceres::DenseLinearAlgebraLibraryType dense_linear_algebra_library_type, + ceres::SparseLinearAlgebraLibraryType sparse_linear_algebra_library_type, bool use_postordering) { SetUpFromProblemId(problem_id); LinearSolver::Options options; @@ -95,7 +96,10 @@ options.elimination_groups.push_back( A->block_structure()->cols.size() - num_eliminate_blocks); options.type = linear_solver_type; - options.sparse_linear_algebra_library = sparse_linear_algebra_library; + options.dense_linear_algebra_library_type = + dense_linear_algebra_library_type; + options.sparse_linear_algebra_library_type = + sparse_linear_algebra_library_type; options.use_postordering = use_postordering; scoped_ptr<LinearSolver> solver(LinearSolver::Create(options)); @@ -131,53 +135,65 @@ scoped_array<double> sol_d; }; -TEST_F(SchurComplementSolverTest, DenseSchurWithSmallProblem) { - ComputeAndCompareSolutions(2, false, DENSE_SCHUR, SUITE_SPARSE, true); - ComputeAndCompareSolutions(2, true, DENSE_SCHUR, SUITE_SPARSE, true); +TEST_F(SchurComplementSolverTest, EigenBasedDenseSchurWithSmallProblem) { + ComputeAndCompareSolutions(2, false, DENSE_SCHUR, EIGEN, SUITE_SPARSE, true); + ComputeAndCompareSolutions(2, true, DENSE_SCHUR, EIGEN, SUITE_SPARSE, true); } -TEST_F(SchurComplementSolverTest, DenseSchurWithLargeProblem) { - ComputeAndCompareSolutions(3, false, DENSE_SCHUR, SUITE_SPARSE, true); - ComputeAndCompareSolutions(3, true, DENSE_SCHUR, SUITE_SPARSE, true); +TEST_F(SchurComplementSolverTest, EigenBasedDenseSchurWithLargeProblem) { + ComputeAndCompareSolutions(3, false, DENSE_SCHUR, EIGEN, SUITE_SPARSE, true); + ComputeAndCompareSolutions(3, true, DENSE_SCHUR, EIGEN, SUITE_SPARSE, true); } +#ifndef CERES_NO_LAPACK +TEST_F(SchurComplementSolverTest, LAPACKBasedDenseSchurWithSmallProblem) { + ComputeAndCompareSolutions(2, false, DENSE_SCHUR, LAPACK, SUITE_SPARSE, true); + ComputeAndCompareSolutions(2, true, DENSE_SCHUR, LAPACK, SUITE_SPARSE, true); +} + +TEST_F(SchurComplementSolverTest, LAPACKBasedDenseSchurWithLargeProblem) { + ComputeAndCompareSolutions(3, false, DENSE_SCHUR, LAPACK, SUITE_SPARSE, true); + ComputeAndCompareSolutions(3, true, DENSE_SCHUR, LAPACK, SUITE_SPARSE, true); +} +#endif + #ifndef CERES_NO_SUITESPARSE TEST_F(SchurComplementSolverTest, SparseSchurWithSuiteSparseSmallProblemNoPostOrdering) { - ComputeAndCompareSolutions(2, false, SPARSE_SCHUR, SUITE_SPARSE, false); - ComputeAndCompareSolutions(2, true, SPARSE_SCHUR, SUITE_SPARSE, false); + ComputeAndCompareSolutions(2, false, SPARSE_SCHUR, EIGEN, SUITE_SPARSE, false); + ComputeAndCompareSolutions(2, true, SPARSE_SCHUR, EIGEN, SUITE_SPARSE, false); } TEST_F(SchurComplementSolverTest, SparseSchurWithSuiteSparseSmallProblemPostOrdering) { - ComputeAndCompareSolutions(2, false, SPARSE_SCHUR, SUITE_SPARSE, true); - ComputeAndCompareSolutions(2, true, SPARSE_SCHUR, SUITE_SPARSE, true); + ComputeAndCompareSolutions(2, false, SPARSE_SCHUR, EIGEN, SUITE_SPARSE, true); + ComputeAndCompareSolutions(2, true, SPARSE_SCHUR, EIGEN, SUITE_SPARSE, true); } TEST_F(SchurComplementSolverTest, SparseSchurWithSuiteSparseLargeProblemNoPostOrdering) { - ComputeAndCompareSolutions(3, false, SPARSE_SCHUR, SUITE_SPARSE, false); - ComputeAndCompareSolutions(3, true, SPARSE_SCHUR, SUITE_SPARSE, false); + ComputeAndCompareSolutions(3, false, SPARSE_SCHUR, EIGEN, SUITE_SPARSE, false); + ComputeAndCompareSolutions(3, 
true, SPARSE_SCHUR, EIGEN, SUITE_SPARSE, false); } TEST_F(SchurComplementSolverTest, SparseSchurWithSuiteSparseLargeProblemPostOrdering) { - ComputeAndCompareSolutions(3, false, SPARSE_SCHUR, SUITE_SPARSE, true); - ComputeAndCompareSolutions(3, true, SPARSE_SCHUR, SUITE_SPARSE, true); + ComputeAndCompareSolutions(3, false, SPARSE_SCHUR, EIGEN, SUITE_SPARSE, true); + ComputeAndCompareSolutions(3, true, SPARSE_SCHUR, EIGEN, SUITE_SPARSE, true); } #endif // CERES_NO_SUITESPARSE #ifndef CERES_NO_CXSPARSE TEST_F(SchurComplementSolverTest, SparseSchurWithSuiteSparseSmallProblem) { - ComputeAndCompareSolutions(2, false, SPARSE_SCHUR, SUITE_SPARSE, true); - ComputeAndCompareSolutions(2, true, SPARSE_SCHUR, SUITE_SPARSE, true); + ComputeAndCompareSolutions(2, false, SPARSE_SCHUR, EIGEN, CX_SPARSE, true); + ComputeAndCompareSolutions(2, true, SPARSE_SCHUR, EIGEN, CX_SPARSE, true); } TEST_F(SchurComplementSolverTest, SparseSchurWithSuiteSparseLargeProblem) { - ComputeAndCompareSolutions(3, false, SPARSE_SCHUR, SUITE_SPARSE, true); - ComputeAndCompareSolutions(3, true, SPARSE_SCHUR, SUITE_SPARSE, true); + ComputeAndCompareSolutions(3, false, SPARSE_SCHUR, EIGEN, CX_SPARSE, true); + ComputeAndCompareSolutions(3, true, SPARSE_SCHUR, EIGEN, CX_SPARSE, true); } #endif // CERES_NO_CXSPARSE
diff --git a/internal/ceres/schur_eliminator_impl.h b/internal/ceres/schur_eliminator_impl.h index f072c88..c09b7fb 100644 --- a/internal/ceres/schur_eliminator_impl.h +++ b/internal/ceres/schur_eliminator_impl.h
@@ -51,8 +51,6 @@ #include <algorithm> #include <map> - -#include "ceres/blas.h" #include "ceres/block_random_access_matrix.h" #include "ceres/block_sparse_matrix.h" #include "ceres/block_structure.h" @@ -61,6 +59,7 @@ #include "ceres/internal/scoped_ptr.h" #include "ceres/map_util.h" #include "ceres/schur_eliminator.h" +#include "ceres/small_blas.h" #include "ceres/stl_util.h" #include "Eigen/Dense" #include "glog/logging.h"
diff --git a/internal/ceres/small_blas.h b/internal/ceres/small_blas.h new file mode 100644 index 0000000..e14e664 --- /dev/null +++ b/internal/ceres/small_blas.h
@@ -0,0 +1,406 @@ +// Ceres Solver - A fast non-linear least squares minimizer +// Copyright 2013 Google Inc. All rights reserved. +// http://code.google.com/p/ceres-solver/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of Google Inc. nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +// Author: sameeragarwal@google.com (Sameer Agarwal) +// +// Simple blas functions for use in the Schur Eliminator. These are +// fairly basic implementations which already yield a significant +// speedup in the eliminator performance. + +#ifndef CERES_INTERNAL_SMALL_BLAS_H_ +#define CERES_INTERNAL_SMALL_BLAS_H_ + +#include "ceres/internal/eigen.h" +#include "glog/logging.h" + +namespace ceres { +namespace internal { + +// Remove the ".noalias()" annotation from the matrix matrix +// mutliplies to produce a correct build with the Android NDK, +// including versions 6, 7, 8, and 8b, when built with STLPort and the +// non-standalone toolchain (i.e. ndk-build). This appears to be a +// compiler bug; if the workaround is not in place, the line +// +// block.noalias() -= A * B; +// +// gets compiled to +// +// block.noalias() += A * B; +// +// which breaks schur elimination. Introducing a temporary by removing the +// .noalias() annotation causes the issue to disappear. Tracking this +// issue down was tricky, since the test suite doesn't run when built with +// the non-standalone toolchain. +// +// TODO(keir): Make a reproduction case for this and send it upstream. +#ifdef CERES_WORK_AROUND_ANDROID_NDK_COMPILER_BUG +#define CERES_MAYBE_NOALIAS +#else +#define CERES_MAYBE_NOALIAS .noalias() +#endif + +// The following three macros are used to share code and reduce +// template junk across the various GEMM variants. 
+#define CERES_GEMM_BEGIN(name) \ + template<int kRowA, int kColA, int kRowB, int kColB, int kOperation> \ + inline void name(const double* A, \ + const int num_row_a, \ + const int num_col_a, \ + const double* B, \ + const int num_row_b, \ + const int num_col_b, \ + double* C, \ + const int start_row_c, \ + const int start_col_c, \ + const int row_stride_c, \ + const int col_stride_c) + +#define CERES_GEMM_NAIVE_HEADER \ + DCHECK_GT(num_row_a, 0); \ + DCHECK_GT(num_col_a, 0); \ + DCHECK_GT(num_row_b, 0); \ + DCHECK_GT(num_col_b, 0); \ + DCHECK_GE(start_row_c, 0); \ + DCHECK_GE(start_col_c, 0); \ + DCHECK_GT(row_stride_c, 0); \ + DCHECK_GT(col_stride_c, 0); \ + DCHECK((kRowA == Eigen::Dynamic) || (kRowA == num_row_a)); \ + DCHECK((kColA == Eigen::Dynamic) || (kColA == num_col_a)); \ + DCHECK((kRowB == Eigen::Dynamic) || (kRowB == num_row_b)); \ + DCHECK((kColB == Eigen::Dynamic) || (kColB == num_col_b)); \ + const int NUM_ROW_A = (kRowA != Eigen::Dynamic ? kRowA : num_row_a); \ + const int NUM_COL_A = (kColA != Eigen::Dynamic ? kColA : num_col_a); \ + const int NUM_ROW_B = (kColB != Eigen::Dynamic ? kRowB : num_row_b); \ + const int NUM_COL_B = (kColB != Eigen::Dynamic ? kColB : num_col_b); + +#define CERES_GEMM_EIGEN_HEADER \ + const typename EigenTypes<kRowA, kColA>::ConstMatrixRef \ + Aref(A, num_row_a, num_col_a); \ + const typename EigenTypes<kRowB, kColB>::ConstMatrixRef \ + Bref(B, num_row_b, num_col_b); \ + MatrixRef Cref(C, row_stride_c, col_stride_c); \ + +#define CERES_CALL_GEMM(name) \ + name<kRowA, kColA, kRowB, kColB, kOperation>( \ + A, num_row_a, num_col_a, \ + B, num_row_b, num_col_b, \ + C, start_row_c, start_col_c, row_stride_c, col_stride_c); + + +// For the matrix-matrix functions below, there are three variants for +// each functionality. Foo, FooNaive and FooEigen. Foo is the one to +// be called by the user. FooNaive is a basic loop based +// implementation and FooEigen uses Eigen's implementation. Foo +// chooses between FooNaive and FooEigen depending on how many of the +// template arguments are fixed at compile time. Currently, FooEigen +// is called if all matrix dimensions are compile time +// constants. FooNaive is called otherwise. This leads to the best +// performance currently. +// +// The MatrixMatrixMultiply variants compute: +// +// C op A * B; +// +// The MatrixTransposeMatrixMultiply variants compute: +// +// C op A' * B +// +// where op can be +=, -=, or =. +// +// The template parameters (kRowA, kColA, kRowB, kColB) allow +// specialization of the loop at compile time. If this information is +// not available, then Eigen::Dynamic should be used as the template +// argument. +// +// kOperation = 1 -> C += A * B +// kOperation = -1 -> C -= A * B +// kOperation = 0 -> C = A * B +// +// The functions can write into matrices C which are larger than the +// matrix A * B. This is done by specifying the true size of C via +// row_stride_c and col_stride_c, and then indicating where A * B +// should be written into by start_row_c and start_col_c. 
+// +// Graphically if row_stride_c = 10, col_stride_c = 12, start_row_c = +// 4 and start_col_c = 5, then if A = 3x2 and B = 2x4, we get +// +// ------------ +// ------------ +// ------------ +// ------------ +// -----xxxx--- +// -----xxxx--- +// -----xxxx--- +// ------------ +// ------------ +// ------------ +// +CERES_GEMM_BEGIN(MatrixMatrixMultiplyEigen) { + CERES_GEMM_EIGEN_HEADER + Eigen::Block<MatrixRef, kRowA, kColB> + block(Cref, start_row_c, start_col_c, num_row_a, num_col_b); + + if (kOperation > 0) { + block CERES_MAYBE_NOALIAS += Aref * Bref; + } else if (kOperation < 0) { + block CERES_MAYBE_NOALIAS -= Aref * Bref; + } else { + block CERES_MAYBE_NOALIAS = Aref * Bref; + } +} + +CERES_GEMM_BEGIN(MatrixMatrixMultiplyNaive) { + CERES_GEMM_NAIVE_HEADER + DCHECK_EQ(NUM_COL_A, NUM_ROW_B); + + const int NUM_ROW_C = NUM_ROW_A; + const int NUM_COL_C = NUM_COL_B; + DCHECK_LE(start_row_c + NUM_ROW_C, row_stride_c); + DCHECK_LE(start_col_c + NUM_COL_C, col_stride_c); + + for (int row = 0; row < NUM_ROW_C; ++row) { + for (int col = 0; col < NUM_COL_C; ++col) { + double tmp = 0.0; + for (int k = 0; k < NUM_COL_A; ++k) { + tmp += A[row * NUM_COL_A + k] * B[k * NUM_COL_B + col]; + } + + const int index = (row + start_row_c) * col_stride_c + start_col_c + col; + if (kOperation > 0) { + C[index] += tmp; + } else if (kOperation < 0) { + C[index] -= tmp; + } else { + C[index] = tmp; + } + } + } +} + +CERES_GEMM_BEGIN(MatrixMatrixMultiply) { +#ifdef CERES_NO_CUSTOM_BLAS + + CERES_CALL_GEMM(MatrixMatrixMultiplyEigen) + return; + +#else + + if (kRowA != Eigen::Dynamic && kColA != Eigen::Dynamic && + kRowB != Eigen::Dynamic && kColB != Eigen::Dynamic) { + CERES_CALL_GEMM(MatrixMatrixMultiplyEigen) + } else { + CERES_CALL_GEMM(MatrixMatrixMultiplyNaive) + } + +#endif +} + +CERES_GEMM_BEGIN(MatrixTransposeMatrixMultiplyEigen) { + CERES_GEMM_EIGEN_HEADER + Eigen::Block<MatrixRef, kColA, kColB> block(Cref, + start_row_c, start_col_c, + num_col_a, num_col_b); + if (kOperation > 0) { + block CERES_MAYBE_NOALIAS += Aref.transpose() * Bref; + } else if (kOperation < 0) { + block CERES_MAYBE_NOALIAS -= Aref.transpose() * Bref; + } else { + block CERES_MAYBE_NOALIAS = Aref.transpose() * Bref; + } +} + +CERES_GEMM_BEGIN(MatrixTransposeMatrixMultiplyNaive) { + CERES_GEMM_NAIVE_HEADER + DCHECK_EQ(NUM_ROW_A, NUM_ROW_B); + + const int NUM_ROW_C = NUM_COL_A; + const int NUM_COL_C = NUM_COL_B; + DCHECK_LE(start_row_c + NUM_ROW_C, row_stride_c); + DCHECK_LE(start_col_c + NUM_COL_C, col_stride_c); + + for (int row = 0; row < NUM_ROW_C; ++row) { + for (int col = 0; col < NUM_COL_C; ++col) { + double tmp = 0.0; + for (int k = 0; k < NUM_ROW_A; ++k) { + tmp += A[k * NUM_COL_A + row] * B[k * NUM_COL_B + col]; + } + + const int index = (row + start_row_c) * col_stride_c + start_col_c + col; + if (kOperation > 0) { + C[index]+= tmp; + } else if (kOperation < 0) { + C[index]-= tmp; + } else { + C[index]= tmp; + } + } + } +} + +CERES_GEMM_BEGIN(MatrixTransposeMatrixMultiply) { +#ifdef CERES_NO_CUSTOM_BLAS + + CERES_CALL_GEMM(MatrixTransposeMatrixMultiplyEigen) + return; + +#else + + if (kRowA != Eigen::Dynamic && kColA != Eigen::Dynamic && + kRowB != Eigen::Dynamic && kColB != Eigen::Dynamic) { + CERES_CALL_GEMM(MatrixTransposeMatrixMultiplyEigen) + } else { + CERES_CALL_GEMM(MatrixTransposeMatrixMultiplyNaive) + } + +#endif +} + +// Matrix-Vector multiplication +// +// c op A * b; +// +// where op can be +=, -=, or =. +// +// The template parameters (kRowA, kColA) allow specialization of the +// loop at compile time. 
If this information is not available, then
+// Eigen::Dynamic should be used as the template argument.
+//
+//   kOperation =  1 -> c += A * b
+//   kOperation = -1 -> c -= A * b
+//   kOperation =  0 -> c  = A * b
+template<int kRowA, int kColA, int kOperation>
+inline void MatrixVectorMultiply(const double* A,
+                                 const int num_row_a,
+                                 const int num_col_a,
+                                 const double* b,
+                                 double* c) {
+#ifdef CERES_NO_CUSTOM_BLAS
+  const typename EigenTypes<kRowA, kColA>::ConstMatrixRef
+      Aref(A, num_row_a, num_col_a);
+  const typename EigenTypes<kColA>::ConstVectorRef bref(b, num_col_a);
+  typename EigenTypes<kRowA>::VectorRef cref(c, num_row_a);
+
+  // lazyProduct works better than .noalias() for matrix-vector
+  // products.
+  if (kOperation > 0) {
+    cref += Aref.lazyProduct(bref);
+  } else if (kOperation < 0) {
+    cref -= Aref.lazyProduct(bref);
+  } else {
+    cref = Aref.lazyProduct(bref);
+  }
+#else
+
+  DCHECK_GT(num_row_a, 0);
+  DCHECK_GT(num_col_a, 0);
+  DCHECK((kRowA == Eigen::Dynamic) || (kRowA == num_row_a));
+  DCHECK((kColA == Eigen::Dynamic) || (kColA == num_col_a));
+
+  const int NUM_ROW_A = (kRowA != Eigen::Dynamic ? kRowA : num_row_a);
+  const int NUM_COL_A = (kColA != Eigen::Dynamic ? kColA : num_col_a);
+
+  for (int row = 0; row < NUM_ROW_A; ++row) {
+    double tmp = 0.0;
+    for (int col = 0; col < NUM_COL_A; ++col) {
+      tmp += A[row * NUM_COL_A + col] * b[col];
+    }
+
+    if (kOperation > 0) {
+      c[row] += tmp;
+    } else if (kOperation < 0) {
+      c[row] -= tmp;
+    } else {
+      c[row] = tmp;
+    }
+  }
+#endif  // CERES_NO_CUSTOM_BLAS
+}
+
+// Similar to MatrixVectorMultiply, except that A is transposed, i.e.,
+//
+//   c op A' * b
+template<int kRowA, int kColA, int kOperation>
+inline void MatrixTransposeVectorMultiply(const double* A,
+                                          const int num_row_a,
+                                          const int num_col_a,
+                                          const double* b,
+                                          double* c) {
+#ifdef CERES_NO_CUSTOM_BLAS
+  const typename EigenTypes<kRowA, kColA>::ConstMatrixRef
+      Aref(A, num_row_a, num_col_a);
+  const typename EigenTypes<kRowA>::ConstVectorRef bref(b, num_row_a);
+  typename EigenTypes<kColA>::VectorRef cref(c, num_col_a);
+
+  // lazyProduct works better than .noalias() for matrix-vector
+  // products.
+  if (kOperation > 0) {
+    cref += Aref.transpose().lazyProduct(bref);
+  } else if (kOperation < 0) {
+    cref -= Aref.transpose().lazyProduct(bref);
+  } else {
+    cref = Aref.transpose().lazyProduct(bref);
+  }
+#else
+
+  DCHECK_GT(num_row_a, 0);
+  DCHECK_GT(num_col_a, 0);
+  DCHECK((kRowA == Eigen::Dynamic) || (kRowA == num_row_a));
+  DCHECK((kColA == Eigen::Dynamic) || (kColA == num_col_a));
+
+  const int NUM_ROW_A = (kRowA != Eigen::Dynamic ? kRowA : num_row_a);
+  const int NUM_COL_A = (kColA != Eigen::Dynamic ? kColA : num_col_a);
+
+  for (int row = 0; row < NUM_COL_A; ++row) {
+    double tmp = 0.0;
+    for (int col = 0; col < NUM_ROW_A; ++col) {
+      tmp += A[col * NUM_COL_A + row] * b[col];
+    }
+
+    if (kOperation > 0) {
+      c[row] += tmp;
+    } else if (kOperation < 0) {
+      c[row] -= tmp;
+    } else {
+      c[row] = tmp;
+    }
+  }
+#endif  // CERES_NO_CUSTOM_BLAS
+}
+
+
+#undef CERES_MAYBE_NOALIAS
+#undef CERES_GEMM_BEGIN
+#undef CERES_GEMM_EIGEN_HEADER
+#undef CERES_GEMM_NAIVE_HEADER
+#undef CERES_CALL_GEMM
+
+}  // namespace internal
+}  // namespace ceres
+
+#endif  // CERES_INTERNAL_SMALL_BLAS_H_
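A minimal usage sketch for the kernels above; the matrices, sizes and the Example function are illustrative only and not taken from the Schur eliminator. With all four dimensions fixed at compile time, MatrixMatrixMultiply dispatches to the Eigen specialization; Eigen::Dynamic is passed when the sizes are only known at run time.

  #include <vector>

  #include "ceres/small_blas.h"

  void Example() {
    // A is 3x2 and B is 2x4, both row major, matching the diagram above.
    const double A[3 * 2] = {1, 2, 3, 4, 5, 6};
    const double B[2 * 4] = {1, 0, 0, 1, 0, 1, 1, 0};
    std::vector<double> C(10 * 12, 0.0);  // 10x12 output buffer.

    // C(4:6, 5:8) += A * B, written into the larger buffer via the
    // stride (10, 12) and start (4, 5) arguments.
    ceres::internal::MatrixMatrixMultiply<3, 2, 2, 4, 1>(
        A, 3, 2, B, 2, 4, &C[0], 4, 5, 10, 12);

    // c = A' * b, with the dimensions treated as run-time quantities.
    const double b[3] = {1, 1, 1};
    double c[2] = {0, 0};
    ceres::internal::MatrixTransposeVectorMultiply<Eigen::Dynamic,
                                                   Eigen::Dynamic, 0>(
        A, 3, 2, b, c);
  }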
diff --git a/internal/ceres/blas_test.cc b/internal/ceres/small_blas_test.cc similarity index 99% rename from internal/ceres/blas_test.cc rename to internal/ceres/small_blas_test.cc index efa7e7b..b8b5bc5 100644 --- a/internal/ceres/blas_test.cc +++ b/internal/ceres/small_blas_test.cc
@@ -28,7 +28,7 @@ // // Author: keir@google.com (Keir Mierle) -#include "ceres/blas.h" +#include "ceres/small_blas.h" #include "gtest/gtest.h" #include "ceres/internal/eigen.h"
diff --git a/internal/ceres/solver.cc b/internal/ceres/solver.cc index 5d8447d..3b67746 100644 --- a/internal/ceres/solver.cc +++ b/internal/ceres/solver.cc
@@ -120,7 +120,8 @@ inner_iterations_used(false), preconditioner_type(IDENTITY), trust_region_strategy_type(LEVENBERG_MARQUARDT), - sparse_linear_algebra_library(SUITE_SPARSE), + dense_linear_algebra_library_type(EIGEN), + sparse_linear_algebra_library_type(SUITE_SPARSE), line_search_direction_type(LBFGS), line_search_type(ARMIJO) { } @@ -192,6 +193,15 @@ // TRUST_SEARCH HEADER StringAppendF(&report, "\nMinimizer %19s\n", "TRUST_REGION"); + + if (linear_solver_type_used == DENSE_NORMAL_CHOLESKY || + linear_solver_type_used == DENSE_SCHUR || + linear_solver_type_used == DENSE_QR) { + StringAppendF(&report, "\nDense linear algebra library %15s\n", + DenseLinearAlgebraLibraryTypeToString( + dense_linear_algebra_library_type)); + } + if (linear_solver_type_used == SPARSE_NORMAL_CHOLESKY || linear_solver_type_used == SPARSE_SCHUR || (linear_solver_type_used == ITERATIVE_SCHUR && @@ -199,7 +209,7 @@ preconditioner_type == CLUSTER_TRIDIAGONAL))) { StringAppendF(&report, "\nSparse linear algebra library %15s\n", SparseLinearAlgebraLibraryTypeToString( - sparse_linear_algebra_library)); + sparse_linear_algebra_library_type)); } StringAppendF(&report, "Trust region strategy %19s",
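The new FullReport block is driven by the user-visible option added in this change. A minimal sketch of requesting the LAPACK backend; the SolveWithLapack function is illustrative and assumes a ceres::Problem has been built elsewhere.

  #include <iostream>

  #include "ceres/ceres.h"

  void SolveWithLapack(ceres::Problem* problem) {
    ceres::Solver::Options options;
    options.linear_solver_type = ceres::DENSE_NORMAL_CHOLESKY;
    // Use the system BLAS + LAPACK instead of Eigen for the dense
    // factorization; EIGEN remains the default.
    options.dense_linear_algebra_library_type = ceres::LAPACK;

    ceres::Solver::Summary summary;
    ceres::Solve(options, problem, &summary);
    // FullReport() now includes a "Dense linear algebra library" line.
    std::cout << summary.FullReport() << "\n";
  }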
diff --git a/internal/ceres/solver_impl.cc b/internal/ceres/solver_impl.cc index d6ef731..bd731fe 100644 --- a/internal/ceres/solver_impl.cc +++ b/internal/ceres/solver_impl.cc
@@ -506,8 +506,10 @@ original_options.num_linear_solver_threads; summary->num_linear_solver_threads_used = options.num_linear_solver_threads; - summary->sparse_linear_algebra_library = - options.sparse_linear_algebra_library; + summary->dense_linear_algebra_library_type = + options.dense_linear_algebra_library_type; + summary->sparse_linear_algebra_library_type = + options.sparse_linear_algebra_library_type; summary->trust_region_strategy_type = options.trust_region_strategy_type; summary->dogleg_type = options.dogleg_type; @@ -1092,7 +1094,7 @@ if (IsSchurType(options->linear_solver_type)) { if (!ReorderProgramForSchurTypeLinearSolver( options->linear_solver_type, - options->sparse_linear_algebra_library, + options->sparse_linear_algebra_library_type, problem_impl->parameter_map(), linear_solver_ordering, transformed_program.get(), @@ -1104,7 +1106,7 @@ if (options->linear_solver_type == SPARSE_NORMAL_CHOLESKY) { if (!ReorderProgramForSparseNormalCholesky( - options->sparse_linear_algebra_library, + options->sparse_linear_algebra_library_type, linear_solver_ordering, transformed_program.get(), error)) { @@ -1134,9 +1136,32 @@ } } +#ifdef CERES_NO_LAPACK + if (options->linear_solver_type == DENSE_NORMAL_CHOLESKY && + options->dense_linear_algebra_library_type == LAPACK) { + *error = "Can't use DENSE_NORMAL_CHOLESKY with LAPACK because " + "LAPACK was not enabled when Ceres was built."; + return NULL; + } + + if (options->linear_solver_type == DENSE_QR && + options->dense_linear_algebra_library_type == LAPACK) { + *error = "Can't use DENSE_QR with LAPACK because " + "LAPACK was not enabled when Ceres was built."; + return NULL; + } + + if (options->linear_solver_type == DENSE_SCHUR && + options->dense_linear_algebra_library_type == LAPACK) { + *error = "Can't use DENSE_SCHUR with LAPACK because " + "LAPACK was not enabled when Ceres was built."; + return NULL; + } +#endif + #ifdef CERES_NO_SUITESPARSE if (options->linear_solver_type == SPARSE_NORMAL_CHOLESKY && - options->sparse_linear_algebra_library == SUITE_SPARSE) { + options->sparse_linear_algebra_library_type == SUITE_SPARSE) { *error = "Can't use SPARSE_NORMAL_CHOLESKY with SUITESPARSE because " "SuiteSparse was not enabled when Ceres was built."; return NULL; @@ -1157,7 +1182,7 @@ #ifdef CERES_NO_CXSPARSE if (options->linear_solver_type == SPARSE_NORMAL_CHOLESKY && - options->sparse_linear_algebra_library == CX_SPARSE) { + options->sparse_linear_algebra_library_type == CX_SPARSE) { *error = "Can't use SPARSE_NORMAL_CHOLESKY with CXSPARSE because " "CXSparse was not enabled when Ceres was built."; return NULL; @@ -1194,8 +1219,10 @@ options->max_linear_solver_iterations; linear_solver_options.type = options->linear_solver_type; linear_solver_options.preconditioner_type = options->preconditioner_type; - linear_solver_options.sparse_linear_algebra_library = - options->sparse_linear_algebra_library; + linear_solver_options.sparse_linear_algebra_library_type = + options->sparse_linear_algebra_library_type; + linear_solver_options.dense_linear_algebra_library_type = + options->dense_linear_algebra_library_type; linear_solver_options.use_postordering = options->use_postordering; // Ignore user's postordering preferences and force it to be true if @@ -1204,7 +1231,7 @@ // done. 
#if !defined(CERES_NO_SUITESPARSE) && defined(CERES_NO_CAMD) if (IsSchurType(linear_solver_options.type) && - linear_solver_options.sparse_linear_algebra_library == SUITE_SPARSE) { + linear_solver_options.sparse_linear_algebra_library_type == SUITE_SPARSE) { linear_solver_options.use_postordering = true; } #endif
diff --git a/internal/ceres/solver_impl_test.cc b/internal/ceres/solver_impl_test.cc index d81858c..4f1e549 100644 --- a/internal/ceres/solver_impl_test.cc +++ b/internal/ceres/solver_impl_test.cc
@@ -592,7 +592,7 @@ #ifndef CERES_NO_SUITESPARSE options.linear_solver_type = SPARSE_NORMAL_CHOLESKY; - options.sparse_linear_algebra_library = SUITE_SPARSE; + options.sparse_linear_algebra_library_type = SUITE_SPARSE; solver.reset(SolverImpl::CreateLinearSolver(&options, &error)); EXPECT_EQ(options.linear_solver_type, SPARSE_NORMAL_CHOLESKY); EXPECT_TRUE(solver.get() != NULL); @@ -600,7 +600,7 @@ #ifndef CERES_NO_CXSPARSE options.linear_solver_type = SPARSE_NORMAL_CHOLESKY; - options.sparse_linear_algebra_library = CX_SPARSE; + options.sparse_linear_algebra_library_type = CX_SPARSE; solver.reset(SolverImpl::CreateLinearSolver(&options, &error)); EXPECT_EQ(options.linear_solver_type, SPARSE_NORMAL_CHOLESKY); EXPECT_TRUE(solver.get() != NULL);
diff --git a/internal/ceres/sparse_normal_cholesky_solver.cc b/internal/ceres/sparse_normal_cholesky_solver.cc index 9601142..cfb0f17 100644 --- a/internal/ceres/sparse_normal_cholesky_solver.cc +++ b/internal/ceres/sparse_normal_cholesky_solver.cc
@@ -85,18 +85,18 @@ const double* b, const LinearSolver::PerSolveOptions& per_solve_options, double * x) { - switch (options_.sparse_linear_algebra_library) { + switch (options_.sparse_linear_algebra_library_type) { case SUITE_SPARSE: return SolveImplUsingSuiteSparse(A, b, per_solve_options, x); case CX_SPARSE: return SolveImplUsingCXSparse(A, b, per_solve_options, x); default: LOG(FATAL) << "Unknown sparse linear algebra library : " - << options_.sparse_linear_algebra_library; + << options_.sparse_linear_algebra_library_type; } LOG(FATAL) << "Unknown sparse linear algebra library : " - << options_.sparse_linear_algebra_library; + << options_.sparse_linear_algebra_library_type; return LinearSolver::Summary(); }
diff --git a/internal/ceres/system_test.cc b/internal/ceres/system_test.cc index 095b51e..7b0e02d 100644 --- a/internal/ceres/system_test.cc +++ b/internal/ceres/system_test.cc
@@ -64,21 +64,21 @@ // Struct used for configuring the solver. struct SolverConfig { SolverConfig(LinearSolverType linear_solver_type, - SparseLinearAlgebraLibraryType sparse_linear_algebra_library, + SparseLinearAlgebraLibraryType sparse_linear_algebra_library_type, bool use_automatic_ordering) : linear_solver_type(linear_solver_type), - sparse_linear_algebra_library(sparse_linear_algebra_library), + sparse_linear_algebra_library_type(sparse_linear_algebra_library_type), use_automatic_ordering(use_automatic_ordering), preconditioner_type(IDENTITY), num_threads(1) { } SolverConfig(LinearSolverType linear_solver_type, - SparseLinearAlgebraLibraryType sparse_linear_algebra_library, + SparseLinearAlgebraLibraryType sparse_linear_algebra_library_type, bool use_automatic_ordering, PreconditionerType preconditioner_type) : linear_solver_type(linear_solver_type), - sparse_linear_algebra_library(sparse_linear_algebra_library), + sparse_linear_algebra_library_type(sparse_linear_algebra_library_type), use_automatic_ordering(use_automatic_ordering), preconditioner_type(preconditioner_type), num_threads(1) { @@ -88,14 +88,14 @@ return StringPrintf( "(%s, %s, %s, %s, %d)", LinearSolverTypeToString(linear_solver_type), - SparseLinearAlgebraLibraryTypeToString(sparse_linear_algebra_library), + SparseLinearAlgebraLibraryTypeToString(sparse_linear_algebra_library_type), use_automatic_ordering ? "AUTOMATIC" : "USER", PreconditionerTypeToString(preconditioner_type), num_threads); } LinearSolverType linear_solver_type; - SparseLinearAlgebraLibraryType sparse_linear_algebra_library; + SparseLinearAlgebraLibraryType sparse_linear_algebra_library_type; bool use_automatic_ordering; PreconditionerType preconditioner_type; int num_threads; @@ -130,8 +130,8 @@ Solver::Options& options = *(system_test_problem->mutable_solver_options()); options.linear_solver_type = config.linear_solver_type; - options.sparse_linear_algebra_library = - config.sparse_linear_algebra_library; + options.sparse_linear_algebra_library_type = + config.sparse_linear_algebra_library_type; options.preconditioner_type = config.preconditioner_type; options.num_threads = config.num_threads; options.num_linear_solver_threads = config.num_threads; @@ -281,9 +281,9 @@ TEST(SystemTest, PowellsFunction) { vector<SolverConfig> configs; -#define CONFIGURE(linear_solver, sparse_linear_algebra_library, ordering) \ - configs.push_back(SolverConfig(linear_solver, \ - sparse_linear_algebra_library, \ +#define CONFIGURE(linear_solver, sparse_linear_algebra_library_type, ordering) \ + configs.push_back(SolverConfig(linear_solver, \ + sparse_linear_algebra_library_type, \ ordering)) CONFIGURE(DENSE_QR, SUITE_SPARSE, kAutomaticOrdering); @@ -485,9 +485,9 @@ TEST(SystemTest, BundleAdjustmentProblem) { vector<SolverConfig> configs; -#define CONFIGURE(linear_solver, sparse_linear_algebra_library, ordering, preconditioner) \ +#define CONFIGURE(linear_solver, sparse_linear_algebra_library_type, ordering, preconditioner) \ configs.push_back(SolverConfig(linear_solver, \ - sparse_linear_algebra_library, \ + sparse_linear_algebra_library_type, \ ordering, \ preconditioner))
diff --git a/internal/ceres/types.cc b/internal/ceres/types.cc index 164185e..a97f1a5 100644 --- a/internal/ceres/types.cc +++ b/internal/ceres/types.cc
@@ -101,7 +101,6 @@ } } - bool StringToSparseLinearAlgebraLibraryType( string value, SparseLinearAlgebraLibraryType* type) { @@ -111,6 +110,25 @@ return false; } +const char* DenseLinearAlgebraLibraryTypeToString( + DenseLinearAlgebraLibraryType type) { + switch (type) { + CASESTR(EIGEN); + CASESTR(LAPACK); + default: + return "UNKNOWN"; + } +} + +bool StringToDenseLinearAlgebraLibraryType( + string value, + DenseLinearAlgebraLibraryType* type) { + UpperCase(&value); + STRENUM(EIGEN); + STRENUM(LAPACK); + return false; +} + const char* TrustRegionStrategyTypeToString(TrustRegionStrategyType type) { switch (type) { CASESTR(LEVENBERG_MARQUARDT); @@ -318,4 +336,21 @@ return false; } +bool IsDenseLinearAlgebraLibraryTypeAvailable( + DenseLinearAlgebraLibraryType type) { + if (type == EIGEN) { + return true; + } + if (type == LAPACK) { +#ifdef CERES_NO_LAPACK + return false; +#else + return true; +#endif + } + + LOG(WARNING) << "Unknown dense linear algebra library " << type; + return false; +} + } // namespace ceres
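The helpers above also allow a dense backend to be chosen defensively at run time. A small sketch follows; the fallback policy is illustrative, and it assumes the declarations are exposed through ceres/types.h like the other enum helpers.

  #include <string>

  #include "ceres/types.h"

  // Map a user-supplied string (e.g. from a command line flag) to a
  // dense backend, falling back to EIGEN when the name is unknown or
  // LAPACK support was compiled out.
  ceres::DenseLinearAlgebraLibraryType ChooseDenseBackend(
      const std::string& name) {
    ceres::DenseLinearAlgebraLibraryType type = ceres::EIGEN;
    if (!ceres::StringToDenseLinearAlgebraLibraryType(name, &type) ||
        !ceres::IsDenseLinearAlgebraLibraryTypeAvailable(type)) {
      type = ceres::EIGEN;  // EIGEN is always available.
    }
    return type;
  }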
diff --git a/internal/ceres/unsymmetric_linear_solver_test.cc b/internal/ceres/unsymmetric_linear_solver_test.cc index 232f34c..af9dffe 100644 --- a/internal/ceres/unsymmetric_linear_solver_test.cc +++ b/internal/ceres/unsymmetric_linear_solver_test.cc
@@ -118,23 +118,41 @@ scoped_array<double> sol_regularized_; }; -TEST_F(UnsymmetricLinearSolverTest, DenseQR) { +TEST_F(UnsymmetricLinearSolverTest, EigenDenseQR) { LinearSolver::Options options; options.type = DENSE_QR; + options.dense_linear_algebra_library_type = EIGEN; TestSolver(options); } -TEST_F(UnsymmetricLinearSolverTest, DenseNormalCholesky) { +TEST_F(UnsymmetricLinearSolverTest, EigenDenseNormalCholesky) { LinearSolver::Options options; + options.dense_linear_algebra_library_type = EIGEN; options.type = DENSE_NORMAL_CHOLESKY; TestSolver(options); } +#ifndef CERES_NO_LAPACK +TEST_F(UnsymmetricLinearSolverTest, LAPACKDenseQR) { + LinearSolver::Options options; + options.type = DENSE_QR; + options.dense_linear_algebra_library_type = LAPACK; + TestSolver(options); +} + +TEST_F(UnsymmetricLinearSolverTest, LAPACKDenseNormalCholesky) { + LinearSolver::Options options; + options.dense_linear_algebra_library_type = LAPACK; + options.type = DENSE_NORMAL_CHOLESKY; + TestSolver(options); +} +#endif + #ifndef CERES_NO_SUITESPARSE TEST_F(UnsymmetricLinearSolverTest, SparseNormalCholeskyUsingSuiteSparsePreOrdering) { LinearSolver::Options options; - options.sparse_linear_algebra_library = SUITE_SPARSE; + options.sparse_linear_algebra_library_type = SUITE_SPARSE; options.type = SPARSE_NORMAL_CHOLESKY; options.use_postordering = false; TestSolver(options); @@ -143,7 +161,7 @@ TEST_F(UnsymmetricLinearSolverTest, SparseNormalCholeskyUsingSuiteSparsePostOrdering) { LinearSolver::Options options; - options.sparse_linear_algebra_library = SUITE_SPARSE; + options.sparse_linear_algebra_library_type = SUITE_SPARSE; options.type = SPARSE_NORMAL_CHOLESKY; options.use_postordering = true; TestSolver(options); @@ -154,7 +172,7 @@ TEST_F(UnsymmetricLinearSolverTest, SparseNormalCholeskyUsingCXSparsePreOrdering) { LinearSolver::Options options; - options.sparse_linear_algebra_library = CX_SPARSE; + options.sparse_linear_algebra_library_type = CX_SPARSE; options.type = SPARSE_NORMAL_CHOLESKY; options.use_postordering = false; TestSolver(options); @@ -163,7 +181,7 @@ TEST_F(UnsymmetricLinearSolverTest, SparseNormalCholeskyUsingCXSparsePostOrdering) { LinearSolver::Options options; - options.sparse_linear_algebra_library = CX_SPARSE; + options.sparse_linear_algebra_library_type = CX_SPARSE; options.type = SPARSE_NORMAL_CHOLESKY; options.use_postordering = true; TestSolver(options);
diff --git a/jni/Android.mk b/jni/Android.mk index b881d88..d628177 100644 --- a/jni/Android.mk +++ b/jni/Android.mk
@@ -99,6 +99,7 @@ LOCAL_CPP_EXTENSION := .cc LOCAL_CFLAGS := $(CERES_EXTRA_DEFINES) \ + -DCERES_NO_LAPACK \ -DCERES_NO_SUITESPARSE \ -DCERES_NO_GFLAGS \ -DCERES_NO_THREADS \