Replace more instances of Eigen GEMV with Ceres BLAS. With this ITERATIVE_SCHUR with JACOBI preconditioner went down from 280 seconds to 150 seconds on problem-744-543562-pre.txt. Change-Id: I4f319c1108421e8d59f58654a4c0576ad65df609
diff --git a/CMakeLists.txt b/CMakeLists.txt index 1f234c7..eee67fa 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt
@@ -654,9 +654,13 @@ "${CMAKE_CXX_FLAGS} -Werror -Wall -Wextra -Wno-unknown-pragmas -Wno-sign-compare -Wno-unused-parameter") ENDIF ("${UNIX}") -# We can be even stricter when using CLang +# Use a larger inlining threshold for Clang, since it hobbles Eigen, +# resulting in an unreasonably slow version of the blas routines. The +# -Qunused-arguments is needed because CMake passes the inline +# threshold to the linker and clang complains about it and dies. IF ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang") - SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-return-type-c-linkage") + SET(CMAKE_CXX_FLAGS + "${CMAKE_CXX_FLAGS} -Qunused-arguments -mllvm -inline-threshold=600 -Wno-return-type-c-linkage") ENDIF() ADD_SUBDIRECTORY(internal/ceres)
diff --git a/internal/ceres/block_sparse_matrix.cc b/internal/ceres/block_sparse_matrix.cc index dbe5ec9..ab6fcef 100644 --- a/internal/ceres/block_sparse_matrix.cc +++ b/internal/ceres/block_sparse_matrix.cc
@@ -33,6 +33,7 @@ #include <cstddef> #include <algorithm> #include <vector> +#include "ceres/blas.h" #include "ceres/block_structure.h" #include "ceres/internal/eigen.h" #include "ceres/matrix_proto.h" @@ -117,16 +118,15 @@ for (int i = 0; i < block_structure_->rows.size(); ++i) { int row_block_pos = block_structure_->rows[i].block.position; int row_block_size = block_structure_->rows[i].block.size; - VectorRef yref(y + row_block_pos, row_block_size); const vector<Cell>& cells = block_structure_->rows[i].cells; for (int j = 0; j < cells.size(); ++j) { int col_block_id = cells[j].block_id; int col_block_size = block_structure_->cols[col_block_id].size; int col_block_pos = block_structure_->cols[col_block_id].position; - ConstVectorRef xref(x + col_block_pos, col_block_size); - MatrixRef m(values_.get() + cells[j].position, - row_block_size, col_block_size); - yref += m.lazyProduct(xref); + MatrixVectorMultiply<Eigen::Dynamic, Eigen::Dynamic, 1>( + values_.get() + cells[j].position, row_block_size, col_block_size, + x + col_block_pos, + y + row_block_pos); } } } @@ -138,16 +138,16 @@ for (int i = 0; i < block_structure_->rows.size(); ++i) { int row_block_pos = block_structure_->rows[i].block.position; int row_block_size = block_structure_->rows[i].block.size; - const ConstVectorRef xref(x + row_block_pos, row_block_size); const vector<Cell>& cells = block_structure_->rows[i].cells; for (int j = 0; j < cells.size(); ++j) { int col_block_id = cells[j].block_id; int col_block_size = block_structure_->cols[col_block_id].size; int col_block_pos = block_structure_->cols[col_block_id].position; - VectorRef yref(y + col_block_pos, col_block_size); - MatrixRef m(values_.get() + cells[j].position, - row_block_size, col_block_size); - yref += m.transpose().lazyProduct(xref); + MatrixTransposeVectorMultiply<Eigen::Dynamic, Eigen::Dynamic, 1>( + values_.get() + cells[j].position, row_block_size, col_block_size, + x + row_block_pos, + y + col_block_pos); + } } }
diff --git a/internal/ceres/partitioned_matrix_view.cc b/internal/ceres/partitioned_matrix_view.cc index 0722fc8..c488184 100644 --- a/internal/ceres/partitioned_matrix_view.cc +++ b/internal/ceres/partitioned_matrix_view.cc
@@ -35,6 +35,7 @@ #include <algorithm> #include <cstring> #include <vector> +#include "ceres/blas.h" #include "ceres/block_sparse_matrix.h" #include "ceres/block_structure.h" #include "ceres/internal/eigen.h" @@ -103,13 +104,10 @@ const int col_block_id = cell.block_id; const int col_block_pos = bs->cols[col_block_id].position; const int col_block_size = bs->cols[col_block_id].size; - - ConstVectorRef xref(x + col_block_pos, col_block_size); - VectorRef yref(y + row_block_pos, row_block_size); - ConstMatrixRef m(row_values + cell.position, - row_block_size, - col_block_size); - yref += m.lazyProduct(xref); + MatrixVectorMultiply<Eigen::Dynamic, Eigen::Dynamic, 1>( + row_values + cell.position, row_block_size, col_block_size, + x + col_block_pos, + y + row_block_pos); } } @@ -124,20 +122,16 @@ for (int r = 0; r < bs->rows.size(); ++r) { const int row_block_pos = bs->rows[r].block.position; const int row_block_size = bs->rows[r].block.size; - VectorRef yref(y + row_block_pos, row_block_size); const vector<Cell>& cells = bs->rows[r].cells; for (int c = (r < num_row_blocks_e_) ? 1 : 0; c < cells.size(); ++c) { const double* row_values = matrix_.RowBlockValues(r); const int col_block_id = cells[c].block_id; const int col_block_pos = bs->cols[col_block_id].position; const int col_block_size = bs->cols[col_block_id].size; - - ConstVectorRef xref(x + col_block_pos - num_cols_e(), - col_block_size); - ConstMatrixRef m(row_values + cells[c].position, - row_block_size, - col_block_size); - yref += m.lazyProduct(xref); + MatrixVectorMultiply<Eigen::Dynamic, Eigen::Dynamic, 1>( + row_values + cells[c].position, row_block_size, col_block_size, + x + col_block_pos - num_cols_e(), + y + row_block_pos); } } } @@ -155,13 +149,10 @@ const int col_block_id = cell.block_id; const int col_block_pos = bs->cols[col_block_id].position; const int col_block_size = bs->cols[col_block_id].size; - - ConstVectorRef xref(x + row_block_pos, row_block_size); - VectorRef yref(y + col_block_pos, col_block_size); - ConstMatrixRef m(row_values + cell.position, - row_block_size, - col_block_size); - yref += m.transpose().lazyProduct(xref); + MatrixTransposeVectorMultiply<Eigen::Dynamic, Eigen::Dynamic, 1>( + row_values + cell.position, row_block_size, col_block_size, + x + row_block_pos, + y + col_block_pos); } } @@ -176,19 +167,16 @@ for (int r = 0; r < bs->rows.size(); ++r) { const int row_block_pos = bs->rows[r].block.position; const int row_block_size = bs->rows[r].block.size; - ConstVectorRef xref(x + row_block_pos, row_block_size); const vector<Cell>& cells = bs->rows[r].cells; for (int c = (r < num_row_blocks_e_) ? 1 : 0; c < cells.size(); ++c) { const double* row_values = matrix_.RowBlockValues(r); const int col_block_id = cells[c].block_id; const int col_block_pos = bs->cols[col_block_id].position; const int col_block_size = bs->cols[col_block_id].size; - - VectorRef yref(y + col_block_pos - num_cols_e(), col_block_size); - ConstMatrixRef m(row_values + cells[c].position, - row_block_size, - col_block_size); - yref += m.transpose().lazyProduct(xref); + MatrixTransposeVectorMultiply<Eigen::Dynamic, Eigen::Dynamic, 1>( + row_values + cells[c].position, row_block_size, col_block_size, + x + row_block_pos, + y + col_block_pos - num_cols_e()); } } } @@ -267,15 +255,15 @@ const int row_block_size = bs->rows[r].block.size; const int block_id = cell.block_id; const int col_block_size = bs->cols[block_id].size; - ConstMatrixRef m(row_values + cell.position, - row_block_size, - col_block_size); - const int cell_position = block_diagonal_structure->rows[block_id].cells[0].position; - MatrixRef(block_diagonal->mutable_values() + cell_position, - col_block_size, col_block_size).noalias() += m.transpose() * m; + MatrixTransposeMatrixMultiply + <Eigen::Dynamic, Eigen::Dynamic, Eigen::Dynamic, Eigen::Dynamic, 1>( + row_values + cell.position, row_block_size, col_block_size, + row_values + cell.position, row_block_size, col_block_size, + block_diagonal->mutable_values() + cell_position, + 0, 0, col_block_size, col_block_size); } } @@ -298,15 +286,16 @@ for (int c = (r < num_row_blocks_e_) ? 1 : 0; c < cells.size(); ++c) { const int col_block_id = cells[c].block_id; const int col_block_size = bs->cols[col_block_id].size; - ConstMatrixRef m(row_values + cells[c].position, - row_block_size, - col_block_size); const int diagonal_block_id = col_block_id - num_col_blocks_e_; const int cell_position = block_diagonal_structure->rows[diagonal_block_id].cells[0].position; - MatrixRef(block_diagonal->mutable_values() + cell_position, - col_block_size, col_block_size).noalias() += m.transpose() * m; + MatrixTransposeMatrixMultiply + <Eigen::Dynamic, Eigen::Dynamic, Eigen::Dynamic, Eigen::Dynamic, 1>( + row_values + cells[c].position, row_block_size, col_block_size, + row_values + cells[c].position, row_block_size, col_block_size, + block_diagonal->mutable_values() + cell_position, + 0, 0, col_block_size, col_block_size); } } }