Replace more instances of Eigen GEMV with Ceres BLAS.
With this ITERATIVE_SCHUR with JACOBI preconditioner went down from
280 seconds to 150 seconds on problem-744-543562-pre.txt.
Change-Id: I4f319c1108421e8d59f58654a4c0576ad65df609
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 1f234c7..eee67fa 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -654,9 +654,13 @@
"${CMAKE_CXX_FLAGS} -Werror -Wall -Wextra -Wno-unknown-pragmas -Wno-sign-compare -Wno-unused-parameter")
ENDIF ("${UNIX}")
-# We can be even stricter when using CLang
+# Use a larger inlining threshold for Clang, since it hobbles Eigen,
+# resulting in an unreasonably slow version of the blas routines. The
+# -Qunused-arguments is needed because CMake passes the inline
+# threshold to the linker and clang complains about it and dies.
IF ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
- SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-return-type-c-linkage")
+ SET(CMAKE_CXX_FLAGS
+ "${CMAKE_CXX_FLAGS} -Qunused-arguments -mllvm -inline-threshold=600 -Wno-return-type-c-linkage")
ENDIF()
ADD_SUBDIRECTORY(internal/ceres)
diff --git a/internal/ceres/block_sparse_matrix.cc b/internal/ceres/block_sparse_matrix.cc
index dbe5ec9..ab6fcef 100644
--- a/internal/ceres/block_sparse_matrix.cc
+++ b/internal/ceres/block_sparse_matrix.cc
@@ -33,6 +33,7 @@
#include <cstddef>
#include <algorithm>
#include <vector>
+#include "ceres/blas.h"
#include "ceres/block_structure.h"
#include "ceres/internal/eigen.h"
#include "ceres/matrix_proto.h"
@@ -117,16 +118,15 @@
for (int i = 0; i < block_structure_->rows.size(); ++i) {
int row_block_pos = block_structure_->rows[i].block.position;
int row_block_size = block_structure_->rows[i].block.size;
- VectorRef yref(y + row_block_pos, row_block_size);
const vector<Cell>& cells = block_structure_->rows[i].cells;
for (int j = 0; j < cells.size(); ++j) {
int col_block_id = cells[j].block_id;
int col_block_size = block_structure_->cols[col_block_id].size;
int col_block_pos = block_structure_->cols[col_block_id].position;
- ConstVectorRef xref(x + col_block_pos, col_block_size);
- MatrixRef m(values_.get() + cells[j].position,
- row_block_size, col_block_size);
- yref += m.lazyProduct(xref);
+ MatrixVectorMultiply<Eigen::Dynamic, Eigen::Dynamic, 1>(
+ values_.get() + cells[j].position, row_block_size, col_block_size,
+ x + col_block_pos,
+ y + row_block_pos);
}
}
}
@@ -138,16 +138,16 @@
for (int i = 0; i < block_structure_->rows.size(); ++i) {
int row_block_pos = block_structure_->rows[i].block.position;
int row_block_size = block_structure_->rows[i].block.size;
- const ConstVectorRef xref(x + row_block_pos, row_block_size);
const vector<Cell>& cells = block_structure_->rows[i].cells;
for (int j = 0; j < cells.size(); ++j) {
int col_block_id = cells[j].block_id;
int col_block_size = block_structure_->cols[col_block_id].size;
int col_block_pos = block_structure_->cols[col_block_id].position;
- VectorRef yref(y + col_block_pos, col_block_size);
- MatrixRef m(values_.get() + cells[j].position,
- row_block_size, col_block_size);
- yref += m.transpose().lazyProduct(xref);
+ MatrixTransposeVectorMultiply<Eigen::Dynamic, Eigen::Dynamic, 1>(
+ values_.get() + cells[j].position, row_block_size, col_block_size,
+ x + row_block_pos,
+ y + col_block_pos);
+
}
}
}
diff --git a/internal/ceres/partitioned_matrix_view.cc b/internal/ceres/partitioned_matrix_view.cc
index 0722fc8..c488184 100644
--- a/internal/ceres/partitioned_matrix_view.cc
+++ b/internal/ceres/partitioned_matrix_view.cc
@@ -35,6 +35,7 @@
#include <algorithm>
#include <cstring>
#include <vector>
+#include "ceres/blas.h"
#include "ceres/block_sparse_matrix.h"
#include "ceres/block_structure.h"
#include "ceres/internal/eigen.h"
@@ -103,13 +104,10 @@
const int col_block_id = cell.block_id;
const int col_block_pos = bs->cols[col_block_id].position;
const int col_block_size = bs->cols[col_block_id].size;
-
- ConstVectorRef xref(x + col_block_pos, col_block_size);
- VectorRef yref(y + row_block_pos, row_block_size);
- ConstMatrixRef m(row_values + cell.position,
- row_block_size,
- col_block_size);
- yref += m.lazyProduct(xref);
+ MatrixVectorMultiply<Eigen::Dynamic, Eigen::Dynamic, 1>(
+ row_values + cell.position, row_block_size, col_block_size,
+ x + col_block_pos,
+ y + row_block_pos);
}
}
@@ -124,20 +122,16 @@
for (int r = 0; r < bs->rows.size(); ++r) {
const int row_block_pos = bs->rows[r].block.position;
const int row_block_size = bs->rows[r].block.size;
- VectorRef yref(y + row_block_pos, row_block_size);
const vector<Cell>& cells = bs->rows[r].cells;
for (int c = (r < num_row_blocks_e_) ? 1 : 0; c < cells.size(); ++c) {
const double* row_values = matrix_.RowBlockValues(r);
const int col_block_id = cells[c].block_id;
const int col_block_pos = bs->cols[col_block_id].position;
const int col_block_size = bs->cols[col_block_id].size;
-
- ConstVectorRef xref(x + col_block_pos - num_cols_e(),
- col_block_size);
- ConstMatrixRef m(row_values + cells[c].position,
- row_block_size,
- col_block_size);
- yref += m.lazyProduct(xref);
+ MatrixVectorMultiply<Eigen::Dynamic, Eigen::Dynamic, 1>(
+ row_values + cells[c].position, row_block_size, col_block_size,
+ x + col_block_pos - num_cols_e(),
+ y + row_block_pos);
}
}
}
@@ -155,13 +149,10 @@
const int col_block_id = cell.block_id;
const int col_block_pos = bs->cols[col_block_id].position;
const int col_block_size = bs->cols[col_block_id].size;
-
- ConstVectorRef xref(x + row_block_pos, row_block_size);
- VectorRef yref(y + col_block_pos, col_block_size);
- ConstMatrixRef m(row_values + cell.position,
- row_block_size,
- col_block_size);
- yref += m.transpose().lazyProduct(xref);
+ MatrixTransposeVectorMultiply<Eigen::Dynamic, Eigen::Dynamic, 1>(
+ row_values + cell.position, row_block_size, col_block_size,
+ x + row_block_pos,
+ y + col_block_pos);
}
}
@@ -176,19 +167,16 @@
for (int r = 0; r < bs->rows.size(); ++r) {
const int row_block_pos = bs->rows[r].block.position;
const int row_block_size = bs->rows[r].block.size;
- ConstVectorRef xref(x + row_block_pos, row_block_size);
const vector<Cell>& cells = bs->rows[r].cells;
for (int c = (r < num_row_blocks_e_) ? 1 : 0; c < cells.size(); ++c) {
const double* row_values = matrix_.RowBlockValues(r);
const int col_block_id = cells[c].block_id;
const int col_block_pos = bs->cols[col_block_id].position;
const int col_block_size = bs->cols[col_block_id].size;
-
- VectorRef yref(y + col_block_pos - num_cols_e(), col_block_size);
- ConstMatrixRef m(row_values + cells[c].position,
- row_block_size,
- col_block_size);
- yref += m.transpose().lazyProduct(xref);
+ MatrixTransposeVectorMultiply<Eigen::Dynamic, Eigen::Dynamic, 1>(
+ row_values + cells[c].position, row_block_size, col_block_size,
+ x + row_block_pos,
+ y + col_block_pos - num_cols_e());
}
}
}
@@ -267,15 +255,15 @@
const int row_block_size = bs->rows[r].block.size;
const int block_id = cell.block_id;
const int col_block_size = bs->cols[block_id].size;
- ConstMatrixRef m(row_values + cell.position,
- row_block_size,
- col_block_size);
-
const int cell_position =
block_diagonal_structure->rows[block_id].cells[0].position;
- MatrixRef(block_diagonal->mutable_values() + cell_position,
- col_block_size, col_block_size).noalias() += m.transpose() * m;
+ MatrixTransposeMatrixMultiply
+ <Eigen::Dynamic, Eigen::Dynamic, Eigen::Dynamic, Eigen::Dynamic, 1>(
+ row_values + cell.position, row_block_size, col_block_size,
+ row_values + cell.position, row_block_size, col_block_size,
+ block_diagonal->mutable_values() + cell_position,
+ 0, 0, col_block_size, col_block_size);
}
}
@@ -298,15 +286,16 @@
for (int c = (r < num_row_blocks_e_) ? 1 : 0; c < cells.size(); ++c) {
const int col_block_id = cells[c].block_id;
const int col_block_size = bs->cols[col_block_id].size;
- ConstMatrixRef m(row_values + cells[c].position,
- row_block_size,
- col_block_size);
const int diagonal_block_id = col_block_id - num_col_blocks_e_;
const int cell_position =
block_diagonal_structure->rows[diagonal_block_id].cells[0].position;
- MatrixRef(block_diagonal->mutable_values() + cell_position,
- col_block_size, col_block_size).noalias() += m.transpose() * m;
+ MatrixTransposeMatrixMultiply
+ <Eigen::Dynamic, Eigen::Dynamic, Eigen::Dynamic, Eigen::Dynamic, 1>(
+ row_values + cells[c].position, row_block_size, col_block_size,
+ row_values + cells[c].position, row_block_size, col_block_size,
+ block_diagonal->mutable_values() + cell_position,
+ 0, 0, col_block_size, col_block_size);
}
}
}