Faster BlockRandomAccessSparseMatrix::SymmetricRightMultiply.
Trade a small amount of memory to improve the cache coherency of
the SymmetricRightMultiply operation.
The resulting code leads to a 10-20% speedup in the linear solver
end to end.
Change-Id: I8ab2fe152099e849b211b5b19e4ef9f03d8e7f1c
diff --git a/internal/ceres/block_random_access_sparse_matrix.cc b/internal/ceres/block_random_access_sparse_matrix.cc
index 9da16a4..c43a9b7 100644
--- a/internal/ceres/block_random_access_sparse_matrix.cc
+++ b/internal/ceres/block_random_access_sparse_matrix.cc
@@ -88,6 +88,8 @@
++it) {
const int row_block_size = blocks_[it->first];
const int col_block_size = blocks_[it->second];
+ cell_values_.push_back(make_pair(make_pair(it->first, it->second),
+ values + pos));
layout_[IntPairToLong(it->first, it->second)] =
new CellInfo(values + pos);
pos += row_block_size * col_block_size;
@@ -156,20 +158,19 @@
void BlockRandomAccessSparseMatrix::SymmetricRightMultiply(const double* x,
double* y) const {
- for (LayoutType::const_iterator it = layout_.begin();
- it != layout_.end();
- ++it) {
- int row;
- int col;
- LongToIntPair(it->first, &row, &col);
-
+ vector< pair<pair<int, int>, double*> >::const_iterator it =
+ cell_values_.begin();
+ for (; it != cell_values_.end(); ++it) {
+ const int row = it->first.first;
const int row_block_size = blocks_[row];
const int row_block_pos = block_positions_[row];
+
+ const int col = it->first.second;
const int col_block_size = blocks_[col];
const int col_block_pos = block_positions_[col];
MatrixVectorMultiply<Eigen::Dynamic, Eigen::Dynamic, 1>(
- it->second->values, row_block_size, col_block_size,
+ it->second, row_block_size, col_block_size,
x + col_block_pos,
y + row_block_pos);
@@ -179,7 +180,7 @@
// triangular multiply also.
if (row != col) {
MatrixTransposeVectorMultiply<Eigen::Dynamic, Eigen::Dynamic, 1>(
- it->second->values, row_block_size, col_block_size,
+ it->second, row_block_size, col_block_size,
x + row_block_pos,
y + col_block_pos);
}
diff --git a/internal/ceres/block_random_access_sparse_matrix.h b/internal/ceres/block_random_access_sparse_matrix.h
index a4f89d8..51b5d20 100644
--- a/internal/ceres/block_random_access_sparse_matrix.h
+++ b/internal/ceres/block_random_access_sparse_matrix.h
@@ -111,6 +111,10 @@
typedef HashMap<long int, CellInfo* > LayoutType;
LayoutType layout_;
+ // In order traversal of contents of the matrix. This allows us to
+ // implement a matrix-vector which is 20% faster than using the
+ // iterator in the Layout object instead.
+ vector<pair<pair<int, int>, double*> > cell_values_;
// The underlying matrix object which actually stores the cells.
scoped_ptr<TripletSparseMatrix> tsm_;