Faster BlockRandomAccessSparseMatrix::SymmetricRightMultiply. Trade a small amount of memory to improve the cache coherency of the SymmetricRightMultiply operation. The resulting code leads to a 10-20% speedup in the linear solver end to end. Change-Id: I8ab2fe152099e849b211b5b19e4ef9f03d8e7f1c
diff --git a/internal/ceres/block_random_access_sparse_matrix.cc b/internal/ceres/block_random_access_sparse_matrix.cc index 9da16a4..c43a9b7 100644 --- a/internal/ceres/block_random_access_sparse_matrix.cc +++ b/internal/ceres/block_random_access_sparse_matrix.cc
@@ -88,6 +88,8 @@ ++it) { const int row_block_size = blocks_[it->first]; const int col_block_size = blocks_[it->second]; + cell_values_.push_back(make_pair(make_pair(it->first, it->second), + values + pos)); layout_[IntPairToLong(it->first, it->second)] = new CellInfo(values + pos); pos += row_block_size * col_block_size; @@ -156,20 +158,19 @@ void BlockRandomAccessSparseMatrix::SymmetricRightMultiply(const double* x, double* y) const { - for (LayoutType::const_iterator it = layout_.begin(); - it != layout_.end(); - ++it) { - int row; - int col; - LongToIntPair(it->first, &row, &col); - + vector< pair<pair<int, int>, double*> >::const_iterator it = + cell_values_.begin(); + for (; it != cell_values_.end(); ++it) { + const int row = it->first.first; const int row_block_size = blocks_[row]; const int row_block_pos = block_positions_[row]; + + const int col = it->first.second; const int col_block_size = blocks_[col]; const int col_block_pos = block_positions_[col]; MatrixVectorMultiply<Eigen::Dynamic, Eigen::Dynamic, 1>( - it->second->values, row_block_size, col_block_size, + it->second, row_block_size, col_block_size, x + col_block_pos, y + row_block_pos); @@ -179,7 +180,7 @@ // triangular multiply also. if (row != col) { MatrixTransposeVectorMultiply<Eigen::Dynamic, Eigen::Dynamic, 1>( - it->second->values, row_block_size, col_block_size, + it->second, row_block_size, col_block_size, x + row_block_pos, y + col_block_pos); }
diff --git a/internal/ceres/block_random_access_sparse_matrix.h b/internal/ceres/block_random_access_sparse_matrix.h index a4f89d8..51b5d20 100644 --- a/internal/ceres/block_random_access_sparse_matrix.h +++ b/internal/ceres/block_random_access_sparse_matrix.h
@@ -111,6 +111,10 @@ typedef HashMap<long int, CellInfo* > LayoutType; LayoutType layout_; + // In order traversal of contents of the matrix. This allows us to + // implement a matrix-vector which is 20% faster than using the + // iterator in the Layout object instead. + vector<pair<pair<int, int>, double*> > cell_values_; // The underlying matrix object which actually stores the cells. scoped_ptr<TripletSparseMatrix> tsm_;