CUDA partitioned matrix view
Converts a BlockSparseMatrix into two instances of CudaSparseMatrix,
corresponding to the left (E) and right (F) sub-matrices.
Values of the E sub-matrix are always copied as-is. Values of the F
sub-matrix are copied as-is if every row-block of F satisfies at least
one of the following conditions:
- There is at most one cell in the row-block
- The row-block has a height of one row
In both cases the order of values within a row-block is identical in
the block-sparse and CRS layouts, so a plain copy suffices. Otherwise,
the indices of values in CRS order corresponding to value indices in
block-sparse order are computed on-the-fly, as sketched below.
Change-Id: I14eee00c36ee74b6b83fc85927907641383abfc7
diff --git a/internal/ceres/block_jacobian_writer.cc b/internal/ceres/block_jacobian_writer.cc
index f74d64d..b009e96 100644
--- a/internal/ceres/block_jacobian_writer.cc
+++ b/internal/ceres/block_jacobian_writer.cc
@@ -54,6 +54,13 @@
// the first num_eliminate_blocks parameter blocks as indicated by the parameter
// block ordering. The remaining parameter blocks are the F blocks.
//
+// In order to simplify the block-sparse to CRS conversion, cells within a
+// row-block of a non-partitioned matrix are stored in memory sequentially in
+// the order of increasing column-block id. In the case of partitioned
+// matrices, cells corresponding to the F sub-matrix are stored sequentially
+// in the order of increasing column-block id (with cells corresponding to
+// the E sub-matrix stored separately).
+//
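+// For example (illustration only): if a row-block has cells in column blocks
+// {7, 3, 5}, where 3 is an E column block and 5 and 7 are F column blocks,
+// the cell for column block 3 is stored in the E region, while the two F
+// cells are stored in the F region in the order 5, 7 (increasing
+// column-block id), regardless of the order in which the corresponding
+// parameter blocks appear in the residual block.
+//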
// TODO(keir): Consider if we should use a boolean for each parameter block
// instead of num_eliminate_blocks.
void BuildJacobianLayout(const Program& program,
@@ -95,29 +102,54 @@
int e_block_pos = 0;
int* jacobian_pos = jacobian_layout_storage->data();
+ std::vector<std::pair<int, int>> active_parameter_blocks;
for (int i = 0; i < residual_blocks.size(); ++i) {
const ResidualBlock* residual_block = residual_blocks[i];
const int num_residuals = residual_block->NumResiduals();
const int num_parameter_blocks = residual_block->NumParameterBlocks();
(*jacobian_layout)[i] = jacobian_pos;
+ // Cells from the F sub-matrix are to be stored sequentially with increasing
+ // column-block id. For each non-constant parameter block, a pair of indices
+ // (its index in the list of active parameter blocks and its index in the
+ // list of all parameter blocks) is computed, and the pairs are sorted by
+ // the column-block id of the corresponding parameter block.
+ active_parameter_blocks.clear();
+ active_parameter_blocks.reserve(num_parameter_blocks);
for (int j = 0; j < num_parameter_blocks; ++j) {
ParameterBlock* parameter_block = residual_block->parameter_blocks()[j];
- const int parameter_block_index = parameter_block->index();
if (parameter_block->IsConstant()) {
continue;
}
+ const int k = active_parameter_blocks.size();
+ active_parameter_blocks.emplace_back(k, j);
+ }
+ std::sort(active_parameter_blocks.begin(),
+ active_parameter_blocks.end(),
+ [&residual_block](const std::pair<int, int>& a,
+ const std::pair<int, int>& b) {
+ return residual_block->parameter_blocks()[a.second]->index() <
+ residual_block->parameter_blocks()[b.second]->index();
+ });
+ // Cell positions for the active parameter blocks are assigned in the order
+ // of increasing column-block index. This guarantees that cells are laid out
+ // sequentially with increasing column-block indices.
+ for (const auto& indices : active_parameter_blocks) {
+ const auto [k, j] = indices;
+ ParameterBlock* parameter_block = residual_block->parameter_blocks()[j];
+ const int parameter_block_index = parameter_block->index();
const int jacobian_block_size =
num_residuals * parameter_block->TangentSize();
if (parameter_block_index < num_eliminate_blocks) {
- *jacobian_pos = e_block_pos;
+ jacobian_pos[k] = e_block_pos;
e_block_pos += jacobian_block_size;
} else {
- *jacobian_pos = f_block_pos;
+ jacobian_pos[k] = f_block_pos;
f_block_pos += jacobian_block_size;
}
- jacobian_pos++;
}
+ jacobian_pos += active_parameter_blocks.size();
}
}