CUDA partitioned matrix view Converts BlockSparseMatrix into two instances of CudaSparseMatrix, corresponding to left and right sub-matrix. Values of submatrix E are always just copied as-is, and values of submatrix F are copied if each row-block of F submatrix satisfies at least one of the following conditions: - There is atmost one cell in row-block - Row block has height of 1 row Otherwise, indices of values in CRS order corresponding to value indices in block-sparse order are computed on-the-fly. Change-Id: I14eee00c36ee74b6b83fc85927907641383abfc7
diff --git a/internal/ceres/block_jacobian_writer.cc b/internal/ceres/block_jacobian_writer.cc index f74d64d..b009e96 100644 --- a/internal/ceres/block_jacobian_writer.cc +++ b/internal/ceres/block_jacobian_writer.cc
@@ -54,6 +54,13 @@ // the first num_eliminate_blocks parameter blocks as indicated by the parameter // block ordering. The remaining parameter blocks are the F blocks. // +// In order to simplify handling block-sparse to CRS conversion, cells within +// the row-block of non-partitioned matrix are stored in memory sequentially in +// the order of increasing column-block id. In case of partitioned matrices, +// cells corresponding to F sub-matrix are stored sequentially in the order of +// increasing column-block id (with cells corresponding to E sub-matrix stored +// separately). +// // TODO(keir): Consider if we should use a boolean for each parameter block // instead of num_eliminate_blocks. void BuildJacobianLayout(const Program& program, @@ -95,29 +102,54 @@ int e_block_pos = 0; int* jacobian_pos = jacobian_layout_storage->data(); + std::vector<std::pair<int, int>> active_parameter_blocks; for (int i = 0; i < residual_blocks.size(); ++i) { const ResidualBlock* residual_block = residual_blocks[i]; const int num_residuals = residual_block->NumResiduals(); const int num_parameter_blocks = residual_block->NumParameterBlocks(); (*jacobian_layout)[i] = jacobian_pos; + // Cells from F sub-matrix are to be stored sequentially with increasing + // column block id. For each non-constant parameter block, a pair of indices + // (index in the list of active parameter blocks and index in the list of + // all parameter blocks) is computed, and index pairs are sorted by the + // index of corresponding column block id. + active_parameter_blocks.clear(); + active_parameter_blocks.reserve(num_parameter_blocks); for (int j = 0; j < num_parameter_blocks; ++j) { ParameterBlock* parameter_block = residual_block->parameter_blocks()[j]; - const int parameter_block_index = parameter_block->index(); if (parameter_block->IsConstant()) { continue; } + const int k = active_parameter_blocks.size(); + active_parameter_blocks.emplace_back(k, j); + } + std::sort(active_parameter_blocks.begin(), + active_parameter_blocks.end(), + [&residual_block](const std::pair<int, int>& a, + const std::pair<int, int>& b) { + return residual_block->parameter_blocks()[a.second]->index() < + residual_block->parameter_blocks()[b.second]->index(); + }); + // Cell positions for each active parameter block are filled in the order of + // active parameter block indices sorted by columnd block index. This + // guarantees that cells are laid out sequentially with increasing column + // block indices. + for (const auto& indices : active_parameter_blocks) { + const auto [k, j] = indices; + ParameterBlock* parameter_block = residual_block->parameter_blocks()[j]; + const int parameter_block_index = parameter_block->index(); const int jacobian_block_size = num_residuals * parameter_block->TangentSize(); if (parameter_block_index < num_eliminate_blocks) { - *jacobian_pos = e_block_pos; + jacobian_pos[k] = e_block_pos; e_block_pos += jacobian_block_size; } else { - *jacobian_pos = f_block_pos; + jacobian_pos[k] = f_block_pos; f_block_pos += jacobian_block_size; } - jacobian_pos++; } + jacobian_pos += active_parameter_blocks.size(); } }