Parallel updates to block-diagonal EtE FtF -------------------------------------------------------------------------------- Benchmark Time -------------------------------------------------------------------------------- PMVUpdateBlockDiagonalFtF<final/problem-13682-4456117-pre.txt>/1 5056275941 ns PMVUpdateBlockDiagonalFtF<final/problem-13682-4456117-pre.txt>/2 3677677097 ns PMVUpdateBlockDiagonalFtF<final/problem-13682-4456117-pre.txt>/4 1932236015 ns PMVUpdateBlockDiagonalFtF<final/problem-13682-4456117-pre.txt>/8 984585015 ns PMVUpdateBlockDiagonalFtF<final/problem-13682-4456117-pre.txt>/16 614918752 ns PMVUpdateBlockDiagonalEtE<final/problem-13682-4456117-pre.txt>/1 449324491 ns PMVUpdateBlockDiagonalEtE<final/problem-13682-4456117-pre.txt>/2 273147462 ns PMVUpdateBlockDiagonalEtE<final/problem-13682-4456117-pre.txt>/4 150742698 ns PMVUpdateBlockDiagonalEtE<final/problem-13682-4456117-pre.txt>/8 81602564 ns PMVUpdateBlockDiagonalEtE<final/problem-13682-4456117-pre.txt>/16 47010769 ns PMVUpdateBlockDiagonalFtF<venice/problem-1778-993923-pre.txt>/1 774598200 ns PMVUpdateBlockDiagonalFtF<venice/problem-1778-993923-pre.txt>/2 611312877 ns PMVUpdateBlockDiagonalFtF<venice/problem-1778-993923-pre.txt>/4 326701149 ns PMVUpdateBlockDiagonalFtF<venice/problem-1778-993923-pre.txt>/8 165634457 ns PMVUpdateBlockDiagonalFtF<venice/problem-1778-993923-pre.txt>/16 90631068 ns PMVUpdateBlockDiagonalEtE<venice/problem-1778-993923-pre.txt>/1 80651817 ns PMVUpdateBlockDiagonalEtE<venice/problem-1778-993923-pre.txt>/2 49688691 ns PMVUpdateBlockDiagonalEtE<venice/problem-1778-993923-pre.txt>/4 27199153 ns PMVUpdateBlockDiagonalEtE<venice/problem-1778-993923-pre.txt>/8 14301768 ns PMVUpdateBlockDiagonalEtE<venice/problem-1778-993923-pre.txt>/16 8683479 ns PMVUpdateBlockDiagonalFtF<ladybug/problem-1723-156502-pre.txt>/1 104422529 ns PMVUpdateBlockDiagonalFtF<ladybug/problem-1723-156502-pre.txt>/2 81555176 ns PMVUpdateBlockDiagonalFtF<ladybug/problem-1723-156502-pre.txt>/4 43227593 ns PMVUpdateBlockDiagonalFtF<ladybug/problem-1723-156502-pre.txt>/8 22177895 ns PMVUpdateBlockDiagonalFtF<ladybug/problem-1723-156502-pre.txt>/16 12505813 ns PMVUpdateBlockDiagonalEtE<ladybug/problem-1723-156502-pre.txt>/1 14205253 ns PMVUpdateBlockDiagonalEtE<ladybug/problem-1723-156502-pre.txt>/2 7102357 ns PMVUpdateBlockDiagonalEtE<ladybug/problem-1723-156502-pre.txt>/4 3806598 ns PMVUpdateBlockDiagonalEtE<ladybug/problem-1723-156502-pre.txt>/8 2112615 ns PMVUpdateBlockDiagonalEtE<ladybug/problem-1723-156502-pre.txt>/16 1245771 ns PMVUpdateBlockDiagonalFtF<dubrovnik/problem-356-226730-pre.txt>/1 190102870 ns PMVUpdateBlockDiagonalFtF<dubrovnik/problem-356-226730-pre.txt>/2 157359897 ns PMVUpdateBlockDiagonalFtF<dubrovnik/problem-356-226730-pre.txt>/4 82657662 ns PMVUpdateBlockDiagonalFtF<dubrovnik/problem-356-226730-pre.txt>/8 42746490 ns PMVUpdateBlockDiagonalFtF<dubrovnik/problem-356-226730-pre.txt>/16 23434967 ns PMVUpdateBlockDiagonalEtE<dubrovnik/problem-356-226730-pre.txt>/1 18894355 ns PMVUpdateBlockDiagonalEtE<dubrovnik/problem-356-226730-pre.txt>/2 12138228 ns PMVUpdateBlockDiagonalEtE<dubrovnik/problem-356-226730-pre.txt>/4 6808771 ns PMVUpdateBlockDiagonalEtE<dubrovnik/problem-356-226730-pre.txt>/8 3829718 ns PMVUpdateBlockDiagonalEtE<dubrovnik/problem-356-226730-pre.txt>/16 2103688 ns PMVUpdateBlockDiagonalFtF<trafalgar/problem-257-65132-pre.txt>/1 34230036 ns PMVUpdateBlockDiagonalFtF<trafalgar/problem-257-65132-pre.txt>/2 20121184 ns PMVUpdateBlockDiagonalFtF<trafalgar/problem-257-65132-pre.txt>/4 10899938 ns PMVUpdateBlockDiagonalFtF<trafalgar/problem-257-65132-pre.txt>/8 6186359 ns PMVUpdateBlockDiagonalFtF<trafalgar/problem-257-65132-pre.txt>/16 4207103 ns PMVUpdateBlockDiagonalEtE<trafalgar/problem-257-65132-pre.txt>/1 3077110 ns PMVUpdateBlockDiagonalEtE<trafalgar/problem-257-65132-pre.txt>/2 2224104 ns PMVUpdateBlockDiagonalEtE<trafalgar/problem-257-65132-pre.txt>/4 1274841 ns PMVUpdateBlockDiagonalEtE<trafalgar/problem-257-65132-pre.txt>/8 721140 ns PMVUpdateBlockDiagonalEtE<trafalgar/problem-257-65132-pre.txt>/16 437715 ns Change-Id: If5a342a063869bd9c0505bf96b6d957da5169c1d
diff --git a/internal/ceres/evaluation_benchmark.cc b/internal/ceres/evaluation_benchmark.cc index 9849b5a..9717d0f 100644 --- a/internal/ceres/evaluation_benchmark.cc +++ b/internal/ceres/evaluation_benchmark.cc
@@ -147,19 +147,28 @@ const PartitionedView* PartitionedMatrixViewJacobian( const LinearSolver::Options& options) { - auto block_sparse = BlockSparseJacobian(options.context); + auto block_sparse = BlockSparseJacobianWithTranspose(options.context); partitioned_view_jacobian = std::make_unique<PartitionedView>(options, *block_sparse); return partitioned_view_jacobian.get(); } - const PartitionedView* PartitionedMatrixViewJacobianWithTranspose( - const LinearSolver::Options& options) { - auto block_sparse_transpose = - BlockSparseJacobianWithTranspose(options.context); - partitioned_view_jacobian_with_transpose = - std::make_unique<PartitionedView>(options, *block_sparse_transpose); - return partitioned_view_jacobian_with_transpose.get(); + BlockSparseMatrix* BlockDiagonalEtE(const LinearSolver::Options& options) { + if (!block_diagonal_ete) { + auto partitioned_view = + PartitionedMatrixViewJacobian(options); + block_diagonal_ete = partitioned_view->CreateBlockDiagonalEtE(); + } + return block_diagonal_ete.get(); + } + + BlockSparseMatrix* BlockDiagonalFtF(const LinearSolver::Options& options) { + if (!block_diagonal_ftf) { + auto partitioned_view = + PartitionedMatrixViewJacobian(options); + block_diagonal_ftf = partitioned_view->CreateBlockDiagonalFtF(); + } + return block_diagonal_ftf.get(); } Vector parameters; @@ -169,7 +178,8 @@ std::unique_ptr<BlockSparseMatrix> block_sparse_jacobian_with_transpose; std::unique_ptr<CompressedRowSparseMatrix> crs_jacobian; std::unique_ptr<PartitionedView> partitioned_view_jacobian; - std::unique_ptr<PartitionedView> partitioned_view_jacobian_with_transpose; + std::unique_ptr<BlockSparseMatrix> block_diagonal_ete; + std::unique_ptr<BlockSparseMatrix> block_diagonal_ftf; }; static void Residuals(benchmark::State& state, @@ -262,7 +272,7 @@ options.num_threads = state.range(0); options.elimination_groups.push_back(data->bal_problem->num_points()); options.context = context; - auto jacobian = data->PartitionedMatrixViewJacobianWithTranspose(options); + auto jacobian = data->PartitionedMatrixViewJacobian(options); Vector y = Vector::Zero(jacobian->num_cols_f()); Vector x = Vector::Random(jacobian->num_rows()); @@ -298,7 +308,7 @@ options.num_threads = state.range(0); options.elimination_groups.push_back(data->bal_problem->num_points()); options.context = context; - auto jacobian = data->PartitionedMatrixViewJacobianWithTranspose(options); + auto jacobian = data->PartitionedMatrixViewJacobian(options); Vector y = Vector::Zero(jacobian->num_cols_e()); Vector x = Vector::Random(jacobian->num_rows()); @@ -309,6 +319,36 @@ CHECK_GT(y.squaredNorm(), 0.); } +static void PMVUpdateBlockDiagonalEtE(benchmark::State& state, + BALData* data, + ContextImpl* context) { + LinearSolver::Options options; + options.num_threads = state.range(0); + options.elimination_groups.push_back(data->bal_problem->num_points()); + options.context = context; + auto jacobian = data->PartitionedMatrixViewJacobian(options); + auto block_diagonal_ete = data->BlockDiagonalEtE(options); + + for (auto _ : state) { + jacobian->UpdateBlockDiagonalEtE(block_diagonal_ete); + } +} + +static void PMVUpdateBlockDiagonalFtF(benchmark::State& state, + BALData* data, + ContextImpl* context) { + LinearSolver::Options options; + options.num_threads = state.range(0); + options.elimination_groups.push_back(data->bal_problem->num_points()); + options.context = context; + auto jacobian = data->PartitionedMatrixViewJacobian(options); + auto block_diagonal_ftf = data->BlockDiagonalFtF(options); + + for (auto _ : state) { + jacobian->UpdateBlockDiagonalFtF(block_diagonal_ftf); + } +} + static void JacobianRightMultiplyAndAccumulate(benchmark::State& state, BALData* data, ContextImpl* context) { @@ -485,6 +525,30 @@ ->Arg(8) ->Arg(16); + const std::string name_update_block_diagonal_ftf = + "PMVUpdateBlockDiagonalFtF<" + path + ">"; + ::benchmark::RegisterBenchmark(name_update_block_diagonal_ftf.c_str(), + ceres::internal::PMVUpdateBlockDiagonalFtF, + data, + &context) + ->Arg(1) + ->Arg(2) + ->Arg(4) + ->Arg(8) + ->Arg(16); + + const std::string name_update_block_diagonal_ete = + "PMVUpdateBlockDiagonalEtE<" + path + ">"; + ::benchmark::RegisterBenchmark(name_update_block_diagonal_ete.c_str(), + ceres::internal::PMVUpdateBlockDiagonalEtE, + data, + &context) + ->Arg(1) + ->Arg(2) + ->Arg(4) + ->Arg(8) + ->Arg(16); + #ifndef CERES_NO_CUDA const std::string name_right_product_cuda = "JacobianRightMultiplyAndAccumulateCuda<" + path + ">";
diff --git a/internal/ceres/partitioned_matrix_view.h b/internal/ceres/partitioned_matrix_view.h index f34fd4a..380b576 100644 --- a/internal/ceres/partitioned_matrix_view.h +++ b/internal/ceres/partitioned_matrix_view.h
@@ -166,7 +166,15 @@ std::unique_ptr<BlockSparseMatrix> CreateBlockDiagonalEtE() const final; std::unique_ptr<BlockSparseMatrix> CreateBlockDiagonalFtF() const final; void UpdateBlockDiagonalEtE(BlockSparseMatrix* block_diagonal) const final; + void UpdateBlockDiagonalEtESingleThreaded( + BlockSparseMatrix* block_diagonal) const; + void UpdateBlockDiagonalEtEMultiThreaded( + BlockSparseMatrix* block_diagonal) const; void UpdateBlockDiagonalFtF(BlockSparseMatrix* block_diagonal) const final; + void UpdateBlockDiagonalFtFSingleThreaded( + BlockSparseMatrix* block_diagonal) const; + void UpdateBlockDiagonalFtFMultiThreaded( + BlockSparseMatrix* block_diagonal) const; // clang-format off int num_col_blocks_e() const final { return num_col_blocks_e_; } int num_col_blocks_f() const final { return num_col_blocks_f_; }
diff --git a/internal/ceres/partitioned_matrix_view_impl.h b/internal/ceres/partitioned_matrix_view_impl.h index 1ca2e14..e685f5b 100644 --- a/internal/ceres/partitioned_matrix_view_impl.h +++ b/internal/ceres/partitioned_matrix_view_impl.h
@@ -444,10 +444,10 @@ // template <int kRowBlockSize, int kEBlockSize, int kFBlockSize> void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>:: - UpdateBlockDiagonalEtE(BlockSparseMatrix* block_diagonal) const { - const CompressedRowBlockStructure* bs = matrix_.block_structure(); - const CompressedRowBlockStructure* block_diagonal_structure = - block_diagonal->block_structure(); + UpdateBlockDiagonalEtESingleThreaded( + BlockSparseMatrix* block_diagonal) const { + auto bs = matrix_.block_structure(); + auto block_diagonal_structure = block_diagonal->block_structure(); block_diagonal->SetZero(); const double* values = matrix_.values(); @@ -470,6 +470,57 @@ } } +template <int kRowBlockSize, int kEBlockSize, int kFBlockSize> +void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>:: + UpdateBlockDiagonalEtEMultiThreaded( + BlockSparseMatrix* block_diagonal) const { + auto transpose_block_structure = matrix_.transpose_block_structure(); + CHECK(transpose_block_structure != nullptr); + auto block_diagonal_structure = block_diagonal->block_structure(); + + const double* values = matrix_.values(); + double* values_diagonal = block_diagonal->mutable_values(); + ParallelFor( + options_.context, + 0, + num_col_blocks_e_, + options_.num_threads, + [values, + transpose_block_structure, + values_diagonal, + block_diagonal_structure](int col_block_id) { + int cell_position = + block_diagonal_structure->rows[col_block_id].cells[0].position; + double* cell_values = values_diagonal + cell_position; + int col_block_size = + transpose_block_structure->rows[col_block_id].block.size; + auto& cells = transpose_block_structure->rows[col_block_id].cells; + MatrixRef(cell_values, col_block_size, col_block_size).setZero(); + + for (auto& c : cells) { + int row_block_size = transpose_block_structure->cols[c.block_id].size; + // clang-format off + MatrixTransposeMatrixMultiply<kRowBlockSize, kEBlockSize, kRowBlockSize, kEBlockSize, 1>( + values + c.position, row_block_size, col_block_size, + values + c.position, row_block_size, col_block_size, + cell_values, 0, 0, col_block_size, col_block_size); + // clang-format on + } + }, + e_cols_partition_); +} + +template <int kRowBlockSize, int kEBlockSize, int kFBlockSize> +void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>:: + UpdateBlockDiagonalEtE(BlockSparseMatrix* block_diagonal) const { + if (options_.num_threads == 1) { + UpdateBlockDiagonalEtESingleThreaded(block_diagonal); + } else { + CHECK(options_.context != nullptr); + UpdateBlockDiagonalEtEMultiThreaded(block_diagonal); + } +} + // Similar to the code in RightMultiplyAndAccumulateF, except instead of the // matrix vector multiply its an outer product. // @@ -477,10 +528,10 @@ // template <int kRowBlockSize, int kEBlockSize, int kFBlockSize> void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>:: - UpdateBlockDiagonalFtF(BlockSparseMatrix* block_diagonal) const { - const CompressedRowBlockStructure* bs = matrix_.block_structure(); - const CompressedRowBlockStructure* block_diagonal_structure = - block_diagonal->block_structure(); + UpdateBlockDiagonalFtFSingleThreaded( + BlockSparseMatrix* block_diagonal) const { + auto bs = matrix_.block_structure(); + auto block_diagonal_structure = block_diagonal->block_structure(); block_diagonal->SetZero(); const double* values = matrix_.values(); @@ -527,4 +578,82 @@ } } +template <int kRowBlockSize, int kEBlockSize, int kFBlockSize> +void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>:: + UpdateBlockDiagonalFtFMultiThreaded( + BlockSparseMatrix* block_diagonal) const { + auto transpose_block_structure = matrix_.transpose_block_structure(); + CHECK(transpose_block_structure != nullptr); + auto block_diagonal_structure = block_diagonal->block_structure(); + + const double* values = matrix_.values(); + double* values_diagonal = block_diagonal->mutable_values(); + + const int num_col_blocks_e = num_col_blocks_e_; + const int num_row_blocks_e = num_row_blocks_e_; + ParallelFor( + options_.context, + num_col_blocks_e_, + num_col_blocks_e + num_col_blocks_f_, + options_.num_threads, + [transpose_block_structure, + block_diagonal_structure, + num_col_blocks_e, + num_row_blocks_e, + values, + values_diagonal](int col_block_id) { + const int col_block_size = + transpose_block_structure->rows[col_block_id].block.size; + const int diagonal_block_id = col_block_id - num_col_blocks_e; + const int cell_position = + block_diagonal_structure->rows[diagonal_block_id].cells[0].position; + double* cell_values = values_diagonal + cell_position; + + MatrixRef(cell_values, col_block_size, col_block_size).setZero(); + + auto& cells = transpose_block_structure->rows[col_block_id].cells; + const int num_cells = cells.size(); + int i = 0; + for (; i < num_cells; ++i) { + auto& cell = cells[i]; + const int row_block_id = cell.block_id; + if (row_block_id >= num_row_blocks_e) break; + const int row_block_size = + transpose_block_structure->cols[row_block_id].size; + // clang-format off + MatrixTransposeMatrixMultiply + <kRowBlockSize, kFBlockSize, kRowBlockSize, kFBlockSize, 1>( + values + cell.position, row_block_size, col_block_size, + values + cell.position, row_block_size, col_block_size, + cell_values, 0, 0, col_block_size, col_block_size); + // clang-format on + } + for (; i < num_cells; ++i) { + auto& cell = cells[i]; + const int row_block_id = cell.block_id; + const int row_block_size = + transpose_block_structure->cols[row_block_id].size; + // clang-format off + MatrixTransposeMatrixMultiply + <Eigen::Dynamic, Eigen::Dynamic, Eigen::Dynamic, Eigen::Dynamic, 1>( + values + cell.position, row_block_size, col_block_size, + values + cell.position, row_block_size, col_block_size, + cell_values, 0, 0, col_block_size, col_block_size); + // clang-format on + } + }, + f_cols_partition_); +} + +template <int kRowBlockSize, int kEBlockSize, int kFBlockSize> +void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>:: + UpdateBlockDiagonalFtF(BlockSparseMatrix* block_diagonal) const { + if (options_.num_threads == 1) { + UpdateBlockDiagonalFtFSingleThreaded(block_diagonal); + } else { + CHECK(options_.context != nullptr); + UpdateBlockDiagonalFtFMultiThreaded(block_diagonal); + } +} + } // namespace ceres::internal
diff --git a/internal/ceres/partitioned_matrix_view_test.cc b/internal/ceres/partitioned_matrix_view_test.cc index 3d10b1c..f74351e 100644 --- a/internal/ceres/partitioned_matrix_view_test.cc +++ b/internal/ceres/partitioned_matrix_view_test.cc
@@ -49,63 +49,6 @@ const double kEpsilon = 1e-14; -class PartitionedMatrixViewTest : public ::testing::Test { - protected: - void SetUp() final { - std::unique_ptr<LinearLeastSquaresProblem> problem = - CreateLinearLeastSquaresProblemFromId(2); - CHECK(problem != nullptr); - A_ = std::move(problem->A); - - num_cols_ = A_->num_cols(); - num_rows_ = A_->num_rows(); - num_eliminate_blocks_ = problem->num_eliminate_blocks; - LinearSolver::Options options; - options.elimination_groups.push_back(num_eliminate_blocks_); - pmv_ = PartitionedMatrixViewBase::Create( - options, *down_cast<BlockSparseMatrix*>(A_.get())); - } - - double RandDouble() { return distribution_(prng_); } - - int num_rows_; - int num_cols_; - int num_eliminate_blocks_; - std::unique_ptr<SparseMatrix> A_; - std::unique_ptr<PartitionedMatrixViewBase> pmv_; - std::mt19937 prng_; - std::uniform_real_distribution<double> distribution_ = - std::uniform_real_distribution<double>(0.0, 1.0); -}; - -TEST_F(PartitionedMatrixViewTest, BlockDiagonalEtE) { - std::unique_ptr<BlockSparseMatrix> block_diagonal_ee( - pmv_->CreateBlockDiagonalEtE()); - const CompressedRowBlockStructure* bs = block_diagonal_ee->block_structure(); - - EXPECT_EQ(block_diagonal_ee->num_rows(), 2); - EXPECT_EQ(block_diagonal_ee->num_cols(), 2); - EXPECT_EQ(bs->cols.size(), 2); - EXPECT_EQ(bs->rows.size(), 2); - - EXPECT_NEAR(block_diagonal_ee->values()[0], 10.0, kEpsilon); - EXPECT_NEAR(block_diagonal_ee->values()[1], 155.0, kEpsilon); -} - -TEST_F(PartitionedMatrixViewTest, BlockDiagonalFtF) { - std::unique_ptr<BlockSparseMatrix> block_diagonal_ff( - pmv_->CreateBlockDiagonalFtF()); - const CompressedRowBlockStructure* bs = block_diagonal_ff->block_structure(); - - EXPECT_EQ(block_diagonal_ff->num_rows(), 3); - EXPECT_EQ(block_diagonal_ff->num_cols(), 3); - EXPECT_EQ(bs->cols.size(), 3); - EXPECT_EQ(bs->rows.size(), 3); - EXPECT_NEAR(block_diagonal_ff->values()[0], 70.0, kEpsilon); - EXPECT_NEAR(block_diagonal_ff->values()[1], 17.0, kEpsilon); - EXPECT_NEAR(block_diagonal_ff->values()[2], 37.0, kEpsilon); -} - // Param = <problem_id, num_threads> using Param = ::testing::tuple<int, int>; @@ -116,7 +59,7 @@ return ss.str(); } -class PartitionedMatrixViewSpMVTest : public ::testing::TestWithParam<Param> { +class PartitionedMatrixViewTest : public ::testing::TestWithParam<Param> { protected: void SetUp() final { const int problem_id = ::testing::get<0>(GetParam()); @@ -132,6 +75,10 @@ options_.elimination_groups.push_back(problem->num_eliminate_blocks); pmv_ = PartitionedMatrixViewBase::Create(options_, *block_sparse); + LinearSolver::Options options_single_threaded = options_; + options_single_threaded.num_threads = 1; + pmv_single_threaded_ = PartitionedMatrixViewBase::Create(options_, *block_sparse); + EXPECT_EQ(pmv_->num_col_blocks_e(), problem->num_eliminate_blocks); EXPECT_EQ(pmv_->num_col_blocks_f(), block_sparse->block_structure()->cols.size() - @@ -147,12 +94,13 @@ std::unique_ptr<LinearLeastSquaresProblem> problem_; std::unique_ptr<SparseMatrix> A_; std::unique_ptr<PartitionedMatrixViewBase> pmv_; + std::unique_ptr<PartitionedMatrixViewBase> pmv_single_threaded_; std::mt19937 prng_; std::uniform_real_distribution<double> distribution_ = std::uniform_real_distribution<double>(0.0, 1.0); }; -TEST_P(PartitionedMatrixViewSpMVTest, RightMultiplyAndAccumulateE) { +TEST_P(PartitionedMatrixViewTest, RightMultiplyAndAccumulateE) { Vector x1(pmv_->num_cols_e()); Vector x2(pmv_->num_cols()); x2.setZero(); @@ -172,7 +120,7 @@ } } -TEST_P(PartitionedMatrixViewSpMVTest, RightMultiplyAndAccumulateF) { +TEST_P(PartitionedMatrixViewTest, RightMultiplyAndAccumulateF) { Vector x1(pmv_->num_cols_f()); Vector x2(pmv_->num_cols()); x2.setZero(); @@ -192,7 +140,7 @@ } } -TEST_P(PartitionedMatrixViewSpMVTest, LeftMultiplyAndAccumulate) { +TEST_P(PartitionedMatrixViewTest, LeftMultiplyAndAccumulate) { Vector x = Vector::Zero(pmv_->num_rows()); for (int i = 0; i < pmv_->num_rows(); ++i) { x(i) = RandDouble(); @@ -215,9 +163,109 @@ } } +TEST_P(PartitionedMatrixViewTest, BlockDiagonalFtF) { + std::unique_ptr<BlockSparseMatrix> block_diagonal_ff( + pmv_->CreateBlockDiagonalFtF()); + const auto bs_diagonal = block_diagonal_ff->block_structure(); + const int num_rows = pmv_->num_rows(); + const int num_cols_f = pmv_->num_cols_f(); + const int num_cols_e = pmv_->num_cols_e(); + const int num_col_blocks_f = pmv_->num_col_blocks_f(); + const int num_col_blocks_e = pmv_->num_col_blocks_e(); + + CHECK_EQ(block_diagonal_ff->num_rows(), num_cols_f); + CHECK_EQ(block_diagonal_ff->num_cols(), num_cols_f); + + EXPECT_EQ(bs_diagonal->cols.size(), num_col_blocks_f); + EXPECT_EQ(bs_diagonal->rows.size(), num_col_blocks_f); + + Matrix EF; + A_->ToDenseMatrix(&EF); + const auto F = EF.topRightCorner(num_rows, num_cols_f); + + Matrix expected_FtF = F.transpose() * F; + Matrix actual_FtF; + block_diagonal_ff->ToDenseMatrix(&actual_FtF); + + // FtF might be not block-diagonal + auto bs = down_cast<BlockSparseMatrix*>(A_.get())->block_structure(); + for (int i = 0; i < num_col_blocks_f; ++i) { + const auto col_block_f = bs->cols[num_col_blocks_e + i]; + const int block_size = col_block_f.size; + const int block_pos = col_block_f.position - num_cols_e; + const auto cell_expected = + expected_FtF.block(block_pos, block_pos, block_size, block_size); + auto cell_actual = + actual_FtF.block(block_pos, block_pos, block_size, block_size); + cell_actual -= cell_expected; + EXPECT_NEAR(cell_actual.norm(), 0., kEpsilon); + } + // There should be nothing remaining outside block-diagonal + EXPECT_NEAR(actual_FtF.norm(), 0., kEpsilon); +} + +TEST_P(PartitionedMatrixViewTest, BlockDiagonalEtE) { + std::unique_ptr<BlockSparseMatrix> block_diagonal_ee( + pmv_->CreateBlockDiagonalEtE()); + const CompressedRowBlockStructure* bs = block_diagonal_ee->block_structure(); + const int num_rows = pmv_->num_rows(); + const int num_cols_e = pmv_->num_cols_e(); + const int num_col_blocks_e = pmv_->num_col_blocks_e(); + + CHECK_EQ(block_diagonal_ee->num_rows(), num_cols_e); + CHECK_EQ(block_diagonal_ee->num_cols(), num_cols_e); + + EXPECT_EQ(bs->cols.size(), num_col_blocks_e); + EXPECT_EQ(bs->rows.size(), num_col_blocks_e); + + Matrix EF; + A_->ToDenseMatrix(&EF); + const auto E = EF.topLeftCorner(num_rows, num_cols_e); + + Matrix expected_EtE = E.transpose() * E; + Matrix actual_EtE; + block_diagonal_ee->ToDenseMatrix(&actual_EtE); + + EXPECT_NEAR((expected_EtE - actual_EtE).norm(), 0., kEpsilon); +} + +TEST_P(PartitionedMatrixViewTest, UpdateBlockDiagonalEtE) { + std::unique_ptr<BlockSparseMatrix> block_diagonal_ete( + pmv_->CreateBlockDiagonalEtE()); + const CompressedRowBlockStructure* bs = block_diagonal_ete->block_structure(); + const int num_cols = pmv_->num_cols_e(); + + Matrix multi_threaded(num_cols, num_cols); + pmv_->UpdateBlockDiagonalEtE(block_diagonal_ete.get()); + block_diagonal_ete->ToDenseMatrix(&multi_threaded); + + Matrix single_threaded(num_cols, num_cols); + pmv_single_threaded_->UpdateBlockDiagonalEtE(block_diagonal_ete.get()); + block_diagonal_ete->ToDenseMatrix(&single_threaded); + + EXPECT_NEAR((multi_threaded - single_threaded).norm(), 0., kEpsilon); +} + +TEST_P(PartitionedMatrixViewTest, UpdateBlockDiagonalFtF) { + std::unique_ptr<BlockSparseMatrix> block_diagonal_ftf( + pmv_->CreateBlockDiagonalFtF()); + const CompressedRowBlockStructure* bs = block_diagonal_ftf->block_structure(); + const int num_cols = pmv_->num_cols_f(); + + Matrix multi_threaded(num_cols, num_cols); + pmv_->UpdateBlockDiagonalFtF(block_diagonal_ftf.get()); + block_diagonal_ftf->ToDenseMatrix(&multi_threaded); + + Matrix single_threaded(num_cols, num_cols); + pmv_single_threaded_->UpdateBlockDiagonalFtF(block_diagonal_ftf.get()); + block_diagonal_ftf->ToDenseMatrix(&single_threaded); + + EXPECT_NEAR((multi_threaded - single_threaded).norm(), 0., kEpsilon); +} + INSTANTIATE_TEST_SUITE_P( ParallelProducts, - PartitionedMatrixViewSpMVTest, + PartitionedMatrixViewTest, ::testing::Combine(::testing::Values(2, 4, 6), ::testing::Values(1, 2, 3, 4, 5, 6, 7, 8)), ParamInfoToString);