Parallel updates to block-diagonal EtE FtF
--------------------------------------------------------------------------------
Benchmark Time
--------------------------------------------------------------------------------
PMVUpdateBlockDiagonalFtF<final/problem-13682-4456117-pre.txt>/1 5056275941 ns
PMVUpdateBlockDiagonalFtF<final/problem-13682-4456117-pre.txt>/2 3677677097 ns
PMVUpdateBlockDiagonalFtF<final/problem-13682-4456117-pre.txt>/4 1932236015 ns
PMVUpdateBlockDiagonalFtF<final/problem-13682-4456117-pre.txt>/8 984585015 ns
PMVUpdateBlockDiagonalFtF<final/problem-13682-4456117-pre.txt>/16 614918752 ns
PMVUpdateBlockDiagonalEtE<final/problem-13682-4456117-pre.txt>/1 449324491 ns
PMVUpdateBlockDiagonalEtE<final/problem-13682-4456117-pre.txt>/2 273147462 ns
PMVUpdateBlockDiagonalEtE<final/problem-13682-4456117-pre.txt>/4 150742698 ns
PMVUpdateBlockDiagonalEtE<final/problem-13682-4456117-pre.txt>/8 81602564 ns
PMVUpdateBlockDiagonalEtE<final/problem-13682-4456117-pre.txt>/16 47010769 ns
PMVUpdateBlockDiagonalFtF<venice/problem-1778-993923-pre.txt>/1 774598200 ns
PMVUpdateBlockDiagonalFtF<venice/problem-1778-993923-pre.txt>/2 611312877 ns
PMVUpdateBlockDiagonalFtF<venice/problem-1778-993923-pre.txt>/4 326701149 ns
PMVUpdateBlockDiagonalFtF<venice/problem-1778-993923-pre.txt>/8 165634457 ns
PMVUpdateBlockDiagonalFtF<venice/problem-1778-993923-pre.txt>/16 90631068 ns
PMVUpdateBlockDiagonalEtE<venice/problem-1778-993923-pre.txt>/1 80651817 ns
PMVUpdateBlockDiagonalEtE<venice/problem-1778-993923-pre.txt>/2 49688691 ns
PMVUpdateBlockDiagonalEtE<venice/problem-1778-993923-pre.txt>/4 27199153 ns
PMVUpdateBlockDiagonalEtE<venice/problem-1778-993923-pre.txt>/8 14301768 ns
PMVUpdateBlockDiagonalEtE<venice/problem-1778-993923-pre.txt>/16 8683479 ns
PMVUpdateBlockDiagonalFtF<ladybug/problem-1723-156502-pre.txt>/1 104422529 ns
PMVUpdateBlockDiagonalFtF<ladybug/problem-1723-156502-pre.txt>/2 81555176 ns
PMVUpdateBlockDiagonalFtF<ladybug/problem-1723-156502-pre.txt>/4 43227593 ns
PMVUpdateBlockDiagonalFtF<ladybug/problem-1723-156502-pre.txt>/8 22177895 ns
PMVUpdateBlockDiagonalFtF<ladybug/problem-1723-156502-pre.txt>/16 12505813 ns
PMVUpdateBlockDiagonalEtE<ladybug/problem-1723-156502-pre.txt>/1 14205253 ns
PMVUpdateBlockDiagonalEtE<ladybug/problem-1723-156502-pre.txt>/2 7102357 ns
PMVUpdateBlockDiagonalEtE<ladybug/problem-1723-156502-pre.txt>/4 3806598 ns
PMVUpdateBlockDiagonalEtE<ladybug/problem-1723-156502-pre.txt>/8 2112615 ns
PMVUpdateBlockDiagonalEtE<ladybug/problem-1723-156502-pre.txt>/16 1245771 ns
PMVUpdateBlockDiagonalFtF<dubrovnik/problem-356-226730-pre.txt>/1 190102870 ns
PMVUpdateBlockDiagonalFtF<dubrovnik/problem-356-226730-pre.txt>/2 157359897 ns
PMVUpdateBlockDiagonalFtF<dubrovnik/problem-356-226730-pre.txt>/4 82657662 ns
PMVUpdateBlockDiagonalFtF<dubrovnik/problem-356-226730-pre.txt>/8 42746490 ns
PMVUpdateBlockDiagonalFtF<dubrovnik/problem-356-226730-pre.txt>/16 23434967 ns
PMVUpdateBlockDiagonalEtE<dubrovnik/problem-356-226730-pre.txt>/1 18894355 ns
PMVUpdateBlockDiagonalEtE<dubrovnik/problem-356-226730-pre.txt>/2 12138228 ns
PMVUpdateBlockDiagonalEtE<dubrovnik/problem-356-226730-pre.txt>/4 6808771 ns
PMVUpdateBlockDiagonalEtE<dubrovnik/problem-356-226730-pre.txt>/8 3829718 ns
PMVUpdateBlockDiagonalEtE<dubrovnik/problem-356-226730-pre.txt>/16 2103688 ns
PMVUpdateBlockDiagonalFtF<trafalgar/problem-257-65132-pre.txt>/1 34230036 ns
PMVUpdateBlockDiagonalFtF<trafalgar/problem-257-65132-pre.txt>/2 20121184 ns
PMVUpdateBlockDiagonalFtF<trafalgar/problem-257-65132-pre.txt>/4 10899938 ns
PMVUpdateBlockDiagonalFtF<trafalgar/problem-257-65132-pre.txt>/8 6186359 ns
PMVUpdateBlockDiagonalFtF<trafalgar/problem-257-65132-pre.txt>/16 4207103 ns
PMVUpdateBlockDiagonalEtE<trafalgar/problem-257-65132-pre.txt>/1 3077110 ns
PMVUpdateBlockDiagonalEtE<trafalgar/problem-257-65132-pre.txt>/2 2224104 ns
PMVUpdateBlockDiagonalEtE<trafalgar/problem-257-65132-pre.txt>/4 1274841 ns
PMVUpdateBlockDiagonalEtE<trafalgar/problem-257-65132-pre.txt>/8 721140 ns
PMVUpdateBlockDiagonalEtE<trafalgar/problem-257-65132-pre.txt>/16 437715 ns
Change-Id: If5a342a063869bd9c0505bf96b6d957da5169c1d
diff --git a/internal/ceres/evaluation_benchmark.cc b/internal/ceres/evaluation_benchmark.cc
index 9849b5a..9717d0f 100644
--- a/internal/ceres/evaluation_benchmark.cc
+++ b/internal/ceres/evaluation_benchmark.cc
@@ -147,19 +147,28 @@
const PartitionedView* PartitionedMatrixViewJacobian(
const LinearSolver::Options& options) {
- auto block_sparse = BlockSparseJacobian(options.context);
+ auto block_sparse = BlockSparseJacobianWithTranspose(options.context);
partitioned_view_jacobian =
std::make_unique<PartitionedView>(options, *block_sparse);
return partitioned_view_jacobian.get();
}
- const PartitionedView* PartitionedMatrixViewJacobianWithTranspose(
- const LinearSolver::Options& options) {
- auto block_sparse_transpose =
- BlockSparseJacobianWithTranspose(options.context);
- partitioned_view_jacobian_with_transpose =
- std::make_unique<PartitionedView>(options, *block_sparse_transpose);
- return partitioned_view_jacobian_with_transpose.get();
+ BlockSparseMatrix* BlockDiagonalEtE(const LinearSolver::Options& options) {
+ if (!block_diagonal_ete) {
+ auto partitioned_view =
+ PartitionedMatrixViewJacobian(options);
+ block_diagonal_ete = partitioned_view->CreateBlockDiagonalEtE();
+ }
+ return block_diagonal_ete.get();
+ }
+
+ BlockSparseMatrix* BlockDiagonalFtF(const LinearSolver::Options& options) {
+ if (!block_diagonal_ftf) {
+ auto partitioned_view =
+ PartitionedMatrixViewJacobian(options);
+ block_diagonal_ftf = partitioned_view->CreateBlockDiagonalFtF();
+ }
+ return block_diagonal_ftf.get();
}
Vector parameters;
@@ -169,7 +178,8 @@
std::unique_ptr<BlockSparseMatrix> block_sparse_jacobian_with_transpose;
std::unique_ptr<CompressedRowSparseMatrix> crs_jacobian;
std::unique_ptr<PartitionedView> partitioned_view_jacobian;
- std::unique_ptr<PartitionedView> partitioned_view_jacobian_with_transpose;
+ std::unique_ptr<BlockSparseMatrix> block_diagonal_ete;
+ std::unique_ptr<BlockSparseMatrix> block_diagonal_ftf;
};
static void Residuals(benchmark::State& state,
@@ -262,7 +272,7 @@
options.num_threads = state.range(0);
options.elimination_groups.push_back(data->bal_problem->num_points());
options.context = context;
- auto jacobian = data->PartitionedMatrixViewJacobianWithTranspose(options);
+ auto jacobian = data->PartitionedMatrixViewJacobian(options);
Vector y = Vector::Zero(jacobian->num_cols_f());
Vector x = Vector::Random(jacobian->num_rows());
@@ -298,7 +308,7 @@
options.num_threads = state.range(0);
options.elimination_groups.push_back(data->bal_problem->num_points());
options.context = context;
- auto jacobian = data->PartitionedMatrixViewJacobianWithTranspose(options);
+ auto jacobian = data->PartitionedMatrixViewJacobian(options);
Vector y = Vector::Zero(jacobian->num_cols_e());
Vector x = Vector::Random(jacobian->num_rows());
@@ -309,6 +319,36 @@
CHECK_GT(y.squaredNorm(), 0.);
}
+static void PMVUpdateBlockDiagonalEtE(benchmark::State& state,
+ BALData* data,
+ ContextImpl* context) {
+ LinearSolver::Options options;
+ options.num_threads = state.range(0);
+ options.elimination_groups.push_back(data->bal_problem->num_points());
+ options.context = context;
+ auto jacobian = data->PartitionedMatrixViewJacobian(options);
+ auto block_diagonal_ete = data->BlockDiagonalEtE(options);
+
+ for (auto _ : state) {
+ jacobian->UpdateBlockDiagonalEtE(block_diagonal_ete);
+ }
+}
+
+static void PMVUpdateBlockDiagonalFtF(benchmark::State& state,
+ BALData* data,
+ ContextImpl* context) {
+ LinearSolver::Options options;
+ options.num_threads = state.range(0);
+ options.elimination_groups.push_back(data->bal_problem->num_points());
+ options.context = context;
+ auto jacobian = data->PartitionedMatrixViewJacobian(options);
+ auto block_diagonal_ftf = data->BlockDiagonalFtF(options);
+
+ for (auto _ : state) {
+ jacobian->UpdateBlockDiagonalFtF(block_diagonal_ftf);
+ }
+}
+
static void JacobianRightMultiplyAndAccumulate(benchmark::State& state,
BALData* data,
ContextImpl* context) {
@@ -485,6 +525,30 @@
->Arg(8)
->Arg(16);
+ const std::string name_update_block_diagonal_ftf =
+ "PMVUpdateBlockDiagonalFtF<" + path + ">";
+ ::benchmark::RegisterBenchmark(name_update_block_diagonal_ftf.c_str(),
+ ceres::internal::PMVUpdateBlockDiagonalFtF,
+ data,
+ &context)
+ ->Arg(1)
+ ->Arg(2)
+ ->Arg(4)
+ ->Arg(8)
+ ->Arg(16);
+
+ const std::string name_update_block_diagonal_ete =
+ "PMVUpdateBlockDiagonalEtE<" + path + ">";
+ ::benchmark::RegisterBenchmark(name_update_block_diagonal_ete.c_str(),
+ ceres::internal::PMVUpdateBlockDiagonalEtE,
+ data,
+ &context)
+ ->Arg(1)
+ ->Arg(2)
+ ->Arg(4)
+ ->Arg(8)
+ ->Arg(16);
+
#ifndef CERES_NO_CUDA
const std::string name_right_product_cuda =
"JacobianRightMultiplyAndAccumulateCuda<" + path + ">";
diff --git a/internal/ceres/partitioned_matrix_view.h b/internal/ceres/partitioned_matrix_view.h
index f34fd4a..380b576 100644
--- a/internal/ceres/partitioned_matrix_view.h
+++ b/internal/ceres/partitioned_matrix_view.h
@@ -166,7 +166,15 @@
std::unique_ptr<BlockSparseMatrix> CreateBlockDiagonalEtE() const final;
std::unique_ptr<BlockSparseMatrix> CreateBlockDiagonalFtF() const final;
void UpdateBlockDiagonalEtE(BlockSparseMatrix* block_diagonal) const final;
+ void UpdateBlockDiagonalEtESingleThreaded(
+ BlockSparseMatrix* block_diagonal) const;
+ void UpdateBlockDiagonalEtEMultiThreaded(
+ BlockSparseMatrix* block_diagonal) const;
void UpdateBlockDiagonalFtF(BlockSparseMatrix* block_diagonal) const final;
+ void UpdateBlockDiagonalFtFSingleThreaded(
+ BlockSparseMatrix* block_diagonal) const;
+ void UpdateBlockDiagonalFtFMultiThreaded(
+ BlockSparseMatrix* block_diagonal) const;
// clang-format off
int num_col_blocks_e() const final { return num_col_blocks_e_; }
int num_col_blocks_f() const final { return num_col_blocks_f_; }
diff --git a/internal/ceres/partitioned_matrix_view_impl.h b/internal/ceres/partitioned_matrix_view_impl.h
index 1ca2e14..e685f5b 100644
--- a/internal/ceres/partitioned_matrix_view_impl.h
+++ b/internal/ceres/partitioned_matrix_view_impl.h
@@ -444,10 +444,10 @@
//
template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
- UpdateBlockDiagonalEtE(BlockSparseMatrix* block_diagonal) const {
- const CompressedRowBlockStructure* bs = matrix_.block_structure();
- const CompressedRowBlockStructure* block_diagonal_structure =
- block_diagonal->block_structure();
+ UpdateBlockDiagonalEtESingleThreaded(
+ BlockSparseMatrix* block_diagonal) const {
+ auto bs = matrix_.block_structure();
+ auto block_diagonal_structure = block_diagonal->block_structure();
block_diagonal->SetZero();
const double* values = matrix_.values();
@@ -470,6 +470,57 @@
}
}
+template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
+void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
+ UpdateBlockDiagonalEtEMultiThreaded(
+ BlockSparseMatrix* block_diagonal) const {
+ auto transpose_block_structure = matrix_.transpose_block_structure();
+ CHECK(transpose_block_structure != nullptr);
+ auto block_diagonal_structure = block_diagonal->block_structure();
+
+ const double* values = matrix_.values();
+ double* values_diagonal = block_diagonal->mutable_values();
+ ParallelFor(
+ options_.context,
+ 0,
+ num_col_blocks_e_,
+ options_.num_threads,
+ [values,
+ transpose_block_structure,
+ values_diagonal,
+ block_diagonal_structure](int col_block_id) {
+ int cell_position =
+ block_diagonal_structure->rows[col_block_id].cells[0].position;
+ double* cell_values = values_diagonal + cell_position;
+ int col_block_size =
+ transpose_block_structure->rows[col_block_id].block.size;
+ auto& cells = transpose_block_structure->rows[col_block_id].cells;
+ MatrixRef(cell_values, col_block_size, col_block_size).setZero();
+
+ for (auto& c : cells) {
+ int row_block_size = transpose_block_structure->cols[c.block_id].size;
+ // clang-format off
+ MatrixTransposeMatrixMultiply<kRowBlockSize, kEBlockSize, kRowBlockSize, kEBlockSize, 1>(
+ values + c.position, row_block_size, col_block_size,
+ values + c.position, row_block_size, col_block_size,
+ cell_values, 0, 0, col_block_size, col_block_size);
+ // clang-format on
+ }
+ },
+ e_cols_partition_);
+}
+
+template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
+void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
+ UpdateBlockDiagonalEtE(BlockSparseMatrix* block_diagonal) const {
+ if (options_.num_threads == 1) {
+ UpdateBlockDiagonalEtESingleThreaded(block_diagonal);
+ } else {
+ CHECK(options_.context != nullptr);
+ UpdateBlockDiagonalEtEMultiThreaded(block_diagonal);
+ }
+}
+
// Similar to the code in RightMultiplyAndAccumulateF, except instead of the
// matrix vector multiply its an outer product.
//
@@ -477,10 +528,10 @@
//
template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
- UpdateBlockDiagonalFtF(BlockSparseMatrix* block_diagonal) const {
- const CompressedRowBlockStructure* bs = matrix_.block_structure();
- const CompressedRowBlockStructure* block_diagonal_structure =
- block_diagonal->block_structure();
+ UpdateBlockDiagonalFtFSingleThreaded(
+ BlockSparseMatrix* block_diagonal) const {
+ auto bs = matrix_.block_structure();
+ auto block_diagonal_structure = block_diagonal->block_structure();
block_diagonal->SetZero();
const double* values = matrix_.values();
@@ -527,4 +578,82 @@
}
}
+template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
+void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
+ UpdateBlockDiagonalFtFMultiThreaded(
+ BlockSparseMatrix* block_diagonal) const {
+ auto transpose_block_structure = matrix_.transpose_block_structure();
+ CHECK(transpose_block_structure != nullptr);
+ auto block_diagonal_structure = block_diagonal->block_structure();
+
+ const double* values = matrix_.values();
+ double* values_diagonal = block_diagonal->mutable_values();
+
+ const int num_col_blocks_e = num_col_blocks_e_;
+ const int num_row_blocks_e = num_row_blocks_e_;
+ ParallelFor(
+ options_.context,
+ num_col_blocks_e_,
+ num_col_blocks_e + num_col_blocks_f_,
+ options_.num_threads,
+ [transpose_block_structure,
+ block_diagonal_structure,
+ num_col_blocks_e,
+ num_row_blocks_e,
+ values,
+ values_diagonal](int col_block_id) {
+ const int col_block_size =
+ transpose_block_structure->rows[col_block_id].block.size;
+ const int diagonal_block_id = col_block_id - num_col_blocks_e;
+ const int cell_position =
+ block_diagonal_structure->rows[diagonal_block_id].cells[0].position;
+ double* cell_values = values_diagonal + cell_position;
+
+ MatrixRef(cell_values, col_block_size, col_block_size).setZero();
+
+ auto& cells = transpose_block_structure->rows[col_block_id].cells;
+ const int num_cells = cells.size();
+ int i = 0;
+ for (; i < num_cells; ++i) {
+ auto& cell = cells[i];
+ const int row_block_id = cell.block_id;
+ if (row_block_id >= num_row_blocks_e) break;
+ const int row_block_size =
+ transpose_block_structure->cols[row_block_id].size;
+ // clang-format off
+ MatrixTransposeMatrixMultiply
+ <kRowBlockSize, kFBlockSize, kRowBlockSize, kFBlockSize, 1>(
+ values + cell.position, row_block_size, col_block_size,
+ values + cell.position, row_block_size, col_block_size,
+ cell_values, 0, 0, col_block_size, col_block_size);
+ // clang-format on
+ }
+ for (; i < num_cells; ++i) {
+ auto& cell = cells[i];
+ const int row_block_id = cell.block_id;
+ const int row_block_size =
+ transpose_block_structure->cols[row_block_id].size;
+ // clang-format off
+ MatrixTransposeMatrixMultiply
+ <Eigen::Dynamic, Eigen::Dynamic, Eigen::Dynamic, Eigen::Dynamic, 1>(
+ values + cell.position, row_block_size, col_block_size,
+ values + cell.position, row_block_size, col_block_size,
+ cell_values, 0, 0, col_block_size, col_block_size);
+ // clang-format on
+ }
+ },
+ f_cols_partition_);
+}
+
+template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
+void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
+ UpdateBlockDiagonalFtF(BlockSparseMatrix* block_diagonal) const {
+ if (options_.num_threads == 1) {
+ UpdateBlockDiagonalFtFSingleThreaded(block_diagonal);
+ } else {
+ CHECK(options_.context != nullptr);
+ UpdateBlockDiagonalFtFMultiThreaded(block_diagonal);
+ }
+}
+
} // namespace ceres::internal
diff --git a/internal/ceres/partitioned_matrix_view_test.cc b/internal/ceres/partitioned_matrix_view_test.cc
index 3d10b1c..f74351e 100644
--- a/internal/ceres/partitioned_matrix_view_test.cc
+++ b/internal/ceres/partitioned_matrix_view_test.cc
@@ -49,63 +49,6 @@
const double kEpsilon = 1e-14;
-class PartitionedMatrixViewTest : public ::testing::Test {
- protected:
- void SetUp() final {
- std::unique_ptr<LinearLeastSquaresProblem> problem =
- CreateLinearLeastSquaresProblemFromId(2);
- CHECK(problem != nullptr);
- A_ = std::move(problem->A);
-
- num_cols_ = A_->num_cols();
- num_rows_ = A_->num_rows();
- num_eliminate_blocks_ = problem->num_eliminate_blocks;
- LinearSolver::Options options;
- options.elimination_groups.push_back(num_eliminate_blocks_);
- pmv_ = PartitionedMatrixViewBase::Create(
- options, *down_cast<BlockSparseMatrix*>(A_.get()));
- }
-
- double RandDouble() { return distribution_(prng_); }
-
- int num_rows_;
- int num_cols_;
- int num_eliminate_blocks_;
- std::unique_ptr<SparseMatrix> A_;
- std::unique_ptr<PartitionedMatrixViewBase> pmv_;
- std::mt19937 prng_;
- std::uniform_real_distribution<double> distribution_ =
- std::uniform_real_distribution<double>(0.0, 1.0);
-};
-
-TEST_F(PartitionedMatrixViewTest, BlockDiagonalEtE) {
- std::unique_ptr<BlockSparseMatrix> block_diagonal_ee(
- pmv_->CreateBlockDiagonalEtE());
- const CompressedRowBlockStructure* bs = block_diagonal_ee->block_structure();
-
- EXPECT_EQ(block_diagonal_ee->num_rows(), 2);
- EXPECT_EQ(block_diagonal_ee->num_cols(), 2);
- EXPECT_EQ(bs->cols.size(), 2);
- EXPECT_EQ(bs->rows.size(), 2);
-
- EXPECT_NEAR(block_diagonal_ee->values()[0], 10.0, kEpsilon);
- EXPECT_NEAR(block_diagonal_ee->values()[1], 155.0, kEpsilon);
-}
-
-TEST_F(PartitionedMatrixViewTest, BlockDiagonalFtF) {
- std::unique_ptr<BlockSparseMatrix> block_diagonal_ff(
- pmv_->CreateBlockDiagonalFtF());
- const CompressedRowBlockStructure* bs = block_diagonal_ff->block_structure();
-
- EXPECT_EQ(block_diagonal_ff->num_rows(), 3);
- EXPECT_EQ(block_diagonal_ff->num_cols(), 3);
- EXPECT_EQ(bs->cols.size(), 3);
- EXPECT_EQ(bs->rows.size(), 3);
- EXPECT_NEAR(block_diagonal_ff->values()[0], 70.0, kEpsilon);
- EXPECT_NEAR(block_diagonal_ff->values()[1], 17.0, kEpsilon);
- EXPECT_NEAR(block_diagonal_ff->values()[2], 37.0, kEpsilon);
-}
-
// Param = <problem_id, num_threads>
using Param = ::testing::tuple<int, int>;
@@ -116,7 +59,7 @@
return ss.str();
}
-class PartitionedMatrixViewSpMVTest : public ::testing::TestWithParam<Param> {
+class PartitionedMatrixViewTest : public ::testing::TestWithParam<Param> {
protected:
void SetUp() final {
const int problem_id = ::testing::get<0>(GetParam());
@@ -132,6 +75,10 @@
options_.elimination_groups.push_back(problem->num_eliminate_blocks);
pmv_ = PartitionedMatrixViewBase::Create(options_, *block_sparse);
+ LinearSolver::Options options_single_threaded = options_;
+ options_single_threaded.num_threads = 1;
+ pmv_single_threaded_ = PartitionedMatrixViewBase::Create(options_, *block_sparse);
+
EXPECT_EQ(pmv_->num_col_blocks_e(), problem->num_eliminate_blocks);
EXPECT_EQ(pmv_->num_col_blocks_f(),
block_sparse->block_structure()->cols.size() -
@@ -147,12 +94,13 @@
std::unique_ptr<LinearLeastSquaresProblem> problem_;
std::unique_ptr<SparseMatrix> A_;
std::unique_ptr<PartitionedMatrixViewBase> pmv_;
+ std::unique_ptr<PartitionedMatrixViewBase> pmv_single_threaded_;
std::mt19937 prng_;
std::uniform_real_distribution<double> distribution_ =
std::uniform_real_distribution<double>(0.0, 1.0);
};
-TEST_P(PartitionedMatrixViewSpMVTest, RightMultiplyAndAccumulateE) {
+TEST_P(PartitionedMatrixViewTest, RightMultiplyAndAccumulateE) {
Vector x1(pmv_->num_cols_e());
Vector x2(pmv_->num_cols());
x2.setZero();
@@ -172,7 +120,7 @@
}
}
-TEST_P(PartitionedMatrixViewSpMVTest, RightMultiplyAndAccumulateF) {
+TEST_P(PartitionedMatrixViewTest, RightMultiplyAndAccumulateF) {
Vector x1(pmv_->num_cols_f());
Vector x2(pmv_->num_cols());
x2.setZero();
@@ -192,7 +140,7 @@
}
}
-TEST_P(PartitionedMatrixViewSpMVTest, LeftMultiplyAndAccumulate) {
+TEST_P(PartitionedMatrixViewTest, LeftMultiplyAndAccumulate) {
Vector x = Vector::Zero(pmv_->num_rows());
for (int i = 0; i < pmv_->num_rows(); ++i) {
x(i) = RandDouble();
@@ -215,9 +163,109 @@
}
}
+TEST_P(PartitionedMatrixViewTest, BlockDiagonalFtF) {
+ std::unique_ptr<BlockSparseMatrix> block_diagonal_ff(
+ pmv_->CreateBlockDiagonalFtF());
+ const auto bs_diagonal = block_diagonal_ff->block_structure();
+ const int num_rows = pmv_->num_rows();
+ const int num_cols_f = pmv_->num_cols_f();
+ const int num_cols_e = pmv_->num_cols_e();
+ const int num_col_blocks_f = pmv_->num_col_blocks_f();
+ const int num_col_blocks_e = pmv_->num_col_blocks_e();
+
+ CHECK_EQ(block_diagonal_ff->num_rows(), num_cols_f);
+ CHECK_EQ(block_diagonal_ff->num_cols(), num_cols_f);
+
+ EXPECT_EQ(bs_diagonal->cols.size(), num_col_blocks_f);
+ EXPECT_EQ(bs_diagonal->rows.size(), num_col_blocks_f);
+
+ Matrix EF;
+ A_->ToDenseMatrix(&EF);
+ const auto F = EF.topRightCorner(num_rows, num_cols_f);
+
+ Matrix expected_FtF = F.transpose() * F;
+ Matrix actual_FtF;
+ block_diagonal_ff->ToDenseMatrix(&actual_FtF);
+
+ // FtF might be not block-diagonal
+ auto bs = down_cast<BlockSparseMatrix*>(A_.get())->block_structure();
+ for (int i = 0; i < num_col_blocks_f; ++i) {
+ const auto col_block_f = bs->cols[num_col_blocks_e + i];
+ const int block_size = col_block_f.size;
+ const int block_pos = col_block_f.position - num_cols_e;
+ const auto cell_expected =
+ expected_FtF.block(block_pos, block_pos, block_size, block_size);
+ auto cell_actual =
+ actual_FtF.block(block_pos, block_pos, block_size, block_size);
+ cell_actual -= cell_expected;
+ EXPECT_NEAR(cell_actual.norm(), 0., kEpsilon);
+ }
+ // There should be nothing remaining outside block-diagonal
+ EXPECT_NEAR(actual_FtF.norm(), 0., kEpsilon);
+}
+
+TEST_P(PartitionedMatrixViewTest, BlockDiagonalEtE) {
+ std::unique_ptr<BlockSparseMatrix> block_diagonal_ee(
+ pmv_->CreateBlockDiagonalEtE());
+ const CompressedRowBlockStructure* bs = block_diagonal_ee->block_structure();
+ const int num_rows = pmv_->num_rows();
+ const int num_cols_e = pmv_->num_cols_e();
+ const int num_col_blocks_e = pmv_->num_col_blocks_e();
+
+ CHECK_EQ(block_diagonal_ee->num_rows(), num_cols_e);
+ CHECK_EQ(block_diagonal_ee->num_cols(), num_cols_e);
+
+ EXPECT_EQ(bs->cols.size(), num_col_blocks_e);
+ EXPECT_EQ(bs->rows.size(), num_col_blocks_e);
+
+ Matrix EF;
+ A_->ToDenseMatrix(&EF);
+ const auto E = EF.topLeftCorner(num_rows, num_cols_e);
+
+ Matrix expected_EtE = E.transpose() * E;
+ Matrix actual_EtE;
+ block_diagonal_ee->ToDenseMatrix(&actual_EtE);
+
+ EXPECT_NEAR((expected_EtE - actual_EtE).norm(), 0., kEpsilon);
+}
+
+TEST_P(PartitionedMatrixViewTest, UpdateBlockDiagonalEtE) {
+ std::unique_ptr<BlockSparseMatrix> block_diagonal_ete(
+ pmv_->CreateBlockDiagonalEtE());
+ const CompressedRowBlockStructure* bs = block_diagonal_ete->block_structure();
+ const int num_cols = pmv_->num_cols_e();
+
+ Matrix multi_threaded(num_cols, num_cols);
+ pmv_->UpdateBlockDiagonalEtE(block_diagonal_ete.get());
+ block_diagonal_ete->ToDenseMatrix(&multi_threaded);
+
+ Matrix single_threaded(num_cols, num_cols);
+ pmv_single_threaded_->UpdateBlockDiagonalEtE(block_diagonal_ete.get());
+ block_diagonal_ete->ToDenseMatrix(&single_threaded);
+
+ EXPECT_NEAR((multi_threaded - single_threaded).norm(), 0., kEpsilon);
+}
+
+TEST_P(PartitionedMatrixViewTest, UpdateBlockDiagonalFtF) {
+ std::unique_ptr<BlockSparseMatrix> block_diagonal_ftf(
+ pmv_->CreateBlockDiagonalFtF());
+ const CompressedRowBlockStructure* bs = block_diagonal_ftf->block_structure();
+ const int num_cols = pmv_->num_cols_f();
+
+ Matrix multi_threaded(num_cols, num_cols);
+ pmv_->UpdateBlockDiagonalFtF(block_diagonal_ftf.get());
+ block_diagonal_ftf->ToDenseMatrix(&multi_threaded);
+
+ Matrix single_threaded(num_cols, num_cols);
+ pmv_single_threaded_->UpdateBlockDiagonalFtF(block_diagonal_ftf.get());
+ block_diagonal_ftf->ToDenseMatrix(&single_threaded);
+
+ EXPECT_NEAR((multi_threaded - single_threaded).norm(), 0., kEpsilon);
+}
+
INSTANTIATE_TEST_SUITE_P(
ParallelProducts,
- PartitionedMatrixViewSpMVTest,
+ PartitionedMatrixViewTest,
::testing::Combine(::testing::Values(2, 4, 6),
::testing::Values(1, 2, 3, 4, 5, 6, 7, 8)),
ParamInfoToString);