Refactor PartitionedMatrixView to cache the partitions

The constructor now takes a LinearSolver::Options as input, computes the
partitioning once, and reuses it for the lifetime of the object.

Change-Id: I9ef30df0b60f8fa91c8b5601c397b2d9314a2cc7
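The construction pattern changes from PartitionedMatrixView(matrix, num_col_blocks_e) to PartitionedMatrixView(options, matrix), with the context, thread count, and elimination groups all supplied up front. A minimal sketch of the new call pattern, assuming the Ceres internal headers (the helper MakeView is hypothetical; it mirrors the benchmark code below):

#include <memory>

#include "ceres/block_sparse_matrix.h"
#include "ceres/context_impl.h"
#include "ceres/linear_solver.h"
#include "ceres/partitioned_matrix_view.h"

namespace ceres::internal {

std::unique_ptr<PartitionedMatrixViewBase> MakeView(
    const BlockSparseMatrix& jacobian,
    ContextImpl* context,
    int num_e_blocks,
    int num_threads) {
  LinearSolver::Options options;
  options.context = context;          // threading context, cached by the view
  options.num_threads = num_threads;  // used by all subsequent multiplies
  options.elimination_groups.push_back(num_e_blocks);  // defines the E/F split
  // For num_threads > 1 the matrix also needs its transpose block structure
  // (see AddTransposeBlockStructure in the tests below) so that the column
  // partitions can be computed from cumulative nonzero counts.
  return PartitionedMatrixViewBase::Create(options, jacobian);
}

}  // namespace ceres::internal

The point of the refactor: the cost-balanced column partitions are computed once at construction, and the multiply methods no longer take (context, num_threads) on every call.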
diff --git a/internal/ceres/CMakeLists.txt b/internal/ceres/CMakeLists.txt
index 4a85e8b..06c0914 100644
--- a/internal/ceres/CMakeLists.txt
+++ b/internal/ceres/CMakeLists.txt
@@ -603,9 +603,6 @@
 add_executable(spmv_benchmark spmv_benchmark.cc)
 add_dependencies_to_benchmark(spmv_benchmark)
 
-add_executable(partitioned_matrix_view_benchmark partitioned_matrix_view_benchmark.cc)
-add_dependencies_to_benchmark(partitioned_matrix_view_benchmark)
-
 add_executable(block_jacobi_preconditioner_benchmark block_jacobi_preconditioner_benchmark.cc)
 add_dependencies_to_benchmark(block_jacobi_preconditioner_benchmark)
 
diff --git a/internal/ceres/evaluation_benchmark.cc b/internal/ceres/evaluation_benchmark.cc
index 2f9fff0..9849b5a 100644
--- a/internal/ceres/evaluation_benchmark.cc
+++ b/internal/ceres/evaluation_benchmark.cc
@@ -145,23 +145,20 @@ return crs_jacobian.get(); } - const PartitionedView* PartitionedMatrixViewJacobian(ContextImpl* context) { - if (!partitioned_view_jacobian) { - auto block_sparse = BlockSparseJacobian(context); - partitioned_view_jacobian = std::make_unique<PartitionedView>( - *block_sparse, bal_problem->num_points()); - } + const PartitionedView* PartitionedMatrixViewJacobian( + const LinearSolver::Options& options) { + auto block_sparse = BlockSparseJacobian(options.context); + partitioned_view_jacobian = + std::make_unique<PartitionedView>(options, *block_sparse); return partitioned_view_jacobian.get(); } const PartitionedView* PartitionedMatrixViewJacobianWithTranspose( - ContextImpl* context) { - if (!partitioned_view_jacobian_with_transpose) { - auto block_sparse_transpose = BlockSparseJacobianWithTranspose(context); - partitioned_view_jacobian_with_transpose = - std::make_unique<PartitionedView>(*block_sparse_transpose, - bal_problem->num_points()); - } + const LinearSolver::Options& options) { + auto block_sparse_transpose = + BlockSparseJacobianWithTranspose(options.context); + partitioned_view_jacobian_with_transpose = + std::make_unique<PartitionedView>(options, *block_sparse_transpose); return partitioned_view_jacobian_with_transpose.get(); } @@ -243,16 +240,17 @@ static void PMVRightMultiplyAndAccumulateF(benchmark::State& state, BALData* data, ContextImpl* context) { - const int num_threads = state.range(0); - - auto jacobian = data->PartitionedMatrixViewJacobian(context); + LinearSolver::Options options; + options.num_threads = state.range(0); + options.elimination_groups.push_back(data->bal_problem->num_points()); + options.context = context; + auto jacobian = data->PartitionedMatrixViewJacobian(options); Vector y = Vector::Zero(jacobian->num_rows()); Vector x = Vector::Random(jacobian->num_cols_f()); for (auto _ : state) { - jacobian->RightMultiplyAndAccumulateF( - x.data(), y.data(), context, num_threads); + jacobian->RightMultiplyAndAccumulateF(x.data(), y.data()); } CHECK_GT(y.squaredNorm(), 0.); } @@ -260,49 +258,35 @@ static void PMVLeftMultiplyAndAccumulateF(benchmark::State& state, BALData* data, ContextImpl* context) { - const int num_threads = state.range(0); - - auto jacobian = data->PartitionedMatrixViewJacobian(context); + LinearSolver::Options options; + options.num_threads = state.range(0); + options.elimination_groups.push_back(data->bal_problem->num_points()); + options.context = context; + auto jacobian = data->PartitionedMatrixViewJacobianWithTranspose(options); Vector y = Vector::Zero(jacobian->num_cols_f()); Vector x = Vector::Random(jacobian->num_rows()); for (auto _ : state) { - jacobian->LeftMultiplyAndAccumulateF( - x.data(), y.data(), context, num_threads); + jacobian->LeftMultiplyAndAccumulateF(x.data(), y.data()); } CHECK_GT(y.squaredNorm(), 0.); } -static void PMVLeftMultiplyAndAccumulateWithTransposeF(benchmark::State& state, - BALData* data, - ContextImpl* context) { - const int num_threads = state.range(0); - - auto jacobian = data->PartitionedMatrixViewJacobianWithTranspose(context); - - Vector y = Vector::Zero(jacobian->num_cols_f()); - Vector x = Vector::Random(jacobian->num_rows()); - - for (auto _ : state) { - jacobian->LeftMultiplyAndAccumulateF( - x.data(), y.data(), context, num_threads); - } - CHECK_GT(y.squaredNorm(), 0.); -} static void PMVRightMultiplyAndAccumulateE(benchmark::State& state, BALData* data, ContextImpl* context) { - const int num_threads = state.range(0); - - auto jacobian = 
data->PartitionedMatrixViewJacobian(context); + LinearSolver::Options options; + options.num_threads = state.range(0); + options.elimination_groups.push_back(data->bal_problem->num_points()); + options.context = context; + auto jacobian = data->PartitionedMatrixViewJacobian(options); Vector y = Vector::Zero(jacobian->num_rows()); Vector x = Vector::Random(jacobian->num_cols_e()); for (auto _ : state) { - jacobian->RightMultiplyAndAccumulateE( - x.data(), y.data(), context, num_threads); + jacobian->RightMultiplyAndAccumulateE(x.data(), y.data()); } CHECK_GT(y.squaredNorm(), 0.); } @@ -310,33 +294,17 @@ static void PMVLeftMultiplyAndAccumulateE(benchmark::State& state, BALData* data, ContextImpl* context) { - const int num_threads = state.range(0); - - auto jacobian = data->PartitionedMatrixViewJacobian(context); + LinearSolver::Options options; + options.num_threads = state.range(0); + options.elimination_groups.push_back(data->bal_problem->num_points()); + options.context = context; + auto jacobian = data->PartitionedMatrixViewJacobianWithTranspose(options); Vector y = Vector::Zero(jacobian->num_cols_e()); Vector x = Vector::Random(jacobian->num_rows()); for (auto _ : state) { - jacobian->LeftMultiplyAndAccumulateE( - x.data(), y.data(), context, num_threads); - } - CHECK_GT(y.squaredNorm(), 0.); -} - -static void PMVLeftMultiplyAndAccumulateWithTransposeE(benchmark::State& state, - BALData* data, - ContextImpl* context) { - const int num_threads = state.range(0); - - auto jacobian = data->PartitionedMatrixViewJacobianWithTranspose(context); - - Vector y = Vector::Zero(jacobian->num_cols_e()); - Vector x = Vector::Random(jacobian->num_rows()); - - for (auto _ : state) { - jacobian->LeftMultiplyAndAccumulateE( - x.data(), y.data(), context, num_threads); + jacobian->LeftMultiplyAndAccumulateE(x.data(), y.data()); } CHECK_GT(y.squaredNorm(), 0.); } @@ -363,22 +331,6 @@ ContextImpl* context) { const int num_threads = state.range(0); - auto jacobian = data->BlockSparseJacobian(context); - - Vector y = Vector::Zero(jacobian->num_cols()); - Vector x = Vector::Random(jacobian->num_rows()); - - for (auto _ : state) { - jacobian->LeftMultiplyAndAccumulate( - x.data(), y.data(), context, num_threads); - } - CHECK_GT(y.squaredNorm(), 0.); -} - -static void JacobianLeftMultiplyAndAccumulateWithTranspose( - benchmark::State& state, BALData* data, ContextImpl* context) { - const int num_threads = state.range(0); - auto jacobian = data->BlockSparseJacobianWithTranspose(context); Vector y = Vector::Zero(jacobian->num_cols()); @@ -551,15 +503,6 @@ ceres::internal::JacobianLeftMultiplyAndAccumulate, data, &context) - ->Arg(1); - - const std::string name_left_product_transpose = - "JacobianLeftMultiplyAndAccumulateWithTranspose<" + path + ">"; - ::benchmark::RegisterBenchmark( - name_left_product_transpose.c_str(), - ceres::internal::JacobianLeftMultiplyAndAccumulateWithTranspose, - data, - &context) ->Arg(1) ->Arg(2) ->Arg(4) @@ -573,14 +516,6 @@ ceres::internal::PMVLeftMultiplyAndAccumulateF, data, &context) - ->Arg(1); - const std::string name_left_product_partitioned_transpose_f = - "PMVLeftMultiplyAndAccumulateWithTransposeF<" + path + ">"; - ::benchmark::RegisterBenchmark( - name_left_product_partitioned_transpose_f.c_str(), - ceres::internal::PMVLeftMultiplyAndAccumulateWithTransposeF, - data, - &context) ->Arg(1) ->Arg(2) ->Arg(4) @@ -594,15 +529,6 @@ ceres::internal::PMVLeftMultiplyAndAccumulateE, data, &context) - ->Arg(1); - - const std::string name_left_product_partitioned_transpose_e = - 
"PMVLeftMultiplyAndAccumulateWithTransposeE<" + path + ">"; - ::benchmark::RegisterBenchmark( - name_left_product_partitioned_transpose_e.c_str(), - ceres::internal::PMVLeftMultiplyAndAccumulateWithTransposeE, - data, - &context) ->Arg(1) ->Arg(2) ->Arg(4)
diff --git a/internal/ceres/fake_bundle_adjustment_jacobian.cc b/internal/ceres/fake_bundle_adjustment_jacobian.cc
index af1d03c..d8f3c01 100644
--- a/internal/ceres/fake_bundle_adjustment_jacobian.cc
+++ b/internal/ceres/fake_bundle_adjustment_jacobian.cc
@@ -109,8 +109,10 @@
       PartitionedMatrixView<2, Eigen::Dynamic, Eigen::Dynamic>;
   auto block_sparse_matrix = CreateFakeBundleAdjustmentJacobian(
       num_cameras, num_points, camera_size, landmark_size, visibility, rng);
+  LinearSolver::Options options;
+  options.elimination_groups.push_back(num_points);
   auto partitioned_view =
-      std::make_unique<PartitionedView>(*block_sparse_matrix, num_points);
+      std::make_unique<PartitionedView>(options, *block_sparse_matrix);
   return std::make_pair(std::move(partitioned_view),
                         std::move(block_sparse_matrix));
 }
diff --git a/internal/ceres/implicit_schur_complement.cc b/internal/ceres/implicit_schur_complement.cc
index 6249c8c..4e36048 100644
--- a/internal/ceres/implicit_schur_complement.cc
+++ b/internal/ceres/implicit_schur_complement.cc
@@ -105,15 +105,11 @@ double* y) const { // y1 = F x tmp_rows_.setZero(); - A_->RightMultiplyAndAccumulateF( - x, tmp_rows_.data(), options_.context, options_.num_threads); + A_->RightMultiplyAndAccumulateF(x, tmp_rows_.data()); // y2 = E' y1 tmp_e_cols_.setZero(); - A_->LeftMultiplyAndAccumulateE(tmp_rows_.data(), - tmp_e_cols_.data(), - options_.context, - options_.num_threads); + A_->LeftMultiplyAndAccumulateE(tmp_rows_.data(), tmp_e_cols_.data()); // y3 = -(E'E)^-1 y2 tmp_e_cols_2_.setZero(); @@ -121,13 +117,11 @@ tmp_e_cols_2_.data(), options_.context, options_.num_threads); + tmp_e_cols_2_ *= -1.0; // y1 = y1 + E y3 - A_->RightMultiplyAndAccumulateE(tmp_e_cols_2_.data(), - tmp_rows_.data(), - options_.context, - options_.num_threads); + A_->RightMultiplyAndAccumulateE(tmp_e_cols_2_.data(), tmp_rows_.data()); // y5 = D * x if (D_ != nullptr) { @@ -140,8 +134,7 @@ } // y = y5 + F' y1 - A_->LeftMultiplyAndAccumulateF( - tmp_rows_.data(), y, options_.context, options_.num_threads); + A_->LeftMultiplyAndAccumulateF(tmp_rows_.data(), y); } void ImplicitSchurComplement::InversePowerSeriesOperatorRightMultiplyAccumulate( @@ -149,15 +142,11 @@ CHECK(compute_ftf_inverse_); // y1 = F x tmp_rows_.setZero(); - A_->RightMultiplyAndAccumulateF( - x, tmp_rows_.data(), options_.context, options_.num_threads); + A_->RightMultiplyAndAccumulateF(x, tmp_rows_.data()); // y2 = E' y1 tmp_e_cols_.setZero(); - A_->LeftMultiplyAndAccumulateE(tmp_rows_.data(), - tmp_e_cols_.data(), - options_.context, - options_.num_threads); + A_->LeftMultiplyAndAccumulateE(tmp_rows_.data(), tmp_e_cols_.data()); // y3 = (E'E)^-1 y2 tmp_e_cols_2_.setZero(); @@ -167,17 +156,11 @@ options_.num_threads); // y1 = E y3 tmp_rows_.setZero(); - A_->RightMultiplyAndAccumulateE(tmp_e_cols_2_.data(), - tmp_rows_.data(), - options_.context, - options_.num_threads); + A_->RightMultiplyAndAccumulateE(tmp_e_cols_2_.data(), tmp_rows_.data()); // y4 = F' y1 tmp_f_cols_.setZero(); - A_->LeftMultiplyAndAccumulateF(tmp_rows_.data(), - tmp_f_cols_.data(), - options_.context, - options_.num_threads); + A_->LeftMultiplyAndAccumulateF(tmp_rows_.data(), tmp_f_cols_.data()); // y += (F'F)^-1 y4 block_diagonal_FtF_inverse_->RightMultiplyAndAccumulate( @@ -219,18 +202,14 @@ // y1 = F x tmp_rows_.setZero(); - A_->RightMultiplyAndAccumulateF( - x, tmp_rows_.data(), options_.context, options_.num_threads); + A_->RightMultiplyAndAccumulateF(x, tmp_rows_.data()); // y2 = b - y1 tmp_rows_ = ConstVectorRef(b_, num_rows) - tmp_rows_; // y3 = E' y2 tmp_e_cols_.setZero(); - A_->LeftMultiplyAndAccumulateE(tmp_rows_.data(), - tmp_e_cols_.data(), - options_.context, - options_.num_threads); + A_->LeftMultiplyAndAccumulateE(tmp_rows_.data(), tmp_e_cols_.data()); // y = (E'E)^-1 y3 VectorRef(y, num_cols).setZero(); @@ -254,8 +233,7 @@ void ImplicitSchurComplement::UpdateRhs() { // y1 = E'b tmp_e_cols_.setZero(); - A_->LeftMultiplyAndAccumulateE( - b_, tmp_e_cols_.data(), options_.context, options_.num_threads); + A_->LeftMultiplyAndAccumulateE(b_, tmp_e_cols_.data()); // y2 = (E'E)^-1 y1 Vector y2 = Vector::Zero(A_->num_cols_e()); @@ -264,16 +242,14 @@ // y3 = E y2 tmp_rows_.setZero(); - A_->RightMultiplyAndAccumulateE( - y2.data(), tmp_rows_.data(), options_.context, options_.num_threads); + A_->RightMultiplyAndAccumulateE(y2.data(), tmp_rows_.data()); // y3 = b - y3 tmp_rows_ = ConstVectorRef(b_, A_->num_rows()) - tmp_rows_; // rhs = F' y3 rhs_.setZero(); - A_->LeftMultiplyAndAccumulateF( - tmp_rows_.data(), rhs_.data(), options_.context, options_.num_threads); + 
A_->LeftMultiplyAndAccumulateF(tmp_rows_.data(), rhs_.data()); } } // namespace ceres::internal
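With the context and thread-count arguments gone, the multiply sequence above reads directly as the implicit Schur-complement product it implements. In the notation of the code comments (E and F are the two column partitions of the Jacobian, D the optional diagonal regularizer; this is a restatement of the commented steps, not new code):

\[
\begin{aligned}
y_1 &= F x, \qquad y_2 = E^\top y_1, \qquad y_3 = -(E^\top E)^{-1} y_2, \\
y &= D^\top D x + F^\top (y_1 + E y_3)
   = \bigl(F^\top F - F^\top E\,(E^\top E)^{-1} E^\top F + D^\top D\bigr)\, x,
\end{aligned}
\]

that is, the Schur complement of E'E is applied to x without ever being assembled.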
diff --git a/internal/ceres/parallel_for.h b/internal/ceres/parallel_for.h
index 107d19c..fe5d242 100644
--- a/internal/ceres/parallel_for.h
+++ b/internal/ceres/parallel_for.h
@@ -299,6 +299,42 @@
   });
 }
 
+// Execute the function for every element in the range [start, end) with at
+// most num_threads, using the user-provided partitioning of the range, which
+// takes into account the integer cumulative costs of the iterations.
+template <typename F>
+void ParallelFor(ContextImpl* context,
+                 int start,
+                 int end,
+                 int num_threads,
+                 const F& function,
+                 const std::vector<int>& partitions) {
+  using namespace parallel_for_details;
+  CHECK_GT(num_threads, 0);
+  if (start >= end) {
+    return;
+  }
+  CHECK_EQ(partitions.front(), start);
+  CHECK_EQ(partitions.back(), end);
+  if (num_threads == 1 || end - start <= num_threads) {
+    ParallelFor(context, start, end, num_threads, function);
+    return;
+  }
+  CHECK_GT(partitions.size(), 1);
+  const int num_partitions = partitions.size() - 1;
+  ParallelFor(context,
+              0,
+              num_partitions,
+              num_threads,
+              [&function, &partitions](int thread_id, int partition_id) {
+                const int partition_start = partitions[partition_id];
+                const int partition_end = partitions[partition_id + 1];
+
+                for (int i = partition_start; i < partition_end; ++i) {
+                  Invoke<F>(thread_id, i, function);
+                }
+              });
+}
 }  // namespace ceres::internal
 
 // Backend-specific implementations of ParallelInvoke
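The overload falls back to the plain index-based ParallelFor when the work is single-threaded or the range is tiny; otherwise every adjacent pair in partitions becomes one task, so a well-chosen partitioning equalizes the total cost per task rather than the number of iterations. A hypothetical usage sketch, assuming it runs inside ceres::internal with a valid ContextImpl* context (the split {0, 1, 3, 6} is made up):

// Split [0, 6) into three chunks; each half-open chunk
// [partitions[i], partitions[i + 1]) runs as a single task.
std::vector<double> row(6, 0.0);
const std::vector<int> partitions = {0, 1, 3, 6};
ParallelFor(
    context,
    /*start=*/0,
    /*end=*/6,
    /*num_threads=*/4,
    [&row](int i) { row[i] += 1.0; },  // Invoke<> also accepts (thread_id, i)
    partitions);

Note that with end - start = 6 and num_threads = 4 this example does take the partitioned path, since end - start > num_threads.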
diff --git a/internal/ceres/partitioned_matrix_view.cc b/internal/ceres/partitioned_matrix_view.cc
index 2710366..d952a2a 100644
--- a/internal/ceres/partitioned_matrix_view.cc
+++ b/internal/ceres/partitioned_matrix_view.cc
@@ -55,121 +55,121 @@ (options.e_block_size == 2) && (options.f_block_size == 2)) { return std::make_unique<PartitionedMatrixView<2,2, 2>>( - matrix, options.elimination_groups[0]); + options, matrix); } if ((options.row_block_size == 2) && (options.e_block_size == 2) && (options.f_block_size == 3)) { return std::make_unique<PartitionedMatrixView<2,2, 3>>( - matrix, options.elimination_groups[0]); + options, matrix); } if ((options.row_block_size == 2) && (options.e_block_size == 2) && (options.f_block_size == 4)) { return std::make_unique<PartitionedMatrixView<2,2, 4>>( - matrix, options.elimination_groups[0]); + options, matrix); } if ((options.row_block_size == 2) && (options.e_block_size == 2)) { return std::make_unique<PartitionedMatrixView<2,2, Eigen::Dynamic>>( - matrix, options.elimination_groups[0]); + options, matrix); } if ((options.row_block_size == 2) && (options.e_block_size == 3) && (options.f_block_size == 3)) { return std::make_unique<PartitionedMatrixView<2,3, 3>>( - matrix, options.elimination_groups[0]); + options, matrix); } if ((options.row_block_size == 2) && (options.e_block_size == 3) && (options.f_block_size == 4)) { return std::make_unique<PartitionedMatrixView<2,3, 4>>( - matrix, options.elimination_groups[0]); + options, matrix); } if ((options.row_block_size == 2) && (options.e_block_size == 3) && (options.f_block_size == 6)) { return std::make_unique<PartitionedMatrixView<2,3, 6>>( - matrix, options.elimination_groups[0]); + options, matrix); } if ((options.row_block_size == 2) && (options.e_block_size == 3) && (options.f_block_size == 9)) { return std::make_unique<PartitionedMatrixView<2,3, 9>>( - matrix, options.elimination_groups[0]); + options, matrix); } if ((options.row_block_size == 2) && (options.e_block_size == 3)) { return std::make_unique<PartitionedMatrixView<2,3, Eigen::Dynamic>>( - matrix, options.elimination_groups[0]); + options, matrix); } if ((options.row_block_size == 2) && (options.e_block_size == 4) && (options.f_block_size == 3)) { return std::make_unique<PartitionedMatrixView<2,4, 3>>( - matrix, options.elimination_groups[0]); + options, matrix); } if ((options.row_block_size == 2) && (options.e_block_size == 4) && (options.f_block_size == 4)) { return std::make_unique<PartitionedMatrixView<2,4, 4>>( - matrix, options.elimination_groups[0]); + options, matrix); } if ((options.row_block_size == 2) && (options.e_block_size == 4) && (options.f_block_size == 6)) { return std::make_unique<PartitionedMatrixView<2,4, 6>>( - matrix, options.elimination_groups[0]); + options, matrix); } if ((options.row_block_size == 2) && (options.e_block_size == 4) && (options.f_block_size == 8)) { return std::make_unique<PartitionedMatrixView<2,4, 8>>( - matrix, options.elimination_groups[0]); + options, matrix); } if ((options.row_block_size == 2) && (options.e_block_size == 4) && (options.f_block_size == 9)) { return std::make_unique<PartitionedMatrixView<2,4, 9>>( - matrix, options.elimination_groups[0]); + options, matrix); } if ((options.row_block_size == 2) && (options.e_block_size == 4)) { return std::make_unique<PartitionedMatrixView<2,4, Eigen::Dynamic>>( - matrix, options.elimination_groups[0]); + options, matrix); } if (options.row_block_size == 2) { return std::make_unique<PartitionedMatrixView<2,Eigen::Dynamic, Eigen::Dynamic>>( - matrix, options.elimination_groups[0]); + options, matrix); } if ((options.row_block_size == 3) && (options.e_block_size == 3) && (options.f_block_size == 3)) { return std::make_unique<PartitionedMatrixView<3,3, 3>>( - 
matrix, options.elimination_groups[0]); + options, matrix); } if ((options.row_block_size == 4) && (options.e_block_size == 4) && (options.f_block_size == 2)) { return std::make_unique<PartitionedMatrixView<4,4, 2>>( - matrix, options.elimination_groups[0]); + options, matrix); } if ((options.row_block_size == 4) && (options.e_block_size == 4) && (options.f_block_size == 3)) { return std::make_unique<PartitionedMatrixView<4,4, 3>>( - matrix, options.elimination_groups[0]); + options, matrix); } if ((options.row_block_size == 4) && (options.e_block_size == 4) && (options.f_block_size == 4)) { return std::make_unique<PartitionedMatrixView<4,4, 4>>( - matrix, options.elimination_groups[0]); + options, matrix); } if ((options.row_block_size == 4) && (options.e_block_size == 4)) { return std::make_unique<PartitionedMatrixView<4,4, Eigen::Dynamic>>( - matrix, options.elimination_groups[0]); + options, matrix); } #endif @@ -179,7 +179,7 @@ return std::make_unique<PartitionedMatrixView<Eigen::Dynamic, Eigen::Dynamic, Eigen::Dynamic>>( - matrix, options.elimination_groups[0]); + options, matrix); }; } // namespace ceres::internal
diff --git a/internal/ceres/partitioned_matrix_view.h b/internal/ceres/partitioned_matrix_view.h
index 533a0e5..f34fd4a 100644
--- a/internal/ceres/partitioned_matrix_view.h
+++ b/internal/ceres/partitioned_matrix_view.h
@@ -70,33 +70,25 @@ // y += E'x virtual void LeftMultiplyAndAccumulateE(const double* x, double* y) const = 0; - virtual void LeftMultiplyAndAccumulateE(const double* x, - double* y, - ContextImpl* context, - int num_threads) const = 0; + virtual void LeftMultiplyAndAccumulateESingleThreaded(const double* x, + double* y) const = 0; + virtual void LeftMultiplyAndAccumulateEMultiThreaded(const double* x, + double* y) const = 0; // y += F'x virtual void LeftMultiplyAndAccumulateF(const double* x, double* y) const = 0; - virtual void LeftMultiplyAndAccumulateF(const double* x, - double* y, - ContextImpl* context, - int num_threads) const = 0; + virtual void LeftMultiplyAndAccumulateFSingleThreaded(const double* x, + double* y) const = 0; + virtual void LeftMultiplyAndAccumulateFMultiThreaded(const double* x, + double* y) const = 0; // y += Ex virtual void RightMultiplyAndAccumulateE(const double* x, double* y) const = 0; - virtual void RightMultiplyAndAccumulateE(const double* x, - double* y, - ContextImpl* context, - int num_threads) const = 0; // y += Fx virtual void RightMultiplyAndAccumulateF(const double* x, double* y) const = 0; - virtual void RightMultiplyAndAccumulateF(const double* x, - double* y, - ContextImpl* context, - int num_threads) const = 0; // Create and return the block diagonal of the matrix E'E. virtual std::unique_ptr<BlockSparseMatrix> CreateBlockDiagonalEtE() const = 0; @@ -128,6 +120,8 @@ virtual int num_cols_f() const = 0; virtual int num_rows() const = 0; virtual int num_cols() const = 0; + virtual const std::vector<int>& e_cols_partition() const = 0; + virtual const std::vector<int>& f_cols_partition() const = 0; // clang-format on static std::unique_ptr<PartitionedMatrixViewBase> Create( @@ -141,29 +135,34 @@ : public PartitionedMatrixViewBase { public: // matrix = [E F], where the matrix E contains the first - // num_col_blocks_a column blocks. - PartitionedMatrixView(const BlockSparseMatrix& matrix, int num_col_blocks_e); + // options.elimination_groups[0] column blocks. 
+ PartitionedMatrixView(const LinearSolver::Options& options, + const BlockSparseMatrix& matrix); - void LeftMultiplyAndAccumulateE(const double* x, double* y) const final; - void LeftMultiplyAndAccumulateF(const double* x, double* y) const final; - void LeftMultiplyAndAccumulateE(const double* x, - double* y, - ContextImpl* context, - int num_threads) const final; - void LeftMultiplyAndAccumulateF(const double* x, - double* y, - ContextImpl* context, - int num_threads) const final; - void RightMultiplyAndAccumulateE(const double* x, double* y) const final; - void RightMultiplyAndAccumulateF(const double* x, double* y) const final; - void RightMultiplyAndAccumulateE(const double* x, - double* y, - ContextImpl* context, - int num_threads) const final; - void RightMultiplyAndAccumulateF(const double* x, - double* y, - ContextImpl* context, - int num_threads) const final; + // y += E'x + virtual void LeftMultiplyAndAccumulateE(const double* x, + double* y) const final; + virtual void LeftMultiplyAndAccumulateESingleThreaded(const double* x, + double* y) const final; + virtual void LeftMultiplyAndAccumulateEMultiThreaded(const double* x, + double* y) const final; + + // y += F'x + virtual void LeftMultiplyAndAccumulateF(const double* x, + double* y) const final; + virtual void LeftMultiplyAndAccumulateFSingleThreaded(const double* x, + double* y) const final; + virtual void LeftMultiplyAndAccumulateFMultiThreaded(const double* x, + double* y) const final; + + // y += Ex + virtual void RightMultiplyAndAccumulateE(const double* x, + double* y) const final; + + // y += Fx + virtual void RightMultiplyAndAccumulateF(const double* x, + double* y) const final; + std::unique_ptr<BlockSparseMatrix> CreateBlockDiagonalEtE() const final; std::unique_ptr<BlockSparseMatrix> CreateBlockDiagonalFtF() const final; void UpdateBlockDiagonalEtE(BlockSparseMatrix* block_diagonal) const final; @@ -176,17 +175,26 @@ int num_rows() const final { return matrix_.num_rows(); } int num_cols() const final { return matrix_.num_cols(); } // clang-format on + const std::vector<int>& e_cols_partition() const final { + return e_cols_partition_; + } + const std::vector<int>& f_cols_partition() const final { + return f_cols_partition_; + } private: std::unique_ptr<BlockSparseMatrix> CreateBlockDiagonalMatrixLayout( int start_col_block, int end_col_block) const; + const LinearSolver::Options options_; const BlockSparseMatrix& matrix_; int num_row_blocks_e_; int num_col_blocks_e_; int num_col_blocks_f_; int num_cols_e_; int num_cols_f_; + std::vector<int> e_cols_partition_; + std::vector<int> f_cols_partition_; }; } // namespace ceres::internal
diff --git a/internal/ceres/partitioned_matrix_view_benchmark.cc b/internal/ceres/partitioned_matrix_view_benchmark.cc
deleted file mode 100644
index 63d0e34..0000000
--- a/internal/ceres/partitioned_matrix_view_benchmark.cc
+++ /dev/null
@@ -1,147 +0,0 @@ -// Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2022 Google Inc. All rights reserved. -// http://ceres-solver.org/ -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are met: -// -// * Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// * Neither the name of Google Inc. nor the names of its contributors may be -// used to endorse or promote products derived from this software without -// specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. - -#include <memory> -#include <random> - -#include "benchmark/benchmark.h" -#include "ceres/context_impl.h" -#include "ceres/fake_bundle_adjustment_jacobian.h" -#include "ceres/partitioned_matrix_view.h" - -constexpr int kNumCameras = 1000; -constexpr int kNumPoints = 10000; -constexpr int kCameraSize = 6; -constexpr int kPointSize = 3; -constexpr double kVisibility = 0.1; - -namespace ceres::internal { - -static void BM_PatitionedViewRightMultiplyAndAccumulateE_Static( - benchmark::State& state) { - const int num_threads = state.range(0); - std::mt19937 prng; - auto [partitioned_view, jacobian] = - CreateFakeBundleAdjustmentPartitionedJacobian<kPointSize, kCameraSize>( - kNumCameras, kNumPoints, kVisibility, prng); - - ContextImpl context; - context.EnsureMinimumThreads(num_threads); - - Vector x(partitioned_view->num_cols_e()); - Vector y(partitioned_view->num_rows()); - x.setRandom(); - y.setRandom(); - double sum = 0; - for (auto _ : state) { - partitioned_view->RightMultiplyAndAccumulateE( - x.data(), y.data(), &context, num_threads); - sum += y.norm(); - } - CHECK_NE(sum, 0.0); -} -BENCHMARK(BM_PatitionedViewRightMultiplyAndAccumulateE_Static) - ->Arg(1) - ->Arg(2) - ->Arg(4) - ->Arg(8) - ->Arg(16); - -static void BM_PatitionedViewRightMultiplyAndAccumulateE_Dynamic( - benchmark::State& state) { - std::mt19937 prng; - auto [partitioned_view, jacobian] = - CreateFakeBundleAdjustmentPartitionedJacobian( - kNumCameras, kNumPoints, kCameraSize, kPointSize, kVisibility, prng); - - Vector x(partitioned_view->num_cols_e()); - Vector y(partitioned_view->num_rows()); - x.setRandom(); - y.setRandom(); - double sum = 0; - for (auto _ : state) { - partitioned_view->RightMultiplyAndAccumulateE(x.data(), y.data()); - sum += y.norm(); - } - CHECK_NE(sum, 0.0); -} -BENCHMARK(BM_PatitionedViewRightMultiplyAndAccumulateE_Dynamic); - -static void 
BM_PatitionedViewRightMultiplyAndAccumulateF_Static( - benchmark::State& state) { - const int num_threads = state.range(0); - std::mt19937 prng; - auto [partitioned_view, jacobian] = - CreateFakeBundleAdjustmentPartitionedJacobian<kPointSize, kCameraSize>( - kNumCameras, kNumPoints, kVisibility, prng); - - ContextImpl context; - context.EnsureMinimumThreads(num_threads); - - Vector x(partitioned_view->num_cols_f()); - Vector y(partitioned_view->num_rows()); - x.setRandom(); - y.setRandom(); - double sum = 0; - for (auto _ : state) { - partitioned_view->RightMultiplyAndAccumulateF( - x.data(), y.data(), &context, num_threads); - sum += y.norm(); - } - CHECK_NE(sum, 0.0); -} -BENCHMARK(BM_PatitionedViewRightMultiplyAndAccumulateF_Static) - ->Arg(1) - ->Arg(2) - ->Arg(4) - ->Arg(8) - ->Arg(16); - -static void BM_PatitionedViewRightMultiplyAndAccumulateF_Dynamic( - benchmark::State& state) { - std::mt19937 prng; - auto [partitioned_view, jacobian] = - CreateFakeBundleAdjustmentPartitionedJacobian( - kNumCameras, kNumPoints, kCameraSize, kPointSize, kVisibility, prng); - - Vector x(partitioned_view->num_cols_f()); - Vector y(partitioned_view->num_rows()); - x.setRandom(); - y.setRandom(); - double sum = 0; - for (auto _ : state) { - partitioned_view->RightMultiplyAndAccumulateF(x.data(), y.data()); - sum += y.norm(); - } - CHECK_NE(sum, 0.0); -} -BENCHMARK(BM_PatitionedViewRightMultiplyAndAccumulateF_Dynamic); - -} // namespace ceres::internal - -BENCHMARK_MAIN();
diff --git a/internal/ceres/partitioned_matrix_view_impl.h b/internal/ceres/partitioned_matrix_view_impl.h
index 689f09e..1ca2e14 100644
--- a/internal/ceres/partitioned_matrix_view_impl.h
+++ b/internal/ceres/partitioned_matrix_view_impl.h
@@ -45,11 +45,14 @@ template <int kRowBlockSize, int kEBlockSize, int kFBlockSize> PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>:: - PartitionedMatrixView(const BlockSparseMatrix& matrix, int num_col_blocks_e) - : matrix_(matrix), num_col_blocks_e_(num_col_blocks_e) { + PartitionedMatrixView(const LinearSolver::Options& options, + const BlockSparseMatrix& matrix) + + : options_(options), matrix_(matrix) { const CompressedRowBlockStructure* bs = matrix_.block_structure(); CHECK(bs != nullptr); + num_col_blocks_e_ = options_.elimination_groups[0]; num_col_blocks_f_ = bs->cols.size() - num_col_blocks_e_; // Compute the number of row blocks in E. The number of row blocks @@ -79,6 +82,25 @@ } CHECK_EQ(num_cols_e_ + num_cols_f_, matrix_.num_cols()); + + auto transpose_bs = matrix_.transpose_block_structure(); + const int num_threads = options_.num_threads; + if (transpose_bs != nullptr && num_threads > 1) { + int kMaxPartitions = num_threads * 4; + e_cols_partition_ = parallel_for_details::ComputePartition( + 0, + num_col_blocks_e_, + kMaxPartitions, + transpose_bs->rows.data(), + [](const CompressedRow& row) { return row.cumulative_nnz; }); + + f_cols_partition_ = parallel_for_details::ComputePartition( + num_col_blocks_e_, + num_col_blocks_e_ + num_col_blocks_f_, + kMaxPartitions, + transpose_bs->rows.data(), + [](const CompressedRow& row) { return row.cumulative_nnz; }); + } } // The next four methods don't seem to be particularly cache @@ -89,23 +111,14 @@ template <int kRowBlockSize, int kEBlockSize, int kFBlockSize> void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>:: RightMultiplyAndAccumulateE(const double* x, double* y) const { - RightMultiplyAndAccumulateE(x, y, nullptr, 1); -} - -template <int kRowBlockSize, int kEBlockSize, int kFBlockSize> -void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>:: - RightMultiplyAndAccumulateE(const double* x, - double* y, - ContextImpl* context, - int num_threads) const { // Iterate over the first num_row_blocks_e_ row blocks, and multiply // by the first cell in each row block. auto bs = matrix_.block_structure(); const double* values = matrix_.values(); - ParallelFor(context, + ParallelFor(options_.context, 0, num_row_blocks_e_, - num_threads, + options_.num_threads, [values, bs, x, y](int row_block_id) { const Cell& cell = bs->rows[row_block_id].cells[0]; const int row_block_pos = bs->rows[row_block_id].block.position; @@ -125,15 +138,6 @@ template <int kRowBlockSize, int kEBlockSize, int kFBlockSize> void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>:: RightMultiplyAndAccumulateF(const double* x, double* y) const { - RightMultiplyAndAccumulateF(x, y, nullptr, 1); -} - -template <int kRowBlockSize, int kEBlockSize, int kFBlockSize> -void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>:: - RightMultiplyAndAccumulateF(const double* x, - double* y, - ContextImpl* context, - int num_threads) const { // Iterate over row blocks, and if the row block is in E, then // multiply by all the cells except the first one which is of type // E. 
If the row block is not in E (i.e its in the bottom @@ -143,10 +147,10 @@ const int num_row_blocks = bs->rows.size(); const int num_cols_e = num_cols_e_; const double* values = matrix_.values(); - ParallelFor(context, + ParallelFor(options_.context, 0, num_row_blocks_e_, - num_threads, + options_.num_threads, [values, bs, num_cols_e, x, y](int row_block_id) { const int row_block_pos = bs->rows[row_block_id].block.position; const int row_block_size = bs->rows[row_block_id].block.size; @@ -163,10 +167,10 @@ // clang-format on } }); - ParallelFor(context, + ParallelFor(options_.context, num_row_blocks_e_, num_row_blocks, - num_threads, + options_.num_threads, [values, bs, num_cols_e, x, y](int row_block_id) { const int row_block_pos = bs->rows[row_block_id].block.position; const int row_block_size = bs->rows[row_block_id].block.size; @@ -190,7 +194,17 @@ LeftMultiplyAndAccumulateE(const double* x, double* y) const { if (!num_col_blocks_e_) return; if (!num_row_blocks_e_) return; + if (options_.num_threads == 1) { + LeftMultiplyAndAccumulateESingleThreaded(x, y); + } else { + CHECK(options_.context != nullptr); + LeftMultiplyAndAccumulateEMultiThreaded(x, y); + } +} +template <int kRowBlockSize, int kEBlockSize, int kFBlockSize> +void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>:: + LeftMultiplyAndAccumulateESingleThreaded(const double* x, double* y) const { const CompressedRowBlockStructure* bs = matrix_.block_structure(); // Iterate over the first num_row_blocks_e_ row blocks, and multiply @@ -214,28 +228,19 @@ template <int kRowBlockSize, int kEBlockSize, int kFBlockSize> void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>:: - LeftMultiplyAndAccumulateE(const double* x, - double* y, - ContextImpl* context, - int num_threads) const { - if (!num_col_blocks_e_) return; - if (!num_row_blocks_e_) return; - + LeftMultiplyAndAccumulateEMultiThreaded(const double* x, double* y) const { auto transpose_bs = matrix_.transpose_block_structure(); - if (transpose_bs == nullptr || num_threads == 1) { - LeftMultiplyAndAccumulateE(x, y); - return; - } + CHECK(transpose_bs != nullptr); // Local copies of class members in order to avoid capturing pointer to the // whole object in lambda function auto values = matrix_.values(); const int num_row_blocks_e = num_row_blocks_e_; ParallelFor( - context, + options_.context, 0, num_col_blocks_e_, - num_threads, + options_.num_threads, [values, transpose_bs, num_row_blocks_e, x, y](int row_block_id) { int row_block_pos = transpose_bs->rows[row_block_id].block.position; int row_block_size = transpose_bs->rows[row_block_id].block.size; @@ -254,14 +259,24 @@ y + row_block_pos); } }, - transpose_bs->rows.data(), - [](const CompressedRow& row) { return row.cumulative_nnz; }); + e_cols_partition()); } template <int kRowBlockSize, int kEBlockSize, int kFBlockSize> void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>:: LeftMultiplyAndAccumulateF(const double* x, double* y) const { if (!num_col_blocks_f_) return; + if (options_.num_threads == 1) { + LeftMultiplyAndAccumulateFSingleThreaded(x, y); + } else { + CHECK(options_.context != nullptr); + LeftMultiplyAndAccumulateFMultiThreaded(x, y); + } +} + +template <int kRowBlockSize, int kEBlockSize, int kFBlockSize> +void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>:: + LeftMultiplyAndAccumulateFSingleThreaded(const double* x, double* y) const { const CompressedRowBlockStructure* bs = matrix_.block_structure(); // Iterate over row blocks, and if the row block is 
in E, then @@ -307,26 +322,19 @@ template <int kRowBlockSize, int kEBlockSize, int kFBlockSize> void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>:: - LeftMultiplyAndAccumulateF(const double* x, - double* y, - ContextImpl* context, - int num_threads) const { - if (!num_col_blocks_f_) return; + LeftMultiplyAndAccumulateFMultiThreaded(const double* x, double* y) const { auto transpose_bs = matrix_.transpose_block_structure(); - if (transpose_bs == nullptr || num_threads == 1) { - LeftMultiplyAndAccumulateF(x, y); - return; - } + CHECK(transpose_bs != nullptr); // Local copies of class members in order to avoid capturing pointer to the // whole object in lambda function auto values = matrix_.values(); const int num_row_blocks_e = num_row_blocks_e_; const int num_cols_e = num_cols_e_; ParallelFor( - context, + options_.context, num_col_blocks_e_, num_col_blocks_e_ + num_col_blocks_f_, - num_threads, + options_.num_threads, [values, transpose_bs, num_row_blocks_e, num_cols_e, x, y]( int row_block_id) { int row_block_pos = transpose_bs->rows[row_block_id].block.position; @@ -362,8 +370,7 @@ y + row_block_pos - num_cols_e); } }, - transpose_bs->rows.data(), - [](const CompressedRow& row) { return row.cumulative_nnz; }); + f_cols_partition()); } // Given a range of columns blocks of a matrix m, compute the block
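The constructor now builds e_cols_partition_ and f_cols_partition_ once, via parallel_for_details::ComputePartition over the transpose block structure's cumulative_nnz counts. ComputePartition itself is not part of this patch; the standalone sketch below only illustrates the kind of cumulative-cost splitting involved (the name PartitionByCumulativeCost and the binary-search strategy are assumptions, not the actual implementation):

#include <vector>

// Split [start, end) into at most max_partitions contiguous chunks of roughly
// equal cumulative cost. cost(i) must return the cumulative cost of blocks
// 0..i and be non-decreasing; requires start < end.
template <typename CumulativeCost>
std::vector<int> PartitionByCumulativeCost(int start,
                                           int end,
                                           int max_partitions,
                                           const CumulativeCost& cost) {
  std::vector<int> partitions = {start};
  const long long cost_before = (start > 0) ? cost(start - 1) : 0;
  const long long total = cost(end - 1) - cost_before;
  for (int p = 1; p < max_partitions; ++p) {
    // Ideal cumulative cost at which the p-th chunk should end.
    const long long target = cost_before + total * p / max_partitions;
    int lo = partitions.back();
    int hi = end;
    // Find the first block index whose cumulative cost exceeds the target.
    while (lo < hi) {
      const int mid = lo + (hi - lo) / 2;
      if (cost(mid) <= target) {
        lo = mid + 1;
      } else {
        hi = mid;
      }
    }
    if (lo > partitions.back() && lo < end) {
      partitions.push_back(lo);
    }
  }
  partitions.push_back(end);  // invariant: front() == start, back() == end
  return partitions;
}

Balancing on cumulative nonzeros rather than on block count keeps the per-task work roughly equal even when column blocks differ widely in fill, which is exactly the situation for the point (E) and camera (F) blocks of a bundle-adjustment Jacobian.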
diff --git a/internal/ceres/partitioned_matrix_view_template.py b/internal/ceres/partitioned_matrix_view_template.py
index 27f368f..8978d63 100644
--- a/internal/ceres/partitioned_matrix_view_template.py
+++ b/internal/ceres/partitioned_matrix_view_template.py
@@ -132,7 +132,7 @@ #ifndef CERES_RESTRICT_SCHUR_SPECIALIZATION
 """
 
 FACTORY = """
   return std::make_unique<PartitionedMatrixView<%s,%s, %s>>(
-      matrix, options.elimination_groups[0]);"""
+      options, matrix);"""
 
 FACTORY_FOOTER = """
 #endif
@@ -142,7 +142,7 @@
   return std::make_unique<PartitionedMatrixView<Eigen::Dynamic,
                                                 Eigen::Dynamic,
                                                 Eigen::Dynamic>>(
-      matrix, options.elimination_groups[0]);
+      options, matrix);
 };
 
 }  // namespace ceres::internal
diff --git a/internal/ceres/partitioned_matrix_view_test.cc b/internal/ceres/partitioned_matrix_view_test.cc
index 4653e06..fc1ebdd 100644
--- a/internal/ceres/partitioned_matrix_view_test.cc
+++ b/internal/ceres/partitioned_matrix_view_test.cc
@@ -76,76 +76,6 @@ std::uniform_real_distribution<double>(0.0, 1.0); }; -TEST_F(PartitionedMatrixViewTest, DimensionsTest) { - EXPECT_EQ(pmv_->num_col_blocks_e(), num_eliminate_blocks_); - EXPECT_EQ(pmv_->num_col_blocks_f(), num_cols_ - num_eliminate_blocks_); - EXPECT_EQ(pmv_->num_cols_e(), num_eliminate_blocks_); - EXPECT_EQ(pmv_->num_cols_f(), num_cols_ - num_eliminate_blocks_); - EXPECT_EQ(pmv_->num_cols(), A_->num_cols()); - EXPECT_EQ(pmv_->num_rows(), A_->num_rows()); -} - -TEST_F(PartitionedMatrixViewTest, RightMultiplyAndAccumulateE) { - Vector x1(pmv_->num_cols_e()); - Vector x2(pmv_->num_cols()); - x2.setZero(); - - for (int i = 0; i < pmv_->num_cols_e(); ++i) { - x1(i) = x2(i) = RandDouble(); - } - - Vector y1 = Vector::Zero(pmv_->num_rows()); - pmv_->RightMultiplyAndAccumulateE(x1.data(), y1.data()); - - Vector y2 = Vector::Zero(pmv_->num_rows()); - A_->RightMultiplyAndAccumulate(x2.data(), y2.data()); - - for (int i = 0; i < pmv_->num_rows(); ++i) { - EXPECT_NEAR(y1(i), y2(i), kEpsilon); - } -} - -TEST_F(PartitionedMatrixViewTest, RightMultiplyAndAccumulateF) { - Vector x1(pmv_->num_cols_f()); - Vector x2 = Vector::Zero(pmv_->num_cols()); - - for (int i = 0; i < pmv_->num_cols_f(); ++i) { - x1(i) = RandDouble(); - x2(i + pmv_->num_cols_e()) = x1(i); - } - - Vector y1 = Vector::Zero(pmv_->num_rows()); - pmv_->RightMultiplyAndAccumulateF(x1.data(), y1.data()); - - Vector y2 = Vector::Zero(pmv_->num_rows()); - A_->RightMultiplyAndAccumulate(x2.data(), y2.data()); - - for (int i = 0; i < pmv_->num_rows(); ++i) { - EXPECT_NEAR(y1(i), y2(i), kEpsilon); - } -} - -TEST_F(PartitionedMatrixViewTest, LeftMultiplyAndAccumulate) { - Vector x = Vector::Zero(pmv_->num_rows()); - for (int i = 0; i < pmv_->num_rows(); ++i) { - x(i) = RandDouble(); - } - - Vector y = Vector::Zero(pmv_->num_cols()); - Vector y1 = Vector::Zero(pmv_->num_cols_e()); - Vector y2 = Vector::Zero(pmv_->num_cols_f()); - - A_->LeftMultiplyAndAccumulate(x.data(), y.data()); - pmv_->LeftMultiplyAndAccumulateE(x.data(), y1.data()); - pmv_->LeftMultiplyAndAccumulateF(x.data(), y2.data()); - - for (int i = 0; i < pmv_->num_cols(); ++i) { - EXPECT_NEAR(y(i), - (i < pmv_->num_cols_e()) ? 
y1(i) : y2(i - pmv_->num_cols_e()), - kEpsilon); - } -} - TEST_F(PartitionedMatrixViewTest, BlockDiagonalEtE) { std::unique_ptr<BlockSparseMatrix> block_diagonal_ee( pmv_->CreateBlockDiagonalEtE()); @@ -174,134 +104,122 @@ EXPECT_NEAR(block_diagonal_ff->values()[2], 37.0, kEpsilon); } -const int kMaxNumThreads = 8; -class PartitionedMatrixViewParallelTest : public ::testing::TestWithParam<int> { +// Param = <problem_id, num_threads> +using Param = ::testing::tuple<int, int>; + +static std::string ParamInfoToString(testing::TestParamInfo<Param> info) { + Param param = info.param; + std::stringstream ss; + ss << ::testing::get<0>(param) << "_" << ::testing::get<1>(param); + return ss.str(); +} + +class PartitionedMatrixViewSpMVTest : public ::testing::TestWithParam<Param> { protected: - static const int kNumProblems = 3; void SetUp() final { - int problem_ids[kNumProblems] = {2, 4, 6}; - for (int i = 0; i < kNumProblems; ++i) { - auto problem = CreateLinearLeastSquaresProblemFromId(problem_ids[i]); - CHECK(problem != nullptr); - SetUpMatrix(i, problem.get()); - } - - context_.EnsureMinimumThreads(kMaxNumThreads); - } - - void SetUpMatrix(int id, LinearLeastSquaresProblem* problem) { - A_[id] = std::move(problem->A); - auto& A = A_[id]; - auto block_sparse = down_cast<BlockSparseMatrix*>(A.get()); + const int problem_id = ::testing::get<0>(GetParam()); + const int num_threads = ::testing::get<1>(GetParam()); + auto problem = CreateLinearLeastSquaresProblemFromId(problem_id); + CHECK(problem != nullptr); + A_ = std::move(problem->A); + auto block_sparse = down_cast<BlockSparseMatrix*>(A_.get()); block_sparse->AddTransposeBlockStructure(); - num_cols_[id] = A->num_cols(); - num_rows_[id] = A->num_rows(); - num_eliminate_blocks_[id] = problem->num_eliminate_blocks; - LinearSolver::Options options; - options.elimination_groups.push_back(num_eliminate_blocks_[id]); - pmv_[id] = PartitionedMatrixViewBase::Create(options, *block_sparse); + options_.num_threads = num_threads; + options_.context = &context_; + options_.elimination_groups.push_back(problem->num_eliminate_blocks); + pmv_ = PartitionedMatrixViewBase::Create(options_, *block_sparse); + + EXPECT_EQ(pmv_->num_col_blocks_e(), problem->num_eliminate_blocks); + EXPECT_EQ(pmv_->num_col_blocks_f(), + block_sparse->block_structure()->cols.size() - + problem->num_eliminate_blocks); + EXPECT_EQ(pmv_->num_cols(), A_->num_cols()); + EXPECT_EQ(pmv_->num_rows(), A_->num_rows()); } double RandDouble() { return distribution_(prng_); } + LinearSolver::Options options_; ContextImpl context_; - int num_rows_[kNumProblems]; - int num_cols_[kNumProblems]; - int num_eliminate_blocks_[kNumProblems]; - std::unique_ptr<SparseMatrix> A_[kNumProblems]; - std::unique_ptr<PartitionedMatrixViewBase> pmv_[kNumProblems]; + std::unique_ptr<LinearLeastSquaresProblem> problem_; + std::unique_ptr<SparseMatrix> A_; + std::unique_ptr<PartitionedMatrixViewBase> pmv_; + int num_cols_e; std::mt19937 prng_; std::uniform_real_distribution<double> distribution_ = std::uniform_real_distribution<double>(0.0, 1.0); }; -TEST_P(PartitionedMatrixViewParallelTest, RightMultiplyAndAccumulateEParallel) { - const int kNumThreads = GetParam(); - for (int p = 0; p < kNumProblems; ++p) { - auto& pmv = pmv_[p]; - auto& A = A_[p]; +TEST_P(PartitionedMatrixViewSpMVTest, RightMultiplyAndAccumulateE) { + Vector x1(pmv_->num_cols_e()); + Vector x2(pmv_->num_cols()); + x2.setZero(); - Vector x1(pmv->num_cols_e()); - Vector x2(pmv->num_cols()); - x2.setZero(); + for (int i = 0; i < pmv_->num_cols_e(); 
++i) { + x1(i) = x2(i) = RandDouble(); + } - for (int i = 0; i < pmv->num_cols_e(); ++i) { - x1(i) = x2(i) = RandDouble(); - } + Vector expected = Vector::Zero(pmv_->num_rows()); + A_->RightMultiplyAndAccumulate(x2.data(), expected.data()); - Vector y1 = Vector::Zero(pmv->num_rows()); - pmv->RightMultiplyAndAccumulateE( - x1.data(), y1.data(), &context_, kNumThreads); + Vector actual = Vector::Zero(pmv_->num_rows()); + pmv_->RightMultiplyAndAccumulateE(x1.data(), actual.data()); - Vector y2 = Vector::Zero(pmv->num_rows()); - A->RightMultiplyAndAccumulate(x2.data(), y2.data()); - - for (int i = 0; i < pmv->num_rows(); ++i) { - EXPECT_NEAR(y1(i), y2(i), kEpsilon); - } + for (int i = 0; i < pmv_->num_rows(); ++i) { + EXPECT_NEAR(actual(i), expected(i), kEpsilon); } } -TEST_P(PartitionedMatrixViewParallelTest, RightMultiplyAndAccumulateFParallel) { - const int kNumThreads = GetParam(); - for (int p = 0; p < kNumProblems; ++p) { - auto& pmv = pmv_[p]; - auto& A = A_[p]; - Vector x1(pmv->num_cols_f()); - Vector x2(pmv->num_cols()); - x2.setZero(); +TEST_P(PartitionedMatrixViewSpMVTest, RightMultiplyAndAccumulateF) { + Vector x1(pmv_->num_cols_f()); + Vector x2(pmv_->num_cols()); + x2.setZero(); - for (int i = 0; i < pmv->num_cols_f(); ++i) { - x1(i) = x2(i + pmv->num_cols_e()) = RandDouble(); - } + for (int i = 0; i < pmv_->num_cols_f(); ++i) { + x1(i) = x2(i + pmv_->num_cols_e()) = RandDouble(); + } - Vector y1 = Vector::Zero(pmv->num_rows()); - pmv->RightMultiplyAndAccumulateF( - x1.data(), y1.data(), &context_, kNumThreads); + Vector actual = Vector::Zero(pmv_->num_rows()); + pmv_->RightMultiplyAndAccumulateF(x1.data(), actual.data()); - Vector y2 = Vector::Zero(pmv->num_rows()); - A->RightMultiplyAndAccumulate(x2.data(), y2.data()); + Vector expected = Vector::Zero(pmv_->num_rows()); + A_->RightMultiplyAndAccumulate(x2.data(), expected.data()); - for (int i = 0; i < pmv->num_rows(); ++i) { - EXPECT_NEAR(y1(i), y2(i), kEpsilon); - } + for (int i = 0; i < pmv_->num_rows(); ++i) { + EXPECT_NEAR(actual(i), expected(i), kEpsilon); } } -TEST_P(PartitionedMatrixViewParallelTest, LeftMultiplyAndAccumulateParallel) { - const int kNumThreads = GetParam(); - for (int p = 0; p < kNumProblems; ++p) { - auto& pmv = pmv_[p]; - auto& A = A_[p]; - Vector x = Vector::Zero(pmv->num_rows()); - for (int i = 0; i < pmv->num_rows(); ++i) { - x(i) = RandDouble(); - } - Vector x_pre = x; +TEST_P(PartitionedMatrixViewSpMVTest, LeftMultiplyAndAccumulate) { + Vector x = Vector::Zero(pmv_->num_rows()); + for (int i = 0; i < pmv_->num_rows(); ++i) { + x(i) = RandDouble(); + } + Vector x_pre = x; - Vector y = Vector::Zero(pmv->num_cols()); - Vector y1 = Vector::Zero(pmv->num_cols_e()); - Vector y2 = Vector::Zero(pmv->num_cols_f()); + Vector expected = Vector::Zero(pmv_->num_cols()); + Vector e_actual = Vector::Zero(pmv_->num_cols_e()); + Vector f_actual = Vector::Zero(pmv_->num_cols_f()); - A->LeftMultiplyAndAccumulate(x.data(), y.data()); - pmv->LeftMultiplyAndAccumulateE( - x.data(), y1.data(), &context_, kNumThreads); - pmv->LeftMultiplyAndAccumulateF( - x.data(), y2.data(), &context_, kNumThreads); + A_->LeftMultiplyAndAccumulate(x.data(), expected.data()); + pmv_->LeftMultiplyAndAccumulateE(x.data(), e_actual.data()); + pmv_->LeftMultiplyAndAccumulateF(x.data(), f_actual.data()); - for (int i = 0; i < pmv->num_cols(); ++i) { - EXPECT_NEAR(y(i), - (i < pmv->num_cols_e()) ? 
y1(i) : y2(i - pmv->num_cols_e()), - kEpsilon); - } + for (int i = 0; i < pmv_->num_cols(); ++i) { + EXPECT_NEAR(expected(i), + (i < pmv_->num_cols_e()) ? e_actual(i) + : f_actual(i - pmv_->num_cols_e()), + kEpsilon); } } -INSTANTIATE_TEST_SUITE_P(ParallelProducts, - PartitionedMatrixViewParallelTest, - ::testing::Values(1, 2, 4, 8), - ::testing::PrintToStringParamName()); +INSTANTIATE_TEST_SUITE_P( + ParallelProducts, + PartitionedMatrixViewSpMVTest, + ::testing::Combine(::testing::Values(2, 4, 6), + ::testing::Values(1, 2, 3, 4, 5, 6, 7, 8)), + ParamInfoToString); } // namespace internal } // namespace ceres