| // Ceres Solver - A fast non-linear least squares minimizer | 
 | // Copyright 2022 Google Inc. All rights reserved. | 
 | // http://ceres-solver.org/ | 
 | // | 
 | // Redistribution and use in source and binary forms, with or without | 
 | // modification, are permitted provided that the following conditions are met: | 
 | // | 
 | // * Redistributions of source code must retain the above copyright notice, | 
 | //   this list of conditions and the following disclaimer. | 
 | // * Redistributions in binary form must reproduce the above copyright notice, | 
 | //   this list of conditions and the following disclaimer in the documentation | 
 | //   and/or other materials provided with the distribution. | 
 | // * Neither the name of Google Inc. nor the names of its contributors may be | 
 | //   used to endorse or promote products derived from this software without | 
 | //   specific prior written permission. | 
 | // | 
 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | 
 | // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | 
 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | 
 | // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE | 
 | // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | 
 | // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | 
 | // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | 
 | // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | 
 | // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | 
 | // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | 
 | // POSSIBILITY OF SUCH DAMAGE. | 
 | // | 
 | // Authors: dmitriy.korchemkin@gmail.com (Dmitriy Korchemkin) | 
 |  | 
 | #include <memory> | 
 | #include <random> | 
 | #include <string> | 
 | #include <vector> | 
 |  | 
 | #include "benchmark/benchmark.h" | 
 | #include "ceres/block_sparse_matrix.h" | 
 | #include "ceres/bundle_adjustment_test_util.h" | 
 | #include "ceres/cuda_sparse_matrix.h" | 
 | #include "ceres/cuda_vector.h" | 
 | #include "ceres/evaluator.h" | 
 | #include "ceres/implicit_schur_complement.h" | 
 | #include "ceres/partitioned_matrix_view.h" | 
 | #include "ceres/preprocessor.h" | 
 | #include "ceres/problem.h" | 
 | #include "ceres/problem_impl.h" | 
 | #include "ceres/program.h" | 
 | #include "ceres/sparse_matrix.h" | 
 |  | 
 | namespace ceres::internal { | 
 |  | 
 | template <typename Derived, typename Base> | 
 | std::unique_ptr<Derived> downcast_unique_ptr(std::unique_ptr<Base>& base) { | 
 |   return std::unique_ptr<Derived>(dynamic_cast<Derived*>(base.release())); | 
 | } | 
 |  | 
 | // Benchmark library might invoke benchmark function multiple times. | 
 | // In order to save time required to parse BAL data, we ensure that | 
 | // each dataset is being loaded at most once. | 
 | // Each type of jacobians is also cached after first creation | 
 | struct BALData { | 
 |   using PartitionedView = PartitionedMatrixView<2, 3, 9>; | 
 |   explicit BALData(const std::string& path) { | 
 |     bal_problem = std::make_unique<BundleAdjustmentProblem>(path); | 
 |     CHECK(bal_problem != nullptr); | 
 |  | 
 |     auto problem_impl = bal_problem->mutable_problem()->mutable_impl(); | 
 |     auto preprocessor = Preprocessor::Create(MinimizerType::TRUST_REGION); | 
 |  | 
 |     preprocessed_problem = std::make_unique<PreprocessedProblem>(); | 
 |     Solver::Options options = bal_problem->options(); | 
 |     options.linear_solver_type = ITERATIVE_SCHUR; | 
 |     CHECK(preprocessor->Preprocess( | 
 |         options, problem_impl, preprocessed_problem.get())); | 
 |  | 
 |     auto program = preprocessed_problem->reduced_program.get(); | 
 |  | 
 |     parameters.resize(program->NumParameters()); | 
 |     program->ParameterBlocksToStateVector(parameters.data()); | 
 |  | 
 |     const int num_residuals = program->NumResiduals(); | 
 |     b.resize(num_residuals); | 
 |  | 
 |     std::mt19937 rng; | 
 |     std::normal_distribution<double> rnorm; | 
 |     for (int i = 0; i < num_residuals; ++i) { | 
 |       b[i] = rnorm(rng); | 
 |     } | 
 |  | 
 |     const int num_parameters = program->NumParameters(); | 
 |     D.resize(num_parameters); | 
 |     for (int i = 0; i < num_parameters; ++i) { | 
 |       D[i] = rnorm(rng); | 
 |     } | 
 |   } | 
 |  | 
 |   std::unique_ptr<BlockSparseMatrix> CreateBlockSparseJacobian( | 
 |       ContextImpl* context) { | 
 |     auto problem = bal_problem->mutable_problem(); | 
 |     auto problem_impl = problem->mutable_impl(); | 
 |     CHECK(problem_impl != nullptr); | 
 |  | 
 |     Evaluator::Options options; | 
 |     options.linear_solver_type = ITERATIVE_SCHUR; | 
 |     options.num_threads = 1; | 
 |     options.context = context; | 
 |     options.num_eliminate_blocks = bal_problem->num_points(); | 
 |  | 
 |     std::string error; | 
 |     auto program = preprocessed_problem->reduced_program.get(); | 
 |     auto evaluator = Evaluator::Create(options, program, &error); | 
 |     CHECK(evaluator != nullptr); | 
 |  | 
 |     auto jacobian = evaluator->CreateJacobian(); | 
 |     auto block_sparse = downcast_unique_ptr<BlockSparseMatrix>(jacobian); | 
 |     CHECK(block_sparse != nullptr); | 
 |  | 
 |     std::mt19937 rng; | 
 |     std::normal_distribution<double> rnorm; | 
 |     const int nnz = block_sparse->num_nonzeros(); | 
 |     auto values = block_sparse->mutable_values(); | 
 |     for (int i = 0; i < nnz; ++i) { | 
 |       values[i] = rnorm(rng); | 
 |     } | 
 |     return block_sparse; | 
 |   } | 
 |  | 
 |   std::unique_ptr<CompressedRowSparseMatrix> CreateCompressedRowSparseJacobian( | 
 |       ContextImpl* context) { | 
 |     auto block_sparse = BlockSparseJacobian(context); | 
 |     auto crs_jacobian = std::make_unique<CompressedRowSparseMatrix>( | 
 |         block_sparse->num_rows(), | 
 |         block_sparse->num_cols(), | 
 |         block_sparse->num_nonzeros()); | 
 |  | 
 |     block_sparse->ToCompressedRowSparseMatrix(crs_jacobian.get()); | 
 |     return crs_jacobian; | 
 |   } | 
 |  | 
 |   const BlockSparseMatrix* BlockSparseJacobian(ContextImpl* context) { | 
 |     if (!block_sparse_jacobian) { | 
 |       block_sparse_jacobian = CreateBlockSparseJacobian(context); | 
 |     } | 
 |     return block_sparse_jacobian.get(); | 
 |   } | 
 |  | 
 |   const CompressedRowSparseMatrix* CompressedRowSparseJacobian( | 
 |       ContextImpl* context) { | 
 |     if (!crs_jacobian) { | 
 |       crs_jacobian = CreateCompressedRowSparseJacobian(context); | 
 |     } | 
 |     return crs_jacobian.get(); | 
 |   } | 
 |  | 
 |   std::unique_ptr<PartitionedView> PartitionedMatrixViewJacobian( | 
 |       const LinearSolver::Options& options) { | 
 |     auto block_sparse = BlockSparseJacobian(options.context); | 
 |     return std::make_unique<PartitionedView>(options, *block_sparse); | 
 |   } | 
 |  | 
 |   BlockSparseMatrix* BlockDiagonalEtE(const LinearSolver::Options& options) { | 
 |     if (!block_diagonal_ete) { | 
 |       auto partitioned_view = PartitionedMatrixViewJacobian(options); | 
 |       block_diagonal_ete = partitioned_view->CreateBlockDiagonalEtE(); | 
 |     } | 
 |     return block_diagonal_ete.get(); | 
 |   } | 
 |  | 
 |   BlockSparseMatrix* BlockDiagonalFtF(const LinearSolver::Options& options) { | 
 |     if (!block_diagonal_ftf) { | 
 |       auto partitioned_view = PartitionedMatrixViewJacobian(options); | 
 |       block_diagonal_ftf = partitioned_view->CreateBlockDiagonalFtF(); | 
 |     } | 
 |     return block_diagonal_ftf.get(); | 
 |   } | 
 |  | 
 |   const ImplicitSchurComplement* ImplicitSchurComplementWithoutDiagonal( | 
 |       const LinearSolver::Options& options) { | 
 |     auto block_sparse = BlockSparseJacobian(options.context); | 
 |     implicit_schur_complement = | 
 |         std::make_unique<ImplicitSchurComplement>(options); | 
 |     implicit_schur_complement->Init(*block_sparse, nullptr, b.data()); | 
 |     return implicit_schur_complement.get(); | 
 |   } | 
 |  | 
 |   const ImplicitSchurComplement* ImplicitSchurComplementWithDiagonal( | 
 |       const LinearSolver::Options& options) { | 
 |     auto block_sparse = BlockSparseJacobian(options.context); | 
 |     implicit_schur_complement_diag = | 
 |         std::make_unique<ImplicitSchurComplement>(options); | 
 |     implicit_schur_complement_diag->Init(*block_sparse, D.data(), b.data()); | 
 |     return implicit_schur_complement_diag.get(); | 
 |   } | 
 |  | 
 |   Vector parameters; | 
 |   Vector D; | 
 |   Vector b; | 
 |   std::unique_ptr<BundleAdjustmentProblem> bal_problem; | 
 |   std::unique_ptr<PreprocessedProblem> preprocessed_problem; | 
 |   std::unique_ptr<BlockSparseMatrix> block_sparse_jacobian; | 
 |   std::unique_ptr<CompressedRowSparseMatrix> crs_jacobian; | 
 |   std::unique_ptr<BlockSparseMatrix> block_diagonal_ete; | 
 |   std::unique_ptr<BlockSparseMatrix> block_diagonal_ftf; | 
 |   std::unique_ptr<ImplicitSchurComplement> implicit_schur_complement; | 
 |   std::unique_ptr<ImplicitSchurComplement> implicit_schur_complement_diag; | 
 | }; | 
 |  | 
 | static void Residuals(benchmark::State& state, | 
 |                       BALData* data, | 
 |                       ContextImpl* context) { | 
 |   const int num_threads = static_cast<int>(state.range(0)); | 
 |  | 
 |   Evaluator::Options options; | 
 |   options.linear_solver_type = SPARSE_NORMAL_CHOLESKY; | 
 |   options.num_threads = num_threads; | 
 |   options.context = context; | 
 |   options.num_eliminate_blocks = 0; | 
 |  | 
 |   std::string error; | 
 |   CHECK(data->preprocessed_problem != nullptr); | 
 |   auto program = data->preprocessed_problem->reduced_program.get(); | 
 |   CHECK(program != nullptr); | 
 |   auto evaluator = Evaluator::Create(options, program, &error); | 
 |   CHECK(evaluator != nullptr); | 
 |  | 
 |   double cost = 0.; | 
 |   Vector residuals = Vector::Zero(program->NumResiduals()); | 
 |  | 
 |   Evaluator::EvaluateOptions eval_options; | 
 |   for (auto _ : state) { | 
 |     CHECK(evaluator->Evaluate(eval_options, | 
 |                               data->parameters.data(), | 
 |                               &cost, | 
 |                               residuals.data(), | 
 |                               nullptr, | 
 |                               nullptr)); | 
 |   } | 
 | } | 
 |  | 
 | static void ResidualsAndJacobian(benchmark::State& state, | 
 |                                  BALData* data, | 
 |                                  ContextImpl* context) { | 
 |   const int num_threads = static_cast<int>(state.range(0)); | 
 |  | 
 |   Evaluator::Options options; | 
 |   options.linear_solver_type = SPARSE_NORMAL_CHOLESKY; | 
 |   options.num_threads = num_threads; | 
 |   options.context = context; | 
 |   options.num_eliminate_blocks = 0; | 
 |  | 
 |   std::string error; | 
 |   CHECK(data->preprocessed_problem != nullptr); | 
 |   auto program = data->preprocessed_problem->reduced_program.get(); | 
 |   CHECK(program != nullptr); | 
 |   auto evaluator = Evaluator::Create(options, program, &error); | 
 |   CHECK(evaluator != nullptr); | 
 |  | 
 |   double cost = 0.; | 
 |   Vector residuals = Vector::Zero(program->NumResiduals()); | 
 |   auto jacobian = evaluator->CreateJacobian(); | 
 |  | 
 |   Evaluator::EvaluateOptions eval_options; | 
 |   for (auto _ : state) { | 
 |     CHECK(evaluator->Evaluate(eval_options, | 
 |                               data->parameters.data(), | 
 |                               &cost, | 
 |                               residuals.data(), | 
 |                               nullptr, | 
 |                               jacobian.get())); | 
 |   } | 
 | } | 
 |  | 
 | static void Plus(benchmark::State& state, BALData* data, ContextImpl* context) { | 
 |   const int num_threads = static_cast<int>(state.range(0)); | 
 |  | 
 |   Evaluator::Options options; | 
 |   options.linear_solver_type = SPARSE_NORMAL_CHOLESKY; | 
 |   options.num_threads = num_threads; | 
 |   options.context = context; | 
 |   options.num_eliminate_blocks = 0; | 
 |  | 
 |   std::string error; | 
 |   CHECK(data->preprocessed_problem != nullptr); | 
 |   auto program = data->preprocessed_problem->reduced_program.get(); | 
 |   CHECK(program != nullptr); | 
 |   auto evaluator = Evaluator::Create(options, program, &error); | 
 |   CHECK(evaluator != nullptr); | 
 |  | 
 |   Vector state_plus_delta = Vector::Zero(program->NumParameters()); | 
 |   Vector delta = Vector::Random(program->NumEffectiveParameters()); | 
 |  | 
 |   for (auto _ : state) { | 
 |     CHECK(evaluator->Plus( | 
 |         data->parameters.data(), delta.data(), state_plus_delta.data())); | 
 |   } | 
 |   CHECK_GT(state_plus_delta.squaredNorm(), 0.); | 
 | } | 
 |  | 
 | static void PMVRightMultiplyAndAccumulateF(benchmark::State& state, | 
 |                                            BALData* data, | 
 |                                            ContextImpl* context) { | 
 |   LinearSolver::Options options; | 
 |   options.num_threads = static_cast<int>(state.range(0)); | 
 |   options.elimination_groups.push_back(data->bal_problem->num_points()); | 
 |   options.context = context; | 
 |   auto jacobian = data->PartitionedMatrixViewJacobian(options); | 
 |  | 
 |   Vector y = Vector::Zero(jacobian->num_rows()); | 
 |   Vector x = Vector::Random(jacobian->num_cols_f()); | 
 |  | 
 |   for (auto _ : state) { | 
 |     jacobian->RightMultiplyAndAccumulateF(x.data(), y.data()); | 
 |   } | 
 |   CHECK_GT(y.squaredNorm(), 0.); | 
 | } | 
 |  | 
 | static void PMVLeftMultiplyAndAccumulateF(benchmark::State& state, | 
 |                                           BALData* data, | 
 |                                           ContextImpl* context) { | 
 |   LinearSolver::Options options; | 
 |   options.num_threads = static_cast<int>(state.range(0)); | 
 |   options.elimination_groups.push_back(data->bal_problem->num_points()); | 
 |   options.context = context; | 
 |   auto jacobian = data->PartitionedMatrixViewJacobian(options); | 
 |  | 
 |   Vector y = Vector::Zero(jacobian->num_cols_f()); | 
 |   Vector x = Vector::Random(jacobian->num_rows()); | 
 |  | 
 |   for (auto _ : state) { | 
 |     jacobian->LeftMultiplyAndAccumulateF(x.data(), y.data()); | 
 |   } | 
 |   CHECK_GT(y.squaredNorm(), 0.); | 
 | } | 
 |  | 
 | static void PMVRightMultiplyAndAccumulateE(benchmark::State& state, | 
 |                                            BALData* data, | 
 |                                            ContextImpl* context) { | 
 |   LinearSolver::Options options; | 
 |   options.num_threads = static_cast<int>(state.range(0)); | 
 |   options.elimination_groups.push_back(data->bal_problem->num_points()); | 
 |   options.context = context; | 
 |   auto jacobian = data->PartitionedMatrixViewJacobian(options); | 
 |  | 
 |   Vector y = Vector::Zero(jacobian->num_rows()); | 
 |   Vector x = Vector::Random(jacobian->num_cols_e()); | 
 |  | 
 |   for (auto _ : state) { | 
 |     jacobian->RightMultiplyAndAccumulateE(x.data(), y.data()); | 
 |   } | 
 |   CHECK_GT(y.squaredNorm(), 0.); | 
 | } | 
 |  | 
 | static void PMVLeftMultiplyAndAccumulateE(benchmark::State& state, | 
 |                                           BALData* data, | 
 |                                           ContextImpl* context) { | 
 |   LinearSolver::Options options; | 
 |   options.num_threads = static_cast<int>(state.range(0)); | 
 |   options.elimination_groups.push_back(data->bal_problem->num_points()); | 
 |   options.context = context; | 
 |   auto jacobian = data->PartitionedMatrixViewJacobian(options); | 
 |  | 
 |   Vector y = Vector::Zero(jacobian->num_cols_e()); | 
 |   Vector x = Vector::Random(jacobian->num_rows()); | 
 |  | 
 |   for (auto _ : state) { | 
 |     jacobian->LeftMultiplyAndAccumulateE(x.data(), y.data()); | 
 |   } | 
 |   CHECK_GT(y.squaredNorm(), 0.); | 
 | } | 
 |  | 
 | static void PMVUpdateBlockDiagonalEtE(benchmark::State& state, | 
 |                                       BALData* data, | 
 |                                       ContextImpl* context) { | 
 |   LinearSolver::Options options; | 
 |   options.num_threads = static_cast<int>(state.range(0)); | 
 |   options.elimination_groups.push_back(data->bal_problem->num_points()); | 
 |   options.context = context; | 
 |   auto jacobian = data->PartitionedMatrixViewJacobian(options); | 
 |   auto block_diagonal_ete = data->BlockDiagonalEtE(options); | 
 |  | 
 |   for (auto _ : state) { | 
 |     jacobian->UpdateBlockDiagonalEtE(block_diagonal_ete); | 
 |   } | 
 | } | 
 |  | 
 | static void PMVUpdateBlockDiagonalFtF(benchmark::State& state, | 
 |                                       BALData* data, | 
 |                                       ContextImpl* context) { | 
 |   LinearSolver::Options options; | 
 |   options.num_threads = static_cast<int>(state.range(0)); | 
 |   options.elimination_groups.push_back(data->bal_problem->num_points()); | 
 |   options.context = context; | 
 |   auto jacobian = data->PartitionedMatrixViewJacobian(options); | 
 |   auto block_diagonal_ftf = data->BlockDiagonalFtF(options); | 
 |  | 
 |   for (auto _ : state) { | 
 |     jacobian->UpdateBlockDiagonalFtF(block_diagonal_ftf); | 
 |   } | 
 | } | 
 |  | 
 | static void ISCRightMultiplyNoDiag(benchmark::State& state, | 
 |                                    BALData* data, | 
 |                                    ContextImpl* context) { | 
 |   LinearSolver::Options options; | 
 |   options.num_threads = static_cast<int>(state.range(0)); | 
 |   options.elimination_groups.push_back(data->bal_problem->num_points()); | 
 |   options.context = context; | 
 |   auto jacobian = data->ImplicitSchurComplementWithoutDiagonal(options); | 
 |  | 
 |   Vector y = Vector::Zero(jacobian->num_rows()); | 
 |   Vector x = Vector::Random(jacobian->num_cols()); | 
 |   for (auto _ : state) { | 
 |     jacobian->RightMultiplyAndAccumulate(x.data(), y.data()); | 
 |   } | 
 |   CHECK_GT(y.squaredNorm(), 0.); | 
 | } | 
 |  | 
 | static void ISCRightMultiplyDiag(benchmark::State& state, | 
 |                                  BALData* data, | 
 |                                  ContextImpl* context) { | 
 |   LinearSolver::Options options; | 
 |   options.num_threads = static_cast<int>(state.range(0)); | 
 |   options.elimination_groups.push_back(data->bal_problem->num_points()); | 
 |   options.context = context; | 
 |  | 
 |   auto jacobian = data->ImplicitSchurComplementWithDiagonal(options); | 
 |  | 
 |   Vector y = Vector::Zero(jacobian->num_rows()); | 
 |   Vector x = Vector::Random(jacobian->num_cols()); | 
 |   for (auto _ : state) { | 
 |     jacobian->RightMultiplyAndAccumulate(x.data(), y.data()); | 
 |   } | 
 |   CHECK_GT(y.squaredNorm(), 0.); | 
 | } | 
 |  | 
 | static void JacobianSquaredColumnNorm(benchmark::State& state, | 
 |                                       BALData* data, | 
 |                                       ContextImpl* context) { | 
 |   const int num_threads = static_cast<int>(state.range(0)); | 
 |  | 
 |   auto jacobian = data->BlockSparseJacobian(context); | 
 |  | 
 |   Vector x = Vector::Zero(jacobian->num_cols()); | 
 |  | 
 |   for (auto _ : state) { | 
 |     jacobian->SquaredColumnNorm(x.data(), context, num_threads); | 
 |   } | 
 |   CHECK_GT(x.squaredNorm(), 0.); | 
 | } | 
 |  | 
 | static void JacobianScaleColumns(benchmark::State& state, | 
 |                                  BALData* data, | 
 |                                  ContextImpl* context) { | 
 |   const int num_threads = static_cast<int>(state.range(0)); | 
 |  | 
 |   auto jacobian_const = data->BlockSparseJacobian(context); | 
 |   auto jacobian = const_cast<BlockSparseMatrix*>(jacobian_const); | 
 |  | 
 |   Vector x = Vector::Ones(jacobian->num_cols()); | 
 |  | 
 |   for (auto _ : state) { | 
 |     jacobian->ScaleColumns(x.data(), context, num_threads); | 
 |   } | 
 | } | 
 |  | 
 | static void JacobianRightMultiplyAndAccumulate(benchmark::State& state, | 
 |                                                BALData* data, | 
 |                                                ContextImpl* context) { | 
 |   const int num_threads = static_cast<int>(state.range(0)); | 
 |  | 
 |   auto jacobian = data->BlockSparseJacobian(context); | 
 |  | 
 |   Vector y = Vector::Zero(jacobian->num_rows()); | 
 |   Vector x = Vector::Random(jacobian->num_cols()); | 
 |  | 
 |   for (auto _ : state) { | 
 |     jacobian->RightMultiplyAndAccumulate( | 
 |         x.data(), y.data(), context, num_threads); | 
 |   } | 
 |   CHECK_GT(y.squaredNorm(), 0.); | 
 | } | 
 |  | 
 | static void JacobianLeftMultiplyAndAccumulate(benchmark::State& state, | 
 |                                               BALData* data, | 
 |                                               ContextImpl* context) { | 
 |   const int num_threads = static_cast<int>(state.range(0)); | 
 |  | 
 |   auto jacobian = data->BlockSparseJacobian(context); | 
 |  | 
 |   Vector y = Vector::Zero(jacobian->num_cols()); | 
 |   Vector x = Vector::Random(jacobian->num_rows()); | 
 |  | 
 |   for (auto _ : state) { | 
 |     jacobian->LeftMultiplyAndAccumulate( | 
 |         x.data(), y.data(), context, num_threads); | 
 |   } | 
 |   CHECK_GT(y.squaredNorm(), 0.); | 
 | } | 
 |  | 
 | #ifndef CERES_NO_CUDA | 
 | static void JacobianRightMultiplyAndAccumulateCuda(benchmark::State& state, | 
 |                                                    BALData* data, | 
 |                                                    ContextImpl* context) { | 
 |   auto crs_jacobian = data->CompressedRowSparseJacobian(context); | 
 |   CudaSparseMatrix cuda_jacobian(context, *crs_jacobian); | 
 |   CudaVector cuda_x(context, 0); | 
 |   CudaVector cuda_y(context, 0); | 
 |  | 
 |   Vector x(crs_jacobian->num_cols()); | 
 |   Vector y(crs_jacobian->num_rows()); | 
 |   x.setRandom(); | 
 |   y.setRandom(); | 
 |  | 
 |   cuda_x.CopyFromCpu(x); | 
 |   cuda_y.CopyFromCpu(y); | 
 |   double sum = 0; | 
 |   for (auto _ : state) { | 
 |     cuda_jacobian.RightMultiplyAndAccumulate(cuda_x, &cuda_y); | 
 |     sum += cuda_y.Norm(); | 
 |     CHECK_EQ(cudaDeviceSynchronize(), cudaSuccess); | 
 |   } | 
 |   CHECK_NE(sum, 0.0); | 
 | } | 
 |  | 
 | static void JacobianLeftMultiplyAndAccumulateCuda(benchmark::State& state, | 
 |                                                   BALData* data, | 
 |                                                   ContextImpl* context) { | 
 |   auto crs_jacobian = data->CompressedRowSparseJacobian(context); | 
 |   CudaSparseMatrix cuda_jacobian(context, *crs_jacobian); | 
 |   CudaVector cuda_x(context, 0); | 
 |   CudaVector cuda_y(context, 0); | 
 |  | 
 |   Vector x(crs_jacobian->num_rows()); | 
 |   Vector y(crs_jacobian->num_cols()); | 
 |   x.setRandom(); | 
 |   y.setRandom(); | 
 |  | 
 |   cuda_x.CopyFromCpu(x); | 
 |   cuda_y.CopyFromCpu(y); | 
 |   double sum = 0; | 
 |   for (auto _ : state) { | 
 |     cuda_jacobian.LeftMultiplyAndAccumulate(cuda_x, &cuda_y); | 
 |     sum += cuda_y.Norm(); | 
 |     CHECK_EQ(cudaDeviceSynchronize(), cudaSuccess); | 
 |   } | 
 |   CHECK_NE(sum, 0.0); | 
 | } | 
 | #endif | 
 |  | 
 | }  // namespace ceres::internal | 
 |  | 
 | // Older versions of benchmark library might come without ::benchmark::Shutdown | 
 | // function. We provide an empty fallback variant of Shutdown function in | 
 | // order to support both older and newer versions | 
 | namespace benchmark_shutdown_fallback { | 
 | template <typename... Args> | 
 | void Shutdown(Args... args) {} | 
 | };  // namespace benchmark_shutdown_fallback | 
 |  | 
 | int main(int argc, char** argv) { | 
 |   ::benchmark::Initialize(&argc, argv); | 
 |  | 
 |   std::vector<std::unique_ptr<ceres::internal::BALData>> benchmark_data; | 
 |   if (argc == 1) { | 
 |     LOG(FATAL) << "No input datasets specified. Usage: " << argv[0] | 
 |                << " [benchmark flags] path_to_BAL_data_1.txt ... " | 
 |                   "path_to_BAL_data_N.txt"; | 
 |     return -1; | 
 |   } | 
 |  | 
 |   ceres::internal::ContextImpl context; | 
 |   context.EnsureMinimumThreads(16); | 
 | #ifndef CERES_NO_CUDA | 
 |   std::string message; | 
 |   context.InitCuda(&message); | 
 | #endif | 
 |  | 
 |   for (int i = 1; i < argc; ++i) { | 
 |     const std::string path(argv[i]); | 
 |     const std::string name_residuals = "Residuals<" + path + ">"; | 
 |     benchmark_data.emplace_back( | 
 |         std::make_unique<ceres::internal::BALData>(path)); | 
 |     auto data = benchmark_data.back().get(); | 
 |     ::benchmark::RegisterBenchmark( | 
 |         name_residuals.c_str(), ceres::internal::Residuals, data, &context) | 
 |         ->Arg(1) | 
 |         ->Arg(2) | 
 |         ->Arg(4) | 
 |         ->Arg(8) | 
 |         ->Arg(16); | 
 |  | 
 |     const std::string name_jacobians = "ResidualsAndJacobian<" + path + ">"; | 
 |     ::benchmark::RegisterBenchmark(name_jacobians.c_str(), | 
 |                                    ceres::internal::ResidualsAndJacobian, | 
 |                                    data, | 
 |                                    &context) | 
 |         ->Arg(1) | 
 |         ->Arg(2) | 
 |         ->Arg(4) | 
 |         ->Arg(8) | 
 |         ->Arg(16); | 
 |  | 
 |     const std::string name_plus = "Plus<" + path + ">"; | 
 |     ::benchmark::RegisterBenchmark( | 
 |         name_plus.c_str(), ceres::internal::Plus, data, &context) | 
 |         ->Arg(1) | 
 |         ->Arg(2) | 
 |         ->Arg(4) | 
 |         ->Arg(8) | 
 |         ->Arg(16); | 
 |  | 
 |     const std::string name_right_product = | 
 |         "JacobianRightMultiplyAndAccumulate<" + path + ">"; | 
 |     ::benchmark::RegisterBenchmark( | 
 |         name_right_product.c_str(), | 
 |         ceres::internal::JacobianRightMultiplyAndAccumulate, | 
 |         data, | 
 |         &context) | 
 |         ->Arg(1) | 
 |         ->Arg(2) | 
 |         ->Arg(4) | 
 |         ->Arg(8) | 
 |         ->Arg(16); | 
 |  | 
 |     const std::string name_right_product_partitioned_f = | 
 |         "PMVRightMultiplyAndAccumulateF<" + path + ">"; | 
 |     ::benchmark::RegisterBenchmark( | 
 |         name_right_product_partitioned_f.c_str(), | 
 |         ceres::internal::PMVRightMultiplyAndAccumulateF, | 
 |         data, | 
 |         &context) | 
 |         ->Arg(1) | 
 |         ->Arg(2) | 
 |         ->Arg(4) | 
 |         ->Arg(8) | 
 |         ->Arg(16); | 
 |  | 
 |     const std::string name_right_product_partitioned_e = | 
 |         "PMVRightMultiplyAndAccumulateE<" + path + ">"; | 
 |     ::benchmark::RegisterBenchmark( | 
 |         name_right_product_partitioned_e.c_str(), | 
 |         ceres::internal::PMVRightMultiplyAndAccumulateE, | 
 |         data, | 
 |         &context) | 
 |         ->Arg(1) | 
 |         ->Arg(2) | 
 |         ->Arg(4) | 
 |         ->Arg(8) | 
 |         ->Arg(16); | 
 |  | 
 |     const std::string name_update_block_diagonal_ftf = | 
 |         "PMVUpdateBlockDiagonalFtF<" + path + ">"; | 
 |     ::benchmark::RegisterBenchmark(name_update_block_diagonal_ftf.c_str(), | 
 |                                    ceres::internal::PMVUpdateBlockDiagonalFtF, | 
 |                                    data, | 
 |                                    &context) | 
 |         ->Arg(1) | 
 |         ->Arg(2) | 
 |         ->Arg(4) | 
 |         ->Arg(8) | 
 |         ->Arg(16); | 
 |  | 
 |     const std::string name_isc_no_diag = | 
 |         "ISCRightMultiplyAndAccumulate<" + path + ">"; | 
 |     ::benchmark::RegisterBenchmark(name_isc_no_diag.c_str(), | 
 |                                    ceres::internal::ISCRightMultiplyNoDiag, | 
 |                                    data, | 
 |                                    &context) | 
 |         ->Arg(1) | 
 |         ->Arg(2) | 
 |         ->Arg(4) | 
 |         ->Arg(8) | 
 |         ->Arg(16); | 
 |  | 
 |     const std::string name_update_block_diagonal_ete = | 
 |         "PMVUpdateBlockDiagonalEtE<" + path + ">"; | 
 |     ::benchmark::RegisterBenchmark(name_update_block_diagonal_ete.c_str(), | 
 |                                    ceres::internal::PMVUpdateBlockDiagonalEtE, | 
 |                                    data, | 
 |                                    &context) | 
 |         ->Arg(1) | 
 |         ->Arg(2) | 
 |         ->Arg(4) | 
 |         ->Arg(8) | 
 |         ->Arg(16); | 
 |     const std::string name_isc_diag = | 
 |         "ISCRightMultiplyAndAccumulateDiag<" + path + ">"; | 
 |     ::benchmark::RegisterBenchmark(name_isc_diag.c_str(), | 
 |                                    ceres::internal::ISCRightMultiplyDiag, | 
 |                                    data, | 
 |                                    &context) | 
 |         ->Arg(1) | 
 |         ->Arg(2) | 
 |         ->Arg(4) | 
 |         ->Arg(8) | 
 |         ->Arg(16); | 
 |  | 
 | #ifndef CERES_NO_CUDA | 
 |     const std::string name_right_product_cuda = | 
 |         "JacobianRightMultiplyAndAccumulateCuda<" + path + ">"; | 
 |     ::benchmark::RegisterBenchmark( | 
 |         name_right_product_cuda.c_str(), | 
 |         ceres::internal::JacobianRightMultiplyAndAccumulateCuda, | 
 |         data, | 
 |         &context) | 
 |         ->Arg(1); | 
 | #endif | 
 |  | 
 |     const std::string name_left_product = | 
 |         "JacobianLeftMultiplyAndAccumulate<" + path + ">"; | 
 |     ::benchmark::RegisterBenchmark( | 
 |         name_left_product.c_str(), | 
 |         ceres::internal::JacobianLeftMultiplyAndAccumulate, | 
 |         data, | 
 |         &context) | 
 |         ->Arg(1) | 
 |         ->Arg(2) | 
 |         ->Arg(4) | 
 |         ->Arg(8) | 
 |         ->Arg(16); | 
 |  | 
 |     const std::string name_left_product_partitioned_f = | 
 |         "PMVLeftMultiplyAndAccumulateF<" + path + ">"; | 
 |     ::benchmark::RegisterBenchmark( | 
 |         name_left_product_partitioned_f.c_str(), | 
 |         ceres::internal::PMVLeftMultiplyAndAccumulateF, | 
 |         data, | 
 |         &context) | 
 |         ->Arg(1) | 
 |         ->Arg(2) | 
 |         ->Arg(4) | 
 |         ->Arg(8) | 
 |         ->Arg(16); | 
 |  | 
 |     const std::string name_left_product_partitioned_e = | 
 |         "PMVLeftMultiplyAndAccumulateE<" + path + ">"; | 
 |     ::benchmark::RegisterBenchmark( | 
 |         name_left_product_partitioned_e.c_str(), | 
 |         ceres::internal::PMVLeftMultiplyAndAccumulateE, | 
 |         data, | 
 |         &context) | 
 |         ->Arg(1) | 
 |         ->Arg(2) | 
 |         ->Arg(4) | 
 |         ->Arg(8) | 
 |         ->Arg(16); | 
 |  | 
 | #ifndef CERES_NO_CUDA | 
 |     const std::string name_left_product_cuda = | 
 |         "JacobianLeftMultiplyAndAccumulateCuda<" + path + ">"; | 
 |     ::benchmark::RegisterBenchmark( | 
 |         name_left_product_cuda.c_str(), | 
 |         ceres::internal::JacobianLeftMultiplyAndAccumulateCuda, | 
 |         data, | 
 |         &context) | 
 |         ->Arg(1); | 
 | #endif | 
 |  | 
 |     const std::string name_squared_column_norm = | 
 |         "JacobianSquaredColumnNorm<" + path + ">"; | 
 |     ::benchmark::RegisterBenchmark(name_squared_column_norm.c_str(), | 
 |                                    ceres::internal::JacobianSquaredColumnNorm, | 
 |                                    data, | 
 |                                    &context) | 
 |         ->Arg(1) | 
 |         ->Arg(2) | 
 |         ->Arg(4) | 
 |         ->Arg(8) | 
 |         ->Arg(16); | 
 |  | 
 |     const std::string name_scale_columns = "JacobianScaleColumns<" + path + ">"; | 
 |     ::benchmark::RegisterBenchmark(name_scale_columns.c_str(), | 
 |                                    ceres::internal::JacobianScaleColumns, | 
 |                                    data, | 
 |                                    &context) | 
 |         ->Arg(1) | 
 |         ->Arg(2) | 
 |         ->Arg(4) | 
 |         ->Arg(8) | 
 |         ->Arg(16); | 
 |   } | 
 |   ::benchmark::RunSpecifiedBenchmarks(); | 
 |  | 
 |   using namespace ::benchmark; | 
 |   using namespace benchmark_shutdown_fallback; | 
 |   Shutdown(); | 
 |   return 0; | 
 | } |