|  | // Ceres Solver - A fast non-linear least squares minimizer | 
|  | // Copyright 2022 Google Inc. All rights reserved. | 
|  | // http://ceres-solver.org/ | 
|  | // | 
|  | // Redistribution and use in source and binary forms, with or without | 
|  | // modification, are permitted provided that the following conditions are met: | 
|  | // | 
|  | // * Redistributions of source code must retain the above copyright notice, | 
|  | //   this list of conditions and the following disclaimer. | 
|  | // * Redistributions in binary form must reproduce the above copyright notice, | 
|  | //   this list of conditions and the following disclaimer in the documentation | 
|  | //   and/or other materials provided with the distribution. | 
|  | // * Neither the name of Google Inc. nor the names of its contributors may be | 
|  | //   used to endorse or promote products derived from this software without | 
|  | //   specific prior written permission. | 
|  | // | 
|  | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | 
|  | // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | 
|  | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | 
|  | // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE | 
|  | // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | 
|  | // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | 
|  | // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | 
|  | // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | 
|  | // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | 
|  | // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | 
|  | // POSSIBILITY OF SUCH DAMAGE. | 
|  | // | 
|  | // Authors: dmitriy.korchemkin@gmail.com (Dmitriy Korchemkin) | 
|  |  | 
|  | #include <memory> | 
|  | #include <string> | 
|  | #include <vector> | 
|  |  | 
|  | #include "benchmark/benchmark.h" | 
|  | #include "ceres/block_sparse_matrix.h" | 
|  | #include "ceres/bundle_adjustment_test_util.h" | 
|  | #include "ceres/cuda_sparse_matrix.h" | 
|  | #include "ceres/cuda_vector.h" | 
|  | #include "ceres/evaluator.h" | 
|  | #include "ceres/problem.h" | 
|  | #include "ceres/problem_impl.h" | 
|  | #include "ceres/program.h" | 
|  | #include "ceres/sparse_matrix.h" | 
|  | #include "gflags/gflags.h" | 
|  |  | 
|  | namespace ceres::internal { | 
|  |  | 
|  | template <typename Derived, typename Base> | 
|  | std::unique_ptr<Derived> downcast_unique_ptr(std::unique_ptr<Base>& base) { | 
|  | return std::unique_ptr<Derived>(dynamic_cast<Derived*>(base.release())); | 
|  | } | 
|  |  | 
|  | // Benchmark library might invoke benchmark function multiple times. | 
|  | // In order to save time required to parse BAL data, we ensure that | 
|  | // each dataset is being loaded at most once. | 
|  | // Each type of jacobians is also cached after first creation | 
|  | struct BALData { | 
|  | explicit BALData(const std::string& path) { | 
|  | bal_problem = std::make_unique<BundleAdjustmentProblem>(path); | 
|  | CHECK(bal_problem != nullptr); | 
|  |  | 
|  | auto problem_impl = bal_problem->mutable_problem()->mutable_impl(); | 
|  | auto program = problem_impl->mutable_program(); | 
|  | parameters.resize(program->NumParameters()); | 
|  | program->ParameterBlocksToStateVector(parameters.data()); | 
|  | } | 
|  |  | 
|  | std::unique_ptr<BlockSparseMatrix> CreateBlockSparseJacobian( | 
|  | ContextImpl* context) { | 
|  | auto problem = bal_problem->mutable_problem(); | 
|  | auto problem_impl = problem->mutable_impl(); | 
|  | CHECK(problem_impl != nullptr); | 
|  |  | 
|  | Evaluator::Options options; | 
|  | options.linear_solver_type = ITERATIVE_SCHUR; | 
|  | options.num_threads = 1; | 
|  | options.context = context; | 
|  | options.num_eliminate_blocks = 0; | 
|  |  | 
|  | std::string error; | 
|  | auto program = problem_impl->mutable_program(); | 
|  | auto evaluator = Evaluator::Create(options, program, &error); | 
|  | CHECK(evaluator != nullptr); | 
|  |  | 
|  | auto jacobian = evaluator->CreateJacobian(); | 
|  | auto block_sparse = downcast_unique_ptr<BlockSparseMatrix>(jacobian); | 
|  | CHECK(block_sparse != nullptr); | 
|  |  | 
|  | std::mt19937 rng; | 
|  | std::normal_distribution<double> rnorm; | 
|  | const int nnz = block_sparse->num_nonzeros(); | 
|  | auto values = block_sparse->mutable_values(); | 
|  | for (int i = 0; i < nnz; ++i) { | 
|  | values[i] = rnorm(rng); | 
|  | } | 
|  | return block_sparse; | 
|  | } | 
|  |  | 
|  | std::unique_ptr<CompressedRowSparseMatrix> CreateCompressedRowSparseJacobian( | 
|  | ContextImpl* context) { | 
|  | auto block_sparse = BlockSparseJacobian(context); | 
|  | auto crs_jacobian = std::make_unique<CompressedRowSparseMatrix>( | 
|  | block_sparse->num_rows(), | 
|  | block_sparse->num_cols(), | 
|  | block_sparse->num_nonzeros()); | 
|  |  | 
|  | block_sparse->ToCompressedRowSparseMatrix(crs_jacobian.get()); | 
|  | return crs_jacobian; | 
|  | } | 
|  |  | 
|  | const BlockSparseMatrix* BlockSparseJacobian(ContextImpl* context) { | 
|  | if (!block_sparse_jacobian) { | 
|  | block_sparse_jacobian = CreateBlockSparseJacobian(context); | 
|  | } | 
|  | return block_sparse_jacobian.get(); | 
|  | } | 
|  |  | 
|  | const CompressedRowSparseMatrix* CompressedRowSparseJacobian( | 
|  | ContextImpl* context) { | 
|  | if (!crs_jacobian) { | 
|  | crs_jacobian = CreateCompressedRowSparseJacobian(context); | 
|  | } | 
|  | return crs_jacobian.get(); | 
|  | } | 
|  |  | 
|  | Vector parameters; | 
|  | std::unique_ptr<BundleAdjustmentProblem> bal_problem; | 
|  | std::unique_ptr<BlockSparseMatrix> block_sparse_jacobian; | 
|  | std::unique_ptr<CompressedRowSparseMatrix> crs_jacobian; | 
|  | }; | 
|  |  | 
|  | static void Residuals(benchmark::State& state, | 
|  | BALData* data, | 
|  | ContextImpl* context) { | 
|  | auto problem = data->bal_problem->mutable_problem(); | 
|  | auto problem_impl = problem->mutable_impl(); | 
|  | CHECK(problem_impl != nullptr); | 
|  | const int num_threads = state.range(0); | 
|  |  | 
|  | Evaluator::Options options; | 
|  | options.linear_solver_type = SPARSE_NORMAL_CHOLESKY; | 
|  | options.num_threads = num_threads; | 
|  | options.context = context; | 
|  | options.num_eliminate_blocks = 0; | 
|  |  | 
|  | std::string error; | 
|  | auto program = problem_impl->mutable_program(); | 
|  | auto evaluator = Evaluator::Create(options, program, &error); | 
|  | CHECK(evaluator != nullptr); | 
|  |  | 
|  | double cost = 0.; | 
|  | Vector residuals = Vector::Zero(program->NumResiduals()); | 
|  |  | 
|  | Evaluator::EvaluateOptions eval_options; | 
|  | for (auto _ : state) { | 
|  | CHECK(evaluator->Evaluate(eval_options, | 
|  | data->parameters.data(), | 
|  | &cost, | 
|  | residuals.data(), | 
|  | nullptr, | 
|  | nullptr)); | 
|  | } | 
|  | } | 
|  |  | 
|  | static void ResidualsAndJacobian(benchmark::State& state, | 
|  | BALData* data, | 
|  | ContextImpl* context) { | 
|  | auto problem = data->bal_problem->mutable_problem(); | 
|  | auto problem_impl = problem->mutable_impl(); | 
|  | CHECK(problem_impl != nullptr); | 
|  | const int num_threads = state.range(0); | 
|  |  | 
|  | Evaluator::Options options; | 
|  | options.linear_solver_type = SPARSE_NORMAL_CHOLESKY; | 
|  | options.num_threads = num_threads; | 
|  | options.context = context; | 
|  | options.num_eliminate_blocks = 0; | 
|  |  | 
|  | std::string error; | 
|  | auto program = problem_impl->mutable_program(); | 
|  | auto evaluator = Evaluator::Create(options, program, &error); | 
|  | CHECK(evaluator != nullptr); | 
|  |  | 
|  | double cost = 0.; | 
|  | Vector residuals = Vector::Zero(program->NumResiduals()); | 
|  | auto jacobian = evaluator->CreateJacobian(); | 
|  |  | 
|  | Evaluator::EvaluateOptions eval_options; | 
|  | for (auto _ : state) { | 
|  | CHECK(evaluator->Evaluate(eval_options, | 
|  | data->parameters.data(), | 
|  | &cost, | 
|  | residuals.data(), | 
|  | nullptr, | 
|  | jacobian.get())); | 
|  | } | 
|  | } | 
|  |  | 
|  | static void JacobianRightMultiplyAndAccumulate(benchmark::State& state, | 
|  | BALData* data, | 
|  | ContextImpl* context) { | 
|  | const int num_threads = state.range(0); | 
|  |  | 
|  | auto jacobian = data->BlockSparseJacobian(context); | 
|  |  | 
|  | Vector y = Vector::Zero(jacobian->num_rows()); | 
|  | Vector x = Vector::Random(jacobian->num_cols()); | 
|  |  | 
|  | for (auto _ : state) { | 
|  | jacobian->RightMultiplyAndAccumulate( | 
|  | x.data(), y.data(), context, num_threads); | 
|  | } | 
|  | CHECK_GT(y.squaredNorm(), 0.); | 
|  | } | 
|  |  | 
|  | static void JacobianLeftMultiplyAndAccumulate(benchmark::State& state, | 
|  | BALData* data, | 
|  | ContextImpl* context) { | 
|  | auto jacobian = data->BlockSparseJacobian(context); | 
|  |  | 
|  | Vector y = Vector::Zero(jacobian->num_cols()); | 
|  | Vector x = Vector::Random(jacobian->num_rows()); | 
|  |  | 
|  | for (auto _ : state) { | 
|  | jacobian->LeftMultiplyAndAccumulate(x.data(), y.data()); | 
|  | } | 
|  | CHECK_GT(y.squaredNorm(), 0.); | 
|  | } | 
|  |  | 
|  | #ifndef CERES_NO_CUDA | 
|  | static void JacobianRightMultiplyAndAccumulateCuda(benchmark::State& state, | 
|  | BALData* data, | 
|  | ContextImpl* context) { | 
|  | auto crs_jacobian = data->CompressedRowSparseJacobian(context); | 
|  | CudaSparseMatrix cuda_jacobian(context, *crs_jacobian); | 
|  | CudaVector cuda_x(context, 0); | 
|  | CudaVector cuda_y(context, 0); | 
|  |  | 
|  | Vector x(crs_jacobian->num_cols()); | 
|  | Vector y(crs_jacobian->num_rows()); | 
|  | x.setRandom(); | 
|  | y.setRandom(); | 
|  |  | 
|  | cuda_x.CopyFromCpu(x); | 
|  | cuda_y.CopyFromCpu(y); | 
|  | double sum = 0; | 
|  | for (auto _ : state) { | 
|  | cuda_jacobian.RightMultiplyAndAccumulate(cuda_x, &cuda_y); | 
|  | sum += cuda_y.Norm(); | 
|  | CHECK_EQ(cudaDeviceSynchronize(), cudaSuccess); | 
|  | } | 
|  | CHECK_NE(sum, 0.0); | 
|  | } | 
|  |  | 
|  | static void JacobianLeftMultiplyAndAccumulateCuda(benchmark::State& state, | 
|  | BALData* data, | 
|  | ContextImpl* context) { | 
|  | auto crs_jacobian = data->CompressedRowSparseJacobian(context); | 
|  | CudaSparseMatrix cuda_jacobian(context, *crs_jacobian); | 
|  | CudaVector cuda_x(context, 0); | 
|  | CudaVector cuda_y(context, 0); | 
|  |  | 
|  | Vector x(crs_jacobian->num_rows()); | 
|  | Vector y(crs_jacobian->num_cols()); | 
|  | x.setRandom(); | 
|  | y.setRandom(); | 
|  |  | 
|  | cuda_x.CopyFromCpu(x); | 
|  | cuda_y.CopyFromCpu(y); | 
|  | double sum = 0; | 
|  | for (auto _ : state) { | 
|  | cuda_jacobian.LeftMultiplyAndAccumulate(cuda_x, &cuda_y); | 
|  | sum += cuda_y.Norm(); | 
|  | CHECK_EQ(cudaDeviceSynchronize(), cudaSuccess); | 
|  | } | 
|  | CHECK_NE(sum, 0.0); | 
|  | } | 
|  | #endif | 
|  |  | 
|  | }  // namespace ceres::internal | 
|  |  | 
|  | // Older versions of benchmark library might come without ::benchmark::Shutdown | 
|  | // function. We provide an empty fallback variant of Shutdown function in | 
|  | // order to support both older and newer versions | 
|  | namespace benchmark_shutdown_fallback { | 
|  | template <typename... Args> | 
|  | void Shutdown(Args... args) {} | 
|  | };  // namespace benchmark_shutdown_fallback | 
|  |  | 
|  | int main(int argc, char** argv) { | 
|  | ::benchmark::Initialize(&argc, argv); | 
|  |  | 
|  | std::vector<std::unique_ptr<ceres::internal::BALData>> benchmark_data; | 
|  | if (argc == 1) { | 
|  | LOG(FATAL) << "No input datasets specified. Usage: " << argv[0] | 
|  | << " [benchmark flags] path_to_BAL_data_1.txt ... " | 
|  | "path_to_BAL_data_N.txt"; | 
|  | return -1; | 
|  | } | 
|  |  | 
|  | ceres::internal::ContextImpl context; | 
|  | context.EnsureMinimumThreads(16); | 
|  | #ifndef CERES_NO_CUDA | 
|  | std::string message; | 
|  | context.InitCuda(&message); | 
|  | #endif | 
|  |  | 
|  | for (int i = 1; i < argc; ++i) { | 
|  | const std::string path(argv[i]); | 
|  | const std::string name_residuals = "Residuals<" + path + ">"; | 
|  | benchmark_data.emplace_back( | 
|  | std::make_unique<ceres::internal::BALData>(path)); | 
|  | auto data = benchmark_data.back().get(); | 
|  | ::benchmark::RegisterBenchmark( | 
|  | name_residuals.c_str(), ceres::internal::Residuals, data, &context) | 
|  | ->Arg(1) | 
|  | ->Arg(2) | 
|  | ->Arg(4) | 
|  | ->Arg(8) | 
|  | ->Arg(16); | 
|  | const std::string name_jacobians = "ResidualsAndJacobian<" + path + ">"; | 
|  | ::benchmark::RegisterBenchmark(name_jacobians.c_str(), | 
|  | ceres::internal::ResidualsAndJacobian, | 
|  | data, | 
|  | &context) | 
|  | ->Arg(1) | 
|  | ->Arg(2) | 
|  | ->Arg(4) | 
|  | ->Arg(8) | 
|  | ->Arg(16); | 
|  | const std::string name_right_product = | 
|  | "JacobianRightMultiplyAndAccumulate<" + path + ">"; | 
|  | ::benchmark::RegisterBenchmark( | 
|  | name_right_product.c_str(), | 
|  | ceres::internal::JacobianRightMultiplyAndAccumulate, | 
|  | data, | 
|  | &context) | 
|  | ->Arg(1) | 
|  | ->Arg(2) | 
|  | ->Arg(4) | 
|  | ->Arg(8) | 
|  | ->Arg(16); | 
|  |  | 
|  | #ifndef CERES_NO_CUDA | 
|  | const std::string name_right_product_cuda = | 
|  | "JacobianRightMultiplyAndAccumulateCuda<" + path + ">"; | 
|  | ::benchmark::RegisterBenchmark( | 
|  | name_right_product_cuda.c_str(), | 
|  | ceres::internal::JacobianRightMultiplyAndAccumulateCuda, | 
|  | data, | 
|  | &context) | 
|  | ->Arg(1); | 
|  | #endif | 
|  |  | 
|  | const std::string name_left_product = | 
|  | "JacobianLeftMultiplyAndAccumulate<" + path + ">"; | 
|  | ::benchmark::RegisterBenchmark( | 
|  | name_left_product.c_str(), | 
|  | ceres::internal::JacobianLeftMultiplyAndAccumulate, | 
|  | data, | 
|  | &context) | 
|  | ->Arg(1); | 
|  |  | 
|  | #ifndef CERES_NO_CUDA | 
|  | const std::string name_left_product_cuda = | 
|  | "JacobianLeftMultiplyAndAccumulateCuda<" + path + ">"; | 
|  | ::benchmark::RegisterBenchmark( | 
|  | name_right_product_cuda.c_str(), | 
|  | ceres::internal::JacobianLeftMultiplyAndAccumulateCuda, | 
|  | data, | 
|  | &context) | 
|  | ->Arg(1); | 
|  | #endif | 
|  | } | 
|  | ::benchmark::RunSpecifiedBenchmarks(); | 
|  |  | 
|  | using namespace ::benchmark; | 
|  | using namespace benchmark_shutdown_fallback; | 
|  | Shutdown(); | 
|  | return 0; | 
|  | } |