blob: 9849b5a941c61b0a986b70198a7cb2e93b7178f3 [file] [log] [blame]
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2022 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * Neither the name of Google Inc. nor the names of its contributors may be
// used to endorse or promote products derived from this software without
// specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
// Authors: dmitriy.korchemkin@gmail.com (Dmitriy Korchemkin)
#include <memory>
#include <random>
#include <string>
#include <vector>
#include "benchmark/benchmark.h"
#include "ceres/block_sparse_matrix.h"
#include "ceres/bundle_adjustment_test_util.h"
#include "ceres/cuda_sparse_matrix.h"
#include "ceres/cuda_vector.h"
#include "ceres/evaluator.h"
#include "ceres/partitioned_matrix_view.h"
#include "ceres/preprocessor.h"
#include "ceres/problem.h"
#include "ceres/problem_impl.h"
#include "ceres/program.h"
#include "ceres/sparse_matrix.h"
namespace ceres::internal {
template <typename Derived, typename Base>
std::unique_ptr<Derived> downcast_unique_ptr(std::unique_ptr<Base>& base) {
return std::unique_ptr<Derived>(dynamic_cast<Derived*>(base.release()));
}
// Benchmark library might invoke benchmark function multiple times.
// In order to save time required to parse BAL data, we ensure that
// each dataset is being loaded at most once.
// Each type of jacobians is also cached after first creation
struct BALData {
using PartitionedView = PartitionedMatrixView<2, 3, 9>;
explicit BALData(const std::string& path) {
bal_problem = std::make_unique<BundleAdjustmentProblem>(path);
CHECK(bal_problem != nullptr);
auto problem_impl = bal_problem->mutable_problem()->mutable_impl();
auto preprocessor = Preprocessor::Create(MinimizerType::TRUST_REGION);
preprocessed_problem = std::make_unique<PreprocessedProblem>();
Solver::Options options = bal_problem->options();
options.linear_solver_type = ITERATIVE_SCHUR;
CHECK(preprocessor->Preprocess(
options, problem_impl, preprocessed_problem.get()));
auto program = preprocessed_problem->reduced_program.get();
parameters.resize(program->NumParameters());
program->ParameterBlocksToStateVector(parameters.data());
}
std::unique_ptr<BlockSparseMatrix> CreateBlockSparseJacobian(
ContextImpl* context) {
auto problem = bal_problem->mutable_problem();
auto problem_impl = problem->mutable_impl();
CHECK(problem_impl != nullptr);
Evaluator::Options options;
options.linear_solver_type = ITERATIVE_SCHUR;
options.num_threads = 1;
options.context = context;
options.num_eliminate_blocks = bal_problem->num_points();
std::string error;
auto program = preprocessed_problem->reduced_program.get();
auto evaluator = Evaluator::Create(options, program, &error);
CHECK(evaluator != nullptr);
auto jacobian = evaluator->CreateJacobian();
auto block_sparse = downcast_unique_ptr<BlockSparseMatrix>(jacobian);
CHECK(block_sparse != nullptr);
std::mt19937 rng;
std::normal_distribution<double> rnorm;
const int nnz = block_sparse->num_nonzeros();
auto values = block_sparse->mutable_values();
for (int i = 0; i < nnz; ++i) {
values[i] = rnorm(rng);
}
return block_sparse;
}
std::unique_ptr<CompressedRowSparseMatrix> CreateCompressedRowSparseJacobian(
ContextImpl* context) {
auto block_sparse = BlockSparseJacobian(context);
auto crs_jacobian = std::make_unique<CompressedRowSparseMatrix>(
block_sparse->num_rows(),
block_sparse->num_cols(),
block_sparse->num_nonzeros());
block_sparse->ToCompressedRowSparseMatrix(crs_jacobian.get());
return crs_jacobian;
}
const BlockSparseMatrix* BlockSparseJacobian(ContextImpl* context) {
if (!block_sparse_jacobian) {
block_sparse_jacobian = CreateBlockSparseJacobian(context);
}
return block_sparse_jacobian.get();
}
const BlockSparseMatrix* BlockSparseJacobianWithTranspose(
ContextImpl* context) {
if (!block_sparse_jacobian_with_transpose) {
block_sparse_jacobian_with_transpose = CreateBlockSparseJacobian(context);
block_sparse_jacobian_with_transpose->AddTransposeBlockStructure();
}
return block_sparse_jacobian_with_transpose.get();
}
const CompressedRowSparseMatrix* CompressedRowSparseJacobian(
ContextImpl* context) {
if (!crs_jacobian) {
crs_jacobian = CreateCompressedRowSparseJacobian(context);
}
return crs_jacobian.get();
}
const PartitionedView* PartitionedMatrixViewJacobian(
const LinearSolver::Options& options) {
auto block_sparse = BlockSparseJacobian(options.context);
partitioned_view_jacobian =
std::make_unique<PartitionedView>(options, *block_sparse);
return partitioned_view_jacobian.get();
}
const PartitionedView* PartitionedMatrixViewJacobianWithTranspose(
const LinearSolver::Options& options) {
auto block_sparse_transpose =
BlockSparseJacobianWithTranspose(options.context);
partitioned_view_jacobian_with_transpose =
std::make_unique<PartitionedView>(options, *block_sparse_transpose);
return partitioned_view_jacobian_with_transpose.get();
}
Vector parameters;
std::unique_ptr<BundleAdjustmentProblem> bal_problem;
std::unique_ptr<PreprocessedProblem> preprocessed_problem;
std::unique_ptr<BlockSparseMatrix> block_sparse_jacobian;
std::unique_ptr<BlockSparseMatrix> block_sparse_jacobian_with_transpose;
std::unique_ptr<CompressedRowSparseMatrix> crs_jacobian;
std::unique_ptr<PartitionedView> partitioned_view_jacobian;
std::unique_ptr<PartitionedView> partitioned_view_jacobian_with_transpose;
};
static void Residuals(benchmark::State& state,
BALData* data,
ContextImpl* context) {
const int num_threads = state.range(0);
Evaluator::Options options;
options.linear_solver_type = SPARSE_NORMAL_CHOLESKY;
options.num_threads = num_threads;
options.context = context;
options.num_eliminate_blocks = 0;
std::string error;
CHECK(data->preprocessed_problem != nullptr);
auto program = data->preprocessed_problem->reduced_program.get();
CHECK(program != nullptr);
auto evaluator = Evaluator::Create(options, program, &error);
CHECK(evaluator != nullptr);
double cost = 0.;
Vector residuals = Vector::Zero(program->NumResiduals());
Evaluator::EvaluateOptions eval_options;
for (auto _ : state) {
CHECK(evaluator->Evaluate(eval_options,
data->parameters.data(),
&cost,
residuals.data(),
nullptr,
nullptr));
}
}
static void ResidualsAndJacobian(benchmark::State& state,
BALData* data,
ContextImpl* context) {
const int num_threads = state.range(0);
Evaluator::Options options;
options.linear_solver_type = SPARSE_NORMAL_CHOLESKY;
options.num_threads = num_threads;
options.context = context;
options.num_eliminate_blocks = 0;
std::string error;
CHECK(data->preprocessed_problem != nullptr);
auto program = data->preprocessed_problem->reduced_program.get();
CHECK(program != nullptr);
auto evaluator = Evaluator::Create(options, program, &error);
CHECK(evaluator != nullptr);
double cost = 0.;
Vector residuals = Vector::Zero(program->NumResiduals());
auto jacobian = evaluator->CreateJacobian();
Evaluator::EvaluateOptions eval_options;
for (auto _ : state) {
CHECK(evaluator->Evaluate(eval_options,
data->parameters.data(),
&cost,
residuals.data(),
nullptr,
jacobian.get()));
}
}
static void PMVRightMultiplyAndAccumulateF(benchmark::State& state,
BALData* data,
ContextImpl* context) {
LinearSolver::Options options;
options.num_threads = state.range(0);
options.elimination_groups.push_back(data->bal_problem->num_points());
options.context = context;
auto jacobian = data->PartitionedMatrixViewJacobian(options);
Vector y = Vector::Zero(jacobian->num_rows());
Vector x = Vector::Random(jacobian->num_cols_f());
for (auto _ : state) {
jacobian->RightMultiplyAndAccumulateF(x.data(), y.data());
}
CHECK_GT(y.squaredNorm(), 0.);
}
static void PMVLeftMultiplyAndAccumulateF(benchmark::State& state,
BALData* data,
ContextImpl* context) {
LinearSolver::Options options;
options.num_threads = state.range(0);
options.elimination_groups.push_back(data->bal_problem->num_points());
options.context = context;
auto jacobian = data->PartitionedMatrixViewJacobianWithTranspose(options);
Vector y = Vector::Zero(jacobian->num_cols_f());
Vector x = Vector::Random(jacobian->num_rows());
for (auto _ : state) {
jacobian->LeftMultiplyAndAccumulateF(x.data(), y.data());
}
CHECK_GT(y.squaredNorm(), 0.);
}
static void PMVRightMultiplyAndAccumulateE(benchmark::State& state,
BALData* data,
ContextImpl* context) {
LinearSolver::Options options;
options.num_threads = state.range(0);
options.elimination_groups.push_back(data->bal_problem->num_points());
options.context = context;
auto jacobian = data->PartitionedMatrixViewJacobian(options);
Vector y = Vector::Zero(jacobian->num_rows());
Vector x = Vector::Random(jacobian->num_cols_e());
for (auto _ : state) {
jacobian->RightMultiplyAndAccumulateE(x.data(), y.data());
}
CHECK_GT(y.squaredNorm(), 0.);
}
static void PMVLeftMultiplyAndAccumulateE(benchmark::State& state,
BALData* data,
ContextImpl* context) {
LinearSolver::Options options;
options.num_threads = state.range(0);
options.elimination_groups.push_back(data->bal_problem->num_points());
options.context = context;
auto jacobian = data->PartitionedMatrixViewJacobianWithTranspose(options);
Vector y = Vector::Zero(jacobian->num_cols_e());
Vector x = Vector::Random(jacobian->num_rows());
for (auto _ : state) {
jacobian->LeftMultiplyAndAccumulateE(x.data(), y.data());
}
CHECK_GT(y.squaredNorm(), 0.);
}
static void JacobianRightMultiplyAndAccumulate(benchmark::State& state,
BALData* data,
ContextImpl* context) {
const int num_threads = state.range(0);
auto jacobian = data->BlockSparseJacobian(context);
Vector y = Vector::Zero(jacobian->num_rows());
Vector x = Vector::Random(jacobian->num_cols());
for (auto _ : state) {
jacobian->RightMultiplyAndAccumulate(
x.data(), y.data(), context, num_threads);
}
CHECK_GT(y.squaredNorm(), 0.);
}
static void JacobianLeftMultiplyAndAccumulate(benchmark::State& state,
BALData* data,
ContextImpl* context) {
const int num_threads = state.range(0);
auto jacobian = data->BlockSparseJacobianWithTranspose(context);
Vector y = Vector::Zero(jacobian->num_cols());
Vector x = Vector::Random(jacobian->num_rows());
for (auto _ : state) {
jacobian->LeftMultiplyAndAccumulate(
x.data(), y.data(), context, num_threads);
}
CHECK_GT(y.squaredNorm(), 0.);
}
#ifndef CERES_NO_CUDA
static void JacobianRightMultiplyAndAccumulateCuda(benchmark::State& state,
BALData* data,
ContextImpl* context) {
auto crs_jacobian = data->CompressedRowSparseJacobian(context);
CudaSparseMatrix cuda_jacobian(context, *crs_jacobian);
CudaVector cuda_x(context, 0);
CudaVector cuda_y(context, 0);
Vector x(crs_jacobian->num_cols());
Vector y(crs_jacobian->num_rows());
x.setRandom();
y.setRandom();
cuda_x.CopyFromCpu(x);
cuda_y.CopyFromCpu(y);
double sum = 0;
for (auto _ : state) {
cuda_jacobian.RightMultiplyAndAccumulate(cuda_x, &cuda_y);
sum += cuda_y.Norm();
CHECK_EQ(cudaDeviceSynchronize(), cudaSuccess);
}
CHECK_NE(sum, 0.0);
}
static void JacobianLeftMultiplyAndAccumulateCuda(benchmark::State& state,
BALData* data,
ContextImpl* context) {
auto crs_jacobian = data->CompressedRowSparseJacobian(context);
CudaSparseMatrix cuda_jacobian(context, *crs_jacobian);
CudaVector cuda_x(context, 0);
CudaVector cuda_y(context, 0);
Vector x(crs_jacobian->num_rows());
Vector y(crs_jacobian->num_cols());
x.setRandom();
y.setRandom();
cuda_x.CopyFromCpu(x);
cuda_y.CopyFromCpu(y);
double sum = 0;
for (auto _ : state) {
cuda_jacobian.LeftMultiplyAndAccumulate(cuda_x, &cuda_y);
sum += cuda_y.Norm();
CHECK_EQ(cudaDeviceSynchronize(), cudaSuccess);
}
CHECK_NE(sum, 0.0);
}
#endif
} // namespace ceres::internal
// Older versions of benchmark library might come without ::benchmark::Shutdown
// function. We provide an empty fallback variant of Shutdown function in
// order to support both older and newer versions
namespace benchmark_shutdown_fallback {
template <typename... Args>
void Shutdown(Args... args) {}
}; // namespace benchmark_shutdown_fallback
int main(int argc, char** argv) {
::benchmark::Initialize(&argc, argv);
std::vector<std::unique_ptr<ceres::internal::BALData>> benchmark_data;
if (argc == 1) {
LOG(FATAL) << "No input datasets specified. Usage: " << argv[0]
<< " [benchmark flags] path_to_BAL_data_1.txt ... "
"path_to_BAL_data_N.txt";
return -1;
}
ceres::internal::ContextImpl context;
context.EnsureMinimumThreads(16);
#ifndef CERES_NO_CUDA
std::string message;
context.InitCuda(&message);
#endif
for (int i = 1; i < argc; ++i) {
const std::string path(argv[i]);
const std::string name_residuals = "Residuals<" + path + ">";
benchmark_data.emplace_back(
std::make_unique<ceres::internal::BALData>(path));
auto data = benchmark_data.back().get();
::benchmark::RegisterBenchmark(
name_residuals.c_str(), ceres::internal::Residuals, data, &context)
->Arg(1)
->Arg(2)
->Arg(4)
->Arg(8)
->Arg(16);
const std::string name_jacobians = "ResidualsAndJacobian<" + path + ">";
::benchmark::RegisterBenchmark(name_jacobians.c_str(),
ceres::internal::ResidualsAndJacobian,
data,
&context)
->Arg(1)
->Arg(2)
->Arg(4)
->Arg(8)
->Arg(16);
const std::string name_right_product =
"JacobianRightMultiplyAndAccumulate<" + path + ">";
::benchmark::RegisterBenchmark(
name_right_product.c_str(),
ceres::internal::JacobianRightMultiplyAndAccumulate,
data,
&context)
->Arg(1)
->Arg(2)
->Arg(4)
->Arg(8)
->Arg(16);
const std::string name_right_product_partitioned_f =
"PMVRightMultiplyAndAccumulateF<" + path + ">";
::benchmark::RegisterBenchmark(
name_right_product_partitioned_f.c_str(),
ceres::internal::PMVRightMultiplyAndAccumulateF,
data,
&context)
->Arg(1)
->Arg(2)
->Arg(4)
->Arg(8)
->Arg(16);
const std::string name_right_product_partitioned_e =
"PMVRightMultiplyAndAccumulateE<" + path + ">";
::benchmark::RegisterBenchmark(
name_right_product_partitioned_e.c_str(),
ceres::internal::PMVRightMultiplyAndAccumulateE,
data,
&context)
->Arg(1)
->Arg(2)
->Arg(4)
->Arg(8)
->Arg(16);
#ifndef CERES_NO_CUDA
const std::string name_right_product_cuda =
"JacobianRightMultiplyAndAccumulateCuda<" + path + ">";
::benchmark::RegisterBenchmark(
name_right_product_cuda.c_str(),
ceres::internal::JacobianRightMultiplyAndAccumulateCuda,
data,
&context)
->Arg(1);
#endif
const std::string name_left_product =
"JacobianLeftMultiplyAndAccumulate<" + path + ">";
::benchmark::RegisterBenchmark(
name_left_product.c_str(),
ceres::internal::JacobianLeftMultiplyAndAccumulate,
data,
&context)
->Arg(1)
->Arg(2)
->Arg(4)
->Arg(8)
->Arg(16);
const std::string name_left_product_partitioned_f =
"PMVLeftMultiplyAndAccumulateF<" + path + ">";
::benchmark::RegisterBenchmark(
name_left_product_partitioned_f.c_str(),
ceres::internal::PMVLeftMultiplyAndAccumulateF,
data,
&context)
->Arg(1)
->Arg(2)
->Arg(4)
->Arg(8)
->Arg(16);
const std::string name_left_product_partitioned_e =
"PMVLeftMultiplyAndAccumulateE<" + path + ">";
::benchmark::RegisterBenchmark(
name_left_product_partitioned_e.c_str(),
ceres::internal::PMVLeftMultiplyAndAccumulateE,
data,
&context)
->Arg(1)
->Arg(2)
->Arg(4)
->Arg(8)
->Arg(16);
#ifndef CERES_NO_CUDA
const std::string name_left_product_cuda =
"JacobianLeftMultiplyAndAccumulateCuda<" + path + ">";
::benchmark::RegisterBenchmark(
name_left_product_cuda.c_str(),
ceres::internal::JacobianLeftMultiplyAndAccumulateCuda,
data,
&context)
->Arg(1);
#endif
}
::benchmark::RunSpecifiedBenchmarks();
using namespace ::benchmark;
using namespace benchmark_shutdown_fallback;
Shutdown();
return 0;
}