// blob: 6a4efa706648ed23c8cf0de900b01b7f74491884 [file] [log] [blame]
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * Neither the name of Google Inc. nor the names of its contributors may be
// used to endorse or promote products derived from this software without
// specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
// Authors: joydeepb@cs.utexas.edu (Joydeep Biswas)
#include <memory>
#include <random>
#include <string>
#include "Eigen/Dense"
#include "benchmark/benchmark.h"
#include "ceres/block_jacobi_preconditioner.h"
#include "ceres/block_sparse_matrix.h"
#include "ceres/context_impl.h"
#include "ceres/cuda_sparse_matrix.h"
#include "ceres/cuda_vector.h"
#include "ceres/fake_bundle_adjustment_jacobian.h"
#include "ceres/internal/config.h"
#include "ceres/internal/eigen.h"
#include "ceres/linear_solver.h"
#ifndef CERES_NO_CUDA
#include "cuda_runtime.h"
#endif
namespace ceres::internal {
// Arguments forwarded to CreateFakeBundleAdjustmentJacobian by the "BA"
// benchmarks in this file.
constexpr int kNumCameras{1000};
constexpr int kNumPoints{10000};
constexpr int kCameraSize{6};
constexpr int kPointSize{3};
constexpr double kVisibility{0.1};

// Values copied into BlockSparseMatrix::RandomMatrixOptions by the
// "Unstructured" benchmarks in this file.
constexpr int kNumRowBlocks{100000};
constexpr int kNumColBlocks{10000};
constexpr int kMinRowBlockSize{1};
constexpr int kMaxRowBlockSize{5};
constexpr int kMinColBlockSize{1};
constexpr int kMaxColBlockSize{15};
// 5.0 divided by the number of column blocks.
// NOTE(review): presumably this targets about five non-zero blocks per row
// block on average -- confirm against CreateRandomMatrix.
constexpr double kBlockDensity{5.0 / kNumColBlocks};
// Times RightMultiplyAndAccumulate on the matrix returned by
// CreateFakeBundleAdjustmentJacobian. The number of threads used for the
// product is taken from state.range(0); the benchmark is registered for
// 1, 2, 4, 8 and 16 threads.
static void BM_BlockSparseRightMultiplyAndAccumulateBA(
    benchmark::State& state) {
  const auto thread_count = static_cast<int>(state.range(0));

  // A default-constructed engine uses the standard default seed, so the
  // generator starts from the same state on every run.
  std::mt19937 rng;
  auto matrix = CreateFakeBundleAdjustmentJacobian(
      kNumCameras, kNumPoints, kCameraSize, kPointSize, kVisibility, rng);

  ContextImpl ctx;
  ctx.EnsureMinimumThreads(thread_count);

  // The input has one entry per column and the output one entry per row.
  Vector input = Vector::Random(matrix->num_cols());
  Vector output = Vector::Random(matrix->num_rows());

  // The norm of the output is accumulated and checked after the loop;
  // presumably this keeps the product from being optimized away.
  double norm_total = 0.0;
  for (auto _ : state) {
    matrix->RightMultiplyAndAccumulate(
        input.data(), output.data(), &ctx, thread_count);
    norm_total += output.norm();
  }
  CHECK_NE(norm_total, 0.0);
}

BENCHMARK(BM_BlockSparseRightMultiplyAndAccumulateBA)
    ->Arg(1)
    ->Arg(2)
    ->Arg(4)
    ->Arg(8)
    ->Arg(16);
// Times RightMultiplyAndAccumulate on a random matrix built by
// BlockSparseMatrix::CreateRandomMatrix. The number of threads used for the
// product is taken from state.range(0); the benchmark is registered for
// 1, 2, 4, 8 and 16 threads.
static void BM_BlockSparseRightMultiplyAndAccumulateUnstructured(
    benchmark::State& state) {
  const auto thread_count = static_cast<int>(state.range(0));

  // All matrix parameters come from the file-level constants.
  BlockSparseMatrix::RandomMatrixOptions matrix_options;
  matrix_options.num_row_blocks = kNumRowBlocks;
  matrix_options.min_row_block_size = kMinRowBlockSize;
  matrix_options.max_row_block_size = kMaxRowBlockSize;
  matrix_options.num_col_blocks = kNumColBlocks;
  matrix_options.min_col_block_size = kMinColBlockSize;
  matrix_options.max_col_block_size = kMaxColBlockSize;
  matrix_options.block_density = kBlockDensity;

  // A default-constructed engine uses the standard default seed, so the
  // generator starts from the same state on every run.
  std::mt19937 rng;
  auto matrix = BlockSparseMatrix::CreateRandomMatrix(matrix_options, rng);

  ContextImpl ctx;
  ctx.EnsureMinimumThreads(thread_count);

  // The input has one entry per column and the output one entry per row.
  Vector input = Vector::Random(matrix->num_cols());
  Vector output = Vector::Random(matrix->num_rows());

  // The norm of the output is accumulated and checked after the loop;
  // presumably this keeps the product from being optimized away.
  double norm_total = 0.0;
  for (auto _ : state) {
    matrix->RightMultiplyAndAccumulate(
        input.data(), output.data(), &ctx, thread_count);
    norm_total += output.norm();
  }
  CHECK_NE(norm_total, 0.0);
}

BENCHMARK(BM_BlockSparseRightMultiplyAndAccumulateUnstructured)
    ->Arg(1)
    ->Arg(2)
    ->Arg(4)
    ->Arg(8)
    ->Arg(16);
// Times LeftMultiplyAndAccumulate on the matrix returned by
// CreateFakeBundleAdjustmentJacobian. This benchmark takes no arguments.
static void BM_BlockSparseLeftMultiplyAndAccumulateBA(benchmark::State& state) {
  // A default-constructed engine uses the standard default seed, so the
  // generator starts from the same state on every run.
  std::mt19937 rng;
  auto matrix = CreateFakeBundleAdjustmentJacobian(
      kNumCameras, kNumPoints, kCameraSize, kPointSize, kVisibility, rng);

  // For the left product the input has one entry per row and the output one
  // entry per column.
  Vector input = Vector::Random(matrix->num_rows());
  Vector output = Vector::Random(matrix->num_cols());

  // The norm of the output is accumulated and checked after the loop;
  // presumably this keeps the product from being optimized away.
  double norm_total = 0.0;
  for (auto _ : state) {
    matrix->LeftMultiplyAndAccumulate(input.data(), output.data());
    norm_total += output.norm();
  }
  CHECK_NE(norm_total, 0.0);
}

BENCHMARK(BM_BlockSparseLeftMultiplyAndAccumulateBA);
// Times LeftMultiplyAndAccumulate on a random matrix built by
// BlockSparseMatrix::CreateRandomMatrix. This benchmark takes no arguments.
//
// The matrix is described by the same file-level constants as the other
// "Unstructured" benchmarks, so that all of them are measured on identically
// configured matrices. Earlier this function hard-coded its own literals,
// one of which (max_row_block_size = 10) disagreed with kMaxRowBlockSize.
static void BM_BlockSparseLeftMultiplyAndAccumulateUnstructured(
    benchmark::State& state) {
  BlockSparseMatrix::RandomMatrixOptions options;
  options.num_row_blocks = kNumRowBlocks;
  options.num_col_blocks = kNumColBlocks;
  options.min_row_block_size = kMinRowBlockSize;
  options.min_col_block_size = kMinColBlockSize;
  options.max_row_block_size = kMaxRowBlockSize;
  options.max_col_block_size = kMaxColBlockSize;
  options.block_density = kBlockDensity;

  // A default-constructed engine uses the standard default seed, so the
  // generator starts from the same state on every run.
  std::mt19937 prng;
  auto jacobian = BlockSparseMatrix::CreateRandomMatrix(options, prng);

  // For the left product the input has one entry per row and the output one
  // entry per column.
  Vector x(jacobian->num_rows());
  Vector y(jacobian->num_cols());
  x.setRandom();
  y.setRandom();

  // The norm of the output is accumulated and checked after the loop;
  // presumably this keeps the product from being optimized away.
  double sum = 0;
  for (auto _ : state) {
    jacobian->LeftMultiplyAndAccumulate(x.data(), y.data());
    sum += y.norm();
  }
  CHECK_NE(sum, 0.0);
}

BENCHMARK(BM_BlockSparseLeftMultiplyAndAccumulateUnstructured);
// Times RightMultiplyAndAccumulate on the compressed-row-sparse conversion of
// the matrix returned by CreateFakeBundleAdjustmentJacobian. The number of
// threads used for the product is taken from state.range(0); the benchmark is
// registered for 1, 2, 4, 8 and 16 threads.
static void BM_CRSRightMultiplyAndAccumulateBA(benchmark::State& state) {
  const auto thread_count = static_cast<int>(state.range(0));

  // A default-constructed engine uses the standard default seed, so the
  // generator starts from the same state on every run.
  std::mt19937 rng;
  auto block_sparse = CreateFakeBundleAdjustmentJacobian(
      kNumCameras, kNumPoints, kCameraSize, kPointSize, kVisibility, rng);
  // The conversion happens once, outside the timed loop.
  auto crs = block_sparse->ToCompressedRowSparseMatrix();

  ContextImpl ctx;
  ctx.EnsureMinimumThreads(thread_count);

  // The input has one entry per column and the output one entry per row.
  Vector input = Vector::Random(crs->num_cols());
  Vector output = Vector::Random(crs->num_rows());

  // The norm of the output is accumulated and checked after the loop;
  // presumably this keeps the product from being optimized away.
  double norm_total = 0.0;
  for (auto _ : state) {
    crs->RightMultiplyAndAccumulate(
        input.data(), output.data(), &ctx, thread_count);
    norm_total += output.norm();
  }
  CHECK_NE(norm_total, 0.0);
}

BENCHMARK(BM_CRSRightMultiplyAndAccumulateBA)
    ->Arg(1)
    ->Arg(2)
    ->Arg(4)
    ->Arg(8)
    ->Arg(16);
// Times RightMultiplyAndAccumulate on the compressed-row-sparse conversion of
// a random matrix built by BlockSparseMatrix::CreateRandomMatrix. The number
// of threads used for the product is taken from state.range(0); the benchmark
// is registered for 1, 2, 4, 8 and 16 threads.
static void BM_CRSRightMultiplyAndAccumulateUnstructured(
    benchmark::State& state) {
  const auto thread_count = static_cast<int>(state.range(0));

  // All matrix parameters come from the file-level constants.
  BlockSparseMatrix::RandomMatrixOptions matrix_options;
  matrix_options.num_row_blocks = kNumRowBlocks;
  matrix_options.min_row_block_size = kMinRowBlockSize;
  matrix_options.max_row_block_size = kMaxRowBlockSize;
  matrix_options.num_col_blocks = kNumColBlocks;
  matrix_options.min_col_block_size = kMinColBlockSize;
  matrix_options.max_col_block_size = kMaxColBlockSize;
  matrix_options.block_density = kBlockDensity;

  // A default-constructed engine uses the standard default seed, so the
  // generator starts from the same state on every run.
  std::mt19937 rng;
  auto block_sparse =
      BlockSparseMatrix::CreateRandomMatrix(matrix_options, rng);
  // The conversion happens once, outside the timed loop.
  auto crs = block_sparse->ToCompressedRowSparseMatrix();

  ContextImpl ctx;
  ctx.EnsureMinimumThreads(thread_count);

  // The input has one entry per column and the output one entry per row.
  Vector input = Vector::Random(crs->num_cols());
  Vector output = Vector::Random(crs->num_rows());

  // The norm of the output is accumulated and checked after the loop;
  // presumably this keeps the product from being optimized away.
  double norm_total = 0.0;
  for (auto _ : state) {
    crs->RightMultiplyAndAccumulate(
        input.data(), output.data(), &ctx, thread_count);
    norm_total += output.norm();
  }
  CHECK_NE(norm_total, 0.0);
}

BENCHMARK(BM_CRSRightMultiplyAndAccumulateUnstructured)
    ->Arg(1)
    ->Arg(2)
    ->Arg(4)
    ->Arg(8)
    ->Arg(16);
// Times LeftMultiplyAndAccumulate on the compressed-row-sparse conversion of
// the matrix returned by CreateFakeBundleAdjustmentJacobian. This benchmark
// takes no arguments.
static void BM_CRSLeftMultiplyAndAccumulateBA(benchmark::State& state) {
  // Setup: everything before the loop runs once and is not timed.
  // A default-constructed engine uses the standard default seed, so the
  // generator starts from the same state on every run.
  std::mt19937 rng;
  auto block_sparse = CreateFakeBundleAdjustmentJacobian(
      kNumCameras, kNumPoints, kCameraSize, kPointSize, kVisibility, rng);
  auto crs = block_sparse->ToCompressedRowSparseMatrix();

  // For the left product the input has one entry per row and the output one
  // entry per column.
  Vector input = Vector::Random(crs->num_rows());
  Vector output = Vector::Random(crs->num_cols());

  // The norm of the output is accumulated and checked after the loop;
  // presumably this keeps the product from being optimized away.
  double norm_total = 0.0;
  for (auto _ : state) {
    // Timed region.
    crs->LeftMultiplyAndAccumulate(input.data(), output.data());
    norm_total += output.norm();
  }
  CHECK_NE(norm_total, 0.0);
}

BENCHMARK(BM_CRSLeftMultiplyAndAccumulateBA);
// Times LeftMultiplyAndAccumulate on the compressed-row-sparse conversion of
// a random matrix built by BlockSparseMatrix::CreateRandomMatrix. This
// benchmark takes no arguments.
static void BM_CRSLeftMultiplyAndAccumulateUnstructured(
    benchmark::State& state) {
  // All matrix parameters come from the file-level constants.
  BlockSparseMatrix::RandomMatrixOptions matrix_options;
  matrix_options.num_row_blocks = kNumRowBlocks;
  matrix_options.min_row_block_size = kMinRowBlockSize;
  matrix_options.max_row_block_size = kMaxRowBlockSize;
  matrix_options.num_col_blocks = kNumColBlocks;
  matrix_options.min_col_block_size = kMinColBlockSize;
  matrix_options.max_col_block_size = kMaxColBlockSize;
  matrix_options.block_density = kBlockDensity;

  // A default-constructed engine uses the standard default seed, so the
  // generator starts from the same state on every run.
  std::mt19937 rng;
  auto block_sparse =
      BlockSparseMatrix::CreateRandomMatrix(matrix_options, rng);
  // The conversion happens once, outside the timed loop.
  auto crs = block_sparse->ToCompressedRowSparseMatrix();

  // For the left product the input has one entry per row and the output one
  // entry per column.
  Vector input = Vector::Random(crs->num_rows());
  Vector output = Vector::Random(crs->num_cols());

  // The norm of the output is accumulated and checked after the loop;
  // presumably this keeps the product from being optimized away.
  double norm_total = 0.0;
  for (auto _ : state) {
    // Timed region.
    crs->LeftMultiplyAndAccumulate(input.data(), output.data());
    norm_total += output.norm();
  }
  CHECK_NE(norm_total, 0.0);
}

BENCHMARK(BM_CRSLeftMultiplyAndAccumulateUnstructured);
#ifndef CERES_NO_CUDA
// Times CudaSparseMatrix::RightMultiplyAndAccumulate on the matrix returned
// by CreateFakeBundleAdjustmentJacobian, after converting it to compressed
// row sparse form and handing it to CudaSparseMatrix. This benchmark takes no
// arguments.
static void BM_CudaRightMultiplyAndAccumulateBA(benchmark::State& state) {
  // A default-constructed engine uses the standard default seed, so the
  // generator starts from the same state on every run.
  std::mt19937 prng;
  auto jacobian = CreateFakeBundleAdjustmentJacobian(
      kNumCameras, kNumPoints, kCameraSize, kPointSize, kVisibility, prng);

  // Stop with a diagnostic if CUDA cannot be initialized, rather than going
  // on to build CUDA objects from a context whose initialization failed.
  ContextImpl context;
  std::string message;
  CHECK(context.InitCuda(&message))
      << "InitCuda() failed because: " << message;

  auto jacobian_crs = jacobian->ToCompressedRowSparseMatrix();
  CudaSparseMatrix cuda_jacobian(&context, *jacobian_crs);
  CudaVector cuda_x(&context, 0);
  CudaVector cuda_y(&context, 0);

  // The input has one entry per column and the output one entry per row.
  // Both are filled on the host and copied into the CUDA vectors once,
  // before timing starts.
  Vector x(jacobian->num_cols());
  Vector y(jacobian->num_rows());
  x.setRandom();
  y.setRandom();
  cuda_x.CopyFromCpu(x);
  cuda_y.CopyFromCpu(y);

  // The norm of the output is accumulated and checked after the loop;
  // presumably this keeps the product from being optimized away.
  double sum = 0;
  for (auto _ : state) {
    cuda_jacobian.RightMultiplyAndAccumulate(cuda_x, &cuda_y);
    sum += cuda_y.Norm();
    // Wait for the device inside the timed region and fail on any CUDA error.
    CHECK_EQ(cudaDeviceSynchronize(), cudaSuccess);
  }
  CHECK_NE(sum, 0.0);
}

BENCHMARK(BM_CudaRightMultiplyAndAccumulateBA);
// Times CudaSparseMatrix::RightMultiplyAndAccumulate on a random matrix built
// by BlockSparseMatrix::CreateRandomMatrix, after converting it to compressed
// row sparse form and handing it to CudaSparseMatrix. This benchmark takes no
// arguments.
static void BM_CudaRightMultiplyAndAccumulateUnstructured(
    benchmark::State& state) {
  // All matrix parameters come from the file-level constants.
  BlockSparseMatrix::RandomMatrixOptions options;
  options.num_row_blocks = kNumRowBlocks;
  options.num_col_blocks = kNumColBlocks;
  options.min_row_block_size = kMinRowBlockSize;
  options.min_col_block_size = kMinColBlockSize;
  options.max_row_block_size = kMaxRowBlockSize;
  options.max_col_block_size = kMaxColBlockSize;
  options.block_density = kBlockDensity;

  // A default-constructed engine uses the standard default seed, so the
  // generator starts from the same state on every run.
  std::mt19937 prng;
  auto jacobian = BlockSparseMatrix::CreateRandomMatrix(options, prng);

  // Stop with a diagnostic if CUDA cannot be initialized, rather than going
  // on to build CUDA objects from a context whose initialization failed.
  ContextImpl context;
  std::string message;
  CHECK(context.InitCuda(&message))
      << "InitCuda() failed because: " << message;

  auto jacobian_crs = jacobian->ToCompressedRowSparseMatrix();
  CudaSparseMatrix cuda_jacobian(&context, *jacobian_crs);
  CudaVector cuda_x(&context, 0);
  CudaVector cuda_y(&context, 0);

  // The input has one entry per column and the output one entry per row.
  // Both are filled on the host and copied into the CUDA vectors once,
  // before timing starts.
  Vector x(jacobian->num_cols());
  Vector y(jacobian->num_rows());
  x.setRandom();
  y.setRandom();
  cuda_x.CopyFromCpu(x);
  cuda_y.CopyFromCpu(y);

  // The norm of the output is accumulated and checked after the loop;
  // presumably this keeps the product from being optimized away.
  double sum = 0;
  for (auto _ : state) {
    cuda_jacobian.RightMultiplyAndAccumulate(cuda_x, &cuda_y);
    sum += cuda_y.Norm();
    // Wait for the device inside the timed region and fail on any CUDA error.
    CHECK_EQ(cudaDeviceSynchronize(), cudaSuccess);
  }
  CHECK_NE(sum, 0.0);
}

BENCHMARK(BM_CudaRightMultiplyAndAccumulateUnstructured);
// Times CudaSparseMatrix::LeftMultiplyAndAccumulate on the matrix returned by
// CreateFakeBundleAdjustmentJacobian, after converting it to compressed row
// sparse form and handing it to CudaSparseMatrix. This benchmark takes no
// arguments.
static void BM_CudaLeftMultiplyAndAccumulateBA(benchmark::State& state) {
  // A default-constructed engine uses the standard default seed, so the
  // generator starts from the same state on every run.
  std::mt19937 prng;
  auto jacobian = CreateFakeBundleAdjustmentJacobian(
      kNumCameras, kNumPoints, kCameraSize, kPointSize, kVisibility, prng);

  // Stop with a diagnostic if CUDA cannot be initialized, rather than going
  // on to build CUDA objects from a context whose initialization failed.
  ContextImpl context;
  std::string message;
  CHECK(context.InitCuda(&message))
      << "InitCuda() failed because: " << message;

  auto jacobian_crs = jacobian->ToCompressedRowSparseMatrix();
  CudaSparseMatrix cuda_jacobian(&context, *jacobian_crs);
  CudaVector cuda_x(&context, 0);
  CudaVector cuda_y(&context, 0);

  // For the left product the input has one entry per row and the output one
  // entry per column. Both are filled on the host and copied into the CUDA
  // vectors once, before timing starts.
  Vector x(jacobian->num_rows());
  Vector y(jacobian->num_cols());
  x.setRandom();
  y.setRandom();
  cuda_x.CopyFromCpu(x);
  cuda_y.CopyFromCpu(y);

  // The norm of the output is accumulated and checked after the loop;
  // presumably this keeps the product from being optimized away.
  double sum = 0;
  for (auto _ : state) {
    cuda_jacobian.LeftMultiplyAndAccumulate(cuda_x, &cuda_y);
    sum += cuda_y.Norm();
    // Wait for the device inside the timed region and fail on any CUDA error.
    CHECK_EQ(cudaDeviceSynchronize(), cudaSuccess);
  }
  CHECK_NE(sum, 0.0);
}

BENCHMARK(BM_CudaLeftMultiplyAndAccumulateBA);
// Times CudaSparseMatrix::LeftMultiplyAndAccumulate on a random matrix built
// by BlockSparseMatrix::CreateRandomMatrix, after converting it to compressed
// row sparse form and handing it to CudaSparseMatrix. This benchmark takes no
// arguments.
static void BM_CudaLeftMultiplyAndAccumulateUnstructured(
    benchmark::State& state) {
  // All matrix parameters come from the file-level constants.
  BlockSparseMatrix::RandomMatrixOptions options;
  options.num_row_blocks = kNumRowBlocks;
  options.num_col_blocks = kNumColBlocks;
  options.min_row_block_size = kMinRowBlockSize;
  options.min_col_block_size = kMinColBlockSize;
  options.max_row_block_size = kMaxRowBlockSize;
  options.max_col_block_size = kMaxColBlockSize;
  options.block_density = kBlockDensity;

  // A default-constructed engine uses the standard default seed, so the
  // generator starts from the same state on every run.
  std::mt19937 prng;
  auto jacobian = BlockSparseMatrix::CreateRandomMatrix(options, prng);

  // Stop with a diagnostic if CUDA cannot be initialized, rather than going
  // on to build CUDA objects from a context whose initialization failed.
  ContextImpl context;
  std::string message;
  CHECK(context.InitCuda(&message))
      << "InitCuda() failed because: " << message;

  auto jacobian_crs = jacobian->ToCompressedRowSparseMatrix();
  CudaSparseMatrix cuda_jacobian(&context, *jacobian_crs);
  CudaVector cuda_x(&context, 0);
  CudaVector cuda_y(&context, 0);

  // For the left product the input has one entry per row and the output one
  // entry per column. Both are filled on the host and copied into the CUDA
  // vectors once, before timing starts.
  Vector x(jacobian->num_rows());
  Vector y(jacobian->num_cols());
  x.setRandom();
  y.setRandom();
  cuda_x.CopyFromCpu(x);
  cuda_y.CopyFromCpu(y);

  // The norm of the output is accumulated and checked after the loop;
  // presumably this keeps the product from being optimized away.
  double sum = 0;
  for (auto _ : state) {
    cuda_jacobian.LeftMultiplyAndAccumulate(cuda_x, &cuda_y);
    sum += cuda_y.Norm();
    // Wait for the device inside the timed region and fail on any CUDA error.
    CHECK_EQ(cudaDeviceSynchronize(), cudaSuccess);
  }
  CHECK_NE(sum, 0.0);
}

BENCHMARK(BM_CudaLeftMultiplyAndAccumulateUnstructured);
#endif
} // namespace ceres::internal
// Google Benchmark macro that supplies the main() entry point for this
// binary; it is invoked at global scope, outside the ceres::internal
// namespace.
BENCHMARK_MAIN();