Improvements to benchmarks 1. Add CreateFakeBundleAdjustmentJacobian to create bundle adjustment structured jacobians. 2. sparse_linear_operator_benchmark -> spmv_benchmark 3. Refactor spmv_benchmark to use CreateFakeBundleAdjustmentJacobian. 4. Add BA and unstructured Jacobian variants of spmv benchmarks 5. Add BA and unstructured Jacobian benchmarks for the block Jacobi preconditioner. 6. Fix BlockSparseMatrix::ToCompressedRowSparseMatrix to add the row and column block structure to the output. Change-Id: I737a3d7d82dad665b1f2e7886c2c3aedd702ffcd
diff --git a/internal/ceres/CMakeLists.txt b/internal/ceres/CMakeLists.txt index 947003d..cde3895 100644 --- a/internal/ceres/CMakeLists.txt +++ b/internal/ceres/CMakeLists.txt
@@ -213,6 +213,7 @@ eigensparse.cc evaluation_callback.cc evaluator.cc + fake_bundle_adjustment_jacobian.cc file.cc first_order_function.cc float_suitesparse.cc @@ -591,9 +592,12 @@ add_executable(dense_linear_solver_benchmark dense_linear_solver_benchmark.cc) add_dependencies_to_benchmark(dense_linear_solver_benchmark) - add_executable(sparse_linear_operator_benchmark - sparse_linear_operator_benchmark.cc) - add_dependencies_to_benchmark(sparse_linear_operator_benchmark) + add_executable(spmv_benchmark spmv_benchmark.cc) + add_dependencies_to_benchmark(spmv_benchmark) + + add_executable(block_jacobi_preconditioner_benchmark + block_jacobi_preconditioner_benchmark.cc) + add_dependencies_to_benchmark(block_jacobi_preconditioner_benchmark) add_subdirectory(autodiff_benchmarks) endif (BUILD_BENCHMARKS)
diff --git a/internal/ceres/block_jacobi_preconditioner_benchmark.cc b/internal/ceres/block_jacobi_preconditioner_benchmark.cc new file mode 100644 index 0000000..90cab9e --- /dev/null +++ b/internal/ceres/block_jacobi_preconditioner_benchmark.cc
@@ -0,0 +1,135 @@ +// Ceres Solver - A fast non-linear least squares minimizer +// Copyright 2022 Google Inc. All rights reserved. +// http://ceres-solver.org/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of Google Inc. nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +// Authors: sameeragarwal@google.com (Sameer Agarwal) + +#include <memory> + +#include "Eigen/Dense" +#include "benchmark/benchmark.h" +#include "ceres/block_jacobi_preconditioner.h" +#include "ceres/block_sparse_matrix.h" +#include "ceres/fake_bundle_adjustment_jacobian.h" +#include "ceres/internal/config.h" +#include "ceres/internal/eigen.h" + +namespace ceres::internal { + +constexpr int kNumCameras = 1000; +constexpr int kNumPoints = 10000; +constexpr int kCameraSize = 6; +constexpr int kPointSize = 3; +constexpr double kVisibility = 0.1; + +constexpr int kNumRowBlocks = 100000; +constexpr int kNumColBlocks = 10000; +constexpr int kMinRowBlockSize = 1; +constexpr int kMaxRowBlockSize = 5; +constexpr int kMinColBlockSize = 1; +constexpr int kMaxColBlockSize = 15; +constexpr double kBlockDensity = 5.0 / kNumColBlocks; + +static void BM_BlockSparseJacobiPreconditionerBA(benchmark::State& state) { + std::mt19937 prng; + auto jacobian = CreateFakeBundleAdjustmentJacobian( + kNumCameras, kNumPoints, kCameraSize, kPointSize, kVisibility, prng); + BlockSparseJacobiPreconditioner p(*jacobian); + Vector d = Vector::Ones(jacobian->num_cols()); + for (auto _ : state) { + p.Update(*jacobian, d.data()); + } +} + +BENCHMARK(BM_BlockSparseJacobiPreconditionerBA); + +static void BM_BlockCRSJacobiPreconditionerBA(benchmark::State& state) { + std::mt19937 prng; + auto jacobian = CreateFakeBundleAdjustmentJacobian( + kNumCameras, kNumPoints, kCameraSize, kPointSize, kVisibility, prng); + + CompressedRowSparseMatrix jacobian_crs( + jacobian->num_rows(), jacobian->num_cols(), jacobian->num_nonzeros()); + jacobian->ToCompressedRowSparseMatrix(&jacobian_crs); + BlockCRSJacobiPreconditioner p(jacobian_crs); + Vector d = Vector::Ones(jacobian_crs.num_cols()); + for (auto _ : state) { + p.Update(jacobian_crs, d.data()); + } +} + +BENCHMARK(BM_BlockCRSJacobiPreconditionerBA); + +static void BM_BlockSparseJacobiPreconditionerUnstructured( + benchmark::State& state) { + BlockSparseMatrix::RandomMatrixOptions options; + options.num_row_blocks = kNumRowBlocks; + options.num_col_blocks = kNumColBlocks; + options.min_row_block_size = kMinRowBlockSize; + options.min_col_block_size = kMinColBlockSize; + options.max_row_block_size = kMaxRowBlockSize; + options.max_col_block_size = kMaxColBlockSize; + options.block_density = kBlockDensity; + std::mt19937 prng; + + auto jacobian = BlockSparseMatrix::CreateRandomMatrix(options, prng); + BlockSparseJacobiPreconditioner p(*jacobian); + Vector d = Vector::Ones(jacobian->num_cols()); + for (auto _ : state) { + p.Update(*jacobian, d.data()); + } +} + +BENCHMARK(BM_BlockSparseJacobiPreconditionerUnstructured); + +static void BM_BlockCRSJacobiPreconditionerUnstructured( + benchmark::State& state) { + BlockSparseMatrix::RandomMatrixOptions options; + options.num_row_blocks = kNumRowBlocks; + options.num_col_blocks = kNumColBlocks; + options.min_row_block_size = kMinRowBlockSize; + options.min_col_block_size = kMinColBlockSize; + options.max_row_block_size = kMaxRowBlockSize; + options.max_col_block_size = kMaxColBlockSize; + options.block_density = kBlockDensity; + std::mt19937 prng; + + auto jacobian = BlockSparseMatrix::CreateRandomMatrix(options, prng); + CompressedRowSparseMatrix jacobian_crs( + jacobian->num_rows(), jacobian->num_cols(), jacobian->num_nonzeros()); + jacobian->ToCompressedRowSparseMatrix(&jacobian_crs); + BlockCRSJacobiPreconditioner p(jacobian_crs); + Vector d = Vector::Ones(jacobian_crs.num_cols()); + for (auto _ : state) { + p.Update(jacobian_crs, d.data()); + } +} +BENCHMARK(BM_BlockCRSJacobiPreconditionerUnstructured); + +} // namespace ceres::internal + +BENCHMARK_MAIN();
diff --git a/internal/ceres/block_sparse_matrix.cc b/internal/ceres/block_sparse_matrix.cc index 4202470..b1447f8 100644 --- a/internal/ceres/block_sparse_matrix.cc +++ b/internal/ceres/block_sparse_matrix.cc
@@ -170,9 +170,26 @@ void BlockSparseMatrix::ToCompressedRowSparseMatrix( CompressedRowSparseMatrix* crs_matrix) const { - TripletSparseMatrix ts_matrix; - this->ToTripletSparseMatrix(&ts_matrix); - *crs_matrix = *CompressedRowSparseMatrix::FromTripletSparseMatrix(ts_matrix); + { + TripletSparseMatrix ts_matrix; + this->ToTripletSparseMatrix(&ts_matrix); + *crs_matrix = + *CompressedRowSparseMatrix::FromTripletSparseMatrix(ts_matrix); + } + + int num_row_blocks = block_structure_->rows.size(); + auto& row_blocks = *crs_matrix->mutable_row_blocks(); + row_blocks.resize(num_row_blocks); + for (int i = 0; i < num_row_blocks; ++i) { + row_blocks[i] = block_structure_->rows[i].block.size; + } + + int num_col_blocks = block_structure_->cols.size(); + auto& col_blocks = *crs_matrix->mutable_col_blocks(); + col_blocks.resize(num_col_blocks); + for (int i = 0; i < num_col_blocks; ++i) { + col_blocks[i] = block_structure_->cols[i].size; + } } void BlockSparseMatrix::ToDenseMatrix(Matrix* dense_matrix) const {
diff --git a/internal/ceres/fake_bundle_adjustment_jacobian.cc b/internal/ceres/fake_bundle_adjustment_jacobian.cc new file mode 100644 index 0000000..9e7d82b --- /dev/null +++ b/internal/ceres/fake_bundle_adjustment_jacobian.cc
@@ -0,0 +1,99 @@ +// Ceres Solver - A fast non-linear least squares minimizer +// Copyright 2022 Google Inc. All rights reserved. +// http://ceres-solver.org/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of Google Inc. nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +// Authors: joydeepb@cs.utexas.edu (Joydeep Biswas) + +#include "ceres/fake_bundle_adjustment_jacobian.h" + +#include <iostream> +#include <memory> +#include <random> +#include <string> + +#include "Eigen/Dense" +#include "ceres/block_sparse_matrix.h" +#include "ceres/internal/eigen.h" + +namespace ceres::internal { + +std::unique_ptr<BlockSparseMatrix> CreateFakeBundleAdjustmentJacobian( + int num_cameras, + int num_points, + int camera_size, + int point_size, + double visibility, + std::mt19937& prng) { + constexpr int kResidualSize = 2; + + CompressedRowBlockStructure* bs = new CompressedRowBlockStructure; + int c = 0; + // Add column blocks for each point + for (int i = 0; i < num_points; ++i) { + bs->cols.push_back(Block(point_size, c)); + c += point_size; + } + + // Add column blocks for each camera. + for (int i = 0; i < num_cameras; ++i) { + bs->cols.push_back(Block(camera_size, c)); + c += camera_size; + } + + std::bernoulli_distribution visibility_distribution(visibility); + int row_pos = 0; + int cell_pos = 0; + for (int i = 0; i < num_points; ++i) { + for (int j = 0; j < num_cameras; ++j) { + if (!visibility_distribution(prng)) { + continue; + } + bs->rows.emplace_back(); + auto& row = bs->rows.back(); + row.block.position = row_pos; + row.block.size = kResidualSize; + auto& cells = row.cells; + cells.resize(2); + + cells[0].block_id = i; + cells[0].position = cell_pos; + cell_pos += kResidualSize * point_size; + + cells[1].block_id = num_points + j; + cells[1].position = cell_pos; + cell_pos += kResidualSize * camera_size; + + row_pos += kResidualSize; + } + } + + auto jacobian = std::make_unique<BlockSparseMatrix>(bs); + VectorRef(jacobian->mutable_values(), jacobian->num_nonzeros()).setRandom(); + return jacobian; +} + +} // namespace ceres::internal
diff --git a/internal/ceres/fake_bundle_adjustment_jacobian.h b/internal/ceres/fake_bundle_adjustment_jacobian.h new file mode 100644 index 0000000..9dacfaa --- /dev/null +++ b/internal/ceres/fake_bundle_adjustment_jacobian.h
@@ -0,0 +1,51 @@ + +// Ceres Solver - A fast non-linear least squares minimizer +// Copyright 2015 Google Inc. All rights reserved. +// http://ceres-solver.org/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of Google Inc. nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +// Author: sameeragarwal@google.com (Sameer Agarwal) + +#ifndef CERES_INTERNAL_FAKE_BUNDLE_ADJUSTMENT_JACOBIAN +#define CERES_INTERNAL_FAKE_BUNDLE_ADJUSTMENT_JACOBIAN + +#include <memory> +#include <random> + +#include "ceres/block_sparse_matrix.h" + +namespace ceres::internal { +std::unique_ptr<BlockSparseMatrix> CreateFakeBundleAdjustmentJacobian( + int num_cameras, + int num_points, + int camera_size, + int landmark_size, + double visibility, + std::mt19937& prng); + +} // namespace ceres::internal + +#endif // CERES_INTERNAL_FAKE_BUNDLE_ADJUSTMENT_JACOBIAN
diff --git a/internal/ceres/sparse_linear_operator_benchmark.cc b/internal/ceres/sparse_linear_operator_benchmark.cc deleted file mode 100644 index 4969c0c..0000000 --- a/internal/ceres/sparse_linear_operator_benchmark.cc +++ /dev/null
@@ -1,241 +0,0 @@ -// Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2022 Google Inc. All rights reserved. -// http://ceres-solver.org/ -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are met: -// -// * Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// * Neither the name of Google Inc. nor the names of its contributors may be -// used to endorse or promote products derived from this software without -// specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. -// -// Authors: joydeepb@cs.utexas.edu (Joydeep Biswas) - -#include <iostream> -#include <memory> -#include <random> -#include <string> - -#include "Eigen/Dense" -#include "benchmark/benchmark.h" -#include "ceres/block_sparse_matrix.h" -#include "ceres/context_impl.h" -#include "ceres/cuda_sparse_matrix.h" -#include "ceres/cuda_vector.h" -#include "ceres/internal/config.h" -#include "ceres/internal/eigen.h" -#include "ceres/linear_solver.h" -#include "gflags/gflags.h" - -#ifndef CERES_NO_CUDA -#include "cuda_runtime.h" -#endif - -namespace ceres::internal { - -// TODO(Joydeep Biswas): Add a matrix of benchmark sizes to test. - -namespace { -// Generate a synthetic BA-style Jacobian with n camera poses, m landmarks, n_d -// parameters per camera, m_d parameters per landmark, and k residuals per -// camera. -// TODO: Unify the synthetic Jacobian generation code with the code from -// schur_eliminator_benchmark.cc since they are very similar. -std::unique_ptr<BlockSparseMatrix> GenerateSyntheticJacobian( - int n, int m, int n_d, int m_d, int k) { - static const int kResidualSize = 2; - CompressedRowBlockStructure* bs = new CompressedRowBlockStructure; - int c = 0; - // Add column blocks for each camera. - for (int i = 0; i < n; ++i) { - bs->cols.push_back(Block(n_d, c)); - c += n_d; - } - // Add column blocks for each landmark. - for (int i = 0; i < m; ++i) { - bs->cols.push_back(Block(m_d, c)); - c += m_d; - } - // Total number of row blocks = k * n. - bs->rows.resize(k * n); - int values_offset = 0; - std::mt19937 prng; - std::uniform_real_distribution<double> uniform_0_m(0.0, - static_cast<double>(m)); - // Generate structure of the Jacobian. - // For n cameras: - for (int i = 0; i < n; ++i) { - const int camera_block_id = i; - // For k residuals per camera: - for (int j = 0; j < k; ++j) { - // Pick the landmark of the residual randomly from [0, m). - const int landmark_id = uniform_0_m(prng); - const int landmark_block_id = n + landmark_id; - const int row_idx = i * k + j; - const int row = kResidualSize * row_idx; - bs->rows[row_idx].block = Block(kResidualSize, row); - bs->rows[row_idx].cells.resize(2); - // The camera part of the jacobian of this residual. - bs->rows[row_idx].cells[0] = Cell(camera_block_id, values_offset); - values_offset += n_d * kResidualSize; - // The landmark part of the jacobian of this residual. - bs->rows[row_idx].cells[1] = Cell(landmark_block_id, values_offset); - values_offset += m_d * kResidualSize; - } - } - auto jacobian = std::make_unique<BlockSparseMatrix>(bs); - VectorRef(jacobian->mutable_values(), jacobian->num_nonzeros()).setRandom(); - return jacobian; -} -} // namespace - -DEFINE_int32(num_cameras, 1000, "Number of cameras."); -DEFINE_int32(num_landmarks, 10000, "Number of landmarks."); -DEFINE_int32(num_parameters_per_camera, 6, "Number of parameters per camera."); -DEFINE_int32(num_parameters_per_landmark, - 3, - "Number of parameters per landmark."); -DEFINE_int32(num_residuals_per_camera, 100, "Number of residuals per camera."); - -static void BM_CpuRightMultiplyAndAccumulate(benchmark::State& state) { - // Perform setup here - std::unique_ptr<BlockSparseMatrix> jacobian = - GenerateSyntheticJacobian(FLAGS_num_cameras, - FLAGS_num_landmarks, - FLAGS_num_parameters_per_camera, - FLAGS_num_parameters_per_landmark, - FLAGS_num_residuals_per_camera); - Vector x(jacobian->num_cols()); - Vector y(jacobian->num_rows()); - x.setRandom(); - y.setRandom(); - double sum = 0; - for (auto _ : state) { - // This code gets timed - jacobian->RightMultiplyAndAccumulate(x.data(), y.data()); - sum += y.norm(); - } - CHECK_NE(sum, 0.0); -} - -static void BM_CpuLeftMultiplyAndAccumulate(benchmark::State& state) { - // Perform setup here - std::unique_ptr<BlockSparseMatrix> jacobian = - GenerateSyntheticJacobian(FLAGS_num_cameras, - FLAGS_num_landmarks, - FLAGS_num_parameters_per_camera, - FLAGS_num_parameters_per_landmark, - FLAGS_num_residuals_per_camera); - Vector x(jacobian->num_rows()); - Vector y(jacobian->num_cols()); - x.setRandom(); - y.setRandom(); - double sum = 0; - for (auto _ : state) { - // This code gets timed - jacobian->LeftMultiplyAndAccumulate(x.data(), y.data()); - sum += y.norm(); - } - CHECK_NE(sum, 0.0); -} - -BENCHMARK(BM_CpuRightMultiplyAndAccumulate); -BENCHMARK(BM_CpuLeftMultiplyAndAccumulate); - -#ifndef CERES_NO_CUDA -static void BM_CudaRightMultiplyAndAccumulate(benchmark::State& state) { - // Perform setup here - std::unique_ptr<BlockSparseMatrix> jacobian = - GenerateSyntheticJacobian(FLAGS_num_cameras, - FLAGS_num_landmarks, - FLAGS_num_parameters_per_camera, - FLAGS_num_parameters_per_landmark, - FLAGS_num_residuals_per_camera); - ContextImpl context; - std::string message; - context.InitCuda(&message); - CompressedRowSparseMatrix jacobian_crs( - jacobian->num_rows(), jacobian->num_cols(), jacobian->num_nonzeros()); - jacobian->ToCompressedRowSparseMatrix(&jacobian_crs); - CudaSparseMatrix cuda_jacobian(&context, jacobian_crs); - CudaVector cuda_x(&context, 0); - CudaVector cuda_y(&context, 0); - - Vector x(jacobian->num_cols()); - Vector y(jacobian->num_rows()); - x.setRandom(); - y.setRandom(); - - cuda_x.CopyFromCpu(x); - cuda_y.CopyFromCpu(y); - double sum = 0; - for (auto _ : state) { - // This code gets timed - cuda_jacobian.RightMultiplyAndAccumulate(cuda_x, &cuda_y); - sum += cuda_y.Norm(); - CHECK_EQ(cudaDeviceSynchronize(), cudaSuccess); - } - CHECK_NE(sum, 0.0); -} - -static void BM_CudaLeftMultiplyAndAccumulate(benchmark::State& state) { - // Perform setup here - std::unique_ptr<BlockSparseMatrix> jacobian = - GenerateSyntheticJacobian(FLAGS_num_cameras, - FLAGS_num_landmarks, - FLAGS_num_parameters_per_camera, - FLAGS_num_parameters_per_landmark, - FLAGS_num_residuals_per_camera); - ContextImpl context; - std::string message; - context.InitCuda(&message); - CompressedRowSparseMatrix jacobian_crs( - jacobian->num_rows(), jacobian->num_cols(), jacobian->num_nonzeros()); - jacobian->ToCompressedRowSparseMatrix(&jacobian_crs); - CudaSparseMatrix cuda_jacobian(&context, jacobian_crs); - CudaVector cuda_x(&context, 0); - CudaVector cuda_y(&context, 0); - - Vector x(jacobian->num_rows()); - Vector y(jacobian->num_cols()); - x.setRandom(); - y.setRandom(); - - cuda_x.CopyFromCpu(x); - cuda_y.CopyFromCpu(y); - double sum = 0; - for (auto _ : state) { - // This code gets timed - cuda_jacobian.LeftMultiplyAndAccumulate(cuda_x, &cuda_y); - sum += cuda_y.Norm(); - CHECK_EQ(cudaDeviceSynchronize(), cudaSuccess); - } - CHECK_NE(sum, 0.0); -} - -BENCHMARK(BM_CudaRightMultiplyAndAccumulate); -BENCHMARK(BM_CudaLeftMultiplyAndAccumulate); -#endif - -BENCHMARK_MAIN(); - -} // namespace ceres::internal - -BENCHMARK_MAIN();
diff --git a/internal/ceres/spmv_benchmark.cc b/internal/ceres/spmv_benchmark.cc new file mode 100644 index 0000000..0f475be --- /dev/null +++ b/internal/ceres/spmv_benchmark.cc
@@ -0,0 +1,425 @@ +// Ceres Solver - A fast non-linear least squares minimizer +// Copyright 2022 Google Inc. All rights reserved. +// http://ceres-solver.org/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of Google Inc. nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +// Authors: joydeepb@cs.utexas.edu (Joydeep Biswas) + +#include <memory> +#include <random> +#include <string> + +#include "Eigen/Dense" +#include "benchmark/benchmark.h" +#include "ceres/block_jacobi_preconditioner.h" +#include "ceres/block_sparse_matrix.h" +#include "ceres/context_impl.h" +#include "ceres/cuda_sparse_matrix.h" +#include "ceres/cuda_vector.h" +#include "ceres/fake_bundle_adjustment_jacobian.h" +#include "ceres/internal/config.h" +#include "ceres/internal/eigen.h" +#include "ceres/linear_solver.h" + +#ifndef CERES_NO_CUDA +#include "cuda_runtime.h" +#endif + +namespace ceres::internal { + +constexpr int kNumCameras = 1000; +constexpr int kNumPoints = 10000; +constexpr int kCameraSize = 6; +constexpr int kPointSize = 3; +constexpr double kVisibility = 0.1; + +constexpr int kNumRowBlocks = 100000; +constexpr int kNumColBlocks = 10000; +constexpr int kMinRowBlockSize = 1; +constexpr int kMaxRowBlockSize = 5; +constexpr int kMinColBlockSize = 1; +constexpr int kMaxColBlockSize = 15; +constexpr double kBlockDensity = 5.0 / kNumColBlocks; + +static void BM_BlockSparseRightMultiplyAndAccumulateBA( + benchmark::State& state) { + std::mt19937 prng; + auto jacobian = CreateFakeBundleAdjustmentJacobian( + kNumCameras, kNumPoints, kCameraSize, kPointSize, kVisibility, prng); + + Vector x(jacobian->num_cols()); + Vector y(jacobian->num_rows()); + x.setRandom(); + y.setRandom(); + double sum = 0; + for (auto _ : state) { + jacobian->RightMultiplyAndAccumulate(x.data(), y.data()); + sum += y.norm(); + } + CHECK_NE(sum, 0.0); +} + +BENCHMARK(BM_BlockSparseRightMultiplyAndAccumulateBA); + +static void BM_BlockSparseRightMultiplyAndAccumulateUnstructured( + benchmark::State& state) { + BlockSparseMatrix::RandomMatrixOptions options; + options.num_row_blocks = kNumRowBlocks; + options.num_col_blocks = kNumColBlocks; + options.min_row_block_size = kMinRowBlockSize; + options.min_col_block_size = kMinColBlockSize; + options.max_row_block_size = kMaxRowBlockSize; + options.max_col_block_size = kMaxColBlockSize; + options.block_density = kBlockDensity; + std::mt19937 prng; + + auto jacobian = BlockSparseMatrix::CreateRandomMatrix(options, prng); + + Vector x(jacobian->num_cols()); + Vector y(jacobian->num_rows()); + x.setRandom(); + y.setRandom(); + double sum = 0; + for (auto _ : state) { + jacobian->RightMultiplyAndAccumulate(x.data(), y.data()); + sum += y.norm(); + } + CHECK_NE(sum, 0.0); +} + +BENCHMARK(BM_BlockSparseRightMultiplyAndAccumulateUnstructured); + +static void BM_BlockSparseLeftMultiplyAndAccumulateBA(benchmark::State& state) { + std::mt19937 prng; + auto jacobian = CreateFakeBundleAdjustmentJacobian( + kNumCameras, kNumPoints, kCameraSize, kPointSize, kVisibility, prng); + Vector x(jacobian->num_rows()); + Vector y(jacobian->num_cols()); + x.setRandom(); + y.setRandom(); + double sum = 0; + for (auto _ : state) { + jacobian->LeftMultiplyAndAccumulate(x.data(), y.data()); + sum += y.norm(); + } + CHECK_NE(sum, 0.0); +} + +BENCHMARK(BM_BlockSparseLeftMultiplyAndAccumulateBA); + +static void BM_BlockSparseLeftMultiplyAndAccumulateUnstructured( + benchmark::State& state) { + BlockSparseMatrix::RandomMatrixOptions options; + options.num_row_blocks = 100000; + options.num_col_blocks = 10000; + options.min_row_block_size = 1; + options.min_col_block_size = 1; + options.max_row_block_size = 10; + options.max_col_block_size = 15; + options.block_density = 5.0 / options.num_col_blocks; + std::mt19937 prng; + + auto jacobian = BlockSparseMatrix::CreateRandomMatrix(options, prng); + Vector x(jacobian->num_rows()); + Vector y(jacobian->num_cols()); + x.setRandom(); + y.setRandom(); + double sum = 0; + for (auto _ : state) { + jacobian->LeftMultiplyAndAccumulate(x.data(), y.data()); + sum += y.norm(); + } + CHECK_NE(sum, 0.0); +} + +BENCHMARK(BM_BlockSparseLeftMultiplyAndAccumulateUnstructured); + +static void BM_CRSRightMultiplyAndAccumulateBA(benchmark::State& state) { + std::mt19937 prng; + auto bsm_jacobian = CreateFakeBundleAdjustmentJacobian( + kNumCameras, kNumPoints, kCameraSize, kPointSize, kVisibility, prng); + + CompressedRowSparseMatrix jacobian(bsm_jacobian->num_rows(), + bsm_jacobian->num_cols(), + bsm_jacobian->num_nonzeros()); + bsm_jacobian->ToCompressedRowSparseMatrix(&jacobian); + + Vector x(jacobian.num_cols()); + Vector y(jacobian.num_rows()); + x.setRandom(); + y.setRandom(); + double sum = 0; + for (auto _ : state) { + jacobian.RightMultiplyAndAccumulate(x.data(), y.data()); + sum += y.norm(); + } + CHECK_NE(sum, 0.0); +} + +BENCHMARK(BM_CRSRightMultiplyAndAccumulateBA); + +static void BM_CRSRightMultiplyAndAccumulateUnstructured( + benchmark::State& state) { + BlockSparseMatrix::RandomMatrixOptions options; + options.num_row_blocks = kNumRowBlocks; + options.num_col_blocks = kNumColBlocks; + options.min_row_block_size = kMinRowBlockSize; + options.min_col_block_size = kMinColBlockSize; + options.max_row_block_size = kMaxRowBlockSize; + options.max_col_block_size = kMaxColBlockSize; + options.block_density = kBlockDensity; + std::mt19937 prng; + + auto bsm_jacobian = BlockSparseMatrix::CreateRandomMatrix(options, prng); + CompressedRowSparseMatrix jacobian(bsm_jacobian->num_rows(), + bsm_jacobian->num_cols(), + bsm_jacobian->num_nonzeros()); + bsm_jacobian->ToCompressedRowSparseMatrix(&jacobian); + + Vector x(jacobian.num_cols()); + Vector y(jacobian.num_rows()); + x.setRandom(); + y.setRandom(); + double sum = 0; + for (auto _ : state) { + jacobian.RightMultiplyAndAccumulate(x.data(), y.data()); + sum += y.norm(); + } + CHECK_NE(sum, 0.0); +} + +BENCHMARK(BM_CRSRightMultiplyAndAccumulateUnstructured); + +static void BM_CRSLeftMultiplyAndAccumulateBA(benchmark::State& state) { + std::mt19937 prng; + // Perform setup here + auto bsm_jacobian = CreateFakeBundleAdjustmentJacobian( + kNumCameras, kNumPoints, kCameraSize, kPointSize, kVisibility, prng); + CompressedRowSparseMatrix jacobian(bsm_jacobian->num_rows(), + bsm_jacobian->num_cols(), + bsm_jacobian->num_nonzeros()); + bsm_jacobian->ToCompressedRowSparseMatrix(&jacobian); + + Vector x(jacobian.num_rows()); + Vector y(jacobian.num_cols()); + x.setRandom(); + y.setRandom(); + double sum = 0; + for (auto _ : state) { + // This code gets timed + jacobian.LeftMultiplyAndAccumulate(x.data(), y.data()); + sum += y.norm(); + } + CHECK_NE(sum, 0.0); +} + +BENCHMARK(BM_CRSLeftMultiplyAndAccumulateBA); + +static void BM_CRSLeftMultiplyAndAccumulateUnstructured( + benchmark::State& state) { + BlockSparseMatrix::RandomMatrixOptions options; + options.num_row_blocks = kNumRowBlocks; + options.num_col_blocks = kNumColBlocks; + options.min_row_block_size = kMinRowBlockSize; + options.min_col_block_size = kMinColBlockSize; + options.max_row_block_size = kMaxRowBlockSize; + options.max_col_block_size = kMaxColBlockSize; + options.block_density = kBlockDensity; + std::mt19937 prng; + + auto bsm_jacobian = BlockSparseMatrix::CreateRandomMatrix(options, prng); + CompressedRowSparseMatrix jacobian(bsm_jacobian->num_rows(), + bsm_jacobian->num_cols(), + bsm_jacobian->num_nonzeros()); + bsm_jacobian->ToCompressedRowSparseMatrix(&jacobian); + + Vector x(jacobian.num_rows()); + Vector y(jacobian.num_cols()); + x.setRandom(); + y.setRandom(); + double sum = 0; + for (auto _ : state) { + // This code gets timed + jacobian.LeftMultiplyAndAccumulate(x.data(), y.data()); + sum += y.norm(); + } + CHECK_NE(sum, 0.0); +} + +BENCHMARK(BM_CRSLeftMultiplyAndAccumulateUnstructured); + +#ifndef CERES_NO_CUDA +static void BM_CudaRightMultiplyAndAccumulateBA(benchmark::State& state) { + std::mt19937 prng; + auto jacobian = CreateFakeBundleAdjustmentJacobian( + kNumCameras, kNumPoints, kCameraSize, kPointSize, kVisibility, prng); + ContextImpl context; + std::string message; + context.InitCuda(&message); + CompressedRowSparseMatrix jacobian_crs( + jacobian->num_rows(), jacobian->num_cols(), jacobian->num_nonzeros()); + jacobian->ToCompressedRowSparseMatrix(&jacobian_crs); + CudaSparseMatrix cuda_jacobian(&context, jacobian_crs); + CudaVector cuda_x(&context, 0); + CudaVector cuda_y(&context, 0); + + Vector x(jacobian->num_cols()); + Vector y(jacobian->num_rows()); + x.setRandom(); + y.setRandom(); + + cuda_x.CopyFromCpu(x); + cuda_y.CopyFromCpu(y); + double sum = 0; + for (auto _ : state) { + cuda_jacobian.RightMultiplyAndAccumulate(cuda_x, &cuda_y); + sum += cuda_y.Norm(); + CHECK_EQ(cudaDeviceSynchronize(), cudaSuccess); + } + CHECK_NE(sum, 0.0); +} + +BENCHMARK(BM_CudaRightMultiplyAndAccumulateBA); + +static void BM_CudaRightMultiplyAndAccumulateUnstructured( + benchmark::State& state) { + BlockSparseMatrix::RandomMatrixOptions options; + options.num_row_blocks = kNumRowBlocks; + options.num_col_blocks = kNumColBlocks; + options.min_row_block_size = kMinRowBlockSize; + options.min_col_block_size = kMinColBlockSize; + options.max_row_block_size = kMaxRowBlockSize; + options.max_col_block_size = kMaxColBlockSize; + options.block_density = kBlockDensity; + std::mt19937 prng; + + auto jacobian = BlockSparseMatrix::CreateRandomMatrix(options, prng); + ContextImpl context; + std::string message; + context.InitCuda(&message); + CompressedRowSparseMatrix jacobian_crs( + jacobian->num_rows(), jacobian->num_cols(), jacobian->num_nonzeros()); + jacobian->ToCompressedRowSparseMatrix(&jacobian_crs); + CudaSparseMatrix cuda_jacobian(&context, jacobian_crs); + CudaVector cuda_x(&context, 0); + CudaVector cuda_y(&context, 0); + + Vector x(jacobian->num_cols()); + Vector y(jacobian->num_rows()); + x.setRandom(); + y.setRandom(); + + cuda_x.CopyFromCpu(x); + cuda_y.CopyFromCpu(y); + double sum = 0; + for (auto _ : state) { + cuda_jacobian.RightMultiplyAndAccumulate(cuda_x, &cuda_y); + sum += cuda_y.Norm(); + CHECK_EQ(cudaDeviceSynchronize(), cudaSuccess); + } + CHECK_NE(sum, 0.0); +} + +BENCHMARK(BM_CudaRightMultiplyAndAccumulateUnstructured); + +static void BM_CudaLeftMultiplyAndAccumulateBA(benchmark::State& state) { + std::mt19937 prng; + auto jacobian = CreateFakeBundleAdjustmentJacobian( + kNumCameras, kNumPoints, kCameraSize, kPointSize, kVisibility, prng); + ContextImpl context; + std::string message; + context.InitCuda(&message); + CompressedRowSparseMatrix jacobian_crs( + jacobian->num_rows(), jacobian->num_cols(), jacobian->num_nonzeros()); + jacobian->ToCompressedRowSparseMatrix(&jacobian_crs); + CudaSparseMatrix cuda_jacobian(&context, jacobian_crs); + CudaVector cuda_x(&context, 0); + CudaVector cuda_y(&context, 0); + + Vector x(jacobian->num_rows()); + Vector y(jacobian->num_cols()); + x.setRandom(); + y.setRandom(); + + cuda_x.CopyFromCpu(x); + cuda_y.CopyFromCpu(y); + double sum = 0; + for (auto _ : state) { + cuda_jacobian.LeftMultiplyAndAccumulate(cuda_x, &cuda_y); + sum += cuda_y.Norm(); + CHECK_EQ(cudaDeviceSynchronize(), cudaSuccess); + } + CHECK_NE(sum, 0.0); +} + +BENCHMARK(BM_CudaLeftMultiplyAndAccumulateBA); + +static void BM_CudaLeftMultiplyAndAccumulateUnstructured( + benchmark::State& state) { + BlockSparseMatrix::RandomMatrixOptions options; + options.num_row_blocks = kNumRowBlocks; + options.num_col_blocks = kNumColBlocks; + options.min_row_block_size = kMinRowBlockSize; + options.min_col_block_size = kMinColBlockSize; + options.max_row_block_size = kMaxRowBlockSize; + options.max_col_block_size = kMaxColBlockSize; + options.block_density = kBlockDensity; + std::mt19937 prng; + + auto jacobian = BlockSparseMatrix::CreateRandomMatrix(options, prng); + ContextImpl context; + std::string message; + context.InitCuda(&message); + CompressedRowSparseMatrix jacobian_crs( + jacobian->num_rows(), jacobian->num_cols(), jacobian->num_nonzeros()); + jacobian->ToCompressedRowSparseMatrix(&jacobian_crs); + CudaSparseMatrix cuda_jacobian(&context, jacobian_crs); + CudaVector cuda_x(&context, 0); + CudaVector cuda_y(&context, 0); + + Vector x(jacobian->num_rows()); + Vector y(jacobian->num_cols()); + x.setRandom(); + y.setRandom(); + + cuda_x.CopyFromCpu(x); + cuda_y.CopyFromCpu(y); + double sum = 0; + for (auto _ : state) { + cuda_jacobian.LeftMultiplyAndAccumulate(cuda_x, &cuda_y); + sum += cuda_y.Norm(); + CHECK_EQ(cudaDeviceSynchronize(), cudaSuccess); + } + CHECK_NE(sum, 0.0); +} + +BENCHMARK(BM_CudaLeftMultiplyAndAccumulateBA); + +#endif + +} // namespace ceres::internal + +BENCHMARK_MAIN();