| // Ceres Solver - A fast non-linear least squares minimizer | 
 | // Copyright 2022 Google Inc. All rights reserved. | 
 | // http://ceres-solver.org/ | 
 | // | 
 | // Redistribution and use in source and binary forms, with or without | 
 | // modification, are permitted provided that the following conditions are met: | 
 | // | 
 | // * Redistributions of source code must retain the above copyright notice, | 
 | //   this list of conditions and the following disclaimer. | 
 | // * Redistributions in binary form must reproduce the above copyright notice, | 
 | //   this list of conditions and the following disclaimer in the documentation | 
 | //   and/or other materials provided with the distribution. | 
 | // * Neither the name of Google Inc. nor the names of its contributors may be | 
 | //   used to endorse or promote products derived from this software without | 
 | //   specific prior written permission. | 
 | // | 
 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | 
 | // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | 
 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | 
 | // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE | 
 | // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | 
 | // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | 
 | // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | 
 | // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | 
 | // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | 
 | // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | 
 | // POSSIBILITY OF SUCH DAMAGE. | 
 | // | 
 | // Authors: joydeepb@cs.utexas.edu (Joydeep Biswas) | 
 |  | 
 | #include <memory> | 
 | #include <random> | 
 | #include <string> | 
 |  | 
 | #include "Eigen/Dense" | 
 | #include "benchmark/benchmark.h" | 
 | #include "ceres/block_jacobi_preconditioner.h" | 
 | #include "ceres/block_sparse_matrix.h" | 
 | #include "ceres/context_impl.h" | 
 | #include "ceres/cuda_sparse_matrix.h" | 
 | #include "ceres/cuda_vector.h" | 
 | #include "ceres/fake_bundle_adjustment_jacobian.h" | 
 | #include "ceres/internal/config.h" | 
 | #include "ceres/internal/eigen.h" | 
 | #include "ceres/linear_solver.h" | 
 |  | 
 | #ifndef CERES_NO_CUDA | 
 | #include "cuda_runtime.h" | 
 | #endif | 
 |  | 
 | namespace ceres::internal { | 
 |  | 
 | constexpr int kNumCameras = 1000; | 
 | constexpr int kNumPoints = 10000; | 
 | constexpr int kCameraSize = 6; | 
 | constexpr int kPointSize = 3; | 
 | constexpr double kVisibility = 0.1; | 
 |  | 
 | constexpr int kNumRowBlocks = 100000; | 
 | constexpr int kNumColBlocks = 10000; | 
 | constexpr int kMinRowBlockSize = 1; | 
 | constexpr int kMaxRowBlockSize = 5; | 
 | constexpr int kMinColBlockSize = 1; | 
 | constexpr int kMaxColBlockSize = 15; | 
 | constexpr double kBlockDensity = 5.0 / kNumColBlocks; | 
 |  | 
 | static void BM_BlockSparseRightMultiplyAndAccumulateBA( | 
 |     benchmark::State& state) { | 
 |   std::mt19937 prng; | 
 |   auto jacobian = CreateFakeBundleAdjustmentJacobian( | 
 |       kNumCameras, kNumPoints, kCameraSize, kPointSize, kVisibility, prng); | 
 |  | 
 |   Vector x(jacobian->num_cols()); | 
 |   Vector y(jacobian->num_rows()); | 
 |   x.setRandom(); | 
 |   y.setRandom(); | 
 |   double sum = 0; | 
 |   for (auto _ : state) { | 
 |     jacobian->RightMultiplyAndAccumulate(x.data(), y.data()); | 
 |     sum += y.norm(); | 
 |   } | 
 |   CHECK_NE(sum, 0.0); | 
 | } | 
 |  | 
 | BENCHMARK(BM_BlockSparseRightMultiplyAndAccumulateBA); | 
 |  | 
 | static void BM_BlockSparseRightMultiplyAndAccumulateUnstructured( | 
 |     benchmark::State& state) { | 
 |   BlockSparseMatrix::RandomMatrixOptions options; | 
 |   options.num_row_blocks = kNumRowBlocks; | 
 |   options.num_col_blocks = kNumColBlocks; | 
 |   options.min_row_block_size = kMinRowBlockSize; | 
 |   options.min_col_block_size = kMinColBlockSize; | 
 |   options.max_row_block_size = kMaxRowBlockSize; | 
 |   options.max_col_block_size = kMaxColBlockSize; | 
 |   options.block_density = kBlockDensity; | 
 |   std::mt19937 prng; | 
 |  | 
 |   auto jacobian = BlockSparseMatrix::CreateRandomMatrix(options, prng); | 
 |  | 
 |   Vector x(jacobian->num_cols()); | 
 |   Vector y(jacobian->num_rows()); | 
 |   x.setRandom(); | 
 |   y.setRandom(); | 
 |   double sum = 0; | 
 |   for (auto _ : state) { | 
 |     jacobian->RightMultiplyAndAccumulate(x.data(), y.data()); | 
 |     sum += y.norm(); | 
 |   } | 
 |   CHECK_NE(sum, 0.0); | 
 | } | 
 |  | 
 | BENCHMARK(BM_BlockSparseRightMultiplyAndAccumulateUnstructured); | 
 |  | 
 | static void BM_BlockSparseLeftMultiplyAndAccumulateBA(benchmark::State& state) { | 
 |   std::mt19937 prng; | 
 |   auto jacobian = CreateFakeBundleAdjustmentJacobian( | 
 |       kNumCameras, kNumPoints, kCameraSize, kPointSize, kVisibility, prng); | 
 |   Vector x(jacobian->num_rows()); | 
 |   Vector y(jacobian->num_cols()); | 
 |   x.setRandom(); | 
 |   y.setRandom(); | 
 |   double sum = 0; | 
 |   for (auto _ : state) { | 
 |     jacobian->LeftMultiplyAndAccumulate(x.data(), y.data()); | 
 |     sum += y.norm(); | 
 |   } | 
 |   CHECK_NE(sum, 0.0); | 
 | } | 
 |  | 
 | BENCHMARK(BM_BlockSparseLeftMultiplyAndAccumulateBA); | 
 |  | 
 | static void BM_BlockSparseLeftMultiplyAndAccumulateUnstructured( | 
 |     benchmark::State& state) { | 
 |   BlockSparseMatrix::RandomMatrixOptions options; | 
 |   options.num_row_blocks = 100000; | 
 |   options.num_col_blocks = 10000; | 
 |   options.min_row_block_size = 1; | 
 |   options.min_col_block_size = 1; | 
 |   options.max_row_block_size = 10; | 
 |   options.max_col_block_size = 15; | 
 |   options.block_density = 5.0 / options.num_col_blocks; | 
 |   std::mt19937 prng; | 
 |  | 
 |   auto jacobian = BlockSparseMatrix::CreateRandomMatrix(options, prng); | 
 |   Vector x(jacobian->num_rows()); | 
 |   Vector y(jacobian->num_cols()); | 
 |   x.setRandom(); | 
 |   y.setRandom(); | 
 |   double sum = 0; | 
 |   for (auto _ : state) { | 
 |     jacobian->LeftMultiplyAndAccumulate(x.data(), y.data()); | 
 |     sum += y.norm(); | 
 |   } | 
 |   CHECK_NE(sum, 0.0); | 
 | } | 
 |  | 
 | BENCHMARK(BM_BlockSparseLeftMultiplyAndAccumulateUnstructured); | 
 |  | 
 | static void BM_CRSRightMultiplyAndAccumulateBA(benchmark::State& state) { | 
 |   std::mt19937 prng; | 
 |   auto bsm_jacobian = CreateFakeBundleAdjustmentJacobian( | 
 |       kNumCameras, kNumPoints, kCameraSize, kPointSize, kVisibility, prng); | 
 |  | 
 |   CompressedRowSparseMatrix jacobian(bsm_jacobian->num_rows(), | 
 |                                      bsm_jacobian->num_cols(), | 
 |                                      bsm_jacobian->num_nonzeros()); | 
 |   bsm_jacobian->ToCompressedRowSparseMatrix(&jacobian); | 
 |  | 
 |   Vector x(jacobian.num_cols()); | 
 |   Vector y(jacobian.num_rows()); | 
 |   x.setRandom(); | 
 |   y.setRandom(); | 
 |   double sum = 0; | 
 |   for (auto _ : state) { | 
 |     jacobian.RightMultiplyAndAccumulate(x.data(), y.data()); | 
 |     sum += y.norm(); | 
 |   } | 
 |   CHECK_NE(sum, 0.0); | 
 | } | 
 |  | 
 | BENCHMARK(BM_CRSRightMultiplyAndAccumulateBA); | 
 |  | 
 | static void BM_CRSRightMultiplyAndAccumulateUnstructured( | 
 |     benchmark::State& state) { | 
 |   BlockSparseMatrix::RandomMatrixOptions options; | 
 |   options.num_row_blocks = kNumRowBlocks; | 
 |   options.num_col_blocks = kNumColBlocks; | 
 |   options.min_row_block_size = kMinRowBlockSize; | 
 |   options.min_col_block_size = kMinColBlockSize; | 
 |   options.max_row_block_size = kMaxRowBlockSize; | 
 |   options.max_col_block_size = kMaxColBlockSize; | 
 |   options.block_density = kBlockDensity; | 
 |   std::mt19937 prng; | 
 |  | 
 |   auto bsm_jacobian = BlockSparseMatrix::CreateRandomMatrix(options, prng); | 
 |   CompressedRowSparseMatrix jacobian(bsm_jacobian->num_rows(), | 
 |                                      bsm_jacobian->num_cols(), | 
 |                                      bsm_jacobian->num_nonzeros()); | 
 |   bsm_jacobian->ToCompressedRowSparseMatrix(&jacobian); | 
 |  | 
 |   Vector x(jacobian.num_cols()); | 
 |   Vector y(jacobian.num_rows()); | 
 |   x.setRandom(); | 
 |   y.setRandom(); | 
 |   double sum = 0; | 
 |   for (auto _ : state) { | 
 |     jacobian.RightMultiplyAndAccumulate(x.data(), y.data()); | 
 |     sum += y.norm(); | 
 |   } | 
 |   CHECK_NE(sum, 0.0); | 
 | } | 
 |  | 
 | BENCHMARK(BM_CRSRightMultiplyAndAccumulateUnstructured); | 
 |  | 
 | static void BM_CRSLeftMultiplyAndAccumulateBA(benchmark::State& state) { | 
 |   std::mt19937 prng; | 
 |   // Perform setup here | 
 |   auto bsm_jacobian = CreateFakeBundleAdjustmentJacobian( | 
 |       kNumCameras, kNumPoints, kCameraSize, kPointSize, kVisibility, prng); | 
 |   CompressedRowSparseMatrix jacobian(bsm_jacobian->num_rows(), | 
 |                                      bsm_jacobian->num_cols(), | 
 |                                      bsm_jacobian->num_nonzeros()); | 
 |   bsm_jacobian->ToCompressedRowSparseMatrix(&jacobian); | 
 |  | 
 |   Vector x(jacobian.num_rows()); | 
 |   Vector y(jacobian.num_cols()); | 
 |   x.setRandom(); | 
 |   y.setRandom(); | 
 |   double sum = 0; | 
 |   for (auto _ : state) { | 
 |     // This code gets timed | 
 |     jacobian.LeftMultiplyAndAccumulate(x.data(), y.data()); | 
 |     sum += y.norm(); | 
 |   } | 
 |   CHECK_NE(sum, 0.0); | 
 | } | 
 |  | 
 | BENCHMARK(BM_CRSLeftMultiplyAndAccumulateBA); | 
 |  | 
 | static void BM_CRSLeftMultiplyAndAccumulateUnstructured( | 
 |     benchmark::State& state) { | 
 |   BlockSparseMatrix::RandomMatrixOptions options; | 
 |   options.num_row_blocks = kNumRowBlocks; | 
 |   options.num_col_blocks = kNumColBlocks; | 
 |   options.min_row_block_size = kMinRowBlockSize; | 
 |   options.min_col_block_size = kMinColBlockSize; | 
 |   options.max_row_block_size = kMaxRowBlockSize; | 
 |   options.max_col_block_size = kMaxColBlockSize; | 
 |   options.block_density = kBlockDensity; | 
 |   std::mt19937 prng; | 
 |  | 
 |   auto bsm_jacobian = BlockSparseMatrix::CreateRandomMatrix(options, prng); | 
 |   CompressedRowSparseMatrix jacobian(bsm_jacobian->num_rows(), | 
 |                                      bsm_jacobian->num_cols(), | 
 |                                      bsm_jacobian->num_nonzeros()); | 
 |   bsm_jacobian->ToCompressedRowSparseMatrix(&jacobian); | 
 |  | 
 |   Vector x(jacobian.num_rows()); | 
 |   Vector y(jacobian.num_cols()); | 
 |   x.setRandom(); | 
 |   y.setRandom(); | 
 |   double sum = 0; | 
 |   for (auto _ : state) { | 
 |     // This code gets timed | 
 |     jacobian.LeftMultiplyAndAccumulate(x.data(), y.data()); | 
 |     sum += y.norm(); | 
 |   } | 
 |   CHECK_NE(sum, 0.0); | 
 | } | 
 |  | 
 | BENCHMARK(BM_CRSLeftMultiplyAndAccumulateUnstructured); | 
 |  | 
 | #ifndef CERES_NO_CUDA | 
 | static void BM_CudaRightMultiplyAndAccumulateBA(benchmark::State& state) { | 
 |   std::mt19937 prng; | 
 |   auto jacobian = CreateFakeBundleAdjustmentJacobian( | 
 |       kNumCameras, kNumPoints, kCameraSize, kPointSize, kVisibility, prng); | 
 |   ContextImpl context; | 
 |   std::string message; | 
 |   context.InitCuda(&message); | 
 |   CompressedRowSparseMatrix jacobian_crs( | 
 |       jacobian->num_rows(), jacobian->num_cols(), jacobian->num_nonzeros()); | 
 |   jacobian->ToCompressedRowSparseMatrix(&jacobian_crs); | 
 |   CudaSparseMatrix cuda_jacobian(&context, jacobian_crs); | 
 |   CudaVector cuda_x(&context, 0); | 
 |   CudaVector cuda_y(&context, 0); | 
 |  | 
 |   Vector x(jacobian->num_cols()); | 
 |   Vector y(jacobian->num_rows()); | 
 |   x.setRandom(); | 
 |   y.setRandom(); | 
 |  | 
 |   cuda_x.CopyFromCpu(x); | 
 |   cuda_y.CopyFromCpu(y); | 
 |   double sum = 0; | 
 |   for (auto _ : state) { | 
 |     cuda_jacobian.RightMultiplyAndAccumulate(cuda_x, &cuda_y); | 
 |     sum += cuda_y.Norm(); | 
 |     CHECK_EQ(cudaDeviceSynchronize(), cudaSuccess); | 
 |   } | 
 |   CHECK_NE(sum, 0.0); | 
 | } | 
 |  | 
 | BENCHMARK(BM_CudaRightMultiplyAndAccumulateBA); | 
 |  | 
 | static void BM_CudaRightMultiplyAndAccumulateUnstructured( | 
 |     benchmark::State& state) { | 
 |   BlockSparseMatrix::RandomMatrixOptions options; | 
 |   options.num_row_blocks = kNumRowBlocks; | 
 |   options.num_col_blocks = kNumColBlocks; | 
 |   options.min_row_block_size = kMinRowBlockSize; | 
 |   options.min_col_block_size = kMinColBlockSize; | 
 |   options.max_row_block_size = kMaxRowBlockSize; | 
 |   options.max_col_block_size = kMaxColBlockSize; | 
 |   options.block_density = kBlockDensity; | 
 |   std::mt19937 prng; | 
 |  | 
 |   auto jacobian = BlockSparseMatrix::CreateRandomMatrix(options, prng); | 
 |   ContextImpl context; | 
 |   std::string message; | 
 |   context.InitCuda(&message); | 
 |   CompressedRowSparseMatrix jacobian_crs( | 
 |       jacobian->num_rows(), jacobian->num_cols(), jacobian->num_nonzeros()); | 
 |   jacobian->ToCompressedRowSparseMatrix(&jacobian_crs); | 
 |   CudaSparseMatrix cuda_jacobian(&context, jacobian_crs); | 
 |   CudaVector cuda_x(&context, 0); | 
 |   CudaVector cuda_y(&context, 0); | 
 |  | 
 |   Vector x(jacobian->num_cols()); | 
 |   Vector y(jacobian->num_rows()); | 
 |   x.setRandom(); | 
 |   y.setRandom(); | 
 |  | 
 |   cuda_x.CopyFromCpu(x); | 
 |   cuda_y.CopyFromCpu(y); | 
 |   double sum = 0; | 
 |   for (auto _ : state) { | 
 |     cuda_jacobian.RightMultiplyAndAccumulate(cuda_x, &cuda_y); | 
 |     sum += cuda_y.Norm(); | 
 |     CHECK_EQ(cudaDeviceSynchronize(), cudaSuccess); | 
 |   } | 
 |   CHECK_NE(sum, 0.0); | 
 | } | 
 |  | 
 | BENCHMARK(BM_CudaRightMultiplyAndAccumulateUnstructured); | 
 |  | 
 | static void BM_CudaLeftMultiplyAndAccumulateBA(benchmark::State& state) { | 
 |   std::mt19937 prng; | 
 |   auto jacobian = CreateFakeBundleAdjustmentJacobian( | 
 |       kNumCameras, kNumPoints, kCameraSize, kPointSize, kVisibility, prng); | 
 |   ContextImpl context; | 
 |   std::string message; | 
 |   context.InitCuda(&message); | 
 |   CompressedRowSparseMatrix jacobian_crs( | 
 |       jacobian->num_rows(), jacobian->num_cols(), jacobian->num_nonzeros()); | 
 |   jacobian->ToCompressedRowSparseMatrix(&jacobian_crs); | 
 |   CudaSparseMatrix cuda_jacobian(&context, jacobian_crs); | 
 |   CudaVector cuda_x(&context, 0); | 
 |   CudaVector cuda_y(&context, 0); | 
 |  | 
 |   Vector x(jacobian->num_rows()); | 
 |   Vector y(jacobian->num_cols()); | 
 |   x.setRandom(); | 
 |   y.setRandom(); | 
 |  | 
 |   cuda_x.CopyFromCpu(x); | 
 |   cuda_y.CopyFromCpu(y); | 
 |   double sum = 0; | 
 |   for (auto _ : state) { | 
 |     cuda_jacobian.LeftMultiplyAndAccumulate(cuda_x, &cuda_y); | 
 |     sum += cuda_y.Norm(); | 
 |     CHECK_EQ(cudaDeviceSynchronize(), cudaSuccess); | 
 |   } | 
 |   CHECK_NE(sum, 0.0); | 
 | } | 
 |  | 
 | BENCHMARK(BM_CudaLeftMultiplyAndAccumulateBA); | 
 |  | 
 | static void BM_CudaLeftMultiplyAndAccumulateUnstructured( | 
 |     benchmark::State& state) { | 
 |   BlockSparseMatrix::RandomMatrixOptions options; | 
 |   options.num_row_blocks = kNumRowBlocks; | 
 |   options.num_col_blocks = kNumColBlocks; | 
 |   options.min_row_block_size = kMinRowBlockSize; | 
 |   options.min_col_block_size = kMinColBlockSize; | 
 |   options.max_row_block_size = kMaxRowBlockSize; | 
 |   options.max_col_block_size = kMaxColBlockSize; | 
 |   options.block_density = kBlockDensity; | 
 |   std::mt19937 prng; | 
 |  | 
 |   auto jacobian = BlockSparseMatrix::CreateRandomMatrix(options, prng); | 
 |   ContextImpl context; | 
 |   std::string message; | 
 |   context.InitCuda(&message); | 
 |   CompressedRowSparseMatrix jacobian_crs( | 
 |       jacobian->num_rows(), jacobian->num_cols(), jacobian->num_nonzeros()); | 
 |   jacobian->ToCompressedRowSparseMatrix(&jacobian_crs); | 
 |   CudaSparseMatrix cuda_jacobian(&context, jacobian_crs); | 
 |   CudaVector cuda_x(&context, 0); | 
 |   CudaVector cuda_y(&context, 0); | 
 |  | 
 |   Vector x(jacobian->num_rows()); | 
 |   Vector y(jacobian->num_cols()); | 
 |   x.setRandom(); | 
 |   y.setRandom(); | 
 |  | 
 |   cuda_x.CopyFromCpu(x); | 
 |   cuda_y.CopyFromCpu(y); | 
 |   double sum = 0; | 
 |   for (auto _ : state) { | 
 |     cuda_jacobian.LeftMultiplyAndAccumulate(cuda_x, &cuda_y); | 
 |     sum += cuda_y.Norm(); | 
 |     CHECK_EQ(cudaDeviceSynchronize(), cudaSuccess); | 
 |   } | 
 |   CHECK_NE(sum, 0.0); | 
 | } | 
 |  | 
 | BENCHMARK(BM_CudaLeftMultiplyAndAccumulateBA); | 
 |  | 
 | #endif | 
 |  | 
 | }  // namespace ceres::internal | 
 |  | 
 | BENCHMARK_MAIN(); |