| // Ceres Solver - A fast non-linear least squares minimizer |
| // Copyright 2018 Google Inc. All rights reserved. |
| // http://ceres-solver.org/ |
| // |
| // Redistribution and use in source and binary forms, with or without |
| // modification, are permitted provided that the following conditions are met: |
| // |
| // * Redistributions of source code must retain the above copyright notice, |
| // this list of conditions and the following disclaimer. |
| // * Redistributions in binary form must reproduce the above copyright notice, |
| // this list of conditions and the following disclaimer in the documentation |
| // and/or other materials provided with the distribution. |
| // * Neither the name of Google Inc. nor the names of its contributors may be |
| // used to endorse or promote products derived from this software without |
| // specific prior written permission. |
| // |
| // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
| // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
| // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
| // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
| // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
| // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
| // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
| // POSSIBILITY OF SUCH DAMAGE. |
| // |
| // Authors: sameeragarwal@google.com (Sameer Agarwal) |
| |
#include <cstddef>
#include <iostream>
#include <vector>

#include "Eigen/Dense"
#include "benchmark/benchmark.h"
#include "ceres/small_blas.h"
| |
| namespace ceres { |
| |
// Benchmarking matrix-vector multiply routines and optimizing memory
// access requires that we make sure that the operands are not just
// sitting in the cache. So, as the benchmarking routine iterates, we
// need to multiply new/different matrices and vectors. Allocating/creating
// these objects in the benchmarking loop is too heavy duty, so we
// create them beforehand and cycle through them in the benchmark.
// This class, given the size of the matrix, creates such matrix and
// vector objects for use in the benchmark.
//
// All operands live in one contiguous buffer laid out as
//
//   [A_0 | b_0 | c_0 | A_1 | b_1 | c_1 | ...]
//
// where each A_i holds rows * cols doubles, each b_i holds cols doubles
// and each c_i holds rows doubles. Every entry of the buffer is
// initialized to 1.00000000000001.
class MatrixVectorMultiplyData {
 public:
  // Allocates num_elements() operand triples (A, b, c) for a
  // rows x cols matrix and records a pointer to the start of every
  // operand. rows and cols are expected to be non-negative.
  MatrixVectorMultiplyData(int rows, int cols) : num_elements_(1000) {
    // Size the single backing buffer from the actual operand dimensions
    // (rather than a fixed per-element budget) so that the pointers
    // handed out below can never run past the end of the allocation,
    // whatever matrix shape is requested.
    const std::size_t matrix_size =
        static_cast<std::size_t>(rows) * static_cast<std::size_t>(cols);
    const std::size_t element_size = matrix_size +
                                     static_cast<std::size_t>(cols) +
                                     static_cast<std::size_t>(rows);
    data_.resize(static_cast<std::size_t>(num_elements_) * element_size,
                 1.00000000000001);

    // Each element is three pointers, corresponding to the three
    // operands of the expression c = A * b.
    ptrs_.resize(3 * num_elements_, nullptr);
    double* p = data_.data();
    for (int i = 0; i < num_elements_; ++i) {
      // Matrix A.
      ptrs_[3 * i] = p;
      p += matrix_size;
      // Vector b.
      ptrs_[3 * i + 1] = p;
      p += cols;
      // Vector c.
      ptrs_[3 * i + 2] = p;
      p += rows;
    }
  }

  // Number of (A, b, c) triples stored in the buffer.
  int num_elements() const { return num_elements_; }

  // Start of the backing buffer (equal to ptrs()[0]).
  double* data() { return data_.data(); }

  // Pointer table: entries 3 * i, 3 * i + 1 and 3 * i + 2 point to
  // A, b and c of the i-th triple respectively.
  const std::vector<double*>& ptrs() const { return ptrs_; }

 private:
  int num_elements_;
  std::vector<double> data_;
  std::vector<double*> ptrs_;
};
| |
| // Run on (8 X 2200 MHz CPU s) |
| // 2018-02-06 21:23:59 |
| // --------------------------------------------------------------------------- |
| // Benchmark Time CPU Iterations |
| // --------------------------------------------------------------------------- |
| // BM_MatrixVectorMultiplyDynamic/1/1 4 ns 4 ns 165611093 |
| // BM_MatrixVectorMultiplyDynamic/1/2 5 ns 5 ns 140648672 |
| // BM_MatrixVectorMultiplyDynamic/1/3 5 ns 5 ns 139414459 |
| // BM_MatrixVectorMultiplyDynamic/1/4 5 ns 5 ns 144247512 |
| // BM_MatrixVectorMultiplyDynamic/1/6 6 ns 6 ns 106639042 |
| // BM_MatrixVectorMultiplyDynamic/1/8 7 ns 7 ns 102367617 |
| // BM_MatrixVectorMultiplyDynamic/1/10 9 ns 9 ns 82419847 |
| // BM_MatrixVectorMultiplyDynamic/1/12 10 ns 10 ns 65129002 |
| // BM_MatrixVectorMultiplyDynamic/1/16 12 ns 12 ns 53500867 |
| // BM_MatrixVectorMultiplyDynamic/1/20 16 ns 16 ns 46067179 |
| // BM_MatrixVectorMultiplyDynamic/2/1 5 ns 5 ns 128880215 |
| // BM_MatrixVectorMultiplyDynamic/2/2 8 ns 8 ns 81938429 |
| // BM_MatrixVectorMultiplyDynamic/2/3 10 ns 10 ns 68807565 |
| // BM_MatrixVectorMultiplyDynamic/2/4 8 ns 8 ns 91833388 |
| // BM_MatrixVectorMultiplyDynamic/2/6 10 ns 10 ns 64031028 |
| // BM_MatrixVectorMultiplyDynamic/2/8 12 ns 12 ns 59788179 |
| // BM_MatrixVectorMultiplyDynamic/2/10 15 ns 15 ns 44737868 |
| // BM_MatrixVectorMultiplyDynamic/2/12 17 ns 17 ns 37423949 |
| // BM_MatrixVectorMultiplyDynamic/2/16 22 ns 22 ns 33470723 |
| // BM_MatrixVectorMultiplyDynamic/2/20 26 ns 26 ns 27076057 |
| // BM_MatrixVectorMultiplyDynamic/3/1 6 ns 6 ns 100932908 |
| // BM_MatrixVectorMultiplyDynamic/3/2 12 ns 12 ns 65591589 |
| // BM_MatrixVectorMultiplyDynamic/3/3 14 ns 14 ns 48182819 |
| // BM_MatrixVectorMultiplyDynamic/3/4 11 ns 11 ns 61770338 |
| // BM_MatrixVectorMultiplyDynamic/3/6 15 ns 15 ns 44712435 |
| // BM_MatrixVectorMultiplyDynamic/3/8 18 ns 18 ns 35177294 |
| // BM_MatrixVectorMultiplyDynamic/3/10 21 ns 21 ns 32164683 |
| // BM_MatrixVectorMultiplyDynamic/3/12 24 ns 24 ns 28222279 |
| // BM_MatrixVectorMultiplyDynamic/3/16 30 ns 30 ns 23050731 |
| // BM_MatrixVectorMultiplyDynamic/3/20 38 ns 38 ns 17832714 |
| // BM_MatrixVectorMultiplyDynamic/4/1 8 ns 8 ns 85763293 |
| // BM_MatrixVectorMultiplyDynamic/4/2 16 ns 16 ns 41959886 |
| // BM_MatrixVectorMultiplyDynamic/4/3 19 ns 19 ns 36674176 |
| // BM_MatrixVectorMultiplyDynamic/4/4 15 ns 15 ns 43561867 |
| // BM_MatrixVectorMultiplyDynamic/4/6 21 ns 21 ns 34278607 |
| // BM_MatrixVectorMultiplyDynamic/4/8 22 ns 22 ns 31484163 |
| // BM_MatrixVectorMultiplyDynamic/4/10 26 ns 26 ns 25605197 |
| // BM_MatrixVectorMultiplyDynamic/4/12 31 ns 31 ns 23380172 |
| // BM_MatrixVectorMultiplyDynamic/4/16 38 ns 38 ns 18054638 |
| // BM_MatrixVectorMultiplyDynamic/4/20 49 ns 49 ns 14771703 |
| void BM_MatrixVectorMultiplyDynamic(benchmark::State& state) { |
| const int rows = state.range(0); |
| const int cols = state.range(1); |
| MatrixVectorMultiplyData data(rows, cols); |
| const std::vector<double*> ptrs = data.ptrs(); |
| const int num_elements = data.num_elements(); |
| |
| int i = 0; |
| for (auto _ : state) { |
| double* a_ptr = ptrs[3 * i]; |
| double* b_ptr = ptrs[3 * i + 1]; |
| double* c_ptr = ptrs[3 * i + 2]; |
| internal::MatrixVectorMultiply<Eigen::Dynamic, Eigen::Dynamic, 1>( |
| a_ptr, rows, cols, b_ptr, c_ptr); |
| i = (i + 1) % num_elements; |
| } |
| } |
| |
| // Each ArgPair specifies a row and column size of the matrix. |
| BENCHMARK(BM_MatrixVectorMultiplyDynamic) |
| ->ArgPair(1, 1) |
| ->ArgPair(1, 2) |
| ->ArgPair(1, 3) |
| ->ArgPair(1, 4) |
| ->ArgPair(1, 6) |
| ->ArgPair(1, 8) |
| ->ArgPair(1, 10) |
| ->ArgPair(1, 12) |
| ->ArgPair(1, 16) |
| ->ArgPair(1, 20) |
| ->ArgPair(2, 1) |
| ->ArgPair(2, 2) |
| ->ArgPair(2, 3) |
| ->ArgPair(2, 4) |
| ->ArgPair(2, 6) |
| ->ArgPair(2, 8) |
| ->ArgPair(2, 10) |
| ->ArgPair(2, 12) |
| ->ArgPair(2, 16) |
| ->ArgPair(2, 20) |
| ->ArgPair(3, 1) |
| ->ArgPair(3, 2) |
| ->ArgPair(3, 3) |
| ->ArgPair(3, 4) |
| ->ArgPair(3, 6) |
| ->ArgPair(3, 8) |
| ->ArgPair(3, 10) |
| ->ArgPair(3, 12) |
| ->ArgPair(3, 16) |
| ->ArgPair(3, 20) |
| ->ArgPair(4, 1) |
| ->ArgPair(4, 2) |
| ->ArgPair(4, 3) |
| ->ArgPair(4, 4) |
| ->ArgPair(4, 6) |
| ->ArgPair(4, 8) |
| ->ArgPair(4, 10) |
| ->ArgPair(4, 12) |
| ->ArgPair(4, 16) |
| ->ArgPair(4, 20); |
| |
| // Run on (8 X 2200 MHz CPU s) |
| // 2018-02-06 21:18:17 |
| // ------------------------------------------------------------------------------------ |
| // Benchmark Time CPU Iterations |
| // ------------------------------------------------------------------------------------ |
| // BM_MatrixTransposeVectorMultiplyDynamic/1/1 5 ns 5 ns 139356174 |
| // BM_MatrixTransposeVectorMultiplyDynamic/1/2 6 ns 6 ns 120800041 |
| // BM_MatrixTransposeVectorMultiplyDynamic/1/3 7 ns 7 ns 100267858 |
| // BM_MatrixTransposeVectorMultiplyDynamic/1/4 9 ns 9 ns 70778564 |
| // BM_MatrixTransposeVectorMultiplyDynamic/1/6 14 ns 14 ns 47748651 |
| // BM_MatrixTransposeVectorMultiplyDynamic/1/8 16 ns 16 ns 43903663 |
| // BM_MatrixTransposeVectorMultiplyDynamic/1/10 18 ns 18 ns 34838177 |
| // BM_MatrixTransposeVectorMultiplyDynamic/1/12 20 ns 20 ns 36138731 |
| // BM_MatrixTransposeVectorMultiplyDynamic/1/16 23 ns 23 ns 27063704 |
| // BM_MatrixTransposeVectorMultiplyDynamic/1/20 29 ns 29 ns 23400336 |
| // BM_MatrixTransposeVectorMultiplyDynamic/2/1 6 ns 6 ns 121572101 |
| // BM_MatrixTransposeVectorMultiplyDynamic/2/2 8 ns 8 ns 82896155 |
| // BM_MatrixTransposeVectorMultiplyDynamic/2/3 12 ns 12 ns 56705415 |
| // BM_MatrixTransposeVectorMultiplyDynamic/2/4 14 ns 14 ns 51241509 |
| // BM_MatrixTransposeVectorMultiplyDynamic/2/6 18 ns 18 ns 38377403 |
| // BM_MatrixTransposeVectorMultiplyDynamic/2/8 25 ns 25 ns 28560121 |
| // BM_MatrixTransposeVectorMultiplyDynamic/2/10 29 ns 29 ns 23608052 |
| // BM_MatrixTransposeVectorMultiplyDynamic/2/12 33 ns 33 ns 20668478 |
| // BM_MatrixTransposeVectorMultiplyDynamic/2/16 44 ns 44 ns 16335446 |
| // BM_MatrixTransposeVectorMultiplyDynamic/2/20 53 ns 53 ns 13462315 |
| // BM_MatrixTransposeVectorMultiplyDynamic/3/1 6 ns 6 ns 117031415 |
| // BM_MatrixTransposeVectorMultiplyDynamic/3/2 10 ns 10 ns 71040747 |
| // BM_MatrixTransposeVectorMultiplyDynamic/3/3 14 ns 14 ns 49453538 |
| // BM_MatrixTransposeVectorMultiplyDynamic/3/4 17 ns 17 ns 39161935 |
| // BM_MatrixTransposeVectorMultiplyDynamic/3/6 22 ns 22 ns 32118490 |
| // BM_MatrixTransposeVectorMultiplyDynamic/3/8 28 ns 28 ns 25295689 |
| // BM_MatrixTransposeVectorMultiplyDynamic/3/10 34 ns 34 ns 20900389 |
| // BM_MatrixTransposeVectorMultiplyDynamic/3/12 39 ns 39 ns 17934922 |
| // BM_MatrixTransposeVectorMultiplyDynamic/3/16 51 ns 51 ns 10000000 |
| // BM_MatrixTransposeVectorMultiplyDynamic/3/20 64 ns 64 ns 10594824 |
| // BM_MatrixTransposeVectorMultiplyDynamic/4/1 7 ns 7 ns 98903583 |
| // BM_MatrixTransposeVectorMultiplyDynamic/4/2 13 ns 13 ns 57301899 |
| // BM_MatrixTransposeVectorMultiplyDynamic/4/3 16 ns 16 ns 44622083 |
| // BM_MatrixTransposeVectorMultiplyDynamic/4/4 18 ns 18 ns 39645007 |
| // BM_MatrixTransposeVectorMultiplyDynamic/4/6 26 ns 26 ns 27239262 |
| // BM_MatrixTransposeVectorMultiplyDynamic/4/8 33 ns 33 ns 20869171 |
| // BM_MatrixTransposeVectorMultiplyDynamic/4/10 39 ns 39 ns 17169614 |
| // BM_MatrixTransposeVectorMultiplyDynamic/4/12 47 ns 47 ns 15045286 |
| // BM_MatrixTransposeVectorMultiplyDynamic/4/16 62 ns 62 ns 11437535 |
| // BM_MatrixTransposeVectorMultiplyDynamic/4/20 77 ns 77 ns 8351428 |
| void BM_MatrixTransposeVectorMultiplyDynamic(benchmark::State& state) { |
| const int rows = state.range(0); |
| const int cols = state.range(1); |
| MatrixVectorMultiplyData data(rows, cols); |
| const std::vector<double*> ptrs = data.ptrs(); |
| const int num_elements = data.num_elements(); |
| |
| int i = 0; |
| for (auto _ : state) { |
| double* a_ptr = ptrs[3 * i]; |
| double* b_ptr = ptrs[3 * i + 1]; |
| double* c_ptr = ptrs[3 * i + 2]; |
| internal::MatrixTransposeVectorMultiply<Eigen::Dynamic, Eigen::Dynamic, 1>( |
| a_ptr, rows, cols, c_ptr, b_ptr); |
| i = (i + 1) % num_elements; |
| } |
| } |
| |
| // Each ArgPair specifies a row and column size of the matrix. |
| BENCHMARK(BM_MatrixTransposeVectorMultiplyDynamic) |
| ->ArgPair(1, 1) |
| ->ArgPair(1, 2) |
| ->ArgPair(1, 3) |
| ->ArgPair(1, 4) |
| ->ArgPair(1, 6) |
| ->ArgPair(1, 8) |
| ->ArgPair(1, 10) |
| ->ArgPair(1, 12) |
| ->ArgPair(1, 16) |
| ->ArgPair(1, 20) |
| ->ArgPair(2, 1) |
| ->ArgPair(2, 2) |
| ->ArgPair(2, 3) |
| ->ArgPair(2, 4) |
| ->ArgPair(2, 6) |
| ->ArgPair(2, 8) |
| ->ArgPair(2, 10) |
| ->ArgPair(2, 12) |
| ->ArgPair(2, 16) |
| ->ArgPair(2, 20) |
| ->ArgPair(3, 1) |
| ->ArgPair(3, 2) |
| ->ArgPair(3, 3) |
| ->ArgPair(3, 4) |
| ->ArgPair(3, 6) |
| ->ArgPair(3, 8) |
| ->ArgPair(3, 10) |
| ->ArgPair(3, 12) |
| ->ArgPair(3, 16) |
| ->ArgPair(3, 20) |
| ->ArgPair(4, 1) |
| ->ArgPair(4, 2) |
| ->ArgPair(4, 3) |
| ->ArgPair(4, 4) |
| ->ArgPair(4, 6) |
| ->ArgPair(4, 8) |
| ->ArgPair(4, 10) |
| ->ArgPair(4, 12) |
| ->ArgPair(4, 16) |
| ->ArgPair(4, 20); |
| |
| } // namespace ceres |
| |
// Program entry point: Google Benchmark's BENCHMARK_MAIN() macro supplies
// main(), which runs the benchmarks registered above.
BENCHMARK_MAIN();