| // Ceres Solver - A fast non-linear least squares minimizer |
| // Copyright 2023 Google Inc. All rights reserved. |
| // http://ceres-solver.org/ |
| // |
| // Redistribution and use in source and binary forms, with or without |
| // modification, are permitted provided that the following conditions are met: |
| // |
| // * Redistributions of source code must retain the above copyright notice, |
| // this list of conditions and the following disclaimer. |
| // * Redistributions in binary form must reproduce the above copyright notice, |
| // this list of conditions and the following disclaimer in the documentation |
| // and/or other materials provided with the distribution. |
| // * Neither the name of Google Inc. nor the names of its contributors may be |
| // used to endorse or promote products derived from this software without |
| // specific prior written permission. |
| // |
| // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
| // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
| // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
| // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
| // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
| // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
| // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
| // POSSIBILITY OF SUCH DAMAGE. |
| // |
| // Authors: dmitriy.korchemkin@gmail.com (Dmitriy Korchemkin) |
| |
| #include "ceres/cuda_block_sparse_crs_view.h" |
| |
| #include <glog/logging.h> |
| #include <gtest/gtest.h> |
| |
| #include <numeric> |
| |
| #ifndef CERES_NO_CUDA |
| |
| namespace ceres::internal { |
| class CudaBlockSparseCRSViewTest : public ::testing::Test { |
| protected: |
| void SetUp() final { |
| std::string message; |
| CHECK(context_.InitCuda(&message)) |
| << "InitCuda() failed because: " << message; |
| |
| BlockSparseMatrix::RandomMatrixOptions options; |
| options.num_row_blocks = 1234; |
| options.min_row_block_size = 1; |
| options.max_row_block_size = 10; |
| options.num_col_blocks = 567; |
| options.min_col_block_size = 1; |
| options.max_col_block_size = 10; |
| options.block_density = 0.2; |
| std::mt19937 rng; |
| |
| // Block-sparse matrix with order of values different from CRS |
| block_sparse_non_crs_compatible_ = |
| BlockSparseMatrix::CreateRandomMatrix(options, rng, true); |
| std::iota(block_sparse_non_crs_compatible_->mutable_values(), |
| block_sparse_non_crs_compatible_->mutable_values() + |
| block_sparse_non_crs_compatible_->num_nonzeros(), |
| 1); |
| |
| options.max_row_block_size = 1; |
| // Block-sparse matrix with CRS order of values (row-blocks are rows) |
| block_sparse_crs_compatible_rows_ = |
| BlockSparseMatrix::CreateRandomMatrix(options, rng, true); |
| std::iota(block_sparse_crs_compatible_rows_->mutable_values(), |
| block_sparse_crs_compatible_rows_->mutable_values() + |
| block_sparse_crs_compatible_rows_->num_nonzeros(), |
| 1); |
| // Block-sparse matrix with CRS order of values (single cell per row-block) |
| auto bs = std::make_unique<CompressedRowBlockStructure>( |
| *block_sparse_non_crs_compatible_->block_structure()); |
| |
| int num_nonzeros = 0; |
| for (auto& r : bs->rows) { |
| const int num_cells = r.cells.size(); |
| if (num_cells > 1) { |
| std::uniform_int_distribution<int> uniform_cell(0, num_cells - 1); |
| const int selected_cell = uniform_cell(rng); |
| std::swap(r.cells[0], r.cells[selected_cell]); |
| r.cells.resize(1); |
| } |
| const int row_block_size = r.block.size; |
| for (auto& c : r.cells) { |
| c.position = num_nonzeros; |
| const int col_block_size = bs->cols[c.block_id].size; |
| num_nonzeros += col_block_size * row_block_size; |
| } |
| } |
| block_sparse_crs_compatible_single_cell_ = |
| std::make_unique<BlockSparseMatrix>(bs.release()); |
| std::iota(block_sparse_crs_compatible_single_cell_->mutable_values(), |
| block_sparse_crs_compatible_single_cell_->mutable_values() + |
| block_sparse_crs_compatible_single_cell_->num_nonzeros(), |
| 1); |
| } |
| |
| void Compare(const BlockSparseMatrix& bsm, const CudaSparseMatrix& csm) { |
| ASSERT_EQ(csm.num_cols(), bsm.num_cols()); |
| ASSERT_EQ(csm.num_rows(), bsm.num_rows()); |
| ASSERT_EQ(csm.num_nonzeros(), bsm.num_nonzeros()); |
| const int num_rows = bsm.num_rows(); |
| const int num_cols = bsm.num_cols(); |
| Vector x(num_cols); |
| Vector y(num_rows); |
| CudaVector x_cuda(&context_, num_cols); |
| CudaVector y_cuda(&context_, num_rows); |
| Vector y_cuda_host(num_rows); |
| |
| for (int i = 0; i < num_cols; ++i) { |
| x.setZero(); |
| y.setZero(); |
| y_cuda.SetZero(); |
| x[i] = 1.; |
| x_cuda.CopyFromCpu(x); |
| csm.RightMultiplyAndAccumulate(x_cuda, &y_cuda); |
| bsm.RightMultiplyAndAccumulate( |
| x.data(), y.data(), &context_, std::thread::hardware_concurrency()); |
| y_cuda.CopyTo(&y_cuda_host); |
| // There will be up to 1 non-zero product per row, thus we expect an exact |
| // match on 32-bit integer indices |
| EXPECT_EQ((y - y_cuda_host).squaredNorm(), 0.); |
| } |
| } |
| |
| std::unique_ptr<BlockSparseMatrix> block_sparse_non_crs_compatible_; |
| std::unique_ptr<BlockSparseMatrix> block_sparse_crs_compatible_rows_; |
| std::unique_ptr<BlockSparseMatrix> block_sparse_crs_compatible_single_cell_; |
| ContextImpl context_; |
| }; |
| |
| TEST_F(CudaBlockSparseCRSViewTest, CreateUpdateValuesNonCompatible) { |
| auto view = |
| CudaBlockSparseCRSView(*block_sparse_non_crs_compatible_, &context_); |
| ASSERT_EQ(view.IsCrsCompatible(), false); |
| |
| auto matrix = view.crs_matrix(); |
| Compare(*block_sparse_non_crs_compatible_, *matrix); |
| } |
| |
| TEST_F(CudaBlockSparseCRSViewTest, CreateUpdateValuesCompatibleRows) { |
| auto view = |
| CudaBlockSparseCRSView(*block_sparse_crs_compatible_rows_, &context_); |
| ASSERT_EQ(view.IsCrsCompatible(), true); |
| |
| auto matrix = view.crs_matrix(); |
| Compare(*block_sparse_crs_compatible_rows_, *matrix); |
| } |
| |
| TEST_F(CudaBlockSparseCRSViewTest, CreateUpdateValuesCompatibleSingleCell) { |
| auto view = CudaBlockSparseCRSView(*block_sparse_crs_compatible_single_cell_, |
| &context_); |
| ASSERT_EQ(view.IsCrsCompatible(), true); |
| |
| auto matrix = view.crs_matrix(); |
| Compare(*block_sparse_crs_compatible_single_cell_, *matrix); |
| } |
| } // namespace ceres::internal |
| |
| #endif // CERES_NO_CUDA |