// internal/ceres/cuda_block_sparse_crs_view_test.cc - ceres-solver - Git at Google

 // Ceres Solver - A fast non-linear least squares minimizer
 // Copyright 2023 Google Inc. All rights reserved.
 // http://ceres-solver.org/
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are met:
 //
 // * Redistributions of source code must retain the above copyright notice,
 //   this list of conditions and the following disclaimer.
 // * Redistributions in binary form must reproduce the above copyright notice,
 //   this list of conditions and the following disclaimer in the documentation
 //   and/or other materials provided with the distribution.
 // * Neither the name of Google Inc. nor the names of its contributors may be
 //   used to endorse or promote products derived from this software without
 //   specific prior written permission.
 //
 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 // POSSIBILITY OF SUCH DAMAGE.
 //
 // Authors: dmitriy.korchemkin@gmail.com (Dmitriy Korchemkin)

 #include "ceres/cuda_block_sparse_crs_view.h"

 #include <glog/logging.h>
 #include <gtest/gtest.h>

 #include <numeric>

 #ifndef CERES_NO_CUDA

 namespace ceres::internal {
 class CudaBlockSparseCRSViewTest : public ::testing::Test {
  protected:
   void SetUp() final {
     std::string message;
     CHECK(context_.InitCuda(&message))
         << "InitCuda() failed because: " << message;

     BlockSparseMatrix::RandomMatrixOptions options;
     options.num_row_blocks = 1234;
     options.min_row_block_size = 1;
     options.max_row_block_size = 10;
     options.num_col_blocks = 567;
     options.min_col_block_size = 1;
     options.max_col_block_size = 10;
     options.block_density = 0.2;
     std::mt19937 rng;

     // Block-sparse matrix with order of values different from CRS
     block_sparse_non_crs_compatible_ =
         BlockSparseMatrix::CreateRandomMatrix(options, rng, true);
     std::iota(block_sparse_non_crs_compatible_->mutable_values(),
               block_sparse_non_crs_compatible_->mutable_values() +
                   block_sparse_non_crs_compatible_->num_nonzeros(),
               1);

     options.max_row_block_size = 1;
     // Block-sparse matrix with CRS order of values (row-blocks are rows)
     block_sparse_crs_compatible_rows_ =
         BlockSparseMatrix::CreateRandomMatrix(options, rng, true);
     std::iota(block_sparse_crs_compatible_rows_->mutable_values(),
               block_sparse_crs_compatible_rows_->mutable_values() +
                   block_sparse_crs_compatible_rows_->num_nonzeros(),
               1);
     // Block-sparse matrix with CRS order of values (single cell per row-block)
     auto bs = std::make_unique<CompressedRowBlockStructure>(
         *block_sparse_non_crs_compatible_->block_structure());

     int num_nonzeros = 0;
     for (auto& r : bs->rows) {
       const int num_cells = r.cells.size();
       if (num_cells > 1) {
         std::uniform_int_distribution<int> uniform_cell(0, num_cells - 1);
         const int selected_cell = uniform_cell(rng);
         std::swap(r.cells[0], r.cells[selected_cell]);
         r.cells.resize(1);
       }
       const int row_block_size = r.block.size;
       for (auto& c : r.cells) {
         c.position = num_nonzeros;
         const int col_block_size = bs->cols[c.block_id].size;
         num_nonzeros += col_block_size * row_block_size;
       }
     }
     block_sparse_crs_compatible_single_cell_ =
         std::make_unique<BlockSparseMatrix>(bs.release());
     std::iota(block_sparse_crs_compatible_single_cell_->mutable_values(),
               block_sparse_crs_compatible_single_cell_->mutable_values() +
                   block_sparse_crs_compatible_single_cell_->num_nonzeros(),
               1);
   }

   void Compare(const BlockSparseMatrix& bsm, const CudaSparseMatrix& csm) {
     ASSERT_EQ(csm.num_cols(), bsm.num_cols());
     ASSERT_EQ(csm.num_rows(), bsm.num_rows());
     ASSERT_EQ(csm.num_nonzeros(), bsm.num_nonzeros());
     const int num_rows = bsm.num_rows();
     const int num_cols = bsm.num_cols();
     Vector x(num_cols);
     Vector y(num_rows);
     CudaVector x_cuda(&context_, num_cols);
     CudaVector y_cuda(&context_, num_rows);
     Vector y_cuda_host(num_rows);

     for (int i = 0; i < num_cols; ++i) {
       x.setZero();
       y.setZero();
       y_cuda.SetZero();
       x[i] = 1.;
       x_cuda.CopyFromCpu(x);
       csm.RightMultiplyAndAccumulate(x_cuda, &y_cuda);
       bsm.RightMultiplyAndAccumulate(
           x.data(), y.data(), &context_, std::thread::hardware_concurrency());
       y_cuda.CopyTo(&y_cuda_host);
       // There will be up to 1 non-zero product per row, thus we expect an exact
       // match on 32-bit integer indices
       EXPECT_EQ((y - y_cuda_host).squaredNorm(), 0.);
     }
   }

   std::unique_ptr<BlockSparseMatrix> block_sparse_non_crs_compatible_;
   std::unique_ptr<BlockSparseMatrix> block_sparse_crs_compatible_rows_;
   std::unique_ptr<BlockSparseMatrix> block_sparse_crs_compatible_single_cell_;
   ContextImpl context_;
 };

 // The fixture whose order of values differs from CRS has to be reported as not
 // CRS-compatible, and the CRS matrix exposed by the view has to produce the
 // same products as the block-sparse matrix.
 TEST_F(CudaBlockSparseCRSViewTest, CreateUpdateValuesNonCompatible) {
   BlockSparseMatrix& block_sparse = *block_sparse_non_crs_compatible_;
   CudaBlockSparseCRSView crs_view(block_sparse, &context_);
   ASSERT_EQ(crs_view.IsCrsCompatible(), false);

   auto crs = crs_view.crs_matrix();
   Compare(block_sparse, *crs);
 }

 // The fixture in which every row-block is a single row has to be reported as
 // CRS-compatible, and the CRS matrix exposed by the view has to produce the
 // same products as the block-sparse matrix.
 TEST_F(CudaBlockSparseCRSViewTest, CreateUpdateValuesCompatibleRows) {
   BlockSparseMatrix& block_sparse = *block_sparse_crs_compatible_rows_;
   CudaBlockSparseCRSView crs_view(block_sparse, &context_);
   ASSERT_EQ(crs_view.IsCrsCompatible(), true);

   auto crs = crs_view.crs_matrix();
   Compare(block_sparse, *crs);
 }

 // The fixture with at most one cell per row-block has to be reported as
 // CRS-compatible, and the CRS matrix exposed by the view has to produce the
 // same products as the block-sparse matrix.
 TEST_F(CudaBlockSparseCRSViewTest, CreateUpdateValuesCompatibleSingleCell) {
   BlockSparseMatrix& block_sparse = *block_sparse_crs_compatible_single_cell_;
   CudaBlockSparseCRSView crs_view(block_sparse, &context_);
   ASSERT_EQ(crs_view.IsCrsCompatible(), true);

   auto crs = crs_view.crs_matrix();
   Compare(block_sparse, *crs);
 }
 }  // namespace ceres::internal

 #endif  // CERES_NO_CUDA
	// Ceres Solver - A fast non-linear least squares minimizer
	// Copyright 2023 Google Inc. All rights reserved.
	// http://ceres-solver.org/
	//
	// Redistribution and use in source and binary forms, with or without
	// modification, are permitted provided that the following conditions are met:
	//
	// * Redistributions of source code must retain the above copyright notice,
	// this list of conditions and the following disclaimer.
	// * Redistributions in binary form must reproduce the above copyright notice,
	// this list of conditions and the following disclaimer in the documentation
	// and/or other materials provided with the distribution.
	// * Neither the name of Google Inc. nor the names of its contributors may be
	// used to endorse or promote products derived from this software without
	// specific prior written permission.
	//
	// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
	// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
	// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
	// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
	// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
	// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
	// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
	// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
	// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
	// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
	// POSSIBILITY OF SUCH DAMAGE.
	//
	// Authors: dmitriy.korchemkin@gmail.com (Dmitriy Korchemkin)

	#include "ceres/cuda_block_sparse_crs_view.h"

	#include <glog/logging.h>
	#include <gtest/gtest.h>

	#include <numeric>

	#ifndef CERES_NO_CUDA

	namespace ceres::internal {
	class CudaBlockSparseCRSViewTest : public ::testing::Test {
	protected:
	void SetUp() final {
	std::string message;
	CHECK(context_.InitCuda(&message))
	<< "InitCuda() failed because: " << message;

	BlockSparseMatrix::RandomMatrixOptions options;
	options.num_row_blocks = 1234;
	options.min_row_block_size = 1;
	options.max_row_block_size = 10;
	options.num_col_blocks = 567;
	options.min_col_block_size = 1;
	options.max_col_block_size = 10;
	options.block_density = 0.2;
	std::mt19937 rng;

	// Block-sparse matrix with order of values different from CRS
	block_sparse_non_crs_compatible_ =
	BlockSparseMatrix::CreateRandomMatrix(options, rng, true);
	std::iota(block_sparse_non_crs_compatible_->mutable_values(),
	block_sparse_non_crs_compatible_->mutable_values() +
	block_sparse_non_crs_compatible_->num_nonzeros(),
	1);

	options.max_row_block_size = 1;
	// Block-sparse matrix with CRS order of values (row-blocks are rows)
	block_sparse_crs_compatible_rows_ =
	BlockSparseMatrix::CreateRandomMatrix(options, rng, true);
	std::iota(block_sparse_crs_compatible_rows_->mutable_values(),
	block_sparse_crs_compatible_rows_->mutable_values() +
	block_sparse_crs_compatible_rows_->num_nonzeros(),
	1);
	// Block-sparse matrix with CRS order of values (single cell per row-block)
	auto bs = std::make_unique<CompressedRowBlockStructure>(
	*block_sparse_non_crs_compatible_->block_structure());

	int num_nonzeros = 0;
	for (auto& r : bs->rows) {
	const int num_cells = r.cells.size();
	if (num_cells > 1) {
	std::uniform_int_distribution<int> uniform_cell(0, num_cells - 1);
	const int selected_cell = uniform_cell(rng);
	std::swap(r.cells[0], r.cells[selected_cell]);
	r.cells.resize(1);
	}
	const int row_block_size = r.block.size;
	for (auto& c : r.cells) {
	c.position = num_nonzeros;
	const int col_block_size = bs->cols[c.block_id].size;
	num_nonzeros += col_block_size * row_block_size;
	}
	}
	block_sparse_crs_compatible_single_cell_ =
	std::make_unique<BlockSparseMatrix>(bs.release());
	std::iota(block_sparse_crs_compatible_single_cell_->mutable_values(),
	block_sparse_crs_compatible_single_cell_->mutable_values() +
	block_sparse_crs_compatible_single_cell_->num_nonzeros(),
	1);
	}

	void Compare(const BlockSparseMatrix& bsm, const CudaSparseMatrix& csm) {
	ASSERT_EQ(csm.num_cols(), bsm.num_cols());
	ASSERT_EQ(csm.num_rows(), bsm.num_rows());
	ASSERT_EQ(csm.num_nonzeros(), bsm.num_nonzeros());
	const int num_rows = bsm.num_rows();
	const int num_cols = bsm.num_cols();
	Vector x(num_cols);
	Vector y(num_rows);
	CudaVector x_cuda(&context_, num_cols);
	CudaVector y_cuda(&context_, num_rows);
	Vector y_cuda_host(num_rows);

	for (int i = 0; i < num_cols; ++i) {
	x.setZero();
	y.setZero();
	y_cuda.SetZero();
	x[i] = 1.;
	x_cuda.CopyFromCpu(x);
	csm.RightMultiplyAndAccumulate(x_cuda, &y_cuda);
	bsm.RightMultiplyAndAccumulate(
	x.data(), y.data(), &context_, std::thread::hardware_concurrency());
	y_cuda.CopyTo(&y_cuda_host);
	// There will be up to 1 non-zero product per row, thus we expect an exact
	// match on 32-bit integer indices
	EXPECT_EQ((y - y_cuda_host).squaredNorm(), 0.);
	}
	}

	std::unique_ptr<BlockSparseMatrix> block_sparse_non_crs_compatible_;
	std::unique_ptr<BlockSparseMatrix> block_sparse_crs_compatible_rows_;
	std::unique_ptr<BlockSparseMatrix> block_sparse_crs_compatible_single_cell_;
	ContextImpl context_;
	};

	// Builds a CRS view of the fixture whose order of values differs from CRS,
	// expects the view to report that it is not CRS-compatible, and checks that
	// the view's CRS matrix matches the block-sparse matrix.
	TEST_F(CudaBlockSparseCRSViewTest, CreateUpdateValuesNonCompatible) {
	  auto view =
	      CudaBlockSparseCRSView(*block_sparse_non_crs_compatible_, &context_);
	  ASSERT_EQ(view.IsCrsCompatible(), false);

	  auto matrix = view.crs_matrix();
	  // Compare() takes both matrices by const reference, so the owning pointer
	  // and the handle returned by the view have to be dereferenced.
	  Compare(*block_sparse_non_crs_compatible_, *matrix);
	}

	// Builds a CRS view of the fixture in which every row-block is a single row,
	// expects the view to report CRS compatibility, and checks that the view's
	// CRS matrix matches the block-sparse matrix.
	TEST_F(CudaBlockSparseCRSViewTest, CreateUpdateValuesCompatibleRows) {
	  auto view =
	      CudaBlockSparseCRSView(*block_sparse_crs_compatible_rows_, &context_);
	  ASSERT_EQ(view.IsCrsCompatible(), true);

	  auto matrix = view.crs_matrix();
	  // Compare() takes both matrices by const reference, so the owning pointer
	  // and the handle returned by the view have to be dereferenced.
	  Compare(*block_sparse_crs_compatible_rows_, *matrix);
	}

	// Builds a CRS view of the fixture with at most one cell per row-block,
	// expects the view to report CRS compatibility, and checks that the view's
	// CRS matrix matches the block-sparse matrix.
	TEST_F(CudaBlockSparseCRSViewTest, CreateUpdateValuesCompatibleSingleCell) {
	  auto view = CudaBlockSparseCRSView(*block_sparse_crs_compatible_single_cell_,
	                                     &context_);
	  ASSERT_EQ(view.IsCrsCompatible(), true);

	  auto matrix = view.crs_matrix();
	  // Compare() takes both matrices by const reference, so the owning pointer
	  // and the handle returned by the view have to be dereferenced.
	  Compare(*block_sparse_crs_compatible_single_cell_, *matrix);
	}
	} // namespace ceres::internal

	#endif // CERES_NO_CUDA