)]}'
{
  "commit": "bdee4d6172b940c9c2b46dda863baec0e6fc3308",
  "tree": "932a5daf02335c23d9487571bf5d5bc250f89abe",
  "parents": [
    "0f9de3daf44312bfad6d3a44c1a8793c64a28c3d"
  ],
  "author": {
    "name": "Dmitriy Korchemkin",
    "email": "dmitriy.korchemkin@gmail.com",
    "time": "Wed May 17 14:43:33 2023 +0300"
  },
  "committer": {
    "name": "Dmitriy Korchemkin",
    "email": "dmitriy.korchemkin@gmail.com",
    "time": "Fri May 26 01:12:47 2023 +0300"
  },
  "message": "Block-sparse to CRS conversion using block-structure\n\nInstead of pre-computing pemutation from block-sparse to CRS order,\nindex of value in CRS matrix is computed in the process of updating\nvalues using block-sparse structure.\n\nWhen it is possible to update values via a simple host-to-device copy,\nblock-sparse structure on GPU is discarded after computing CRS\nstructure.\n\nComputing index is significantly slower than using pre-computed\npermutation, but is still hidden by host-to-device transfer.\n\nOn problems from BAL dataset this results into reduction of extra\ngpu memory consumption from 33% (permutation stored as 32-bit indices)\nto ~10% for storing block-sparse structure.\n\nBenchmark results:\n\n\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d CUDA Device Properties \u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\nCuda version         : 11.8\nDevice ID            : 0\nDevice name          : NVIDIA GeForce RTX 2080 Ti\nTotal GPU memory     :  11012 MiB\nGPU memory available :  10852 MiB\nCompute capability   : 7.5\nWarp size            : 32\nMax threads per block: 1024\nMax threads per dim  : 1024 1024 64\nMax grid size        : 2147483647 65535 65535\nMultiprocessor count : 68\n\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\nRunning ./bin/evaluation_benchmark\nRun on (112 X 3200 MHz CPU s)\nCPU Caches:\n  L1 Data 32 KiB (x56)\n  L1 Instruction 32 KiB (x56)\n  L2 Unified 1024 KiB (x56)\n  L3 Unified 39424 KiB (x2)\nLoad Average: 24.58, 11.75, 8.52\n\n-----------------------------------------------------------------------\nBenchmark                                                          Time\n-----------------------------------------------------------------------\nUsing on-the-fly computation of CRS index corresponding to block-sparse\nindex:\n\nJacobianToCRS\u003cg/final/problem-4585-1324582-pre.txt\u003e             1607 ms\nJacobianToCRSView\u003cg/final/problem-4585-1324582-pre.txt\u003e          564 ms\nJacobianToCRSMatrix\u003cg/final/problem-4585-1324582-pre.txt\u003e       2226 ms\nJacobianToCRSViewUpdate\u003cg/final/problem-4585-1324582-pre.txt\u003e    228 ms\nJacobianToCRSMatrixUpdate\u003cg/final/problem-4585-1324582-pre.txt\u003e  400 ms\n\nUsing precomputed permutation:\nJacobianToCRS\u003c/final/problem-4585-1324582-pre.txt\u003e              1656 ms\nJacobianToCRSView\u003c/final/problem-4585-1324582-pre.txt\u003e           553 ms\nJacobianToCRSMatrix\u003c/final/problem-4585-1324582-pre.txt\u003e        2255 ms\nJacobianToCRSViewUpdate\u003c/final/problem-4585-1324582-pre.txt\u003e     228 ms\nJacobianToCRSMatrixUpdate\u003c/final/problem-4585-1324582-pre.txt\u003e   406 ms\n\nPerformance of JacobianToCRSViewUpdate is still limited by\nhost-to-device transfer, and JacobianToCRSView is faster than computing\nCRS structure on CPU.\n\nChange-Id: Ifb6910fb01ae6071400d36c277846fadc5857964\n",
  "tree_diff": [
    {
      "type": "modify",
      "old_id": "e4b124d0e4c6bfcaac40e075c7371b356a74483f",
      "old_mode": 33188,
      "old_path": ".github/workflows/linux.yml",
      "new_id": "be4bad1b6b8303e0a0fe809fd1fc528659a1003c",
      "new_mode": 33188,
      "new_path": ".github/workflows/linux.yml"
    },
    {
      "type": "modify",
      "old_id": "9cb99b492b4c7f0c23c043aeb89858d8123ef070",
      "old_mode": 33188,
      "old_path": "internal/ceres/CMakeLists.txt",
      "new_id": "b67b10ee6ae8e18b95482e473258f5ce7c4289df",
      "new_mode": 33188,
      "new_path": "internal/ceres/CMakeLists.txt"
    },
    {
      "type": "modify",
      "old_id": "ab1d746bece416a3093012ad44b370e7ea22635c",
      "old_mode": 33188,
      "old_path": "internal/ceres/block_sparse_matrix.cc",
      "new_id": "01dbfbe4190ac42e8053d3c100ce7e5c55ec6825",
      "new_mode": 33188,
      "new_path": "internal/ceres/block_sparse_matrix.cc"
    },
    {
      "type": "modify",
      "old_id": "0d99e1501876d264d6f36882847e7d8fd8427f27",
      "old_mode": 33188,
      "old_path": "internal/ceres/block_sparse_matrix.h",
      "new_id": "cd13b0672728222c66cc4c2ab3b4d9e618a6af0d",
      "new_mode": 33188,
      "new_path": "internal/ceres/block_sparse_matrix.h"
    },
    {
      "type": "modify",
      "old_id": "a03d01cd1e93b666adb29f54430a27cc631e72f9",
      "old_mode": 33188,
      "old_path": "internal/ceres/cuda_block_sparse_crs_view.cc",
      "new_id": "bb267dabd7fd436750a3b5e24d3e6a1597c0c93a",
      "new_mode": 33188,
      "new_path": "internal/ceres/cuda_block_sparse_crs_view.cc"
    },
    {
      "type": "modify",
      "old_id": "3ea8498ca5d0cef6a65d3224324b37a87f21c6d5",
      "old_mode": 33188,
      "old_path": "internal/ceres/cuda_block_sparse_crs_view.h",
      "new_id": "2ae8721257e2c20ea0a0f2597e197c2ac8e4b8a0",
      "new_mode": 33188,
      "new_path": "internal/ceres/cuda_block_sparse_crs_view.h"
    },
    {
      "type": "modify",
      "old_id": "f8d1cdf9fa8b02d012f3198916b929a39d1c5ed0",
      "old_mode": 33188,
      "old_path": "internal/ceres/cuda_block_sparse_crs_view_test.cc",
      "new_id": "e80d0268354da888cb2094dfb0efa27f04ac1cbf",
      "new_mode": 33188,
      "new_path": "internal/ceres/cuda_block_sparse_crs_view_test.cc"
    },
    {
      "type": "modify",
      "old_id": "8e549e1c5fd66d46223b1319c45e936ed6dcebbf",
      "old_mode": 33188,
      "old_path": "internal/ceres/cuda_block_structure.cc",
      "new_id": "5817cd7bcb788ca40182246a000e6e93e920d14f",
      "new_mode": 33188,
      "new_path": "internal/ceres/cuda_block_structure.cc"
    },
    {
      "type": "modify",
      "old_id": "a41d0b39a1a0ca6488d97b4595ba309b71cdeb8d",
      "old_mode": 33188,
      "old_path": "internal/ceres/cuda_block_structure.h",
      "new_id": "28e36b22b69b348b5258b2c43f2a3a272d39aa8e",
      "new_mode": 33188,
      "new_path": "internal/ceres/cuda_block_structure.h"
    },
    {
      "type": "modify",
      "old_id": "95cb26b47d8577b56534346c566acb312c23d219",
      "old_mode": 33188,
      "old_path": "internal/ceres/cuda_block_structure_test.cc",
      "new_id": "000b6bbf2ef4aaac67109ba917d21307d97b1878",
      "new_mode": 33188,
      "new_path": "internal/ceres/cuda_block_structure_test.cc"
    },
    {
      "type": "delete",
      "old_id": "3d93ae8e2330860bbffc74dabd947fc95290a920",
      "old_mode": 33188,
      "old_path": "internal/ceres/cuda_kernels.cu.cc",
      "new_id": "0000000000000000000000000000000000000000",
      "new_mode": 0,
      "new_path": "/dev/null"
    },
    {
      "type": "add",
      "old_id": "0000000000000000000000000000000000000000",
      "old_mode": 0,
      "old_path": "/dev/null",
      "new_id": "8be35534fbf6e3b4ab2207298bbe0faf0af31186",
      "new_mode": 33188,
      "new_path": "internal/ceres/cuda_kernels_bsm_to_crs.cu.cc"
    },
    {
      "type": "add",
      "old_id": "0000000000000000000000000000000000000000",
      "old_mode": 0,
      "old_path": "/dev/null",
      "new_id": "b6da69bdea26ce875abbed4febab05a41117550b",
      "new_mode": 33188,
      "new_path": "internal/ceres/cuda_kernels_bsm_to_crs.h"
    },
    {
      "type": "add",
      "old_id": "0000000000000000000000000000000000000000",
      "old_mode": 0,
      "old_path": "/dev/null",
      "new_id": "45d5d3b78d47d391c0e6bb50faef498b935fa95c",
      "new_mode": 33188,
      "new_path": "internal/ceres/cuda_kernels_utils.h"
    },
    {
      "type": "add",
      "old_id": "0000000000000000000000000000000000000000",
      "old_mode": 0,
      "old_path": "/dev/null",
      "new_id": "b5a8e40e14576259f5ef42d129f79afa76110189",
      "new_mode": 33188,
      "new_path": "internal/ceres/cuda_kernels_vector_ops.cu.cc"
    },
    {
      "type": "rename",
      "old_id": "61f945a80858d72bfa24f3bfb5ab4123eff2761d",
      "old_mode": 33188,
      "old_path": "internal/ceres/cuda_kernels.h",
      "new_id": "b1792011b10d1344747649eaec4c3ba6762a5151",
      "new_mode": 33188,
      "new_path": "internal/ceres/cuda_kernels_vector_ops.h",
      "score": 66
    },
    {
      "type": "rename",
      "old_id": "41036a39762c97ab347755a2026b0a892a56febb",
      "old_mode": 33188,
      "old_path": "internal/ceres/cuda_kernels_test.cc",
      "new_id": "7f6e2291bcbfc4228bab26ab529e8c6887fca3d2",
      "new_mode": 33188,
      "new_path": "internal/ceres/cuda_kernels_vector_ops_test.cc",
      "score": 99
    },
    {
      "type": "modify",
      "old_id": "0ae8c5deb47eee3d55605f79f46e3069581a7e07",
      "old_mode": 33188,
      "old_path": "internal/ceres/cuda_sparse_matrix.cc",
      "new_id": "e9ca9cc82b533c21685f5188aac81695dac1b00a",
      "new_mode": 33188,
      "new_path": "internal/ceres/cuda_sparse_matrix.cc"
    },
    {
      "type": "modify",
      "old_id": "ad551709ae242ca535fa4d1a5334e1f626487d09",
      "old_mode": 33188,
      "old_path": "internal/ceres/cuda_sparse_matrix.h",
      "new_id": "f5fcb911cbb4fbcbee4b6eaff490fce059e9c6e1",
      "new_mode": 33188,
      "new_path": "internal/ceres/cuda_sparse_matrix.h"
    },
    {
      "type": "modify",
      "old_id": "e2143de415bc9bbc22abc636cfb5a756a6218beb",
      "old_mode": 33188,
      "old_path": "internal/ceres/cuda_vector.cc",
      "new_id": "d434f3685dafd0c38e592ac9995043b06a6f1176",
      "new_mode": 33188,
      "new_path": "internal/ceres/cuda_vector.cc"
    },
    {
      "type": "modify",
      "old_id": "34f39473bc99dcacdcbf51d44868236ab9be9bea",
      "old_mode": 33188,
      "old_path": "internal/ceres/cuda_vector.h",
      "new_id": "46661cf1eb54e68ba54d6cb9efea928709d09c91",
      "new_mode": 33188,
      "new_path": "internal/ceres/cuda_vector.h"
    },
    {
      "type": "modify",
      "old_id": "f19ca7e79d72ca75d6196a4c5e821fd3ae43004b",
      "old_mode": 33188,
      "old_path": "internal/ceres/dense_cholesky.cc",
      "new_id": "fb9553683e683da57b0844b5c2bc6ea60218385d",
      "new_mode": 33188,
      "new_path": "internal/ceres/dense_cholesky.cc"
    },
    {
      "type": "modify",
      "old_id": "2ba03ed7aaee8172ab40086f60b8832b63942041",
      "old_mode": 33188,
      "old_path": "internal/ceres/evaluation_benchmark.cc",
      "new_id": "c23c881de250875244d22ead3e09cee95eb4cd08",
      "new_mode": 33188,
      "new_path": "internal/ceres/evaluation_benchmark.cc"
    }
  ]
}
