)]}'
{
  "commit": "88e08cfe7158b59b848df70721d6fa29592af30d",
  "tree": "449a579bdc7e34f4fd2a306578878c44b9aeb811",
  "parents": [
    "290b34ef058eb83aae64236b26742867a7a9431d"
  ],
  "author": {
    "name": "Joydeep Biswas",
    "email": "joydeepb@cs.utexas.edu",
    "time": "Sat Jun 04 20:17:06 2022 -0500"
  },
  "committer": {
    "name": "Joydeep Biswas",
    "email": "joydeepb@cs.utexas.edu",
    "time": "Wed Jul 13 06:55:31 2022 -0500"
  },
  "message": "Mixed-precision Iterative Refinement Cholesky With CUDA\n\n* Created a new class CUDADenseCholeskyMixedPrecision, which performs\n  Cholesky factorization and solving in single (fp32) precision, and\n  optionally performs iterative refinement.\n* Added CUDA kernels for mixed-precision solve operations\n* Added more detailed timing information to the FullReport about Schur\n  elimination, reduced system solves, and back-substitution.\n\nSome test performance numbers follow.\nAll tests were performed on an Ubuntu 20.04 desktop with an\nIntel Core i9-9940X CPU and Nvidia Quadro RTX 6000 GPU.\n\nTests were launched as:\n./bin/bundle_adjuster --input (problem_file) \\\n    --num_iterations 20\n    --num_threads 28\n    --linear_solver dense_schur\n    --dense_linear_algebra_library (cuda|lapack)\n    [--mixed_precision_solves]\n\n\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\nproblem-21-11315-pre.txt\n\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\n\n--------------------------------------------------\nCuda Mixed Precision\n--------------------------------------------------\nCost:\nInitial                          4.413239e+06\nFinal                            3.037864e+04\nChange                           4.382861e+06\n  Linear solver                      0.250703 (14)\n  ├ Schur eliminate                  0.234025 (14)\n  ├ Reduced solve                    0.006643 (14)\n  └ Backsubstitute                   0.006598 (12)\n\n--------------------------------------------------\nCuda\n--------------------------------------------------\nCost:\nInitial                          4.413239e+06\nFinal                            3.037864e+04\nChange                           4.382861e+06\n  Linear solver                      0.257517 (12)\n  ├ Schur eliminate                  0.233518 (12)\n  ├ Reduced solve                    0.010621 (12)\n  └ Backsubstitute                   0.007124 (12)\n\n--------------------------------------------------\nLapack (OpenBLAS)\n--------------------------------------------------\nCost:\nInitial                          4.413239e+06\nFinal                            3.037864e+04\nChange                           4.382861e+06\n  Linear solver                      0.332349 (12)\n  ├ Schur eliminate                  0.274748 (12)\n  ├ Reduced solve                    0.015966 (12)\n  └ Backsubstitute                   0.034192 (12)\n\n\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\nproblem-257-65132-pre.txt\n\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\n\n--------------------------------------------------\nCuda Mixed Precision\n--------------------------------------------------\nCost:\nInitial                          2.456242e+07\nFinal                            9.677593e+04\nChange                           2.446565e+07\n  Linear solver                      1.332367 (20)\n  ├ Schur eliminate                  1.021365 (20)\n  ├ Reduced solve                    0.195472 (20)\n  └ Backsubstitute                   0.075582 (20)\n\n--------------------------------------------------\nCuda\n--------------------------------------------------\nCost:\nInitial                          2.456242e+07\nFinal                            9.677547e+04\nChange                           2.446565e+07\n  Linear solver                      1.810176 (20)\n  ├ Schur eliminate                  1.012862 (20)\n  ├ Reduced solve                    0.678704 (20)\n  └ Backsubstitute                   0.083925 (20)\n\n--------------------------------------------------\nLapack (OpenBLAS)\n--------------------------------------------------\nCost:\nInitial                          2.456242e+07\nFinal                            9.677547e+04\nChange                           2.446565e+07\n  Linear solver                      2.376273 (20)\n  ├ Schur eliminate                  0.987613 (20)\n  ├ Reduced solve                    1.043873 (20)\n  └ Backsubstitute                   0.310402 (20)\n\n\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\nproblem-744-543562-pre.txt\n\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\n\n--------------------------------------------------\nCuda Mixed Precision\n--------------------------------------------------\nCost:\nInitial                          1.434881e+08\nFinal                            1.546895e+06\nChange                           1.419412e+08\n  Linear solver                     27.010088 (20)\n  ├ Schur eliminate                 24.362433 (20)\n  ├ Reduced solve                    1.428542 (20)\n  └ Backsubstitute                   0.814266 (20)\n\n--------------------------------------------------\nCuda\n--------------------------------------------------\nCost:\nInitial                          1.434881e+08\nFinal                            1.546895e+06\nChange                           1.419412e+08\n  Linear solver                     32.342513 (20)\n  ├ Schur eliminate                 24.638819 (20)\n  ├ Reduced solve                    6.492090 (20)\n  └ Backsubstitute                   0.802184 (20)\n\n--------------------------------------------------\nLapack (OpenBLAS)\n--------------------------------------------------\nCost:\nInitial                          1.434881e+08\nFinal                            1.546895e+06\nChange                           1.419412e+08\n  Linear solver                     34.152224 (20)\n  ├ Schur eliminate                 24.183723 (20)\n  ├ Reduced solve                    8.784413 (20)\n  └ Backsubstitute                   0.795044 (20)\n\nChange-Id: I178887e776d8f4a1e8abb99bbc205bf8c278bf79\n",
  "tree_diff": [
    {
      "type": "modify",
      "old_id": "8d6094c36ac33eff850fd33ef7cd1f54f74d52e3",
      "old_mode": 33188,
      "old_path": "CMakeLists.txt",
      "new_id": "82014a95662cec3d3c0892076b5566424d503565",
      "new_mode": 33188,
      "new_path": "CMakeLists.txt"
    },
    {
      "type": "modify",
      "old_id": "5e6b0a72fb39c5be46220a1645d46c781b05194a",
      "old_mode": 33188,
      "old_path": "docs/source/installation.rst",
      "new_id": "d6bae75beaec578dbb810a26b66780ac1ac285fb",
      "new_mode": 33188,
      "new_path": "docs/source/installation.rst"
    },
    {
      "type": "modify",
      "old_id": "097276b2376ed92265dedaac6b0ea8f8bfed34de",
      "old_mode": 33188,
      "old_path": "include/ceres/solver.h",
      "new_id": "3b1531841a5d6400ce4fe762be3939afe4f6304a",
      "new_mode": 33188,
      "new_path": "include/ceres/solver.h"
    },
    {
      "type": "modify",
      "old_id": "4e5008b2adea3732eec3d02b2c918e8f3d59d174",
      "old_mode": 33188,
      "old_path": "internal/ceres/CMakeLists.txt",
      "new_id": "ae22a9d69a91b9a4a77af1bdb2e65b92763dd53f",
      "new_mode": 33188,
      "new_path": "internal/ceres/CMakeLists.txt"
    },
    {
      "type": "add",
      "old_id": "0000000000000000000000000000000000000000",
      "old_mode": 0,
      "old_path": "/dev/null",
      "new_id": "2fd76b5626b051887c9f77c038258f72e60178b2",
      "new_mode": 33188,
      "new_path": "internal/ceres/ceres_cuda_kernels.cu"
    },
    {
      "type": "add",
      "old_id": "0000000000000000000000000000000000000000",
      "old_mode": 0,
      "old_path": "/dev/null",
      "new_id": "989399f4f2a447b2b7006bd6c346bcc4d631ac36",
      "new_mode": 33188,
      "new_path": "internal/ceres/ceres_cuda_kernels.h"
    },
    {
      "type": "modify",
      "old_id": "a1cf78420d5aec93cd254957d35218477f851a97",
      "old_mode": 33188,
      "old_path": "internal/ceres/cuda_buffer.h",
      "new_id": "8868e1a8af36ea079487c3bec2a97814a6bae50a",
      "new_mode": 33188,
      "new_path": "internal/ceres/cuda_buffer.h"
    },
    {
      "type": "modify",
      "old_id": "13dc34b34ced4c45815f68b196ad5cc20eefb02f",
      "old_mode": 33188,
      "old_path": "internal/ceres/cuda_dense_cholesky_test.cc",
      "new_id": "b9acc990f7721cd32283c81c2756bd7484d9491d",
      "new_mode": 33188,
      "new_path": "internal/ceres/cuda_dense_cholesky_test.cc"
    },
    {
      "type": "modify",
      "old_id": "a10f3117d0da7721f6b93ffd8b2a687c39f8517a",
      "old_mode": 33188,
      "old_path": "internal/ceres/dense_cholesky.cc",
      "new_id": "e2c5036457484aaf3cba676d38cbcb86b80f8e52",
      "new_mode": 33188,
      "new_path": "internal/ceres/dense_cholesky.cc"
    },
    {
      "type": "modify",
      "old_id": "cc8642c90af8716f250c6471b95ecd051d144d95",
      "old_mode": 33188,
      "old_path": "internal/ceres/dense_cholesky.h",
      "new_id": "0593875f5604cb0906ec81bf58be7cf15dac0222",
      "new_mode": 33188,
      "new_path": "internal/ceres/dense_cholesky.h"
    },
    {
      "type": "modify",
      "old_id": "7866d5c280ceabd827936ebfc5010ac096157b5b",
      "old_mode": 33188,
      "old_path": "internal/ceres/dense_cholesky_test.cc",
      "new_id": "a42534372f30b16ccae0365b67af5c141b112541",
      "new_mode": 33188,
      "new_path": "internal/ceres/dense_cholesky_test.cc"
    },
    {
      "type": "modify",
      "old_id": "9503843402b23e47b333287e15e7917e91bd9cd2",
      "old_mode": 33188,
      "old_path": "internal/ceres/solver.cc",
      "new_id": "93551f7074cd1a51681463154cbf5e35bdebc0cf",
      "new_mode": 33188,
      "new_path": "internal/ceres/solver.cc"
    }
  ]
}
