Multithread covariance estimation.
1. Multithread the inversion of J'J.
2. Simplify the dense rank truncation loop.
3. Minor correction to building documentation.
Change-Id: Ide932811c0f28dc6c253809339fb2caa083865b5
diff --git a/docs/source/building.rst b/docs/source/building.rst
index 17e17d4..b91027b 100644
--- a/docs/source/building.rst
+++ b/docs/source/building.rst
@@ -72,7 +72,7 @@
.. code-block:: bash
# CMake
- sudo apt-hey install cmake
+ sudo apt-get install cmake
# gflags
tar -xvzf gflags-2.0.tar.gz
cd gflags-2.0
diff --git a/include/ceres/covariance.h b/include/ceres/covariance.h
index f65d9eb..b093026 100644
--- a/include/ceres/covariance.h
+++ b/include/ceres/covariance.h
@@ -51,7 +51,7 @@
// 1. This is experimental code and the API WILL CHANGE before
// release.
//
-// 2. WARNING: It is very easy to use this class incorrectly without
+// 2. It is very easy to use this class incorrectly without
// understanding the underlying mathematics. Please read and
// understand the documentation completely before attempting to use
// this class.
@@ -166,6 +166,21 @@
// with indeterminacy. IEEE Transactions on Information Theory 47(5):
// 2017-2028 (2001)
//
+// Speed
+// -----
+//
+// When use_dense_linear_algebra = true, Eigen's JacobiSVD algorithm
+// is used to perform the computations. It is an accurate but slow
+// method and should only be used for small to moderate sized
+// problems.
+//
+// When use_dense_linear_algebra = false, SuiteSparse/CHOLMOD is used
+// to perform the computation. Recent versions of SuiteSparse (>=
+// 4.2.0) provide a much more efficient method for solving for rows of
+// the covariance matrix. Therefore, if you are doing large scale
+// covariance estimation, we strongly recommend using a recent version
+// of SuiteSparse.
+//
// Example Usage
// =============
//
@@ -248,6 +263,27 @@
// and by default Covariance::Compute will return false if it
// encounters such a matrix.
//
+ // use_dense_linear_algebra = false
+ // --------------------------------
+ //
+ // When performing large scale sparse covariance estimation,
+ // computing the exact value of the reciprocal condition number is
+ // not possible as it would require computing the eigenvalues of
+ // J'J.
+ //
+ // In this case we use cholmod_rcond, which uses the ratio of the
+ // smallest to the largest diagonal entries of the Cholesky
+ // factorization as an approximation to the reciprocal condition
+ // number.
+ //
+ // However, care must be taken as this is a heuristic and can
+ // sometimes be a very crude estimate. The default value of
+ // min_reciprocal_condition_number has been set to a conservative
+ // value, and sometimes Covariance::Compute may return false
+ // even if it is possible to estimate the covariance reliably. In
+ // such cases, the user should exercise their judgement before
+ // lowering the value of min_reciprocal_condition_number.
+ //
// use_dense_linear_algebra = true
// -------------------------------
//
diff --git a/internal/ceres/covariance_impl.cc b/internal/ceres/covariance_impl.cc
index 966ba35..978acc1 100644
--- a/internal/ceres/covariance_impl.cc
+++ b/internal/ceres/covariance_impl.cc
@@ -30,6 +30,10 @@
#include "ceres/covariance_impl.h"
+#ifdef CERES_USE_OPENMP
+#include <omp.h>
+#endif
+
#include <algorithm>
#include <utility>
#include <vector>
@@ -47,6 +51,36 @@
namespace ceres {
namespace internal {
+namespace {
+
+// Per thread storage for SuiteSparse.
+struct PerThreadContext {
+ PerThreadContext(int num_rows)
+ : solution(NULL),
+ solution_set(NULL),
+ y_workspace(NULL),
+ e_workspace(NULL),
+ rhs(NULL) {
+ rhs = ss.CreateDenseVector(NULL, num_rows, num_rows);
+ }
+
+ ~PerThreadContext() {
+ ss.Free(solution);
+ ss.Free(solution_set);
+ ss.Free(y_workspace);
+ ss.Free(e_workspace);
+ ss.Free(rhs);
+ }
+
+ cholmod_dense* solution;
+ cholmod_sparse* solution_set;
+ cholmod_dense* y_workspace;
+ cholmod_dense* e_workspace;
+ cholmod_dense* rhs;
+ SuiteSparse ss;
+};
+
+} // namespace
typedef vector<pair<const double*, const double*> > CovarianceBlocks;
@@ -424,9 +458,6 @@
const int* cols = covariance_matrix_->cols();
double* values = covariance_matrix_->mutable_values();
- cholmod_dense* rhs = ss_.CreateDenseVector(NULL, num_rows, num_rows);
- double* rhs_x = reinterpret_cast<double*>(rhs->x);
-
// The following loop exploits the fact that the i^th column of A^{-1}
// is given by the solution to the linear system
//
@@ -441,6 +472,9 @@
// versions of SuiteSparse have the cholmod_solve2 function which
// re-uses memory across calls.
#if (SUITESPARSE_VERSION < 4002)
+ cholmod_dense* rhs = ss_.CreateDenseVector(NULL, num_rows, num_rows);
+ double* rhs_x = reinterpret_cast<double*>(rhs->x);
+
for (int r = 0; r < num_rows; ++r) {
int row_begin = rows[r];
int row_end = rows[r + 1];
@@ -458,15 +492,35 @@
ss_.Free(solution);
rhs_x[r] = 0.0;
}
-#else
- // TODO(sameeragarwal) There should be a more efficient way
- // involving the use of Bset but I am unable to make it work right
- // now.
- cholmod_dense* solution = NULL;
- cholmod_sparse* solution_set = NULL;
- cholmod_dense* y_workspace = NULL;
- cholmod_dense* e_workspace = NULL;
+ ss_.Free(rhs);
+#else // SUITESPARSE_VERSION < 4002
+
+ const int num_threads = options_.num_threads;
+ vector<PerThreadContext*> contexts(num_threads);
+ for (int i = 0; i < num_threads; ++i) {
+ contexts[i] = new PerThreadContext(num_rows);
+ }
+
+ // The first call to cholmod_solve2 is not thread safe, since it
+ // changes the factorization from supernodal to simplicial etc.
+ {
+ PerThreadContext* context = contexts[0];
+ double* context_rhs_x = reinterpret_cast<double*>(context->rhs->x);
+ context_rhs_x[0] = 1.0;
+ cholmod_solve2(CHOLMOD_A,
+ factor,
+ context->rhs,
+ NULL,
+ &context->solution,
+ &context->solution_set,
+ &context->y_workspace,
+ &context->e_workspace,
+ context->ss.mutable_cc());
+ context_rhs_x[0] = 0.0;
+ }
+
+#pragma omp parallel for num_threads(num_threads) schedule(dynamic)
for (int r = 0; r < num_rows; ++r) {
int row_begin = rows[r];
int row_end = rows[r + 1];
@@ -474,40 +528,52 @@
continue;
}
- rhs_x[r] = 1.0;
+# ifdef CERES_USE_OPENMP
+ int thread_id = omp_get_thread_num();
+# else
+ int thread_id = 0;
+# endif
+ PerThreadContext* context = contexts[thread_id];
+ double* context_rhs_x = reinterpret_cast<double*>(context->rhs->x);
+ context_rhs_x[r] = 1.0;
+
+ // TODO(sameeragarwal) There should be a more efficient way
+ // involving the use of Bset but I am unable to make it work right
+ // now.
cholmod_solve2(CHOLMOD_A,
factor,
- rhs,
+ context->rhs,
NULL,
- &solution,
- &solution_set,
- &y_workspace,
- &e_workspace,
- ss_.mutable_cc());
+ &context->solution,
+ &context->solution_set,
+ &context->y_workspace,
+ &context->e_workspace,
+ context->ss.mutable_cc());
- double* solution_x = reinterpret_cast<double*>(solution->x);
+ double* solution_x = reinterpret_cast<double*>(context->solution->x);
for (int idx = row_begin; idx < row_end; ++idx) {
const int c = cols[idx];
values[idx] = solution_x[c];
}
- rhs_x[r] = 0.0;
+ context_rhs_x[r] = 0.0;
}
- ss_.Free(solution);
- ss_.Free(solution_set);
- ss_.Free(y_workspace);
- ss_.Free(e_workspace);
+ for (int i = 0; i < num_threads; ++i) {
+ delete contexts[i];
+ }
-#endif
+#endif // SUITESPARSE_VERSION < 4002
- ss_.Free(rhs);
ss_.Free(factor);
event_logger.AddEvent("Inversion");
return true;
-#else
+
+#else // CERES_NO_SUITESPARSE
+
return false;
-#endif
+
+#endif // CERES_NO_SUITESPARSE
};
bool CovarianceImpl::ComputeCovarianceValuesUsingEigen() {
@@ -549,19 +615,32 @@
const int max_rank = min(num_singular_values,
num_singular_values - options_.null_space_rank);
+ // Compute the squared inverse of the singular values. Truncate the
+ // computation based on min_singular_value_ratio and
+ // null_space_rank. When either of these two quantities are active,
+ // the resulting covariance matrix is a Moore-Penrose inverse
+ // instead of a regular inverse.
for (int i = 0; i < max_rank; ++i) {
const double singular_value_ratio = singular_values[i] / max_singular_value;
- if (singular_value_ratio >= min_singular_value_ratio) {
- inverse_squared_singular_values[i] =
- 1.0 / (singular_values[i] * singular_values[i]);
- } else if (!automatic_truncation) {
- LOG(WARNING) << "Cholesky factorization of J'J is not reliable. "
- << "Reciprocal condition number: "
- << singular_value_ratio * singular_value_ratio << " "
- << "min_reciprocal_condition_number : "
- << options_.min_reciprocal_condition_number;
- return false;
+ if (singular_value_ratio < min_singular_value_ratio) {
+ // Since the singular values are in decreasing order, if
+ // automatic truncation is enabled, then from this point on
+ // all values will fail the ratio test and there is nothing to
+ // do in this loop.
+ if (automatic_truncation) {
+ break;
+ } else {
+ LOG(WARNING) << "Cholesky factorization of J'J is not reliable. "
+ << "Reciprocal condition number: "
+ << singular_value_ratio * singular_value_ratio << " "
+ << "min_reciprocal_condition_number : "
+ << options_.min_reciprocal_condition_number;
+ return false;
+ }
}
+
+ inverse_squared_singular_values[i] =
+ 1.0 / (singular_values[i] * singular_values[i]);
}
Matrix dense_covariance =
@@ -585,7 +664,5 @@
return true;
};
-
-
} // namespace internal
} // namespace ceres
diff --git a/internal/ceres/covariance_test.cc b/internal/ceres/covariance_test.cc
index 05fb234..bd0a0e2 100644
--- a/internal/ceres/covariance_test.cc
+++ b/internal/ceres/covariance_test.cc
@@ -405,6 +405,50 @@
ComputeAndCompareCovarianceBlocks(options, expected_covariance);
}
+#ifdef CERES_USE_OPENMP
+
+TEST_F(CovarianceTest, ThreadedNormalBehavior) {
+ // J
+ //
+ // 1 0 0 0 0 0
+ // 0 1 0 0 0 0
+ // 0 0 2 0 0 0
+ // 0 0 0 2 0 0
+ // 0 0 0 0 2 0
+ // 0 0 0 0 0 5
+ // -5 -6 1 2 3 0
+ // 3 -2 0 0 0 2
+
+ // J'J
+ //
+ // 35 24 -5 -10 -15 6
+ // 24 41 -6 -12 -18 -4
+ // -5 -6 5 2 3 0
+ // -10 -12 2 8 6 0
+ // -15 -18 3 6 13 0
+ // 6 -4 0 0 0 29
+
+ // inv(J'J) computed using Octave.
+ double expected_covariance[] = {
+ 7.0747e-02, -8.4923e-03, 1.6821e-02, 3.3643e-02, 5.0464e-02, -1.5809e-02, // NOLINT
+ -8.4923e-03, 8.1352e-02, 2.4758e-02, 4.9517e-02, 7.4275e-02, 1.2978e-02, // NOLINT
+ 1.6821e-02, 2.4758e-02, 2.4904e-01, -1.9271e-03, -2.8906e-03, -6.5325e-05, // NOLINT
+ 3.3643e-02, 4.9517e-02, -1.9271e-03, 2.4615e-01, -5.7813e-03, -1.3065e-04, // NOLINT
+ 5.0464e-02, 7.4275e-02, -2.8906e-03, -5.7813e-03, 2.4133e-01, -1.9598e-04, // NOLINT
+ -1.5809e-02, 1.2978e-02, -6.5325e-05, -1.3065e-04, -1.9598e-04, 3.9544e-02, // NOLINT
+ };
+
+ Covariance::Options options;
+ options.num_threads = 4;
+
+ options.use_dense_linear_algebra = false;
+ ComputeAndCompareCovarianceBlocks(options, expected_covariance);
+
+ options.use_dense_linear_algebra = true;
+ ComputeAndCompareCovarianceBlocks(options, expected_covariance);
+}
+
+#endif // CERES_USE_OPENMP
TEST_F(CovarianceTest, ConstantParameterBlock) {
problem_.SetParameterBlockConstant(parameters_);
@@ -644,5 +688,80 @@
ComputeAndCompareCovarianceBlocks(options, expected_covariance);
}
+class LargeScaleCovarianceTest : public ::testing::Test {
+ protected:
+ virtual void SetUp() {
+ num_parameter_blocks_ = 2000;
+ parameter_block_size_ = 5;
+ parameters_.reset(new double[parameter_block_size_ * num_parameter_blocks_]);
+
+ Matrix jacobian(parameter_block_size_, parameter_block_size_);
+ for (int i = 0; i < num_parameter_blocks_; ++i) {
+ jacobian.setIdentity();
+ jacobian *= (i + 1);
+
+ double* block_i = parameters_.get() + i * parameter_block_size_;
+ problem_.AddResidualBlock(new UnaryCostFunction(parameter_block_size_,
+ parameter_block_size_,
+ jacobian.data()),
+ NULL,
+ block_i );
+ for (int j = i; j < num_parameter_blocks_; ++j) {
+ double* block_j = parameters_.get() + j * parameter_block_size_;
+ all_covariance_blocks_.push_back(make_pair(block_i, block_j));
+ }
+ }
+ }
+
+ void ComputeAndCompare(int num_threads) {
+ Covariance::Options options;
+ options.use_dense_linear_algebra = false;
+ options.num_threads = num_threads;
+ Covariance covariance(options);
+ EXPECT_TRUE(covariance.Compute(all_covariance_blocks_, &problem_));
+
+ Matrix expected(parameter_block_size_, parameter_block_size_);
+ Matrix actual(parameter_block_size_, parameter_block_size_);
+ const double kTolerance = 1e-16;
+
+ for (int i = 0; i < num_parameter_blocks_; ++i) {
+ expected.setIdentity();
+ expected /= (i + 1.0) * (i + 1.0);
+
+ double* block_i = parameters_.get() + i * parameter_block_size_;
+ covariance.GetCovarianceBlock(block_i, block_i, actual.data());
+ EXPECT_NEAR((expected - actual).norm(), 0.0, kTolerance)
+ << "block: " << i << ", " << i << "\n"
+ << "expected: \n" << expected << "\n"
+ << "actual: \n" << actual;
+
+ expected.setZero();
+ for (int j = i + 1; j < num_parameter_blocks_; ++j) {
+ double* block_j = parameters_.get() + j * parameter_block_size_;
+ covariance.GetCovarianceBlock(block_i, block_j, actual.data());
+ EXPECT_NEAR((expected - actual).norm(), 0.0, kTolerance)
+ << "block: " << i << ", " << j << "\n"
+ << "expected: \n" << expected << "\n"
+ << "actual: \n" << actual;
+ }
+ }
+ }
+
+ scoped_array<double> parameters_;
+ int parameter_block_size_;
+ int num_parameter_blocks_;
+
+ Problem problem_;
+ vector<pair<const double*, const double*> > all_covariance_blocks_;
+};
+
+#if !defined(CERES_NO_SUITESPARSE) && defined(CERES_USE_OPENMP)
+
+TEST_F(LargeScaleCovarianceTest, Parallel) {
+ ComputeAndCompare(4);
+}
+
+#endif // !defined(CERES_NO_SUITESPARSE) && defined(CERES_USE_OPENMP)
+
} // namespace internal
} // namespace ceres