Multithread covariance estimation.
1. Multithread the inversion of J'J.
2. Simplify the dense rank truncation loop.
3. Minor correction to building documentation.
Change-Id: Ide932811c0f28dc6c253809339fb2caa083865b5
diff --git a/internal/ceres/covariance_impl.cc b/internal/ceres/covariance_impl.cc
index 966ba35..978acc1 100644
--- a/internal/ceres/covariance_impl.cc
+++ b/internal/ceres/covariance_impl.cc
@@ -30,6 +30,10 @@
#include "ceres/covariance_impl.h"
+#ifdef CERES_USE_OPENMP
+#include <omp.h>
+#endif
+
#include <algorithm>
#include <utility>
#include <vector>
@@ -47,6 +51,36 @@
namespace ceres {
namespace internal {
+namespace {
+
+// Per thread storage for SuiteSparse.
+struct PerThreadContext {
+ PerThreadContext(int num_rows)
+ : solution(NULL),
+ solution_set(NULL),
+ y_workspace(NULL),
+ e_workspace(NULL),
+ rhs(NULL) {
+ rhs = ss.CreateDenseVector(NULL, num_rows, num_rows);
+ }
+
+ ~PerThreadContext() {
+ ss.Free(solution);
+ ss.Free(solution_set);
+ ss.Free(y_workspace);
+ ss.Free(e_workspace);
+ ss.Free(rhs);
+ }
+
+ cholmod_dense* solution;
+ cholmod_sparse* solution_set;
+ cholmod_dense* y_workspace;
+ cholmod_dense* e_workspace;
+ cholmod_dense* rhs;
+ SuiteSparse ss;
+};
+
+} // namespace
typedef vector<pair<const double*, const double*> > CovarianceBlocks;
@@ -424,9 +458,6 @@
const int* cols = covariance_matrix_->cols();
double* values = covariance_matrix_->mutable_values();
- cholmod_dense* rhs = ss_.CreateDenseVector(NULL, num_rows, num_rows);
- double* rhs_x = reinterpret_cast<double*>(rhs->x);
-
// The following loop exploits the fact that the i^th column of A^{-1}
// is given by the solution to the linear system
//
@@ -441,6 +472,9 @@
// versions of SuiteSparse have the cholmod_solve2 function which
// re-uses memory across calls.
#if (SUITESPARSE_VERSION < 4002)
+ cholmod_dense* rhs = ss_.CreateDenseVector(NULL, num_rows, num_rows);
+ double* rhs_x = reinterpret_cast<double*>(rhs->x);
+
for (int r = 0; r < num_rows; ++r) {
int row_begin = rows[r];
int row_end = rows[r + 1];
@@ -458,15 +492,35 @@
ss_.Free(solution);
rhs_x[r] = 0.0;
}
-#else
- // TODO(sameeragarwal) There should be a more efficient way
- // involving the use of Bset but I am unable to make it work right
- // now.
- cholmod_dense* solution = NULL;
- cholmod_sparse* solution_set = NULL;
- cholmod_dense* y_workspace = NULL;
- cholmod_dense* e_workspace = NULL;
+ ss_.Free(rhs);
+#else // SUITESPARSE_VERSION < 4002
+
+ const int num_threads = options_.num_threads;
+ vector<PerThreadContext*> contexts(num_threads);
+ for (int i = 0; i < num_threads; ++i) {
+ contexts[i] = new PerThreadContext(num_rows);
+ }
+
+ // The first call to cholmod_solve2 is not thread safe, since it
+ // changes the factorization from supernodal to simplicial etc.
+ {
+ PerThreadContext* context = contexts[0];
+ double* context_rhs_x = reinterpret_cast<double*>(context->rhs->x);
+ context_rhs_x[0] = 1.0;
+ cholmod_solve2(CHOLMOD_A,
+ factor,
+ context->rhs,
+ NULL,
+ &context->solution,
+ &context->solution_set,
+ &context->y_workspace,
+ &context->e_workspace,
+ context->ss.mutable_cc());
+ context_rhs_x[0] = 0.0;
+ }
+
+#pragma omp parallel for num_threads(num_threads) schedule(dynamic)
for (int r = 0; r < num_rows; ++r) {
int row_begin = rows[r];
int row_end = rows[r + 1];
@@ -474,40 +528,52 @@
continue;
}
- rhs_x[r] = 1.0;
+# ifdef CERES_USE_OPENMP
+ int thread_id = omp_get_thread_num();
+# else
+ int thread_id = 0;
+# endif
+ PerThreadContext* context = contexts[thread_id];
+ double* context_rhs_x = reinterpret_cast<double*>(context->rhs->x);
+ context_rhs_x[r] = 1.0;
+
+ // TODO(sameeragarwal) There should be a more efficient way
+ // involving the use of Bset but I am unable to make it work right
+ // now.
cholmod_solve2(CHOLMOD_A,
factor,
- rhs,
+ context->rhs,
NULL,
- &solution,
- &solution_set,
- &y_workspace,
- &e_workspace,
- ss_.mutable_cc());
+ &context->solution,
+ &context->solution_set,
+ &context->y_workspace,
+ &context->e_workspace,
+ context->ss.mutable_cc());
- double* solution_x = reinterpret_cast<double*>(solution->x);
+ double* solution_x = reinterpret_cast<double*>(context->solution->x);
for (int idx = row_begin; idx < row_end; ++idx) {
const int c = cols[idx];
values[idx] = solution_x[c];
}
- rhs_x[r] = 0.0;
+ context_rhs_x[r] = 0.0;
}
- ss_.Free(solution);
- ss_.Free(solution_set);
- ss_.Free(y_workspace);
- ss_.Free(e_workspace);
+ for (int i = 0; i < num_threads; ++i) {
+ delete contexts[i];
+ }
-#endif
+#endif // SUITESPARSE_VERSION < 4002
- ss_.Free(rhs);
ss_.Free(factor);
event_logger.AddEvent("Inversion");
return true;
-#else
+
+#else // CERES_NO_SUITESPARSE
+
return false;
-#endif
+
+#endif // CERES_NO_SUITESPARSE
};
bool CovarianceImpl::ComputeCovarianceValuesUsingEigen() {
@@ -549,19 +615,32 @@
const int max_rank = min(num_singular_values,
num_singular_values - options_.null_space_rank);
+ // Compute the squared inverse of the singular values. Truncate the
+ // computation based on min_singular_value_ratio and
+ // null_space_rank. When either of these two quantities are active,
+ // the resulting covariance matrix is a Moore-Penrose inverse
+ // instead of a regular inverse.
for (int i = 0; i < max_rank; ++i) {
const double singular_value_ratio = singular_values[i] / max_singular_value;
- if (singular_value_ratio >= min_singular_value_ratio) {
- inverse_squared_singular_values[i] =
- 1.0 / (singular_values[i] * singular_values[i]);
- } else if (!automatic_truncation) {
- LOG(WARNING) << "Cholesky factorization of J'J is not reliable. "
- << "Reciprocal condition number: "
- << singular_value_ratio * singular_value_ratio << " "
- << "min_reciprocal_condition_number : "
- << options_.min_reciprocal_condition_number;
- return false;
+ if (singular_value_ratio < min_singular_value_ratio) {
+ // Since the singular values are in decreasing order, if
+ // automatic truncation is enabled, then from this point on
+ // all values will fail the ratio test and there is nothing to
+ // do in this loop.
+ if (automatic_truncation) {
+ break;
+ } else {
+ LOG(WARNING) << "Cholesky factorization of J'J is not reliable. "
+ << "Reciprocal condition number: "
+ << singular_value_ratio * singular_value_ratio << " "
+ << "min_reciprocal_condition_number : "
+ << options_.min_reciprocal_condition_number;
+ return false;
+ }
}
+
+ inverse_squared_singular_values[i] =
+ 1.0 / (singular_values[i] * singular_values[i]);
}
Matrix dense_covariance =
@@ -585,7 +664,5 @@
return true;
};
-
-
} // namespace internal
} // namespace ceres