Expand reporting of timing information.
1. Add an ExecutionSummary object to record execution
information about Ceres objects.
2. Add an EventLogger object to log events in a function call.
3. Add a ScopedExecutionTimer object to log times in ExecutionSummary.
4. Instrument ProgramEvaluator and all the linear solvers
to report their timing statistics.
5. Connect the timing statistics to Summary::FullReport.
6. Add high precision timer on unix systems using
gettimeofday() call.
7. Various minor clean ups all around.
Change-Id: I5e09804b730b09535484124be7dbc1c58eccd1d4
diff --git a/include/ceres/solver.h b/include/ceres/solver.h
index 9ad7e50..cb858ba 100644
--- a/include/ceres/solver.h
+++ b/include/ceres/solver.h
@@ -664,6 +664,10 @@
// Some total of all time spent inside Ceres when Solve is called.
double total_time_in_seconds;
+ double linear_solver_time_in_seconds;
+ double residual_evaluation_time_in_seconds;
+ double jacobian_evaluation_time_in_seconds;
+
// Preprocessor summary.
int num_parameter_blocks;
int num_parameters;
diff --git a/internal/ceres/cgnr_solver.cc b/internal/ceres/cgnr_solver.cc
index ccc8026..5ef521c 100644
--- a/internal/ceres/cgnr_solver.cc
+++ b/internal/ceres/cgnr_solver.cc
@@ -30,11 +30,12 @@
#include "ceres/cgnr_solver.h"
-#include "glog/logging.h"
-#include "ceres/linear_solver.h"
+#include "ceres/block_jacobi_preconditioner.h"
#include "ceres/cgnr_linear_operator.h"
#include "ceres/conjugate_gradients_solver.h"
-#include "ceres/block_jacobi_preconditioner.h"
+#include "ceres/linear_solver.h"
+#include "ceres/wall_time.h"
+#include "glog/logging.h"
namespace ceres {
namespace internal {
@@ -49,6 +50,8 @@
const double* b,
const LinearSolver::PerSolveOptions& per_solve_options,
double* x) {
+ EventLogger event_logger("CgnrSolver::Solve");
+
// Form z = Atb.
scoped_array<double> z(new double[A->num_cols()]);
std::fill(z.get(), z.get() + A->num_cols(), 0.0);
@@ -69,11 +72,14 @@
// Solve (AtA + DtD)x = z (= Atb).
std::fill(x, x + A->num_cols(), 0.0);
CgnrLinearOperator lhs(*A, per_solve_options.D);
+ event_logger.AddEvent("Setup");
+
ConjugateGradientsSolver conjugate_gradient_solver(options_);
- return conjugate_gradient_solver.Solve(&lhs,
- z.get(),
- cg_per_solve_options,
- x);
+ LinearSolver::Summary summary =
+ conjugate_gradient_solver.Solve(&lhs, z.get(), cg_per_solve_options, x);
+ event_logger.AddEvent("Solve");
+
+ return summary;
}
} // namespace internal
diff --git a/internal/ceres/dense_normal_cholesky_solver.cc b/internal/ceres/dense_normal_cholesky_solver.cc
index f6bb99a..a340e16 100644
--- a/internal/ceres/dense_normal_cholesky_solver.cc
+++ b/internal/ceres/dense_normal_cholesky_solver.cc
@@ -34,10 +34,11 @@
#include "Eigen/Dense"
#include "ceres/dense_sparse_matrix.h"
-#include "ceres/linear_solver.h"
#include "ceres/internal/eigen.h"
#include "ceres/internal/scoped_ptr.h"
+#include "ceres/linear_solver.h"
#include "ceres/types.h"
+#include "ceres/wall_time.h"
namespace ceres {
namespace internal {
@@ -51,6 +52,8 @@
const double* b,
const LinearSolver::PerSolveOptions& per_solve_options,
double* x) {
+ EventLogger event_logger("DenseNormalCholeskySolver::Solve");
+
const int num_rows = A->num_rows();
const int num_cols = A->num_cols();
@@ -58,6 +61,7 @@
Matrix lhs(num_cols, num_cols);
lhs.setZero();
+ event_logger.AddEvent("Setup");
// lhs += A'A
//
// Using rankUpdate instead of GEMM, exposes the fact that its the
@@ -73,12 +77,13 @@
lhs += D.array().square().matrix().asDiagonal();
}
- VectorRef(x, num_cols) =
- lhs.selfadjointView<Eigen::Upper>().ldlt().solve(rhs);
-
LinearSolver::Summary summary;
summary.num_iterations = 1;
summary.termination_type = TOLERANCE;
+ VectorRef(x, num_cols) =
+ lhs.selfadjointView<Eigen::Upper>().ldlt().solve(rhs);
+ event_logger.AddEvent("Solve");
+
return summary;
}
diff --git a/internal/ceres/dense_qr_solver.cc b/internal/ceres/dense_qr_solver.cc
index 7aec450..21b6b15 100644
--- a/internal/ceres/dense_qr_solver.cc
+++ b/internal/ceres/dense_qr_solver.cc
@@ -34,10 +34,11 @@
#include "Eigen/Dense"
#include "ceres/dense_sparse_matrix.h"
-#include "ceres/linear_solver.h"
#include "ceres/internal/eigen.h"
#include "ceres/internal/scoped_ptr.h"
+#include "ceres/linear_solver.h"
#include "ceres/types.h"
+#include "ceres/wall_time.h"
namespace ceres {
namespace internal {
@@ -50,10 +51,10 @@
const double* b,
const LinearSolver::PerSolveOptions& per_solve_options,
double* x) {
+ EventLogger event_logger("DenseQRSolver::Solve");
+
const int num_rows = A->num_rows();
const int num_cols = A->num_cols();
- VLOG(2) << "DenseQRSolver: "
- << num_rows << " x " << num_cols << " system.";
if (per_solve_options.D != NULL) {
// Temporarily append a diagonal block to the A matrix, but undo
@@ -68,9 +69,11 @@
rhs_.setZero();
}
rhs_.head(num_rows) = ConstVectorRef(b, num_rows);
+ event_logger.AddEvent("Setup");
// Solve the system.
VectorRef(x, num_cols) = A->matrix().colPivHouseholderQr().solve(rhs_);
+ event_logger.AddEvent("Solve");
if (per_solve_options.D != NULL) {
// Undo the modifications to the matrix A.
@@ -83,6 +86,8 @@
LinearSolver::Summary summary;
summary.num_iterations = 1;
summary.termination_type = TOLERANCE;
+
+ event_logger.AddEvent("TearDown");
return summary;
}
diff --git a/internal/ceres/evaluator.h b/internal/ceres/evaluator.h
index 600feda..76cf1e0 100644
--- a/internal/ceres/evaluator.h
+++ b/internal/ceres/evaluator.h
@@ -155,6 +155,17 @@
// The number of residuals in the optimization problem.
virtual int NumResiduals() const = 0;
+ // The following two methods return copies instead of references so
+ // that the base class implementation does not have to worry about
+ // life time issues. Further, these calls are not expected to be
+ // frequent or performance sensitive.
+ virtual map<string, int> CallStatistics() const {
+ return map<string, int>();
+ }
+
+ virtual map<string, double> TimeStatistics() const {
+ return map<string, double>();
+ }
};
} // namespace internal
diff --git a/internal/ceres/execution_summary.h b/internal/ceres/execution_summary.h
index 02a4448..2eb3c1a 100644
--- a/internal/ceres/execution_summary.h
+++ b/internal/ceres/execution_summary.h
@@ -35,6 +35,8 @@
#include <string>
#include "ceres/internal/port.h"
+#include "ceres/wall_time.h"
+#include "ceres/mutex.h"
namespace ceres {
namespace internal {
@@ -42,8 +44,44 @@
// Struct used by various objects to report statistics and other
// information about their execution. e.g., ExecutionSummary::times
// can be used for reporting times associated with various activities.
-struct ExecutionSummary {
- map<string, double> times;
+class ExecutionSummary {
+ public:
+ void IncrementTimeBy(const string& name, const double value) {
+ CeresMutexLock l(×_mutex_);
+ times_[name] += value;
+ }
+
+ void IncrementCall(const string& name) {
+ CeresMutexLock l(&calls_mutex_);
+ calls_[name] += 1;
+ }
+
+ const map<string, double>& times() const { return times_; };
+ const map<string, int>& calls() const { return calls_; };
+
+ private:
+ Mutex times_mutex_;
+ map<string, double> times_;
+
+ Mutex calls_mutex_;
+ map<string, int> calls_;
+};
+
+class ScopedExecutionTimer {
+ public:
+ ScopedExecutionTimer(const string& name, ExecutionSummary* summary)
+ : start_time_(WallTimeInSeconds()),
+ name_(name),
+ summary_(summary) {}
+
+ ~ScopedExecutionTimer() {
+ summary_->IncrementTimeBy(name_, WallTimeInSeconds() - start_time_);
+ }
+
+ private:
+ const double start_time_;
+ const string name_;
+ ExecutionSummary* summary_;
};
} // namespace internal
diff --git a/internal/ceres/iterative_schur_complement_solver.cc b/internal/ceres/iterative_schur_complement_solver.cc
index 376a586..cb2d827 100644
--- a/internal/ceres/iterative_schur_complement_solver.cc
+++ b/internal/ceres/iterative_schur_complement_solver.cc
@@ -33,6 +33,7 @@
#include <algorithm>
#include <cstring>
#include <vector>
+
#include "Eigen/Dense"
#include "ceres/block_sparse_matrix.h"
#include "ceres/block_structure.h"
@@ -44,6 +45,7 @@
#include "ceres/triplet_sparse_matrix.h"
#include "ceres/types.h"
#include "ceres/visibility_based_preconditioner.h"
+#include "ceres/wall_time.h"
#include "glog/logging.h"
namespace ceres {
@@ -62,6 +64,8 @@
const double* b,
const LinearSolver::PerSolveOptions& per_solve_options,
double* x) {
+ EventLogger event_logger("IterativeSchurComplementSolver::Solve");
+
CHECK_NOTNULL(A->block_structure());
// Initialize a ImplicitSchurComplement object.
@@ -121,6 +125,7 @@
default:
LOG(FATAL) << "Unknown Preconditioner Type";
}
+ event_logger.AddEvent("Setup");
LinearSolver::Summary cg_summary;
cg_summary.num_iterations = 0;
@@ -138,6 +143,8 @@
}
VLOG(2) << "CG Iterations : " << cg_summary.num_iterations;
+
+ event_logger.AddEvent("Solve");
return cg_summary;
}
diff --git a/internal/ceres/iterative_schur_complement_solver.h b/internal/ceres/iterative_schur_complement_solver.h
index cfeb65e..862453c 100644
--- a/internal/ceres/iterative_schur_complement_solver.h
+++ b/internal/ceres/iterative_schur_complement_solver.h
@@ -86,6 +86,7 @@
scoped_ptr<internal::ImplicitSchurComplement> schur_complement_;
scoped_ptr<VisibilityBasedPreconditioner> visibility_based_preconditioner_;
Vector reduced_linear_system_solution_;
+ CERES_DISALLOW_COPY_AND_ASSIGN(IterativeSchurComplementSolver);
};
} // namespace internal
diff --git a/internal/ceres/linear_solver.h b/internal/ceres/linear_solver.h
index 29959d3..219939e 100644
--- a/internal/ceres/linear_solver.h
+++ b/internal/ceres/linear_solver.h
@@ -43,6 +43,7 @@
#include "ceres/compressed_row_sparse_matrix.h"
#include "ceres/dense_sparse_matrix.h"
#include "ceres/execution_summary.h"
+#include "ceres/execution_summary.h"
#include "ceres/triplet_sparse_matrix.h"
#include "ceres/types.h"
@@ -256,6 +257,18 @@
const PerSolveOptions& per_solve_options,
double* x) = 0;
+ // The following two methods return copies instead of references so
+ // that the base class implementation does not have to worry about
+ // life time issues. Further, these calls are not expected to be
+ // frequent or performance sensitive.
+ virtual map<string, int> CallStatistics() const {
+ return map<string, int>();
+ }
+
+ virtual map<string, double> TimeStatistics() const {
+ return map<string, double>();
+ }
+
// Factory
static LinearSolver* Create(const Options& options);
};
@@ -276,18 +289,29 @@
const double* b,
const LinearSolver::PerSolveOptions& per_solve_options,
double* x) {
+ ScopedExecutionTimer total_time("LinearSolver::Solve", &execution_summary_);
CHECK_NOTNULL(A);
CHECK_NOTNULL(b);
CHECK_NOTNULL(x);
return SolveImpl(down_cast<MatrixType*>(A), b, per_solve_options, x);
}
+ virtual map<string, int> CallStatistics() const {
+ return execution_summary_.calls();
+ }
+
+ virtual map<string, double> TimeStatistics() const {
+ return execution_summary_.times();
+ }
+
private:
virtual LinearSolver::Summary SolveImpl(
MatrixType* A,
const double* b,
const LinearSolver::PerSolveOptions& per_solve_options,
double* x) = 0;
+
+ ExecutionSummary execution_summary_;
};
// Linear solvers that depend on acccess to the low level structure of
diff --git a/internal/ceres/program_evaluator.h b/internal/ceres/program_evaluator.h
index 1ecf4aa..e9640ab 100644
--- a/internal/ceres/program_evaluator.h
+++ b/internal/ceres/program_evaluator.h
@@ -88,6 +88,7 @@
#include "ceres/residual_block.h"
#include "ceres/internal/eigen.h"
#include "ceres/internal/scoped_ptr.h"
+#include "ceres/execution_summary.h"
namespace ceres {
namespace internal {
@@ -122,6 +123,12 @@
double* residuals,
double* gradient,
SparseMatrix* jacobian) {
+ ScopedExecutionTimer total_timer("Evaluator::Total", &execution_summary_);
+ ScopedExecutionTimer call_type_timer(gradient == NULL && jacobian == NULL
+ ? "Evaluator::Residual"
+ : "Evaluator::Jacobian",
+ &execution_summary_);
+
// The parameters are stateful, so set the state before evaluating.
if (!program_->StateVectorToParameterBlocks(state)) {
return false;
@@ -266,6 +273,14 @@
return program_->NumResiduals();
}
+ virtual map<string, int> CallStatistics() const {
+ return execution_summary_.calls();
+ }
+
+ virtual map<string, double> TimeStatistics() const {
+ return execution_summary_.times();
+ }
+
private:
// Per-thread scratch space needed to evaluate and store each residual block.
struct EvaluateScratch {
@@ -331,6 +346,7 @@
scoped_array<EvaluatePreparer> evaluate_preparers_;
scoped_array<EvaluateScratch> evaluate_scratch_;
vector<int> residual_layout_;
+ ::ceres::internal::ExecutionSummary execution_summary_;
};
} // namespace internal
diff --git a/internal/ceres/schur_complement_solver.cc b/internal/ceres/schur_complement_solver.cc
index 9b7d4e5..1753759 100644
--- a/internal/ceres/schur_complement_solver.cc
+++ b/internal/ceres/schur_complement_solver.cc
@@ -44,15 +44,15 @@
#include "ceres/block_sparse_matrix.h"
#include "ceres/block_structure.h"
#include "ceres/detect_structure.h"
+#include "ceres/internal/eigen.h"
+#include "ceres/internal/port.h"
+#include "ceres/internal/scoped_ptr.h"
#include "ceres/linear_solver.h"
#include "ceres/schur_complement_solver.h"
#include "ceres/suitesparse.h"
#include "ceres/triplet_sparse_matrix.h"
-#include "ceres/internal/eigen.h"
-#include "ceres/internal/port.h"
-#include "ceres/internal/scoped_ptr.h"
#include "ceres/types.h"
-
+#include "ceres/wall_time.h"
namespace ceres {
namespace internal {
@@ -62,7 +62,8 @@
const double* b,
const LinearSolver::PerSolveOptions& per_solve_options,
double* x) {
- const time_t start_time = time(NULL);
+ EventLogger event_logger("SchurComplementSolver::Solve");
+
if (eliminator_.get() == NULL) {
InitStorage(A->block_structure());
DetectStructure(*A->block_structure(),
@@ -73,32 +74,27 @@
eliminator_.reset(CHECK_NOTNULL(SchurEliminatorBase::Create(options_)));
eliminator_->Init(options_.elimination_groups[0], A->block_structure());
};
- const time_t init_time = time(NULL);
fill(x, x + A->num_cols(), 0.0);
+ event_logger.AddEvent("Setup");
LinearSolver::Summary summary;
summary.num_iterations = 1;
summary.termination_type = FAILURE;
eliminator_->Eliminate(A, b, per_solve_options.D, lhs_.get(), rhs_.get());
- const time_t eliminate_time = time(NULL);
+ event_logger.AddEvent("Eliminate");
double* reduced_solution = x + A->num_cols() - lhs_->num_cols();
const bool status = SolveReducedLinearSystem(reduced_solution);
- const time_t solve_time = time(NULL);
+ event_logger.AddEvent("ReducedSolve");
if (!status) {
return summary;
}
eliminator_->BackSubstitute(A, b, per_solve_options.D, reduced_solution, x);
- const time_t backsubstitute_time = time(NULL);
summary.termination_type = TOLERANCE;
- VLOG(2) << "time (sec) total: " << (backsubstitute_time - start_time)
- << " init: " << (init_time - start_time)
- << " eliminate: " << (eliminate_time - init_time)
- << " solve: " << (solve_time - eliminate_time)
- << " backsubstitute: " << (backsubstitute_time - solve_time);
+ event_logger.AddEvent("BackSubstitute");
return summary;
}
@@ -267,8 +263,6 @@
// CHOLMOD's sparse cholesky factorization routines.
bool SparseSchurComplementSolver::SolveReducedLinearSystemUsingSuiteSparse(
double* solution) {
- const time_t start_time = time(NULL);
-
TripletSparseMatrix* tsm =
const_cast<TripletSparseMatrix*>(
down_cast<const BlockRandomAccessSparseMatrix*>(lhs())->matrix());
@@ -285,11 +279,9 @@
// The matrix is symmetric, and the upper triangular part of the
// matrix contains the values.
cholmod_lhs->stype = 1;
- const time_t lhs_time = time(NULL);
cholmod_dense* cholmod_rhs =
ss_.CreateDenseVector(const_cast<double*>(rhs()), num_rows, num_rows);
- const time_t rhs_time = time(NULL);
// Symbolic factorization is computed if we don't already have one handy.
if (factor_ == NULL) {
@@ -306,12 +298,9 @@
CHECK_NOTNULL(factor_);
- const time_t symbolic_time = time(NULL);
cholmod_dense* cholmod_solution =
ss_.SolveCholesky(cholmod_lhs, factor_, cholmod_rhs);
- const time_t solve_time = time(NULL);
-
ss_.Free(cholmod_lhs);
cholmod_lhs = NULL;
ss_.Free(cholmod_rhs);
@@ -325,13 +314,6 @@
VectorRef(solution, num_rows)
= VectorRef(static_cast<double*>(cholmod_solution->x), num_rows);
ss_.Free(cholmod_solution);
- const time_t final_time = time(NULL);
- VLOG(2) << "time: " << (final_time - start_time)
- << " lhs : " << (lhs_time - start_time)
- << " rhs: " << (rhs_time - lhs_time)
- << " analyze: " << (symbolic_time - rhs_time)
- << " factor_and_solve: " << (solve_time - symbolic_time)
- << " cleanup: " << (final_time - solve_time);
return true;
}
#else
diff --git a/internal/ceres/solver.cc b/internal/ceres/solver.cc
index af32629..2ae255a 100644
--- a/internal/ceres/solver.cc
+++ b/internal/ceres/solver.cc
@@ -94,6 +94,9 @@
minimizer_time_in_seconds(-1.0),
postprocessor_time_in_seconds(-1.0),
total_time_in_seconds(-1.0),
+ linear_solver_time_in_seconds(-1.0),
+ residual_evaluation_time_in_seconds(-1.0),
+ jacobian_evaluation_time_in_seconds(-1.0),
num_parameter_blocks(-1),
num_parameters(-1),
num_residual_blocks(-1),
@@ -264,11 +267,22 @@
StringAppendF(&report, "Total % 20d\n",
num_successful_steps + num_unsuccessful_steps);
StringAppendF(&report, "\nTime (in seconds):\n");
- StringAppendF(&report, "Preprocessor % 25e\n",
+ StringAppendF(&report, "Preprocessor % 25.3e\n",
preprocessor_time_in_seconds);
- StringAppendF(&report, "Minimizer % 25e\n",
+
+ StringAppendF(&report, "\n Residual Evaluations % 22.3e\n",
+ residual_evaluation_time_in_seconds);
+ StringAppendF(&report, " Jacobian Evaluations % 22.3e\n",
+ jacobian_evaluation_time_in_seconds);
+ StringAppendF(&report, " Linear Solver % 23.3e\n",
+ linear_solver_time_in_seconds);
+ StringAppendF(&report, "Minimizer % 25.3e\n\n",
minimizer_time_in_seconds);
- StringAppendF(&report, "Total % 25e\n",
+
+ StringAppendF(&report, "Postprocessor % 24.3e\n",
+ postprocessor_time_in_seconds);
+
+ StringAppendF(&report, "Total % 25.3e\n\n",
total_time_in_seconds);
StringAppendF(&report, "Termination: %25s\n",
diff --git a/internal/ceres/solver_impl.cc b/internal/ceres/solver_impl.cc
index 50c96e5..3dfbb67 100644
--- a/internal/ceres/solver_impl.cc
+++ b/internal/ceres/solver_impl.cc
@@ -177,6 +177,8 @@
FILE* fptr_;
};
+// Iterate over each of the groups in order of their priority and fill
+// summary with their sizes.
void SummarizeOrdering(ParameterBlockOrdering* ordering,
vector<int>* summary) {
CHECK_NOTNULL(summary)->clear();
@@ -308,6 +310,7 @@
void SolverImpl::TrustRegionSolve(const Solver::Options& original_options,
ProblemImpl* original_problem_impl,
Solver::Summary* summary) {
+ EventLogger event_logger("TrustRegionSolve");
double solver_start_time = WallTimeInSeconds();
Program* original_program = original_problem_impl->mutable_program();
@@ -369,6 +372,7 @@
" currently broken. Ignoring Solver::Options::lsqp_iterations_to_dump";
}
+ event_logger.AddEvent("Init");
// Evaluate the initial cost, residual vector and the jacobian
// matrix if requested by the user. The initial cost needs to be
// computed on the original unpreprocessed problem, as it is used to
@@ -392,6 +396,8 @@
return;
}
+ event_logger.AddEvent("InitialEvaluate");
+
original_program->SetParameterBlockStatePtrsToUserStatePtrs();
// If the user requests gradient checking, construct a new
@@ -430,12 +436,15 @@
}
}
+ event_logger.AddEvent("ConstructOrdering");
+
// Create the three objects needed to minimize: the transformed program, the
// evaluator, and the linear solver.
scoped_ptr<Program> reduced_program(CreateReducedProgram(&options,
problem_impl,
&summary->fixed_cost,
&summary->error));
+ event_logger.AddEvent("CreateReducedProgram");
if (reduced_program == NULL) {
return;
}
@@ -484,6 +493,7 @@
// Ensure the program state is set to the user parameters on the way out.
original_program->SetParameterBlockStatePtrsToUserStatePtrs();
+ event_logger.AddEvent("FinalEvaluate");
summary->postprocessor_time_in_seconds =
WallTimeInSeconds() - post_process_start_time;
return;
@@ -491,6 +501,7 @@
scoped_ptr<LinearSolver>
linear_solver(CreateLinearSolver(&options, &summary->error));
+ event_logger.AddEvent("CreateLinearSolver");
if (linear_solver == NULL) {
return;
}
@@ -527,6 +538,9 @@
problem_impl->parameter_map(),
reduced_program.get(),
&summary->error));
+
+ event_logger.AddEvent("CreateEvaluator");
+
if (evaluator == NULL) {
return;
}
@@ -549,6 +563,8 @@
}
}
+ event_logger.AddEvent("CreateIIM");
+
// The optimizer works on contiguous parameter vectors; allocate some.
Vector parameters(reduced_program->NumParameters());
@@ -561,6 +577,7 @@
summary->preprocessor_time_in_seconds =
minimizer_start_time - solver_start_time;
+ event_logger.AddEvent("FinalInit");
// Run the optimization.
TrustRegionMinimize(options,
reduced_program.get(),
@@ -569,6 +586,7 @@
linear_solver.get(),
parameters.data(),
summary);
+ event_logger.AddEvent("Minimize");
// If the user aborted mid-optimization or the optimization
// terminated because of a numerical failure, then return without
@@ -612,15 +630,31 @@
summary->termination_type = NUMERICAL_FAILURE;
summary->error = "Unable to evaluate the final cost.";
LOG(ERROR) << summary->error;
+
+ event_logger.AddEvent("PostProcess");
return;
}
// Ensure the program state is set to the user parameters on the way out.
original_program->SetParameterBlockStatePtrsToUserStatePtrs();
+ const map<string, double>& linear_solver_time_statistics =
+ linear_solver->TimeStatistics();
+ summary->linear_solver_time_in_seconds =
+ FindWithDefault(linear_solver_time_statistics, "LinearSolver::Solve", 0.0);
+
+ const map<string, double>& evaluator_time_statistics =
+ evaluator->TimeStatistics();
+
+ summary->residual_evaluation_time_in_seconds =
+ FindWithDefault(evaluator_time_statistics, "Evaluator::Residual", 0.0);
+ summary->jacobian_evaluation_time_in_seconds =
+ FindWithDefault(evaluator_time_statistics, "Evaluator::Jacobian", 0.0);
+
// Stick a fork in it, we're done.
summary->postprocessor_time_in_seconds =
WallTimeInSeconds() - post_process_start_time;
+ event_logger.AddEvent("PostProcess");
}
void SolverImpl::LineSearchSolve(const Solver::Options& original_options,
@@ -764,7 +798,7 @@
// any further.
summary->termination_type = FUNCTION_TOLERANCE;
- double post_process_start_time = WallTimeInSeconds();
+ const double post_process_start_time = WallTimeInSeconds();
// Evaluate the final cost, residual vector and the jacobian
// matrix if requested by the user.
if (!Evaluator::Evaluate(original_program,
@@ -809,7 +843,7 @@
Vector original_parameters = parameters;
- double minimizer_start_time = WallTimeInSeconds();
+ const double minimizer_start_time = WallTimeInSeconds();
summary->preprocessor_time_in_seconds =
minimizer_start_time - solver_start_time;
@@ -828,7 +862,7 @@
return;
}
- double post_process_start_time = WallTimeInSeconds();
+ const double post_process_start_time = WallTimeInSeconds();
// Push the contiguous optimized parameters back to the user's parameters.
reduced_program->StateVectorToParameterBlocks(parameters.data());
@@ -1026,9 +1060,13 @@
ProblemImpl* problem_impl,
double* fixed_cost,
string* error) {
+ EventLogger event_logger("CreateReducedProgram");
+
CHECK_NOTNULL(options->linear_solver_ordering);
Program* original_program = problem_impl->mutable_program();
scoped_ptr<Program> transformed_program(new Program(*original_program));
+ event_logger.AddEvent("TransformedProgram");
+
ParameterBlockOrdering* linear_solver_ordering =
options->linear_solver_ordering;
@@ -1043,6 +1081,8 @@
return NULL;
}
+ event_logger.AddEvent("RemovedFixedBlocks");
+
if (transformed_program->NumParameterBlocks() == 0) {
if (transformed_program->NumResidualBlocks() > 0) {
*error = "Zero parameter blocks but non-zero residual blocks"
@@ -1076,6 +1116,7 @@
(i < num_eliminate_blocks) ? 0 : 1);
}
}
+ event_logger.AddEvent("SchurOrdering");
if (!ApplyUserOrdering(problem_impl->parameter_map(),
linear_solver_ordering,
@@ -1083,6 +1124,7 @@
error)) {
return NULL;
}
+ event_logger.AddEvent("ApplyOrdering");
// If the user requested the use of a Schur type solver, and
// supplied a non-NULL linear_solver_ordering object with more than
@@ -1123,9 +1165,13 @@
LOG(WARNING) << msg;
}
+ event_logger.AddEvent("AlternateSolver");
+
// Since the transformed program is the "active" program, and it is mutated,
// update the parameter offsets and indices.
transformed_program->SetParameterOffsetsAndIndex();
+
+ event_logger.AddEvent("SetOffsets");
return transformed_program.release();
}
diff --git a/internal/ceres/sparse_normal_cholesky_solver.cc b/internal/ceres/sparse_normal_cholesky_solver.cc
index 9e00b44..6fae204 100644
--- a/internal/ceres/sparse_normal_cholesky_solver.cc
+++ b/internal/ceres/sparse_normal_cholesky_solver.cc
@@ -39,12 +39,13 @@
#endif
#include "ceres/compressed_row_sparse_matrix.h"
+#include "ceres/internal/eigen.h"
+#include "ceres/internal/scoped_ptr.h"
#include "ceres/linear_solver.h"
#include "ceres/suitesparse.h"
#include "ceres/triplet_sparse_matrix.h"
-#include "ceres/internal/eigen.h"
-#include "ceres/internal/scoped_ptr.h"
#include "ceres/types.h"
+#include "ceres/wall_time.h"
namespace ceres {
namespace internal {
@@ -103,6 +104,8 @@
const double* b,
const LinearSolver::PerSolveOptions& per_solve_options,
double * x) {
+ EventLogger event_logger("SparseNormalCholeskySolver::CXSparse::Solve");
+
LinearSolver::Summary summary;
summary.num_iterations = 1;
const int num_cols = A->num_cols();
@@ -136,18 +139,27 @@
A->DeleteRows(num_cols);
}
+ event_logger.AddEvent("Setup");
+
// Compute symbolic factorization if not available.
if (cxsparse_factor_ == NULL) {
cxsparse_factor_ = CHECK_NOTNULL(cxsparse_.AnalyzeCholesky(AtA));
}
+ event_logger.AddEvent("Analysis");
+
+
// Solve the linear system.
if (cxsparse_.SolveCholesky(AtA, cxsparse_factor_, Atb.data())) {
VectorRef(x, Atb.rows()) = Atb;
summary.termination_type = TOLERANCE;
}
+ event_logger.AddEvent("Solve");
+
cxsparse_.Free(AtA);
+
+ event_logger.AddEvent("Teardown");
return summary;
}
#else
@@ -169,9 +181,9 @@
const double* b,
const LinearSolver::PerSolveOptions& per_solve_options,
double * x) {
- const time_t start_time = time(NULL);
- const int num_cols = A->num_cols();
+ EventLogger event_logger("SparseNormalCholeskySolver::SuiteSparse::Solve");
+ const int num_cols = A->num_cols();
LinearSolver::Summary summary;
Vector Atb = Vector::Zero(num_cols);
A->LeftMultiply(b, Atb.data());
@@ -189,7 +201,7 @@
CHECK_NOTNULL(lhs.get());
cholmod_dense* rhs = ss_.CreateDenseVector(Atb.data(), num_cols, num_cols);
- const time_t init_time = time(NULL);
+ event_logger.AddEvent("Setup");
if (factor_ == NULL) {
if (options_.use_block_amd) {
@@ -206,11 +218,10 @@
}
CHECK_NOTNULL(factor_);
-
- const time_t symbolic_time = time(NULL);
+ event_logger.AddEvent("Analysis");
cholmod_dense* sol = ss_.SolveCholesky(lhs.get(), factor_, rhs);
- const time_t solve_time = time(NULL);
+ event_logger.AddEvent("Solve");
ss_.Free(rhs);
rhs = NULL;
@@ -228,12 +239,7 @@
summary.termination_type = TOLERANCE;
}
- const time_t cleanup_time = time(NULL);
- VLOG(2) << "time (sec) total: " << (cleanup_time - start_time)
- << " init: " << (init_time - start_time)
- << " symbolic: " << (symbolic_time - init_time)
- << " solve: " << (solve_time - symbolic_time)
- << " cleanup: " << (cleanup_time - solve_time);
+ event_logger.AddEvent("Teardown");
return summary;
}
#else
diff --git a/internal/ceres/sparse_normal_cholesky_solver.h b/internal/ceres/sparse_normal_cholesky_solver.h
index 40d9e0a..8d48096 100644
--- a/internal/ceres/sparse_normal_cholesky_solver.h
+++ b/internal/ceres/sparse_normal_cholesky_solver.h
@@ -35,8 +35,8 @@
#define CERES_INTERNAL_SPARSE_NORMAL_CHOLESKY_SOLVER_H_
#include "ceres/cxsparse.h"
-#include "ceres/linear_solver.h"
#include "ceres/internal/macros.h"
+#include "ceres/linear_solver.h"
#include "ceres/suitesparse.h"
namespace ceres {
diff --git a/internal/ceres/wall_time.cc b/internal/ceres/wall_time.cc
index 0dce19f..8dc4e1f 100644
--- a/internal/ceres/wall_time.cc
+++ b/internal/ceres/wall_time.cc
@@ -28,12 +28,20 @@
//
// Author: strandmark@google.com (Petter Strandmark)
+#include "ceres/wall_time.h"
+
#ifdef CERES_USE_OPENMP
#include <omp.h>
#else
#include <ctime>
#endif
+#ifdef _WIN32
+#include <windows.h>
+#else
+#include <sys/time.h>
+#endif
+
namespace ceres {
namespace internal {
@@ -41,8 +49,47 @@
#ifdef CERES_USE_OPENMP
return omp_get_wtime();
#else
+#ifdef _WIN32
return static_cast<double>(std::time(NULL));
+#else
+ timeval time_val;
+ gettimeofday(&time_val, NULL);
+ return (time_val.tv_sec + time_val.tv_usec * 1e-6);
#endif
+#endif
+}
+
+EventLogger::EventLogger(const string& logger_name)
+ : start_time_(WallTimeInSeconds()),
+ last_event_time_(start_time_),
+ events_("") {
+ StringAppendF(&events_,
+ "\n%s\n Delta Cumulative\n",
+ logger_name.c_str());
+}
+
+EventLogger::~EventLogger() {
+ if (VLOG_IS_ON(3)) {
+ AddEvent("Total");
+ VLOG(2) << "\n" << events_ << "\n";
+ }
+}
+
+void EventLogger::AddEvent(const string& event_name) {
+ if (!VLOG_IS_ON(3)) {
+ return;
+ }
+
+ const double current_time = WallTimeInSeconds();
+ const double relative_time_delta = current_time - last_event_time_;
+ const double absolute_time_delta = current_time - start_time_;
+ last_event_time_ = current_time;
+
+ StringAppendF(&events_,
+ " %25s : %5.3e %5.3e\n",
+ event_name.c_str(),
+ relative_time_delta,
+ absolute_time_delta);
}
} // namespace internal
diff --git a/internal/ceres/wall_time.h b/internal/ceres/wall_time.h
index 1a6e3bb..1dbd6e9 100644
--- a/internal/ceres/wall_time.h
+++ b/internal/ceres/wall_time.h
@@ -31,13 +31,57 @@
#ifndef CERES_INTERNAL_WALL_TIME_H_
#define CERES_INTERNAL_WALL_TIME_H_
+#include <map>
+
+#include "ceres/internal/port.h"
+#include "ceres/stringprintf.h"
+#include "glog/logging.h"
+
namespace ceres {
namespace internal {
-// Returns time, in seconds, from some arbitrary starting point. Has very
-// high precision if OpenMP is available, otherwise only second granularity.
+// Returns time, in seconds, from some arbitrary starting point. If
+// OpenMP is available then the high precision openmp_get_wtime()
+// function is used. Otherwise on unixes, gettimeofday is used. The
+// granularity is in seconds on windows systems.
double WallTimeInSeconds();
+// Log a series of events, recording for each event the time elapsed
+// since the last event and since the creation of the object.
+//
+// The information is output to VLOG(3) upon destruction. A
+// name::Total event is added as the final event right before
+// destruction.
+//
+// Example usage:
+//
+// void Foo() {
+// EventLogger event_logger("Foo");
+// Bar1();
+// event_logger.AddEvent("Bar1")
+// Bar2();
+// event_logger.AddEvent("Bar2")
+// Bar3();
+// }
+//
+// Will produce output that looks like
+//
+// Foo
+// Bar1: time1 time1
+// Bar2: time2 time1 + time2;
+// Total: time3 time1 + time2 + time3;
+class EventLogger {
+ public:
+ EventLogger(const string& logger_name);
+ ~EventLogger();
+ void AddEvent(const string& event_name);
+
+ private:
+ const double start_time_;
+ double last_event_time_;
+ string events_;
+};
+
} // namespace internal
} // namespace ceres