Expand reporting of timing information. 1. Add an ExecutionSummary object to record execution information about Ceres objects. 2. Add an EventLogger object to log events in a function call. 3. Add a ScopedExecutionTimer object to log times in ExecutionSummary. 4. Instrument ProgramEvaluator and all the linear solvers to report their timing statistics. 5. Connect the timing statistics to Summary::FullReport. 6. Add high precision timer on unix systems using gettimeofday() call. 7. Various minor clean ups all around. Change-Id: I5e09804b730b09535484124be7dbc1c58eccd1d4
diff --git a/include/ceres/solver.h b/include/ceres/solver.h index 9ad7e50..cb858ba 100644 --- a/include/ceres/solver.h +++ b/include/ceres/solver.h
@@ -664,6 +664,10 @@ // Some total of all time spent inside Ceres when Solve is called. double total_time_in_seconds; + double linear_solver_time_in_seconds; + double residual_evaluation_time_in_seconds; + double jacobian_evaluation_time_in_seconds; + // Preprocessor summary. int num_parameter_blocks; int num_parameters;
diff --git a/internal/ceres/cgnr_solver.cc b/internal/ceres/cgnr_solver.cc index ccc8026..5ef521c 100644 --- a/internal/ceres/cgnr_solver.cc +++ b/internal/ceres/cgnr_solver.cc
@@ -30,11 +30,12 @@ #include "ceres/cgnr_solver.h" -#include "glog/logging.h" -#include "ceres/linear_solver.h" +#include "ceres/block_jacobi_preconditioner.h" #include "ceres/cgnr_linear_operator.h" #include "ceres/conjugate_gradients_solver.h" -#include "ceres/block_jacobi_preconditioner.h" +#include "ceres/linear_solver.h" +#include "ceres/wall_time.h" +#include "glog/logging.h" namespace ceres { namespace internal { @@ -49,6 +50,8 @@ const double* b, const LinearSolver::PerSolveOptions& per_solve_options, double* x) { + EventLogger event_logger("CgnrSolver::Solve"); + // Form z = Atb. scoped_array<double> z(new double[A->num_cols()]); std::fill(z.get(), z.get() + A->num_cols(), 0.0); @@ -69,11 +72,14 @@ // Solve (AtA + DtD)x = z (= Atb). std::fill(x, x + A->num_cols(), 0.0); CgnrLinearOperator lhs(*A, per_solve_options.D); + event_logger.AddEvent("Setup"); + ConjugateGradientsSolver conjugate_gradient_solver(options_); - return conjugate_gradient_solver.Solve(&lhs, - z.get(), - cg_per_solve_options, - x); + LinearSolver::Summary summary = + conjugate_gradient_solver.Solve(&lhs, z.get(), cg_per_solve_options, x); + event_logger.AddEvent("Solve"); + + return summary; } } // namespace internal
diff --git a/internal/ceres/dense_normal_cholesky_solver.cc b/internal/ceres/dense_normal_cholesky_solver.cc index f6bb99a..a340e16 100644 --- a/internal/ceres/dense_normal_cholesky_solver.cc +++ b/internal/ceres/dense_normal_cholesky_solver.cc
@@ -34,10 +34,11 @@ #include "Eigen/Dense" #include "ceres/dense_sparse_matrix.h" -#include "ceres/linear_solver.h" #include "ceres/internal/eigen.h" #include "ceres/internal/scoped_ptr.h" +#include "ceres/linear_solver.h" #include "ceres/types.h" +#include "ceres/wall_time.h" namespace ceres { namespace internal { @@ -51,6 +52,8 @@ const double* b, const LinearSolver::PerSolveOptions& per_solve_options, double* x) { + EventLogger event_logger("DenseNormalCholeskySolver::Solve"); + const int num_rows = A->num_rows(); const int num_cols = A->num_cols(); @@ -58,6 +61,7 @@ Matrix lhs(num_cols, num_cols); lhs.setZero(); + event_logger.AddEvent("Setup"); // lhs += A'A // // Using rankUpdate instead of GEMM, exposes the fact that its the @@ -73,12 +77,13 @@ lhs += D.array().square().matrix().asDiagonal(); } - VectorRef(x, num_cols) = - lhs.selfadjointView<Eigen::Upper>().ldlt().solve(rhs); - LinearSolver::Summary summary; summary.num_iterations = 1; summary.termination_type = TOLERANCE; + VectorRef(x, num_cols) = + lhs.selfadjointView<Eigen::Upper>().ldlt().solve(rhs); + event_logger.AddEvent("Solve"); + return summary; }
diff --git a/internal/ceres/dense_qr_solver.cc b/internal/ceres/dense_qr_solver.cc index 7aec450..21b6b15 100644 --- a/internal/ceres/dense_qr_solver.cc +++ b/internal/ceres/dense_qr_solver.cc
@@ -34,10 +34,11 @@ #include "Eigen/Dense" #include "ceres/dense_sparse_matrix.h" -#include "ceres/linear_solver.h" #include "ceres/internal/eigen.h" #include "ceres/internal/scoped_ptr.h" +#include "ceres/linear_solver.h" #include "ceres/types.h" +#include "ceres/wall_time.h" namespace ceres { namespace internal { @@ -50,10 +51,10 @@ const double* b, const LinearSolver::PerSolveOptions& per_solve_options, double* x) { + EventLogger event_logger("DenseQRSolver::Solve"); + const int num_rows = A->num_rows(); const int num_cols = A->num_cols(); - VLOG(2) << "DenseQRSolver: " - << num_rows << " x " << num_cols << " system."; if (per_solve_options.D != NULL) { // Temporarily append a diagonal block to the A matrix, but undo @@ -68,9 +69,11 @@ rhs_.setZero(); } rhs_.head(num_rows) = ConstVectorRef(b, num_rows); + event_logger.AddEvent("Setup"); // Solve the system. VectorRef(x, num_cols) = A->matrix().colPivHouseholderQr().solve(rhs_); + event_logger.AddEvent("Solve"); if (per_solve_options.D != NULL) { // Undo the modifications to the matrix A. @@ -83,6 +86,8 @@ LinearSolver::Summary summary; summary.num_iterations = 1; summary.termination_type = TOLERANCE; + + event_logger.AddEvent("TearDown"); return summary; }
diff --git a/internal/ceres/evaluator.h b/internal/ceres/evaluator.h index 600feda..76cf1e0 100644 --- a/internal/ceres/evaluator.h +++ b/internal/ceres/evaluator.h
@@ -155,6 +155,17 @@ // The number of residuals in the optimization problem. virtual int NumResiduals() const = 0; + // The following two methods return copies instead of references so + // that the base class implementation does not have to worry about + // life time issues. Further, these calls are not expected to be + // frequent or performance sensitive. + virtual map<string, int> CallStatistics() const { + return map<string, int>(); + } + + virtual map<string, double> TimeStatistics() const { + return map<string, double>(); + } }; } // namespace internal
diff --git a/internal/ceres/execution_summary.h b/internal/ceres/execution_summary.h index 02a4448..2eb3c1a 100644 --- a/internal/ceres/execution_summary.h +++ b/internal/ceres/execution_summary.h
@@ -35,6 +35,8 @@ #include <string> #include "ceres/internal/port.h" +#include "ceres/wall_time.h" +#include "ceres/mutex.h" namespace ceres { namespace internal { @@ -42,8 +44,44 @@ // Struct used by various objects to report statistics and other // information about their execution. e.g., ExecutionSummary::times // can be used for reporting times associated with various activities. -struct ExecutionSummary { - map<string, double> times; +class ExecutionSummary { + public: + void IncrementTimeBy(const string& name, const double value) { + CeresMutexLock l(×_mutex_); + times_[name] += value; + } + + void IncrementCall(const string& name) { + CeresMutexLock l(&calls_mutex_); + calls_[name] += 1; + } + + const map<string, double>& times() const { return times_; }; + const map<string, int>& calls() const { return calls_; }; + + private: + Mutex times_mutex_; + map<string, double> times_; + + Mutex calls_mutex_; + map<string, int> calls_; +}; + +class ScopedExecutionTimer { + public: + ScopedExecutionTimer(const string& name, ExecutionSummary* summary) + : start_time_(WallTimeInSeconds()), + name_(name), + summary_(summary) {} + + ~ScopedExecutionTimer() { + summary_->IncrementTimeBy(name_, WallTimeInSeconds() - start_time_); + } + + private: + const double start_time_; + const string name_; + ExecutionSummary* summary_; }; } // namespace internal
diff --git a/internal/ceres/iterative_schur_complement_solver.cc b/internal/ceres/iterative_schur_complement_solver.cc index 376a586..cb2d827 100644 --- a/internal/ceres/iterative_schur_complement_solver.cc +++ b/internal/ceres/iterative_schur_complement_solver.cc
@@ -33,6 +33,7 @@ #include <algorithm> #include <cstring> #include <vector> + #include "Eigen/Dense" #include "ceres/block_sparse_matrix.h" #include "ceres/block_structure.h" @@ -44,6 +45,7 @@ #include "ceres/triplet_sparse_matrix.h" #include "ceres/types.h" #include "ceres/visibility_based_preconditioner.h" +#include "ceres/wall_time.h" #include "glog/logging.h" namespace ceres { @@ -62,6 +64,8 @@ const double* b, const LinearSolver::PerSolveOptions& per_solve_options, double* x) { + EventLogger event_logger("IterativeSchurComplementSolver::Solve"); + CHECK_NOTNULL(A->block_structure()); // Initialize a ImplicitSchurComplement object. @@ -121,6 +125,7 @@ default: LOG(FATAL) << "Unknown Preconditioner Type"; } + event_logger.AddEvent("Setup"); LinearSolver::Summary cg_summary; cg_summary.num_iterations = 0; @@ -138,6 +143,8 @@ } VLOG(2) << "CG Iterations : " << cg_summary.num_iterations; + + event_logger.AddEvent("Solve"); return cg_summary; }
diff --git a/internal/ceres/iterative_schur_complement_solver.h b/internal/ceres/iterative_schur_complement_solver.h index cfeb65e..862453c 100644 --- a/internal/ceres/iterative_schur_complement_solver.h +++ b/internal/ceres/iterative_schur_complement_solver.h
@@ -86,6 +86,7 @@ scoped_ptr<internal::ImplicitSchurComplement> schur_complement_; scoped_ptr<VisibilityBasedPreconditioner> visibility_based_preconditioner_; Vector reduced_linear_system_solution_; + CERES_DISALLOW_COPY_AND_ASSIGN(IterativeSchurComplementSolver); }; } // namespace internal
diff --git a/internal/ceres/linear_solver.h b/internal/ceres/linear_solver.h index 29959d3..219939e 100644 --- a/internal/ceres/linear_solver.h +++ b/internal/ceres/linear_solver.h
@@ -43,6 +43,7 @@ #include "ceres/compressed_row_sparse_matrix.h" #include "ceres/dense_sparse_matrix.h" #include "ceres/execution_summary.h" +#include "ceres/execution_summary.h" #include "ceres/triplet_sparse_matrix.h" #include "ceres/types.h" @@ -256,6 +257,18 @@ const PerSolveOptions& per_solve_options, double* x) = 0; + // The following two methods return copies instead of references so + // that the base class implementation does not have to worry about + // life time issues. Further, these calls are not expected to be + // frequent or performance sensitive. + virtual map<string, int> CallStatistics() const { + return map<string, int>(); + } + + virtual map<string, double> TimeStatistics() const { + return map<string, double>(); + } + // Factory static LinearSolver* Create(const Options& options); }; @@ -276,18 +289,29 @@ const double* b, const LinearSolver::PerSolveOptions& per_solve_options, double* x) { + ScopedExecutionTimer total_time("LinearSolver::Solve", &execution_summary_); CHECK_NOTNULL(A); CHECK_NOTNULL(b); CHECK_NOTNULL(x); return SolveImpl(down_cast<MatrixType*>(A), b, per_solve_options, x); } + virtual map<string, int> CallStatistics() const { + return execution_summary_.calls(); + } + + virtual map<string, double> TimeStatistics() const { + return execution_summary_.times(); + } + private: virtual LinearSolver::Summary SolveImpl( MatrixType* A, const double* b, const LinearSolver::PerSolveOptions& per_solve_options, double* x) = 0; + + ExecutionSummary execution_summary_; }; // Linear solvers that depend on acccess to the low level structure of
diff --git a/internal/ceres/program_evaluator.h b/internal/ceres/program_evaluator.h index 1ecf4aa..e9640ab 100644 --- a/internal/ceres/program_evaluator.h +++ b/internal/ceres/program_evaluator.h
@@ -88,6 +88,7 @@ #include "ceres/residual_block.h" #include "ceres/internal/eigen.h" #include "ceres/internal/scoped_ptr.h" +#include "ceres/execution_summary.h" namespace ceres { namespace internal { @@ -122,6 +123,12 @@ double* residuals, double* gradient, SparseMatrix* jacobian) { + ScopedExecutionTimer total_timer("Evaluator::Total", &execution_summary_); + ScopedExecutionTimer call_type_timer(gradient == NULL && jacobian == NULL + ? "Evaluator::Residual" + : "Evaluator::Jacobian", + &execution_summary_); + // The parameters are stateful, so set the state before evaluating. if (!program_->StateVectorToParameterBlocks(state)) { return false; @@ -266,6 +273,14 @@ return program_->NumResiduals(); } + virtual map<string, int> CallStatistics() const { + return execution_summary_.calls(); + } + + virtual map<string, double> TimeStatistics() const { + return execution_summary_.times(); + } + private: // Per-thread scratch space needed to evaluate and store each residual block. struct EvaluateScratch { @@ -331,6 +346,7 @@ scoped_array<EvaluatePreparer> evaluate_preparers_; scoped_array<EvaluateScratch> evaluate_scratch_; vector<int> residual_layout_; + ::ceres::internal::ExecutionSummary execution_summary_; }; } // namespace internal
diff --git a/internal/ceres/schur_complement_solver.cc b/internal/ceres/schur_complement_solver.cc index 9b7d4e5..1753759 100644 --- a/internal/ceres/schur_complement_solver.cc +++ b/internal/ceres/schur_complement_solver.cc
@@ -44,15 +44,15 @@ #include "ceres/block_sparse_matrix.h" #include "ceres/block_structure.h" #include "ceres/detect_structure.h" +#include "ceres/internal/eigen.h" +#include "ceres/internal/port.h" +#include "ceres/internal/scoped_ptr.h" #include "ceres/linear_solver.h" #include "ceres/schur_complement_solver.h" #include "ceres/suitesparse.h" #include "ceres/triplet_sparse_matrix.h" -#include "ceres/internal/eigen.h" -#include "ceres/internal/port.h" -#include "ceres/internal/scoped_ptr.h" #include "ceres/types.h" - +#include "ceres/wall_time.h" namespace ceres { namespace internal { @@ -62,7 +62,8 @@ const double* b, const LinearSolver::PerSolveOptions& per_solve_options, double* x) { - const time_t start_time = time(NULL); + EventLogger event_logger("SchurComplementSolver::Solve"); + if (eliminator_.get() == NULL) { InitStorage(A->block_structure()); DetectStructure(*A->block_structure(), @@ -73,32 +74,27 @@ eliminator_.reset(CHECK_NOTNULL(SchurEliminatorBase::Create(options_))); eliminator_->Init(options_.elimination_groups[0], A->block_structure()); }; - const time_t init_time = time(NULL); fill(x, x + A->num_cols(), 0.0); + event_logger.AddEvent("Setup"); LinearSolver::Summary summary; summary.num_iterations = 1; summary.termination_type = FAILURE; eliminator_->Eliminate(A, b, per_solve_options.D, lhs_.get(), rhs_.get()); - const time_t eliminate_time = time(NULL); + event_logger.AddEvent("Eliminate"); double* reduced_solution = x + A->num_cols() - lhs_->num_cols(); const bool status = SolveReducedLinearSystem(reduced_solution); - const time_t solve_time = time(NULL); + event_logger.AddEvent("ReducedSolve"); if (!status) { return summary; } eliminator_->BackSubstitute(A, b, per_solve_options.D, reduced_solution, x); - const time_t backsubstitute_time = time(NULL); summary.termination_type = TOLERANCE; - VLOG(2) << "time (sec) total: " << (backsubstitute_time - start_time) - << " init: " << (init_time - start_time) - << " eliminate: " << (eliminate_time - init_time) - << " solve: " << (solve_time - eliminate_time) - << " backsubstitute: " << (backsubstitute_time - solve_time); + event_logger.AddEvent("BackSubstitute"); return summary; } @@ -267,8 +263,6 @@ // CHOLMOD's sparse cholesky factorization routines. bool SparseSchurComplementSolver::SolveReducedLinearSystemUsingSuiteSparse( double* solution) { - const time_t start_time = time(NULL); - TripletSparseMatrix* tsm = const_cast<TripletSparseMatrix*>( down_cast<const BlockRandomAccessSparseMatrix*>(lhs())->matrix()); @@ -285,11 +279,9 @@ // The matrix is symmetric, and the upper triangular part of the // matrix contains the values. cholmod_lhs->stype = 1; - const time_t lhs_time = time(NULL); cholmod_dense* cholmod_rhs = ss_.CreateDenseVector(const_cast<double*>(rhs()), num_rows, num_rows); - const time_t rhs_time = time(NULL); // Symbolic factorization is computed if we don't already have one handy. if (factor_ == NULL) { @@ -306,12 +298,9 @@ CHECK_NOTNULL(factor_); - const time_t symbolic_time = time(NULL); cholmod_dense* cholmod_solution = ss_.SolveCholesky(cholmod_lhs, factor_, cholmod_rhs); - const time_t solve_time = time(NULL); - ss_.Free(cholmod_lhs); cholmod_lhs = NULL; ss_.Free(cholmod_rhs); @@ -325,13 +314,6 @@ VectorRef(solution, num_rows) = VectorRef(static_cast<double*>(cholmod_solution->x), num_rows); ss_.Free(cholmod_solution); - const time_t final_time = time(NULL); - VLOG(2) << "time: " << (final_time - start_time) - << " lhs : " << (lhs_time - start_time) - << " rhs: " << (rhs_time - lhs_time) - << " analyze: " << (symbolic_time - rhs_time) - << " factor_and_solve: " << (solve_time - symbolic_time) - << " cleanup: " << (final_time - solve_time); return true; } #else
diff --git a/internal/ceres/solver.cc b/internal/ceres/solver.cc index af32629..2ae255a 100644 --- a/internal/ceres/solver.cc +++ b/internal/ceres/solver.cc
@@ -94,6 +94,9 @@ minimizer_time_in_seconds(-1.0), postprocessor_time_in_seconds(-1.0), total_time_in_seconds(-1.0), + linear_solver_time_in_seconds(-1.0), + residual_evaluation_time_in_seconds(-1.0), + jacobian_evaluation_time_in_seconds(-1.0), num_parameter_blocks(-1), num_parameters(-1), num_residual_blocks(-1), @@ -264,11 +267,22 @@ StringAppendF(&report, "Total % 20d\n", num_successful_steps + num_unsuccessful_steps); StringAppendF(&report, "\nTime (in seconds):\n"); - StringAppendF(&report, "Preprocessor % 25e\n", + StringAppendF(&report, "Preprocessor % 25.3e\n", preprocessor_time_in_seconds); - StringAppendF(&report, "Minimizer % 25e\n", + + StringAppendF(&report, "\n Residual Evaluations % 22.3e\n", + residual_evaluation_time_in_seconds); + StringAppendF(&report, " Jacobian Evaluations % 22.3e\n", + jacobian_evaluation_time_in_seconds); + StringAppendF(&report, " Linear Solver % 23.3e\n", + linear_solver_time_in_seconds); + StringAppendF(&report, "Minimizer % 25.3e\n\n", minimizer_time_in_seconds); - StringAppendF(&report, "Total % 25e\n", + + StringAppendF(&report, "Postprocessor % 24.3e\n", + postprocessor_time_in_seconds); + + StringAppendF(&report, "Total % 25.3e\n\n", total_time_in_seconds); StringAppendF(&report, "Termination: %25s\n",
diff --git a/internal/ceres/solver_impl.cc b/internal/ceres/solver_impl.cc index 50c96e5..3dfbb67 100644 --- a/internal/ceres/solver_impl.cc +++ b/internal/ceres/solver_impl.cc
@@ -177,6 +177,8 @@ FILE* fptr_; }; +// Iterate over each of the groups in order of their priority and fill +// summary with their sizes. void SummarizeOrdering(ParameterBlockOrdering* ordering, vector<int>* summary) { CHECK_NOTNULL(summary)->clear(); @@ -308,6 +310,7 @@ void SolverImpl::TrustRegionSolve(const Solver::Options& original_options, ProblemImpl* original_problem_impl, Solver::Summary* summary) { + EventLogger event_logger("TrustRegionSolve"); double solver_start_time = WallTimeInSeconds(); Program* original_program = original_problem_impl->mutable_program(); @@ -369,6 +372,7 @@ " currently broken. Ignoring Solver::Options::lsqp_iterations_to_dump"; } + event_logger.AddEvent("Init"); // Evaluate the initial cost, residual vector and the jacobian // matrix if requested by the user. The initial cost needs to be // computed on the original unpreprocessed problem, as it is used to @@ -392,6 +396,8 @@ return; } + event_logger.AddEvent("InitialEvaluate"); + original_program->SetParameterBlockStatePtrsToUserStatePtrs(); // If the user requests gradient checking, construct a new @@ -430,12 +436,15 @@ } } + event_logger.AddEvent("ConstructOrdering"); + // Create the three objects needed to minimize: the transformed program, the // evaluator, and the linear solver. scoped_ptr<Program> reduced_program(CreateReducedProgram(&options, problem_impl, &summary->fixed_cost, &summary->error)); + event_logger.AddEvent("CreateReducedProgram"); if (reduced_program == NULL) { return; } @@ -484,6 +493,7 @@ // Ensure the program state is set to the user parameters on the way out. original_program->SetParameterBlockStatePtrsToUserStatePtrs(); + event_logger.AddEvent("FinalEvaluate"); summary->postprocessor_time_in_seconds = WallTimeInSeconds() - post_process_start_time; return; @@ -491,6 +501,7 @@ scoped_ptr<LinearSolver> linear_solver(CreateLinearSolver(&options, &summary->error)); + event_logger.AddEvent("CreateLinearSolver"); if (linear_solver == NULL) { return; } @@ -527,6 +538,9 @@ problem_impl->parameter_map(), reduced_program.get(), &summary->error)); + + event_logger.AddEvent("CreateEvaluator"); + if (evaluator == NULL) { return; } @@ -549,6 +563,8 @@ } } + event_logger.AddEvent("CreateIIM"); + // The optimizer works on contiguous parameter vectors; allocate some. Vector parameters(reduced_program->NumParameters()); @@ -561,6 +577,7 @@ summary->preprocessor_time_in_seconds = minimizer_start_time - solver_start_time; + event_logger.AddEvent("FinalInit"); // Run the optimization. TrustRegionMinimize(options, reduced_program.get(), @@ -569,6 +586,7 @@ linear_solver.get(), parameters.data(), summary); + event_logger.AddEvent("Minimize"); // If the user aborted mid-optimization or the optimization // terminated because of a numerical failure, then return without @@ -612,15 +630,31 @@ summary->termination_type = NUMERICAL_FAILURE; summary->error = "Unable to evaluate the final cost."; LOG(ERROR) << summary->error; + + event_logger.AddEvent("PostProcess"); return; } // Ensure the program state is set to the user parameters on the way out. original_program->SetParameterBlockStatePtrsToUserStatePtrs(); + const map<string, double>& linear_solver_time_statistics = + linear_solver->TimeStatistics(); + summary->linear_solver_time_in_seconds = + FindWithDefault(linear_solver_time_statistics, "LinearSolver::Solve", 0.0); + + const map<string, double>& evaluator_time_statistics = + evaluator->TimeStatistics(); + + summary->residual_evaluation_time_in_seconds = + FindWithDefault(evaluator_time_statistics, "Evaluator::Residual", 0.0); + summary->jacobian_evaluation_time_in_seconds = + FindWithDefault(evaluator_time_statistics, "Evaluator::Jacobian", 0.0); + // Stick a fork in it, we're done. summary->postprocessor_time_in_seconds = WallTimeInSeconds() - post_process_start_time; + event_logger.AddEvent("PostProcess"); } void SolverImpl::LineSearchSolve(const Solver::Options& original_options, @@ -764,7 +798,7 @@ // any further. summary->termination_type = FUNCTION_TOLERANCE; - double post_process_start_time = WallTimeInSeconds(); + const double post_process_start_time = WallTimeInSeconds(); // Evaluate the final cost, residual vector and the jacobian // matrix if requested by the user. if (!Evaluator::Evaluate(original_program, @@ -809,7 +843,7 @@ Vector original_parameters = parameters; - double minimizer_start_time = WallTimeInSeconds(); + const double minimizer_start_time = WallTimeInSeconds(); summary->preprocessor_time_in_seconds = minimizer_start_time - solver_start_time; @@ -828,7 +862,7 @@ return; } - double post_process_start_time = WallTimeInSeconds(); + const double post_process_start_time = WallTimeInSeconds(); // Push the contiguous optimized parameters back to the user's parameters. reduced_program->StateVectorToParameterBlocks(parameters.data()); @@ -1026,9 +1060,13 @@ ProblemImpl* problem_impl, double* fixed_cost, string* error) { + EventLogger event_logger("CreateReducedProgram"); + CHECK_NOTNULL(options->linear_solver_ordering); Program* original_program = problem_impl->mutable_program(); scoped_ptr<Program> transformed_program(new Program(*original_program)); + event_logger.AddEvent("TransformedProgram"); + ParameterBlockOrdering* linear_solver_ordering = options->linear_solver_ordering; @@ -1043,6 +1081,8 @@ return NULL; } + event_logger.AddEvent("RemovedFixedBlocks"); + if (transformed_program->NumParameterBlocks() == 0) { if (transformed_program->NumResidualBlocks() > 0) { *error = "Zero parameter blocks but non-zero residual blocks" @@ -1076,6 +1116,7 @@ (i < num_eliminate_blocks) ? 0 : 1); } } + event_logger.AddEvent("SchurOrdering"); if (!ApplyUserOrdering(problem_impl->parameter_map(), linear_solver_ordering, @@ -1083,6 +1124,7 @@ error)) { return NULL; } + event_logger.AddEvent("ApplyOrdering"); // If the user requested the use of a Schur type solver, and // supplied a non-NULL linear_solver_ordering object with more than @@ -1123,9 +1165,13 @@ LOG(WARNING) << msg; } + event_logger.AddEvent("AlternateSolver"); + // Since the transformed program is the "active" program, and it is mutated, // update the parameter offsets and indices. transformed_program->SetParameterOffsetsAndIndex(); + + event_logger.AddEvent("SetOffsets"); return transformed_program.release(); }
diff --git a/internal/ceres/sparse_normal_cholesky_solver.cc b/internal/ceres/sparse_normal_cholesky_solver.cc index 9e00b44..6fae204 100644 --- a/internal/ceres/sparse_normal_cholesky_solver.cc +++ b/internal/ceres/sparse_normal_cholesky_solver.cc
@@ -39,12 +39,13 @@ #endif #include "ceres/compressed_row_sparse_matrix.h" +#include "ceres/internal/eigen.h" +#include "ceres/internal/scoped_ptr.h" #include "ceres/linear_solver.h" #include "ceres/suitesparse.h" #include "ceres/triplet_sparse_matrix.h" -#include "ceres/internal/eigen.h" -#include "ceres/internal/scoped_ptr.h" #include "ceres/types.h" +#include "ceres/wall_time.h" namespace ceres { namespace internal { @@ -103,6 +104,8 @@ const double* b, const LinearSolver::PerSolveOptions& per_solve_options, double * x) { + EventLogger event_logger("SparseNormalCholeskySolver::CXSparse::Solve"); + LinearSolver::Summary summary; summary.num_iterations = 1; const int num_cols = A->num_cols(); @@ -136,18 +139,27 @@ A->DeleteRows(num_cols); } + event_logger.AddEvent("Setup"); + // Compute symbolic factorization if not available. if (cxsparse_factor_ == NULL) { cxsparse_factor_ = CHECK_NOTNULL(cxsparse_.AnalyzeCholesky(AtA)); } + event_logger.AddEvent("Analysis"); + + // Solve the linear system. if (cxsparse_.SolveCholesky(AtA, cxsparse_factor_, Atb.data())) { VectorRef(x, Atb.rows()) = Atb; summary.termination_type = TOLERANCE; } + event_logger.AddEvent("Solve"); + cxsparse_.Free(AtA); + + event_logger.AddEvent("Teardown"); return summary; } #else @@ -169,9 +181,9 @@ const double* b, const LinearSolver::PerSolveOptions& per_solve_options, double * x) { - const time_t start_time = time(NULL); - const int num_cols = A->num_cols(); + EventLogger event_logger("SparseNormalCholeskySolver::SuiteSparse::Solve"); + const int num_cols = A->num_cols(); LinearSolver::Summary summary; Vector Atb = Vector::Zero(num_cols); A->LeftMultiply(b, Atb.data()); @@ -189,7 +201,7 @@ CHECK_NOTNULL(lhs.get()); cholmod_dense* rhs = ss_.CreateDenseVector(Atb.data(), num_cols, num_cols); - const time_t init_time = time(NULL); + event_logger.AddEvent("Setup"); if (factor_ == NULL) { if (options_.use_block_amd) { @@ -206,11 +218,10 @@ } CHECK_NOTNULL(factor_); - - const time_t symbolic_time = time(NULL); + event_logger.AddEvent("Analysis"); cholmod_dense* sol = ss_.SolveCholesky(lhs.get(), factor_, rhs); - const time_t solve_time = time(NULL); + event_logger.AddEvent("Solve"); ss_.Free(rhs); rhs = NULL; @@ -228,12 +239,7 @@ summary.termination_type = TOLERANCE; } - const time_t cleanup_time = time(NULL); - VLOG(2) << "time (sec) total: " << (cleanup_time - start_time) - << " init: " << (init_time - start_time) - << " symbolic: " << (symbolic_time - init_time) - << " solve: " << (solve_time - symbolic_time) - << " cleanup: " << (cleanup_time - solve_time); + event_logger.AddEvent("Teardown"); return summary; } #else
diff --git a/internal/ceres/sparse_normal_cholesky_solver.h b/internal/ceres/sparse_normal_cholesky_solver.h index 40d9e0a..8d48096 100644 --- a/internal/ceres/sparse_normal_cholesky_solver.h +++ b/internal/ceres/sparse_normal_cholesky_solver.h
@@ -35,8 +35,8 @@ #define CERES_INTERNAL_SPARSE_NORMAL_CHOLESKY_SOLVER_H_ #include "ceres/cxsparse.h" -#include "ceres/linear_solver.h" #include "ceres/internal/macros.h" +#include "ceres/linear_solver.h" #include "ceres/suitesparse.h" namespace ceres {
diff --git a/internal/ceres/wall_time.cc b/internal/ceres/wall_time.cc index 0dce19f..8dc4e1f 100644 --- a/internal/ceres/wall_time.cc +++ b/internal/ceres/wall_time.cc
@@ -28,12 +28,20 @@ // // Author: strandmark@google.com (Petter Strandmark) +#include "ceres/wall_time.h" + #ifdef CERES_USE_OPENMP #include <omp.h> #else #include <ctime> #endif +#ifdef _WIN32 +#include <windows.h> +#else +#include <sys/time.h> +#endif + namespace ceres { namespace internal { @@ -41,8 +49,47 @@ #ifdef CERES_USE_OPENMP return omp_get_wtime(); #else +#ifdef _WIN32 return static_cast<double>(std::time(NULL)); +#else + timeval time_val; + gettimeofday(&time_val, NULL); + return (time_val.tv_sec + time_val.tv_usec * 1e-6); #endif +#endif +} + +EventLogger::EventLogger(const string& logger_name) + : start_time_(WallTimeInSeconds()), + last_event_time_(start_time_), + events_("") { + StringAppendF(&events_, + "\n%s\n Delta Cumulative\n", + logger_name.c_str()); +} + +EventLogger::~EventLogger() { + if (VLOG_IS_ON(3)) { + AddEvent("Total"); + VLOG(2) << "\n" << events_ << "\n"; + } +} + +void EventLogger::AddEvent(const string& event_name) { + if (!VLOG_IS_ON(3)) { + return; + } + + const double current_time = WallTimeInSeconds(); + const double relative_time_delta = current_time - last_event_time_; + const double absolute_time_delta = current_time - start_time_; + last_event_time_ = current_time; + + StringAppendF(&events_, + " %25s : %5.3e %5.3e\n", + event_name.c_str(), + relative_time_delta, + absolute_time_delta); } } // namespace internal
diff --git a/internal/ceres/wall_time.h b/internal/ceres/wall_time.h index 1a6e3bb..1dbd6e9 100644 --- a/internal/ceres/wall_time.h +++ b/internal/ceres/wall_time.h
@@ -31,13 +31,57 @@ #ifndef CERES_INTERNAL_WALL_TIME_H_ #define CERES_INTERNAL_WALL_TIME_H_ +#include <map> + +#include "ceres/internal/port.h" +#include "ceres/stringprintf.h" +#include "glog/logging.h" + namespace ceres { namespace internal { -// Returns time, in seconds, from some arbitrary starting point. Has very -// high precision if OpenMP is available, otherwise only second granularity. +// Returns time, in seconds, from some arbitrary starting point. If +// OpenMP is available then the high precision openmp_get_wtime() +// function is used. Otherwise on unixes, gettimeofday is used. The +// granularity is in seconds on windows systems. double WallTimeInSeconds(); +// Log a series of events, recording for each event the time elapsed +// since the last event and since the creation of the object. +// +// The information is output to VLOG(3) upon destruction. A +// name::Total event is added as the final event right before +// destruction. +// +// Example usage: +// +// void Foo() { +// EventLogger event_logger("Foo"); +// Bar1(); +// event_logger.AddEvent("Bar1") +// Bar2(); +// event_logger.AddEvent("Bar2") +// Bar3(); +// } +// +// Will produce output that looks like +// +// Foo +// Bar1: time1 time1 +// Bar2: time2 time1 + time2; +// Total: time3 time1 + time2 + time3; +class EventLogger { + public: + EventLogger(const string& logger_name); + ~EventLogger(); + void AddEvent(const string& event_name); + + private: + const double start_time_; + double last_event_time_; + string events_; +}; + } // namespace internal } // namespace ceres