Expand reporting of timing information.

1. Add an ExecutionSummary object to record execution
   information about Ceres objects.
2. Add an EventLogger object to log events in a function call.
3. Add a ScopedExecutionTimer object to log times in ExecutionSummary.
4. Instrument ProgramEvaluator and all the linear solvers
   to report their timing statistics.
5. Connect the timing statistics to Summary::FullReport.
6. Add high precision timer on unix systems using
   gettimeofday() call.
7. Various minor clean ups all around.

Change-Id: I5e09804b730b09535484124be7dbc1c58eccd1d4
diff --git a/include/ceres/solver.h b/include/ceres/solver.h
index 9ad7e50..cb858ba 100644
--- a/include/ceres/solver.h
+++ b/include/ceres/solver.h
@@ -664,6 +664,10 @@
     // Some total of all time spent inside Ceres when Solve is called.
     double total_time_in_seconds;
 
+    double linear_solver_time_in_seconds;
+    double residual_evaluation_time_in_seconds;
+    double jacobian_evaluation_time_in_seconds;
+
     // Preprocessor summary.
     int num_parameter_blocks;
     int num_parameters;
diff --git a/internal/ceres/cgnr_solver.cc b/internal/ceres/cgnr_solver.cc
index ccc8026..5ef521c 100644
--- a/internal/ceres/cgnr_solver.cc
+++ b/internal/ceres/cgnr_solver.cc
@@ -30,11 +30,12 @@
 
 #include "ceres/cgnr_solver.h"
 
-#include "glog/logging.h"
-#include "ceres/linear_solver.h"
+#include "ceres/block_jacobi_preconditioner.h"
 #include "ceres/cgnr_linear_operator.h"
 #include "ceres/conjugate_gradients_solver.h"
-#include "ceres/block_jacobi_preconditioner.h"
+#include "ceres/linear_solver.h"
+#include "ceres/wall_time.h"
+#include "glog/logging.h"
 
 namespace ceres {
 namespace internal {
@@ -49,6 +50,8 @@
     const double* b,
     const LinearSolver::PerSolveOptions& per_solve_options,
     double* x) {
+  EventLogger event_logger("CgnrSolver::Solve");
+
   // Form z = Atb.
   scoped_array<double> z(new double[A->num_cols()]);
   std::fill(z.get(), z.get() + A->num_cols(), 0.0);
@@ -69,11 +72,14 @@
   // Solve (AtA + DtD)x = z (= Atb).
   std::fill(x, x + A->num_cols(), 0.0);
   CgnrLinearOperator lhs(*A, per_solve_options.D);
+  event_logger.AddEvent("Setup");
+
   ConjugateGradientsSolver conjugate_gradient_solver(options_);
-  return conjugate_gradient_solver.Solve(&lhs,
-                                         z.get(),
-                                         cg_per_solve_options,
-                                         x);
+  LinearSolver::Summary summary =
+      conjugate_gradient_solver.Solve(&lhs, z.get(), cg_per_solve_options, x);
+  event_logger.AddEvent("Solve");
+
+  return summary;
 }
 
 }  // namespace internal
diff --git a/internal/ceres/dense_normal_cholesky_solver.cc b/internal/ceres/dense_normal_cholesky_solver.cc
index f6bb99a..a340e16 100644
--- a/internal/ceres/dense_normal_cholesky_solver.cc
+++ b/internal/ceres/dense_normal_cholesky_solver.cc
@@ -34,10 +34,11 @@
 
 #include "Eigen/Dense"
 #include "ceres/dense_sparse_matrix.h"
-#include "ceres/linear_solver.h"
 #include "ceres/internal/eigen.h"
 #include "ceres/internal/scoped_ptr.h"
+#include "ceres/linear_solver.h"
 #include "ceres/types.h"
+#include "ceres/wall_time.h"
 
 namespace ceres {
 namespace internal {
@@ -51,6 +52,8 @@
     const double* b,
     const LinearSolver::PerSolveOptions& per_solve_options,
     double* x) {
+  EventLogger event_logger("DenseNormalCholeskySolver::Solve");
+
   const int num_rows = A->num_rows();
   const int num_cols = A->num_cols();
 
@@ -58,6 +61,7 @@
   Matrix lhs(num_cols, num_cols);
   lhs.setZero();
 
+  event_logger.AddEvent("Setup");
   //   lhs += A'A
   //
   // Using rankUpdate instead of GEMM, exposes the fact that its the
@@ -73,12 +77,13 @@
     lhs += D.array().square().matrix().asDiagonal();
   }
 
-  VectorRef(x, num_cols) =
-      lhs.selfadjointView<Eigen::Upper>().ldlt().solve(rhs);
-
   LinearSolver::Summary summary;
   summary.num_iterations = 1;
   summary.termination_type = TOLERANCE;
+  VectorRef(x, num_cols) =
+      lhs.selfadjointView<Eigen::Upper>().ldlt().solve(rhs);
+  event_logger.AddEvent("Solve");
+
   return summary;
 }
 
diff --git a/internal/ceres/dense_qr_solver.cc b/internal/ceres/dense_qr_solver.cc
index 7aec450..21b6b15 100644
--- a/internal/ceres/dense_qr_solver.cc
+++ b/internal/ceres/dense_qr_solver.cc
@@ -34,10 +34,11 @@
 
 #include "Eigen/Dense"
 #include "ceres/dense_sparse_matrix.h"
-#include "ceres/linear_solver.h"
 #include "ceres/internal/eigen.h"
 #include "ceres/internal/scoped_ptr.h"
+#include "ceres/linear_solver.h"
 #include "ceres/types.h"
+#include "ceres/wall_time.h"
 
 namespace ceres {
 namespace internal {
@@ -50,10 +51,10 @@
     const double* b,
     const LinearSolver::PerSolveOptions& per_solve_options,
     double* x) {
+  EventLogger event_logger("DenseQRSolver::Solve");
+
   const int num_rows = A->num_rows();
   const int num_cols = A->num_cols();
-  VLOG(2) << "DenseQRSolver: "
-          << num_rows << " x " << num_cols << " system.";
 
   if (per_solve_options.D != NULL) {
     // Temporarily append a diagonal block to the A matrix, but undo
@@ -68,9 +69,11 @@
     rhs_.setZero();
   }
   rhs_.head(num_rows) = ConstVectorRef(b, num_rows);
+  event_logger.AddEvent("Setup");
 
   // Solve the system.
   VectorRef(x, num_cols) = A->matrix().colPivHouseholderQr().solve(rhs_);
+  event_logger.AddEvent("Solve");
 
   if (per_solve_options.D != NULL) {
     // Undo the modifications to the matrix A.
@@ -83,6 +86,8 @@
   LinearSolver::Summary summary;
   summary.num_iterations = 1;
   summary.termination_type = TOLERANCE;
+
+  event_logger.AddEvent("TearDown");
   return summary;
 }
 
diff --git a/internal/ceres/evaluator.h b/internal/ceres/evaluator.h
index 600feda..76cf1e0 100644
--- a/internal/ceres/evaluator.h
+++ b/internal/ceres/evaluator.h
@@ -155,6 +155,17 @@
   // The number of residuals in the optimization problem.
   virtual int NumResiduals() const = 0;
 
+  // The following two methods return copies instead of references so
+  // that the base class implementation does not have to worry about
+  // life time issues. Further, these calls are not expected to be
+  // frequent or performance sensitive.
+  virtual map<string, int> CallStatistics() const {
+    return map<string, int>();
+  }
+
+  virtual map<string, double> TimeStatistics() const {
+    return map<string, double>();
+  }
 };
 
 }  // namespace internal
diff --git a/internal/ceres/execution_summary.h b/internal/ceres/execution_summary.h
index 02a4448..2eb3c1a 100644
--- a/internal/ceres/execution_summary.h
+++ b/internal/ceres/execution_summary.h
@@ -35,6 +35,8 @@
 #include <string>
 
 #include "ceres/internal/port.h"
+#include "ceres/wall_time.h"
+#include "ceres/mutex.h"
 
 namespace ceres {
 namespace internal {
@@ -42,8 +44,44 @@
 // Struct used by various objects to report statistics and other
 // information about their execution. e.g., ExecutionSummary::times
 // can be used for reporting times associated with various activities.
-struct ExecutionSummary {
-  map<string, double> times;
+class ExecutionSummary {
+ public:
+  void IncrementTimeBy(const string& name, const double value) {
+    CeresMutexLock l(&times_mutex_);
+    times_[name] += value;
+  }
+
+  void IncrementCall(const string& name) {
+    CeresMutexLock l(&calls_mutex_);
+    calls_[name] += 1;
+  }
+
+  const map<string, double>& times() const { return times_; };
+  const map<string, int>& calls() const { return calls_; };
+
+ private:
+  Mutex times_mutex_;
+  map<string, double> times_;
+
+  Mutex calls_mutex_;
+  map<string, int> calls_;
+};
+
+class ScopedExecutionTimer {
+ public:
+  ScopedExecutionTimer(const string& name, ExecutionSummary* summary)
+      : start_time_(WallTimeInSeconds()),
+        name_(name),
+        summary_(summary) {}
+
+  ~ScopedExecutionTimer() {
+    summary_->IncrementTimeBy(name_, WallTimeInSeconds() - start_time_);
+  }
+
+ private:
+  const double start_time_;
+  const string name_;
+  ExecutionSummary* summary_;
 };
 
 }  // namespace internal
diff --git a/internal/ceres/iterative_schur_complement_solver.cc b/internal/ceres/iterative_schur_complement_solver.cc
index 376a586..cb2d827 100644
--- a/internal/ceres/iterative_schur_complement_solver.cc
+++ b/internal/ceres/iterative_schur_complement_solver.cc
@@ -33,6 +33,7 @@
 #include <algorithm>
 #include <cstring>
 #include <vector>
+
 #include "Eigen/Dense"
 #include "ceres/block_sparse_matrix.h"
 #include "ceres/block_structure.h"
@@ -44,6 +45,7 @@
 #include "ceres/triplet_sparse_matrix.h"
 #include "ceres/types.h"
 #include "ceres/visibility_based_preconditioner.h"
+#include "ceres/wall_time.h"
 #include "glog/logging.h"
 
 namespace ceres {
@@ -62,6 +64,8 @@
     const double* b,
     const LinearSolver::PerSolveOptions& per_solve_options,
     double* x) {
+  EventLogger event_logger("IterativeSchurComplementSolver::Solve");
+
   CHECK_NOTNULL(A->block_structure());
 
   // Initialize a ImplicitSchurComplement object.
@@ -121,6 +125,7 @@
     default:
       LOG(FATAL) << "Unknown Preconditioner Type";
   }
+  event_logger.AddEvent("Setup");
 
   LinearSolver::Summary cg_summary;
   cg_summary.num_iterations = 0;
@@ -138,6 +143,8 @@
   }
 
   VLOG(2) << "CG Iterations : " << cg_summary.num_iterations;
+
+  event_logger.AddEvent("Solve");
   return cg_summary;
 }
 
diff --git a/internal/ceres/iterative_schur_complement_solver.h b/internal/ceres/iterative_schur_complement_solver.h
index cfeb65e..862453c 100644
--- a/internal/ceres/iterative_schur_complement_solver.h
+++ b/internal/ceres/iterative_schur_complement_solver.h
@@ -86,6 +86,7 @@
   scoped_ptr<internal::ImplicitSchurComplement> schur_complement_;
   scoped_ptr<VisibilityBasedPreconditioner> visibility_based_preconditioner_;
   Vector reduced_linear_system_solution_;
+  CERES_DISALLOW_COPY_AND_ASSIGN(IterativeSchurComplementSolver);
 };
 
 }  // namespace internal
diff --git a/internal/ceres/linear_solver.h b/internal/ceres/linear_solver.h
index 29959d3..219939e 100644
--- a/internal/ceres/linear_solver.h
+++ b/internal/ceres/linear_solver.h
@@ -43,6 +43,7 @@
 #include "ceres/compressed_row_sparse_matrix.h"
 #include "ceres/dense_sparse_matrix.h"
 #include "ceres/execution_summary.h"
+#include "ceres/execution_summary.h"
 #include "ceres/triplet_sparse_matrix.h"
 #include "ceres/types.h"
 
@@ -256,6 +257,18 @@
                         const PerSolveOptions& per_solve_options,
                         double* x) = 0;
 
+  // The following two methods return copies instead of references so
+  // that the base class implementation does not have to worry about
+  // life time issues. Further, these calls are not expected to be
+  // frequent or performance sensitive.
+  virtual map<string, int> CallStatistics() const {
+    return map<string, int>();
+  }
+
+  virtual map<string, double> TimeStatistics() const {
+    return map<string, double>();
+  }
+
   // Factory
   static LinearSolver* Create(const Options& options);
 };
@@ -276,18 +289,29 @@
       const double* b,
       const LinearSolver::PerSolveOptions& per_solve_options,
       double* x) {
+    ScopedExecutionTimer total_time("LinearSolver::Solve", &execution_summary_);
     CHECK_NOTNULL(A);
     CHECK_NOTNULL(b);
     CHECK_NOTNULL(x);
     return SolveImpl(down_cast<MatrixType*>(A), b, per_solve_options, x);
   }
 
+  virtual map<string, int> CallStatistics() const {
+    return execution_summary_.calls();
+  }
+
+  virtual map<string, double> TimeStatistics() const {
+    return execution_summary_.times();
+  }
+
  private:
   virtual LinearSolver::Summary SolveImpl(
       MatrixType* A,
       const double* b,
       const LinearSolver::PerSolveOptions& per_solve_options,
       double* x) = 0;
+
+  ExecutionSummary execution_summary_;
 };
 
 // Linear solvers that depend on acccess to the low level structure of
diff --git a/internal/ceres/program_evaluator.h b/internal/ceres/program_evaluator.h
index 1ecf4aa..e9640ab 100644
--- a/internal/ceres/program_evaluator.h
+++ b/internal/ceres/program_evaluator.h
@@ -88,6 +88,7 @@
 #include "ceres/residual_block.h"
 #include "ceres/internal/eigen.h"
 #include "ceres/internal/scoped_ptr.h"
+#include "ceres/execution_summary.h"
 
 namespace ceres {
 namespace internal {
@@ -122,6 +123,12 @@
                 double* residuals,
                 double* gradient,
                 SparseMatrix* jacobian) {
+    ScopedExecutionTimer total_timer("Evaluator::Total", &execution_summary_);
+    ScopedExecutionTimer call_type_timer(gradient == NULL && jacobian == NULL
+                                         ? "Evaluator::Residual"
+                                         : "Evaluator::Jacobian",
+                                         &execution_summary_);
+
     // The parameters are stateful, so set the state before evaluating.
     if (!program_->StateVectorToParameterBlocks(state)) {
       return false;
@@ -266,6 +273,14 @@
     return program_->NumResiduals();
   }
 
+  virtual map<string, int> CallStatistics() const {
+    return execution_summary_.calls();
+  }
+
+  virtual map<string, double> TimeStatistics() const {
+    return execution_summary_.times();
+  }
+
  private:
   // Per-thread scratch space needed to evaluate and store each residual block.
   struct EvaluateScratch {
@@ -331,6 +346,7 @@
   scoped_array<EvaluatePreparer> evaluate_preparers_;
   scoped_array<EvaluateScratch> evaluate_scratch_;
   vector<int> residual_layout_;
+  ::ceres::internal::ExecutionSummary execution_summary_;
 };
 
 }  // namespace internal
diff --git a/internal/ceres/schur_complement_solver.cc b/internal/ceres/schur_complement_solver.cc
index 9b7d4e5..1753759 100644
--- a/internal/ceres/schur_complement_solver.cc
+++ b/internal/ceres/schur_complement_solver.cc
@@ -44,15 +44,15 @@
 #include "ceres/block_sparse_matrix.h"
 #include "ceres/block_structure.h"
 #include "ceres/detect_structure.h"
+#include "ceres/internal/eigen.h"
+#include "ceres/internal/port.h"
+#include "ceres/internal/scoped_ptr.h"
 #include "ceres/linear_solver.h"
 #include "ceres/schur_complement_solver.h"
 #include "ceres/suitesparse.h"
 #include "ceres/triplet_sparse_matrix.h"
-#include "ceres/internal/eigen.h"
-#include "ceres/internal/port.h"
-#include "ceres/internal/scoped_ptr.h"
 #include "ceres/types.h"
-
+#include "ceres/wall_time.h"
 
 namespace ceres {
 namespace internal {
@@ -62,7 +62,8 @@
     const double* b,
     const LinearSolver::PerSolveOptions& per_solve_options,
     double* x) {
-  const time_t start_time = time(NULL);
+  EventLogger event_logger("SchurComplementSolver::Solve");
+
   if (eliminator_.get() == NULL) {
     InitStorage(A->block_structure());
     DetectStructure(*A->block_structure(),
@@ -73,32 +74,27 @@
     eliminator_.reset(CHECK_NOTNULL(SchurEliminatorBase::Create(options_)));
     eliminator_->Init(options_.elimination_groups[0], A->block_structure());
   };
-  const time_t init_time = time(NULL);
   fill(x, x + A->num_cols(), 0.0);
+  event_logger.AddEvent("Setup");
 
   LinearSolver::Summary summary;
   summary.num_iterations = 1;
   summary.termination_type = FAILURE;
   eliminator_->Eliminate(A, b, per_solve_options.D, lhs_.get(), rhs_.get());
-  const time_t eliminate_time = time(NULL);
+  event_logger.AddEvent("Eliminate");
 
   double* reduced_solution = x + A->num_cols() - lhs_->num_cols();
   const bool status = SolveReducedLinearSystem(reduced_solution);
-  const time_t solve_time = time(NULL);
+  event_logger.AddEvent("ReducedSolve");
 
   if (!status) {
     return summary;
   }
 
   eliminator_->BackSubstitute(A, b, per_solve_options.D, reduced_solution, x);
-  const time_t backsubstitute_time = time(NULL);
   summary.termination_type = TOLERANCE;
 
-  VLOG(2) << "time (sec) total: " << (backsubstitute_time - start_time)
-          << " init: " << (init_time - start_time)
-          << " eliminate: " << (eliminate_time - init_time)
-          << " solve: " << (solve_time - eliminate_time)
-          << " backsubstitute: " << (backsubstitute_time - solve_time);
+  event_logger.AddEvent("BackSubstitute");
   return summary;
 }
 
@@ -267,8 +263,6 @@
 // CHOLMOD's sparse cholesky factorization routines.
 bool SparseSchurComplementSolver::SolveReducedLinearSystemUsingSuiteSparse(
     double* solution) {
-  const time_t start_time = time(NULL);
-
   TripletSparseMatrix* tsm =
       const_cast<TripletSparseMatrix*>(
           down_cast<const BlockRandomAccessSparseMatrix*>(lhs())->matrix());
@@ -285,11 +279,9 @@
   // The matrix is symmetric, and the upper triangular part of the
   // matrix contains the values.
   cholmod_lhs->stype = 1;
-  const time_t lhs_time = time(NULL);
 
   cholmod_dense*  cholmod_rhs =
       ss_.CreateDenseVector(const_cast<double*>(rhs()), num_rows, num_rows);
-  const time_t rhs_time = time(NULL);
 
   // Symbolic factorization is computed if we don't already have one handy.
   if (factor_ == NULL) {
@@ -306,12 +298,9 @@
 
   CHECK_NOTNULL(factor_);
 
-  const time_t symbolic_time = time(NULL);
   cholmod_dense* cholmod_solution =
       ss_.SolveCholesky(cholmod_lhs, factor_, cholmod_rhs);
 
-  const time_t solve_time = time(NULL);
-
   ss_.Free(cholmod_lhs);
   cholmod_lhs = NULL;
   ss_.Free(cholmod_rhs);
@@ -325,13 +314,6 @@
   VectorRef(solution, num_rows)
       = VectorRef(static_cast<double*>(cholmod_solution->x), num_rows);
   ss_.Free(cholmod_solution);
-  const time_t final_time = time(NULL);
-  VLOG(2) << "time: " << (final_time - start_time)
-          << " lhs : " << (lhs_time - start_time)
-          << " rhs:  " << (rhs_time - lhs_time)
-          << " analyze: " <<  (symbolic_time - rhs_time)
-          << " factor_and_solve: " << (solve_time - symbolic_time)
-          << " cleanup: " << (final_time - solve_time);
   return true;
 }
 #else
diff --git a/internal/ceres/solver.cc b/internal/ceres/solver.cc
index af32629..2ae255a 100644
--- a/internal/ceres/solver.cc
+++ b/internal/ceres/solver.cc
@@ -94,6 +94,9 @@
       minimizer_time_in_seconds(-1.0),
       postprocessor_time_in_seconds(-1.0),
       total_time_in_seconds(-1.0),
+      linear_solver_time_in_seconds(-1.0),
+      residual_evaluation_time_in_seconds(-1.0),
+      jacobian_evaluation_time_in_seconds(-1.0),
       num_parameter_blocks(-1),
       num_parameters(-1),
       num_residual_blocks(-1),
@@ -264,11 +267,22 @@
   StringAppendF(&report, "Total                    % 20d\n",
                 num_successful_steps + num_unsuccessful_steps);
   StringAppendF(&report, "\nTime (in seconds):\n");
-  StringAppendF(&report, "Preprocessor        % 25e\n",
+  StringAppendF(&report, "Preprocessor        % 25.3e\n",
                 preprocessor_time_in_seconds);
-  StringAppendF(&report, "Minimizer           % 25e\n",
+
+  StringAppendF(&report, "\n  Residual Evaluations % 22.3e\n",
+                residual_evaluation_time_in_seconds);
+  StringAppendF(&report, "  Jacobian Evaluations % 22.3e\n",
+                jacobian_evaluation_time_in_seconds);
+  StringAppendF(&report, "  Linear Solver       % 23.3e\n",
+                linear_solver_time_in_seconds);
+  StringAppendF(&report, "Minimizer           % 25.3e\n\n",
                 minimizer_time_in_seconds);
-  StringAppendF(&report, "Total               % 25e\n",
+
+  StringAppendF(&report, "Postprocessor        % 24.3e\n",
+                postprocessor_time_in_seconds);
+
+  StringAppendF(&report, "Total               % 25.3e\n\n",
                 total_time_in_seconds);
 
   StringAppendF(&report, "Termination:        %25s\n",
diff --git a/internal/ceres/solver_impl.cc b/internal/ceres/solver_impl.cc
index 50c96e5..3dfbb67 100644
--- a/internal/ceres/solver_impl.cc
+++ b/internal/ceres/solver_impl.cc
@@ -177,6 +177,8 @@
     FILE* fptr_;
 };
 
+// Iterate over each of the groups in order of their priority and fill
+// summary with their sizes.
 void SummarizeOrdering(ParameterBlockOrdering* ordering,
                        vector<int>* summary) {
   CHECK_NOTNULL(summary)->clear();
@@ -308,6 +310,7 @@
 void SolverImpl::TrustRegionSolve(const Solver::Options& original_options,
                                   ProblemImpl* original_problem_impl,
                                   Solver::Summary* summary) {
+  EventLogger event_logger("TrustRegionSolve");
   double solver_start_time = WallTimeInSeconds();
 
   Program* original_program = original_problem_impl->mutable_program();
@@ -369,6 +372,7 @@
         " currently broken. Ignoring Solver::Options::lsqp_iterations_to_dump";
   }
 
+  event_logger.AddEvent("Init");
   // Evaluate the initial cost, residual vector and the jacobian
   // matrix if requested by the user. The initial cost needs to be
   // computed on the original unpreprocessed problem, as it is used to
@@ -392,6 +396,8 @@
     return;
   }
 
+  event_logger.AddEvent("InitialEvaluate");
+
   original_program->SetParameterBlockStatePtrsToUserStatePtrs();
 
   // If the user requests gradient checking, construct a new
@@ -430,12 +436,15 @@
     }
   }
 
+  event_logger.AddEvent("ConstructOrdering");
+
   // Create the three objects needed to minimize: the transformed program, the
   // evaluator, and the linear solver.
   scoped_ptr<Program> reduced_program(CreateReducedProgram(&options,
                                                            problem_impl,
                                                            &summary->fixed_cost,
                                                            &summary->error));
+  event_logger.AddEvent("CreateReducedProgram");
   if (reduced_program == NULL) {
     return;
   }
@@ -484,6 +493,7 @@
     // Ensure the program state is set to the user parameters on the way out.
     original_program->SetParameterBlockStatePtrsToUserStatePtrs();
 
+    event_logger.AddEvent("FinalEvaluate");
     summary->postprocessor_time_in_seconds =
         WallTimeInSeconds() - post_process_start_time;
     return;
@@ -491,6 +501,7 @@
 
   scoped_ptr<LinearSolver>
       linear_solver(CreateLinearSolver(&options, &summary->error));
+  event_logger.AddEvent("CreateLinearSolver");
   if (linear_solver == NULL) {
     return;
   }
@@ -527,6 +538,9 @@
                                                   problem_impl->parameter_map(),
                                                   reduced_program.get(),
                                                   &summary->error));
+
+  event_logger.AddEvent("CreateEvaluator");
+
   if (evaluator == NULL) {
     return;
   }
@@ -549,6 +563,8 @@
     }
   }
 
+  event_logger.AddEvent("CreateIIM");
+
   // The optimizer works on contiguous parameter vectors; allocate some.
   Vector parameters(reduced_program->NumParameters());
 
@@ -561,6 +577,7 @@
   summary->preprocessor_time_in_seconds =
       minimizer_start_time - solver_start_time;
 
+  event_logger.AddEvent("FinalInit");
   // Run the optimization.
   TrustRegionMinimize(options,
                       reduced_program.get(),
@@ -569,6 +586,7 @@
                       linear_solver.get(),
                       parameters.data(),
                       summary);
+  event_logger.AddEvent("Minimize");
 
   // If the user aborted mid-optimization or the optimization
   // terminated because of a numerical failure, then return without
@@ -612,15 +630,31 @@
     summary->termination_type = NUMERICAL_FAILURE;
     summary->error = "Unable to evaluate the final cost.";
     LOG(ERROR) << summary->error;
+
+    event_logger.AddEvent("PostProcess");
     return;
   }
 
   // Ensure the program state is set to the user parameters on the way out.
   original_program->SetParameterBlockStatePtrsToUserStatePtrs();
 
+  const map<string, double>& linear_solver_time_statistics =
+      linear_solver->TimeStatistics();
+  summary->linear_solver_time_in_seconds =
+      FindWithDefault(linear_solver_time_statistics, "LinearSolver::Solve", 0.0);
+
+  const map<string, double>& evaluator_time_statistics =
+      evaluator->TimeStatistics();
+
+  summary->residual_evaluation_time_in_seconds =
+      FindWithDefault(evaluator_time_statistics, "Evaluator::Residual", 0.0);
+  summary->jacobian_evaluation_time_in_seconds =
+      FindWithDefault(evaluator_time_statistics, "Evaluator::Jacobian", 0.0);
+
   // Stick a fork in it, we're done.
   summary->postprocessor_time_in_seconds =
       WallTimeInSeconds() - post_process_start_time;
+  event_logger.AddEvent("PostProcess");
 }
 
 void SolverImpl::LineSearchSolve(const Solver::Options& original_options,
@@ -764,7 +798,7 @@
     // any further.
     summary->termination_type = FUNCTION_TOLERANCE;
 
-    double post_process_start_time = WallTimeInSeconds();
+    const double post_process_start_time = WallTimeInSeconds();
     // Evaluate the final cost, residual vector and the jacobian
     // matrix if requested by the user.
     if (!Evaluator::Evaluate(original_program,
@@ -809,7 +843,7 @@
 
   Vector original_parameters = parameters;
 
-  double minimizer_start_time = WallTimeInSeconds();
+  const double minimizer_start_time = WallTimeInSeconds();
   summary->preprocessor_time_in_seconds =
       minimizer_start_time - solver_start_time;
 
@@ -828,7 +862,7 @@
     return;
   }
 
-  double post_process_start_time = WallTimeInSeconds();
+  const double post_process_start_time = WallTimeInSeconds();
 
   // Push the contiguous optimized parameters back to the user's parameters.
   reduced_program->StateVectorToParameterBlocks(parameters.data());
@@ -1026,9 +1060,13 @@
                                           ProblemImpl* problem_impl,
                                           double* fixed_cost,
                                           string* error) {
+  EventLogger event_logger("CreateReducedProgram");
+
   CHECK_NOTNULL(options->linear_solver_ordering);
   Program* original_program = problem_impl->mutable_program();
   scoped_ptr<Program> transformed_program(new Program(*original_program));
+  event_logger.AddEvent("TransformedProgram");
+
   ParameterBlockOrdering* linear_solver_ordering =
       options->linear_solver_ordering;
 
@@ -1043,6 +1081,8 @@
     return NULL;
   }
 
+  event_logger.AddEvent("RemovedFixedBlocks");
+
   if (transformed_program->NumParameterBlocks() == 0) {
     if (transformed_program->NumResidualBlocks() > 0) {
       *error = "Zero parameter blocks but non-zero residual blocks"
@@ -1076,6 +1116,7 @@
           (i < num_eliminate_blocks) ? 0 : 1);
     }
   }
+  event_logger.AddEvent("SchurOrdering");
 
   if (!ApplyUserOrdering(problem_impl->parameter_map(),
                          linear_solver_ordering,
@@ -1083,6 +1124,7 @@
                          error)) {
     return NULL;
   }
+  event_logger.AddEvent("ApplyOrdering");
 
   // If the user requested the use of a Schur type solver, and
   // supplied a non-NULL linear_solver_ordering object with more than
@@ -1123,9 +1165,13 @@
     LOG(WARNING) << msg;
   }
 
+  event_logger.AddEvent("AlternateSolver");
+
   // Since the transformed program is the "active" program, and it is mutated,
   // update the parameter offsets and indices.
   transformed_program->SetParameterOffsetsAndIndex();
+
+  event_logger.AddEvent("SetOffsets");
   return transformed_program.release();
 }
 
diff --git a/internal/ceres/sparse_normal_cholesky_solver.cc b/internal/ceres/sparse_normal_cholesky_solver.cc
index 9e00b44..6fae204 100644
--- a/internal/ceres/sparse_normal_cholesky_solver.cc
+++ b/internal/ceres/sparse_normal_cholesky_solver.cc
@@ -39,12 +39,13 @@
 #endif
 
 #include "ceres/compressed_row_sparse_matrix.h"
+#include "ceres/internal/eigen.h"
+#include "ceres/internal/scoped_ptr.h"
 #include "ceres/linear_solver.h"
 #include "ceres/suitesparse.h"
 #include "ceres/triplet_sparse_matrix.h"
-#include "ceres/internal/eigen.h"
-#include "ceres/internal/scoped_ptr.h"
 #include "ceres/types.h"
+#include "ceres/wall_time.h"
 
 namespace ceres {
 namespace internal {
@@ -103,6 +104,8 @@
     const double* b,
     const LinearSolver::PerSolveOptions& per_solve_options,
     double * x) {
+  EventLogger event_logger("SparseNormalCholeskySolver::CXSparse::Solve");
+
   LinearSolver::Summary summary;
   summary.num_iterations = 1;
   const int num_cols = A->num_cols();
@@ -136,18 +139,27 @@
     A->DeleteRows(num_cols);
   }
 
+  event_logger.AddEvent("Setup");
+
   // Compute symbolic factorization if not available.
   if (cxsparse_factor_ == NULL) {
     cxsparse_factor_ = CHECK_NOTNULL(cxsparse_.AnalyzeCholesky(AtA));
   }
 
+  event_logger.AddEvent("Analysis");
+
+
   // Solve the linear system.
   if (cxsparse_.SolveCholesky(AtA, cxsparse_factor_, Atb.data())) {
     VectorRef(x, Atb.rows()) = Atb;
     summary.termination_type = TOLERANCE;
   }
 
+  event_logger.AddEvent("Solve");
+
   cxsparse_.Free(AtA);
+
+  event_logger.AddEvent("Teardown");
   return summary;
 }
 #else
@@ -169,9 +181,9 @@
     const double* b,
     const LinearSolver::PerSolveOptions& per_solve_options,
     double * x) {
-  const time_t start_time = time(NULL);
-  const int num_cols = A->num_cols();
+  EventLogger event_logger("SparseNormalCholeskySolver::SuiteSparse::Solve");
 
+  const int num_cols = A->num_cols();
   LinearSolver::Summary summary;
   Vector Atb = Vector::Zero(num_cols);
   A->LeftMultiply(b, Atb.data());
@@ -189,7 +201,7 @@
   CHECK_NOTNULL(lhs.get());
 
   cholmod_dense* rhs = ss_.CreateDenseVector(Atb.data(), num_cols, num_cols);
-  const time_t init_time = time(NULL);
+  event_logger.AddEvent("Setup");
 
   if (factor_ == NULL) {
     if (options_.use_block_amd) {
@@ -206,11 +218,10 @@
   }
 
   CHECK_NOTNULL(factor_);
-
-  const time_t symbolic_time = time(NULL);
+  event_logger.AddEvent("Analysis");
 
   cholmod_dense* sol = ss_.SolveCholesky(lhs.get(), factor_, rhs);
-  const time_t solve_time = time(NULL);
+  event_logger.AddEvent("Solve");
 
   ss_.Free(rhs);
   rhs = NULL;
@@ -228,12 +239,7 @@
     summary.termination_type = TOLERANCE;
   }
 
-  const time_t cleanup_time = time(NULL);
-  VLOG(2) << "time (sec) total: " << (cleanup_time - start_time)
-          << " init: " << (init_time - start_time)
-          << " symbolic: " << (symbolic_time - init_time)
-          << " solve: " << (solve_time - symbolic_time)
-          << " cleanup: " << (cleanup_time - solve_time);
+  event_logger.AddEvent("Teardown");
   return summary;
 }
 #else
diff --git a/internal/ceres/sparse_normal_cholesky_solver.h b/internal/ceres/sparse_normal_cholesky_solver.h
index 40d9e0a..8d48096 100644
--- a/internal/ceres/sparse_normal_cholesky_solver.h
+++ b/internal/ceres/sparse_normal_cholesky_solver.h
@@ -35,8 +35,8 @@
 #define CERES_INTERNAL_SPARSE_NORMAL_CHOLESKY_SOLVER_H_
 
 #include "ceres/cxsparse.h"
-#include "ceres/linear_solver.h"
 #include "ceres/internal/macros.h"
+#include "ceres/linear_solver.h"
 #include "ceres/suitesparse.h"
 
 namespace ceres {
diff --git a/internal/ceres/wall_time.cc b/internal/ceres/wall_time.cc
index 0dce19f..8dc4e1f 100644
--- a/internal/ceres/wall_time.cc
+++ b/internal/ceres/wall_time.cc
@@ -28,12 +28,20 @@
 //
 // Author: strandmark@google.com (Petter Strandmark)
 
+#include "ceres/wall_time.h"
+
 #ifdef CERES_USE_OPENMP
 #include <omp.h>
 #else
 #include <ctime>
 #endif
 
+#ifdef _WIN32
+#include <windows.h>
+#else
+#include <sys/time.h>
+#endif
+
 namespace ceres {
 namespace internal {
 
@@ -41,8 +49,47 @@
 #ifdef CERES_USE_OPENMP
   return omp_get_wtime();
 #else
+#ifdef _WIN32
   return static_cast<double>(std::time(NULL));
+#else
+  timeval time_val;
+  gettimeofday(&time_val, NULL);
+  return (time_val.tv_sec + time_val.tv_usec * 1e-6);
 #endif
+#endif
+}
+
+EventLogger::EventLogger(const string& logger_name)
+    : start_time_(WallTimeInSeconds()),
+      last_event_time_(start_time_),
+      events_("") {
+  StringAppendF(&events_,
+                "\n%s\n                                  Delta  Cumulative\n",
+                logger_name.c_str());
+}
+
+EventLogger::~EventLogger() {
+  if (VLOG_IS_ON(3)) {
+    AddEvent("Total");
+    VLOG(2) << "\n" << events_ << "\n";
+  }
+}
+
+void EventLogger::AddEvent(const string& event_name) {
+  if (!VLOG_IS_ON(3)) {
+    return;
+  }
+
+  const double current_time = WallTimeInSeconds();
+  const double relative_time_delta = current_time - last_event_time_;
+  const double absolute_time_delta = current_time - start_time_;
+  last_event_time_ = current_time;
+
+  StringAppendF(&events_,
+                "  %25s : %5.3e   %5.3e\n",
+                event_name.c_str(),
+                relative_time_delta,
+                absolute_time_delta);
 }
 
 }  // namespace internal
diff --git a/internal/ceres/wall_time.h b/internal/ceres/wall_time.h
index 1a6e3bb..1dbd6e9 100644
--- a/internal/ceres/wall_time.h
+++ b/internal/ceres/wall_time.h
@@ -31,13 +31,57 @@
 #ifndef CERES_INTERNAL_WALL_TIME_H_
 #define CERES_INTERNAL_WALL_TIME_H_
 
+#include <map>
+
+#include "ceres/internal/port.h"
+#include "ceres/stringprintf.h"
+#include "glog/logging.h"
+
 namespace ceres {
 namespace internal {
 
-// Returns time, in seconds, from some arbitrary starting point. Has very
-// high precision if OpenMP is available, otherwise only second granularity.
+// Returns time, in seconds, from some arbitrary starting point. If
+// OpenMP is available then the high precision openmp_get_wtime()
+// function is used. Otherwise on unixes, gettimeofday is used. The
+// granularity is in seconds on windows systems.
 double WallTimeInSeconds();
 
+// Log a series of events, recording for each event the time elapsed
+// since the last event and since the creation of the object.
+//
+// The information is output to VLOG(3) upon destruction. A
+// name::Total event is added as the final event right before
+// destruction.
+//
+// Example usage:
+//
+//  void Foo() {
+//    EventLogger event_logger("Foo");
+//    Bar1();
+//    event_logger.AddEvent("Bar1")
+//    Bar2();
+//    event_logger.AddEvent("Bar2")
+//    Bar3();
+//  }
+//
+// Will produce output that looks like
+//
+//  Foo
+//      Bar1:  time1  time1
+//      Bar2:  time2  time1 + time2;
+//     Total:  time3  time1 + time2 + time3;
+class EventLogger {
+ public:
+  EventLogger(const string& logger_name);
+  ~EventLogger();
+  void AddEvent(const string& event_name);
+
+ private:
+  const double start_time_;
+  double last_event_time_;
+  string events_;
+};
+
 }  // namespace internal
 }  // namespace ceres