Improve the performance of DenseQRSolver

1. Reduce amount of reallocations.

Change-Id: I91b17c781ae94ed12014d647f0162cfce4f6ed7b
diff --git a/internal/ceres/coordinate_descent_minimizer.cc b/internal/ceres/coordinate_descent_minimizer.cc
index 370b004..72edfe2 100644
--- a/internal/ceres/coordinate_descent_minimizer.cc
+++ b/internal/ceres/coordinate_descent_minimizer.cc
@@ -106,11 +106,6 @@
     }
   }
 
-  LinearSolver::Options linear_solver_options;
-  linear_solver_options.type = DENSE_QR;
-  linear_solver_.reset(LinearSolver::Create(linear_solver_options));
-  CHECK_NOTNULL(linear_solver_.get());
-
   evaluator_options_.linear_solver_type = DENSE_QR;
   evaluator_options_.num_eliminate_blocks = 0;
   evaluator_options_.num_threads = 1;
@@ -129,6 +124,15 @@
     parameter_block->SetConstant();
   }
 
+  scoped_array<LinearSolver*> linear_solvers(new LinearSolver*[options.num_threads]);
+
+  LinearSolver::Options linear_solver_options;
+  linear_solver_options.type = DENSE_QR;
+
+  for (int i = 0; i < options.num_threads; ++i) {
+    linear_solvers[i] = LinearSolver::Create(linear_solver_options);
+  }
+
   for (int i = 0; i < independent_set_offsets_.size() - 1; ++i) {
     // No point paying the price for an OpemMP call if the set if of
     // size zero.
@@ -142,6 +146,11 @@
     for (int j = independent_set_offsets_[i];
          j < independent_set_offsets_[i + 1];
          ++j) {
+#ifdef CERES_USE_OPENMP
+      int thread_id = omp_get_thread_num();
+#else
+      int thread_id = 0;
+#endif
 
       ParameterBlock* parameter_block = parameter_blocks_[j];
       const int old_index = parameter_block->index();
@@ -164,6 +173,7 @@
       // we are fine.
       Solver::Summary inner_summary;
       Solve(&inner_program,
+            linear_solvers[thread_id],
             parameters + parameter_block->state_offset(),
             &inner_summary);
 
@@ -177,10 +187,15 @@
   for (int i =  0; i < parameter_blocks_.size(); ++i) {
     parameter_blocks_[i]->SetVarying();
   }
+
+  for (int i = 0; i < options.num_threads; ++i) {
+    delete linear_solvers[i];
+  }
 }
 
 // Solve the optimization problem for one parameter block.
 void CoordinateDescentMinimizer::Solve(Program* program,
+                                       LinearSolver* linear_solver,
                                        double* parameter,
                                        Solver::Summary* summary) {
   *summary = Solver::Summary();
@@ -196,11 +211,11 @@
   scoped_ptr<SparseMatrix> jacobian(evaluator->CreateJacobian());
   CHECK_NOTNULL(jacobian.get());
 
-  TrustRegionStrategy::Options trust_region_strategy_options;
-  trust_region_strategy_options.linear_solver = linear_solver_.get();
+  TrustRegionStrategy::Options trs_options;
+  trs_options.linear_solver = linear_solver;
+
   scoped_ptr<TrustRegionStrategy>trust_region_strategy(
-      TrustRegionStrategy::Create(trust_region_strategy_options));
-  CHECK_NOTNULL(trust_region_strategy.get());
+      CHECK_NOTNULL(TrustRegionStrategy::Create(trs_options)));
 
   Minimizer::Options minimizer_options;
   minimizer_options.evaluator = evaluator.get();
diff --git a/internal/ceres/coordinate_descent_minimizer.h b/internal/ceres/coordinate_descent_minimizer.h
index d7ff4db..fb96b3c 100644
--- a/internal/ceres/coordinate_descent_minimizer.h
+++ b/internal/ceres/coordinate_descent_minimizer.h
@@ -65,6 +65,7 @@
 
  private:
   void Solve(Program* program,
+             LinearSolver* linear_solver,
              double* parameters,
              Solver::Summary* summary);
 
@@ -77,7 +78,6 @@
   vector<int> independent_set_offsets_;
 
   Evaluator::Options evaluator_options_;
-  scoped_ptr<LinearSolver> linear_solver_;
 };
 
 }  // namespace internal
diff --git a/internal/ceres/dense_jacobian_writer.h b/internal/ceres/dense_jacobian_writer.h
index 1177b83..be743a8 100644
--- a/internal/ceres/dense_jacobian_writer.h
+++ b/internal/ceres/dense_jacobian_writer.h
@@ -62,7 +62,8 @@
 
   SparseMatrix* CreateJacobian() const {
     return new DenseSparseMatrix(program_->NumResiduals(),
-                                 program_->NumEffectiveParameters());
+                                 program_->NumEffectiveParameters(),
+                                 true);
   }
 
   void Write(int residual_id,
@@ -87,10 +88,10 @@
         continue;
       }
 
-      int parameter_block_size = parameter_block->LocalSize();
-      MatrixRef parameter_jacobian(jacobians[j],
-                                   num_residuals,
-                                   parameter_block_size);
+      const int parameter_block_size = parameter_block->LocalSize();
+      ConstMatrixRef parameter_jacobian(jacobians[j],
+                                        num_residuals,
+                                        parameter_block_size);
 
       dense_jacobian->mutable_matrix().block(
           residual_offset,
diff --git a/internal/ceres/dense_qr_solver.cc b/internal/ceres/dense_qr_solver.cc
index 2b329ee..e2463c8 100644
--- a/internal/ceres/dense_qr_solver.cc
+++ b/internal/ceres/dense_qr_solver.cc
@@ -62,17 +62,20 @@
   }
 
   // rhs = [b;0] to account for the additional rows in the lhs.
-  Vector rhs(num_rows + ((per_solve_options.D != NULL) ? num_cols : 0));
-  rhs.setZero();
-  rhs.head(num_rows) = ConstVectorRef(b, num_rows);
+  const int augmented_num_rows = num_rows + ((per_solve_options.D != NULL) ? num_cols : 0);
+  if (rhs_.rows() != augmented_num_rows) {
+    rhs_.resize(augmented_num_rows);
+    rhs_.setZero();
+  }
+  rhs_.head(num_rows) = ConstVectorRef(b, num_rows);
 
   // Solve the system.
-  VectorRef(x, num_cols) = A->matrix().colPivHouseholderQr().solve(rhs);
+  VectorRef(x, num_cols) = A->matrix().colPivHouseholderQr().solve(rhs_);
 
   VLOG(3) << "A:\n" << A->matrix();
   VLOG(3) << "x:\n" << VectorRef(x, num_cols);
-  VLOG(3) << "b:\n" << rhs;
-  VLOG(3) << "error: " << (A->matrix() * VectorRef(x, num_cols) - rhs).norm();
+  VLOG(3) << "b:\n" << rhs_;
+  VLOG(3) << "error: " << (A->matrix() * VectorRef(x, num_cols) - rhs_).norm();
 
 
   if (per_solve_options.D != NULL) {
diff --git a/internal/ceres/dense_qr_solver.h b/internal/ceres/dense_qr_solver.h
index dd683a8..f78fa72 100644
--- a/internal/ceres/dense_qr_solver.h
+++ b/internal/ceres/dense_qr_solver.h
@@ -33,6 +33,7 @@
 #define CERES_INTERNAL_DENSE_QR_SOLVER_H_
 
 #include "ceres/linear_solver.h"
+#include "ceres/internal/eigen.h"
 #include "ceres/internal/macros.h"
 
 namespace ceres {
@@ -90,6 +91,7 @@
       double* x);
 
   const LinearSolver::Options options_;
+  Vector rhs_;
   CERES_DISALLOW_COPY_AND_ASSIGN(DenseQRSolver);
 };
 
diff --git a/internal/ceres/dense_sparse_matrix.cc b/internal/ceres/dense_sparse_matrix.cc
index 86730cc..7fb7b83 100644
--- a/internal/ceres/dense_sparse_matrix.cc
+++ b/internal/ceres/dense_sparse_matrix.cc
@@ -47,6 +47,18 @@
   m_.setZero();
 }
 
+DenseSparseMatrix::DenseSparseMatrix(int num_rows, int num_cols, bool reserve_diagonal)
+    : has_diagonal_appended_(false),
+      has_diagonal_reserved_(reserve_diagonal) {
+  // Allocate enough space for the diagonal.
+  if (reserve_diagonal) {
+    m_.resize(num_rows +  num_cols, num_cols);
+  } else {
+    m_.resize(num_rows, num_cols);
+  }
+  m_.setZero();
+}
+
 DenseSparseMatrix::DenseSparseMatrix(const TripletSparseMatrix& m)
     : m_(Eigen::MatrixXd::Zero(m.num_rows(), m.num_cols())),
       has_diagonal_appended_(false),
diff --git a/internal/ceres/dense_sparse_matrix.h b/internal/ceres/dense_sparse_matrix.h
index e7ad14d..1e4d499 100644
--- a/internal/ceres/dense_sparse_matrix.h
+++ b/internal/ceres/dense_sparse_matrix.h
@@ -57,6 +57,7 @@
 #endif
 
   DenseSparseMatrix(int num_rows, int num_cols);
+  DenseSparseMatrix(int num_rows, int num_cols, bool reserve_diagonal);
 
   virtual ~DenseSparseMatrix() {}