Faster LBFGS.

1. Use column-major storage for the various matrices used by
LowRankInverseHessian, since all the operations are on columns.

2. Use a circular buffer to keep track of the history of the LBFGS
updates so that an update does not require copying the entire history.
This makes each update O(1) rather than O(rank). A standalone sketch of
the indexing scheme follows this list.
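
A minimal sketch of the circular-buffer indexing, assuming hypothetical
names (max_num_corrections, indices, history); this is not the Ceres
API, only an illustration of why an update is O(1). The list holds
column indices ordered oldest to newest, so adding a correction reuses
the oldest column instead of shifting the whole history; with
column-major storage, each such column is contiguous in memory.

    #include <cstdio>
    #include <list>
    #include <vector>

    int main() {
      const int max_num_corrections = 3;
      // Column indices, ordered oldest to newest.
      std::list<int> indices;
      // Stands in for one column-major history matrix; element i plays
      // the role of column i.
      std::vector<double> history(max_num_corrections, 0.0);

      for (int iteration = 0; iteration < 5; ++iteration) {
        int next = indices.size();
        // Once full, recycle the oldest column instead of copying the
        // remaining history down by one slot.
        if (next == max_num_corrections) {
          next = indices.front();
          indices.pop_front();
        }
        indices.push_back(next);
        history[next] = iteration;  // O(1) update, no copying.
      }

      // Newest-to-oldest traversal, as in the first LBFGS recursion.
      for (std::list<int>::const_reverse_iterator it = indices.rbegin();
           it != indices.rend();
           ++it) {
        std::printf("column %d holds correction from iteration %g\n",
                    *it, history[*it]);
      }
      return 0;
    }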

The implementation has been checked against the denoising code, where
it gives numerically identical results. The overhead of the LBFGS code
is now nearly negligible compared to the gradient evaluation.

On a sample problem:

before: 1050ms, after: 630ms

Change-Id: I537ba506ac35fc4960b304c10d923a8dea2ae031
diff --git a/internal/ceres/low_rank_inverse_hessian.cc b/internal/ceres/low_rank_inverse_hessian.cc
index 9aeafec..4816e3c 100644
--- a/internal/ceres/low_rank_inverse_hessian.cc
+++ b/internal/ceres/low_rank_inverse_hessian.cc
@@ -28,6 +28,8 @@
 //
 // Author: sameeragarwal@google.com (Sameer Agarwal)
 
+#include <list>
+
 #include "ceres/internal/eigen.h"
 #include "ceres/low_rank_inverse_hessian.h"
 #include "glog/logging.h"
@@ -77,7 +79,6 @@
     : num_parameters_(num_parameters),
       max_num_corrections_(max_num_corrections),
       use_approximate_eigenvalue_scaling_(use_approximate_eigenvalue_scaling),
-      num_corrections_(0),
       approximate_eigenvalue_scale_(1.0),
       delta_x_history_(num_parameters, max_num_corrections),
       delta_gradient_history_(num_parameters, max_num_corrections),
@@ -96,29 +97,20 @@
     return false;
   }
 
-  if (num_corrections_ == max_num_corrections_) {
-    // TODO(sameeragarwal): This can be done more efficiently using
-    // a circular buffer/indexing scheme, but for simplicity we will
-    // do the expensive copy for now.
-    delta_x_history_.block(0, 0, num_parameters_, max_num_corrections_ - 1) =
-        delta_x_history_
-        .block(0, 1, num_parameters_, max_num_corrections_ - 1);
 
-    delta_gradient_history_
-        .block(0, 0, num_parameters_, max_num_corrections_ - 1) =
-        delta_gradient_history_
-        .block(0, 1, num_parameters_, max_num_corrections_ - 1);
-
-    delta_x_dot_delta_gradient_.head(num_corrections_ - 1) =
-        delta_x_dot_delta_gradient_.tail(num_corrections_ - 1);
-  } else {
-    ++num_corrections_;
+  int next = indices_.size();
+  // Once the size of the list reaches max_num_corrections_, simulate
+  // a circular buffer by removing the first element of the list and
+  // making it the next position where the LBFGS history is stored.
+  if (next == max_num_corrections_) {
+    next = indices_.front();
+    indices_.pop_front();
   }
 
-  delta_x_history_.col(num_corrections_ - 1) = delta_x;
-  delta_gradient_history_.col(num_corrections_ - 1) = delta_gradient;
-  delta_x_dot_delta_gradient_(num_corrections_ - 1) =
-      delta_x_dot_delta_gradient;
+  indices_.push_back(next);
+  delta_x_history_.col(next) = delta_x;
+  delta_gradient_history_.col(next) = delta_gradient;
+  delta_x_dot_delta_gradient_(next) = delta_x_dot_delta_gradient;
   approximate_eigenvalue_scale_ =
       delta_x_dot_delta_gradient / delta_gradient.squaredNorm();
   return true;
@@ -131,12 +123,16 @@
 
   search_direction = gradient;
 
-  Vector alpha(num_corrections_);
+  const int num_corrections = indices_.size();
+  Vector alpha(num_corrections);
 
-  for (int i = num_corrections_ - 1; i >= 0; --i) {
-    alpha(i) = delta_x_history_.col(i).dot(search_direction) /
-        delta_x_dot_delta_gradient_(i);
-    search_direction -= alpha(i) * delta_gradient_history_.col(i);
+  for (std::list<int>::const_reverse_iterator it = indices_.rbegin();
+       it != indices_.rend();
+       ++it) {
+    const double alpha_i = delta_x_history_.col(*it).dot(search_direction) /
+        delta_x_dot_delta_gradient_(*it);
+    search_direction -= alpha_i * delta_gradient_history_.col(*it);
+    alpha(*it) = alpha_i;
   }
 
   if (use_approximate_eigenvalue_scaling_) {
@@ -177,10 +173,12 @@
             << "approximation.";
   }
 
-  for (int i = 0; i < num_corrections_; ++i) {
-    const double beta = delta_gradient_history_.col(i).dot(search_direction) /
-        delta_x_dot_delta_gradient_(i);
-    search_direction += delta_x_history_.col(i) * (alpha(i) - beta);
+  for (std::list<int>::const_iterator it = indices_.begin();
+       it != indices_.end();
+       ++it) {
+    const double beta = delta_gradient_history_.col(*it).dot(search_direction) /
+        delta_x_dot_delta_gradient_(*it);
+    search_direction += delta_x_history_.col(*it) * (alpha(*it) - beta);
   }
 }