Faster LBFGS.

1. Use column-major storage for the various matrices used by
LowRankInverseHessian, since all the operations are on columns.

2. Use a circular buffer to keep track of the history of the LBFGS
updates so that an update does not require copying the entire history.
This makes each update O(1) rather than O(rank). A standalone sketch of
the indexing scheme follows this list.
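
A minimal sketch of the circular-buffer indexing, assuming hypothetical
names (max_num_corrections, indices, history); this is not the Ceres
API, only an illustration of why an update is O(1). The list holds
column indices ordered oldest to newest, so adding a correction reuses
the oldest column instead of shifting the whole history; with
column-major storage, each such column is contiguous in memory.

    #include <cstdio>
    #include <list>
    #include <vector>

    int main() {
      const int max_num_corrections = 3;
      // Column indices, ordered oldest to newest.
      std::list<int> indices;
      // Stands in for one column-major history matrix; element i plays
      // the role of column i.
      std::vector<double> history(max_num_corrections, 0.0);

      for (int iteration = 0; iteration < 5; ++iteration) {
        int next = indices.size();
        // Once full, recycle the oldest column instead of copying the
        // remaining history down by one slot.
        if (next == max_num_corrections) {
          next = indices.front();
          indices.pop_front();
        }
        indices.push_back(next);
        history[next] = iteration;  // O(1) update, no copying.
      }

      // Newest-to-oldest traversal, as in the first LBFGS recursion.
      for (std::list<int>::const_reverse_iterator it = indices.rbegin();
           it != indices.rend();
           ++it) {
        std::printf("column %d holds correction from iteration %g\n",
                    *it, history[*it]);
      }
      return 0;
    }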

The implementation has been checked against the denoising code, where
it gives numerically identical results. The overhead of the LBFGS code
is now nearly negligible compared to the gradient evaluation.

On a sample problem:

before: 1050ms, after: 630ms

Change-Id: I537ba506ac35fc4960b304c10d923a8dea2ae031
diff --git a/internal/ceres/low_rank_inverse_hessian.cc b/internal/ceres/low_rank_inverse_hessian.cc
index 9aeafec..4816e3c 100644
--- a/internal/ceres/low_rank_inverse_hessian.cc
+++ b/internal/ceres/low_rank_inverse_hessian.cc
@@ -28,6 +28,8 @@
 //
 // Author: sameeragarwal@google.com (Sameer Agarwal)
 
+#include <list>
+
 #include "ceres/internal/eigen.h"
 #include "ceres/low_rank_inverse_hessian.h"
 #include "glog/logging.h"
@@ -77,7 +79,6 @@
     : num_parameters_(num_parameters),
       max_num_corrections_(max_num_corrections),
       use_approximate_eigenvalue_scaling_(use_approximate_eigenvalue_scaling),
-      num_corrections_(0),
       approximate_eigenvalue_scale_(1.0),
       delta_x_history_(num_parameters, max_num_corrections),
       delta_gradient_history_(num_parameters, max_num_corrections),
@@ -96,29 +97,20 @@
     return false;
   }
 
-  if (num_corrections_ == max_num_corrections_) {
-    // TODO(sameeragarwal): This can be done more efficiently using
-    // a circular buffer/indexing scheme, but for simplicity we will
-    // do the expensive copy for now.
-    delta_x_history_.block(0, 0, num_parameters_, max_num_corrections_ - 1) =
-        delta_x_history_
-        .block(0, 1, num_parameters_, max_num_corrections_ - 1);
 
-    delta_gradient_history_
-        .block(0, 0, num_parameters_, max_num_corrections_ - 1) =
-        delta_gradient_history_
-        .block(0, 1, num_parameters_, max_num_corrections_ - 1);
-
-    delta_x_dot_delta_gradient_.head(num_corrections_ - 1) =
-        delta_x_dot_delta_gradient_.tail(num_corrections_ - 1);
-  } else {
-    ++num_corrections_;
+  int next = indices_.size();
+  // Once the size of the list reaches max_num_corrections_, simulate
+  // a circular buffer by removing the first element of the list and
+  // making it the next position where the LBFGS history is stored.
+  if (next == max_num_corrections_) {
+    next = indices_.front();
+    indices_.pop_front();
   }
 
-  delta_x_history_.col(num_corrections_ - 1) = delta_x;
-  delta_gradient_history_.col(num_corrections_ - 1) = delta_gradient;
-  delta_x_dot_delta_gradient_(num_corrections_ - 1) =
-      delta_x_dot_delta_gradient;
+  indices_.push_back(next);
+  delta_x_history_.col(next) = delta_x;
+  delta_gradient_history_.col(next) = delta_gradient;
+  delta_x_dot_delta_gradient_(next) = delta_x_dot_delta_gradient;
   approximate_eigenvalue_scale_ =
       delta_x_dot_delta_gradient / delta_gradient.squaredNorm();
   return true;
@@ -131,12 +123,16 @@
 
   search_direction = gradient;
 
-  Vector alpha(num_corrections_);
+  const int num_corrections = indices_.size();
+  Vector alpha(num_corrections);
 
-  for (int i = num_corrections_ - 1; i >= 0; --i) {
-    alpha(i) = delta_x_history_.col(i).dot(search_direction) /
-        delta_x_dot_delta_gradient_(i);
-    search_direction -= alpha(i) * delta_gradient_history_.col(i);
+  for (std::list<int>::const_reverse_iterator it = indices_.rbegin();
+       it != indices_.rend();
+       ++it) {
+    const double alpha_i = delta_x_history_.col(*it).dot(search_direction) /
+        delta_x_dot_delta_gradient_(*it);
+    search_direction -= alpha_i * delta_gradient_history_.col(*it);
+    alpha(*it) = alpha_i;
   }
 
   if (use_approximate_eigenvalue_scaling_) {
@@ -177,10 +173,12 @@
             << "approximation.";
   }
 
-  for (int i = 0; i < num_corrections_; ++i) {
-    const double beta = delta_gradient_history_.col(i).dot(search_direction) /
-        delta_x_dot_delta_gradient_(i);
-    search_direction += delta_x_history_.col(i) * (alpha(i) - beta);
+  for (std::list<int>::const_iterator it = indices_.begin();
+       it != indices_.end();
+       ++it) {
+    const double beta = delta_gradient_history_.col(*it).dot(search_direction) /
+        delta_x_dot_delta_gradient_(*it);
+    search_direction += delta_x_history_.col(*it) * (alpha(*it) - beta);
   }
 }