Add a rough implementation of LBFGS.
Change-Id: I2bc816adfe0c02773a23035ea31de3cddc1322a4
diff --git a/include/ceres/types.h b/include/ceres/types.h
index 60e076a..37beeda 100644
--- a/include/ceres/types.h
+++ b/include/ceres/types.h
@@ -167,6 +167,20 @@
// precise choice of the non-linear conjugate gradient algorithm
// used is determined by NonlineConjuateGradientType.
NONLINEAR_CONJUGATE_GRADIENT,
+
+ // A limited memory approximation to the inverse Hessian is
+ // maintained and used to compute a quasi-Newton step.
+ //
+ // For more details see
+ //
+ // Nocedal, J. (1980). "Updating Quasi-Newton Matrices with Limited
+ // Storage". Mathematics of Computation 35 (151): 773–782.
+ //
+ // Byrd, R. H.; Nocedal, J.; Schnabel, R. B. (1994).
+ // "Representations of Quasi-Newton Matrices and their use in
+ // Limited Memory Methods". Mathematical Programming 63 (4):
+ // 129–156.
+ LBFGS,
};
// Nonliner conjugate gradient methods are a generalization of the
diff --git a/internal/ceres/CMakeLists.txt b/internal/ceres/CMakeLists.txt
index 9f5767c..6b664e4 100644
--- a/internal/ceres/CMakeLists.txt
+++ b/internal/ceres/CMakeLists.txt
@@ -57,6 +57,7 @@
gradient_checking_cost_function.cc
implicit_schur_complement.cc
iterative_schur_complement_solver.cc
+ lbfgs.cc
levenberg_marquardt_strategy.cc
line_search.cc
line_search_minimizer.cc
diff --git a/internal/ceres/lbfgs.cc b/internal/ceres/lbfgs.cc
new file mode 100644
index 0000000..48067c8
--- /dev/null
+++ b/internal/ceres/lbfgs.cc
@@ -0,0 +1,106 @@
+// Ceres Solver - A fast non-linear least squares minimizer
+// Copyright 2012 Google Inc. All rights reserved.
+// http://code.google.com/p/ceres-solver/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+// * Neither the name of Google Inc. nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+// Author: sameeragarwal@google.com (Sameer Agarwal)
+
+#include "glog/logging.h"
+#include "ceres/lbfgs.h"
+#include "ceres/internal/eigen.h"
+
+namespace ceres {
+namespace internal {
+
+LBFGS::LBFGS(int num_parameters, int max_num_corrections)
+ : num_parameters_(num_parameters),
+ max_num_corrections_(max_num_corrections),
+ num_corrections_(0),
+ diagonal_(1.0),
+ delta_x_history_(num_parameters, max_num_corrections),
+ delta_gradient_history_(num_parameters, max_num_corrections),
+ delta_x_dot_delta_gradient_(max_num_corrections) {
+}
+
+bool LBFGS::Update(const Vector& delta_x, const Vector& delta_gradient) {
+ const double delta_x_dot_delta_gradient = delta_x.dot(delta_gradient);
+ if (delta_x_dot_delta_gradient <= 1e-10) {
+ VLOG(2) << "Skipping LBFGS Update. " << delta_x_dot_delta_gradient;
+ return false;
+ }
+
+ if (num_corrections_ == max_num_corrections_) {
+ // TODO(sameeragarwal): This can be done more efficiently using
+ // a circular buffer/indexing scheme, but for simplicity we will
+ // do the expensive copy for now.
+ delta_x_history_.block(0, 0, num_parameters_, max_num_corrections_ - 2) =
+ delta_x_history_
+ .block(0, 1, num_parameters_, max_num_corrections_ - 1);
+
+ delta_gradient_history_
+ .block(0, 0, num_parameters_, max_num_corrections_ - 2) =
+ delta_gradient_history_
+ .block(0, 1, num_parameters_, max_num_corrections_ - 1);
+
+ delta_x_dot_delta_gradient_.head(num_corrections_ - 2) =
+ delta_x_dot_delta_gradient_.tail(num_corrections_ - 1);
+ } else {
+ ++num_corrections_;
+ }
+
+ delta_x_history_.col(num_corrections_ - 1) = delta_x;
+ delta_gradient_history_.col(num_corrections_ - 1) = delta_gradient;
+ delta_x_dot_delta_gradient_(num_corrections_ - 1) =
+ delta_x_dot_delta_gradient;
+ diagonal_ = delta_x_dot_delta_gradient / delta_gradient.squaredNorm();
+ return true;
+}
+
+void LBFGS::RightMultiply(const double* x_ptr, double* y_ptr) const {
+ ConstVectorRef gradient(x_ptr, num_parameters_);
+ VectorRef search_direction(y_ptr, num_parameters_);
+
+ search_direction = gradient;
+
+ Vector alpha(num_corrections_);
+
+ for (int i = num_corrections_ - 1; i >= 0; --i) {
+ alpha(i) = delta_x_history_.col(i).dot(search_direction) /
+ delta_x_dot_delta_gradient_(i);
+ search_direction -= alpha(i) * delta_gradient_history_.col(i);
+ }
+
+ search_direction *= diagonal_;
+
+ for (int i = 0; i < num_corrections_; ++i) {
+ const double beta = delta_gradient_history_.col(i).dot(search_direction) /
+ delta_x_dot_delta_gradient_(i);
+ search_direction += delta_x_history_.col(i) * (alpha(i) - beta);
+ }
+}
+
+} // namespace internal
+} // namespace ceres
diff --git a/internal/ceres/lbfgs.h b/internal/ceres/lbfgs.h
new file mode 100644
index 0000000..f6281d8
--- /dev/null
+++ b/internal/ceres/lbfgs.h
@@ -0,0 +1,84 @@
+// Ceres Solver - A fast non-linear least squares minimizer
+// Copyright 2012 Google Inc. All rights reserved.
+// http://code.google.com/p/ceres-solver/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+// * Neither the name of Google Inc. nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+// Author: sameeragarwal@google.com (Sameer Agarwal)
+//
+// Limited memory positive definite approximation to the inverse
+// Hessian, using the work of
+//
+// Nocedal, J. (1980). "Updating Quasi-Newton Matrices with Limited
+// Storage". Mathematics of Computation 35 (151): 773–782.
+//
+// Byrd, R. H.; Nocedal, J.; Schnabel, R. B. (1994).
+// "Representations of Quasi-Newton Matrices and their use in
+// Limited Memory Methods". Mathematical Programming 63 (4):
+
+#include "ceres/internal/eigen.h"
+#include "ceres/linear_operator.h"
+
+namespace ceres {
+namespace internal {
+
+// Right multiplying with an LBFGS operator is equivalent to
+// multiplying by the inverse Hessian.
+class LBFGS : public LinearOperator {
+ public:
+ // num_parameters is the row/column size of the Hessian.
+ // max_num_corrections is the rank of the Hessian approximation.
+ // The approximation uses:
+ // 2 * max_num_corrections * num_parameters + max_num_corrections
+ // doubles.
+ LBFGS(int num_parameters, int max_num_corrections);
+ virtual ~LBFGS() {}
+
+ // Update the low rank approximation, i.e. store delta_x and
+ // delta_gradient, and get rid of the oldest delta_x and
+ // delta_gradient vectors if the number of corrections is already
+ // equal to max_num_corrections.
+ bool Update(const Vector& delta_x, const Vector& delta_gradient);
+
+ // LinearOperator interface
+ virtual void RightMultiply(const double* x, double* y) const;
+ virtual void LeftMultiply(const double* x, double* y) const {
+ RightMultiply(x,y);
+ }
+ virtual int num_rows() const { return num_parameters_; }
+ virtual int num_cols() const { return num_parameters_; }
+
+ private:
+ const int num_parameters_;
+ const int max_num_corrections_;
+ int num_corrections_;
+ double diagonal_;
+ Matrix delta_x_history_;
+ Matrix delta_gradient_history_;
+ Vector delta_x_dot_delta_gradient_;
+};
+
+} // namespace internal
+} // namespace ceres
diff --git a/internal/ceres/line_search.cc b/internal/ceres/line_search.cc
index 0ab9a82..e7508ca 100644
--- a/internal/ceres/line_search.cc
+++ b/internal/ceres/line_search.cc
@@ -73,7 +73,7 @@
}
void LineSearchFunction::Init(const Vector& position,
- const Vector& direction) {
+ const Vector& direction) {
position_ = position;
direction_ = direction;
}
diff --git a/internal/ceres/line_search_minimizer.cc b/internal/ceres/line_search_minimizer.cc
index 5a36edb..db0e814 100644
--- a/internal/ceres/line_search_minimizer.cc
+++ b/internal/ceres/line_search_minimizer.cc
@@ -51,6 +51,7 @@
#include "Eigen/Dense"
#include "ceres/array_utils.h"
+#include "ceres/lbfgs.h"
#include "ceres/evaluator.h"
#include "ceres/internal/eigen.h"
#include "ceres/internal/scoped_ptr.h"
@@ -191,6 +192,11 @@
ArmijoLineSearch line_search;
LineSearch::Summary line_search_summary;
+ scoped_ptr<LBFGS> lbfgs;
+ if (options_.line_search_direction_type == ceres::LBFGS) {
+ lbfgs.reset(new LBFGS(num_effective_parameters, 20));
+ }
+
while (true) {
iteration_start_time = WallTimeInSeconds();
if (iteration_summary.iteration >= options_.max_num_iterations) {
@@ -218,6 +224,11 @@
search_direction = -gradient;
directional_derivative = -gradient_squared_norm;
} else {
+
+ if (lbfgs.get() != NULL) {
+ lbfgs->Update(delta, gradient_change);
+ }
+
// TODO(sameeragarwal): This should probably be refactored into
// a set of functions. But we will do that once things settle
// down in this solver.
@@ -266,6 +277,13 @@
}
break;
+ case ceres::LBFGS:
+ search_direction.setZero();
+ lbfgs->RightMultiply(gradient.data(), search_direction.data());
+ search_direction *= -1.0;
+ directional_derivative = gradient.dot(search_direction);
+ break;
+
default:
LOG(FATAL) << "Unknown line search direction type: "
<< options_.line_search_direction_type;