Add a rough implementation of LBFGS. Change-Id: I2bc816adfe0c02773a23035ea31de3cddc1322a4
diff --git a/include/ceres/types.h b/include/ceres/types.h index 60e076a..37beeda 100644 --- a/include/ceres/types.h +++ b/include/ceres/types.h
@@ -167,6 +167,20 @@ // precise choice of the non-linear conjugate gradient algorithm // used is determined by NonlineConjuateGradientType. NONLINEAR_CONJUGATE_GRADIENT, + + // A limited memory approximation to the inverse Hessian is + // maintained and used to compute a quasi-Newton step. + // + // For more details see + // + // Nocedal, J. (1980). "Updating Quasi-Newton Matrices with Limited + // Storage". Mathematics of Computation 35 (151): 773–782. + // + // Byrd, R. H.; Nocedal, J.; Schnabel, R. B. (1994). + // "Representations of Quasi-Newton Matrices and their use in + // Limited Memory Methods". Mathematical Programming 63 (4): + // 129–156. + LBFGS, }; // Nonliner conjugate gradient methods are a generalization of the
diff --git a/internal/ceres/CMakeLists.txt b/internal/ceres/CMakeLists.txt index 9f5767c..6b664e4 100644 --- a/internal/ceres/CMakeLists.txt +++ b/internal/ceres/CMakeLists.txt
@@ -57,6 +57,7 @@ gradient_checking_cost_function.cc implicit_schur_complement.cc iterative_schur_complement_solver.cc + lbfgs.cc levenberg_marquardt_strategy.cc line_search.cc line_search_minimizer.cc
diff --git a/internal/ceres/lbfgs.cc b/internal/ceres/lbfgs.cc new file mode 100644 index 0000000..48067c8 --- /dev/null +++ b/internal/ceres/lbfgs.cc
@@ -0,0 +1,106 @@ +// Ceres Solver - A fast non-linear least squares minimizer +// Copyright 2012 Google Inc. All rights reserved. +// http://code.google.com/p/ceres-solver/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of Google Inc. nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +// Author: sameeragarwal@google.com (Sameer Agarwal) + +#include "glog/logging.h" +#include "ceres/lbfgs.h" +#include "ceres/internal/eigen.h" + +namespace ceres { +namespace internal { + +LBFGS::LBFGS(int num_parameters, int max_num_corrections) + : num_parameters_(num_parameters), + max_num_corrections_(max_num_corrections), + num_corrections_(0), + diagonal_(1.0), + delta_x_history_(num_parameters, max_num_corrections), + delta_gradient_history_(num_parameters, max_num_corrections), + delta_x_dot_delta_gradient_(max_num_corrections) { +} + +bool LBFGS::Update(const Vector& delta_x, const Vector& delta_gradient) { + const double delta_x_dot_delta_gradient = delta_x.dot(delta_gradient); + if (delta_x_dot_delta_gradient <= 1e-10) { + VLOG(2) << "Skipping LBFGS Update. " << delta_x_dot_delta_gradient; + return false; + } + + if (num_corrections_ == max_num_corrections_) { + // TODO(sameeragarwal): This can be done more efficiently using + // a circular buffer/indexing scheme, but for simplicity we will + // do the expensive copy for now. + delta_x_history_.block(0, 0, num_parameters_, max_num_corrections_ - 2) = + delta_x_history_ + .block(0, 1, num_parameters_, max_num_corrections_ - 1); + + delta_gradient_history_ + .block(0, 0, num_parameters_, max_num_corrections_ - 2) = + delta_gradient_history_ + .block(0, 1, num_parameters_, max_num_corrections_ - 1); + + delta_x_dot_delta_gradient_.head(num_corrections_ - 2) = + delta_x_dot_delta_gradient_.tail(num_corrections_ - 1); + } else { + ++num_corrections_; + } + + delta_x_history_.col(num_corrections_ - 1) = delta_x; + delta_gradient_history_.col(num_corrections_ - 1) = delta_gradient; + delta_x_dot_delta_gradient_(num_corrections_ - 1) = + delta_x_dot_delta_gradient; + diagonal_ = delta_x_dot_delta_gradient / delta_gradient.squaredNorm(); + return true; +} + +void LBFGS::RightMultiply(const double* x_ptr, double* y_ptr) const { + ConstVectorRef gradient(x_ptr, num_parameters_); + VectorRef search_direction(y_ptr, num_parameters_); + + search_direction = gradient; + + Vector alpha(num_corrections_); + + for (int i = num_corrections_ - 1; i >= 0; --i) { + alpha(i) = delta_x_history_.col(i).dot(search_direction) / + delta_x_dot_delta_gradient_(i); + search_direction -= alpha(i) * delta_gradient_history_.col(i); + } + + search_direction *= diagonal_; + + for (int i = 0; i < num_corrections_; ++i) { + const double beta = delta_gradient_history_.col(i).dot(search_direction) / + delta_x_dot_delta_gradient_(i); + search_direction += delta_x_history_.col(i) * (alpha(i) - beta); + } +} + +} // namespace internal +} // namespace ceres
diff --git a/internal/ceres/lbfgs.h b/internal/ceres/lbfgs.h new file mode 100644 index 0000000..f6281d8 --- /dev/null +++ b/internal/ceres/lbfgs.h
@@ -0,0 +1,84 @@ +// Ceres Solver - A fast non-linear least squares minimizer +// Copyright 2012 Google Inc. All rights reserved. +// http://code.google.com/p/ceres-solver/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of Google Inc. nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +// Author: sameeragarwal@google.com (Sameer Agarwal) +// +// Limited memory positive definite approximation to the inverse +// Hessian, using the work of +// +// Nocedal, J. (1980). "Updating Quasi-Newton Matrices with Limited +// Storage". Mathematics of Computation 35 (151): 773–782. +// +// Byrd, R. H.; Nocedal, J.; Schnabel, R. B. (1994). +// "Representations of Quasi-Newton Matrices and their use in +// Limited Memory Methods". Mathematical Programming 63 (4): + +#include "ceres/internal/eigen.h" +#include "ceres/linear_operator.h" + +namespace ceres { +namespace internal { + +// Right multiplying with an LBFGS operator is equivalent to +// multiplying by the inverse Hessian. +class LBFGS : public LinearOperator { + public: + // num_parameters is the row/column size of the Hessian. + // max_num_corrections is the rank of the Hessian approximation. + // The approximation uses: + // 2 * max_num_corrections * num_parameters + max_num_corrections + // doubles. + LBFGS(int num_parameters, int max_num_corrections); + virtual ~LBFGS() {} + + // Update the low rank approximation, i.e. store delta_x and + // delta_gradient, and get rid of the oldest delta_x and + // delta_gradient vectors if the number of corrections is already + // equal to max_num_corrections. + bool Update(const Vector& delta_x, const Vector& delta_gradient); + + // LinearOperator interface + virtual void RightMultiply(const double* x, double* y) const; + virtual void LeftMultiply(const double* x, double* y) const { + RightMultiply(x,y); + } + virtual int num_rows() const { return num_parameters_; } + virtual int num_cols() const { return num_parameters_; } + + private: + const int num_parameters_; + const int max_num_corrections_; + int num_corrections_; + double diagonal_; + Matrix delta_x_history_; + Matrix delta_gradient_history_; + Vector delta_x_dot_delta_gradient_; +}; + +} // namespace internal +} // namespace ceres
diff --git a/internal/ceres/line_search.cc b/internal/ceres/line_search.cc index 0ab9a82..e7508ca 100644 --- a/internal/ceres/line_search.cc +++ b/internal/ceres/line_search.cc
@@ -73,7 +73,7 @@ } void LineSearchFunction::Init(const Vector& position, - const Vector& direction) { + const Vector& direction) { position_ = position; direction_ = direction; }
diff --git a/internal/ceres/line_search_minimizer.cc b/internal/ceres/line_search_minimizer.cc index 5a36edb..db0e814 100644 --- a/internal/ceres/line_search_minimizer.cc +++ b/internal/ceres/line_search_minimizer.cc
@@ -51,6 +51,7 @@ #include "Eigen/Dense" #include "ceres/array_utils.h" +#include "ceres/lbfgs.h" #include "ceres/evaluator.h" #include "ceres/internal/eigen.h" #include "ceres/internal/scoped_ptr.h" @@ -191,6 +192,11 @@ ArmijoLineSearch line_search; LineSearch::Summary line_search_summary; + scoped_ptr<LBFGS> lbfgs; + if (options_.line_search_direction_type == ceres::LBFGS) { + lbfgs.reset(new LBFGS(num_effective_parameters, 20)); + } + while (true) { iteration_start_time = WallTimeInSeconds(); if (iteration_summary.iteration >= options_.max_num_iterations) { @@ -218,6 +224,11 @@ search_direction = -gradient; directional_derivative = -gradient_squared_norm; } else { + + if (lbfgs.get() != NULL) { + lbfgs->Update(delta, gradient_change); + } + // TODO(sameeragarwal): This should probably be refactored into // a set of functions. But we will do that once things settle // down in this solver. @@ -266,6 +277,13 @@ } break; + case ceres::LBFGS: + search_direction.setZero(); + lbfgs->RightMultiply(gradient.data(), search_direction.data()); + search_direction *= -1.0; + directional_derivative = gradient.dot(search_direction); + break; + default: LOG(FATAL) << "Unknown line search direction type: " << options_.line_search_direction_type;