// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2012 Google Inc. All rights reserved.
// http://code.google.com/p/ceres-solver/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
//   this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
//   this list of conditions and the following disclaimer in the documentation
//   and/or other materials provided with the distribution.
// * Neither the name of Google Inc. nor the names of its contributors may be
//   used to endorse or promote products derived from this software without
//   specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
// Author: sameeragarwal@google.com (Sameer Agarwal)

#ifndef CERES_NO_LINE_SEARCH_MINIMIZER

#include "ceres/line_search_direction.h"
#include "ceres/line_search_minimizer.h"
#include "ceres/low_rank_inverse_hessian.h"
#include "ceres/internal/eigen.h"
#include "glog/logging.h"

namespace ceres {
namespace internal {

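// Steepest (gradient) descent: the search direction is simply the negative
// gradient at the current point.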
class SteepestDescent : public LineSearchDirection {
 public:
  virtual ~SteepestDescent() {}
  bool NextDirection(const LineSearchMinimizer::State& previous,
                     const LineSearchMinimizer::State& current,
                     Vector* search_direction) {
    *search_direction = -current.gradient;
    return true;
  }
};

class NonlinearConjugateGradient : public LineSearchDirection {
 public:
  NonlinearConjugateGradient(const NonlinearConjugateGradientType type,
                             const double function_tolerance)
      : type_(type),
        function_tolerance_(function_tolerance) {
  }

  bool NextDirection(const LineSearchMinimizer::State& previous,
                     const LineSearchMinimizer::State& current,
                     Vector* search_direction) {
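    // Compute the conjugate gradient parameter \beta. With g = gradient and
    // d = search_direction, and subscripts k and k-1 denoting the current
    // and previous iterations, the variants below compute:
    //
    //   Fletcher-Reeves:   \beta = (g_k' * g_k) / (g_{k-1}' * g_{k-1})
    //   Polak-Ribiere:     \beta = (g_k' * (g_k - g_{k-1})) /
    //                              (g_{k-1}' * g_{k-1})
    //   Hestenes-Stiefel:  \beta = (g_k' * (g_k - g_{k-1})) /
    //                              (d_{k-1}' * (g_k - g_{k-1}))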
    double beta = 0.0;
    Vector gradient_change;
    switch (type_) {
      case FLETCHER_REEVES:
        beta = current.gradient_squared_norm / previous.gradient_squared_norm;
        break;
      case POLAK_RIBIRERE:
        gradient_change = current.gradient - previous.gradient;
        beta = (current.gradient.dot(gradient_change) /
                previous.gradient_squared_norm);
        break;
      case HESTENES_STIEFEL:
        gradient_change = current.gradient - previous.gradient;
        beta = (current.gradient.dot(gradient_change) /
                previous.search_direction.dot(gradient_change));
        break;
      default:
        LOG(FATAL) << "Unknown nonlinear conjugate gradient type: " << type_;
    }

    *search_direction = -current.gradient + beta * previous.search_direction;
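    // The direction produced by the \beta update is not guaranteed to be a
    // descent direction; if it is not sufficiently downhill, i.e.
    // g_k' * d_k > -function_tolerance, restart with steepest descent.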
    const double directional_derivative =
        current.gradient.dot(*search_direction);
    if (directional_derivative > -function_tolerance_) {
      LOG(WARNING) << "Restarting non-linear conjugate gradients: "
                   << directional_derivative;
      *search_direction = -current.gradient;
    }

    return true;
  }

 private:
  const NonlinearConjugateGradientType type_;
  const double function_tolerance_;
};

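// Limited-memory BFGS (L-BFGS): instead of forming a dense inverse Hessian
// approximation, maintain a low-rank approximation built from the last
// max_lbfgs_rank {step, gradient-change} pairs (see LowRankInverseHessian),
// reducing storage from O(num_parameters^2) to O(num_parameters * rank).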
class LBFGS : public LineSearchDirection {
 public:
  LBFGS(const int num_parameters,
        const int max_lbfgs_rank,
        const bool use_approximate_eigenvalue_bfgs_scaling)
      : low_rank_inverse_hessian_(num_parameters,
                                  max_lbfgs_rank,
                                  use_approximate_eigenvalue_bfgs_scaling),
        is_positive_definite_(true) {}

  virtual ~LBFGS() {}

  bool NextDirection(const LineSearchMinimizer::State& previous,
                     const LineSearchMinimizer::State& current,
                     Vector* search_direction) {
    CHECK(is_positive_definite_)
        << "Ceres bug: NextDirection() called on L-BFGS after inverse Hessian "
        << "approximation has become indefinite, please contact the "
        << "developers!";

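    // Fold the most recent step (s_k = step_size * d_{k-1}) and gradient
    // change (y_k = g_k - g_{k-1}) into the low-rank model, then compute the
    // search direction as d_k = -H_k * g_k.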
    low_rank_inverse_hessian_.Update(
        previous.search_direction * previous.step_size,
        current.gradient - previous.gradient);
    search_direction->setZero();
    low_rank_inverse_hessian_.RightMultiply(current.gradient.data(),
                                            search_direction->data());
    *search_direction *= -1.0;

    if (search_direction->dot(current.gradient) >= 0.0) {
      LOG(WARNING) << "Numerical failure in L-BFGS update: inverse Hessian "
                   << "approximation is not positive definite, and thus "
                   << "initial gradient for search direction is positive: "
                   << search_direction->dot(current.gradient);
      is_positive_definite_ = false;
      return false;
    }

    return true;
  }

 private:
  LowRankInverseHessian low_rank_inverse_hessian_;
  bool is_positive_definite_;
};

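// Dense BFGS: maintains an explicit num_parameters x num_parameters
// approximation to the inverse Hessian, updated in O(num_parameters^2) per
// iteration; suitable only for problems with a modest number of parameters.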
class BFGS : public LineSearchDirection {
 public:
  BFGS(const int num_parameters,
       const bool use_approximate_eigenvalue_scaling)
      : num_parameters_(num_parameters),
        use_approximate_eigenvalue_scaling_(use_approximate_eigenvalue_scaling),
        initialized_(false),
        is_positive_definite_(true) {
    LOG_IF(WARNING, num_parameters_ >= 1e3)
        << "BFGS line search being created with: " << num_parameters_
        << " parameters; this will allocate a dense approximate inverse "
        << "Hessian of size: " << num_parameters_ << " x " << num_parameters_
        << ". Consider using the L-BFGS memory-efficient line search "
        << "direction instead.";
    // Construct inverse_hessian_ after logging the warning about its size so
    // that, if the allocation crashes us, the log will highlight what the
    // issue likely was.
    inverse_hessian_ = Matrix::Identity(num_parameters, num_parameters);
  }

  virtual ~BFGS() {}

  bool NextDirection(const LineSearchMinimizer::State& previous,
                     const LineSearchMinimizer::State& current,
                     Vector* search_direction) {
    CHECK(is_positive_definite_)
        << "Ceres bug: NextDirection() called on BFGS after inverse Hessian "
        << "approximation has become indefinite, please contact the "
        << "developers!";

    const Vector delta_x = previous.search_direction * previous.step_size;
    const Vector delta_gradient = current.gradient - previous.gradient;
    const double delta_x_dot_delta_gradient = delta_x.dot(delta_gradient);

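    // The BFGS update preserves positive definiteness of the inverse Hessian
    // approximation only if the curvature condition s_k' * y_k > 0 holds;
    // skip the update when it fails (numerically) to hold.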
    if (delta_x_dot_delta_gradient <= 1e-10) {
      VLOG(2) << "Skipping BFGS update, delta_x_dot_delta_gradient too "
              << "small: " << delta_x_dot_delta_gradient;
    } else {
      // Update dense inverse Hessian approximation.

      if (!initialized_ && use_approximate_eigenvalue_scaling_) {
        // Rescale the initial inverse Hessian approximation (H_0) to be
        // iteratively updated so that it is of similar 'size' to the true
        // inverse Hessian at the start point. As shown in [1]:
        //
        //   \gamma = (delta_gradient_{0}' * delta_x_{0}) /
        //            (delta_gradient_{0}' * delta_gradient_{0})
        //
        // satisfies:
        //
        //   (1 / \lambda_m) <= \gamma <= (1 / \lambda_1)
        //
        // where \lambda_1 & \lambda_m are the smallest and largest eigenvalues
        // of the true initial Hessian (not the inverse) respectively. Thus,
        // \gamma is an approximate eigenvalue of the true inverse Hessian, and
        // choosing H_0 = I * \gamma yields a starting point that has a
        // similar scale to the true inverse Hessian. This technique is widely
        // reported to often improve convergence; however, this is not
        // universally true, particularly if there are errors in the initial
        // gradients, or if there are significant differences in the
        // sensitivity of the problem to the parameters (i.e. the range of the
        // magnitudes of the components of the gradient is large).
        //
        // The origin of this rescaling trick is somewhat unclear; the
        // earliest reference appears to be Oren [1], but it is widely
        // discussed without specific attribution in various texts, including
        // [2] (p143).
        //
        // [1] Oren S.S., Self-scaling variable metric (SSVM) algorithms
        //     Part II: Implementation and experiments, Management Science,
        //     20(5), 863-874, 1974.
        // [2] Nocedal J., Wright S., Numerical Optimization, Springer, 1999.
        inverse_hessian_ *=
            delta_x_dot_delta_gradient / delta_gradient.dot(delta_gradient);
      }
      initialized_ = true;

      // Efficient O(num_parameters^2) BFGS update [2].
      //
      // Starting from the dense BFGS update detailed in Nocedal [2] p140/177
      // and using: y_k = delta_gradient, s_k = delta_x:
      //
      //   \rho_k = 1.0 / (s_k' * y_k)
      //   V_k = I - \rho_k * y_k * s_k'
      //   H_k = (V_k' * H_{k-1} * V_k) + (\rho_k * s_k * s_k')
      //
      // This update involves matrix-matrix products, which are naively
      // O(N^3); however, we can exploit the fact that H_k is positive
      // definite, and thus by definition symmetric, to reduce the cost of
      // the update.
      //
      // Expanding the update above yields:
      //
      //   H_k = H_{k-1} +
      //         \rho_k * ( (1.0 + \rho_k * y_k' * H_{k-1} * y_k) * s_k * s_k' -
      //                    (s_k * y_k' * H_{k-1} + H_{k-1} * y_k * s_k') )
      //
      // Using A = (s_k * y_k' * H_{k-1}), and the knowledge that
      // H_{k-1} = H_{k-1}', the last term simplifies to (A + A'). Note that
      // although A is not symmetric, (A + A') is. For ease of construction
      // we also define B = (1 + \rho_k * y_k' * H_{k-1} * y_k) * s_k * s_k',
      // which is symmetric by construction from s_k * s_k'.
      //
      // Now we can write the BFGS update as:
      //
      //   H_k = H_{k-1} + \rho_k * (B - (A + A'))

      // For efficiency, as H_k is by definition symmetric, we only maintain
      // the *lower* triangle of H_k (and of all intermediary terms).

      const double rho_k = 1.0 / delta_x_dot_delta_gradient;

      // Calculate: A = s_k * y_k' * H_{k-1}
      Matrix A = delta_x * (delta_gradient.transpose() *
                            inverse_hessian_.selfadjointView<Eigen::Lower>());

      // Calculate scalar: (1 + \rho_k * y_k' * H_{k-1} * y_k)
      const double delta_x_times_delta_x_transpose_scale_factor =
          (1.0 + (rho_k * delta_gradient.transpose() *
                  inverse_hessian_.selfadjointView<Eigen::Lower>() *
                  delta_gradient));
      // Calculate: B = (1 + \rho_k * y_k' * H_{k-1} * y_k) * s_k * s_k'
      Matrix B = Matrix::Zero(num_parameters_, num_parameters_);
      B.selfadjointView<Eigen::Lower>().
          rankUpdate(delta_x, delta_x_times_delta_x_transpose_scale_factor);

      // Finally, update the inverse Hessian approximation according to:
      // H_k = H_{k-1} + \rho_k * (B - (A + A')). Note that (A + A') is
      // symmetric, even though A is not.
      inverse_hessian_.triangularView<Eigen::Lower>() +=
          rho_k * (B - A - A.transpose());
    }

    *search_direction =
        inverse_hessian_.selfadjointView<Eigen::Lower>() *
        (-1.0 * current.gradient);

    if (search_direction->dot(current.gradient) >= 0.0) {
      LOG(WARNING) << "Numerical failure in BFGS update: inverse Hessian "
                   << "approximation is not positive definite, and thus "
                   << "initial gradient for search direction is positive: "
                   << search_direction->dot(current.gradient);
      is_positive_definite_ = false;
      return false;
    }

    return true;
  }

 private:
  const int num_parameters_;
  const bool use_approximate_eigenvalue_scaling_;
  Matrix inverse_hessian_;
  bool initialized_;
  bool is_positive_definite_;
};

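// Factory for the concrete direction types above. Callers own the returned
// object. A minimal usage sketch (a hypothetical caller, not part of this
// file; problem_size is an assumed caller-side variable):
//
//   LineSearchDirection::Options options;
//   options.type = ceres::LBFGS;
//   options.num_parameters = problem_size;
//   LineSearchDirection* direction = LineSearchDirection::Create(options);
//   // ... use direction->NextDirection(...) ...
//   delete direction;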
LineSearchDirection*
LineSearchDirection::Create(const LineSearchDirection::Options& options) {
  if (options.type == STEEPEST_DESCENT) {
    return new SteepestDescent;
  }

  if (options.type == NONLINEAR_CONJUGATE_GRADIENT) {
    return new NonlinearConjugateGradient(
        options.nonlinear_conjugate_gradient_type,
        options.function_tolerance);
  }

  if (options.type == ceres::LBFGS) {
    return new ceres::internal::LBFGS(
        options.num_parameters,
        options.max_lbfgs_rank,
        options.use_approximate_eigenvalue_bfgs_scaling);
  }

  if (options.type == ceres::BFGS) {
    return new ceres::internal::BFGS(
        options.num_parameters,
        options.use_approximate_eigenvalue_bfgs_scaling);
  }

  LOG(ERROR) << "Unknown line search direction type: " << options.type;
  return NULL;
}

}  // namespace internal
}  // namespace ceres

#endif  // CERES_NO_LINE_SEARCH_MINIMIZER