Blame - include/ceres/loss_function.h - ceres-solver

blob: 2c585009990d9e556e92ea0e9d326a4777b1995c [file] [log] [blame]

Keir Mierle	8ebb073	2012-04-30 23:09:08 -0700	[diff] [blame]	1	// Ceres Solver - A fast non-linear least squares minimizer
				2	// Copyright 2010, 2011, 2012 Google Inc. All rights reserved.
				3	// http://code.google.com/p/ceres-solver/
				4	//
				5	// Redistribution and use in source and binary forms, with or without
				6	// modification, are permitted provided that the following conditions are met:
				7	//
				8	// * Redistributions of source code must retain the above copyright notice,
				9	// this list of conditions and the following disclaimer.
				10	// * Redistributions in binary form must reproduce the above copyright notice,
				11	// this list of conditions and the following disclaimer in the documentation
				12	// and/or other materials provided with the distribution.
				13	// * Neither the name of Google Inc. nor the names of its contributors may be
				14	// used to endorse or promote products derived from this software without
				15	// specific prior written permission.
				16	//
				17	// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
				18	// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
				19	// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
				20	// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
				21	// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
				22	// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
				23	// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
				24	// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
				25	// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
				26	// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
				27	// POSSIBILITY OF SUCH DAMAGE.
				28	//
				29	// Author: sameeragarwal@google.com (Sameer Agarwal)
				30	//
				31	// The LossFunction interface is the way users describe how residuals
				32	// are converted to cost terms for the overall problem cost function.
				33	// For the exact manner in which loss functions are converted to the
				34	// overall cost for a problem, see problem.h.
				35	//
				36	// For least squares problem where there are no outliers and standard
				37	// squared loss is expected, it is not necessary to create a loss
				38	// function; instead passing a NULL to the problem when adding
				39	// residuals implies a standard squared loss.
				40	//
				41	// For least squares problems where the minimization may encounter
				42	// input terms that contain outliers, that is, completely bogus
				43	// measurements, it is important to use a loss function that reduces
				44	// their associated penalty.
				45	//
				46	// Consider a structure from motion problem. The unknowns are 3D
				47	// points and camera parameters, and the measurements are image
				48	// coordinates describing the expected reprojected position for a
				49	// point in a camera. For example, we want to model the geometry of a
				50	// street scene with fire hydrants and cars, observed by a moving
				51	// camera with unknown parameters, and the only 3D points we care
				52	// about are the pointy tippy-tops of the fire hydrants. Our magic
				53	// image processing algorithm, which is responsible for producing the
				54	// measurements that are input to Ceres, has found and matched all
				55	// such tippy-tops in all image frames, except that in one of the
				56	// frame it mistook a car's headlight for a hydrant. If we didn't do
				57	// anything special (i.e. if we used a basic quadratic loss), the
				58	// residual for the erroneous measurement will result in extreme error
				59	// due to the quadratic nature of squared loss. This results in the
				60	// entire solution getting pulled away from the optimimum to reduce
				61	// the large error that would otherwise be attributed to the wrong
				62	// measurement.
				63	//
				64	// Using a robust loss function, the cost for large residuals is
				65	// reduced. In the example above, this leads to outlier terms getting
				66	// downweighted so they do not overly influence the final solution.
				67	//
				68	// What cost function is best?
				69	//
				70	// In general, there isn't a principled way to select a robust loss
				71	// function. The authors suggest starting with a non-robust cost, then
				72	// only experimenting with robust loss functions if standard squared
				73	// loss doesn't work.
				74
				75	#ifndef CERES_PUBLIC_LOSS_FUNCTION_H_
				76	#define CERES_PUBLIC_LOSS_FUNCTION_H_
				77
Björn Piltz	c0b8838	2014-05-19 08:21:00 +0200	[diff] [blame]	78	#include "glog/logging.h"
Keir Mierle	8ebb073	2012-04-30 23:09:08 -0700	[diff] [blame]	79	#include "ceres/internal/macros.h"
				80	#include "ceres/internal/scoped_ptr.h"
				81	#include "ceres/types.h"
Björn Piltz	c0b8838	2014-05-19 08:21:00 +0200	[diff] [blame]	82	#include "ceres/internal/disable_warnings.h"
Keir Mierle	8ebb073	2012-04-30 23:09:08 -0700	[diff] [blame]	83
				84	namespace ceres {
				85
Björn Piltz	5d7eed8	2014-04-23 22:13:37 +0200	[diff] [blame]	86	class CERES_EXPORT LossFunction {
Keir Mierle	8ebb073	2012-04-30 23:09:08 -0700	[diff] [blame]	87	public:
				88	virtual ~LossFunction() {}
				89
				90	// For a residual vector with squared 2-norm 'sq_norm', this method
				91	// is required to fill in the value and derivatives of the loss
				92	// function (rho in this example):
				93	//
				94	// out[0] = rho(sq_norm),
				95	// out[1] = rho'(sq_norm),
				96	// out[2] = rho''(sq_norm),
				97	//
				98	// Here the convention is that the contribution of a term to the
				99	// cost function is given by 1/2 rho(s), where
				100	//
				101	// s = \|\|residuals\|\|^2.
				102	//
				103	// Calling the method with a negative value of 's' is an error and
				104	// the implementations are not required to handle that case.
				105	//
				106	// Most sane choices of rho() satisfy:
				107	//
				108	// rho(0) = 0,
				109	// rho'(0) = 1,
				110	// rho'(s) < 1 in outlier region,
				111	// rho''(s) < 0 in outlier region,
				112	//
				113	// so that they mimic the least squares cost for small residuals.
				114	virtual void Evaluate(double sq_norm, double out[3]) const = 0;
				115	};
				116
				117	// Some common implementations follow below.
				118	//
				119	// Note: in the region of interest (i.e. s < 3) we have:
				120	// TrivialLoss >= HuberLoss >= SoftLOneLoss >= CauchyLoss
				121
				122
				123	// This corresponds to no robustification.
				124	//
				125	// rho(s) = s
				126	//
				127	// At s = 0: rho = [0, 1, 0].
				128	//
				129	// It is not normally necessary to use this, as passing NULL for the
				130	// loss function when building the problem accomplishes the same
				131	// thing.
Björn Piltz	5d7eed8	2014-04-23 22:13:37 +0200	[diff] [blame]	132	class CERES_EXPORT TrivialLoss : public LossFunction {
Keir Mierle	8ebb073	2012-04-30 23:09:08 -0700	[diff] [blame]	133	public:
				134	virtual void Evaluate(double, double*) const;
				135	};
				136
				137	// Scaling
				138	// -------
				139	// Given one robustifier
				140	// s -> rho(s)
				141	// one can change the length scale at which robustification takes
				142	// place, by adding a scale factor 'a' as follows:
				143	//
				144	// s -> a^2 rho(s / a^2).
				145	//
				146	// The first and second derivatives are:
				147	//
				148	// s -> rho'(s / a^2),
				149	// s -> (1 / a^2) rho''(s / a^2),
				150	//
				151	// but the behaviour near s = 0 is the same as the original function,
				152	// i.e.
				153	//
				154	// rho(s) = s + higher order terms,
				155	// a^2 rho(s / a^2) = s + higher order terms.
				156	//
				157	// The scalar 'a' should be positive.
				158	//
				159	// The reason for the appearance of squaring is that 'a' is in the
				160	// units of the residual vector norm whereas 's' is a squared
				161	// norm. For applications it is more convenient to specify 'a' than
				162	// its square. The commonly used robustifiers below are described in
				163	// un-scaled format (a = 1) but their implementations work for any
				164	// non-zero value of 'a'.
				165
				166	// Huber.
				167	//
				168	// rho(s) = s for s <= 1,
				169	// rho(s) = 2 sqrt(s) - 1 for s >= 1.
				170	//
				171	// At s = 0: rho = [0, 1, 0].
				172	//
				173	// The scaling parameter 'a' corresponds to 'delta' on this page:
				174	// http://en.wikipedia.org/wiki/Huber_Loss_Function
Björn Piltz	5d7eed8	2014-04-23 22:13:37 +0200	[diff] [blame]	175	class CERES_EXPORT HuberLoss : public LossFunction {
Keir Mierle	8ebb073	2012-04-30 23:09:08 -0700	[diff] [blame]	176	public:
				177	explicit HuberLoss(double a) : a_(a), b_(a * a) { }
				178	virtual void Evaluate(double, double*) const;
Sameer Agarwal	0c714a7	2012-08-20 11:18:16 -0700	[diff] [blame]	179
Keir Mierle	8ebb073	2012-04-30 23:09:08 -0700	[diff] [blame]	180	private:
				181	const double a_;
				182	// b = a^2.
				183	const double b_;
				184	};
				185
				186	// Soft L1, similar to Huber but smooth.
				187	//
				188	// rho(s) = 2 (sqrt(1 + s) - 1).
				189	//
				190	// At s = 0: rho = [0, 1, -1/2].
Björn Piltz	5d7eed8	2014-04-23 22:13:37 +0200	[diff] [blame]	191	class CERES_EXPORT SoftLOneLoss : public LossFunction {
Keir Mierle	8ebb073	2012-04-30 23:09:08 -0700	[diff] [blame]	192	public:
				193	explicit SoftLOneLoss(double a) : b_(a * a), c_(1 / b_) { }
				194	virtual void Evaluate(double, double*) const;
Sameer Agarwal	0c714a7	2012-08-20 11:18:16 -0700	[diff] [blame]	195
Keir Mierle	8ebb073	2012-04-30 23:09:08 -0700	[diff] [blame]	196	private:
				197	// b = a^2.
				198	const double b_;
				199	// c = 1 / a^2.
				200	const double c_;
				201	};
				202
				203	// Inspired by the Cauchy distribution
				204	//
				205	// rho(s) = log(1 + s).
				206	//
				207	// At s = 0: rho = [0, 1, -1].
Björn Piltz	5d7eed8	2014-04-23 22:13:37 +0200	[diff] [blame]	208	class CERES_EXPORT CauchyLoss : public LossFunction {
Keir Mierle	8ebb073	2012-04-30 23:09:08 -0700	[diff] [blame]	209	public:
				210	explicit CauchyLoss(double a) : b_(a * a), c_(1 / b_) { }
				211	virtual void Evaluate(double, double*) const;
Sameer Agarwal	0c714a7	2012-08-20 11:18:16 -0700	[diff] [blame]	212
Keir Mierle	8ebb073	2012-04-30 23:09:08 -0700	[diff] [blame]	213	private:
				214	// b = a^2.
				215	const double b_;
				216	// c = 1 / a^2.
				217	const double c_;
				218	};
				219
Sameer Agarwal	ad1f7b7	2012-08-20 11:10:34 -0700	[diff] [blame]	220	// Loss that is capped beyond a certain level using the arc-tangent function.
				221	// The scaling parameter 'a' determines the level where falloff occurs.
				222	// For costs much smaller than 'a', the loss function is linear and behaves like
				223	// TrivialLoss, and for values much larger than 'a' the value asymptotically
				224	// approaches the constant value of a * PI / 2.
				225	//
				226	// rho(s) = a atan(s / a).
				227	//
				228	// At s = 0: rho = [0, 1, 0].
Björn Piltz	5d7eed8	2014-04-23 22:13:37 +0200	[diff] [blame]	229	class CERES_EXPORT ArctanLoss : public LossFunction {
Sameer Agarwal	ad1f7b7	2012-08-20 11:10:34 -0700	[diff] [blame]	230	public:
				231	explicit ArctanLoss(double a) : a_(a), b_(1 / (a * a)) { }
				232	virtual void Evaluate(double, double*) const;
Sameer Agarwal	0c714a7	2012-08-20 11:18:16 -0700	[diff] [blame]	233
Sameer Agarwal	ad1f7b7	2012-08-20 11:10:34 -0700	[diff] [blame]	234	private:
				235	const double a_;
				236	// b = 1 / a^2.
				237	const double b_;
				238	};
				239
				240	// Loss function that maps to approximately zero cost in a range around the
				241	// origin, and reverts to linear in error (quadratic in cost) beyond this range.
				242	// The tolerance parameter 'a' sets the nominal point at which the
				243	// transition occurs, and the transition size parameter 'b' sets the nominal
				244	// distance over which most of the transition occurs. Both a and b must be
				245	// greater than zero, and typically b will be set to a fraction of a.
				246	// The slope rho'[s] varies smoothly from about 0 at s <= a - b to
				247	// about 1 at s >= a + b.
				248	//
				249	// The term is computed as:
				250	//
				251	// rho(s) = b log(1 + exp((s - a) / b)) - c0.
				252	//
				253	// where c0 is chosen so that rho(0) == 0
				254	//
				255	// c0 = b log(1 + exp(-a / b)
				256	//
				257	// This has the following useful properties:
				258	//
				259	// rho(s) == 0 for s = 0
				260	// rho'(s) ~= 0 for s << a - b
				261	// rho'(s) ~= 1 for s >> a + b
				262	// rho''(s) > 0 for all s
				263	//
				264	// In addition, all derivatives are continuous, and the curvature is
				265	// concentrated in the range a - b to a + b.
				266	//
				267	// At s = 0: rho = [0, ~0, ~0].
Björn Piltz	5d7eed8	2014-04-23 22:13:37 +0200	[diff] [blame]	268	class CERES_EXPORT TolerantLoss : public LossFunction {
Sameer Agarwal	ad1f7b7	2012-08-20 11:10:34 -0700	[diff] [blame]	269	public:
				270	explicit TolerantLoss(double a, double b);
				271	virtual void Evaluate(double, double*) const;
				272
				273	private:
				274	const double a_, b_, c_;
				275	};
				276
				277	// Composition of two loss functions. The error is the result of first
				278	// evaluating g followed by f to yield the composition f(g(s)).
				279	// The loss functions must not be NULL.
				280	class ComposedLoss : public LossFunction {
				281	public:
				282	explicit ComposedLoss(const LossFunction* f, Ownership ownership_f,
				283	const LossFunction* g, Ownership ownership_g);
				284	virtual ~ComposedLoss();
				285	virtual void Evaluate(double, double*) const;
				286
				287	private:
				288	internal::scoped_ptr<const LossFunction> f_, g_;
				289	const Ownership ownership_f_, ownership_g_;
				290	};
				291
Keir Mierle	8ebb073	2012-04-30 23:09:08 -0700	[diff] [blame]	292	// The discussion above has to do with length scaling: it affects the space
				293	// in which s is measured. Sometimes you want to simply scale the output
				294	// value of the robustifier. For example, you might want to weight
				295	// different error terms differently (e.g., weight pixel reprojection
				296	// errors differently from terrain errors).
				297	//
				298	// If rho is the wrapped robustifier, then this simply outputs
				299	// s -> a * rho(s)
				300	//
				301	// The first and second derivatives are, not surprisingly
				302	// s -> a * rho'(s)
				303	// s -> a * rho''(s)
				304	//
				305	// Since we treat the a NULL Loss function as the Identity loss
				306	// function, rho = NULL is a valid input and will result in the input
				307	// being scaled by a. This provides a simple way of implementing a
				308	// scaled ResidualBlock.
Björn Piltz	5d7eed8	2014-04-23 22:13:37 +0200	[diff] [blame]	309	class CERES_EXPORT ScaledLoss : public LossFunction {
Keir Mierle	8ebb073	2012-04-30 23:09:08 -0700	[diff] [blame]	310	public:
				311	// Constructs a ScaledLoss wrapping another loss function. Takes
				312	// ownership of the wrapped loss function or not depending on the
				313	// ownership parameter.
				314	ScaledLoss(const LossFunction* rho, double a, Ownership ownership) :
				315	rho_(rho), a_(a), ownership_(ownership) { }
				316
				317	virtual ~ScaledLoss() {
				318	if (ownership_ == DO_NOT_TAKE_OWNERSHIP) {
				319	rho_.release();
				320	}
				321	}
				322	virtual void Evaluate(double, double*) const;
				323
				324	private:
				325	internal::scoped_ptr<const LossFunction> rho_;
				326	const double a_;
				327	const Ownership ownership_;
Sameer Agarwal	237d659	2012-05-30 20:34:49 -0700	[diff] [blame]	328	CERES_DISALLOW_COPY_AND_ASSIGN(ScaledLoss);
Keir Mierle	8ebb073	2012-04-30 23:09:08 -0700	[diff] [blame]	329	};
				330
				331	// Sometimes after the optimization problem has been constructed, we
				332	// wish to mutate the scale of the loss function. For example, when
				333	// performing estimation from data which has substantial outliers,
				334	// convergence can be improved by starting out with a large scale,
				335	// optimizing the problem and then reducing the scale. This can have
				336	// better convergence behaviour than just using a loss function with a
				337	// small scale.
				338	//
				339	// This templated class allows the user to implement a loss function
				340	// whose scale can be mutated after an optimization problem has been
				341	// constructed.
				342	//
				343	// Example usage
				344	//
				345	// Problem problem;
				346	//
				347	// // Add parameter blocks
				348	//
				349	// CostFunction* cost_function =
				350	// new AutoDiffCostFunction < UW_Camera_Mapper, 2, 9, 3>(
Sameer Agarwal	ebbb984	2013-05-26 12:40:12 -0700	[diff] [blame]	351	// new UW_Camera_Mapper(feature_x, feature_y));
Keir Mierle	8ebb073	2012-04-30 23:09:08 -0700	[diff] [blame]	352	//
				353	// LossFunctionWrapper* loss_function(new HuberLoss(1.0), TAKE_OWNERSHIP);
				354	//
				355	// problem.AddResidualBlock(cost_function, loss_function, parameters);
				356	//
				357	// Solver::Options options;
Sameer Agarwal	ebbb984	2013-05-26 12:40:12 -0700	[diff] [blame]	358	// Solger::Summary summary;
				359	//
				360	// Solve(options, &problem, &summary)
Keir Mierle	8ebb073	2012-04-30 23:09:08 -0700	[diff] [blame]	361	//
				362	// loss_function->Reset(new HuberLoss(1.0), TAKE_OWNERSHIP);
				363	//
Sameer Agarwal	ebbb984	2013-05-26 12:40:12 -0700	[diff] [blame]	364	// Solve(options, &problem, &summary)
Keir Mierle	8ebb073	2012-04-30 23:09:08 -0700	[diff] [blame]	365	//
Björn Piltz	5d7eed8	2014-04-23 22:13:37 +0200	[diff] [blame]	366	class CERES_EXPORT LossFunctionWrapper : public LossFunction {
Keir Mierle	8ebb073	2012-04-30 23:09:08 -0700	[diff] [blame]	367	public:
				368	LossFunctionWrapper(LossFunction* rho, Ownership ownership)
				369	: rho_(rho), ownership_(ownership) {
				370	}
				371
				372	virtual ~LossFunctionWrapper() {
				373	if (ownership_ == DO_NOT_TAKE_OWNERSHIP) {
				374	rho_.release();
				375	}
				376	}
				377
				378	virtual void Evaluate(double sq_norm, double out[3]) const {
				379	CHECK_NOTNULL(rho_.get());
				380	rho_->Evaluate(sq_norm, out);
				381	}
				382
				383	void Reset(LossFunction* rho, Ownership ownership) {
				384	if (ownership_ == DO_NOT_TAKE_OWNERSHIP) {
				385	rho_.release();
				386	}
				387	rho_.reset(rho);
				388	ownership_ = ownership;
				389	}
				390
				391	private:
				392	internal::scoped_ptr<const LossFunction> rho_;
				393	Ownership ownership_;
Sameer Agarwal	237d659	2012-05-30 20:34:49 -0700	[diff] [blame]	394	CERES_DISALLOW_COPY_AND_ASSIGN(LossFunctionWrapper);
Keir Mierle	8ebb073	2012-04-30 23:09:08 -0700	[diff] [blame]	395	};
				396
				397	} // namespace ceres
				398
Björn Piltz	c0b8838	2014-05-19 08:21:00 +0200	[diff] [blame]	399	#include "ceres/internal/disable_warnings.h"
				400
Keir Mierle	8ebb073	2012-04-30 23:09:08 -0700	[diff] [blame]	401	#endif // CERES_PUBLIC_LOSS_FUNCTION_H_