Blame - include/ceres/loss_function.h - ceres-solver

blob: 0c0ceaaecd06bb47584fba6b436c3c4e86302e0e [file] [log] [blame]

Keir Mierle	8ebb073	2012-04-30 23:09:08 -0700	[diff] [blame]	1	// Ceres Solver - A fast non-linear least squares minimizer
				2	// Copyright 2010, 2011, 2012 Google Inc. All rights reserved.
				3	// http://code.google.com/p/ceres-solver/
				4	//
				5	// Redistribution and use in source and binary forms, with or without
				6	// modification, are permitted provided that the following conditions are met:
				7	//
				8	// * Redistributions of source code must retain the above copyright notice,
				9	// this list of conditions and the following disclaimer.
				10	// * Redistributions in binary form must reproduce the above copyright notice,
				11	// this list of conditions and the following disclaimer in the documentation
				12	// and/or other materials provided with the distribution.
				13	// * Neither the name of Google Inc. nor the names of its contributors may be
				14	// used to endorse or promote products derived from this software without
				15	// specific prior written permission.
				16	//
				17	// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
				18	// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
				19	// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
				20	// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
				21	// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
				22	// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
				23	// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
				24	// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
				25	// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
				26	// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
				27	// POSSIBILITY OF SUCH DAMAGE.
				28	//
				29	// Author: sameeragarwal@google.com (Sameer Agarwal)
				30	//
				31	// The LossFunction interface is the way users describe how residuals
				32	// are converted to cost terms for the overall problem cost function.
				33	// For the exact manner in which loss functions are converted to the
				34	// overall cost for a problem, see problem.h.
				35	//
				36	// For least squares problem where there are no outliers and standard
				37	// squared loss is expected, it is not necessary to create a loss
				38	// function; instead passing a NULL to the problem when adding
				39	// residuals implies a standard squared loss.
				40	//
				41	// For least squares problems where the minimization may encounter
				42	// input terms that contain outliers, that is, completely bogus
				43	// measurements, it is important to use a loss function that reduces
				44	// their associated penalty.
				45	//
				46	// Consider a structure from motion problem. The unknowns are 3D
				47	// points and camera parameters, and the measurements are image
				48	// coordinates describing the expected reprojected position for a
				49	// point in a camera. For example, we want to model the geometry of a
				50	// street scene with fire hydrants and cars, observed by a moving
				51	// camera with unknown parameters, and the only 3D points we care
				52	// about are the pointy tippy-tops of the fire hydrants. Our magic
				53	// image processing algorithm, which is responsible for producing the
				54	// measurements that are input to Ceres, has found and matched all
				55	// such tippy-tops in all image frames, except that in one of the
				56	// frame it mistook a car's headlight for a hydrant. If we didn't do
				57	// anything special (i.e. if we used a basic quadratic loss), the
				58	// residual for the erroneous measurement will result in extreme error
				59	// due to the quadratic nature of squared loss. This results in the
				60	// entire solution getting pulled away from the optimimum to reduce
				61	// the large error that would otherwise be attributed to the wrong
				62	// measurement.
				63	//
				64	// Using a robust loss function, the cost for large residuals is
				65	// reduced. In the example above, this leads to outlier terms getting
				66	// downweighted so they do not overly influence the final solution.
				67	//
				68	// What cost function is best?
				69	//
				70	// In general, there isn't a principled way to select a robust loss
				71	// function. The authors suggest starting with a non-robust cost, then
				72	// only experimenting with robust loss functions if standard squared
				73	// loss doesn't work.
				74
				75	#ifndef CERES_PUBLIC_LOSS_FUNCTION_H_
				76	#define CERES_PUBLIC_LOSS_FUNCTION_H_
				77
				78	#include <glog/logging.h>
				79	#include "ceres/internal/macros.h"
				80	#include "ceres/internal/scoped_ptr.h"
				81	#include "ceres/types.h"
				82
				83	namespace ceres {
				84
				85	class LossFunction {
				86	public:
				87	virtual ~LossFunction() {}
				88
				89	// For a residual vector with squared 2-norm 'sq_norm', this method
				90	// is required to fill in the value and derivatives of the loss
				91	// function (rho in this example):
				92	//
				93	// out[0] = rho(sq_norm),
				94	// out[1] = rho'(sq_norm),
				95	// out[2] = rho''(sq_norm),
				96	//
				97	// Here the convention is that the contribution of a term to the
				98	// cost function is given by 1/2 rho(s), where
				99	//
				100	// s = \|\|residuals\|\|^2.
				101	//
				102	// Calling the method with a negative value of 's' is an error and
				103	// the implementations are not required to handle that case.
				104	//
				105	// Most sane choices of rho() satisfy:
				106	//
				107	// rho(0) = 0,
				108	// rho'(0) = 1,
				109	// rho'(s) < 1 in outlier region,
				110	// rho''(s) < 0 in outlier region,
				111	//
				112	// so that they mimic the least squares cost for small residuals.
				113	virtual void Evaluate(double sq_norm, double out[3]) const = 0;
				114	};
				115
				116	// Some common implementations follow below.
				117	//
				118	// Note: in the region of interest (i.e. s < 3) we have:
				119	// TrivialLoss >= HuberLoss >= SoftLOneLoss >= CauchyLoss
				120
				121
				122	// This corresponds to no robustification.
				123	//
				124	// rho(s) = s
				125	//
				126	// At s = 0: rho = [0, 1, 0].
				127	//
				128	// It is not normally necessary to use this, as passing NULL for the
				129	// loss function when building the problem accomplishes the same
				130	// thing.
				131	class TrivialLoss : public LossFunction {
				132	public:
				133	virtual void Evaluate(double, double*) const;
				134	};
				135
				136	// Scaling
				137	// -------
				138	// Given one robustifier
				139	// s -> rho(s)
				140	// one can change the length scale at which robustification takes
				141	// place, by adding a scale factor 'a' as follows:
				142	//
				143	// s -> a^2 rho(s / a^2).
				144	//
				145	// The first and second derivatives are:
				146	//
				147	// s -> rho'(s / a^2),
				148	// s -> (1 / a^2) rho''(s / a^2),
				149	//
				150	// but the behaviour near s = 0 is the same as the original function,
				151	// i.e.
				152	//
				153	// rho(s) = s + higher order terms,
				154	// a^2 rho(s / a^2) = s + higher order terms.
				155	//
				156	// The scalar 'a' should be positive.
				157	//
				158	// The reason for the appearance of squaring is that 'a' is in the
				159	// units of the residual vector norm whereas 's' is a squared
				160	// norm. For applications it is more convenient to specify 'a' than
				161	// its square. The commonly used robustifiers below are described in
				162	// un-scaled format (a = 1) but their implementations work for any
				163	// non-zero value of 'a'.
				164
				165	// Huber.
				166	//
				167	// rho(s) = s for s <= 1,
				168	// rho(s) = 2 sqrt(s) - 1 for s >= 1.
				169	//
				170	// At s = 0: rho = [0, 1, 0].
				171	//
				172	// The scaling parameter 'a' corresponds to 'delta' on this page:
				173	// http://en.wikipedia.org/wiki/Huber_Loss_Function
				174	class HuberLoss : public LossFunction {
				175	public:
				176	explicit HuberLoss(double a) : a_(a), b_(a * a) { }
				177	virtual void Evaluate(double, double*) const;
Sameer Agarwal	0c714a7	2012-08-20 11:18:16 -0700	[diff] [blame]	178
Keir Mierle	8ebb073	2012-04-30 23:09:08 -0700	[diff] [blame]	179	private:
				180	const double a_;
				181	// b = a^2.
				182	const double b_;
				183	};
				184
				185	// Soft L1, similar to Huber but smooth.
				186	//
				187	// rho(s) = 2 (sqrt(1 + s) - 1).
				188	//
				189	// At s = 0: rho = [0, 1, -1/2].
				190	class SoftLOneLoss : public LossFunction {
				191	public:
				192	explicit SoftLOneLoss(double a) : b_(a * a), c_(1 / b_) { }
				193	virtual void Evaluate(double, double*) const;
Sameer Agarwal	0c714a7	2012-08-20 11:18:16 -0700	[diff] [blame]	194
Keir Mierle	8ebb073	2012-04-30 23:09:08 -0700	[diff] [blame]	195	private:
				196	// b = a^2.
				197	const double b_;
				198	// c = 1 / a^2.
				199	const double c_;
				200	};
				201
				202	// Inspired by the Cauchy distribution
				203	//
				204	// rho(s) = log(1 + s).
				205	//
				206	// At s = 0: rho = [0, 1, -1].
				207	class CauchyLoss : public LossFunction {
				208	public:
				209	explicit CauchyLoss(double a) : b_(a * a), c_(1 / b_) { }
				210	virtual void Evaluate(double, double*) const;
Sameer Agarwal	0c714a7	2012-08-20 11:18:16 -0700	[diff] [blame]	211
Keir Mierle	8ebb073	2012-04-30 23:09:08 -0700	[diff] [blame]	212	private:
				213	// b = a^2.
				214	const double b_;
				215	// c = 1 / a^2.
				216	const double c_;
				217	};
				218
Sameer Agarwal	ad1f7b7	2012-08-20 11:10:34 -0700	[diff] [blame]	219	// Loss that is capped beyond a certain level using the arc-tangent function.
				220	// The scaling parameter 'a' determines the level where falloff occurs.
				221	// For costs much smaller than 'a', the loss function is linear and behaves like
				222	// TrivialLoss, and for values much larger than 'a' the value asymptotically
				223	// approaches the constant value of a * PI / 2.
				224	//
				225	// rho(s) = a atan(s / a).
				226	//
				227	// At s = 0: rho = [0, 1, 0].
				228	class ArctanLoss : public LossFunction {
				229	public:
				230	explicit ArctanLoss(double a) : a_(a), b_(1 / (a * a)) { }
				231	virtual void Evaluate(double, double*) const;
Sameer Agarwal	0c714a7	2012-08-20 11:18:16 -0700	[diff] [blame]	232
Sameer Agarwal	ad1f7b7	2012-08-20 11:10:34 -0700	[diff] [blame]	233	private:
				234	const double a_;
				235	// b = 1 / a^2.
				236	const double b_;
				237	};
				238
				239	// Loss function that maps to approximately zero cost in a range around the
				240	// origin, and reverts to linear in error (quadratic in cost) beyond this range.
				241	// The tolerance parameter 'a' sets the nominal point at which the
				242	// transition occurs, and the transition size parameter 'b' sets the nominal
				243	// distance over which most of the transition occurs. Both a and b must be
				244	// greater than zero, and typically b will be set to a fraction of a.
				245	// The slope rho'[s] varies smoothly from about 0 at s <= a - b to
				246	// about 1 at s >= a + b.
				247	//
				248	// The term is computed as:
				249	//
				250	// rho(s) = b log(1 + exp((s - a) / b)) - c0.
				251	//
				252	// where c0 is chosen so that rho(0) == 0
				253	//
				254	// c0 = b log(1 + exp(-a / b)
				255	//
				256	// This has the following useful properties:
				257	//
				258	// rho(s) == 0 for s = 0
				259	// rho'(s) ~= 0 for s << a - b
				260	// rho'(s) ~= 1 for s >> a + b
				261	// rho''(s) > 0 for all s
				262	//
				263	// In addition, all derivatives are continuous, and the curvature is
				264	// concentrated in the range a - b to a + b.
				265	//
				266	// At s = 0: rho = [0, ~0, ~0].
				267	class TolerantLoss : public LossFunction {
				268	public:
				269	explicit TolerantLoss(double a, double b);
				270	virtual void Evaluate(double, double*) const;
				271
				272	private:
				273	const double a_, b_, c_;
				274	};
				275
				276	// Composition of two loss functions. The error is the result of first
				277	// evaluating g followed by f to yield the composition f(g(s)).
				278	// The loss functions must not be NULL.
				279	class ComposedLoss : public LossFunction {
				280	public:
				281	explicit ComposedLoss(const LossFunction* f, Ownership ownership_f,
				282	const LossFunction* g, Ownership ownership_g);
				283	virtual ~ComposedLoss();
				284	virtual void Evaluate(double, double*) const;
				285
				286	private:
				287	internal::scoped_ptr<const LossFunction> f_, g_;
				288	const Ownership ownership_f_, ownership_g_;
				289	};
				290
Keir Mierle	8ebb073	2012-04-30 23:09:08 -0700	[diff] [blame]	291	// The discussion above has to do with length scaling: it affects the space
				292	// in which s is measured. Sometimes you want to simply scale the output
				293	// value of the robustifier. For example, you might want to weight
				294	// different error terms differently (e.g., weight pixel reprojection
				295	// errors differently from terrain errors).
				296	//
				297	// If rho is the wrapped robustifier, then this simply outputs
				298	// s -> a * rho(s)
				299	//
				300	// The first and second derivatives are, not surprisingly
				301	// s -> a * rho'(s)
				302	// s -> a * rho''(s)
				303	//
				304	// Since we treat the a NULL Loss function as the Identity loss
				305	// function, rho = NULL is a valid input and will result in the input
				306	// being scaled by a. This provides a simple way of implementing a
				307	// scaled ResidualBlock.
				308	class ScaledLoss : public LossFunction {
				309	public:
				310	// Constructs a ScaledLoss wrapping another loss function. Takes
				311	// ownership of the wrapped loss function or not depending on the
				312	// ownership parameter.
				313	ScaledLoss(const LossFunction* rho, double a, Ownership ownership) :
				314	rho_(rho), a_(a), ownership_(ownership) { }
				315
				316	virtual ~ScaledLoss() {
				317	if (ownership_ == DO_NOT_TAKE_OWNERSHIP) {
				318	rho_.release();
				319	}
				320	}
				321	virtual void Evaluate(double, double*) const;
				322
				323	private:
				324	internal::scoped_ptr<const LossFunction> rho_;
				325	const double a_;
				326	const Ownership ownership_;
Sameer Agarwal	237d659	2012-05-30 20:34:49 -0700	[diff] [blame]	327	CERES_DISALLOW_COPY_AND_ASSIGN(ScaledLoss);
Keir Mierle	8ebb073	2012-04-30 23:09:08 -0700	[diff] [blame]	328	};
				329
				330	// Sometimes after the optimization problem has been constructed, we
				331	// wish to mutate the scale of the loss function. For example, when
				332	// performing estimation from data which has substantial outliers,
				333	// convergence can be improved by starting out with a large scale,
				334	// optimizing the problem and then reducing the scale. This can have
				335	// better convergence behaviour than just using a loss function with a
				336	// small scale.
				337	//
				338	// This templated class allows the user to implement a loss function
				339	// whose scale can be mutated after an optimization problem has been
				340	// constructed.
				341	//
				342	// Example usage
				343	//
				344	// Problem problem;
				345	//
				346	// // Add parameter blocks
				347	//
				348	// CostFunction* cost_function =
				349	// new AutoDiffCostFunction < UW_Camera_Mapper, 2, 9, 3>(
				350	// new UW_Camera_Mapper(data->observations[2*i + 0],
				351	// data->observations[2*i + 1]));
				352	//
				353	// LossFunctionWrapper* loss_function(new HuberLoss(1.0), TAKE_OWNERSHIP);
				354	//
				355	// problem.AddResidualBlock(cost_function, loss_function, parameters);
				356	//
				357	// Solver::Options options;
				358	// scoped_ptr<Solver::Summary> summary1(Solve(problem, options));
				359	//
				360	// loss_function->Reset(new HuberLoss(1.0), TAKE_OWNERSHIP);
				361	//
				362	// scoped_ptr<Solver::Summary> summary2(Solve(problem, options));
				363	//
				364	class LossFunctionWrapper : public LossFunction {
				365	public:
				366	LossFunctionWrapper(LossFunction* rho, Ownership ownership)
				367	: rho_(rho), ownership_(ownership) {
				368	}
				369
				370	virtual ~LossFunctionWrapper() {
				371	if (ownership_ == DO_NOT_TAKE_OWNERSHIP) {
				372	rho_.release();
				373	}
				374	}
				375
				376	virtual void Evaluate(double sq_norm, double out[3]) const {
				377	CHECK_NOTNULL(rho_.get());
				378	rho_->Evaluate(sq_norm, out);
				379	}
				380
				381	void Reset(LossFunction* rho, Ownership ownership) {
				382	if (ownership_ == DO_NOT_TAKE_OWNERSHIP) {
				383	rho_.release();
				384	}
				385	rho_.reset(rho);
				386	ownership_ = ownership;
				387	}
				388
				389	private:
				390	internal::scoped_ptr<const LossFunction> rho_;
				391	Ownership ownership_;
Sameer Agarwal	237d659	2012-05-30 20:34:49 -0700	[diff] [blame]	392	CERES_DISALLOW_COPY_AND_ASSIGN(LossFunctionWrapper);
Keir Mierle	8ebb073	2012-04-30 23:09:08 -0700	[diff] [blame]	393	};
				394
				395	} // namespace ceres
				396
				397	#endif // CERES_PUBLIC_LOSS_FUNCTION_H_