// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2010, 2011, 2012 Google Inc. All rights reserved.
// http://code.google.com/p/ceres-solver/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
//   this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
//   this list of conditions and the following disclaimer in the documentation
//   and/or other materials provided with the distribution.
// * Neither the name of Google Inc. nor the names of its contributors may be
//   used to endorse or promote products derived from this software without
//   specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
// Author: sameeragarwal@google.com (Sameer Agarwal)
//
// The LossFunction interface is the way users describe how residuals
// are converted to cost terms for the overall problem cost function.
// For the exact manner in which loss functions are converted to the
// overall cost for a problem, see problem.h.
//
// For least squares problems where there are no outliers and standard
// squared loss is expected, it is not necessary to create a loss
// function; instead, passing NULL to the problem when adding residuals
// implies a standard squared loss.
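//
// For example (a sketch; 'problem', 'cost_function' and the parameter
// block 'x' are assumed to already exist):
//
//   problem.AddResidualBlock(cost_function, NULL, x);  // Squared loss.
//   problem.AddResidualBlock(cost_function, new CauchyLoss(0.5), x);  // Robust.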
//
// For least squares problems where the minimization may encounter
// input terms that contain outliers, that is, completely bogus
// measurements, it is important to use a loss function that reduces
// their associated penalty.
//
// Consider a structure from motion problem. The unknowns are 3D
// points and camera parameters, and the measurements are image
// coordinates describing the expected reprojected position for a
// point in a camera. For example, we want to model the geometry of a
// street scene with fire hydrants and cars, observed by a moving
// camera with unknown parameters, and the only 3D points we care
// about are the pointy tippy-tops of the fire hydrants. Our magic
// image processing algorithm, which is responsible for producing the
// measurements that are input to Ceres, has found and matched all
// such tippy-tops in all image frames, except that in one of the
// frames it mistook a car's headlight for a hydrant. If we didn't do
// anything special (i.e. if we used a basic quadratic loss), the
// residual for the erroneous measurement would be extremely large due
// to the quadratic nature of squared loss. This would pull the entire
// solution away from the optimum to reduce the large error that would
// otherwise be attributed to the wrong measurement.
//
// Using a robust loss function, the cost for large residuals is
// reduced. In the example above, this leads to outlier terms getting
// downweighted so they do not overly influence the final solution.
//
// What loss function is best?
//
// In general, there isn't a principled way to select a robust loss
// function. The authors suggest starting with a non-robust cost, then
// only experimenting with robust loss functions if standard squared
// loss doesn't work.

#ifndef CERES_PUBLIC_LOSS_FUNCTION_H_
#define CERES_PUBLIC_LOSS_FUNCTION_H_

#include <glog/logging.h>
#include "ceres/internal/macros.h"
#include "ceres/internal/scoped_ptr.h"
#include "ceres/types.h"

namespace ceres {

class LossFunction {
 public:
  virtual ~LossFunction() {}

  // For a residual vector with squared 2-norm 'sq_norm', this method
  // is required to fill in the value and derivatives of the loss
  // function (rho in this example):
  //
  //   out[0] = rho(sq_norm),
  //   out[1] = rho'(sq_norm),
  //   out[2] = rho''(sq_norm),
  //
  // Here the convention is that the contribution of a term to the
  // cost function is given by 1/2 rho(s), where
  //
  //   s = ||residuals||^2.
  //
  // Calling the method with a negative value of 's' is an error and
  // the implementations are not required to handle that case.
  //
  // Most sane choices of rho() satisfy:
  //
  //   rho(0)   = 0,
  //   rho'(0)  = 1,
  //   rho'(s)  < 1 in outlier region,
  //   rho''(s) < 0 in outlier region,
  //
  // so that they mimic the least squares cost for small residuals.
  virtual void Evaluate(double sq_norm, double out[3]) const = 0;
};
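
// As an illustration of the interface, a user-defined robustifier for
// the Geman-McClure rho(s) = s / (1 + s) could look as follows (a
// sketch only; GemanMcClureLoss is a hypothetical class, not part of
// this header):
//
//   class GemanMcClureLoss : public LossFunction {
//    public:
//     virtual void Evaluate(double s, double out[3]) const {
//       const double inv = 1.0 / (1.0 + s);
//       out[0] = s * inv;                 // rho(s)   = s / (1 + s).
//       out[1] = inv * inv;               // rho'(s)  = 1 / (1 + s)^2.
//       out[2] = -2.0 * inv * inv * inv;  // rho''(s) = -2 / (1 + s)^3.
//     }
//   };
//
// Note that rho(0) = 0, rho'(0) = 1 and rho''(s) < 0, matching the
// conventions above.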

// Some common implementations follow below.
//
// Note: in the region of interest (i.e. s < 3) we have:
//   TrivialLoss >= HuberLoss >= SoftLOneLoss >= CauchyLoss
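//
// For example, at s = 1 (with a = 1): TrivialLoss = 1, HuberLoss = 1,
// SoftLOneLoss = 2 (sqrt(2) - 1) ~= 0.83, and CauchyLoss = log(2) ~= 0.69.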


// This corresponds to no robustification.
//
//   rho(s) = s
//
// At s = 0: rho = [0, 1, 0].
//
// It is not normally necessary to use this, as passing NULL for the
// loss function when building the problem accomplishes the same
// thing.
class TrivialLoss : public LossFunction {
 public:
  virtual void Evaluate(double, double*) const;
};

// Scaling
// -------
// Given one robustifier
//   s -> rho(s)
// one can change the length scale at which robustification takes
// place, by adding a scale factor 'a' as follows:
//
//   s -> a^2 rho(s / a^2).
//
// The first and second derivatives are:
//
//   s -> rho'(s / a^2),
//   s -> (1 / a^2) rho''(s / a^2),
//
// but the behaviour near s = 0 is the same as the original function,
// i.e.
//
//   rho(s) = s + higher order terms,
//   a^2 rho(s / a^2) = s + higher order terms.
//
// The scalar 'a' should be positive.
//
// The reason for the appearance of squaring is that 'a' is in the
// units of the residual vector norm whereas 's' is a squared
// norm. For applications it is more convenient to specify 'a' than
// its square. The commonly used robustifiers below are described in
// un-scaled format (a = 1) but their implementations work for any
// non-zero value of 'a'.
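//
// For example, the scaled version of any rho could be computed from
// the un-scaled one as follows (a sketch; EvaluateScaled is a
// hypothetical helper, not part of this header):
//
//   void EvaluateScaled(const LossFunction& rho, double a, double s,
//                       double out[3]) {
//     const double a2 = a * a;  // 'a' is in residual-norm units; 's' is squared.
//     double r[3];
//     rho.Evaluate(s / a2, r);
//     out[0] = a2 * r[0];  // a^2 rho(s / a^2).
//     out[1] = r[1];       // rho'(s / a^2), by the chain rule.
//     out[2] = r[2] / a2;  // (1 / a^2) rho''(s / a^2).
//   }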

// Huber.
//
//   rho(s) = s               for s <= 1,
//   rho(s) = 2 sqrt(s) - 1   for s >= 1.
//
// At s = 0: rho = [0, 1, 0].
//
// The scaling parameter 'a' corresponds to 'delta' on this page:
//   http://en.wikipedia.org/wiki/Huber_Loss_Function
class HuberLoss : public LossFunction {
 public:
  explicit HuberLoss(double a) : a_(a), b_(a * a) { }
  virtual void Evaluate(double, double*) const;

 private:
  const double a_;
  // b = a^2.
  const double b_;
};
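//
// A sketch of how the scaled Huber evaluation could be implemented
// (the authoritative definition lives in the corresponding .cc file;
// here b = a^2):
//
//   void HuberLoss::Evaluate(double s, double out[3]) const {
//     if (s > b_) {
//       // Outlier region: a^2 rho(s / a^2) = 2 a sqrt(s) - b.
//       const double r = sqrt(s);
//       out[0] = 2.0 * a_ * r - b_;
//       out[1] = a_ / r;
//       out[2] = -out[1] / (2.0 * s);
//     } else {
//       // Inlier region: identical to squared loss.
//       out[0] = s;
//       out[1] = 1.0;
//       out[2] = 0.0;
//     }
//   }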

// Soft L1, similar to Huber but smooth.
//
//   rho(s) = 2 (sqrt(1 + s) - 1).
//
// At s = 0: rho = [0, 1, -1/2].
class SoftLOneLoss : public LossFunction {
 public:
  explicit SoftLOneLoss(double a) : b_(a * a), c_(1 / b_) { }
  virtual void Evaluate(double, double*) const;

 private:
  // b = a^2.
  const double b_;
  // c = 1 / a^2.
  const double c_;
};
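//
// A sketch of how the scaled Soft L1 evaluation could be implemented
// (illustrative only, not the authoritative definition; b = a^2 and
// c = 1 / a^2):
//
//   void SoftLOneLoss::Evaluate(double s, double out[3]) const {
//     const double sum = 1.0 + s * c_;
//     const double tmp = sqrt(sum);
//     out[0] = 2.0 * b_ * (tmp - 1.0);      // a^2 rho(s / a^2).
//     out[1] = 1.0 / tmp;                   // rho'(s / a^2).
//     out[2] = -c_ * out[1] / (2.0 * sum);  // (1 / a^2) rho''(s / a^2).
//   }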

// Inspired by the Cauchy distribution
//
//   rho(s) = log(1 + s).
//
// At s = 0: rho = [0, 1, -1].
class CauchyLoss : public LossFunction {
 public:
  explicit CauchyLoss(double a) : b_(a * a), c_(1 / b_) { }
  virtual void Evaluate(double, double*) const;

 private:
  // b = a^2.
  const double b_;
  // c = 1 / a^2.
  const double c_;
};
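//
// Similarly, a sketch of the scaled Cauchy evaluation (illustrative
// only, not the authoritative definition):
//
//   void CauchyLoss::Evaluate(double s, double out[3]) const {
//     const double sum = 1.0 + s * c_;
//     const double inv = 1.0 / sum;
//     out[0] = b_ * log(sum);    // a^2 rho(s / a^2).
//     out[1] = inv;              // rho'(s / a^2) = 1 / (1 + s / a^2).
//     out[2] = -c_ * inv * inv;  // (1 / a^2) rho''(s / a^2).
//   }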

// The discussion above has to do with length scaling: it affects the
// space in which 's' is measured. Sometimes you want to simply scale
// the output value of the robustifier. For example, you might want to
// weight different error terms differently (e.g., weight pixel
// reprojection errors differently from terrain errors).
//
// If rho is the wrapped robustifier, then this simply outputs
//   s -> a * rho(s).
//
// The first and second derivatives are, not surprisingly,
//   s -> a * rho'(s),
//   s -> a * rho''(s).
//
// Since we treat a NULL loss function as the identity loss function,
// rho = NULL is a valid input and will result in the input being
// scaled by 'a'. This provides a simple way of implementing a scaled
// ResidualBlock.
class ScaledLoss : public LossFunction {
 public:
  // Constructs a ScaledLoss wrapping another loss function. Takes
  // ownership of the wrapped loss function or not depending on the
  // ownership parameter.
  ScaledLoss(const LossFunction* rho, double a, Ownership ownership) :
      rho_(rho), a_(a), ownership_(ownership) { }

  virtual ~ScaledLoss() {
    if (ownership_ == DO_NOT_TAKE_OWNERSHIP) {
      rho_.release();
    }
  }
  virtual void Evaluate(double, double*) const;

 private:
  internal::scoped_ptr<const LossFunction> rho_;
  const double a_;
  const Ownership ownership_;
  CERES_DISALLOW_COPY_AND_ASSIGN(ScaledLoss);
};
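
// For example, to down-weight a group of residual blocks by a
// constant factor (a usage sketch; 'problem', 'terrain_cost_function'
// and 'terrain_point' are assumed to exist, and the weight 0.2 is
// arbitrary):
//
//   problem.AddResidualBlock(
//       terrain_cost_function,
//       new ScaledLoss(NULL, 0.2, TAKE_OWNERSHIP),
//       terrain_point);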

// Sometimes after the optimization problem has been constructed, we
// wish to mutate the scale of the loss function. For example, when
// performing estimation from data which has substantial outliers,
// convergence can be improved by starting out with a large scale,
// optimizing the problem and then reducing the scale. This can have
// better convergence behaviour than just using a loss function with a
// small scale.
//
// This class allows the user to implement a loss function whose scale
// can be mutated after an optimization problem has been constructed.
//
// Example usage:
//
//   Problem problem;
//
//   // Add parameter blocks.
//
//   CostFunction* cost_function =
//       new AutoDiffCostFunction<UW_Camera_Mapper, 2, 9, 3>(
//           new UW_Camera_Mapper(data->observations[2*i + 0],
//                                data->observations[2*i + 1]));
//
//   LossFunctionWrapper* loss_function =
//       new LossFunctionWrapper(new HuberLoss(10.0), TAKE_OWNERSHIP);
//
//   problem.AddResidualBlock(cost_function, loss_function, parameters);
//
//   Solver::Options options;
//   Solver::Summary summary;
//   Solve(options, &problem, &summary);
//
//   // Solve again, with a smaller scale.
//   loss_function->Reset(new HuberLoss(1.0), TAKE_OWNERSHIP);
//   Solve(options, &problem, &summary);
//
class LossFunctionWrapper : public LossFunction {
 public:
  LossFunctionWrapper(LossFunction* rho, Ownership ownership)
      : rho_(rho), ownership_(ownership) {
  }

  virtual ~LossFunctionWrapper() {
    if (ownership_ == DO_NOT_TAKE_OWNERSHIP) {
      rho_.release();
    }
  }

  virtual void Evaluate(double sq_norm, double out[3]) const {
    CHECK_NOTNULL(rho_.get());
    rho_->Evaluate(sq_norm, out);
  }

  void Reset(LossFunction* rho, Ownership ownership) {
    if (ownership_ == DO_NOT_TAKE_OWNERSHIP) {
      rho_.release();
    }
    rho_.reset(rho);
    ownership_ = ownership;
  }

 private:
  internal::scoped_ptr<const LossFunction> rho_;
  Ownership ownership_;
  CERES_DISALLOW_COPY_AND_ASSIGN(LossFunctionWrapper);
};

}  // namespace ceres

#endif  // CERES_PUBLIC_LOSS_FUNCTION_H_