// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2010, 2011, 2012 Google Inc. All rights reserved.
// http://code.google.com/p/ceres-solver/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
//   this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
//   this list of conditions and the following disclaimer in the documentation
//   and/or other materials provided with the distribution.
// * Neither the name of Google Inc. nor the names of its contributors may be
//   used to endorse or promote products derived from this software without
//   specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
// Author: sameeragarwal@google.com (Sameer Agarwal)
//
// The LossFunction interface is the way users describe how residuals
// are converted to cost terms for the overall problem cost function.
// For the exact manner in which loss functions are converted to the
// overall cost for a problem, see problem.h.
//
// For least squares problems where there are no outliers and standard
// squared loss is expected, it is not necessary to create a loss
// function; instead, passing NULL to the problem when adding residuals
// implies a standard squared loss.
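//
// For example (a sketch; 'problem', 'cost_function' and the parameter
// block 'x' are assumed to already exist):
//
//   problem.AddResidualBlock(cost_function, NULL, x);  // Squared loss.
//   problem.AddResidualBlock(cost_function, new CauchyLoss(0.5), x);  // Robust.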
//
// For least squares problems where the minimization may encounter
// input terms that contain outliers, that is, completely bogus
// measurements, it is important to use a loss function that reduces
// their associated penalty.
//
// Consider a structure from motion problem. The unknowns are 3D
// points and camera parameters, and the measurements are image
// coordinates describing the expected reprojected position for a
// point in a camera. For example, we want to model the geometry of a
// street scene with fire hydrants and cars, observed by a moving
// camera with unknown parameters, and the only 3D points we care
// about are the pointy tippy-tops of the fire hydrants. Our magic
// image processing algorithm, which is responsible for producing the
// measurements that are input to Ceres, has found and matched all
// such tippy-tops in all image frames, except that in one of the
// frames it mistook a car's headlight for a hydrant. If we didn't do
// anything special (i.e. if we used a basic quadratic loss), the
// residual for the erroneous measurement would be extremely large due
// to the quadratic nature of squared loss. This would pull the entire
// solution away from the optimum to reduce the large error that would
// otherwise be attributed to the wrong measurement.
//
// Using a robust loss function, the cost for large residuals is
// reduced. In the example above, this leads to outlier terms getting
// downweighted so they do not overly influence the final solution.
//
// What loss function is best?
//
// In general, there isn't a principled way to select a robust loss
// function. The authors suggest starting with a non-robust cost, then
// only experimenting with robust loss functions if standard squared
// loss doesn't work.

#ifndef CERES_PUBLIC_LOSS_FUNCTION_H_
#define CERES_PUBLIC_LOSS_FUNCTION_H_

#include <glog/logging.h>
#include "ceres/internal/macros.h"
#include "ceres/internal/scoped_ptr.h"
#include "ceres/types.h"

namespace ceres {

class LossFunction {
 public:
  virtual ~LossFunction() {}

  // For a residual vector with squared 2-norm 'sq_norm', this method
  // is required to fill in the value and derivatives of the loss
  // function (rho in this example):
  //
  //   out[0] = rho(sq_norm),
  //   out[1] = rho'(sq_norm),
  //   out[2] = rho''(sq_norm),
  //
  // Here the convention is that the contribution of a term to the
  // cost function is given by 1/2 rho(s), where
  //
  //   s = ||residuals||^2.
  //
  // Calling the method with a negative value of 's' is an error and
  // the implementations are not required to handle that case.
  //
  // Most sane choices of rho() satisfy:
  //
  //   rho(0)   = 0,
  //   rho'(0)  = 1,
  //   rho'(s)  < 1 in outlier region,
  //   rho''(s) < 0 in outlier region,
  //
  // so that they mimic the least squares cost for small residuals.
  virtual void Evaluate(double sq_norm, double out[3]) const = 0;
};
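
// As an illustration of the interface, a user-defined robustifier for
// the Geman-McClure rho(s) = s / (1 + s) could look as follows (a
// sketch only; GemanMcClureLoss is a hypothetical class, not part of
// this header):
//
//   class GemanMcClureLoss : public LossFunction {
//    public:
//     virtual void Evaluate(double s, double out[3]) const {
//       const double inv = 1.0 / (1.0 + s);
//       out[0] = s * inv;                 // rho(s)   = s / (1 + s).
//       out[1] = inv * inv;               // rho'(s)  = 1 / (1 + s)^2.
//       out[2] = -2.0 * inv * inv * inv;  // rho''(s) = -2 / (1 + s)^3.
//     }
//   };
//
// Note that rho(0) = 0, rho'(0) = 1 and rho''(s) < 0, matching the
// conventions above.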

// Some common implementations follow below.
//
// Note: in the region of interest (i.e. s < 3) we have:
//   TrivialLoss >= HuberLoss >= SoftLOneLoss >= CauchyLoss
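//
// For example, at s = 1 (with a = 1): TrivialLoss = 1, HuberLoss = 1,
// SoftLOneLoss = 2 (sqrt(2) - 1) ~= 0.83, and CauchyLoss = log(2) ~= 0.69.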


// This corresponds to no robustification.
//
//   rho(s) = s
//
// At s = 0: rho = [0, 1, 0].
//
// It is not normally necessary to use this, as passing NULL for the
// loss function when building the problem accomplishes the same
// thing.
class TrivialLoss : public LossFunction {
 public:
  virtual void Evaluate(double, double*) const;
};

// Scaling
// -------
// Given one robustifier
//   s -> rho(s)
// one can change the length scale at which robustification takes
// place, by adding a scale factor 'a' as follows:
//
//   s -> a^2 rho(s / a^2).
//
// The first and second derivatives are:
//
//   s -> rho'(s / a^2),
//   s -> (1 / a^2) rho''(s / a^2),
//
// but the behaviour near s = 0 is the same as the original function,
// i.e.
//
//   rho(s) = s + higher order terms,
//   a^2 rho(s / a^2) = s + higher order terms.
//
// The scalar 'a' should be positive.
//
// The reason for the appearance of squaring is that 'a' is in the
// units of the residual vector norm whereas 's' is a squared
// norm. For applications it is more convenient to specify 'a' than
// its square. The commonly used robustifiers below are described in
// un-scaled format (a = 1) but their implementations work for any
// non-zero value of 'a'.
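//
// For example, the scaled version of any rho could be computed from
// the un-scaled one as follows (a sketch; EvaluateScaled is a
// hypothetical helper, not part of this header):
//
//   void EvaluateScaled(const LossFunction& rho, double a, double s,
//                       double out[3]) {
//     const double a2 = a * a;  // 'a' is in residual-norm units; 's' is squared.
//     double r[3];
//     rho.Evaluate(s / a2, r);
//     out[0] = a2 * r[0];  // a^2 rho(s / a^2).
//     out[1] = r[1];       // rho'(s / a^2), by the chain rule.
//     out[2] = r[2] / a2;  // (1 / a^2) rho''(s / a^2).
//   }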

// Huber.
//
//   rho(s) = s               for s <= 1,
//   rho(s) = 2 sqrt(s) - 1   for s >= 1.
//
// At s = 0: rho = [0, 1, 0].
//
// The scaling parameter 'a' corresponds to 'delta' on this page:
//   http://en.wikipedia.org/wiki/Huber_Loss_Function
class HuberLoss : public LossFunction {
 public:
  explicit HuberLoss(double a) : a_(a), b_(a * a) { }
  virtual void Evaluate(double, double*) const;

 private:
  const double a_;
  // b = a^2.
  const double b_;
};
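//
// A sketch of how the scaled Huber evaluation could be implemented
// (the authoritative definition lives in the corresponding .cc file;
// here b = a^2):
//
//   void HuberLoss::Evaluate(double s, double out[3]) const {
//     if (s > b_) {
//       // Outlier region: a^2 rho(s / a^2) = 2 a sqrt(s) - b.
//       const double r = sqrt(s);
//       out[0] = 2.0 * a_ * r - b_;
//       out[1] = a_ / r;
//       out[2] = -out[1] / (2.0 * s);
//     } else {
//       // Inlier region: identical to squared loss.
//       out[0] = s;
//       out[1] = 1.0;
//       out[2] = 0.0;
//     }
//   }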

// Soft L1, similar to Huber but smooth.
//
//   rho(s) = 2 (sqrt(1 + s) - 1).
//
// At s = 0: rho = [0, 1, -1/2].
class SoftLOneLoss : public LossFunction {
 public:
  explicit SoftLOneLoss(double a) : b_(a * a), c_(1 / b_) { }
  virtual void Evaluate(double, double*) const;

 private:
  // b = a^2.
  const double b_;
  // c = 1 / a^2.
  const double c_;
};
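//
// A sketch of how the scaled Soft L1 evaluation could be implemented
// (illustrative only, not the authoritative definition; b = a^2 and
// c = 1 / a^2):
//
//   void SoftLOneLoss::Evaluate(double s, double out[3]) const {
//     const double sum = 1.0 + s * c_;
//     const double tmp = sqrt(sum);
//     out[0] = 2.0 * b_ * (tmp - 1.0);      // a^2 rho(s / a^2).
//     out[1] = 1.0 / tmp;                   // rho'(s / a^2).
//     out[2] = -c_ * out[1] / (2.0 * sum);  // (1 / a^2) rho''(s / a^2).
//   }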

// Inspired by the Cauchy distribution
//
//   rho(s) = log(1 + s).
//
// At s = 0: rho = [0, 1, -1].
class CauchyLoss : public LossFunction {
 public:
  explicit CauchyLoss(double a) : b_(a * a), c_(1 / b_) { }
  virtual void Evaluate(double, double*) const;

 private:
  // b = a^2.
  const double b_;
  // c = 1 / a^2.
  const double c_;
};
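//
// Similarly, a sketch of the scaled Cauchy evaluation (illustrative
// only, not the authoritative definition):
//
//   void CauchyLoss::Evaluate(double s, double out[3]) const {
//     const double sum = 1.0 + s * c_;
//     const double inv = 1.0 / sum;
//     out[0] = b_ * log(sum);    // a^2 rho(s / a^2).
//     out[1] = inv;              // rho'(s / a^2) = 1 / (1 + s / a^2).
//     out[2] = -c_ * inv * inv;  // (1 / a^2) rho''(s / a^2).
//   }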

// The discussion above has to do with length scaling: it affects the
// space in which 's' is measured. Sometimes you want to simply scale
// the output value of the robustifier. For example, you might want to
// weight different error terms differently (e.g., weight pixel
// reprojection errors differently from terrain errors).
//
// If rho is the wrapped robustifier, then this simply outputs
//   s -> a * rho(s).
//
// The first and second derivatives are, not surprisingly,
//   s -> a * rho'(s),
//   s -> a * rho''(s).
//
// Since we treat a NULL loss function as the identity loss function,
// rho = NULL is a valid input and will result in the input being
// scaled by 'a'. This provides a simple way of implementing a scaled
// ResidualBlock.
class ScaledLoss : public LossFunction {
 public:
  // Constructs a ScaledLoss wrapping another loss function. Takes
  // ownership of the wrapped loss function or not depending on the
  // ownership parameter.
  ScaledLoss(const LossFunction* rho, double a, Ownership ownership) :
      rho_(rho), a_(a), ownership_(ownership) { }

  virtual ~ScaledLoss() {
    if (ownership_ == DO_NOT_TAKE_OWNERSHIP) {
      rho_.release();
    }
  }
  virtual void Evaluate(double, double*) const;

 private:
  internal::scoped_ptr<const LossFunction> rho_;
  const double a_;
  const Ownership ownership_;
  CERES_DISALLOW_COPY_AND_ASSIGN(ScaledLoss);
};
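
// For example, to down-weight a group of residual blocks by a
// constant factor (a usage sketch; 'problem', 'terrain_cost_function'
// and 'terrain_point' are assumed to exist, and the weight 0.2 is
// arbitrary):
//
//   problem.AddResidualBlock(
//       terrain_cost_function,
//       new ScaledLoss(NULL, 0.2, TAKE_OWNERSHIP),
//       terrain_point);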

// Sometimes after the optimization problem has been constructed, we
// wish to mutate the scale of the loss function. For example, when
// performing estimation from data which has substantial outliers,
// convergence can be improved by starting out with a large scale,
// optimizing the problem and then reducing the scale. This can have
// better convergence behaviour than just using a loss function with a
// small scale.
//
// This class allows the user to implement a loss function whose scale
// can be mutated after an optimization problem has been constructed.
//
// Example usage:
//
//   Problem problem;
//
//   // Add parameter blocks.
//
//   CostFunction* cost_function =
//       new AutoDiffCostFunction<UW_Camera_Mapper, 2, 9, 3>(
//           new UW_Camera_Mapper(data->observations[2*i + 0],
//                                data->observations[2*i + 1]));
//
//   LossFunctionWrapper* loss_function =
//       new LossFunctionWrapper(new HuberLoss(10.0), TAKE_OWNERSHIP);
//
//   problem.AddResidualBlock(cost_function, loss_function, parameters);
//
//   Solver::Options options;
//   Solver::Summary summary;
//   Solve(options, &problem, &summary);
//
//   // Solve again, with a smaller scale.
//   loss_function->Reset(new HuberLoss(1.0), TAKE_OWNERSHIP);
//   Solve(options, &problem, &summary);
//
class LossFunctionWrapper : public LossFunction {
 public:
  LossFunctionWrapper(LossFunction* rho, Ownership ownership)
      : rho_(rho), ownership_(ownership) {
  }

  virtual ~LossFunctionWrapper() {
    if (ownership_ == DO_NOT_TAKE_OWNERSHIP) {
      rho_.release();
    }
  }

  virtual void Evaluate(double sq_norm, double out[3]) const {
    CHECK_NOTNULL(rho_.get());
    rho_->Evaluate(sq_norm, out);
  }

  void Reset(LossFunction* rho, Ownership ownership) {
    if (ownership_ == DO_NOT_TAKE_OWNERSHIP) {
      rho_.release();
    }
    rho_.reset(rho);
    ownership_ = ownership;
  }

 private:
  internal::scoped_ptr<const LossFunction> rho_;
  Ownership ownership_;
  CERES_DISALLOW_COPY_AND_ASSIGN(LossFunctionWrapper);
};

}  // namespace ceres

#endif  // CERES_PUBLIC_LOSS_FUNCTION_H_