Keir Mierle | 8ebb073 | 2012-04-30 23:09:08 -0700 | [diff] [blame] | 1 | // Ceres Solver - A fast non-linear least squares minimizer |
| 2 | // Copyright 2010, 2011, 2012 Google Inc. All rights reserved. |
| 3 | // http://code.google.com/p/ceres-solver/ |
| 4 | // |
| 5 | // Redistribution and use in source and binary forms, with or without |
| 6 | // modification, are permitted provided that the following conditions are met: |
| 7 | // |
| 8 | // * Redistributions of source code must retain the above copyright notice, |
| 9 | // this list of conditions and the following disclaimer. |
| 10 | // * Redistributions in binary form must reproduce the above copyright notice, |
| 11 | // this list of conditions and the following disclaimer in the documentation |
| 12 | // and/or other materials provided with the distribution. |
| 13 | // * Neither the name of Google Inc. nor the names of its contributors may be |
| 14 | // used to endorse or promote products derived from this software without |
| 15 | // specific prior written permission. |
| 16 | // |
| 17 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
| 18 | // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| 19 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| 20 | // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
| 21 | // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
| 22 | // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
| 23 | // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
| 24 | // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
| 25 | // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| 26 | // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
| 27 | // POSSIBILITY OF SUCH DAMAGE. |
| 28 | // |
| 29 | // Author: sameeragarwal@google.com (Sameer Agarwal) |
| 30 | // |
| 31 | // The LossFunction interface is the way users describe how residuals |
| 32 | // are converted to cost terms for the overall problem cost function. |
| 33 | // For the exact manner in which loss functions are converted to the |
| 34 | // overall cost for a problem, see problem.h. |
| 35 | // |
| 36 | // For least squares problem where there are no outliers and standard |
| 37 | // squared loss is expected, it is not necessary to create a loss |
| 38 | // function; instead passing a NULL to the problem when adding |
| 39 | // residuals implies a standard squared loss. |
| 40 | // |
| 41 | // For least squares problems where the minimization may encounter |
| 42 | // input terms that contain outliers, that is, completely bogus |
| 43 | // measurements, it is important to use a loss function that reduces |
| 44 | // their associated penalty. |
| 45 | // |
| 46 | // Consider a structure from motion problem. The unknowns are 3D |
| 47 | // points and camera parameters, and the measurements are image |
| 48 | // coordinates describing the expected reprojected position for a |
| 49 | // point in a camera. For example, we want to model the geometry of a |
| 50 | // street scene with fire hydrants and cars, observed by a moving |
| 51 | // camera with unknown parameters, and the only 3D points we care |
| 52 | // about are the pointy tippy-tops of the fire hydrants. Our magic |
| 53 | // image processing algorithm, which is responsible for producing the |
| 54 | // measurements that are input to Ceres, has found and matched all |
| 55 | // such tippy-tops in all image frames, except that in one of the |
| 56 | // frame it mistook a car's headlight for a hydrant. If we didn't do |
| 57 | // anything special (i.e. if we used a basic quadratic loss), the |
| 58 | // residual for the erroneous measurement will result in extreme error |
| 59 | // due to the quadratic nature of squared loss. This results in the |
| 60 | // entire solution getting pulled away from the optimimum to reduce |
| 61 | // the large error that would otherwise be attributed to the wrong |
| 62 | // measurement. |
| 63 | // |
| 64 | // Using a robust loss function, the cost for large residuals is |
| 65 | // reduced. In the example above, this leads to outlier terms getting |
| 66 | // downweighted so they do not overly influence the final solution. |
| 67 | // |
| 68 | // What cost function is best? |
| 69 | // |
| 70 | // In general, there isn't a principled way to select a robust loss |
| 71 | // function. The authors suggest starting with a non-robust cost, then |
| 72 | // only experimenting with robust loss functions if standard squared |
| 73 | // loss doesn't work. |
| 74 | |
| 75 | #ifndef CERES_PUBLIC_LOSS_FUNCTION_H_ |
| 76 | #define CERES_PUBLIC_LOSS_FUNCTION_H_ |
| 77 | |
Björn Piltz | c0b8838 | 2014-05-19 08:21:00 +0200 | [diff] [blame] | 78 | #include "glog/logging.h" |
Keir Mierle | 8ebb073 | 2012-04-30 23:09:08 -0700 | [diff] [blame] | 79 | #include "ceres/internal/macros.h" |
| 80 | #include "ceres/internal/scoped_ptr.h" |
| 81 | #include "ceres/types.h" |
Björn Piltz | c0b8838 | 2014-05-19 08:21:00 +0200 | [diff] [blame] | 82 | #include "ceres/internal/disable_warnings.h" |
Keir Mierle | 8ebb073 | 2012-04-30 23:09:08 -0700 | [diff] [blame] | 83 | |
| 84 | namespace ceres { |
| 85 | |
Björn Piltz | 5d7eed8 | 2014-04-23 22:13:37 +0200 | [diff] [blame] | 86 | class CERES_EXPORT LossFunction { |
Keir Mierle | 8ebb073 | 2012-04-30 23:09:08 -0700 | [diff] [blame] | 87 | public: |
| 88 | virtual ~LossFunction() {} |
| 89 | |
| 90 | // For a residual vector with squared 2-norm 'sq_norm', this method |
| 91 | // is required to fill in the value and derivatives of the loss |
| 92 | // function (rho in this example): |
| 93 | // |
| 94 | // out[0] = rho(sq_norm), |
| 95 | // out[1] = rho'(sq_norm), |
| 96 | // out[2] = rho''(sq_norm), |
| 97 | // |
| 98 | // Here the convention is that the contribution of a term to the |
| 99 | // cost function is given by 1/2 rho(s), where |
| 100 | // |
| 101 | // s = ||residuals||^2. |
| 102 | // |
| 103 | // Calling the method with a negative value of 's' is an error and |
| 104 | // the implementations are not required to handle that case. |
| 105 | // |
| 106 | // Most sane choices of rho() satisfy: |
| 107 | // |
| 108 | // rho(0) = 0, |
| 109 | // rho'(0) = 1, |
| 110 | // rho'(s) < 1 in outlier region, |
| 111 | // rho''(s) < 0 in outlier region, |
| 112 | // |
| 113 | // so that they mimic the least squares cost for small residuals. |
| 114 | virtual void Evaluate(double sq_norm, double out[3]) const = 0; |
| 115 | }; |
| 116 | |
| 117 | // Some common implementations follow below. |
| 118 | // |
| 119 | // Note: in the region of interest (i.e. s < 3) we have: |
| 120 | // TrivialLoss >= HuberLoss >= SoftLOneLoss >= CauchyLoss |
| 121 | |
| 122 | |
| 123 | // This corresponds to no robustification. |
| 124 | // |
| 125 | // rho(s) = s |
| 126 | // |
| 127 | // At s = 0: rho = [0, 1, 0]. |
| 128 | // |
| 129 | // It is not normally necessary to use this, as passing NULL for the |
| 130 | // loss function when building the problem accomplishes the same |
| 131 | // thing. |
Björn Piltz | 5d7eed8 | 2014-04-23 22:13:37 +0200 | [diff] [blame] | 132 | class CERES_EXPORT TrivialLoss : public LossFunction { |
Keir Mierle | 8ebb073 | 2012-04-30 23:09:08 -0700 | [diff] [blame] | 133 | public: |
| 134 | virtual void Evaluate(double, double*) const; |
| 135 | }; |
| 136 | |
| 137 | // Scaling |
| 138 | // ------- |
| 139 | // Given one robustifier |
| 140 | // s -> rho(s) |
| 141 | // one can change the length scale at which robustification takes |
| 142 | // place, by adding a scale factor 'a' as follows: |
| 143 | // |
| 144 | // s -> a^2 rho(s / a^2). |
| 145 | // |
| 146 | // The first and second derivatives are: |
| 147 | // |
| 148 | // s -> rho'(s / a^2), |
| 149 | // s -> (1 / a^2) rho''(s / a^2), |
| 150 | // |
| 151 | // but the behaviour near s = 0 is the same as the original function, |
| 152 | // i.e. |
| 153 | // |
| 154 | // rho(s) = s + higher order terms, |
| 155 | // a^2 rho(s / a^2) = s + higher order terms. |
| 156 | // |
| 157 | // The scalar 'a' should be positive. |
| 158 | // |
| 159 | // The reason for the appearance of squaring is that 'a' is in the |
| 160 | // units of the residual vector norm whereas 's' is a squared |
| 161 | // norm. For applications it is more convenient to specify 'a' than |
| 162 | // its square. The commonly used robustifiers below are described in |
| 163 | // un-scaled format (a = 1) but their implementations work for any |
| 164 | // non-zero value of 'a'. |
| 165 | |
| 166 | // Huber. |
| 167 | // |
| 168 | // rho(s) = s for s <= 1, |
| 169 | // rho(s) = 2 sqrt(s) - 1 for s >= 1. |
| 170 | // |
| 171 | // At s = 0: rho = [0, 1, 0]. |
| 172 | // |
| 173 | // The scaling parameter 'a' corresponds to 'delta' on this page: |
| 174 | // http://en.wikipedia.org/wiki/Huber_Loss_Function |
Björn Piltz | 5d7eed8 | 2014-04-23 22:13:37 +0200 | [diff] [blame] | 175 | class CERES_EXPORT HuberLoss : public LossFunction { |
Keir Mierle | 8ebb073 | 2012-04-30 23:09:08 -0700 | [diff] [blame] | 176 | public: |
| 177 | explicit HuberLoss(double a) : a_(a), b_(a * a) { } |
| 178 | virtual void Evaluate(double, double*) const; |
Sameer Agarwal | 0c714a7 | 2012-08-20 11:18:16 -0700 | [diff] [blame] | 179 | |
Keir Mierle | 8ebb073 | 2012-04-30 23:09:08 -0700 | [diff] [blame] | 180 | private: |
| 181 | const double a_; |
| 182 | // b = a^2. |
| 183 | const double b_; |
| 184 | }; |
| 185 | |
| 186 | // Soft L1, similar to Huber but smooth. |
| 187 | // |
| 188 | // rho(s) = 2 (sqrt(1 + s) - 1). |
| 189 | // |
| 190 | // At s = 0: rho = [0, 1, -1/2]. |
Björn Piltz | 5d7eed8 | 2014-04-23 22:13:37 +0200 | [diff] [blame] | 191 | class CERES_EXPORT SoftLOneLoss : public LossFunction { |
Keir Mierle | 8ebb073 | 2012-04-30 23:09:08 -0700 | [diff] [blame] | 192 | public: |
| 193 | explicit SoftLOneLoss(double a) : b_(a * a), c_(1 / b_) { } |
| 194 | virtual void Evaluate(double, double*) const; |
Sameer Agarwal | 0c714a7 | 2012-08-20 11:18:16 -0700 | [diff] [blame] | 195 | |
Keir Mierle | 8ebb073 | 2012-04-30 23:09:08 -0700 | [diff] [blame] | 196 | private: |
| 197 | // b = a^2. |
| 198 | const double b_; |
| 199 | // c = 1 / a^2. |
| 200 | const double c_; |
| 201 | }; |
| 202 | |
| 203 | // Inspired by the Cauchy distribution |
| 204 | // |
| 205 | // rho(s) = log(1 + s). |
| 206 | // |
| 207 | // At s = 0: rho = [0, 1, -1]. |
Björn Piltz | 5d7eed8 | 2014-04-23 22:13:37 +0200 | [diff] [blame] | 208 | class CERES_EXPORT CauchyLoss : public LossFunction { |
Keir Mierle | 8ebb073 | 2012-04-30 23:09:08 -0700 | [diff] [blame] | 209 | public: |
| 210 | explicit CauchyLoss(double a) : b_(a * a), c_(1 / b_) { } |
| 211 | virtual void Evaluate(double, double*) const; |
Sameer Agarwal | 0c714a7 | 2012-08-20 11:18:16 -0700 | [diff] [blame] | 212 | |
Keir Mierle | 8ebb073 | 2012-04-30 23:09:08 -0700 | [diff] [blame] | 213 | private: |
| 214 | // b = a^2. |
| 215 | const double b_; |
| 216 | // c = 1 / a^2. |
| 217 | const double c_; |
| 218 | }; |
| 219 | |
Sameer Agarwal | ad1f7b7 | 2012-08-20 11:10:34 -0700 | [diff] [blame] | 220 | // Loss that is capped beyond a certain level using the arc-tangent function. |
| 221 | // The scaling parameter 'a' determines the level where falloff occurs. |
| 222 | // For costs much smaller than 'a', the loss function is linear and behaves like |
| 223 | // TrivialLoss, and for values much larger than 'a' the value asymptotically |
| 224 | // approaches the constant value of a * PI / 2. |
| 225 | // |
| 226 | // rho(s) = a atan(s / a). |
| 227 | // |
| 228 | // At s = 0: rho = [0, 1, 0]. |
Björn Piltz | 5d7eed8 | 2014-04-23 22:13:37 +0200 | [diff] [blame] | 229 | class CERES_EXPORT ArctanLoss : public LossFunction { |
Sameer Agarwal | ad1f7b7 | 2012-08-20 11:10:34 -0700 | [diff] [blame] | 230 | public: |
| 231 | explicit ArctanLoss(double a) : a_(a), b_(1 / (a * a)) { } |
| 232 | virtual void Evaluate(double, double*) const; |
Sameer Agarwal | 0c714a7 | 2012-08-20 11:18:16 -0700 | [diff] [blame] | 233 | |
Sameer Agarwal | ad1f7b7 | 2012-08-20 11:10:34 -0700 | [diff] [blame] | 234 | private: |
| 235 | const double a_; |
| 236 | // b = 1 / a^2. |
| 237 | const double b_; |
| 238 | }; |
| 239 | |
| 240 | // Loss function that maps to approximately zero cost in a range around the |
| 241 | // origin, and reverts to linear in error (quadratic in cost) beyond this range. |
| 242 | // The tolerance parameter 'a' sets the nominal point at which the |
| 243 | // transition occurs, and the transition size parameter 'b' sets the nominal |
| 244 | // distance over which most of the transition occurs. Both a and b must be |
| 245 | // greater than zero, and typically b will be set to a fraction of a. |
| 246 | // The slope rho'[s] varies smoothly from about 0 at s <= a - b to |
| 247 | // about 1 at s >= a + b. |
| 248 | // |
| 249 | // The term is computed as: |
| 250 | // |
| 251 | // rho(s) = b log(1 + exp((s - a) / b)) - c0. |
| 252 | // |
| 253 | // where c0 is chosen so that rho(0) == 0 |
| 254 | // |
| 255 | // c0 = b log(1 + exp(-a / b) |
| 256 | // |
| 257 | // This has the following useful properties: |
| 258 | // |
| 259 | // rho(s) == 0 for s = 0 |
| 260 | // rho'(s) ~= 0 for s << a - b |
| 261 | // rho'(s) ~= 1 for s >> a + b |
| 262 | // rho''(s) > 0 for all s |
| 263 | // |
| 264 | // In addition, all derivatives are continuous, and the curvature is |
| 265 | // concentrated in the range a - b to a + b. |
| 266 | // |
| 267 | // At s = 0: rho = [0, ~0, ~0]. |
Björn Piltz | 5d7eed8 | 2014-04-23 22:13:37 +0200 | [diff] [blame] | 268 | class CERES_EXPORT TolerantLoss : public LossFunction { |
Sameer Agarwal | ad1f7b7 | 2012-08-20 11:10:34 -0700 | [diff] [blame] | 269 | public: |
| 270 | explicit TolerantLoss(double a, double b); |
| 271 | virtual void Evaluate(double, double*) const; |
| 272 | |
| 273 | private: |
| 274 | const double a_, b_, c_; |
| 275 | }; |
| 276 | |
| 277 | // Composition of two loss functions. The error is the result of first |
| 278 | // evaluating g followed by f to yield the composition f(g(s)). |
| 279 | // The loss functions must not be NULL. |
| 280 | class ComposedLoss : public LossFunction { |
| 281 | public: |
| 282 | explicit ComposedLoss(const LossFunction* f, Ownership ownership_f, |
| 283 | const LossFunction* g, Ownership ownership_g); |
| 284 | virtual ~ComposedLoss(); |
| 285 | virtual void Evaluate(double, double*) const; |
| 286 | |
| 287 | private: |
| 288 | internal::scoped_ptr<const LossFunction> f_, g_; |
| 289 | const Ownership ownership_f_, ownership_g_; |
| 290 | }; |
| 291 | |
Keir Mierle | 8ebb073 | 2012-04-30 23:09:08 -0700 | [diff] [blame] | 292 | // The discussion above has to do with length scaling: it affects the space |
| 293 | // in which s is measured. Sometimes you want to simply scale the output |
| 294 | // value of the robustifier. For example, you might want to weight |
| 295 | // different error terms differently (e.g., weight pixel reprojection |
| 296 | // errors differently from terrain errors). |
| 297 | // |
| 298 | // If rho is the wrapped robustifier, then this simply outputs |
| 299 | // s -> a * rho(s) |
| 300 | // |
| 301 | // The first and second derivatives are, not surprisingly |
| 302 | // s -> a * rho'(s) |
| 303 | // s -> a * rho''(s) |
| 304 | // |
| 305 | // Since we treat the a NULL Loss function as the Identity loss |
| 306 | // function, rho = NULL is a valid input and will result in the input |
| 307 | // being scaled by a. This provides a simple way of implementing a |
| 308 | // scaled ResidualBlock. |
Björn Piltz | 5d7eed8 | 2014-04-23 22:13:37 +0200 | [diff] [blame] | 309 | class CERES_EXPORT ScaledLoss : public LossFunction { |
Keir Mierle | 8ebb073 | 2012-04-30 23:09:08 -0700 | [diff] [blame] | 310 | public: |
| 311 | // Constructs a ScaledLoss wrapping another loss function. Takes |
| 312 | // ownership of the wrapped loss function or not depending on the |
| 313 | // ownership parameter. |
| 314 | ScaledLoss(const LossFunction* rho, double a, Ownership ownership) : |
| 315 | rho_(rho), a_(a), ownership_(ownership) { } |
| 316 | |
| 317 | virtual ~ScaledLoss() { |
| 318 | if (ownership_ == DO_NOT_TAKE_OWNERSHIP) { |
| 319 | rho_.release(); |
| 320 | } |
| 321 | } |
| 322 | virtual void Evaluate(double, double*) const; |
| 323 | |
| 324 | private: |
| 325 | internal::scoped_ptr<const LossFunction> rho_; |
| 326 | const double a_; |
| 327 | const Ownership ownership_; |
Sameer Agarwal | 237d659 | 2012-05-30 20:34:49 -0700 | [diff] [blame] | 328 | CERES_DISALLOW_COPY_AND_ASSIGN(ScaledLoss); |
Keir Mierle | 8ebb073 | 2012-04-30 23:09:08 -0700 | [diff] [blame] | 329 | }; |
| 330 | |
| 331 | // Sometimes after the optimization problem has been constructed, we |
| 332 | // wish to mutate the scale of the loss function. For example, when |
| 333 | // performing estimation from data which has substantial outliers, |
| 334 | // convergence can be improved by starting out with a large scale, |
| 335 | // optimizing the problem and then reducing the scale. This can have |
| 336 | // better convergence behaviour than just using a loss function with a |
| 337 | // small scale. |
| 338 | // |
| 339 | // This templated class allows the user to implement a loss function |
| 340 | // whose scale can be mutated after an optimization problem has been |
| 341 | // constructed. |
| 342 | // |
| 343 | // Example usage |
| 344 | // |
| 345 | // Problem problem; |
| 346 | // |
| 347 | // // Add parameter blocks |
| 348 | // |
| 349 | // CostFunction* cost_function = |
| 350 | // new AutoDiffCostFunction < UW_Camera_Mapper, 2, 9, 3>( |
Sameer Agarwal | ebbb984 | 2013-05-26 12:40:12 -0700 | [diff] [blame] | 351 | // new UW_Camera_Mapper(feature_x, feature_y)); |
Keir Mierle | 8ebb073 | 2012-04-30 23:09:08 -0700 | [diff] [blame] | 352 | // |
| 353 | // LossFunctionWrapper* loss_function(new HuberLoss(1.0), TAKE_OWNERSHIP); |
| 354 | // |
| 355 | // problem.AddResidualBlock(cost_function, loss_function, parameters); |
| 356 | // |
| 357 | // Solver::Options options; |
Sameer Agarwal | ebbb984 | 2013-05-26 12:40:12 -0700 | [diff] [blame] | 358 | // Solger::Summary summary; |
| 359 | // |
| 360 | // Solve(options, &problem, &summary) |
Keir Mierle | 8ebb073 | 2012-04-30 23:09:08 -0700 | [diff] [blame] | 361 | // |
| 362 | // loss_function->Reset(new HuberLoss(1.0), TAKE_OWNERSHIP); |
| 363 | // |
Sameer Agarwal | ebbb984 | 2013-05-26 12:40:12 -0700 | [diff] [blame] | 364 | // Solve(options, &problem, &summary) |
Keir Mierle | 8ebb073 | 2012-04-30 23:09:08 -0700 | [diff] [blame] | 365 | // |
Björn Piltz | 5d7eed8 | 2014-04-23 22:13:37 +0200 | [diff] [blame] | 366 | class CERES_EXPORT LossFunctionWrapper : public LossFunction { |
Keir Mierle | 8ebb073 | 2012-04-30 23:09:08 -0700 | [diff] [blame] | 367 | public: |
| 368 | LossFunctionWrapper(LossFunction* rho, Ownership ownership) |
| 369 | : rho_(rho), ownership_(ownership) { |
| 370 | } |
| 371 | |
| 372 | virtual ~LossFunctionWrapper() { |
| 373 | if (ownership_ == DO_NOT_TAKE_OWNERSHIP) { |
| 374 | rho_.release(); |
| 375 | } |
| 376 | } |
| 377 | |
| 378 | virtual void Evaluate(double sq_norm, double out[3]) const { |
| 379 | CHECK_NOTNULL(rho_.get()); |
| 380 | rho_->Evaluate(sq_norm, out); |
| 381 | } |
| 382 | |
| 383 | void Reset(LossFunction* rho, Ownership ownership) { |
| 384 | if (ownership_ == DO_NOT_TAKE_OWNERSHIP) { |
| 385 | rho_.release(); |
| 386 | } |
| 387 | rho_.reset(rho); |
| 388 | ownership_ = ownership; |
| 389 | } |
| 390 | |
| 391 | private: |
| 392 | internal::scoped_ptr<const LossFunction> rho_; |
| 393 | Ownership ownership_; |
Sameer Agarwal | 237d659 | 2012-05-30 20:34:49 -0700 | [diff] [blame] | 394 | CERES_DISALLOW_COPY_AND_ASSIGN(LossFunctionWrapper); |
Keir Mierle | 8ebb073 | 2012-04-30 23:09:08 -0700 | [diff] [blame] | 395 | }; |
| 396 | |
| 397 | } // namespace ceres |
| 398 | |
Björn Piltz | c0b8838 | 2014-05-19 08:21:00 +0200 | [diff] [blame] | 399 | #include "ceres/internal/disable_warnings.h" |
| 400 | |
Keir Mierle | 8ebb073 | 2012-04-30 23:09:08 -0700 | [diff] [blame] | 401 | #endif // CERES_PUBLIC_LOSS_FUNCTION_H_ |