1// Ceres Solver - A fast non-linear least squares minimizer 2// Copyright 2010, 2011, 2012 Google Inc. All rights reserved. 3// http://code.google.com/p/ceres-solver/ 4// 5// Redistribution and use in source and binary forms, with or without 6// modification, are permitted provided that the following conditions are met: 7// 8// * Redistributions of source code must retain the above copyright notice, 9// this list of conditions and the following disclaimer. 10// * Redistributions in binary form must reproduce the above copyright notice, 11// this list of conditions and the following disclaimer in the documentation 12// and/or other materials provided with the distribution. 13// * Neither the name of Google Inc. nor the names of its contributors may be 14// used to endorse or promote products derived from this software without 15// specific prior written permission. 16// 17// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 18// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 21// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 22// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 23// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 24// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 25// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 26// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 27// POSSIBILITY OF SUCH DAMAGE. 28// 29// Author: sameeragarwal@google.com (Sameer Agarwal) 30// 31// The LossFunction interface is the way users describe how residuals 32// are converted to cost terms for the overall problem cost function. 33// For the exact manner in which loss functions are converted to the 34// overall cost for a problem, see problem.h. 35// 36// For least squares problem where there are no outliers and standard 37// squared loss is expected, it is not necessary to create a loss 38// function; instead passing a NULL to the problem when adding 39// residuals implies a standard squared loss. 40// 41// For least squares problems where the minimization may encounter 42// input terms that contain outliers, that is, completely bogus 43// measurements, it is important to use a loss function that reduces 44// their associated penalty. 45// 46// Consider a structure from motion problem. The unknowns are 3D 47// points and camera parameters, and the measurements are image 48// coordinates describing the expected reprojected position for a 49// point in a camera. For example, we want to model the geometry of a 50// street scene with fire hydrants and cars, observed by a moving 51// camera with unknown parameters, and the only 3D points we care 52// about are the pointy tippy-tops of the fire hydrants. Our magic 53// image processing algorithm, which is responsible for producing the 54// measurements that are input to Ceres, has found and matched all 55// such tippy-tops in all image frames, except that in one of the 56// frame it mistook a car's headlight for a hydrant. If we didn't do 57// anything special (i.e. if we used a basic quadratic loss), the 58// residual for the erroneous measurement will result in extreme error 59// due to the quadratic nature of squared loss. This results in the 60// entire solution getting pulled away from the optimimum to reduce 61// the large error that would otherwise be attributed to the wrong 62// measurement. 63// 64// Using a robust loss function, the cost for large residuals is 65// reduced. In the example above, this leads to outlier terms getting 66// downweighted so they do not overly influence the final solution. 67// 68// What cost function is best? 69// 70// In general, there isn't a principled way to select a robust loss 71// function. The authors suggest starting with a non-robust cost, then 72// only experimenting with robust loss functions if standard squared 73// loss doesn't work. 74 75#ifndef CERES_PUBLIC_LOSS_FUNCTION_H_ 76#define CERES_PUBLIC_LOSS_FUNCTION_H_ 77 78#include "ceres/internal/macros.h" 79#include "ceres/internal/scoped_ptr.h" 80#include "ceres/types.h" 81#include "glog/logging.h" 82 83namespace ceres { 84 85class LossFunction { 86 public: 87 virtual ~LossFunction() {} 88 89 // For a residual vector with squared 2-norm 'sq_norm', this method 90 // is required to fill in the value and derivatives of the loss 91 // function (rho in this example): 92 // 93 // out[0] = rho(sq_norm), 94 // out[1] = rho'(sq_norm), 95 // out[2] = rho''(sq_norm), 96 // 97 // Here the convention is that the contribution of a term to the 98 // cost function is given by 1/2 rho(s), where 99 // 100 // s = ||residuals||^2. 101 // 102 // Calling the method with a negative value of 's' is an error and 103 // the implementations are not required to handle that case. 104 // 105 // Most sane choices of rho() satisfy: 106 // 107 // rho(0) = 0, 108 // rho'(0) = 1, 109 // rho'(s) < 1 in outlier region, 110 // rho''(s) < 0 in outlier region, 111 // 112 // so that they mimic the least squares cost for small residuals. 113 virtual void Evaluate(double sq_norm, double out[3]) const = 0; 114}; 115 116// Some common implementations follow below. 117// 118// Note: in the region of interest (i.e. s < 3) we have: 119// TrivialLoss >= HuberLoss >= SoftLOneLoss >= CauchyLoss 120 121 122// This corresponds to no robustification. 123// 124// rho(s) = s 125// 126// At s = 0: rho = [0, 1, 0]. 127// 128// It is not normally necessary to use this, as passing NULL for the 129// loss function when building the problem accomplishes the same 130// thing. 131class TrivialLoss : public LossFunction { 132 public: 133 virtual void Evaluate(double, double*) const; 134}; 135 136// Scaling 137// ------- 138// Given one robustifier 139// s -> rho(s) 140// one can change the length scale at which robustification takes 141// place, by adding a scale factor 'a' as follows: 142// 143// s -> a^2 rho(s / a^2). 144// 145// The first and second derivatives are: 146// 147// s -> rho'(s / a^2), 148// s -> (1 / a^2) rho''(s / a^2), 149// 150// but the behaviour near s = 0 is the same as the original function, 151// i.e. 152// 153// rho(s) = s + higher order terms, 154// a^2 rho(s / a^2) = s + higher order terms. 155// 156// The scalar 'a' should be positive. 157// 158// The reason for the appearance of squaring is that 'a' is in the 159// units of the residual vector norm whereas 's' is a squared 160// norm. For applications it is more convenient to specify 'a' than 161// its square. The commonly used robustifiers below are described in 162// un-scaled format (a = 1) but their implementations work for any 163// non-zero value of 'a'. 164 165// Huber. 166// 167// rho(s) = s for s <= 1, 168// rho(s) = 2 sqrt(s) - 1 for s >= 1. 169// 170// At s = 0: rho = [0, 1, 0]. 171// 172// The scaling parameter 'a' corresponds to 'delta' on this page: 173// http://en.wikipedia.org/wiki/Huber_Loss_Function 174class HuberLoss : public LossFunction { 175 public: 176 explicit HuberLoss(double a) : a_(a), b_(a * a) { } 177 virtual void Evaluate(double, double*) const; 178 179 private: 180 const double a_; 181 // b = a^2. 182 const double b_; 183}; 184 185// Soft L1, similar to Huber but smooth. 186// 187// rho(s) = 2 (sqrt(1 + s) - 1). 188// 189// At s = 0: rho = [0, 1, -1/2]. 190class SoftLOneLoss : public LossFunction { 191 public: 192 explicit SoftLOneLoss(double a) : b_(a * a), c_(1 / b_) { } 193 virtual void Evaluate(double, double*) const; 194 195 private: 196 // b = a^2. 197 const double b_; 198 // c = 1 / a^2. 199 const double c_; 200}; 201 202// Inspired by the Cauchy distribution 203// 204// rho(s) = log(1 + s). 205// 206// At s = 0: rho = [0, 1, -1]. 207class CauchyLoss : public LossFunction { 208 public: 209 explicit CauchyLoss(double a) : b_(a * a), c_(1 / b_) { } 210 virtual void Evaluate(double, double*) const; 211 212 private: 213 // b = a^2. 214 const double b_; 215 // c = 1 / a^2. 216 const double c_; 217}; 218 219// Loss that is capped beyond a certain level using the arc-tangent function. 220// The scaling parameter 'a' determines the level where falloff occurs. 221// For costs much smaller than 'a', the loss function is linear and behaves like 222// TrivialLoss, and for values much larger than 'a' the value asymptotically 223// approaches the constant value of a * PI / 2. 224// 225// rho(s) = a atan(s / a). 226// 227// At s = 0: rho = [0, 1, 0]. 228class ArctanLoss : public LossFunction { 229 public: 230 explicit ArctanLoss(double a) : a_(a), b_(1 / (a * a)) { } 231 virtual void Evaluate(double, double*) const; 232 233 private: 234 const double a_; 235 // b = 1 / a^2. 236 const double b_; 237}; 238 239// Loss function that maps to approximately zero cost in a range around the 240// origin, and reverts to linear in error (quadratic in cost) beyond this range. 241// The tolerance parameter 'a' sets the nominal point at which the 242// transition occurs, and the transition size parameter 'b' sets the nominal 243// distance over which most of the transition occurs. Both a and b must be 244// greater than zero, and typically b will be set to a fraction of a. 245// The slope rho'[s] varies smoothly from about 0 at s <= a - b to 246// about 1 at s >= a + b. 247// 248// The term is computed as: 249// 250// rho(s) = b log(1 + exp((s - a) / b)) - c0. 251// 252// where c0 is chosen so that rho(0) == 0 253// 254// c0 = b log(1 + exp(-a / b) 255// 256// This has the following useful properties: 257// 258// rho(s) == 0 for s = 0 259// rho'(s) ~= 0 for s << a - b 260// rho'(s) ~= 1 for s >> a + b 261// rho''(s) > 0 for all s 262// 263// In addition, all derivatives are continuous, and the curvature is 264// concentrated in the range a - b to a + b. 265// 266// At s = 0: rho = [0, ~0, ~0]. 267class TolerantLoss : public LossFunction { 268 public: 269 explicit TolerantLoss(double a, double b); 270 virtual void Evaluate(double, double*) const; 271 272 private: 273 const double a_, b_, c_; 274}; 275 276// Composition of two loss functions. The error is the result of first 277// evaluating g followed by f to yield the composition f(g(s)). 278// The loss functions must not be NULL. 279class ComposedLoss : public LossFunction { 280 public: 281 explicit ComposedLoss(const LossFunction* f, Ownership ownership_f, 282 const LossFunction* g, Ownership ownership_g); 283 virtual ~ComposedLoss(); 284 virtual void Evaluate(double, double*) const; 285 286 private: 287 internal::scoped_ptr<const LossFunction> f_, g_; 288 const Ownership ownership_f_, ownership_g_; 289}; 290 291// The discussion above has to do with length scaling: it affects the space 292// in which s is measured. Sometimes you want to simply scale the output 293// value of the robustifier. For example, you might want to weight 294// different error terms differently (e.g., weight pixel reprojection 295// errors differently from terrain errors). 296// 297// If rho is the wrapped robustifier, then this simply outputs 298// s -> a * rho(s) 299// 300// The first and second derivatives are, not surprisingly 301// s -> a * rho'(s) 302// s -> a * rho''(s) 303// 304// Since we treat the a NULL Loss function as the Identity loss 305// function, rho = NULL is a valid input and will result in the input 306// being scaled by a. This provides a simple way of implementing a 307// scaled ResidualBlock. 308class ScaledLoss : public LossFunction { 309 public: 310 // Constructs a ScaledLoss wrapping another loss function. Takes 311 // ownership of the wrapped loss function or not depending on the 312 // ownership parameter. 313 ScaledLoss(const LossFunction* rho, double a, Ownership ownership) : 314 rho_(rho), a_(a), ownership_(ownership) { } 315 316 virtual ~ScaledLoss() { 317 if (ownership_ == DO_NOT_TAKE_OWNERSHIP) { 318 rho_.release(); 319 } 320 } 321 virtual void Evaluate(double, double*) const; 322 323 private: 324 internal::scoped_ptr<const LossFunction> rho_; 325 const double a_; 326 const Ownership ownership_; 327 CERES_DISALLOW_COPY_AND_ASSIGN(ScaledLoss); 328}; 329 330// Sometimes after the optimization problem has been constructed, we 331// wish to mutate the scale of the loss function. For example, when 332// performing estimation from data which has substantial outliers, 333// convergence can be improved by starting out with a large scale, 334// optimizing the problem and then reducing the scale. This can have 335// better convergence behaviour than just using a loss function with a 336// small scale. 337// 338// This templated class allows the user to implement a loss function 339// whose scale can be mutated after an optimization problem has been 340// constructed. 341// 342// Example usage 343// 344// Problem problem; 345// 346// // Add parameter blocks 347// 348// CostFunction* cost_function = 349// new AutoDiffCostFunction < UW_Camera_Mapper, 2, 9, 3>( 350// new UW_Camera_Mapper(feature_x, feature_y)); 351// 352// LossFunctionWrapper* loss_function(new HuberLoss(1.0), TAKE_OWNERSHIP); 353// 354// problem.AddResidualBlock(cost_function, loss_function, parameters); 355// 356// Solver::Options options; 357// Solger::Summary summary; 358// 359// Solve(options, &problem, &summary) 360// 361// loss_function->Reset(new HuberLoss(1.0), TAKE_OWNERSHIP); 362// 363// Solve(options, &problem, &summary) 364// 365class LossFunctionWrapper : public LossFunction { 366 public: 367 LossFunctionWrapper(LossFunction* rho, Ownership ownership) 368 : rho_(rho), ownership_(ownership) { 369 } 370 371 virtual ~LossFunctionWrapper() { 372 if (ownership_ == DO_NOT_TAKE_OWNERSHIP) { 373 rho_.release(); 374 } 375 } 376 377 virtual void Evaluate(double sq_norm, double out[3]) const { 378 CHECK_NOTNULL(rho_.get()); 379 rho_->Evaluate(sq_norm, out); 380 } 381 382 void Reset(LossFunction* rho, Ownership ownership) { 383 if (ownership_ == DO_NOT_TAKE_OWNERSHIP) { 384 rho_.release(); 385 } 386 rho_.reset(rho); 387 ownership_ = ownership; 388 } 389 390 private: 391 internal::scoped_ptr<const LossFunction> rho_; 392 Ownership ownership_; 393 CERES_DISALLOW_COPY_AND_ASSIGN(LossFunctionWrapper); 394}; 395 396} // namespace ceres 397 398#endif // CERES_PUBLIC_LOSS_FUNCTION_H_ 399