Keir Mierle | 8ebb073 | 2012-04-30 23:09:08 -0700 | [diff] [blame] | 1 | // Ceres Solver - A fast non-linear least squares minimizer |
| 2 | // Copyright 2010, 2011, 2012 Google Inc. All rights reserved. |
| 3 | // http://code.google.com/p/ceres-solver/ |
| 4 | // |
| 5 | // Redistribution and use in source and binary forms, with or without |
| 6 | // modification, are permitted provided that the following conditions are met: |
| 7 | // |
| 8 | // * Redistributions of source code must retain the above copyright notice, |
| 9 | // this list of conditions and the following disclaimer. |
| 10 | // * Redistributions in binary form must reproduce the above copyright notice, |
| 11 | // this list of conditions and the following disclaimer in the documentation |
| 12 | // and/or other materials provided with the distribution. |
| 13 | // * Neither the name of Google Inc. nor the names of its contributors may be |
| 14 | // used to endorse or promote products derived from this software without |
| 15 | // specific prior written permission. |
| 16 | // |
| 17 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
| 18 | // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| 19 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| 20 | // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
| 21 | // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
| 22 | // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
| 23 | // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
| 24 | // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
| 25 | // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| 26 | // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
| 27 | // POSSIBILITY OF SUCH DAMAGE. |
| 28 | // |
| 29 | // Author: sameeragarwal@google.com (Sameer Agarwal) |
| 30 | |
| 31 | #ifndef CERES_PUBLIC_SOLVER_H_ |
| 32 | #define CERES_PUBLIC_SOLVER_H_ |
| 33 | |
| 34 | #include <cmath> |
| 35 | #include <string> |
| 36 | #include <vector> |
Sameer Agarwal | 4997cbc | 2012-07-02 12:44:34 -0700 | [diff] [blame] | 37 | #include "ceres/crs_matrix.h" |
Keir Mierle | 8ebb073 | 2012-04-30 23:09:08 -0700 | [diff] [blame] | 38 | #include "ceres/internal/macros.h" |
| 39 | #include "ceres/internal/port.h" |
Sameer Agarwal | 4997cbc | 2012-07-02 12:44:34 -0700 | [diff] [blame] | 40 | #include "ceres/iteration_callback.h" |
Sameer Agarwal | 2c94eed | 2012-10-01 16:34:37 -0700 | [diff] [blame] | 41 | #include "ceres/ordered_groups.h" |
Keir Mierle | 8ebb073 | 2012-04-30 23:09:08 -0700 | [diff] [blame] | 42 | #include "ceres/types.h" |
| 43 | |
| 44 | namespace ceres { |
| 45 | |
| 46 | class Problem; |
| 47 | |
| 48 | // Interface for non-linear least squares solvers. |
| 49 | class Solver { |
| 50 | public: |
| 51 | virtual ~Solver(); |
| 52 | |
| 53 | // The options structure contains, not surprisingly, options that control how |
| 54 | // the solver operates. The defaults should be suitable for a wide range of |
| 55 | // problems; however, better performance is often obtainable with tweaking. |
| 56 | // |
| 57 | // The constants are defined inside types.h |
| 58 | struct Options { |
| 59 | // Default constructor that sets up a generic sparse problem. |
| 60 | Options() { |
Sameer Agarwal | f4d0164 | 2012-11-26 12:55:58 -0800 | [diff] [blame] | 61 | minimizer_type = TRUST_REGION; |
Sameer Agarwal | d010de5 | 2013-02-15 14:26:56 -0800 | [diff] [blame] | 62 | line_search_direction_type = LBFGS; |
Sameer Agarwal | f4d0164 | 2012-11-26 12:55:58 -0800 | [diff] [blame] | 63 | line_search_type = ARMIJO; |
| 64 | nonlinear_conjugate_gradient_type = FLETCHER_REEVES; |
Sameer Agarwal | aed9961 | 2012-11-29 10:33:19 -0800 | [diff] [blame] | 65 | max_lbfgs_rank = 20; |
Sameer Agarwal | aa9a83c | 2012-05-29 17:40:17 -0700 | [diff] [blame] | 66 | trust_region_strategy_type = LEVENBERG_MARQUARDT; |
Markus Moll | 51cf7cb | 2012-08-20 20:10:20 +0200 | [diff] [blame] | 67 | dogleg_type = TRADITIONAL_DOGLEG; |
Sameer Agarwal | a8f87d7 | 2012-08-08 10:38:31 -0700 | [diff] [blame] | 68 | use_nonmonotonic_steps = false; |
| 69 | max_consecutive_nonmonotonic_steps = 5; |
Keir Mierle | 8ebb073 | 2012-04-30 23:09:08 -0700 | [diff] [blame] | 70 | max_num_iterations = 50; |
Sameer Agarwal | fa01519 | 2012-06-11 14:21:42 -0700 | [diff] [blame] | 71 | max_solver_time_in_seconds = 1e9; |
Keir Mierle | 8ebb073 | 2012-04-30 23:09:08 -0700 | [diff] [blame] | 72 | num_threads = 1; |
Sameer Agarwal | aa9a83c | 2012-05-29 17:40:17 -0700 | [diff] [blame] | 73 | initial_trust_region_radius = 1e4; |
| 74 | max_trust_region_radius = 1e16; |
| 75 | min_trust_region_radius = 1e-32; |
Keir Mierle | 8ebb073 | 2012-04-30 23:09:08 -0700 | [diff] [blame] | 76 | min_relative_decrease = 1e-3; |
Sameer Agarwal | aa9a83c | 2012-05-29 17:40:17 -0700 | [diff] [blame] | 77 | lm_min_diagonal = 1e-6; |
| 78 | lm_max_diagonal = 1e32; |
| 79 | max_num_consecutive_invalid_steps = 5; |
Keir Mierle | 8ebb073 | 2012-04-30 23:09:08 -0700 | [diff] [blame] | 80 | function_tolerance = 1e-6; |
| 81 | gradient_tolerance = 1e-10; |
| 82 | parameter_tolerance = 1e-8; |
Sameer Agarwal | b051873 | 2012-05-29 00:27:57 -0700 | [diff] [blame] | 83 | |
| 84 | #if defined(CERES_NO_SUITESPARSE) && defined(CERES_NO_CXSPARSE) |
Keir Mierle | 8ebb073 | 2012-04-30 23:09:08 -0700 | [diff] [blame] | 85 | linear_solver_type = DENSE_QR; |
Sameer Agarwal | b051873 | 2012-05-29 00:27:57 -0700 | [diff] [blame] | 86 | #else |
| 87 | linear_solver_type = SPARSE_NORMAL_CHOLESKY; |
| 88 | #endif |
| 89 | |
Sameer Agarwal | 97fb6d9 | 2012-06-17 10:08:19 -0700 | [diff] [blame] | 90 | preconditioner_type = JACOBI; |
| 91 | |
Sameer Agarwal | b051873 | 2012-05-29 00:27:57 -0700 | [diff] [blame] | 92 | sparse_linear_algebra_library = SUITE_SPARSE; |
| 93 | #if defined(CERES_NO_SUITESPARSE) && !defined(CERES_NO_CXSPARSE) |
| 94 | sparse_linear_algebra_library = CX_SPARSE; |
| 95 | #endif |
| 96 | |
Sameer Agarwal | 97fb6d9 | 2012-06-17 10:08:19 -0700 | [diff] [blame] | 97 | num_linear_solver_threads = 1; |
Sameer Agarwal | 97fb6d9 | 2012-06-17 10:08:19 -0700 | [diff] [blame] | 98 | |
Sameer Agarwal | 7a3c43b | 2012-06-05 23:10:59 -0700 | [diff] [blame] | 99 | #if defined(CERES_NO_SUITESPARSE) |
| 100 | use_block_amd = false; |
| 101 | #else |
| 102 | use_block_amd = true; |
| 103 | #endif |
Sameer Agarwal | 68b32a9 | 2012-10-06 23:10:51 -0700 | [diff] [blame] | 104 | linear_solver_ordering = NULL; |
Sameer Agarwal | 9123e2f | 2012-09-18 21:49:06 -0700 | [diff] [blame] | 105 | use_inner_iterations = false; |
Sameer Agarwal | ba8d967 | 2012-10-02 00:48:57 -0700 | [diff] [blame] | 106 | inner_iteration_ordering = NULL; |
Keir Mierle | 8ebb073 | 2012-04-30 23:09:08 -0700 | [diff] [blame] | 107 | linear_solver_min_num_iterations = 1; |
| 108 | linear_solver_max_num_iterations = 500; |
| 109 | eta = 1e-1; |
| 110 | jacobi_scaling = true; |
| 111 | logging_type = PER_MINIMIZER_ITERATION; |
| 112 | minimizer_progress_to_stdout = false; |
Sameer Agarwal | 82f4b88 | 2012-05-06 21:05:28 -0700 | [diff] [blame] | 113 | lsqp_dump_directory = "/tmp"; |
| 114 | lsqp_dump_format_type = TEXTFILE; |
Keir Mierle | 8ebb073 | 2012-04-30 23:09:08 -0700 | [diff] [blame] | 115 | check_gradients = false; |
| 116 | gradient_check_relative_precision = 1e-8; |
| 117 | numeric_derivative_relative_step_size = 1e-6; |
| 118 | update_state_every_iteration = false; |
| 119 | } |
| 120 | |
Sameer Agarwal | 65625f7 | 2012-09-17 12:06:57 -0700 | [diff] [blame] | 121 | ~Options(); |
Keir Mierle | 8ebb073 | 2012-04-30 23:09:08 -0700 | [diff] [blame] | 122 | // Minimizer options ---------------------------------------- |
| 123 | |
Sameer Agarwal | f4d0164 | 2012-11-26 12:55:58 -0800 | [diff] [blame] | 124 | // Ceres supports the two major families of optimization strategies - |
| 125 | // Trust Region and Line Search. |
| 126 | // |
| 127 | // 1. The line search approach first finds a descent direction |
| 128 | // along which the objective function will be reduced and then |
| 129 | // computes a step size that decides how far should move along |
| 130 | // that direction. The descent direction can be computed by |
| 131 | // various methods, such as gradient descent, Newton's method and |
| 132 | // Quasi-Newton method. The step size can be determined either |
| 133 | // exactly or inexactly. |
| 134 | // |
| 135 | // 2. The trust region approach approximates the objective |
| 136 | // function using using a model function (often a quadratic) over |
| 137 | // a subset of the search space known as the trust region. If the |
| 138 | // model function succeeds in minimizing the true objective |
| 139 | // function the trust region is expanded; conversely, otherwise it |
| 140 | // is contracted and the model optimization problem is solved |
| 141 | // again. |
| 142 | // |
| 143 | // Trust region methods are in some sense dual to line search methods: |
| 144 | // trust region methods first choose a step size (the size of the |
| 145 | // trust region) and then a step direction while line search methods |
| 146 | // first choose a step direction and then a step size. |
| 147 | MinimizerType minimizer_type; |
| 148 | |
| 149 | LineSearchDirectionType line_search_direction_type; |
| 150 | LineSearchType line_search_type; |
| 151 | NonlinearConjugateGradientType nonlinear_conjugate_gradient_type; |
| 152 | |
Sameer Agarwal | aed9961 | 2012-11-29 10:33:19 -0800 | [diff] [blame] | 153 | // The LBFGS hessian approximation is a low rank approximation to |
| 154 | // the inverse of the Hessian matrix. The rank of the |
| 155 | // approximation determines (linearly) the space and time |
| 156 | // complexity of using the approximation. Higher the rank, the |
Sameer Agarwal | 2293cb5 | 2012-11-29 16:00:18 -0800 | [diff] [blame] | 157 | // better is the quality of the approximation. The increase in |
| 158 | // quality is however is bounded for a number of reasons. |
| 159 | // |
| 160 | // 1. The method only uses secant information and not actual |
| 161 | // derivatives. |
| 162 | // |
| 163 | // 2. The Hessian approximation is constrained to be positive |
| 164 | // definite. |
| 165 | // |
| 166 | // So increasing this rank to a large number will cost time and |
| 167 | // space complexity without the corresponding increase in solution |
| 168 | // quality. There are no hard and fast rules for choosing the |
| 169 | // maximum rank. The best choice usually requires some problem |
| 170 | // specific experimentation. |
| 171 | // |
| 172 | // For more theoretical and implementation details of the LBFGS |
| 173 | // method, please see: |
Sameer Agarwal | aed9961 | 2012-11-29 10:33:19 -0800 | [diff] [blame] | 174 | // |
| 175 | // Nocedal, J. (1980). "Updating Quasi-Newton Matrices with |
| 176 | // Limited Storage". Mathematics of Computation 35 (151): 773–782. |
| 177 | int max_lbfgs_rank; |
| 178 | |
Sameer Agarwal | aa9a83c | 2012-05-29 17:40:17 -0700 | [diff] [blame] | 179 | TrustRegionStrategyType trust_region_strategy_type; |
Keir Mierle | 8ebb073 | 2012-04-30 23:09:08 -0700 | [diff] [blame] | 180 | |
Markus Moll | 51cf7cb | 2012-08-20 20:10:20 +0200 | [diff] [blame] | 181 | // Type of dogleg strategy to use. |
| 182 | DoglegType dogleg_type; |
| 183 | |
Sameer Agarwal | a8f87d7 | 2012-08-08 10:38:31 -0700 | [diff] [blame] | 184 | // The classical trust region methods are descent methods, in that |
| 185 | // they only accept a point if it strictly reduces the value of |
| 186 | // the objective function. |
| 187 | // |
| 188 | // Relaxing this requirement allows the algorithm to be more |
| 189 | // efficient in the long term at the cost of some local increase |
| 190 | // in the value of the objective function. |
| 191 | // |
| 192 | // This is because allowing for non-decreasing objective function |
| 193 | // values in a princpled manner allows the algorithm to "jump over |
| 194 | // boulders" as the method is not restricted to move into narrow |
| 195 | // valleys while preserving its convergence properties. |
| 196 | // |
| 197 | // Setting use_nonmonotonic_steps to true enables the |
| 198 | // non-monotonic trust region algorithm as described by Conn, |
| 199 | // Gould & Toint in "Trust Region Methods", Section 10.1. |
| 200 | // |
| 201 | // The parameter max_consecutive_nonmonotonic_steps controls the |
| 202 | // window size used by the step selection algorithm to accept |
| 203 | // non-monotonic steps. |
| 204 | // |
| 205 | // Even though the value of the objective function may be larger |
| 206 | // than the minimum value encountered over the course of the |
| 207 | // optimization, the final parameters returned to the user are the |
| 208 | // ones corresponding to the minimum cost over all iterations. |
| 209 | bool use_nonmonotonic_steps; |
| 210 | int max_consecutive_nonmonotonic_steps; |
| 211 | |
Keir Mierle | 8ebb073 | 2012-04-30 23:09:08 -0700 | [diff] [blame] | 212 | // Maximum number of iterations for the minimizer to run for. |
| 213 | int max_num_iterations; |
| 214 | |
| 215 | // Maximum time for which the minimizer should run for. |
Sameer Agarwal | fa01519 | 2012-06-11 14:21:42 -0700 | [diff] [blame] | 216 | double max_solver_time_in_seconds; |
Keir Mierle | 8ebb073 | 2012-04-30 23:09:08 -0700 | [diff] [blame] | 217 | |
| 218 | // Number of threads used by Ceres for evaluating the cost and |
| 219 | // jacobians. |
| 220 | int num_threads; |
| 221 | |
Sameer Agarwal | aa9a83c | 2012-05-29 17:40:17 -0700 | [diff] [blame] | 222 | // Trust region minimizer settings. |
| 223 | double initial_trust_region_radius; |
| 224 | double max_trust_region_radius; |
Keir Mierle | 8ebb073 | 2012-04-30 23:09:08 -0700 | [diff] [blame] | 225 | |
Sameer Agarwal | aa9a83c | 2012-05-29 17:40:17 -0700 | [diff] [blame] | 226 | // Minimizer terminates when the trust region radius becomes |
| 227 | // smaller than this value. |
| 228 | double min_trust_region_radius; |
Sameer Agarwal | 835911e | 2012-05-14 12:41:10 -0700 | [diff] [blame] | 229 | |
Sameer Agarwal | aa9a83c | 2012-05-29 17:40:17 -0700 | [diff] [blame] | 230 | // Lower bound for the relative decrease before a step is |
| 231 | // accepted. |
Keir Mierle | 8ebb073 | 2012-04-30 23:09:08 -0700 | [diff] [blame] | 232 | double min_relative_decrease; |
| 233 | |
Sameer Agarwal | aa9a83c | 2012-05-29 17:40:17 -0700 | [diff] [blame] | 234 | // For the Levenberg-Marquadt algorithm, the scaled diagonal of |
| 235 | // the normal equations J'J is used to control the size of the |
| 236 | // trust region. Extremely small and large values along the |
| 237 | // diagonal can make this regularization scheme |
| 238 | // fail. lm_max_diagonal and lm_min_diagonal, clamp the values of |
| 239 | // diag(J'J) from above and below. In the normal course of |
| 240 | // operation, the user should not have to modify these parameters. |
| 241 | double lm_min_diagonal; |
| 242 | double lm_max_diagonal; |
| 243 | |
| 244 | // Sometimes due to numerical conditioning problems or linear |
| 245 | // solver flakiness, the trust region strategy may return a |
| 246 | // numerically invalid step that can be fixed by reducing the |
| 247 | // trust region size. So the TrustRegionMinimizer allows for a few |
| 248 | // successive invalid steps before it declares NUMERICAL_FAILURE. |
| 249 | int max_num_consecutive_invalid_steps; |
| 250 | |
Keir Mierle | 8ebb073 | 2012-04-30 23:09:08 -0700 | [diff] [blame] | 251 | // Minimizer terminates when |
| 252 | // |
| 253 | // (new_cost - old_cost) < function_tolerance * old_cost; |
| 254 | // |
| 255 | double function_tolerance; |
| 256 | |
| 257 | // Minimizer terminates when |
| 258 | // |
| 259 | // max_i |gradient_i| < gradient_tolerance * max_i|initial_gradient_i| |
| 260 | // |
| 261 | // This value should typically be 1e-4 * function_tolerance. |
| 262 | double gradient_tolerance; |
| 263 | |
| 264 | // Minimizer terminates when |
| 265 | // |
| 266 | // |step|_2 <= parameter_tolerance * ( |x|_2 + parameter_tolerance) |
| 267 | // |
| 268 | double parameter_tolerance; |
| 269 | |
| 270 | // Linear least squares solver options ------------------------------------- |
| 271 | |
| 272 | LinearSolverType linear_solver_type; |
| 273 | |
| 274 | // Type of preconditioner to use with the iterative linear solvers. |
| 275 | PreconditionerType preconditioner_type; |
| 276 | |
Sameer Agarwal | b051873 | 2012-05-29 00:27:57 -0700 | [diff] [blame] | 277 | // Ceres supports using multiple sparse linear algebra libraries |
| 278 | // for sparse matrix ordering and factorizations. Currently, |
| 279 | // SUITE_SPARSE and CX_SPARSE are the valid choices, depending on |
| 280 | // whether they are linked into Ceres at build time. |
| 281 | SparseLinearAlgebraLibraryType sparse_linear_algebra_library; |
| 282 | |
Keir Mierle | 8ebb073 | 2012-04-30 23:09:08 -0700 | [diff] [blame] | 283 | // Number of threads used by Ceres to solve the Newton |
| 284 | // step. Currently only the SPARSE_SCHUR solver is capable of |
| 285 | // using this setting. |
| 286 | int num_linear_solver_threads; |
| 287 | |
Sameer Agarwal | 9123e2f | 2012-09-18 21:49:06 -0700 | [diff] [blame] | 288 | // The order in which variables are eliminated in a linear solver |
| 289 | // can have a significant of impact on the efficiency and accuracy |
| 290 | // of the method. e.g., when doing sparse Cholesky factorization, |
| 291 | // there are matrices for which a good ordering will give a |
| 292 | // Cholesky factor with O(n) storage, where as a bad ordering will |
| 293 | // result in an completely dense factor. |
| 294 | // |
| 295 | // Ceres allows the user to provide varying amounts of hints to |
| 296 | // the solver about the variable elimination ordering to use. This |
| 297 | // can range from no hints, where the solver is free to decide the |
| 298 | // best possible ordering based on the user's choices like the |
| 299 | // linear solver being used, to an exact order in which the |
| 300 | // variables should be eliminated, and a variety of possibilities |
| 301 | // in between. |
| 302 | // |
Sameer Agarwal | 2c94eed | 2012-10-01 16:34:37 -0700 | [diff] [blame] | 303 | // Instances of the ParameterBlockOrdering class are used to |
| 304 | // communicate this information to Ceres. |
Sameer Agarwal | 9123e2f | 2012-09-18 21:49:06 -0700 | [diff] [blame] | 305 | // |
Sameer Agarwal | 2c94eed | 2012-10-01 16:34:37 -0700 | [diff] [blame] | 306 | // Formally an ordering is an ordered partitioning of the |
| 307 | // parameter blocks, i.e, each parameter block belongs to exactly |
| 308 | // one group, and each group has a unique non-negative integer |
| 309 | // associated with it, that determines its order in the set of |
| 310 | // groups. |
Sameer Agarwal | 9123e2f | 2012-09-18 21:49:06 -0700 | [diff] [blame] | 311 | // |
| 312 | // Given such an ordering, Ceres ensures that the parameter blocks in |
| 313 | // the lowest numbered group are eliminated first, and then the |
| 314 | // parmeter blocks in the next lowest numbered group and so on. Within |
| 315 | // each group, Ceres is free to order the parameter blocks as it |
| 316 | // chooses. |
| 317 | // |
Sameer Agarwal | 65625f7 | 2012-09-17 12:06:57 -0700 | [diff] [blame] | 318 | // If NULL, then all parameter blocks are assumed to be in the |
| 319 | // same group and the solver is free to decide the best |
Sameer Agarwal | 2c94eed | 2012-10-01 16:34:37 -0700 | [diff] [blame] | 320 | // ordering. |
| 321 | // |
| 322 | // e.g. Consider the linear system |
| 323 | // |
| 324 | // x + y = 3 |
| 325 | // 2x + 3y = 7 |
| 326 | // |
| 327 | // There are two ways in which it can be solved. First eliminating x |
| 328 | // from the two equations, solving for y and then back substituting |
| 329 | // for x, or first eliminating y, solving for x and back substituting |
| 330 | // for y. The user can construct three orderings here. |
| 331 | // |
| 332 | // {0: x}, {1: y} - eliminate x first. |
| 333 | // {0: y}, {1: x} - eliminate y first. |
| 334 | // {0: x, y} - Solver gets to decide the elimination order. |
| 335 | // |
| 336 | // Thus, to have Ceres determine the ordering automatically using |
| 337 | // heuristics, put all the variables in group 0 and to control the |
| 338 | // ordering for every variable, create groups 0..N-1, one per |
| 339 | // variable, in the desired order. |
| 340 | // |
| 341 | // Bundle Adjustment |
| 342 | // ----------------- |
| 343 | // |
| 344 | // A particular case of interest is bundle adjustment, where the user |
| 345 | // has two options. The default is to not specify an ordering at all, |
| 346 | // the solver will see that the user wants to use a Schur type solver |
| 347 | // and figure out the right elimination ordering. |
| 348 | // |
| 349 | // But if the user already knows what parameter blocks are points and |
| 350 | // what are cameras, they can save preprocessing time by partitioning |
| 351 | // the parameter blocks into two groups, one for the points and one |
| 352 | // for the cameras, where the group containing the points has an id |
| 353 | // smaller than the group containing cameras. |
Sameer Agarwal | ba8d967 | 2012-10-02 00:48:57 -0700 | [diff] [blame] | 354 | // |
| 355 | // Once assigned, Solver::Options owns this pointer and will |
| 356 | // deallocate the memory when destroyed. |
Sameer Agarwal | 68b32a9 | 2012-10-06 23:10:51 -0700 | [diff] [blame] | 357 | ParameterBlockOrdering* linear_solver_ordering; |
Keir Mierle | 8ebb073 | 2012-04-30 23:09:08 -0700 | [diff] [blame] | 358 | |
Sameer Agarwal | ba8d967 | 2012-10-02 00:48:57 -0700 | [diff] [blame] | 359 | // By virtue of the modeling layer in Ceres being block oriented, |
| 360 | // all the matrices used by Ceres are also block oriented. When |
| 361 | // doing sparse direct factorization of these matrices (for |
| 362 | // SPARSE_NORMAL_CHOLESKY, SPARSE_SCHUR and ITERATIVE in |
| 363 | // conjunction with CLUSTER_TRIDIAGONAL AND CLUSTER_JACOBI |
| 364 | // preconditioners), the fill-reducing ordering algorithms can |
| 365 | // either be run on the block or the scalar form of these matrices. |
| 366 | // Running it on the block form exposes more of the super-nodal |
| 367 | // structure of the matrix to the factorization routines. Setting |
| 368 | // this parameter to true runs the ordering algorithms in block |
| 369 | // form. Currently this option only makes sense with |
| 370 | // sparse_linear_algebra_library = SUITE_SPARSE. |
| 371 | bool use_block_amd; |
| 372 | |
Sameer Agarwal | 9123e2f | 2012-09-18 21:49:06 -0700 | [diff] [blame] | 373 | // Some non-linear least squares problems have additional |
| 374 | // structure in the way the parameter blocks interact that it is |
| 375 | // beneficial to modify the way the trust region step is computed. |
| 376 | // |
| 377 | // e.g., consider the following regression problem |
| 378 | // |
| 379 | // y = a_1 exp(b_1 x) + a_2 exp(b_3 x^2 + c_1) |
| 380 | // |
| 381 | // Given a set of pairs{(x_i, y_i)}, the user wishes to estimate |
| 382 | // a_1, a_2, b_1, b_2, and c_1. |
| 383 | // |
| 384 | // Notice here that the expression on the left is linear in a_1 |
| 385 | // and a_2, and given any value for b_1, b_2 and c_1, it is |
| 386 | // possible to use linear regression to estimate the optimal |
| 387 | // values of a_1 and a_2. Indeed, its possible to analytically |
| 388 | // eliminate the variables a_1 and a_2 from the problem all |
| 389 | // together. Problems like these are known as separable least |
| 390 | // squares problem and the most famous algorithm for solving them |
| 391 | // is the Variable Projection algorithm invented by Golub & |
| 392 | // Pereyra. |
| 393 | // |
| 394 | // Similar structure can be found in the matrix factorization with |
| 395 | // missing data problem. There the corresponding algorithm is |
| 396 | // known as Wiberg's algorithm. |
| 397 | // |
| 398 | // Ruhe & Wedin (Algorithms for Separable Nonlinear Least Squares |
| 399 | // Problems, SIAM Reviews, 22(3), 1980) present an analyis of |
| 400 | // various algorithms for solving separable non-linear least |
| 401 | // squares problems and refer to "Variable Projection" as |
| 402 | // Algorithm I in their paper. |
| 403 | // |
| 404 | // Implementing Variable Projection is tedious and expensive, and |
| 405 | // they present a simpler algorithm, which they refer to as |
| 406 | // Algorithm II, where once the Newton/Trust Region step has been |
| 407 | // computed for the whole problem (a_1, a_2, b_1, b_2, c_1) and |
| 408 | // additional optimization step is performed to estimate a_1 and |
| 409 | // a_2 exactly. |
| 410 | // |
| 411 | // This idea can be generalized to cases where the residual is not |
| 412 | // linear in a_1 and a_2, i.e., Solve for the trust region step |
| 413 | // for the full problem, and then use it as the starting point to |
| 414 | // further optimize just a_1 and a_2. For the linear case, this |
| 415 | // amounts to doing a single linear least squares solve. For |
| 416 | // non-linear problems, any method for solving the a_1 and a_2 |
| 417 | // optimization problems will do. The only constraint on a_1 and |
| 418 | // a_2 is that they do not co-occur in any residual block. |
| 419 | // |
Sameer Agarwal | ba8d967 | 2012-10-02 00:48:57 -0700 | [diff] [blame] | 420 | // This idea can be further generalized, by not just optimizing |
| 421 | // (a_1, a_2), but decomposing the graph corresponding to the |
| 422 | // Hessian matrix's sparsity structure in a collection of |
| 423 | // non-overlapping independent sets and optimizing each of them. |
| 424 | // |
Sameer Agarwal | 9123e2f | 2012-09-18 21:49:06 -0700 | [diff] [blame] | 425 | // Setting "use_inner_iterations" to true enables the use of this |
| 426 | // non-linear generalization of Ruhe & Wedin's Algorithm II. This |
| 427 | // version of Ceres has a higher iteration complexity, but also |
Sameer Agarwal | ba8d967 | 2012-10-02 00:48:57 -0700 | [diff] [blame] | 428 | // displays better convergence behaviour per iteration. Setting |
| 429 | // Solver::Options::num_threads to the maximum number possible is |
| 430 | // highly recommended. |
Sameer Agarwal | 9123e2f | 2012-09-18 21:49:06 -0700 | [diff] [blame] | 431 | bool use_inner_iterations; |
| 432 | |
| 433 | // If inner_iterations is true, then the user has two choices. |
| 434 | // |
Sameer Agarwal | ba8d967 | 2012-10-02 00:48:57 -0700 | [diff] [blame] | 435 | // 1. Let the solver heuristically decide which parameter blocks |
| 436 | // to optimize in each inner iteration. To do this leave |
| 437 | // Solver::Options::inner_iteration_ordering untouched. |
Sameer Agarwal | 9123e2f | 2012-09-18 21:49:06 -0700 | [diff] [blame] | 438 | // |
Sameer Agarwal | ba8d967 | 2012-10-02 00:48:57 -0700 | [diff] [blame] | 439 | // 2. Specify a collection of of ordered independent sets. Where |
| 440 | // the lower numbered groups are optimized before the higher |
| 441 | // number groups. Each group must be an independent set. |
| 442 | ParameterBlockOrdering* inner_iteration_ordering; |
Keir Mierle | 8ebb073 | 2012-04-30 23:09:08 -0700 | [diff] [blame] | 443 | |
| 444 | // Minimum number of iterations for which the linear solver should |
| 445 | // run, even if the convergence criterion is satisfied. |
| 446 | int linear_solver_min_num_iterations; |
| 447 | |
| 448 | // Maximum number of iterations for which the linear solver should |
| 449 | // run. If the solver does not converge in less than |
| 450 | // linear_solver_max_num_iterations, then it returns |
| 451 | // MAX_ITERATIONS, as its termination type. |
| 452 | int linear_solver_max_num_iterations; |
| 453 | |
| 454 | // Forcing sequence parameter. The truncated Newton solver uses |
| 455 | // this number to control the relative accuracy with which the |
| 456 | // Newton step is computed. |
| 457 | // |
| 458 | // This constant is passed to ConjugateGradientsSolver which uses |
| 459 | // it to terminate the iterations when |
| 460 | // |
| 461 | // (Q_i - Q_{i-1})/Q_i < eta/i |
| 462 | double eta; |
| 463 | |
| 464 | // Normalize the jacobian using Jacobi scaling before calling |
| 465 | // the linear least squares solver. |
| 466 | bool jacobi_scaling; |
| 467 | |
| 468 | // Logging options --------------------------------------------------------- |
| 469 | |
| 470 | LoggingType logging_type; |
| 471 | |
| 472 | // By default the Minimizer progress is logged to VLOG(1), which |
| 473 | // is sent to STDERR depending on the vlog level. If this flag is |
| 474 | // set to true, and logging_type is not SILENT, the logging output |
| 475 | // is sent to STDOUT. |
| 476 | bool minimizer_progress_to_stdout; |
| 477 | |
Keir Mierle | 8ebb073 | 2012-04-30 23:09:08 -0700 | [diff] [blame] | 478 | // List of iterations at which the optimizer should dump the |
| 479 | // linear least squares problem to disk. Useful for testing and |
| 480 | // benchmarking. If empty (default), no problems are dumped. |
| 481 | // |
| 482 | // This is ignored if protocol buffers are disabled. |
| 483 | vector<int> lsqp_iterations_to_dump; |
Sameer Agarwal | 82f4b88 | 2012-05-06 21:05:28 -0700 | [diff] [blame] | 484 | string lsqp_dump_directory; |
| 485 | DumpFormatType lsqp_dump_format_type; |
Keir Mierle | 8ebb073 | 2012-04-30 23:09:08 -0700 | [diff] [blame] | 486 | |
Keir Mierle | 8ebb073 | 2012-04-30 23:09:08 -0700 | [diff] [blame] | 487 | // Finite differences options ---------------------------------------------- |
| 488 | |
| 489 | // Check all jacobians computed by each residual block with finite |
| 490 | // differences. This is expensive since it involves computing the |
| 491 | // derivative by normal means (e.g. user specified, autodiff, |
| 492 | // etc), then also computing it using finite differences. The |
| 493 | // results are compared, and if they differ substantially, details |
| 494 | // are printed to the log. |
| 495 | bool check_gradients; |
| 496 | |
| 497 | // Relative precision to check for in the gradient checker. If the |
| 498 | // relative difference between an element in a jacobian exceeds |
| 499 | // this number, then the jacobian for that cost term is dumped. |
| 500 | double gradient_check_relative_precision; |
| 501 | |
| 502 | // Relative shift used for taking numeric derivatives. For finite |
| 503 | // differencing, each dimension is evaluated at slightly shifted |
| 504 | // values; for the case of central difference, this is what gets |
| 505 | // evaluated: |
| 506 | // |
| 507 | // delta = numeric_derivative_relative_step_size; |
| 508 | // f_initial = f(x) |
| 509 | // f_forward = f((1 + delta) * x) |
| 510 | // f_backward = f((1 - delta) * x) |
| 511 | // |
| 512 | // The finite differencing is done along each dimension. The |
| 513 | // reason to use a relative (rather than absolute) step size is |
| 514 | // that this way, numeric differentation works for functions where |
| 515 | // the arguments are typically large (e.g. 1e9) and when the |
| 516 | // values are small (e.g. 1e-5). It is possible to construct |
| 517 | // "torture cases" which break this finite difference heuristic, |
| 518 | // but they do not come up often in practice. |
| 519 | // |
| 520 | // TODO(keir): Pick a smarter number than the default above! In |
| 521 | // theory a good choice is sqrt(eps) * x, which for doubles means |
| 522 | // about 1e-8 * x. However, I have found this number too |
| 523 | // optimistic. This number should be exposed for users to change. |
| 524 | double numeric_derivative_relative_step_size; |
| 525 | |
| 526 | // If true, the user's parameter blocks are updated at the end of |
| 527 | // every Minimizer iteration, otherwise they are updated when the |
| 528 | // Minimizer terminates. This is useful if, for example, the user |
| 529 | // wishes to visualize the state of the optimization every |
| 530 | // iteration. |
| 531 | bool update_state_every_iteration; |
| 532 | |
| 533 | // Callbacks that are executed at the end of each iteration of the |
Sameer Agarwal | aa9a83c | 2012-05-29 17:40:17 -0700 | [diff] [blame] | 534 | // Minimizer. An iteration may terminate midway, either due to |
| 535 | // numerical failures or because one of the convergence tests has |
| 536 | // been satisfied. In this case none of the callbacks are |
| 537 | // executed. |
| 538 | |
| 539 | // Callbacks are executed in the order that they are specified in |
| 540 | // this vector. By default, parameter blocks are updated only at |
| 541 | // the end of the optimization, i.e when the Minimizer |
| 542 | // terminates. This behaviour is controlled by |
Keir Mierle | 8ebb073 | 2012-04-30 23:09:08 -0700 | [diff] [blame] | 543 | // update_state_every_variable. If the user wishes to have access |
| 544 | // to the update parameter blocks when his/her callbacks are |
| 545 | // executed, then set update_state_every_iteration to true. |
| 546 | // |
| 547 | // The solver does NOT take ownership of these pointers. |
| 548 | vector<IterationCallback*> callbacks; |
Sameer Agarwal | 1b7f3b5 | 2012-08-09 21:46:19 -0700 | [diff] [blame] | 549 | |
| 550 | // If non-empty, a summary of the execution of the solver is |
| 551 | // recorded to this file. |
| 552 | string solver_log; |
Keir Mierle | 8ebb073 | 2012-04-30 23:09:08 -0700 | [diff] [blame] | 553 | }; |
| 554 | |
| 555 | struct Summary { |
| 556 | Summary(); |
| 557 | |
| 558 | // A brief one line description of the state of the solver after |
| 559 | // termination. |
| 560 | string BriefReport() const; |
| 561 | |
| 562 | // A full multiline description of the state of the solver after |
| 563 | // termination. |
| 564 | string FullReport() const; |
| 565 | |
| 566 | // Minimizer summary ------------------------------------------------- |
Sameer Agarwal | d010de5 | 2013-02-15 14:26:56 -0800 | [diff] [blame] | 567 | MinimizerType minimizer_type; |
| 568 | |
Keir Mierle | 8ebb073 | 2012-04-30 23:09:08 -0700 | [diff] [blame] | 569 | SolverTerminationType termination_type; |
| 570 | |
| 571 | // If the solver did not run, or there was a failure, a |
| 572 | // description of the error. |
| 573 | string error; |
| 574 | |
| 575 | // Cost of the problem before and after the optimization. See |
| 576 | // problem.h for definition of the cost of a problem. |
| 577 | double initial_cost; |
| 578 | double final_cost; |
| 579 | |
| 580 | // The part of the total cost that comes from residual blocks that |
| 581 | // were held fixed by the preprocessor because all the parameter |
| 582 | // blocks that they depend on were fixed. |
| 583 | double fixed_cost; |
| 584 | |
Keir Mierle | 8ebb073 | 2012-04-30 23:09:08 -0700 | [diff] [blame] | 585 | vector<IterationSummary> iterations; |
| 586 | |
| 587 | int num_successful_steps; |
| 588 | int num_unsuccessful_steps; |
| 589 | |
Sameer Agarwal | fa01519 | 2012-06-11 14:21:42 -0700 | [diff] [blame] | 590 | // When the user calls Solve, before the actual optimization |
| 591 | // occurs, Ceres performs a number of preprocessing steps. These |
| 592 | // include error checks, memory allocations, and reorderings. This |
| 593 | // time is accounted for as preprocessing time. |
Keir Mierle | 8ebb073 | 2012-04-30 23:09:08 -0700 | [diff] [blame] | 594 | double preprocessor_time_in_seconds; |
Sameer Agarwal | fa01519 | 2012-06-11 14:21:42 -0700 | [diff] [blame] | 595 | |
| 596 | // Time spent in the TrustRegionMinimizer. |
Keir Mierle | 8ebb073 | 2012-04-30 23:09:08 -0700 | [diff] [blame] | 597 | double minimizer_time_in_seconds; |
Sameer Agarwal | fa01519 | 2012-06-11 14:21:42 -0700 | [diff] [blame] | 598 | |
| 599 | // After the Minimizer is finished, some time is spent in |
| 600 | // re-evaluating residuals etc. This time is accounted for in the |
| 601 | // postprocessor time. |
| 602 | double postprocessor_time_in_seconds; |
| 603 | |
| 604 | // Some total of all time spent inside Ceres when Solve is called. |
Keir Mierle | 8ebb073 | 2012-04-30 23:09:08 -0700 | [diff] [blame] | 605 | double total_time_in_seconds; |
| 606 | |
Sameer Agarwal | 42a84b8 | 2013-02-01 12:22:53 -0800 | [diff] [blame] | 607 | double linear_solver_time_in_seconds; |
| 608 | double residual_evaluation_time_in_seconds; |
| 609 | double jacobian_evaluation_time_in_seconds; |
| 610 | |
Keir Mierle | 8ebb073 | 2012-04-30 23:09:08 -0700 | [diff] [blame] | 611 | // Preprocessor summary. |
| 612 | int num_parameter_blocks; |
| 613 | int num_parameters; |
Keir Mierle | ba94421 | 2013-02-25 12:46:44 -0800 | [diff] [blame] | 614 | int num_effective_parameters; |
Keir Mierle | 8ebb073 | 2012-04-30 23:09:08 -0700 | [diff] [blame] | 615 | int num_residual_blocks; |
| 616 | int num_residuals; |
| 617 | |
| 618 | int num_parameter_blocks_reduced; |
| 619 | int num_parameters_reduced; |
Keir Mierle | ba94421 | 2013-02-25 12:46:44 -0800 | [diff] [blame] | 620 | int num_effective_parameters_reduced; |
Keir Mierle | 8ebb073 | 2012-04-30 23:09:08 -0700 | [diff] [blame] | 621 | int num_residual_blocks_reduced; |
| 622 | int num_residuals_reduced; |
| 623 | |
| 624 | int num_eliminate_blocks_given; |
| 625 | int num_eliminate_blocks_used; |
| 626 | |
| 627 | int num_threads_given; |
| 628 | int num_threads_used; |
| 629 | |
| 630 | int num_linear_solver_threads_given; |
| 631 | int num_linear_solver_threads_used; |
| 632 | |
| 633 | LinearSolverType linear_solver_type_given; |
| 634 | LinearSolverType linear_solver_type_used; |
| 635 | |
Sameer Agarwal | 977be7c | 2013-01-26 16:01:54 -0800 | [diff] [blame] | 636 | vector<int> linear_solver_ordering_given; |
| 637 | vector<int> linear_solver_ordering_used; |
| 638 | |
Keir Mierle | 8ebb073 | 2012-04-30 23:09:08 -0700 | [diff] [blame] | 639 | PreconditionerType preconditioner_type; |
Sameer Agarwal | 97fb6d9 | 2012-06-17 10:08:19 -0700 | [diff] [blame] | 640 | |
| 641 | TrustRegionStrategyType trust_region_strategy_type; |
Sameer Agarwal | 1e28920 | 2012-08-21 18:00:54 -0700 | [diff] [blame] | 642 | DoglegType dogleg_type; |
Sameer Agarwal | d010de5 | 2013-02-15 14:26:56 -0800 | [diff] [blame] | 643 | bool inner_iterations; |
| 644 | |
Sameer Agarwal | 97fb6d9 | 2012-06-17 10:08:19 -0700 | [diff] [blame] | 645 | SparseLinearAlgebraLibraryType sparse_linear_algebra_library; |
Sameer Agarwal | 977be7c | 2013-01-26 16:01:54 -0800 | [diff] [blame] | 646 | |
Sameer Agarwal | d010de5 | 2013-02-15 14:26:56 -0800 | [diff] [blame] | 647 | LineSearchDirectionType line_search_direction_type; |
| 648 | LineSearchType line_search_type; |
| 649 | int max_lbfgs_rank; |
| 650 | |
Sameer Agarwal | 977be7c | 2013-01-26 16:01:54 -0800 | [diff] [blame] | 651 | vector<int> inner_iteration_ordering_given; |
| 652 | vector<int> inner_iteration_ordering_used; |
Keir Mierle | 8ebb073 | 2012-04-30 23:09:08 -0700 | [diff] [blame] | 653 | }; |
| 654 | |
| 655 | // Once a least squares problem has been built, this function takes |
| 656 | // the problem and optimizes it based on the values of the options |
| 657 | // parameters. Upon return, a detailed summary of the work performed |
| 658 | // by the preprocessor, the non-linear minmizer and the linear |
| 659 | // solver are reported in the summary object. |
| 660 | virtual void Solve(const Options& options, |
| 661 | Problem* problem, |
| 662 | Solver::Summary* summary); |
| 663 | }; |
| 664 | |
| 665 | // Helper function which avoids going through the interface. |
| 666 | void Solve(const Solver::Options& options, |
| 667 | Problem* problem, |
| 668 | Solver::Summary* summary); |
| 669 | |
| 670 | } // namespace ceres |
| 671 | |
| 672 | #endif // CERES_PUBLIC_SOLVER_H_ |