// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2012 Google Inc. All rights reserved.
// http://code.google.com/p/ceres-solver/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
//   this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
//   this list of conditions and the following disclaimer in the documentation
//   and/or other materials provided with the distribution.
// * Neither the name of Google Inc. nor the names of its contributors may be
//   used to endorse or promote products derived from this software without
//   specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
// Author: sameeragarwal@google.com (Sameer Agarwal)

#include "ceres/dogleg_strategy.h"

#include <cmath>
#include "Eigen/Dense"
#include "ceres/array_utils.h"
#include "ceres/internal/eigen.h"
#include "ceres/linear_least_squares_problems.h"
#include "ceres/linear_solver.h"
#include "ceres/polynomial.h"
#include "ceres/sparse_matrix.h"
#include "ceres/trust_region_strategy.h"
#include "ceres/types.h"
#include "glog/logging.h"

namespace ceres {
namespace internal {
namespace {
const double kMaxMu = 1.0;
const double kMinMu = 1e-8;
}  // namespace

DoglegStrategy::DoglegStrategy(const TrustRegionStrategy::Options& options)
    : linear_solver_(options.linear_solver),
      radius_(options.initial_radius),
      max_radius_(options.max_radius),
      min_diagonal_(options.min_lm_diagonal),
      max_diagonal_(options.max_lm_diagonal),
      mu_(kMinMu),
      min_mu_(kMinMu),
      max_mu_(kMaxMu),
      mu_increase_factor_(10.0),
      increase_threshold_(0.75),
      decrease_threshold_(0.25),
      dogleg_step_norm_(0.0),
      reuse_(false),
      dogleg_type_(options.dogleg_type) {
  CHECK_NOTNULL(linear_solver_);
  CHECK_GT(min_diagonal_, 0.0);
  CHECK_LE(min_diagonal_, max_diagonal_);
  CHECK_GT(max_radius_, 0.0);
}

// If the reuse_ flag is not set, then the Cauchy point (scaled
// gradient) and the new Gauss-Newton step are computed from
// scratch. The dogleg step is then computed as an interpolation
// of these two vectors.
TrustRegionStrategy::Summary DoglegStrategy::ComputeStep(
    const TrustRegionStrategy::PerSolveOptions& per_solve_options,
    SparseMatrix* jacobian,
    const double* residuals,
    double* step) {
  CHECK_NOTNULL(jacobian);
  CHECK_NOTNULL(residuals);
  CHECK_NOTNULL(step);

  const int n = jacobian->num_cols();
  if (reuse_) {
    // The Gauss-Newton and gradient vectors are always available; only a
    // new interpolant needs to be computed. For the subspace case,
    // the subspace and the two-dimensional model are also still valid.
    switch (dogleg_type_) {
      case TRADITIONAL_DOGLEG:
        ComputeTraditionalDoglegStep(step);
        break;

      case SUBSPACE_DOGLEG:
        ComputeSubspaceDoglegStep(step);
        break;
    }
    TrustRegionStrategy::Summary summary;
    summary.num_iterations = 0;
    summary.termination_type = TOLERANCE;
    return summary;
  }

  reuse_ = true;
  // Check that we have the storage needed to hold the various
  // temporary vectors.
  if (diagonal_.rows() != n) {
    diagonal_.resize(n, 1);
    gradient_.resize(n, 1);
    gauss_newton_step_.resize(n, 1);
  }

  // Vector used to form the diagonal matrix that is used to
  // regularize the Gauss-Newton solve and that defines the
  // elliptical trust region
  //
  //   || D * step || <= radius_ .
  //
  jacobian->SquaredColumnNorm(diagonal_.data());
  for (int i = 0; i < n; ++i) {
    diagonal_[i] = min(max(diagonal_[i], min_diagonal_), max_diagonal_);
  }
  diagonal_ = diagonal_.array().sqrt();

  ComputeGradient(jacobian, residuals);
  ComputeCauchyPoint(jacobian);

  LinearSolver::Summary linear_solver_summary =
      ComputeGaussNewtonStep(per_solve_options, jacobian, residuals);

  TrustRegionStrategy::Summary summary;
  summary.residual_norm = linear_solver_summary.residual_norm;
  summary.num_iterations = linear_solver_summary.num_iterations;
  summary.termination_type = linear_solver_summary.termination_type;

  if (linear_solver_summary.termination_type != FAILURE) {
    switch (dogleg_type_) {
      // Interpolate the Cauchy point and the Gauss-Newton step.
      case TRADITIONAL_DOGLEG:
        ComputeTraditionalDoglegStep(step);
        break;

      // Find the minimum in the subspace defined by the
      // Cauchy point and the (Gauss-)Newton step.
      case SUBSPACE_DOGLEG:
        if (!ComputeSubspaceModel(jacobian)) {
          summary.termination_type = FAILURE;
          break;
        }
        ComputeSubspaceDoglegStep(step);
        break;
    }
  }

  return summary;
}

// The trust region is assumed to be elliptical with the
// diagonal scaling matrix D defined by sqrt(diagonal_).
// It is implemented by substituting step' = D * step.
// The trust region for step' is spherical.
// The gradient, the Gauss-Newton step, the Cauchy point,
// and all calculations involving the Jacobian have to
// be adjusted accordingly.
void DoglegStrategy::ComputeGradient(
    SparseMatrix* jacobian,
    const double* residuals) {
  gradient_.setZero();
  jacobian->LeftMultiply(residuals, gradient_.data());
  gradient_.array() /= diagonal_.array();
}

// The Cauchy point is the global minimizer of the quadratic model
// along the one-dimensional subspace spanned by the gradient.
void DoglegStrategy::ComputeCauchyPoint(SparseMatrix* jacobian) {
  // alpha * -gradient is the Cauchy point.
  Vector Jg(jacobian->num_rows());
  Jg.setZero();
  // The Jacobian is scaled implicitly by computing J * (D^-1 * (D^-1 * g))
  // instead of (J * D^-1) * (D^-1 * g).
  Vector scaled_gradient =
      (gradient_.array() / diagonal_.array()).matrix();
  jacobian->RightMultiply(scaled_gradient.data(), Jg.data());
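  // A short sketch of where the step length below comes from (the
  // standard Cauchy point argument): along the scaled steepest descent
  // direction -g, the quadratic model evaluates to
  //
  //   m(-t g) = 0.5 |r - t J D^-1 g|^2
  //           = 0.5 |r|^2 - t |g|^2 + 0.5 t^2 |J D^-1 g|^2 ,
  //
  // which is minimized by t = |g|^2 / |J D^-1 g|^2.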
  alpha_ = gradient_.squaredNorm() / Jg.squaredNorm();
}

// The dogleg step is defined as the intersection of the trust region
// boundary with the piecewise linear path from the origin to the Cauchy
// point and then from there to the Gauss-Newton point (global minimizer
// of the model function). The Gauss-Newton point is taken if it lies
// within the trust region.
void DoglegStrategy::ComputeTraditionalDoglegStep(double* dogleg) {
  VectorRef dogleg_step(dogleg, gradient_.rows());

  // Case 1. The Gauss-Newton step lies inside the trust region, and
  // is therefore the optimal solution to the trust-region problem.
  const double gradient_norm = gradient_.norm();
  const double gauss_newton_norm = gauss_newton_step_.norm();
  if (gauss_newton_norm <= radius_) {
    dogleg_step = gauss_newton_step_;
    dogleg_step_norm_ = gauss_newton_norm;
    dogleg_step.array() /= diagonal_.array();
    VLOG(3) << "GaussNewton step size: " << dogleg_step_norm_
            << " radius: " << radius_;
    return;
  }

  // Case 2. Both the Cauchy point and the Gauss-Newton step lie outside
  // the trust region. Rescale the Cauchy point to the trust region
  // boundary and return.
  if (gradient_norm * alpha_ >= radius_) {
    dogleg_step = -(radius_ / gradient_norm) * gradient_;
    dogleg_step_norm_ = radius_;
    dogleg_step.array() /= diagonal_.array();
    VLOG(3) << "Cauchy step size: " << dogleg_step_norm_
            << " radius: " << radius_;
    return;
  }

  // Case 3. The Cauchy point is inside the trust region and the
  // Gauss-Newton step is outside. Compute the line joining the two
  // points and the point on it which intersects the trust region
  // boundary.

  // a = alpha * -gradient
  // b = gauss_newton_step
  const double b_dot_a = -alpha_ * gradient_.dot(gauss_newton_step_);
  const double a_squared_norm = pow(alpha_ * gradient_norm, 2.0);
  const double b_minus_a_squared_norm =
      a_squared_norm - 2 * b_dot_a + pow(gauss_newton_norm, 2);

  // c = a' (b - a)
  //   = alpha * -gradient' gauss_newton_step - alpha^2 |gradient|^2
  const double c = b_dot_a - a_squared_norm;
  const double d = sqrt(c * c + b_minus_a_squared_norm *
                        (pow(radius_, 2.0) - a_squared_norm));

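  // A note on the formula below: beta is the positive root of the
  // quadratic
  //
  //   |b - a|^2 beta^2 + 2 c beta + (|a|^2 - radius_^2) = 0 ,
  //
  // which states that |a + beta (b - a)| = radius_. That root is
  // beta = (d - c) / |b - a|^2; when c > 0 this form suffers from
  // cancellation, so the algebraically equivalent expression
  // (radius_^2 - |a|^2) / (d + c) is used instead.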
  double beta =
      (c <= 0)
      ? (d - c) / b_minus_a_squared_norm
      : (radius_ * radius_ - a_squared_norm) / (d + c);
  dogleg_step = (-alpha_ * (1.0 - beta)) * gradient_
      + beta * gauss_newton_step_;
  dogleg_step_norm_ = dogleg_step.norm();
  dogleg_step.array() /= diagonal_.array();
  VLOG(3) << "Dogleg step size: " << dogleg_step_norm_
          << " radius: " << radius_;
}

// The subspace method finds the minimum of the two-dimensional problem
//
//   min. 1/2 x' B' H B x + g' B x
//   s.t. || B x ||^2 <= r^2
//
// where r is the trust region radius and B is the matrix with unit columns
// spanning the subspace defined by the steepest descent and Newton direction.
// This subspace by definition includes the Gauss-Newton point, which is
// therefore taken if it lies within the trust region.
void DoglegStrategy::ComputeSubspaceDoglegStep(double* dogleg) {
  VectorRef dogleg_step(dogleg, gradient_.rows());

  // The Gauss-Newton point is inside the trust region if |GN| <= radius_.
  // This test is valid even though radius_ is a length in the two-dimensional
  // subspace while gauss_newton_step_ is expressed in the (scaled)
  // higher dimensional original space. This is because
  //
  //   1. gauss_newton_step_ by definition lies in the subspace, and
  //   2. the subspace basis is orthonormal.
  //
  // As a consequence, the norm of the gauss_newton_step_ in the subspace is
  // the same as its norm in the original space.
  const double gauss_newton_norm = gauss_newton_step_.norm();
  if (gauss_newton_norm <= radius_) {
    dogleg_step = gauss_newton_step_;
    dogleg_step_norm_ = gauss_newton_norm;
    dogleg_step.array() /= diagonal_.array();
    VLOG(3) << "GaussNewton step size: " << dogleg_step_norm_
            << " radius: " << radius_;
    return;
  }

  // The optimum lies on the boundary of the trust region. The above problem
  // therefore becomes
  //
  //   min. 1/2 x^T B^T H B x + g^T B x
  //   s.t. || B x ||^2 = r^2
  //
  // Notice the equality in the constraint.
  //
  // This can be solved by forming the Lagrangian, solving for x(y)
  // (where y is the Lagrange multiplier) using the gradient of the
  // objective, and substituting x(y) back into the constraint. This
  // results in a fourth order polynomial in y, which can be solved using
  // e.g. the companion matrix. See the description of
  // MakePolynomialForBoundaryConstrainedProblem for details. The result
  // is up to four real roots y*, not all of which correspond to feasible
  // points. The feasible points x(y*) have to be tested for optimality.

  if (subspace_is_one_dimensional_) {
    // The subspace is one-dimensional, so both the gradient and
    // the Gauss-Newton step point in the same direction.
    // In this case, we move along the gradient until we reach the trust
    // region boundary.
    dogleg_step = -(radius_ / gradient_.norm()) * gradient_;
    dogleg_step_norm_ = radius_;
    dogleg_step.array() /= diagonal_.array();
    VLOG(3) << "Dogleg subspace step size (1D): " << dogleg_step_norm_
            << " radius: " << radius_;
    return;
  }

  Vector2d minimum(0.0, 0.0);
  if (!FindMinimumOnTrustRegionBoundary(&minimum)) {
    // The polynomial roots could not be computed, which can happen
    // e.g. in the positive semi-definite case; fall back to a
    // traditional dogleg step.
    LOG(WARNING) << "Failed to compute polynomial roots. "
                 << "Taking traditional dogleg step instead.";
    ComputeTraditionalDoglegStep(dogleg);
    return;
  }

  // Test first order optimality at the minimum.
  // The first order KKT conditions state that the minimum x*
  // has to satisfy either || x* ||^2 < r^2 (i.e. has to lie within
  // the trust region), or
  //
  //   (B x* + g) + y x* = 0
  //
  // for some positive scalar y.
  // Here, as it is already known that the minimum lies on the boundary, the
  // latter condition is tested. To allow for small imprecisions, we test if
  // the angle between (B x* + g) and -x* is smaller than acos(0.99).
  // The exact value of the cosine is arbitrary but should be close to 1.
  //
  // This condition should not be violated. If it is, the minimum was not
  // correctly determined.
  const double kCosineThreshold = 0.99;
  const Vector2d grad_minimum = subspace_B_ * minimum + subspace_g_;
  const double cosine_angle = -minimum.dot(grad_minimum) /
      (minimum.norm() * grad_minimum.norm());
  if (cosine_angle < kCosineThreshold) {
    LOG(WARNING) << "First order optimality seems to be violated "
                 << "in the subspace method!\n"
                 << "Cosine of angle between x and B x + g is "
                 << cosine_angle << ".\n"
                 << "Taking a regular dogleg step instead.\n"
                 << "Please consider filing a bug report if this "
                 << "happens frequently or consistently.\n";
    ComputeTraditionalDoglegStep(dogleg);
    return;
  }

  // Create the full step from the optimal 2d solution.
  dogleg_step = subspace_basis_ * minimum;
  dogleg_step_norm_ = radius_;
  dogleg_step.array() /= diagonal_.array();
  VLOG(3) << "Dogleg subspace step size: " << dogleg_step_norm_
          << " radius: " << radius_;
}

// Build the polynomial that defines the optimal Lagrange multipliers.
// Let the Lagrangian be
//
//   L(x, y) = 0.5 x^T B x + x^T g + y (0.5 x^T x - 0.5 r^2).       (1)
//
// Stationary points of the Lagrangian are given by
//
//   0 = d L(x, y) / dx = Bx + g + y x                              (2)
//   0 = d L(x, y) / dy = 0.5 x^T x - 0.5 r^2                       (3)
//
// For any given y, we can solve (2) for x as
//
//   x(y) = -(B + y I)^-1 g .                                       (4)
//
// As B + y I is 2x2, we form the inverse explicitly:
//
//   (B + y I)^-1 = (1 / det(B + y I)) adj(B + y I)                 (5)
//
// where adj() denotes adjugation. This should be safe, as B is positive
// semi-definite and y is necessarily positive, so (B + y I) is indeed
// invertible.
// Plugging (5) into (4) and the result into (3), then dividing by 0.5 we
// obtain
//
//   0 = (1 / det(B + y I))^2 g^T adj(B + y I)^T adj(B + y I) g - r^2
//                                                                  (6)
//
// or
//
//   det(B + y I)^2 r^2 = g^T adj(B + y I)^T adj(B + y I) g         (7a)
//                      = g^T adj(B)^T adj(B) g
//                        + 2 y g^T adj(B)^T g + y^2 g^T g          (7b)
//
// as
//
//   adj(B + y I) = adj(B) + y I = adj(B)^T + y I .                 (8)
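// (A brief justification of (8): for 2x2 matrices adj(M) = tr(M) I - M,
// from which the identity follows directly; adj(B) is symmetric because
// B is.)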
//
// The left hand side can be expressed explicitly using
//
//   det(B + y I) = det(B) + y tr(B) + y^2 .                        (9)
//
// So (7) is a polynomial in y of degree four.
// Bringing everything back to the left hand side, the coefficients can
// be read off as
//
//     y^4 r^2
//   + y^3 2 r^2 tr(B)
//   + y^2 (r^2 tr(B)^2 + 2 r^2 det(B) - g^T g)
//   + y^1 (2 r^2 det(B) tr(B) - 2 g^T adj(B)^T g)
//   + y^0 (r^2 det(B)^2 - g^T adj(B)^T adj(B) g)
//
Vector DoglegStrategy::MakePolynomialForBoundaryConstrainedProblem() const {
  const double detB = subspace_B_.determinant();
  const double trB = subspace_B_.trace();
  const double r2 = radius_ * radius_;
  Matrix2d B_adj;
  B_adj <<  subspace_B_(1, 1), -subspace_B_(0, 1),
           -subspace_B_(1, 0),  subspace_B_(0, 0);

  Vector polynomial(5);
  polynomial(0) = r2;
  polynomial(1) = 2.0 * r2 * trB;
  polynomial(2) = r2 * (trB * trB + 2.0 * detB) - subspace_g_.squaredNorm();
  polynomial(3) = -2.0 * (subspace_g_.transpose() * B_adj * subspace_g_
      - r2 * detB * trB);
  polynomial(4) = r2 * detB * detB - (B_adj * subspace_g_).squaredNorm();

  return polynomial;
}

// Given a Lagrange multiplier y that corresponds to a stationary point
// of the Lagrangian L(x, y), compute the corresponding x from the
// equation
//
//   0 = d L(x, y) / dx
//     = B * x + g + y * x
//     = (B + y * I) * x + g
//
DoglegStrategy::Vector2d DoglegStrategy::ComputeSubspaceStepFromRoot(
    double y) const {
  const Matrix2d B_i = subspace_B_ + y * Matrix2d::Identity();
  return -B_i.partialPivLu().solve(subspace_g_);
}

// This function evaluates the quadratic model at a point x in the
// subspace spanned by subspace_basis_.
double DoglegStrategy::EvaluateSubspaceModel(const Vector2d& x) const {
  return 0.5 * x.dot(subspace_B_ * x) + subspace_g_.dot(x);
}

// This function attempts to solve the boundary-constrained subspace problem
//
//   min. 1/2 x^T B^T H B x + g^T B x
//   s.t. || B x ||^2 = r^2
//
// where B is an orthonormal subspace basis and r is the trust-region radius.
//
// This is done by finding the roots of a fourth degree polynomial. If the
// root finding fails, the function returns false and minimum will be set
// to (0, 0). If it succeeds, true is returned.
//
// In the failure case, another step should be taken, such as the traditional
// dogleg step.
bool DoglegStrategy::FindMinimumOnTrustRegionBoundary(Vector2d* minimum) const {
  CHECK_NOTNULL(minimum);

  // Return (0, 0) in all error cases.
  minimum->setZero();

  // Create the fourth-degree polynomial that is a necessary condition for
  // optimality.
  const Vector polynomial = MakePolynomialForBoundaryConstrainedProblem();

  // Find the real parts y_i of its roots (not only the real roots).
  Vector roots_real;
  if (!FindPolynomialRoots(polynomial, &roots_real, NULL)) {
    // Failed to find the roots of the polynomial, i.e. the candidate
    // solutions of the constrained problem. Report this back to the caller.
    return false;
  }

  // For each root y, compute B x(y) and check for feasibility.
  // Notice that there should always be four roots, as the leading term of
  // the polynomial is r^2 and therefore non-zero. However, as some roots
  // may be complex, the real parts are not necessarily unique.
  double minimum_value = std::numeric_limits<double>::max();
  bool valid_root_found = false;
  for (int i = 0; i < roots_real.size(); ++i) {
    const Vector2d x_i = ComputeSubspaceStepFromRoot(roots_real(i));

    // Not all roots correspond to points on the trust region boundary.
    // There are at most four candidate solutions. As we are interested
    // in the minimum, it is safe to consider all of them after projecting
    // them onto the trust region boundary.
    if (x_i.norm() > 0) {
      const double f_i = EvaluateSubspaceModel((radius_ / x_i.norm()) * x_i);
      valid_root_found = true;
      if (f_i < minimum_value) {
        minimum_value = f_i;
        *minimum = x_i;
      }
    }
  }

  return valid_root_found;
}

LinearSolver::Summary DoglegStrategy::ComputeGaussNewtonStep(
    const PerSolveOptions& per_solve_options,
    SparseMatrix* jacobian,
    const double* residuals) {
  const int n = jacobian->num_cols();
  LinearSolver::Summary linear_solver_summary;
  linear_solver_summary.termination_type = FAILURE;

  // The Jacobian matrix is often quite poorly conditioned. Thus it is
  // necessary to add a diagonal matrix at the bottom to prevent the
  // linear solver from failing.
  //
  // We do this by computing the same diagonal matrix as the one used
  // by Levenberg-Marquardt (other choices are possible), and scaling
  // it by a small constant (independent of the trust region radius).
  //
  // If the solve fails, the multiplier to the diagonal is increased
  // up to max_mu_ by a factor of mu_increase_factor_ every time. If
  // the linear solver is still not successful, the strategy returns
  // with FAILURE.
  //
  // Next time when a new Gauss-Newton step is requested, the
  // multiplier starts out from the last successful solve.
  //
  // When a step is declared successful, the multiplier is decreased
  // by half of mu_increase_factor_.
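  //
  // A worked example of this schedule: with kMinMu = 1e-8, kMaxMu = 1.0
  // and mu_increase_factor_ = 10.0 as defined above, a solve that keeps
  // failing while starting from mu_ = kMinMu is retried with
  // mu_ = 1e-7, 1e-6, ..., 1e-1, i.e. the loop below performs at most
  // 8 solves before giving up.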

  while (mu_ < max_mu_) {
    // Dogleg, as far as I (sameeragarwal) understand it, requires a
    // reasonably good estimate of the Gauss-Newton step. This means
    // that we need to solve the normal equations more or less
    // exactly. This is reflected in the values of the tolerances set
    // below.
    //
    // For now, this strategy should only be used with exact
    // factorization based solvers, for which these tolerances are
    // automatically satisfied.
    //
    // The right way to combine inexact solves with trust region
    // methods is to use Steihaug's method.
    LinearSolver::PerSolveOptions solve_options;
    solve_options.q_tolerance = 0.0;
    solve_options.r_tolerance = 0.0;

    lm_diagonal_ = diagonal_ * std::sqrt(mu_);
    solve_options.D = lm_diagonal_.data();

    // As in the LevenbergMarquardtStrategy, solve Jy = r instead
    // of Jx = -r and later set x = -y to avoid having to modify
    // either jacobian or residuals.
    InvalidateArray(n, gauss_newton_step_.data());
    linear_solver_summary = linear_solver_->Solve(jacobian,
                                                  residuals,
                                                  solve_options,
                                                  gauss_newton_step_.data());

    if (per_solve_options.dump_format_type == CONSOLE ||
        (per_solve_options.dump_format_type != CONSOLE &&
         !per_solve_options.dump_filename_base.empty())) {
      if (!DumpLinearLeastSquaresProblem(per_solve_options.dump_filename_base,
                                         per_solve_options.dump_format_type,
                                         jacobian,
                                         solve_options.D,
                                         residuals,
                                         gauss_newton_step_.data(),
                                         0)) {
        LOG(ERROR) << "Unable to dump trust region problem."
                   << " Filename base: "
                   << per_solve_options.dump_filename_base;
      }
    }

    if (linear_solver_summary.termination_type == FAILURE ||
        !IsArrayValid(n, gauss_newton_step_.data())) {
      mu_ *= mu_increase_factor_;
      VLOG(2) << "Increasing mu " << mu_;
      linear_solver_summary.termination_type = FAILURE;
      continue;
    }
    break;
  }

  if (linear_solver_summary.termination_type != FAILURE) {
    // The scaled Gauss-Newton step is D * GN:
    //
    //     -(D^-1 J^T J D^-1)^-1 (D^-1 g)
    //   = -D (J^T J)^-1 D D^-1 g
    //   = D * (-(J^T J)^-1 g)
    //
    gauss_newton_step_.array() *= -diagonal_.array();
  }

  return linear_solver_summary;
}

void DoglegStrategy::StepAccepted(double step_quality) {
  CHECK_GT(step_quality, 0.0);

  if (step_quality < decrease_threshold_) {
    radius_ *= 0.5;
  }

  if (step_quality > increase_threshold_) {
    radius_ = max(radius_, 3.0 * dogleg_step_norm_);
  }

  // Reduce the regularization multiplier, in the hope that whatever
  // was causing the rank deficiency has gone away and we can return
  // to doing a pure Gauss-Newton solve.
  mu_ = max(min_mu_, 2.0 * mu_ / mu_increase_factor_);
  reuse_ = false;
}

void DoglegStrategy::StepRejected(double step_quality) {
  radius_ *= 0.5;
  reuse_ = true;
}

void DoglegStrategy::StepIsInvalid() {
  mu_ *= mu_increase_factor_;
  reuse_ = false;
}

double DoglegStrategy::Radius() const {
  return radius_;
}

bool DoglegStrategy::ComputeSubspaceModel(SparseMatrix* jacobian) {
  // Compute an orthogonal basis for the subspace using QR decomposition.
  Matrix basis_vectors(jacobian->num_cols(), 2);
  basis_vectors.col(0) = gradient_;
  basis_vectors.col(1) = gauss_newton_step_;
  Eigen::ColPivHouseholderQR<Matrix> basis_qr(basis_vectors);

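  // Note: the column-pivoted QR is rank-revealing, so its rank() estimate
  // can be used below to detect whether the gradient and the Gauss-Newton
  // step are (numerically) linearly dependent or even zero.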
  switch (basis_qr.rank()) {
    case 0:
      // This should never happen, as it implies that both the gradient
      // and the Gauss-Newton step are zero. In this case, the minimizer should
      // have stopped due to the gradient being too small.
      LOG(ERROR) << "Rank of subspace basis is 0. "
                 << "This means that the gradient at the current iterate is "
                 << "zero but the optimization has not been terminated. "
                 << "You may have found a bug in Ceres.";
      return false;

    case 1:
      // Gradient and Gauss-Newton step coincide, so we lie on one of the
      // major axes of the quadratic problem. In this case, we simply move
      // along the gradient until we reach the trust region boundary.
      subspace_is_one_dimensional_ = true;
      return true;

    case 2:
      subspace_is_one_dimensional_ = false;
      break;

    default:
      LOG(ERROR) << "Rank of the subspace basis matrix is reported to be "
                 << "greater than 2. As the matrix contains only two "
                 << "columns this cannot be true and is indicative of "
                 << "a bug.";
      return false;
  }

  // The subspace is two-dimensional, so compute the subspace model.
  // Given the basis U, this is
  //
  //   subspace_g_ = U^T g_scaled
  //
  // and
  //
  //   subspace_B_ = U^T (J_scaled^T J_scaled) U
  //
  // As J_scaled = J * D^-1, the latter becomes
  //
  //   subspace_B_ = ((U^T D^-1) J^T) (J (D^-1 U))
  //               = (J (D^-1 U))^T (J (D^-1 U))

  subspace_basis_ =
      basis_qr.householderQ() * Matrix::Identity(jacobian->num_cols(), 2);

  subspace_g_ = subspace_basis_.transpose() * gradient_;

  Eigen::Matrix<double, 2, Eigen::Dynamic, Eigen::RowMajor>
      Jb(2, jacobian->num_rows());
  Jb.setZero();

  Vector tmp;
  tmp = (subspace_basis_.col(0).array() / diagonal_.array()).matrix();
  jacobian->RightMultiply(tmp.data(), Jb.row(0).data());
  tmp = (subspace_basis_.col(1).array() / diagonal_.array()).matrix();
  jacobian->RightMultiply(tmp.data(), Jb.row(1).data());

  subspace_B_ = Jb * Jb.transpose();

  return true;
}

}  // namespace internal
}  // namespace ceres
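
// A minimal usage sketch (illustrative only; it assumes a previously
// configured LinearSolver* solver, a SparseMatrix* jacobian and
// residuals/step arrays, none of which are constructed here):
//
//   TrustRegionStrategy::Options options;
//   options.linear_solver = solver;
//   options.dogleg_type = SUBSPACE_DOGLEG;
//   DoglegStrategy strategy(options);
//
//   TrustRegionStrategy::PerSolveOptions per_solve_options;
//   TrustRegionStrategy::Summary summary =
//       strategy.ComputeStep(per_solve_options, jacobian, residuals, step);
//   if (summary.termination_type != FAILURE) {
//     // Apply the step, evaluate its quality, and report back via
//     // strategy.StepAccepted(step_quality) or strategy.StepRejected(...).
//   }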