Use elliptical trust region in DoglegStrategy.
In the Levenberg-Marquardt algorithm, the diagonal of J^T J is used to
regularize the problem. This corresponds to an elliptical trust region
|| D step || <= r, where D = sqrt(diag(J^T J)).
This commit adds the same elliptical trust region to the dogleg
strategy. The trust region problem becomes
min. x^T H x + g^T x
s.t. || D x || <= r
By substituting y = D x, it becomes
min. y^T D^-1 H D^-1 y + g^T D^-1 y
s.t. || y || <= r
which is the traditional spherical trust region problem.
This commit changes the DoglegStrategy so that the Gauss-Newton point,
the gradient, and the Cauchy point are scaled correctly (without
modifying the Jacobian directly). Then the dogleg step is computed the
same way as before, and finally the step is rescaled to obtain
x = D^-1 y.
Change-Id: Iea25a9113ecba911b746e269bc6e6fe51cb59003
diff --git a/internal/ceres/dogleg_strategy.cc b/internal/ceres/dogleg_strategy.cc
index a62a7bd..1687bcd 100644
--- a/internal/ceres/dogleg_strategy.cc
+++ b/internal/ceres/dogleg_strategy.cc
@@ -101,20 +101,19 @@
}
// Vector used to form the diagonal matrix that is used to
- // regularize the Gauss-Newton solve.
+ // regularize the Gauss-Newton solve and that defines the
+ // elliptical trust region
+ //
+ // || D * step || <= radius_ .
+ //
jacobian->SquaredColumnNorm(diagonal_.data());
for (int i = 0; i < n; ++i) {
diagonal_[i] = min(max(diagonal_[i], min_diagonal_), max_diagonal_);
+ diagonal_[i] = std::sqrt(diagonal_[i]);
}
- gradient_.setZero();
- jacobian->LeftMultiply(residuals, gradient_.data());
-
- // alpha * gradient is the Cauchy point.
- Vector Jg(jacobian->num_rows());
- Jg.setZero();
- jacobian->RightMultiply(gradient_.data(), Jg.data());
- alpha_ = gradient_.squaredNorm() / Jg.squaredNorm();
+ ComputeGradient(jacobian, residuals);
+ ComputeCauchyPoint(jacobian);
LinearSolver::Summary linear_solver_summary =
ComputeGaussNewtonStep(jacobian, residuals);
@@ -127,6 +126,33 @@
return linear_solver_summary;
}
+// The trust region is assumed to be elliptical with the
+// diagonal scaling matrix D defined by sqrt(diagonal_).
+// It is implemented by substituting step' = D * step.
+// The trust region for step' is spherical.
+// The gradient, the Gauss-Newton step, the Cauchy point,
+// and all calculations involving the Jacobian have to
+// be adjusted accordingly.
+void DoglegStrategy::ComputeGradient(
+ SparseMatrix* jacobian,
+ const double* residuals) {
+ gradient_.setZero();
+ jacobian->LeftMultiply(residuals, gradient_.data());
+ gradient_.array() /= diagonal_.array();
+}
+
+void DoglegStrategy::ComputeCauchyPoint(SparseMatrix* jacobian) {
+ // alpha * gradient is the Cauchy point.
+ Vector Jg(jacobian->num_rows());
+ Jg.setZero();
+ // The Jacobian is scaled implicitly by computing J * (D^-1 * (D^-1 * g))
+ // instead of (J * D^-1) * (D^-1 * g).
+ Vector scaled_gradient =
+ (gradient_.array() / diagonal_.array()).matrix();
+ jacobian->RightMultiply(scaled_gradient.data(), Jg.data());
+ alpha_ = gradient_.squaredNorm() / Jg.squaredNorm();
+}
+
void DoglegStrategy::ComputeDoglegStep(double* dogleg) {
VectorRef dogleg_step(dogleg, gradient_.rows());
@@ -137,6 +163,7 @@
if (gauss_newton_norm <= radius_) {
dogleg_step = gauss_newton_step_;
dogleg_step_norm_ = gauss_newton_norm;
+ dogleg_step.array() /= diagonal_.array();
VLOG(3) << "GaussNewton step size: " << dogleg_step_norm_
<< " radius: " << radius_;
return;
@@ -148,6 +175,7 @@
if (gradient_norm * alpha_ >= radius_) {
dogleg_step = (radius_ / gradient_norm) * gradient_;
dogleg_step_norm_ = radius_;
+ dogleg_step.array() /= diagonal_.array();
VLOG(3) << "Cauchy step size: " << dogleg_step_norm_
<< " radius: " << radius_;
return;
@@ -177,6 +205,7 @@
: (radius_ * radius_ - a_squared_norm) / (d + c);
dogleg_step = (alpha_ * (1.0 - beta)) * gradient_ + beta * gauss_newton_step_;
dogleg_step_norm_ = dogleg_step.norm();
+ dogleg_step.array() /= diagonal_.array();
VLOG(3) << "Dogleg step size: " << dogleg_step_norm_
<< " radius: " << radius_;
}
@@ -223,7 +252,7 @@
solve_options.q_tolerance = 0.0;
solve_options.r_tolerance = 0.0;
- lm_diagonal_ = (diagonal_ * mu_).array().sqrt();
+ lm_diagonal_ = diagonal_ * std::sqrt(mu_);
solve_options.D = lm_diagonal_.data();
InvalidateArray(n, gauss_newton_step_.data());
@@ -242,6 +271,14 @@
break;
}
+ // The scaled Gauss-Newton step is D * GN:
+ //
+ // - (D^-1 J^T J D^-1)^-1 (D^-1 g)
+ // = - D (J^T J)^-1 D D^-1 g
+ // = D -(J^T J)^-1 g
+ //
+ gauss_newton_step_.array() *= diagonal_.array();
+
return linear_solver_summary;
}
diff --git a/internal/ceres/dogleg_strategy.h b/internal/ceres/dogleg_strategy.h
index 17eb075..1505655 100644
--- a/internal/ceres/dogleg_strategy.h
+++ b/internal/ceres/dogleg_strategy.h
@@ -65,9 +65,10 @@
virtual double Radius() const;
private:
- void ComputeCauchyStep();
LinearSolver::Summary ComputeGaussNewtonStep(SparseMatrix* jacobian,
const double* residuals);
+ void ComputeCauchyPoint(SparseMatrix* jacobian);
+ void ComputeGradient(SparseMatrix* jacobian, const double* residuals);
void ComputeDoglegStep(double* step);
LinearSolver* linear_solver_;
@@ -92,7 +93,7 @@
const double increase_threshold_;
const double decrease_threshold_;
- Vector diagonal_;
+ Vector diagonal_; // sqrt(diag(J^T J))
Vector lm_diagonal_;
Vector gradient_;