blob: c24fe435f5449c7d8cd23abb0133853f431f95ee [file] [log] [blame]
Keir Mierle8ebb0732012-04-30 23:09:08 -07001// Ceres Solver - A fast non-linear least squares minimizer
2// Copyright 2010, 2011, 2012 Google Inc. All rights reserved.
3// http://code.google.com/p/ceres-solver/
4//
5// Redistribution and use in source and binary forms, with or without
6// modification, are permitted provided that the following conditions are met:
7//
8// * Redistributions of source code must retain the above copyright notice,
9// this list of conditions and the following disclaimer.
10// * Redistributions in binary form must reproduce the above copyright notice,
11// this list of conditions and the following disclaimer in the documentation
12// and/or other materials provided with the distribution.
13// * Neither the name of Google Inc. nor the names of its contributors may be
14// used to endorse or promote products derived from this software without
15// specific prior written permission.
16//
17// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27// POSSIBILITY OF SUCH DAMAGE.
28//
29// Author: sameeragarwal@google.com (Sameer Agarwal)
30
31#ifndef CERES_INTERNAL_SCHUR_ELIMINATOR_H_
32#define CERES_INTERNAL_SCHUR_ELIMINATOR_H_
33
34#include <map>
35#include <vector>
36#include "ceres/mutex.h"
37#include "ceres/block_random_access_matrix.h"
38#include "ceres/block_sparse_matrix.h"
39#include "ceres/block_structure.h"
40#include "ceres/linear_solver.h"
41#include "ceres/internal/eigen.h"
42#include "ceres/internal/scoped_ptr.h"
43
44namespace ceres {
45namespace internal {
46
47// Classes implementing the SchurEliminatorBase interface implement
48// variable elimination for linear least squares problems. Assuming
49// that the input linear system Ax = b can be partitioned into
50//
51// E y + F z = b
52//
53// Where x = [y;z] is a partition of the variables. The paritioning
54// of the variables is such that, E'E is a block diagonal matrix. Or
55// in other words, the parameter blocks in E form an independent set
56// of the of the graph implied by the block matrix A'A. Then, this
57// class provides the functionality to compute the Schur complement
58// system
59//
60// S z = r
61//
62// where
63//
64// S = F'F - F'E (E'E)^{-1} E'F and r = F'b - F'E(E'E)^(-1) E'b
65//
66// This is the Eliminate operation, i.e., construct the linear system
67// obtained by eliminating the variables in E.
68//
69// The eliminator also provides the reverse functionality, i.e. given
70// values for z it can back substitute for the values of y, by solving the
71// linear system
72//
73// Ey = b - F z
74//
75// which is done by observing that
76//
77// y = (E'E)^(-1) [E'b - E'F z]
78//
79// The eliminator has a number of requirements.
80//
81// The rows of A are ordered so that for every variable block in y,
82// all the rows containing that variable block occur as a vertically
83// contiguous block. i.e the matrix A looks like
84//
85// E F chunk
86// A = [ y1 0 0 0 | z1 0 0 0 z5] 1
87// [ y1 0 0 0 | z1 z2 0 0 0] 1
88// [ 0 y2 0 0 | 0 0 z3 0 0] 2
89// [ 0 0 y3 0 | z1 z2 z3 z4 z5] 3
90// [ 0 0 y3 0 | z1 0 0 0 z5] 3
91// [ 0 0 0 y4 | 0 0 0 0 z5] 4
92// [ 0 0 0 y4 | 0 z2 0 0 0] 4
93// [ 0 0 0 y4 | 0 0 0 0 0] 4
94// [ 0 0 0 0 | z1 0 0 0 0] non chunk blocks
95// [ 0 0 0 0 | 0 0 z3 z4 z5] non chunk blocks
96//
97// This structure should be reflected in the corresponding
98// CompressedRowBlockStructure object associated with A. The linear
99// system Ax = b should either be well posed or the array D below
100// should be non-null and the diagonal matrix corresponding to it
101// should be non-singular. For simplicity of exposition only the case
102// with a null D is described.
103//
104// The usual way to do the elimination is as follows. Starting with
105//
106// E y + F z = b
107//
108// we can form the normal equations,
109//
110// E'E y + E'F z = E'b
111// F'E y + F'F z = F'b
112//
113// multiplying both sides of the first equation by (E'E)^(-1) and then
114// by F'E we get
115//
116// F'E y + F'E (E'E)^(-1) E'F z = F'E (E'E)^(-1) E'b
117// F'E y + F'F z = F'b
118//
119// now subtracting the two equations we get
120//
121// [FF' - F'E (E'E)^(-1) E'F] z = F'b - F'E(E'E)^(-1) E'b
122//
123// Instead of forming the normal equations and operating on them as
124// general sparse matrices, the algorithm here deals with one
125// parameter block in y at a time. The rows corresponding to a single
126// parameter block yi are known as a chunk, and the algorithm operates
127// on one chunk at a time. The mathematics remains the same since the
128// reduced linear system can be shown to be the sum of the reduced
129// linear systems for each chunk. This can be seen by observing two
130// things.
131//
132// 1. E'E is a block diagonal matrix.
133//
134// 2. When E'F is computed, only the terms within a single chunk
135// interact, i.e for y1 column blocks when transposed and multiplied
136// with F, the only non-zero contribution comes from the blocks in
137// chunk1.
138//
139// Thus, the reduced linear system
140//
141// FF' - F'E (E'E)^(-1) E'F
142//
143// can be re-written as
144//
145// sum_k F_k F_k' - F_k'E_k (E_k'E_k)^(-1) E_k' F_k
146//
147// Where the sum is over chunks and E_k'E_k is dense matrix of size y1
148// x y1.
149//
150// Advanced usage. Uptil now it has been assumed that the user would
151// be interested in all of the Schur Complement S. However, it is also
152// possible to use this eliminator to obtain an arbitrary submatrix of
153// the full Schur complement. When the eliminator is generating the
154// blocks of S, it asks the RandomAccessBlockMatrix instance passed to
155// it if it has storage for that block. If it does, the eliminator
156// computes/updates it, if not it is skipped. This is useful when one
157// is interested in constructing a preconditioner based on the Schur
158// Complement, e.g., computing the block diagonal of S so that it can
159// be used as a preconditioner for an Iterative Substructuring based
160// solver [See Agarwal et al, Bundle Adjustment in the Large, ECCV
161// 2008 for an example of such use].
162//
163// Example usage: Please see schur_complement_solver.cc
164class SchurEliminatorBase {
165 public:
166 virtual ~SchurEliminatorBase() {}
167
168 // Initialize the eliminator. It is the user's responsibilty to call
169 // this function before calling Eliminate or BackSubstitute. It is
170 // also the caller's responsibilty to ensure that the
171 // CompressedRowBlockStructure object passed to this method is the
172 // same one (or is equivalent to) the one associated with the
173 // BlockSparseMatrixBase objects below.
174 virtual void Init(int num_eliminate_blocks,
175 const CompressedRowBlockStructure* bs) = 0;
176
177 // Compute the Schur complement system from the augmented linear
178 // least squares problem [A;D] x = [b;0]. The left hand side and the
179 // right hand side of the reduced linear system are returned in lhs
180 // and rhs respectively.
181 //
182 // It is the caller's responsibility to construct and initialize
183 // lhs. Depending upon the structure of the lhs object passed here,
184 // the full or a submatrix of the Schur complement will be computed.
185 //
186 // Since the Schur complement is a symmetric matrix, only the upper
187 // triangular part of the Schur complement is computed.
188 virtual void Eliminate(const BlockSparseMatrixBase* A,
189 const double* b,
190 const double* D,
191 BlockRandomAccessMatrix* lhs,
192 double* rhs) = 0;
193
194 // Given values for the variables z in the F block of A, solve for
195 // the optimal values of the variables y corresponding to the E
196 // block in A.
197 virtual void BackSubstitute(const BlockSparseMatrixBase* A,
198 const double* b,
199 const double* D,
200 const double* z,
201 double* y) = 0;
202 // Factory
203 static SchurEliminatorBase* Create(const LinearSolver::Options& options);
204};
205
206// Templated implementation of the SchurEliminatorBase interface. The
207// templating is on the sizes of the row, e and f blocks sizes in the
208// input matrix. In many problems, the sizes of one or more of these
209// blocks are constant, in that case, its worth passing these
210// parameters as template arguments so that they are visible to the
211// compiler and can be used for compile time optimization of the low
212// level linear algebra routines.
213//
214// This implementation is mulithreaded using OpenMP. The level of
215// parallelism is controlled by LinearSolver::Options::num_threads.
216template <int kRowBlockSize = Dynamic,
217 int kEBlockSize = Dynamic,
218 int kFBlockSize = Dynamic >
219class SchurEliminator : public SchurEliminatorBase {
220 public:
221 explicit SchurEliminator(const LinearSolver::Options& options)
222 : num_threads_(options.num_threads) {
223 }
224
225 // SchurEliminatorBase Interface
226 virtual ~SchurEliminator();
227 virtual void Init(int num_eliminate_blocks,
228 const CompressedRowBlockStructure* bs);
229 virtual void Eliminate(const BlockSparseMatrixBase* A,
230 const double* b,
231 const double* D,
232 BlockRandomAccessMatrix* lhs,
233 double* rhs);
234 virtual void BackSubstitute(const BlockSparseMatrixBase* A,
235 const double* b,
236 const double* D,
237 const double* z,
238 double* y);
239
240 private:
241 // Chunk objects store combinatorial information needed to
242 // efficiently eliminate a whole chunk out of the least squares
243 // problem. Consider the first chunk in the example matrix above.
244 //
245 // [ y1 0 0 0 | z1 0 0 0 z5]
246 // [ y1 0 0 0 | z1 z2 0 0 0]
247 //
248 // One of the intermediate quantities that needs to be calculated is
249 // for each row the product of the y block transposed with the
250 // non-zero z block, and the sum of these blocks across rows. A
251 // temporary array "buffer_" is used for computing and storing them
252 // and the buffer_layout maps the indices of the z-blocks to
253 // position in the buffer_ array. The size of the chunk is the
254 // number of row blocks/residual blocks for the particular y block
255 // being considered.
256 //
257 // For the example chunk shown above,
258 //
259 // size = 2
260 //
261 // The entries of buffer_layout will be filled in the following order.
262 //
263 // buffer_layout[z1] = 0
264 // buffer_layout[z5] = y1 * z1
265 // buffer_layout[z2] = y1 * z1 + y1 * z5
266 typedef map<int, int> BufferLayoutType;
267 struct Chunk {
268 Chunk() : size(0) {}
269 int size;
270 int start;
271 BufferLayoutType buffer_layout;
272 };
273
274 void ChunkDiagonalBlockAndGradient(
275 const Chunk& chunk,
276 const BlockSparseMatrixBase* A,
277 const double* b,
278 int row_block_counter,
279 typename EigenTypes<kEBlockSize, kEBlockSize>::Matrix* eet,
280 typename EigenTypes<kEBlockSize>::Vector* g,
281 double* buffer,
282 BlockRandomAccessMatrix* lhs);
283
284 void UpdateRhs(const Chunk& chunk,
285 const BlockSparseMatrixBase* A,
286 const double* b,
287 int row_block_counter,
288 const Vector& inverse_ete_g,
289 double* rhs);
290
291 void ChunkOuterProduct(const CompressedRowBlockStructure* bs,
292 const Matrix& inverse_eet,
293 const double* buffer,
294 const BufferLayoutType& buffer_layout,
295 BlockRandomAccessMatrix* lhs);
296 void EBlockRowOuterProduct(const BlockSparseMatrixBase* A,
297 int row_block_index,
298 BlockRandomAccessMatrix* lhs);
299
300
301 void NoEBlockRowsUpdate(const BlockSparseMatrixBase* A,
302 const double* b,
303 int row_block_counter,
304 BlockRandomAccessMatrix* lhs,
305 double* rhs);
306
307 void NoEBlockRowOuterProduct(const BlockSparseMatrixBase* A,
308 int row_block_index,
309 BlockRandomAccessMatrix* lhs);
310
311 int num_eliminate_blocks_;
312
313 // Block layout of the columns of the reduced linear system. Since
314 // the f blocks can be of varying size, this vector stores the
315 // position of each f block in the row/col of the reduced linear
316 // system. Thus lhs_row_layout_[i] is the row/col position of the
317 // i^th f block.
318 vector<int> lhs_row_layout_;
319
320 // Combinatorial structure of the chunks in A. For more information
321 // see the documentation of the Chunk object above.
322 vector<Chunk> chunks_;
323
324 // Buffer to store the products of the y and z blocks generated
325 // during the elimination phase.
326 scoped_array<double> buffer_;
327 int buffer_size_;
328 int num_threads_;
329 int uneliminated_row_begins_;
330
331 // Locks for the blocks in the right hand side of the reduced linear
332 // system.
333 vector<Mutex*> rhs_locks_;
334};
335
336} // namespace internal
337} // namespace ceres
338
339#endif // CERES_INTERNAL_SCHUR_ELIMINATOR_H_