Blame - internal/ceres/schur_eliminator.h - ceres-solver

blob: c24fe435f5449c7d8cd23abb0133853f431f95ee [file] [log] [blame]

Keir Mierle	8ebb073	2012-04-30 23:09:08 -0700	[diff] [blame]	1	// Ceres Solver - A fast non-linear least squares minimizer
				2	// Copyright 2010, 2011, 2012 Google Inc. All rights reserved.
				3	// http://code.google.com/p/ceres-solver/
				4	//
				5	// Redistribution and use in source and binary forms, with or without
				6	// modification, are permitted provided that the following conditions are met:
				7	//
				8	// * Redistributions of source code must retain the above copyright notice,
				9	// this list of conditions and the following disclaimer.
				10	// * Redistributions in binary form must reproduce the above copyright notice,
				11	// this list of conditions and the following disclaimer in the documentation
				12	// and/or other materials provided with the distribution.
				13	// * Neither the name of Google Inc. nor the names of its contributors may be
				14	// used to endorse or promote products derived from this software without
				15	// specific prior written permission.
				16	//
				17	// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
				18	// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
				19	// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
				20	// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
				21	// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
				22	// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
				23	// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
				24	// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
				25	// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
				26	// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
				27	// POSSIBILITY OF SUCH DAMAGE.
				28	//
				29	// Author: sameeragarwal@google.com (Sameer Agarwal)
				30
				31	#ifndef CERES_INTERNAL_SCHUR_ELIMINATOR_H_
				32	#define CERES_INTERNAL_SCHUR_ELIMINATOR_H_
				33
				34	#include <map>
				35	#include <vector>
				36	#include "ceres/mutex.h"
				37	#include "ceres/block_random_access_matrix.h"
				38	#include "ceres/block_sparse_matrix.h"
				39	#include "ceres/block_structure.h"
				40	#include "ceres/linear_solver.h"
				41	#include "ceres/internal/eigen.h"
				42	#include "ceres/internal/scoped_ptr.h"
				43
				44	namespace ceres {
				45	namespace internal {
				46
				47	// Classes implementing the SchurEliminatorBase interface implement
				48	// variable elimination for linear least squares problems. Assuming
				49	// that the input linear system Ax = b can be partitioned into
				50	//
				51	// E y + F z = b
				52	//
				53	// Where x = [y;z] is a partition of the variables. The paritioning
				54	// of the variables is such that, E'E is a block diagonal matrix. Or
				55	// in other words, the parameter blocks in E form an independent set
				56	// of the of the graph implied by the block matrix A'A. Then, this
				57	// class provides the functionality to compute the Schur complement
				58	// system
				59	//
				60	// S z = r
				61	//
				62	// where
				63	//
				64	// S = F'F - F'E (E'E)^{-1} E'F and r = F'b - F'E(E'E)^(-1) E'b
				65	//
				66	// This is the Eliminate operation, i.e., construct the linear system
				67	// obtained by eliminating the variables in E.
				68	//
				69	// The eliminator also provides the reverse functionality, i.e. given
				70	// values for z it can back substitute for the values of y, by solving the
				71	// linear system
				72	//
				73	// Ey = b - F z
				74	//
				75	// which is done by observing that
				76	//
				77	// y = (E'E)^(-1) [E'b - E'F z]
				78	//
				79	// The eliminator has a number of requirements.
				80	//
				81	// The rows of A are ordered so that for every variable block in y,
				82	// all the rows containing that variable block occur as a vertically
				83	// contiguous block. i.e the matrix A looks like
				84	//
				85	// E F chunk
				86	// A = [ y1 0 0 0 \| z1 0 0 0 z5] 1
				87	// [ y1 0 0 0 \| z1 z2 0 0 0] 1
				88	// [ 0 y2 0 0 \| 0 0 z3 0 0] 2
				89	// [ 0 0 y3 0 \| z1 z2 z3 z4 z5] 3
				90	// [ 0 0 y3 0 \| z1 0 0 0 z5] 3
				91	// [ 0 0 0 y4 \| 0 0 0 0 z5] 4
				92	// [ 0 0 0 y4 \| 0 z2 0 0 0] 4
				93	// [ 0 0 0 y4 \| 0 0 0 0 0] 4
				94	// [ 0 0 0 0 \| z1 0 0 0 0] non chunk blocks
				95	// [ 0 0 0 0 \| 0 0 z3 z4 z5] non chunk blocks
				96	//
				97	// This structure should be reflected in the corresponding
				98	// CompressedRowBlockStructure object associated with A. The linear
				99	// system Ax = b should either be well posed or the array D below
				100	// should be non-null and the diagonal matrix corresponding to it
				101	// should be non-singular. For simplicity of exposition only the case
				102	// with a null D is described.
				103	//
				104	// The usual way to do the elimination is as follows. Starting with
				105	//
				106	// E y + F z = b
				107	//
				108	// we can form the normal equations,
				109	//
				110	// E'E y + E'F z = E'b
				111	// F'E y + F'F z = F'b
				112	//
				113	// multiplying both sides of the first equation by (E'E)^(-1) and then
				114	// by F'E we get
				115	//
				116	// F'E y + F'E (E'E)^(-1) E'F z = F'E (E'E)^(-1) E'b
				117	// F'E y + F'F z = F'b
				118	//
				119	// now subtracting the two equations we get
				120	//
				121	// [FF' - F'E (E'E)^(-1) E'F] z = F'b - F'E(E'E)^(-1) E'b
				122	//
				123	// Instead of forming the normal equations and operating on them as
				124	// general sparse matrices, the algorithm here deals with one
				125	// parameter block in y at a time. The rows corresponding to a single
				126	// parameter block yi are known as a chunk, and the algorithm operates
				127	// on one chunk at a time. The mathematics remains the same since the
				128	// reduced linear system can be shown to be the sum of the reduced
				129	// linear systems for each chunk. This can be seen by observing two
				130	// things.
				131	//
				132	// 1. E'E is a block diagonal matrix.
				133	//
				134	// 2. When E'F is computed, only the terms within a single chunk
				135	// interact, i.e for y1 column blocks when transposed and multiplied
				136	// with F, the only non-zero contribution comes from the blocks in
				137	// chunk1.
				138	//
				139	// Thus, the reduced linear system
				140	//
				141	// FF' - F'E (E'E)^(-1) E'F
				142	//
				143	// can be re-written as
				144	//
				145	// sum_k F_k F_k' - F_k'E_k (E_k'E_k)^(-1) E_k' F_k
				146	//
				147	// Where the sum is over chunks and E_k'E_k is dense matrix of size y1
				148	// x y1.
				149	//
				150	// Advanced usage. Uptil now it has been assumed that the user would
				151	// be interested in all of the Schur Complement S. However, it is also
				152	// possible to use this eliminator to obtain an arbitrary submatrix of
				153	// the full Schur complement. When the eliminator is generating the
				154	// blocks of S, it asks the RandomAccessBlockMatrix instance passed to
				155	// it if it has storage for that block. If it does, the eliminator
				156	// computes/updates it, if not it is skipped. This is useful when one
				157	// is interested in constructing a preconditioner based on the Schur
				158	// Complement, e.g., computing the block diagonal of S so that it can
				159	// be used as a preconditioner for an Iterative Substructuring based
				160	// solver [See Agarwal et al, Bundle Adjustment in the Large, ECCV
				161	// 2008 for an example of such use].
				162	//
				163	// Example usage: Please see schur_complement_solver.cc
				164	class SchurEliminatorBase {
				165	public:
				166	virtual ~SchurEliminatorBase() {}
				167
				168	// Initialize the eliminator. It is the user's responsibilty to call
				169	// this function before calling Eliminate or BackSubstitute. It is
				170	// also the caller's responsibilty to ensure that the
				171	// CompressedRowBlockStructure object passed to this method is the
				172	// same one (or is equivalent to) the one associated with the
				173	// BlockSparseMatrixBase objects below.
				174	virtual void Init(int num_eliminate_blocks,
				175	const CompressedRowBlockStructure* bs) = 0;
				176
				177	// Compute the Schur complement system from the augmented linear
				178	// least squares problem [A;D] x = [b;0]. The left hand side and the
				179	// right hand side of the reduced linear system are returned in lhs
				180	// and rhs respectively.
				181	//
				182	// It is the caller's responsibility to construct and initialize
				183	// lhs. Depending upon the structure of the lhs object passed here,
				184	// the full or a submatrix of the Schur complement will be computed.
				185	//
				186	// Since the Schur complement is a symmetric matrix, only the upper
				187	// triangular part of the Schur complement is computed.
				188	virtual void Eliminate(const BlockSparseMatrixBase* A,
				189	const double* b,
				190	const double* D,
				191	BlockRandomAccessMatrix* lhs,
				192	double* rhs) = 0;
				193
				194	// Given values for the variables z in the F block of A, solve for
				195	// the optimal values of the variables y corresponding to the E
				196	// block in A.
				197	virtual void BackSubstitute(const BlockSparseMatrixBase* A,
				198	const double* b,
				199	const double* D,
				200	const double* z,
				201	double* y) = 0;
				202	// Factory
				203	static SchurEliminatorBase* Create(const LinearSolver::Options& options);
				204	};
				205
				206	// Templated implementation of the SchurEliminatorBase interface. The
				207	// templating is on the sizes of the row, e and f blocks sizes in the
				208	// input matrix. In many problems, the sizes of one or more of these
				209	// blocks are constant, in that case, its worth passing these
				210	// parameters as template arguments so that they are visible to the
				211	// compiler and can be used for compile time optimization of the low
				212	// level linear algebra routines.
				213	//
				214	// This implementation is mulithreaded using OpenMP. The level of
				215	// parallelism is controlled by LinearSolver::Options::num_threads.
				216	template <int kRowBlockSize = Dynamic,
				217	int kEBlockSize = Dynamic,
				218	int kFBlockSize = Dynamic >
				219	class SchurEliminator : public SchurEliminatorBase {
				220	public:
				221	explicit SchurEliminator(const LinearSolver::Options& options)
				222	: num_threads_(options.num_threads) {
				223	}
				224
				225	// SchurEliminatorBase Interface
				226	virtual ~SchurEliminator();
				227	virtual void Init(int num_eliminate_blocks,
				228	const CompressedRowBlockStructure* bs);
				229	virtual void Eliminate(const BlockSparseMatrixBase* A,
				230	const double* b,
				231	const double* D,
				232	BlockRandomAccessMatrix* lhs,
				233	double* rhs);
				234	virtual void BackSubstitute(const BlockSparseMatrixBase* A,
				235	const double* b,
				236	const double* D,
				237	const double* z,
				238	double* y);
				239
				240	private:
				241	// Chunk objects store combinatorial information needed to
				242	// efficiently eliminate a whole chunk out of the least squares
				243	// problem. Consider the first chunk in the example matrix above.
				244	//
				245	// [ y1 0 0 0 \| z1 0 0 0 z5]
				246	// [ y1 0 0 0 \| z1 z2 0 0 0]
				247	//
				248	// One of the intermediate quantities that needs to be calculated is
				249	// for each row the product of the y block transposed with the
				250	// non-zero z block, and the sum of these blocks across rows. A
				251	// temporary array "buffer_" is used for computing and storing them
				252	// and the buffer_layout maps the indices of the z-blocks to
				253	// position in the buffer_ array. The size of the chunk is the
				254	// number of row blocks/residual blocks for the particular y block
				255	// being considered.
				256	//
				257	// For the example chunk shown above,
				258	//
				259	// size = 2
				260	//
				261	// The entries of buffer_layout will be filled in the following order.
				262	//
				263	// buffer_layout[z1] = 0
				264	// buffer_layout[z5] = y1 * z1
				265	// buffer_layout[z2] = y1 * z1 + y1 * z5
				266	typedef map<int, int> BufferLayoutType;
				267	struct Chunk {
				268	Chunk() : size(0) {}
				269	int size;
				270	int start;
				271	BufferLayoutType buffer_layout;
				272	};
				273
				274	void ChunkDiagonalBlockAndGradient(
				275	const Chunk& chunk,
				276	const BlockSparseMatrixBase* A,
				277	const double* b,
				278	int row_block_counter,
				279	typename EigenTypes<kEBlockSize, kEBlockSize>::Matrix* eet,
				280	typename EigenTypes<kEBlockSize>::Vector* g,
				281	double* buffer,
				282	BlockRandomAccessMatrix* lhs);
				283
				284	void UpdateRhs(const Chunk& chunk,
				285	const BlockSparseMatrixBase* A,
				286	const double* b,
				287	int row_block_counter,
				288	const Vector& inverse_ete_g,
				289	double* rhs);
				290
				291	void ChunkOuterProduct(const CompressedRowBlockStructure* bs,
				292	const Matrix& inverse_eet,
				293	const double* buffer,
				294	const BufferLayoutType& buffer_layout,
				295	BlockRandomAccessMatrix* lhs);
				296	void EBlockRowOuterProduct(const BlockSparseMatrixBase* A,
				297	int row_block_index,
				298	BlockRandomAccessMatrix* lhs);
				299
				300
				301	void NoEBlockRowsUpdate(const BlockSparseMatrixBase* A,
				302	const double* b,
				303	int row_block_counter,
				304	BlockRandomAccessMatrix* lhs,
				305	double* rhs);
				306
				307	void NoEBlockRowOuterProduct(const BlockSparseMatrixBase* A,
				308	int row_block_index,
				309	BlockRandomAccessMatrix* lhs);
				310
				311	int num_eliminate_blocks_;
				312
				313	// Block layout of the columns of the reduced linear system. Since
				314	// the f blocks can be of varying size, this vector stores the
				315	// position of each f block in the row/col of the reduced linear
				316	// system. Thus lhs_row_layout_[i] is the row/col position of the
				317	// i^th f block.
				318	vector<int> lhs_row_layout_;
				319
				320	// Combinatorial structure of the chunks in A. For more information
				321	// see the documentation of the Chunk object above.
				322	vector<Chunk> chunks_;
				323
				324	// Buffer to store the products of the y and z blocks generated
				325	// during the elimination phase.
				326	scoped_array<double> buffer_;
				327	int buffer_size_;
				328	int num_threads_;
				329	int uneliminated_row_begins_;
				330
				331	// Locks for the blocks in the right hand side of the reduced linear
				332	// system.
				333	vector<Mutex*> rhs_locks_;
				334	};
				335
				336	} // namespace internal
				337	} // namespace ceres
				338
				339	#endif // CERES_INTERNAL_SCHUR_ELIMINATOR_H_