Blame - internal/ceres/schur_eliminator_impl.h - ceres-solver

blob: c09b7fb3a77bd2ec2220806936e5aaa9c9f2ff27 [file] [log] [blame]

Keir Mierle	8ebb073	2012-04-30 23:09:08 -0700	[diff] [blame]	1	// Ceres Solver - A fast non-linear least squares minimizer
				2	// Copyright 2010, 2011, 2012 Google Inc. All rights reserved.
				3	// http://code.google.com/p/ceres-solver/
				4	//
				5	// Redistribution and use in source and binary forms, with or without
				6	// modification, are permitted provided that the following conditions are met:
				7	//
				8	// * Redistributions of source code must retain the above copyright notice,
				9	// this list of conditions and the following disclaimer.
				10	// * Redistributions in binary form must reproduce the above copyright notice,
				11	// this list of conditions and the following disclaimer in the documentation
				12	// and/or other materials provided with the distribution.
				13	// * Neither the name of Google Inc. nor the names of its contributors may be
				14	// used to endorse or promote products derived from this software without
				15	// specific prior written permission.
				16	//
				17	// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
				18	// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
				19	// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
				20	// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
				21	// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
				22	// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
				23	// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
				24	// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
				25	// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
				26	// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
				27	// POSSIBILITY OF SUCH DAMAGE.
				28	//
				29	// Author: sameeragarwal@google.com (Sameer Agarwal)
				30	//
				31	// TODO(sameeragarwal): row_block_counter can perhaps be replaced by
				32	// Chunk::start ?
				33
				34	#ifndef CERES_INTERNAL_SCHUR_ELIMINATOR_IMPL_H_
				35	#define CERES_INTERNAL_SCHUR_ELIMINATOR_IMPL_H_
				36
Keir Mierle	8ebb073	2012-04-30 23:09:08 -0700	[diff] [blame]	37	// Eigen has an internal threshold switching between different matrix
				38	// multiplication algorithms. In particular for matrices larger than
				39	// EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD it uses a cache friendly
				40	// matrix matrix product algorithm that has a higher setup cost. For
				41	// matrix sizes close to this threshold, especially when the matrices
				42	// are thin and long, the default choice may not be optimal. This is
				43	// the case for us, as the default choice causes a 30% performance
				44	// regression when we moved from Eigen2 to Eigen3.
Sameer Agarwal	296fa9b	2013-04-02 09:44:15 -0700	[diff] [blame]	45
Keir Mierle	8ebb073	2012-04-30 23:09:08 -0700	[diff] [blame]	46	#define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 10
				47
Sameer Agarwal	296fa9b	2013-04-02 09:44:15 -0700	[diff] [blame]	48	#ifdef CERES_USE_OPENMP
				49	#include <omp.h>
				50	#endif
				51
Keir Mierle	8ebb073	2012-04-30 23:09:08 -0700	[diff] [blame]	52	#include <algorithm>
				53	#include <map>
Keir Mierle	8ebb073	2012-04-30 23:09:08 -0700	[diff] [blame]	54	#include "ceres/block_random_access_matrix.h"
				55	#include "ceres/block_sparse_matrix.h"
				56	#include "ceres/block_structure.h"
Sameer Agarwal	296fa9b	2013-04-02 09:44:15 -0700	[diff] [blame]	57	#include "ceres/internal/eigen.h"
Sameer Agarwal	487250e	2013-04-05 14:20:37 -0700	[diff] [blame]	58	#include "ceres/internal/fixed_array.h"
Sameer Agarwal	296fa9b	2013-04-02 09:44:15 -0700	[diff] [blame]	59	#include "ceres/internal/scoped_ptr.h"
Keir Mierle	8ebb073	2012-04-30 23:09:08 -0700	[diff] [blame]	60	#include "ceres/map_util.h"
				61	#include "ceres/schur_eliminator.h"
Sameer Agarwal	367b65e	2013-08-09 10:35:37 -0700	[diff] [blame^]	62	#include "ceres/small_blas.h"
Keir Mierle	8ebb073	2012-04-30 23:09:08 -0700	[diff] [blame]	63	#include "ceres/stl_util.h"
Sameer Agarwal	487250e	2013-04-05 14:20:37 -0700	[diff] [blame]	64	#include "Eigen/Dense"
Sameer Agarwal	509f68c	2013-02-20 01:39:03 -0800	[diff] [blame]	65	#include "glog/logging.h"
Keir Mierle	8ebb073	2012-04-30 23:09:08 -0700	[diff] [blame]	66
				67	namespace ceres {
				68	namespace internal {
				69
				70	template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
				71	SchurEliminator<kRowBlockSize, kEBlockSize, kFBlockSize>::~SchurEliminator() {
				72	STLDeleteElements(&rhs_locks_);
				73	}
				74
				75	template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
				76	void
				77	SchurEliminator<kRowBlockSize, kEBlockSize, kFBlockSize>::
				78	Init(int num_eliminate_blocks, const CompressedRowBlockStructure* bs) {
				79	CHECK_GT(num_eliminate_blocks, 0)
				80	<< "SchurComplementSolver cannot be initialized with "
				81	<< "num_eliminate_blocks = 0.";
				82
				83	num_eliminate_blocks_ = num_eliminate_blocks;
				84
				85	const int num_col_blocks = bs->cols.size();
				86	const int num_row_blocks = bs->rows.size();
				87
				88	buffer_size_ = 1;
				89	chunks_.clear();
				90	lhs_row_layout_.clear();
				91
				92	int lhs_num_rows = 0;
				93	// Add a map object for each block in the reduced linear system
				94	// and build the row/column block structure of the reduced linear
				95	// system.
				96	lhs_row_layout_.resize(num_col_blocks - num_eliminate_blocks_);
				97	for (int i = num_eliminate_blocks_; i < num_col_blocks; ++i) {
				98	lhs_row_layout_[i - num_eliminate_blocks_] = lhs_num_rows;
				99	lhs_num_rows += bs->cols[i].size;
				100	}
				101
				102	int r = 0;
				103	// Iterate over the row blocks of A, and detect the chunks. The
				104	// matrix should already have been ordered so that all rows
				105	// containing the same y block are vertically contiguous. Along
				106	// the way also compute the amount of space each chunk will need
				107	// to perform the elimination.
				108	while (r < num_row_blocks) {
				109	const int chunk_block_id = bs->rows[r].cells.front().block_id;
				110	if (chunk_block_id >= num_eliminate_blocks_) {
				111	break;
				112	}
				113
				114	chunks_.push_back(Chunk());
				115	Chunk& chunk = chunks_.back();
				116	chunk.size = 0;
				117	chunk.start = r;
				118	int buffer_size = 0;
				119	const int e_block_size = bs->cols[chunk_block_id].size;
				120
				121	// Add to the chunk until the first block in the row is
				122	// different than the one in the first row for the chunk.
				123	while (r + chunk.size < num_row_blocks) {
				124	const CompressedRow& row = bs->rows[r + chunk.size];
				125	if (row.cells.front().block_id != chunk_block_id) {
				126	break;
				127	}
				128
				129	// Iterate over the blocks in the row, ignoring the first
				130	// block since it is the one to be eliminated.
				131	for (int c = 1; c < row.cells.size(); ++c) {
				132	const Cell& cell = row.cells[c];
				133	if (InsertIfNotPresent(
				134	&(chunk.buffer_layout), cell.block_id, buffer_size)) {
				135	buffer_size += e_block_size * bs->cols[cell.block_id].size;
				136	}
				137	}
				138
				139	buffer_size_ = max(buffer_size, buffer_size_);
				140	++chunk.size;
				141	}
				142
				143	CHECK_GT(chunk.size, 0);
				144	r += chunk.size;
				145	}
				146	const Chunk& chunk = chunks_.back();
				147
				148	uneliminated_row_begins_ = chunk.start + chunk.size;
				149	if (num_threads_ > 1) {
				150	random_shuffle(chunks_.begin(), chunks_.end());
				151	}
				152
				153	buffer_.reset(new double[buffer_size_ * num_threads_]);
				154
Sameer Agarwal	296fa9b	2013-04-02 09:44:15 -0700	[diff] [blame]	155	// chunk_outer_product_buffer_ only needs to store e_block_size *
				156	// f_block_size, which is always less than buffer_size_, so we just
				157	// allocate buffer_size_ per thread.
				158	chunk_outer_product_buffer_.reset(new double[buffer_size_ * num_threads_]);
				159
Keir Mierle	8ebb073	2012-04-30 23:09:08 -0700	[diff] [blame]	160	STLDeleteElements(&rhs_locks_);
				161	rhs_locks_.resize(num_col_blocks - num_eliminate_blocks_);
				162	for (int i = 0; i < num_col_blocks - num_eliminate_blocks_; ++i) {
				163	rhs_locks_[i] = new Mutex;
				164	}
Keir Mierle	8ebb073	2012-04-30 23:09:08 -0700	[diff] [blame]	165	}
				166
				167	template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
				168	void
				169	SchurEliminator<kRowBlockSize, kEBlockSize, kFBlockSize>::
Sameer Agarwal	c1e10d9	2013-04-24 11:58:24 -0700	[diff] [blame]	170	Eliminate(const BlockSparseMatrix* A,
Keir Mierle	8ebb073	2012-04-30 23:09:08 -0700	[diff] [blame]	171	const double* b,
				172	const double* D,
				173	BlockRandomAccessMatrix* lhs,
				174	double* rhs) {
				175	if (lhs->num_rows() > 0) {
				176	lhs->SetZero();
				177	VectorRef(rhs, lhs->num_rows()).setZero();
				178	}
				179
				180	const CompressedRowBlockStructure* bs = A->block_structure();
				181	const int num_col_blocks = bs->cols.size();
				182
				183	// Add the diagonal to the schur complement.
				184	if (D != NULL) {
				185	#pragma omp parallel for num_threads(num_threads_) schedule(dynamic)
				186	for (int i = num_eliminate_blocks_; i < num_col_blocks; ++i) {
				187	const int block_id = i - num_eliminate_blocks_;
				188	int r, c, row_stride, col_stride;
				189	CellInfo* cell_info = lhs->GetCell(block_id, block_id,
				190	&r, &c,
				191	&row_stride, &col_stride);
				192	if (cell_info != NULL) {
				193	const int block_size = bs->cols[i].size;
				194	typename EigenTypes<kFBlockSize>::ConstVectorRef
				195	diag(D + bs->cols[i].position, block_size);
				196
Keir Mierle	ff71d74	2012-08-10 17:05:15 -0700	[diff] [blame]	197	CeresMutexLock l(&cell_info->m);
Keir Mierle	8ebb073	2012-04-30 23:09:08 -0700	[diff] [blame]	198	MatrixRef m(cell_info->values, row_stride, col_stride);
				199	m.block(r, c, block_size, block_size).diagonal()
				200	+= diag.array().square().matrix();
				201	}
				202	}
				203	}
				204
				205	// Eliminate y blocks one chunk at a time. For each chunk,x3
				206	// compute the entries of the normal equations and the gradient
				207	// vector block corresponding to the y block and then apply
				208	// Gaussian elimination to them. The matrix ete stores the normal
				209	// matrix corresponding to the block being eliminated and array
				210	// buffer_ contains the non-zero blocks in the row corresponding
				211	// to this y block in the normal equations. This computation is
				212	// done in ChunkDiagonalBlockAndGradient. UpdateRhs then applies
				213	// gaussian elimination to the rhs of the normal equations,
				214	// updating the rhs of the reduced linear system by modifying rhs
				215	// blocks for all the z blocks that share a row block/residual
				216	// term with the y block. EliminateRowOuterProduct does the
				217	// corresponding operation for the lhs of the reduced linear
				218	// system.
				219	#pragma omp parallel for num_threads(num_threads_) schedule(dynamic)
				220	for (int i = 0; i < chunks_.size(); ++i) {
				221	#ifdef CERES_USE_OPENMP
				222	int thread_id = omp_get_thread_num();
				223	#else
				224	int thread_id = 0;
				225	#endif
				226	double* buffer = buffer_.get() + thread_id * buffer_size_;
				227	const Chunk& chunk = chunks_[i];
				228	const int e_block_id = bs->rows[chunk.start].cells.front().block_id;
				229	const int e_block_size = bs->cols[e_block_id].size;
				230
				231	VectorRef(buffer, buffer_size_).setZero();
				232
				233	typename EigenTypes<kEBlockSize, kEBlockSize>::Matrix
				234	ete(e_block_size, e_block_size);
				235
				236	if (D != NULL) {
				237	const typename EigenTypes<kEBlockSize>::ConstVectorRef
				238	diag(D + bs->cols[e_block_id].position, e_block_size);
				239	ete = diag.array().square().matrix().asDiagonal();
				240	} else {
				241	ete.setZero();
				242	}
				243
Sameer Agarwal	487250e	2013-04-05 14:20:37 -0700	[diff] [blame]	244	FixedArray<double, 8> g(e_block_size);
				245	typename EigenTypes<kEBlockSize>::VectorRef gref(g.get(), e_block_size);
				246	gref.setZero();
Keir Mierle	8ebb073	2012-04-30 23:09:08 -0700	[diff] [blame]	247
				248	// We are going to be computing
				249	//
				250	// S += F'F - F'E(E'E)^{-1}E'F
				251	//
				252	// for each Chunk. The computation is broken down into a number of
				253	// function calls as below.
				254
				255	// Compute the outer product of the e_blocks with themselves (ete
				256	// = E'E). Compute the product of the e_blocks with the
				257	// corresonding f_blocks (buffer = E'F), the gradient of the terms
				258	// in this chunk (g) and add the outer product of the f_blocks to
				259	// Schur complement (S += F'F).
				260	ChunkDiagonalBlockAndGradient(
Sameer Agarwal	487250e	2013-04-05 14:20:37 -0700	[diff] [blame]	261	chunk, A, b, chunk.start, &ete, g.get(), buffer, lhs);
Keir Mierle	8ebb073	2012-04-30 23:09:08 -0700	[diff] [blame]	262
				263	// Normally one wouldn't compute the inverse explicitly, but
				264	// e_block_size will typically be a small number like 3, in
				265	// which case its much faster to compute the inverse once and
				266	// use it to multiply other matrices/vectors instead of doing a
				267	// Solve call over and over again.
				268	typename EigenTypes<kEBlockSize, kEBlockSize>::Matrix inverse_ete =
				269	ete
				270	.template selfadjointView<Eigen::Upper>()
Sameer Agarwal	296fa9b	2013-04-02 09:44:15 -0700	[diff] [blame]	271	.llt()
Keir Mierle	8ebb073	2012-04-30 23:09:08 -0700	[diff] [blame]	272	.solve(Matrix::Identity(e_block_size, e_block_size));
				273
				274	// For the current chunk compute and update the rhs of the reduced
				275	// linear system.
				276	//
				277	// rhs = F'b - F'E(E'E)^(-1) E'b
Sameer Agarwal	487250e	2013-04-05 14:20:37 -0700	[diff] [blame]	278
				279	FixedArray<double, 8> inverse_ete_g(e_block_size);
				280	MatrixVectorMultiply<kEBlockSize, kEBlockSize, 0>(
Sameer Agarwal	e6707b2	2013-04-16 15:44:23 -0700	[diff] [blame]	281	inverse_ete.data(),
				282	e_block_size,
				283	e_block_size,
				284	g.get(),
				285	inverse_ete_g.get());
Sameer Agarwal	487250e	2013-04-05 14:20:37 -0700	[diff] [blame]	286
				287	UpdateRhs(chunk, A, b, chunk.start, inverse_ete_g.get(), rhs);
Keir Mierle	8ebb073	2012-04-30 23:09:08 -0700	[diff] [blame]	288
				289	// S -= F'E(E'E)^{-1}E'F
				290	ChunkOuterProduct(bs, inverse_ete, buffer, chunk.buffer_layout, lhs);
				291	}
				292
				293	// For rows with no e_blocks, the schur complement update reduces to
				294	// S += F'F.
				295	NoEBlockRowsUpdate(A, b, uneliminated_row_begins_, lhs, rhs);
				296	}
				297
				298	template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
				299	void
				300	SchurEliminator<kRowBlockSize, kEBlockSize, kFBlockSize>::
Sameer Agarwal	c1e10d9	2013-04-24 11:58:24 -0700	[diff] [blame]	301	BackSubstitute(const BlockSparseMatrix* A,
Keir Mierle	8ebb073	2012-04-30 23:09:08 -0700	[diff] [blame]	302	const double* b,
				303	const double* D,
				304	const double* z,
				305	double* y) {
				306	const CompressedRowBlockStructure* bs = A->block_structure();
				307	#pragma omp parallel for num_threads(num_threads_) schedule(dynamic)
				308	for (int i = 0; i < chunks_.size(); ++i) {
				309	const Chunk& chunk = chunks_[i];
				310	const int e_block_id = bs->rows[chunk.start].cells.front().block_id;
				311	const int e_block_size = bs->cols[e_block_id].size;
				312
Sameer Agarwal	296fa9b	2013-04-02 09:44:15 -0700	[diff] [blame]	313	double* y_ptr = y + bs->cols[e_block_id].position;
				314	typename EigenTypes<kEBlockSize>::VectorRef y_block(y_ptr, e_block_size);
Keir Mierle	8ebb073	2012-04-30 23:09:08 -0700	[diff] [blame]	315
				316	typename EigenTypes<kEBlockSize, kEBlockSize>::Matrix
				317	ete(e_block_size, e_block_size);
				318	if (D != NULL) {
				319	const typename EigenTypes<kEBlockSize>::ConstVectorRef
				320	diag(D + bs->cols[e_block_id].position, e_block_size);
				321	ete = diag.array().square().matrix().asDiagonal();
				322	} else {
				323	ete.setZero();
				324	}
				325
Sameer Agarwal	c1e10d9	2013-04-24 11:58:24 -0700	[diff] [blame]	326	const double* values = A->values();
Keir Mierle	8ebb073	2012-04-30 23:09:08 -0700	[diff] [blame]	327	for (int j = 0; j < chunk.size; ++j) {
				328	const CompressedRow& row = bs->rows[chunk.start + j];
Keir Mierle	8ebb073	2012-04-30 23:09:08 -0700	[diff] [blame]	329	const Cell& e_cell = row.cells.front();
				330	DCHECK_EQ(e_block_id, e_cell.block_id);
Keir Mierle	8ebb073	2012-04-30 23:09:08 -0700	[diff] [blame]	331
Sameer Agarwal	487250e	2013-04-05 14:20:37 -0700	[diff] [blame]	332	FixedArray<double, 8> sj(row.block.size);
				333
				334	typename EigenTypes<kRowBlockSize>::VectorRef(sj.get(), row.block.size) =
Keir Mierle	8ebb073	2012-04-30 23:09:08 -0700	[diff] [blame]	335	typename EigenTypes<kRowBlockSize>::ConstVectorRef
Sameer Agarwal	296fa9b	2013-04-02 09:44:15 -0700	[diff] [blame]	336	(b + bs->rows[chunk.start + j].block.position, row.block.size);
Keir Mierle	8ebb073	2012-04-30 23:09:08 -0700	[diff] [blame]	337
				338	for (int c = 1; c < row.cells.size(); ++c) {
				339	const int f_block_id = row.cells[c].block_id;
				340	const int f_block_size = bs->cols[f_block_id].size;
Keir Mierle	8ebb073	2012-04-30 23:09:08 -0700	[diff] [blame]	341	const int r_block = f_block_id - num_eliminate_blocks_;
				342
Sameer Agarwal	296fa9b	2013-04-02 09:44:15 -0700	[diff] [blame]	343	MatrixVectorMultiply<kRowBlockSize, kFBlockSize, -1>(
Sameer Agarwal	c1e10d9	2013-04-24 11:58:24 -0700	[diff] [blame]	344	values + row.cells[c].position, row.block.size, f_block_size,
Sameer Agarwal	296fa9b	2013-04-02 09:44:15 -0700	[diff] [blame]	345	z + lhs_row_layout_[r_block],
Sameer Agarwal	487250e	2013-04-05 14:20:37 -0700	[diff] [blame]	346	sj.get());
Keir Mierle	8ebb073	2012-04-30 23:09:08 -0700	[diff] [blame]	347	}
				348
Sameer Agarwal	296fa9b	2013-04-02 09:44:15 -0700	[diff] [blame]	349	MatrixTransposeVectorMultiply<kRowBlockSize, kEBlockSize, 1>(
Sameer Agarwal	c1e10d9	2013-04-24 11:58:24 -0700	[diff] [blame]	350	values + e_cell.position, row.block.size, e_block_size,
Sameer Agarwal	487250e	2013-04-05 14:20:37 -0700	[diff] [blame]	351	sj.get(),
Sameer Agarwal	296fa9b	2013-04-02 09:44:15 -0700	[diff] [blame]	352	y_ptr);
				353
				354	MatrixTransposeMatrixMultiply
Sameer Agarwal	487250e	2013-04-05 14:20:37 -0700	[diff] [blame]	355	<kRowBlockSize, kEBlockSize, kRowBlockSize, kEBlockSize, 1>(
Sameer Agarwal	c1e10d9	2013-04-24 11:58:24 -0700	[diff] [blame]	356	values + e_cell.position, row.block.size, e_block_size,
				357	values + e_cell.position, row.block.size, e_block_size,
Sameer Agarwal	296fa9b	2013-04-02 09:44:15 -0700	[diff] [blame]	358	ete.data(), 0, 0, e_block_size, e_block_size);
Keir Mierle	8ebb073	2012-04-30 23:09:08 -0700	[diff] [blame]	359	}
				360
Sameer Agarwal	296fa9b	2013-04-02 09:44:15 -0700	[diff] [blame]	361	ete.llt().solveInPlace(y_block);
Keir Mierle	8ebb073	2012-04-30 23:09:08 -0700	[diff] [blame]	362	}
				363	}
				364
				365	// Update the rhs of the reduced linear system. Compute
				366	//
				367	// F'b - F'E(E'E)^(-1) E'b
				368	template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
				369	void
				370	SchurEliminator<kRowBlockSize, kEBlockSize, kFBlockSize>::
				371	UpdateRhs(const Chunk& chunk,
Sameer Agarwal	c1e10d9	2013-04-24 11:58:24 -0700	[diff] [blame]	372	const BlockSparseMatrix* A,
Keir Mierle	8ebb073	2012-04-30 23:09:08 -0700	[diff] [blame]	373	const double* b,
				374	int row_block_counter,
Sameer Agarwal	487250e	2013-04-05 14:20:37 -0700	[diff] [blame]	375	const double* inverse_ete_g,
Keir Mierle	8ebb073	2012-04-30 23:09:08 -0700	[diff] [blame]	376	double* rhs) {
				377	const CompressedRowBlockStructure* bs = A->block_structure();
Sameer Agarwal	487250e	2013-04-05 14:20:37 -0700	[diff] [blame]	378	const int e_block_id = bs->rows[chunk.start].cells.front().block_id;
				379	const int e_block_size = bs->cols[e_block_id].size;
				380
Keir Mierle	8ebb073	2012-04-30 23:09:08 -0700	[diff] [blame]	381	int b_pos = bs->rows[row_block_counter].block.position;
Sameer Agarwal	c1e10d9	2013-04-24 11:58:24 -0700	[diff] [blame]	382	const double* values = A->values();
Keir Mierle	8ebb073	2012-04-30 23:09:08 -0700	[diff] [blame]	383	for (int j = 0; j < chunk.size; ++j) {
				384	const CompressedRow& row = bs->rows[row_block_counter + j];
Keir Mierle	8ebb073	2012-04-30 23:09:08 -0700	[diff] [blame]	385	const Cell& e_cell = row.cells.front();
				386
Sameer Agarwal	487250e	2013-04-05 14:20:37 -0700	[diff] [blame]	387	typename EigenTypes<kRowBlockSize>::Vector sj =
Keir Mierle	8ebb073	2012-04-30 23:09:08 -0700	[diff] [blame]	388	typename EigenTypes<kRowBlockSize>::ConstVectorRef
Sameer Agarwal	487250e	2013-04-05 14:20:37 -0700	[diff] [blame]	389	(b + b_pos, row.block.size);
				390
				391	MatrixVectorMultiply<kRowBlockSize, kEBlockSize, -1>(
Sameer Agarwal	c1e10d9	2013-04-24 11:58:24 -0700	[diff] [blame]	392	values + e_cell.position, row.block.size, e_block_size,
Sameer Agarwal	487250e	2013-04-05 14:20:37 -0700	[diff] [blame]	393	inverse_ete_g, sj.data());
Keir Mierle	8ebb073	2012-04-30 23:09:08 -0700	[diff] [blame]	394
				395	for (int c = 1; c < row.cells.size(); ++c) {
				396	const int block_id = row.cells[c].block_id;
				397	const int block_size = bs->cols[block_id].size;
Keir Mierle	8ebb073	2012-04-30 23:09:08 -0700	[diff] [blame]	398	const int block = block_id - num_eliminate_blocks_;
Keir Mierle	ff71d74	2012-08-10 17:05:15 -0700	[diff] [blame]	399	CeresMutexLock l(rhs_locks_[block]);
Sameer Agarwal	296fa9b	2013-04-02 09:44:15 -0700	[diff] [blame]	400	MatrixTransposeVectorMultiply<kRowBlockSize, kFBlockSize, 1>(
Sameer Agarwal	c1e10d9	2013-04-24 11:58:24 -0700	[diff] [blame]	401	values + row.cells[c].position,
Sameer Agarwal	296fa9b	2013-04-02 09:44:15 -0700	[diff] [blame]	402	row.block.size, block_size,
				403	sj.data(), rhs + lhs_row_layout_[block]);
Keir Mierle	8ebb073	2012-04-30 23:09:08 -0700	[diff] [blame]	404	}
				405	b_pos += row.block.size;
				406	}
				407	}
				408
				409	// Given a Chunk - set of rows with the same e_block, e.g. in the
				410	// following Chunk with two rows.
				411	//
				412	// E F
				413	// [ y11 0 0 0 \| z11 0 0 0 z51]
				414	// [ y12 0 0 0 \| z12 z22 0 0 0]
				415	//
				416	// this function computes twp matrices. The diagonal block matrix
				417	//
				418	// ete = y11 * y11' + y12 * y12'
				419	//
				420	// and the off diagonal blocks in the Guass Newton Hessian.
				421	//
				422	// buffer = [y11'(z11 + z12), y12' * z22, y11' * z51]
				423	//
				424	// which are zero compressed versions of the block sparse matrices E'E
				425	// and E'F.
				426	//
				427	// and the gradient of the e_block, E'b.
				428	template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
				429	void
				430	SchurEliminator<kRowBlockSize, kEBlockSize, kFBlockSize>::
				431	ChunkDiagonalBlockAndGradient(
				432	const Chunk& chunk,
Sameer Agarwal	c1e10d9	2013-04-24 11:58:24 -0700	[diff] [blame]	433	const BlockSparseMatrix* A,
Keir Mierle	8ebb073	2012-04-30 23:09:08 -0700	[diff] [blame]	434	const double* b,
				435	int row_block_counter,
				436	typename EigenTypes<kEBlockSize, kEBlockSize>::Matrix* ete,
Sameer Agarwal	487250e	2013-04-05 14:20:37 -0700	[diff] [blame]	437	double* g,
Keir Mierle	8ebb073	2012-04-30 23:09:08 -0700	[diff] [blame]	438	double* buffer,
				439	BlockRandomAccessMatrix* lhs) {
				440	const CompressedRowBlockStructure* bs = A->block_structure();
				441
				442	int b_pos = bs->rows[row_block_counter].block.position;
				443	const int e_block_size = ete->rows();
				444
				445	// Iterate over the rows in this chunk, for each row, compute the
				446	// contribution of its F blocks to the Schur complement, the
				447	// contribution of its E block to the matrix EE' (ete), and the
				448	// corresponding block in the gradient vector.
Sameer Agarwal	c1e10d9	2013-04-24 11:58:24 -0700	[diff] [blame]	449	const double* values = A->values();
Keir Mierle	8ebb073	2012-04-30 23:09:08 -0700	[diff] [blame]	450	for (int j = 0; j < chunk.size; ++j) {
				451	const CompressedRow& row = bs->rows[row_block_counter + j];
Keir Mierle	8ebb073	2012-04-30 23:09:08 -0700	[diff] [blame]	452
				453	if (row.cells.size() > 1) {
				454	EBlockRowOuterProduct(A, row_block_counter + j, lhs);
				455	}
				456
				457	// Extract the e_block, ETE += E_i' E_i
				458	const Cell& e_cell = row.cells.front();
Sameer Agarwal	296fa9b	2013-04-02 09:44:15 -0700	[diff] [blame]	459	MatrixTransposeMatrixMultiply
				460	<kRowBlockSize, kEBlockSize, kRowBlockSize, kEBlockSize, 1>(
Sameer Agarwal	c1e10d9	2013-04-24 11:58:24 -0700	[diff] [blame]	461	values + e_cell.position, row.block.size, e_block_size,
				462	values + e_cell.position, row.block.size, e_block_size,
Sameer Agarwal	296fa9b	2013-04-02 09:44:15 -0700	[diff] [blame]	463	ete->data(), 0, 0, e_block_size, e_block_size);
Keir Mierle	8ebb073	2012-04-30 23:09:08 -0700	[diff] [blame]	464
				465	// g += E_i' b_i
Sameer Agarwal	296fa9b	2013-04-02 09:44:15 -0700	[diff] [blame]	466	MatrixTransposeVectorMultiply<kRowBlockSize, kEBlockSize, 1>(
Sameer Agarwal	c1e10d9	2013-04-24 11:58:24 -0700	[diff] [blame]	467	values + e_cell.position, row.block.size, e_block_size,
Sameer Agarwal	296fa9b	2013-04-02 09:44:15 -0700	[diff] [blame]	468	b + b_pos,
Sameer Agarwal	487250e	2013-04-05 14:20:37 -0700	[diff] [blame]	469	g);
Sameer Agarwal	296fa9b	2013-04-02 09:44:15 -0700	[diff] [blame]	470
Keir Mierle	8ebb073	2012-04-30 23:09:08 -0700	[diff] [blame]	471
				472	// buffer = E'F. This computation is done by iterating over the
				473	// f_blocks for each row in the chunk.
				474	for (int c = 1; c < row.cells.size(); ++c) {
				475	const int f_block_id = row.cells[c].block_id;
				476	const int f_block_size = bs->cols[f_block_id].size;
Keir Mierle	8ebb073	2012-04-30 23:09:08 -0700	[diff] [blame]	477	double* buffer_ptr =
				478	buffer + FindOrDie(chunk.buffer_layout, f_block_id);
Sameer Agarwal	296fa9b	2013-04-02 09:44:15 -0700	[diff] [blame]	479	MatrixTransposeMatrixMultiply
				480	<kRowBlockSize, kEBlockSize, kRowBlockSize, kFBlockSize, 1>(
Sameer Agarwal	c1e10d9	2013-04-24 11:58:24 -0700	[diff] [blame]	481	values + e_cell.position, row.block.size, e_block_size,
				482	values + row.cells[c].position, row.block.size, f_block_size,
Sameer Agarwal	296fa9b	2013-04-02 09:44:15 -0700	[diff] [blame]	483	buffer_ptr, 0, 0, e_block_size, f_block_size);
Keir Mierle	8ebb073	2012-04-30 23:09:08 -0700	[diff] [blame]	484	}
				485	b_pos += row.block.size;
				486	}
				487	}
				488
				489	// Compute the outer product F'E(E'E)^{-1}E'F and subtract it from the
				490	// Schur complement matrix, i.e
				491	//
				492	// S -= F'E(E'E)^{-1}E'F.
				493	template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
				494	void
				495	SchurEliminator<kRowBlockSize, kEBlockSize, kFBlockSize>::
				496	ChunkOuterProduct(const CompressedRowBlockStructure* bs,
				497	const Matrix& inverse_ete,
				498	const double* buffer,
				499	const BufferLayoutType& buffer_layout,
				500	BlockRandomAccessMatrix* lhs) {
				501	// This is the most computationally expensive part of this
				502	// code. Profiling experiments reveal that the bottleneck is not the
				503	// computation of the right-hand matrix product, but memory
				504	// references to the left hand side.
				505	const int e_block_size = inverse_ete.rows();
				506	BufferLayoutType::const_iterator it1 = buffer_layout.begin();
Sameer Agarwal	296fa9b	2013-04-02 09:44:15 -0700	[diff] [blame]	507
				508	#ifdef CERES_USE_OPENMP
				509	int thread_id = omp_get_thread_num();
				510	#else
				511	int thread_id = 0;
				512	#endif
				513	double* b1_transpose_inverse_ete =
				514	chunk_outer_product_buffer_.get() + thread_id * buffer_size_;
				515
Keir Mierle	8ebb073	2012-04-30 23:09:08 -0700	[diff] [blame]	516	// S(i,j) -= bi' * ete^{-1} b_j
				517	for (; it1 != buffer_layout.end(); ++it1) {
				518	const int block1 = it1->first - num_eliminate_blocks_;
				519	const int block1_size = bs->cols[it1->first].size;
Sameer Agarwal	296fa9b	2013-04-02 09:44:15 -0700	[diff] [blame]	520	MatrixTransposeMatrixMultiply
				521	<kEBlockSize, kFBlockSize, kEBlockSize, kEBlockSize, 0>(
				522	buffer + it1->second, e_block_size, block1_size,
				523	inverse_ete.data(), e_block_size, e_block_size,
				524	b1_transpose_inverse_ete, 0, 0, block1_size, e_block_size);
Keir Mierle	8ebb073	2012-04-30 23:09:08 -0700	[diff] [blame]	525
				526	BufferLayoutType::const_iterator it2 = it1;
				527	for (; it2 != buffer_layout.end(); ++it2) {
				528	const int block2 = it2->first - num_eliminate_blocks_;
				529
				530	int r, c, row_stride, col_stride;
				531	CellInfo* cell_info = lhs->GetCell(block1, block2,
				532	&r, &c,
				533	&row_stride, &col_stride);
Sameer Agarwal	296fa9b	2013-04-02 09:44:15 -0700	[diff] [blame]	534	if (cell_info != NULL) {
				535	const int block2_size = bs->cols[it2->first].size;
				536	CeresMutexLock l(&cell_info->m);
				537	MatrixMatrixMultiply
				538	<kFBlockSize, kEBlockSize, kEBlockSize, kFBlockSize, -1>(
				539	b1_transpose_inverse_ete, block1_size, e_block_size,
				540	buffer + it2->second, e_block_size, block2_size,
				541	cell_info->values, r, c, row_stride, col_stride);
Keir Mierle	8ebb073	2012-04-30 23:09:08 -0700	[diff] [blame]	542	}
Keir Mierle	8ebb073	2012-04-30 23:09:08 -0700	[diff] [blame]	543	}
				544	}
				545	}
				546
				547	// For rows with no e_blocks, the schur complement update reduces to S
				548	// += F'F. This function iterates over the rows of A with no e_block,
				549	// and calls NoEBlockRowOuterProduct on each row.
				550	template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
				551	void
				552	SchurEliminator<kRowBlockSize, kEBlockSize, kFBlockSize>::
Sameer Agarwal	c1e10d9	2013-04-24 11:58:24 -0700	[diff] [blame]	553	NoEBlockRowsUpdate(const BlockSparseMatrix* A,
Keir Mierle	8ebb073	2012-04-30 23:09:08 -0700	[diff] [blame]	554	const double* b,
				555	int row_block_counter,
				556	BlockRandomAccessMatrix* lhs,
				557	double* rhs) {
				558	const CompressedRowBlockStructure* bs = A->block_structure();
Sameer Agarwal	c1e10d9	2013-04-24 11:58:24 -0700	[diff] [blame]	559	const double* values = A->values();
Keir Mierle	8ebb073	2012-04-30 23:09:08 -0700	[diff] [blame]	560	for (; row_block_counter < bs->rows.size(); ++row_block_counter) {
				561	const CompressedRow& row = bs->rows[row_block_counter];
Keir Mierle	8ebb073	2012-04-30 23:09:08 -0700	[diff] [blame]	562	for (int c = 0; c < row.cells.size(); ++c) {
				563	const int block_id = row.cells[c].block_id;
				564	const int block_size = bs->cols[block_id].size;
				565	const int block = block_id - num_eliminate_blocks_;
Sameer Agarwal	487250e	2013-04-05 14:20:37 -0700	[diff] [blame]	566	MatrixTransposeVectorMultiply<Eigen::Dynamic, Eigen::Dynamic, 1>(
Sameer Agarwal	c1e10d9	2013-04-24 11:58:24 -0700	[diff] [blame]	567	values + row.cells[c].position, row.block.size, block_size,
Sameer Agarwal	487250e	2013-04-05 14:20:37 -0700	[diff] [blame]	568	b + row.block.position,
				569	rhs + lhs_row_layout_[block]);
Keir Mierle	8ebb073	2012-04-30 23:09:08 -0700	[diff] [blame]	570	}
				571	NoEBlockRowOuterProduct(A, row_block_counter, lhs);
				572	}
				573	}
				574
				575
				576	// A row r of A, which has no e_blocks gets added to the Schur
				577	// Complement as S += r r'. This function is responsible for computing
				578	// the contribution of a single row r to the Schur complement. It is
				579	// very similar in structure to EBlockRowOuterProduct except for
				580	// one difference. It does not use any of the template
				581	// parameters. This is because the algorithm used for detecting the
				582	// static structure of the matrix A only pays attention to rows with
				583	// e_blocks. This is becase rows without e_blocks are rare and
				584	// typically arise from regularization terms in the original
				585	// optimization problem, and have a very different structure than the
				586	// rows with e_blocks. Including them in the static structure
				587	// detection will lead to most template parameters being set to
				588	// dynamic. Since the number of rows without e_blocks is small, the
				589	// lack of templating is not an issue.
				590	template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
				591	void
				592	SchurEliminator<kRowBlockSize, kEBlockSize, kFBlockSize>::
Sameer Agarwal	c1e10d9	2013-04-24 11:58:24 -0700	[diff] [blame]	593	NoEBlockRowOuterProduct(const BlockSparseMatrix* A,
Keir Mierle	8ebb073	2012-04-30 23:09:08 -0700	[diff] [blame]	594	int row_block_index,
				595	BlockRandomAccessMatrix* lhs) {
				596	const CompressedRowBlockStructure* bs = A->block_structure();
				597	const CompressedRow& row = bs->rows[row_block_index];
Sameer Agarwal	c1e10d9	2013-04-24 11:58:24 -0700	[diff] [blame]	598	const double* values = A->values();
Keir Mierle	8ebb073	2012-04-30 23:09:08 -0700	[diff] [blame]	599	for (int i = 0; i < row.cells.size(); ++i) {
				600	const int block1 = row.cells[i].block_id - num_eliminate_blocks_;
				601	DCHECK_GE(block1, 0);
				602
				603	const int block1_size = bs->cols[row.cells[i].block_id].size;
Keir Mierle	8ebb073	2012-04-30 23:09:08 -0700	[diff] [blame]	604	int r, c, row_stride, col_stride;
				605	CellInfo* cell_info = lhs->GetCell(block1, block1,
				606	&r, &c,
				607	&row_stride, &col_stride);
				608	if (cell_info != NULL) {
Keir Mierle	ff71d74	2012-08-10 17:05:15 -0700	[diff] [blame]	609	CeresMutexLock l(&cell_info->m);
Sameer Agarwal	296fa9b	2013-04-02 09:44:15 -0700	[diff] [blame]	610	// This multiply currently ignores the fact that this is a
				611	// symmetric outer product.
				612	MatrixTransposeMatrixMultiply
				613	<Eigen::Dynamic, Eigen::Dynamic, Eigen::Dynamic, Eigen::Dynamic, 1>(
Sameer Agarwal	c1e10d9	2013-04-24 11:58:24 -0700	[diff] [blame]	614	values + row.cells[i].position, row.block.size, block1_size,
				615	values + row.cells[i].position, row.block.size, block1_size,
Sameer Agarwal	296fa9b	2013-04-02 09:44:15 -0700	[diff] [blame]	616	cell_info->values, r, c, row_stride, col_stride);
Keir Mierle	8ebb073	2012-04-30 23:09:08 -0700	[diff] [blame]	617	}
				618
				619	for (int j = i + 1; j < row.cells.size(); ++j) {
				620	const int block2 = row.cells[j].block_id - num_eliminate_blocks_;
				621	DCHECK_GE(block2, 0);
				622	DCHECK_LT(block1, block2);
				623	int r, c, row_stride, col_stride;
				624	CellInfo* cell_info = lhs->GetCell(block1, block2,
				625	&r, &c,
				626	&row_stride, &col_stride);
Sameer Agarwal	296fa9b	2013-04-02 09:44:15 -0700	[diff] [blame]	627	if (cell_info != NULL) {
				628	const int block2_size = bs->cols[row.cells[j].block_id].size;
				629	CeresMutexLock l(&cell_info->m);
				630	MatrixTransposeMatrixMultiply
				631	<Eigen::Dynamic, Eigen::Dynamic, Eigen::Dynamic, Eigen::Dynamic, 1>(
Sameer Agarwal	c1e10d9	2013-04-24 11:58:24 -0700	[diff] [blame]	632	values + row.cells[i].position, row.block.size, block1_size,
				633	values + row.cells[j].position, row.block.size, block2_size,
Sameer Agarwal	296fa9b	2013-04-02 09:44:15 -0700	[diff] [blame]	634	cell_info->values, r, c, row_stride, col_stride);
Keir Mierle	8ebb073	2012-04-30 23:09:08 -0700	[diff] [blame]	635	}
Keir Mierle	8ebb073	2012-04-30 23:09:08 -0700	[diff] [blame]	636	}
				637	}
				638	}
				639
				640	// For a row with an e_block, compute the contribition S += F'F. This
				641	// function has the same structure as NoEBlockRowOuterProduct, except
				642	// that this function uses the template parameters.
				643	template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
				644	void
				645	SchurEliminator<kRowBlockSize, kEBlockSize, kFBlockSize>::
Sameer Agarwal	c1e10d9	2013-04-24 11:58:24 -0700	[diff] [blame]	646	EBlockRowOuterProduct(const BlockSparseMatrix* A,
Keir Mierle	8ebb073	2012-04-30 23:09:08 -0700	[diff] [blame]	647	int row_block_index,
				648	BlockRandomAccessMatrix* lhs) {
				649	const CompressedRowBlockStructure* bs = A->block_structure();
				650	const CompressedRow& row = bs->rows[row_block_index];
Sameer Agarwal	c1e10d9	2013-04-24 11:58:24 -0700	[diff] [blame]	651	const double* values = A->values();
Keir Mierle	8ebb073	2012-04-30 23:09:08 -0700	[diff] [blame]	652	for (int i = 1; i < row.cells.size(); ++i) {
				653	const int block1 = row.cells[i].block_id - num_eliminate_blocks_;
				654	DCHECK_GE(block1, 0);
				655
				656	const int block1_size = bs->cols[row.cells[i].block_id].size;
Sameer Agarwal	296fa9b	2013-04-02 09:44:15 -0700	[diff] [blame]	657	int r, c, row_stride, col_stride;
				658	CellInfo* cell_info = lhs->GetCell(block1, block1,
				659	&r, &c,
				660	&row_stride, &col_stride);
				661	if (cell_info != NULL) {
Keir Mierle	ff71d74	2012-08-10 17:05:15 -0700	[diff] [blame]	662	CeresMutexLock l(&cell_info->m);
Sameer Agarwal	296fa9b	2013-04-02 09:44:15 -0700	[diff] [blame]	663	// block += b1.transpose() * b1;
				664	MatrixTransposeMatrixMultiply
				665	<kRowBlockSize, kFBlockSize, kRowBlockSize, kFBlockSize, 1>(
Sameer Agarwal	c1e10d9	2013-04-24 11:58:24 -0700	[diff] [blame]	666	values + row.cells[i].position, row.block.size, block1_size,
				667	values + row.cells[i].position, row.block.size, block1_size,
Sameer Agarwal	296fa9b	2013-04-02 09:44:15 -0700	[diff] [blame]	668	cell_info->values, r, c, row_stride, col_stride);
Keir Mierle	8ebb073	2012-04-30 23:09:08 -0700	[diff] [blame]	669	}
				670
				671	for (int j = i + 1; j < row.cells.size(); ++j) {
				672	const int block2 = row.cells[j].block_id - num_eliminate_blocks_;
				673	DCHECK_GE(block2, 0);
				674	DCHECK_LT(block1, block2);
				675	const int block2_size = bs->cols[row.cells[j].block_id].size;
				676	int r, c, row_stride, col_stride;
				677	CellInfo* cell_info = lhs->GetCell(block1, block2,
				678	&r, &c,
				679	&row_stride, &col_stride);
Sameer Agarwal	296fa9b	2013-04-02 09:44:15 -0700	[diff] [blame]	680	if (cell_info != NULL) {
				681	// block += b1.transpose() * b2;
Sameer Agarwal	c7e69be	2013-04-16 09:39:16 -0700	[diff] [blame]	682	CeresMutexLock l(&cell_info->m);
Sameer Agarwal	296fa9b	2013-04-02 09:44:15 -0700	[diff] [blame]	683	MatrixTransposeMatrixMultiply
				684	<kRowBlockSize, kFBlockSize, kRowBlockSize, kFBlockSize, 1>(
Sameer Agarwal	c1e10d9	2013-04-24 11:58:24 -0700	[diff] [blame]	685	values + row.cells[i].position, row.block.size, block1_size,
				686	values + row.cells[j].position, row.block.size, block2_size,
Sameer Agarwal	296fa9b	2013-04-02 09:44:15 -0700	[diff] [blame]	687	cell_info->values, r, c, row_stride, col_stride);
Keir Mierle	8ebb073	2012-04-30 23:09:08 -0700	[diff] [blame]	688	}
Keir Mierle	8ebb073	2012-04-30 23:09:08 -0700	[diff] [blame]	689	}
				690	}
				691	}
				692
				693	} // namespace internal
				694	} // namespace ceres
				695
				696	#endif // CERES_INTERNAL_SCHUR_ELIMINATOR_IMPL_H_