Keir Mierle | 8ebb073 | 2012-04-30 23:09:08 -0700 | [diff] [blame] | 1 | // Ceres Solver - A fast non-linear least squares minimizer |
| 2 | // Copyright 2010, 2011, 2012 Google Inc. All rights reserved. |
| 3 | // http://code.google.com/p/ceres-solver/ |
| 4 | // |
| 5 | // Redistribution and use in source and binary forms, with or without |
| 6 | // modification, are permitted provided that the following conditions are met: |
| 7 | // |
| 8 | // * Redistributions of source code must retain the above copyright notice, |
| 9 | // this list of conditions and the following disclaimer. |
| 10 | // * Redistributions in binary form must reproduce the above copyright notice, |
| 11 | // this list of conditions and the following disclaimer in the documentation |
| 12 | // and/or other materials provided with the distribution. |
| 13 | // * Neither the name of Google Inc. nor the names of its contributors may be |
| 14 | // used to endorse or promote products derived from this software without |
| 15 | // specific prior written permission. |
| 16 | // |
| 17 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
| 18 | // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| 19 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| 20 | // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
| 21 | // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
| 22 | // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
| 23 | // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
| 24 | // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
| 25 | // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| 26 | // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
| 27 | // POSSIBILITY OF SUCH DAMAGE. |
| 28 | // |
| 29 | // Author: sameeragarwal@google.com (Sameer Agarwal) |
| 30 | |
| 31 | #ifndef CERES_INTERNAL_SCHUR_ELIMINATOR_H_ |
| 32 | #define CERES_INTERNAL_SCHUR_ELIMINATOR_H_ |
| 33 | |
| 34 | #include <map> |
| 35 | #include <vector> |
| 36 | #include "ceres/mutex.h" |
| 37 | #include "ceres/block_random_access_matrix.h" |
| 38 | #include "ceres/block_sparse_matrix.h" |
| 39 | #include "ceres/block_structure.h" |
| 40 | #include "ceres/linear_solver.h" |
| 41 | #include "ceres/internal/eigen.h" |
| 42 | #include "ceres/internal/scoped_ptr.h" |
| 43 | |
| 44 | namespace ceres { |
| 45 | namespace internal { |
| 46 | |
| 47 | // Classes implementing the SchurEliminatorBase interface implement |
| 48 | // variable elimination for linear least squares problems. Assuming |
| 49 | // that the input linear system Ax = b can be partitioned into |
| 50 | // |
| 51 | // E y + F z = b |
| 52 | // |
| 53 | // Where x = [y;z] is a partition of the variables. The paritioning |
| 54 | // of the variables is such that, E'E is a block diagonal matrix. Or |
| 55 | // in other words, the parameter blocks in E form an independent set |
| 56 | // of the of the graph implied by the block matrix A'A. Then, this |
| 57 | // class provides the functionality to compute the Schur complement |
| 58 | // system |
| 59 | // |
| 60 | // S z = r |
| 61 | // |
| 62 | // where |
| 63 | // |
| 64 | // S = F'F - F'E (E'E)^{-1} E'F and r = F'b - F'E(E'E)^(-1) E'b |
| 65 | // |
| 66 | // This is the Eliminate operation, i.e., construct the linear system |
| 67 | // obtained by eliminating the variables in E. |
| 68 | // |
| 69 | // The eliminator also provides the reverse functionality, i.e. given |
| 70 | // values for z it can back substitute for the values of y, by solving the |
| 71 | // linear system |
| 72 | // |
| 73 | // Ey = b - F z |
| 74 | // |
| 75 | // which is done by observing that |
| 76 | // |
| 77 | // y = (E'E)^(-1) [E'b - E'F z] |
| 78 | // |
| 79 | // The eliminator has a number of requirements. |
| 80 | // |
| 81 | // The rows of A are ordered so that for every variable block in y, |
| 82 | // all the rows containing that variable block occur as a vertically |
| 83 | // contiguous block. i.e the matrix A looks like |
| 84 | // |
| 85 | // E F chunk |
| 86 | // A = [ y1 0 0 0 | z1 0 0 0 z5] 1 |
| 87 | // [ y1 0 0 0 | z1 z2 0 0 0] 1 |
| 88 | // [ 0 y2 0 0 | 0 0 z3 0 0] 2 |
| 89 | // [ 0 0 y3 0 | z1 z2 z3 z4 z5] 3 |
| 90 | // [ 0 0 y3 0 | z1 0 0 0 z5] 3 |
| 91 | // [ 0 0 0 y4 | 0 0 0 0 z5] 4 |
| 92 | // [ 0 0 0 y4 | 0 z2 0 0 0] 4 |
| 93 | // [ 0 0 0 y4 | 0 0 0 0 0] 4 |
| 94 | // [ 0 0 0 0 | z1 0 0 0 0] non chunk blocks |
| 95 | // [ 0 0 0 0 | 0 0 z3 z4 z5] non chunk blocks |
| 96 | // |
| 97 | // This structure should be reflected in the corresponding |
| 98 | // CompressedRowBlockStructure object associated with A. The linear |
| 99 | // system Ax = b should either be well posed or the array D below |
| 100 | // should be non-null and the diagonal matrix corresponding to it |
| 101 | // should be non-singular. For simplicity of exposition only the case |
| 102 | // with a null D is described. |
| 103 | // |
| 104 | // The usual way to do the elimination is as follows. Starting with |
| 105 | // |
| 106 | // E y + F z = b |
| 107 | // |
| 108 | // we can form the normal equations, |
| 109 | // |
| 110 | // E'E y + E'F z = E'b |
| 111 | // F'E y + F'F z = F'b |
| 112 | // |
| 113 | // multiplying both sides of the first equation by (E'E)^(-1) and then |
| 114 | // by F'E we get |
| 115 | // |
| 116 | // F'E y + F'E (E'E)^(-1) E'F z = F'E (E'E)^(-1) E'b |
| 117 | // F'E y + F'F z = F'b |
| 118 | // |
| 119 | // now subtracting the two equations we get |
| 120 | // |
| 121 | // [FF' - F'E (E'E)^(-1) E'F] z = F'b - F'E(E'E)^(-1) E'b |
| 122 | // |
| 123 | // Instead of forming the normal equations and operating on them as |
| 124 | // general sparse matrices, the algorithm here deals with one |
| 125 | // parameter block in y at a time. The rows corresponding to a single |
| 126 | // parameter block yi are known as a chunk, and the algorithm operates |
| 127 | // on one chunk at a time. The mathematics remains the same since the |
| 128 | // reduced linear system can be shown to be the sum of the reduced |
| 129 | // linear systems for each chunk. This can be seen by observing two |
| 130 | // things. |
| 131 | // |
| 132 | // 1. E'E is a block diagonal matrix. |
| 133 | // |
| 134 | // 2. When E'F is computed, only the terms within a single chunk |
| 135 | // interact, i.e for y1 column blocks when transposed and multiplied |
| 136 | // with F, the only non-zero contribution comes from the blocks in |
| 137 | // chunk1. |
| 138 | // |
| 139 | // Thus, the reduced linear system |
| 140 | // |
| 141 | // FF' - F'E (E'E)^(-1) E'F |
| 142 | // |
| 143 | // can be re-written as |
| 144 | // |
| 145 | // sum_k F_k F_k' - F_k'E_k (E_k'E_k)^(-1) E_k' F_k |
| 146 | // |
| 147 | // Where the sum is over chunks and E_k'E_k is dense matrix of size y1 |
| 148 | // x y1. |
| 149 | // |
| 150 | // Advanced usage. Uptil now it has been assumed that the user would |
| 151 | // be interested in all of the Schur Complement S. However, it is also |
| 152 | // possible to use this eliminator to obtain an arbitrary submatrix of |
| 153 | // the full Schur complement. When the eliminator is generating the |
| 154 | // blocks of S, it asks the RandomAccessBlockMatrix instance passed to |
| 155 | // it if it has storage for that block. If it does, the eliminator |
| 156 | // computes/updates it, if not it is skipped. This is useful when one |
| 157 | // is interested in constructing a preconditioner based on the Schur |
| 158 | // Complement, e.g., computing the block diagonal of S so that it can |
| 159 | // be used as a preconditioner for an Iterative Substructuring based |
| 160 | // solver [See Agarwal et al, Bundle Adjustment in the Large, ECCV |
| 161 | // 2008 for an example of such use]. |
| 162 | // |
| 163 | // Example usage: Please see schur_complement_solver.cc |
| 164 | class SchurEliminatorBase { |
| 165 | public: |
| 166 | virtual ~SchurEliminatorBase() {} |
| 167 | |
| 168 | // Initialize the eliminator. It is the user's responsibilty to call |
| 169 | // this function before calling Eliminate or BackSubstitute. It is |
| 170 | // also the caller's responsibilty to ensure that the |
| 171 | // CompressedRowBlockStructure object passed to this method is the |
| 172 | // same one (or is equivalent to) the one associated with the |
| 173 | // BlockSparseMatrixBase objects below. |
| 174 | virtual void Init(int num_eliminate_blocks, |
| 175 | const CompressedRowBlockStructure* bs) = 0; |
| 176 | |
| 177 | // Compute the Schur complement system from the augmented linear |
| 178 | // least squares problem [A;D] x = [b;0]. The left hand side and the |
| 179 | // right hand side of the reduced linear system are returned in lhs |
| 180 | // and rhs respectively. |
| 181 | // |
| 182 | // It is the caller's responsibility to construct and initialize |
| 183 | // lhs. Depending upon the structure of the lhs object passed here, |
| 184 | // the full or a submatrix of the Schur complement will be computed. |
| 185 | // |
| 186 | // Since the Schur complement is a symmetric matrix, only the upper |
| 187 | // triangular part of the Schur complement is computed. |
| 188 | virtual void Eliminate(const BlockSparseMatrixBase* A, |
| 189 | const double* b, |
| 190 | const double* D, |
| 191 | BlockRandomAccessMatrix* lhs, |
| 192 | double* rhs) = 0; |
| 193 | |
| 194 | // Given values for the variables z in the F block of A, solve for |
| 195 | // the optimal values of the variables y corresponding to the E |
| 196 | // block in A. |
| 197 | virtual void BackSubstitute(const BlockSparseMatrixBase* A, |
| 198 | const double* b, |
| 199 | const double* D, |
| 200 | const double* z, |
| 201 | double* y) = 0; |
| 202 | // Factory |
| 203 | static SchurEliminatorBase* Create(const LinearSolver::Options& options); |
| 204 | }; |
| 205 | |
| 206 | // Templated implementation of the SchurEliminatorBase interface. The |
| 207 | // templating is on the sizes of the row, e and f blocks sizes in the |
| 208 | // input matrix. In many problems, the sizes of one or more of these |
| 209 | // blocks are constant, in that case, its worth passing these |
| 210 | // parameters as template arguments so that they are visible to the |
| 211 | // compiler and can be used for compile time optimization of the low |
| 212 | // level linear algebra routines. |
| 213 | // |
| 214 | // This implementation is mulithreaded using OpenMP. The level of |
| 215 | // parallelism is controlled by LinearSolver::Options::num_threads. |
| 216 | template <int kRowBlockSize = Dynamic, |
| 217 | int kEBlockSize = Dynamic, |
| 218 | int kFBlockSize = Dynamic > |
| 219 | class SchurEliminator : public SchurEliminatorBase { |
| 220 | public: |
| 221 | explicit SchurEliminator(const LinearSolver::Options& options) |
| 222 | : num_threads_(options.num_threads) { |
| 223 | } |
| 224 | |
| 225 | // SchurEliminatorBase Interface |
| 226 | virtual ~SchurEliminator(); |
| 227 | virtual void Init(int num_eliminate_blocks, |
| 228 | const CompressedRowBlockStructure* bs); |
| 229 | virtual void Eliminate(const BlockSparseMatrixBase* A, |
| 230 | const double* b, |
| 231 | const double* D, |
| 232 | BlockRandomAccessMatrix* lhs, |
| 233 | double* rhs); |
| 234 | virtual void BackSubstitute(const BlockSparseMatrixBase* A, |
| 235 | const double* b, |
| 236 | const double* D, |
| 237 | const double* z, |
| 238 | double* y); |
| 239 | |
| 240 | private: |
| 241 | // Chunk objects store combinatorial information needed to |
| 242 | // efficiently eliminate a whole chunk out of the least squares |
| 243 | // problem. Consider the first chunk in the example matrix above. |
| 244 | // |
| 245 | // [ y1 0 0 0 | z1 0 0 0 z5] |
| 246 | // [ y1 0 0 0 | z1 z2 0 0 0] |
| 247 | // |
| 248 | // One of the intermediate quantities that needs to be calculated is |
| 249 | // for each row the product of the y block transposed with the |
| 250 | // non-zero z block, and the sum of these blocks across rows. A |
| 251 | // temporary array "buffer_" is used for computing and storing them |
| 252 | // and the buffer_layout maps the indices of the z-blocks to |
| 253 | // position in the buffer_ array. The size of the chunk is the |
| 254 | // number of row blocks/residual blocks for the particular y block |
| 255 | // being considered. |
| 256 | // |
| 257 | // For the example chunk shown above, |
| 258 | // |
| 259 | // size = 2 |
| 260 | // |
| 261 | // The entries of buffer_layout will be filled in the following order. |
| 262 | // |
| 263 | // buffer_layout[z1] = 0 |
| 264 | // buffer_layout[z5] = y1 * z1 |
| 265 | // buffer_layout[z2] = y1 * z1 + y1 * z5 |
| 266 | typedef map<int, int> BufferLayoutType; |
| 267 | struct Chunk { |
| 268 | Chunk() : size(0) {} |
| 269 | int size; |
| 270 | int start; |
| 271 | BufferLayoutType buffer_layout; |
| 272 | }; |
| 273 | |
| 274 | void ChunkDiagonalBlockAndGradient( |
| 275 | const Chunk& chunk, |
| 276 | const BlockSparseMatrixBase* A, |
| 277 | const double* b, |
| 278 | int row_block_counter, |
| 279 | typename EigenTypes<kEBlockSize, kEBlockSize>::Matrix* eet, |
| 280 | typename EigenTypes<kEBlockSize>::Vector* g, |
| 281 | double* buffer, |
| 282 | BlockRandomAccessMatrix* lhs); |
| 283 | |
| 284 | void UpdateRhs(const Chunk& chunk, |
| 285 | const BlockSparseMatrixBase* A, |
| 286 | const double* b, |
| 287 | int row_block_counter, |
| 288 | const Vector& inverse_ete_g, |
| 289 | double* rhs); |
| 290 | |
| 291 | void ChunkOuterProduct(const CompressedRowBlockStructure* bs, |
| 292 | const Matrix& inverse_eet, |
| 293 | const double* buffer, |
| 294 | const BufferLayoutType& buffer_layout, |
| 295 | BlockRandomAccessMatrix* lhs); |
| 296 | void EBlockRowOuterProduct(const BlockSparseMatrixBase* A, |
| 297 | int row_block_index, |
| 298 | BlockRandomAccessMatrix* lhs); |
| 299 | |
| 300 | |
| 301 | void NoEBlockRowsUpdate(const BlockSparseMatrixBase* A, |
| 302 | const double* b, |
| 303 | int row_block_counter, |
| 304 | BlockRandomAccessMatrix* lhs, |
| 305 | double* rhs); |
| 306 | |
| 307 | void NoEBlockRowOuterProduct(const BlockSparseMatrixBase* A, |
| 308 | int row_block_index, |
| 309 | BlockRandomAccessMatrix* lhs); |
| 310 | |
| 311 | int num_eliminate_blocks_; |
| 312 | |
| 313 | // Block layout of the columns of the reduced linear system. Since |
| 314 | // the f blocks can be of varying size, this vector stores the |
| 315 | // position of each f block in the row/col of the reduced linear |
| 316 | // system. Thus lhs_row_layout_[i] is the row/col position of the |
| 317 | // i^th f block. |
| 318 | vector<int> lhs_row_layout_; |
| 319 | |
| 320 | // Combinatorial structure of the chunks in A. For more information |
| 321 | // see the documentation of the Chunk object above. |
| 322 | vector<Chunk> chunks_; |
| 323 | |
| 324 | // Buffer to store the products of the y and z blocks generated |
| 325 | // during the elimination phase. |
| 326 | scoped_array<double> buffer_; |
| 327 | int buffer_size_; |
| 328 | int num_threads_; |
| 329 | int uneliminated_row_begins_; |
| 330 | |
| 331 | // Locks for the blocks in the right hand side of the reduced linear |
| 332 | // system. |
| 333 | vector<Mutex*> rhs_locks_; |
| 334 | }; |
| 335 | |
| 336 | } // namespace internal |
| 337 | } // namespace ceres |
| 338 | |
| 339 | #endif // CERES_INTERNAL_SCHUR_ELIMINATOR_H_ |