10ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// Ceres Solver - A fast non-linear least squares minimizer 20ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// Copyright 2010, 2011, 2012 Google Inc. All rights reserved. 30ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// http://code.google.com/p/ceres-solver/ 40ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// 50ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// Redistribution and use in source and binary forms, with or without 60ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// modification, are permitted provided that the following conditions are met: 70ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// 80ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// * Redistributions of source code must retain the above copyright notice, 90ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// this list of conditions and the following disclaimer. 100ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// * Redistributions in binary form must reproduce the above copyright notice, 110ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// this list of conditions and the following disclaimer in the documentation 120ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// and/or other materials provided with the distribution. 130ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// * Neither the name of Google Inc. nor the names of its contributors may be 140ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// used to endorse or promote products derived from this software without 150ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// specific prior written permission. 160ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// 170ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 180ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 190ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 200ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 210ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 220ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 230ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 240ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 250ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 260ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 270ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// POSSIBILITY OF SUCH DAMAGE. 280ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// 290ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// Author: sameeragarwal@google.com (Sameer Agarwal) 300ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// 310ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// TODO(sameeragarwal): row_block_counter can perhaps be replaced by 320ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// Chunk::start ? 330ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong 340ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong#ifndef CERES_INTERNAL_SCHUR_ELIMINATOR_IMPL_H_ 350ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong#define CERES_INTERNAL_SCHUR_ELIMINATOR_IMPL_H_ 360ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong 370ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// Eigen has an internal threshold switching between different matrix 380ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// multiplication algorithms. In particular for matrices larger than 390ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD it uses a cache friendly 400ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// matrix matrix product algorithm that has a higher setup cost. For 410ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// matrix sizes close to this threshold, especially when the matrices 420ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// are thin and long, the default choice may not be optimal. This is 430ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// the case for us, as the default choice causes a 30% performance 440ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// regression when we moved from Eigen2 to Eigen3. 451d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling 460ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong#define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 10 470ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong 4879397c21138f54fcff6ec067b44b847f1f7e0e98Carlos Hernandez// This include must come before any #ifndef check on Ceres compile options. 4979397c21138f54fcff6ec067b44b847f1f7e0e98Carlos Hernandez#include "ceres/internal/port.h" 5079397c21138f54fcff6ec067b44b847f1f7e0e98Carlos Hernandez 511d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling#ifdef CERES_USE_OPENMP 521d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling#include <omp.h> 531d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling#endif 541d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling 550ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong#include <algorithm> 560ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong#include <map> 570ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong#include "ceres/block_random_access_matrix.h" 580ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong#include "ceres/block_sparse_matrix.h" 590ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong#include "ceres/block_structure.h" 601d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling#include "ceres/internal/eigen.h" 611d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling#include "ceres/internal/fixed_array.h" 621d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling#include "ceres/internal/scoped_ptr.h" 630ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong#include "ceres/map_util.h" 640ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong#include "ceres/schur_eliminator.h" 65399f7d09e0c45af54b77b4ab9508d6f23759b927Scott Ettinger#include "ceres/small_blas.h" 660ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong#include "ceres/stl_util.h" 671d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling#include "Eigen/Dense" 681d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling#include "glog/logging.h" 690ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong 700ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kongnamespace ceres { 710ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kongnamespace internal { 720ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong 730ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kongtemplate <int kRowBlockSize, int kEBlockSize, int kFBlockSize> 740ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus KongSchurEliminator<kRowBlockSize, kEBlockSize, kFBlockSize>::~SchurEliminator() { 750ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong STLDeleteElements(&rhs_locks_); 760ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong} 770ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong 780ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kongtemplate <int kRowBlockSize, int kEBlockSize, int kFBlockSize> 790ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kongvoid 800ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus KongSchurEliminator<kRowBlockSize, kEBlockSize, kFBlockSize>:: 810ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus KongInit(int num_eliminate_blocks, const CompressedRowBlockStructure* bs) { 820ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong CHECK_GT(num_eliminate_blocks, 0) 830ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong << "SchurComplementSolver cannot be initialized with " 840ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong << "num_eliminate_blocks = 0."; 850ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong 860ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong num_eliminate_blocks_ = num_eliminate_blocks; 870ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong 880ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong const int num_col_blocks = bs->cols.size(); 890ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong const int num_row_blocks = bs->rows.size(); 900ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong 910ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong buffer_size_ = 1; 920ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong chunks_.clear(); 930ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong lhs_row_layout_.clear(); 940ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong 950ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong int lhs_num_rows = 0; 960ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong // Add a map object for each block in the reduced linear system 970ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong // and build the row/column block structure of the reduced linear 980ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong // system. 990ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong lhs_row_layout_.resize(num_col_blocks - num_eliminate_blocks_); 1000ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong for (int i = num_eliminate_blocks_; i < num_col_blocks; ++i) { 1010ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong lhs_row_layout_[i - num_eliminate_blocks_] = lhs_num_rows; 1020ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong lhs_num_rows += bs->cols[i].size; 1030ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong } 1040ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong 1050ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong int r = 0; 1060ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong // Iterate over the row blocks of A, and detect the chunks. The 1070ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong // matrix should already have been ordered so that all rows 1080ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong // containing the same y block are vertically contiguous. Along 1090ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong // the way also compute the amount of space each chunk will need 1100ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong // to perform the elimination. 1110ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong while (r < num_row_blocks) { 1120ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong const int chunk_block_id = bs->rows[r].cells.front().block_id; 1130ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong if (chunk_block_id >= num_eliminate_blocks_) { 1140ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong break; 1150ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong } 1160ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong 1170ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong chunks_.push_back(Chunk()); 1180ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong Chunk& chunk = chunks_.back(); 1190ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong chunk.size = 0; 1200ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong chunk.start = r; 1210ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong int buffer_size = 0; 1220ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong const int e_block_size = bs->cols[chunk_block_id].size; 1230ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong 1240ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong // Add to the chunk until the first block in the row is 1250ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong // different than the one in the first row for the chunk. 1260ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong while (r + chunk.size < num_row_blocks) { 1270ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong const CompressedRow& row = bs->rows[r + chunk.size]; 1280ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong if (row.cells.front().block_id != chunk_block_id) { 1290ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong break; 1300ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong } 1310ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong 1320ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong // Iterate over the blocks in the row, ignoring the first 1330ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong // block since it is the one to be eliminated. 1340ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong for (int c = 1; c < row.cells.size(); ++c) { 1350ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong const Cell& cell = row.cells[c]; 1360ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong if (InsertIfNotPresent( 1370ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong &(chunk.buffer_layout), cell.block_id, buffer_size)) { 1380ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong buffer_size += e_block_size * bs->cols[cell.block_id].size; 1390ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong } 1400ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong } 1410ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong 1420ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong buffer_size_ = max(buffer_size, buffer_size_); 1430ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong ++chunk.size; 1440ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong } 1450ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong 1460ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong CHECK_GT(chunk.size, 0); 1470ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong r += chunk.size; 1480ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong } 1490ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong const Chunk& chunk = chunks_.back(); 1500ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong 1510ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong uneliminated_row_begins_ = chunk.start + chunk.size; 1520ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong if (num_threads_ > 1) { 1530ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong random_shuffle(chunks_.begin(), chunks_.end()); 1540ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong } 1550ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong 1560ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong buffer_.reset(new double[buffer_size_ * num_threads_]); 1570ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong 1581d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling // chunk_outer_product_buffer_ only needs to store e_block_size * 1591d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling // f_block_size, which is always less than buffer_size_, so we just 1601d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling // allocate buffer_size_ per thread. 1611d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling chunk_outer_product_buffer_.reset(new double[buffer_size_ * num_threads_]); 1621d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling 1630ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong STLDeleteElements(&rhs_locks_); 1640ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong rhs_locks_.resize(num_col_blocks - num_eliminate_blocks_); 1650ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong for (int i = 0; i < num_col_blocks - num_eliminate_blocks_; ++i) { 1660ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong rhs_locks_[i] = new Mutex; 1670ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong } 1680ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong} 1690ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong 1700ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kongtemplate <int kRowBlockSize, int kEBlockSize, int kFBlockSize> 1710ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kongvoid 1720ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus KongSchurEliminator<kRowBlockSize, kEBlockSize, kFBlockSize>:: 1731d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha HaeberlingEliminate(const BlockSparseMatrix* A, 1740ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong const double* b, 1750ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong const double* D, 1760ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong BlockRandomAccessMatrix* lhs, 1770ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong double* rhs) { 1780ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong if (lhs->num_rows() > 0) { 1790ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong lhs->SetZero(); 1800ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong VectorRef(rhs, lhs->num_rows()).setZero(); 1810ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong } 1820ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong 1830ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong const CompressedRowBlockStructure* bs = A->block_structure(); 1840ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong const int num_col_blocks = bs->cols.size(); 1850ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong 1860ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong // Add the diagonal to the schur complement. 1870ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong if (D != NULL) { 1880ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong#pragma omp parallel for num_threads(num_threads_) schedule(dynamic) 1890ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong for (int i = num_eliminate_blocks_; i < num_col_blocks; ++i) { 1900ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong const int block_id = i - num_eliminate_blocks_; 1910ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong int r, c, row_stride, col_stride; 1920ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong CellInfo* cell_info = lhs->GetCell(block_id, block_id, 1930ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong &r, &c, 1940ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong &row_stride, &col_stride); 1950ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong if (cell_info != NULL) { 1960ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong const int block_size = bs->cols[i].size; 1970ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong typename EigenTypes<kFBlockSize>::ConstVectorRef 1980ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong diag(D + bs->cols[i].position, block_size); 1990ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong 2000ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong CeresMutexLock l(&cell_info->m); 2010ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong MatrixRef m(cell_info->values, row_stride, col_stride); 2020ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong m.block(r, c, block_size, block_size).diagonal() 2030ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong += diag.array().square().matrix(); 2040ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong } 2050ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong } 2060ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong } 2070ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong 2080ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong // Eliminate y blocks one chunk at a time. For each chunk,x3 2090ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong // compute the entries of the normal equations and the gradient 2100ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong // vector block corresponding to the y block and then apply 2110ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong // Gaussian elimination to them. The matrix ete stores the normal 2120ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong // matrix corresponding to the block being eliminated and array 2130ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong // buffer_ contains the non-zero blocks in the row corresponding 2140ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong // to this y block in the normal equations. This computation is 2150ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong // done in ChunkDiagonalBlockAndGradient. UpdateRhs then applies 2160ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong // gaussian elimination to the rhs of the normal equations, 2170ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong // updating the rhs of the reduced linear system by modifying rhs 2180ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong // blocks for all the z blocks that share a row block/residual 2190ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong // term with the y block. EliminateRowOuterProduct does the 2200ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong // corresponding operation for the lhs of the reduced linear 2210ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong // system. 2220ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong#pragma omp parallel for num_threads(num_threads_) schedule(dynamic) 2230ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong for (int i = 0; i < chunks_.size(); ++i) { 2240ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong#ifdef CERES_USE_OPENMP 2250ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong int thread_id = omp_get_thread_num(); 2260ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong#else 2270ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong int thread_id = 0; 2280ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong#endif 2290ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong double* buffer = buffer_.get() + thread_id * buffer_size_; 2300ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong const Chunk& chunk = chunks_[i]; 2310ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong const int e_block_id = bs->rows[chunk.start].cells.front().block_id; 2320ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong const int e_block_size = bs->cols[e_block_id].size; 2330ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong 2340ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong VectorRef(buffer, buffer_size_).setZero(); 2350ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong 2360ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong typename EigenTypes<kEBlockSize, kEBlockSize>::Matrix 2370ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong ete(e_block_size, e_block_size); 2380ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong 2390ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong if (D != NULL) { 2400ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong const typename EigenTypes<kEBlockSize>::ConstVectorRef 2410ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong diag(D + bs->cols[e_block_id].position, e_block_size); 2420ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong ete = diag.array().square().matrix().asDiagonal(); 2430ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong } else { 2440ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong ete.setZero(); 2450ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong } 2460ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong 2471d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling FixedArray<double, 8> g(e_block_size); 2481d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling typename EigenTypes<kEBlockSize>::VectorRef gref(g.get(), e_block_size); 2491d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling gref.setZero(); 2500ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong 2510ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong // We are going to be computing 2520ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong // 2530ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong // S += F'F - F'E(E'E)^{-1}E'F 2540ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong // 2550ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong // for each Chunk. The computation is broken down into a number of 2560ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong // function calls as below. 2570ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong 2580ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong // Compute the outer product of the e_blocks with themselves (ete 2590ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong // = E'E). Compute the product of the e_blocks with the 2600ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong // corresonding f_blocks (buffer = E'F), the gradient of the terms 2610ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong // in this chunk (g) and add the outer product of the f_blocks to 2620ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong // Schur complement (S += F'F). 2630ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong ChunkDiagonalBlockAndGradient( 2641d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling chunk, A, b, chunk.start, &ete, g.get(), buffer, lhs); 2650ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong 2660ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong // Normally one wouldn't compute the inverse explicitly, but 2670ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong // e_block_size will typically be a small number like 3, in 2680ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong // which case its much faster to compute the inverse once and 2690ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong // use it to multiply other matrices/vectors instead of doing a 2700ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong // Solve call over and over again. 2710ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong typename EigenTypes<kEBlockSize, kEBlockSize>::Matrix inverse_ete = 2720ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong ete 2730ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong .template selfadjointView<Eigen::Upper>() 2741d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling .llt() 2750ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong .solve(Matrix::Identity(e_block_size, e_block_size)); 2760ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong 2770ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong // For the current chunk compute and update the rhs of the reduced 2780ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong // linear system. 2790ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong // 2800ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong // rhs = F'b - F'E(E'E)^(-1) E'b 2811d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling 2821d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling FixedArray<double, 8> inverse_ete_g(e_block_size); 2831d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling MatrixVectorMultiply<kEBlockSize, kEBlockSize, 0>( 2841d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling inverse_ete.data(), 2851d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling e_block_size, 2861d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling e_block_size, 2871d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling g.get(), 2881d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling inverse_ete_g.get()); 2891d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling 2901d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling UpdateRhs(chunk, A, b, chunk.start, inverse_ete_g.get(), rhs); 2910ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong 2920ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong // S -= F'E(E'E)^{-1}E'F 2930ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong ChunkOuterProduct(bs, inverse_ete, buffer, chunk.buffer_layout, lhs); 2940ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong } 2950ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong 2960ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong // For rows with no e_blocks, the schur complement update reduces to 2970ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong // S += F'F. 2980ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong NoEBlockRowsUpdate(A, b, uneliminated_row_begins_, lhs, rhs); 2990ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong} 3000ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong 3010ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kongtemplate <int kRowBlockSize, int kEBlockSize, int kFBlockSize> 3020ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kongvoid 3030ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus KongSchurEliminator<kRowBlockSize, kEBlockSize, kFBlockSize>:: 3041d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha HaeberlingBackSubstitute(const BlockSparseMatrix* A, 3050ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong const double* b, 3060ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong const double* D, 3070ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong const double* z, 3080ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong double* y) { 3090ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong const CompressedRowBlockStructure* bs = A->block_structure(); 3100ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong#pragma omp parallel for num_threads(num_threads_) schedule(dynamic) 3110ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong for (int i = 0; i < chunks_.size(); ++i) { 3120ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong const Chunk& chunk = chunks_[i]; 3130ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong const int e_block_id = bs->rows[chunk.start].cells.front().block_id; 3140ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong const int e_block_size = bs->cols[e_block_id].size; 3150ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong 3161d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling double* y_ptr = y + bs->cols[e_block_id].position; 3171d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling typename EigenTypes<kEBlockSize>::VectorRef y_block(y_ptr, e_block_size); 3180ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong 3190ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong typename EigenTypes<kEBlockSize, kEBlockSize>::Matrix 3200ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong ete(e_block_size, e_block_size); 3210ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong if (D != NULL) { 3220ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong const typename EigenTypes<kEBlockSize>::ConstVectorRef 3230ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong diag(D + bs->cols[e_block_id].position, e_block_size); 3240ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong ete = diag.array().square().matrix().asDiagonal(); 3250ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong } else { 3260ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong ete.setZero(); 3270ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong } 3280ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong 3291d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling const double* values = A->values(); 3300ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong for (int j = 0; j < chunk.size; ++j) { 3310ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong const CompressedRow& row = bs->rows[chunk.start + j]; 3320ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong const Cell& e_cell = row.cells.front(); 3330ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong DCHECK_EQ(e_block_id, e_cell.block_id); 3340ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong 3351d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling FixedArray<double, 8> sj(row.block.size); 3361d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling 3371d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling typename EigenTypes<kRowBlockSize>::VectorRef(sj.get(), row.block.size) = 3380ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong typename EigenTypes<kRowBlockSize>::ConstVectorRef 3391d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling (b + bs->rows[chunk.start + j].block.position, row.block.size); 3400ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong 3410ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong for (int c = 1; c < row.cells.size(); ++c) { 3420ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong const int f_block_id = row.cells[c].block_id; 3430ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong const int f_block_size = bs->cols[f_block_id].size; 3440ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong const int r_block = f_block_id - num_eliminate_blocks_; 3450ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong 3461d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling MatrixVectorMultiply<kRowBlockSize, kFBlockSize, -1>( 3471d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling values + row.cells[c].position, row.block.size, f_block_size, 3481d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling z + lhs_row_layout_[r_block], 3491d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling sj.get()); 3500ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong } 3510ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong 3521d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling MatrixTransposeVectorMultiply<kRowBlockSize, kEBlockSize, 1>( 3531d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling values + e_cell.position, row.block.size, e_block_size, 3541d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling sj.get(), 3551d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling y_ptr); 3561d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling 3571d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling MatrixTransposeMatrixMultiply 3581d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling <kRowBlockSize, kEBlockSize, kRowBlockSize, kEBlockSize, 1>( 3591d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling values + e_cell.position, row.block.size, e_block_size, 3601d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling values + e_cell.position, row.block.size, e_block_size, 3611d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling ete.data(), 0, 0, e_block_size, e_block_size); 3620ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong } 3630ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong 3641d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling ete.llt().solveInPlace(y_block); 3650ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong } 3660ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong} 3670ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong 3680ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// Update the rhs of the reduced linear system. Compute 3690ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// 3700ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// F'b - F'E(E'E)^(-1) E'b 3710ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kongtemplate <int kRowBlockSize, int kEBlockSize, int kFBlockSize> 3720ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kongvoid 3730ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus KongSchurEliminator<kRowBlockSize, kEBlockSize, kFBlockSize>:: 3740ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus KongUpdateRhs(const Chunk& chunk, 3751d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling const BlockSparseMatrix* A, 3760ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong const double* b, 3770ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong int row_block_counter, 3781d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling const double* inverse_ete_g, 3790ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong double* rhs) { 3800ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong const CompressedRowBlockStructure* bs = A->block_structure(); 3811d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling const int e_block_id = bs->rows[chunk.start].cells.front().block_id; 3821d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling const int e_block_size = bs->cols[e_block_id].size; 3831d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling 3840ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong int b_pos = bs->rows[row_block_counter].block.position; 3851d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling const double* values = A->values(); 3860ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong for (int j = 0; j < chunk.size; ++j) { 3870ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong const CompressedRow& row = bs->rows[row_block_counter + j]; 3880ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong const Cell& e_cell = row.cells.front(); 3890ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong 3901d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling typename EigenTypes<kRowBlockSize>::Vector sj = 3910ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong typename EigenTypes<kRowBlockSize>::ConstVectorRef 3921d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling (b + b_pos, row.block.size); 3931d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling 3941d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling MatrixVectorMultiply<kRowBlockSize, kEBlockSize, -1>( 3951d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling values + e_cell.position, row.block.size, e_block_size, 3961d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling inverse_ete_g, sj.data()); 3970ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong 3980ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong for (int c = 1; c < row.cells.size(); ++c) { 3990ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong const int block_id = row.cells[c].block_id; 4000ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong const int block_size = bs->cols[block_id].size; 4010ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong const int block = block_id - num_eliminate_blocks_; 4020ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong CeresMutexLock l(rhs_locks_[block]); 4031d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling MatrixTransposeVectorMultiply<kRowBlockSize, kFBlockSize, 1>( 4041d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling values + row.cells[c].position, 4051d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling row.block.size, block_size, 4061d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling sj.data(), rhs + lhs_row_layout_[block]); 4070ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong } 4080ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong b_pos += row.block.size; 4090ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong } 4100ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong} 4110ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong 4120ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// Given a Chunk - set of rows with the same e_block, e.g. in the 4130ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// following Chunk with two rows. 4140ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// 4150ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// E F 4160ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// [ y11 0 0 0 | z11 0 0 0 z51] 4170ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// [ y12 0 0 0 | z12 z22 0 0 0] 4180ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// 4190ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// this function computes twp matrices. The diagonal block matrix 4200ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// 4210ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// ete = y11 * y11' + y12 * y12' 4220ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// 4230ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// and the off diagonal blocks in the Guass Newton Hessian. 4240ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// 4250ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// buffer = [y11'(z11 + z12), y12' * z22, y11' * z51] 4260ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// 4270ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// which are zero compressed versions of the block sparse matrices E'E 4280ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// and E'F. 4290ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// 4300ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// and the gradient of the e_block, E'b. 4310ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kongtemplate <int kRowBlockSize, int kEBlockSize, int kFBlockSize> 4320ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kongvoid 4330ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus KongSchurEliminator<kRowBlockSize, kEBlockSize, kFBlockSize>:: 4340ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus KongChunkDiagonalBlockAndGradient( 4350ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong const Chunk& chunk, 4361d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling const BlockSparseMatrix* A, 4370ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong const double* b, 4380ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong int row_block_counter, 4390ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong typename EigenTypes<kEBlockSize, kEBlockSize>::Matrix* ete, 4401d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling double* g, 4410ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong double* buffer, 4420ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong BlockRandomAccessMatrix* lhs) { 4430ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong const CompressedRowBlockStructure* bs = A->block_structure(); 4440ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong 4450ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong int b_pos = bs->rows[row_block_counter].block.position; 4460ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong const int e_block_size = ete->rows(); 4470ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong 4480ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong // Iterate over the rows in this chunk, for each row, compute the 4490ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong // contribution of its F blocks to the Schur complement, the 4500ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong // contribution of its E block to the matrix EE' (ete), and the 4510ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong // corresponding block in the gradient vector. 4521d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling const double* values = A->values(); 4530ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong for (int j = 0; j < chunk.size; ++j) { 4540ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong const CompressedRow& row = bs->rows[row_block_counter + j]; 4550ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong 4560ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong if (row.cells.size() > 1) { 4570ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong EBlockRowOuterProduct(A, row_block_counter + j, lhs); 4580ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong } 4590ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong 4600ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong // Extract the e_block, ETE += E_i' E_i 4610ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong const Cell& e_cell = row.cells.front(); 4621d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling MatrixTransposeMatrixMultiply 4631d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling <kRowBlockSize, kEBlockSize, kRowBlockSize, kEBlockSize, 1>( 4641d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling values + e_cell.position, row.block.size, e_block_size, 4651d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling values + e_cell.position, row.block.size, e_block_size, 4661d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling ete->data(), 0, 0, e_block_size, e_block_size); 4670ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong 4680ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong // g += E_i' b_i 4691d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling MatrixTransposeVectorMultiply<kRowBlockSize, kEBlockSize, 1>( 4701d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling values + e_cell.position, row.block.size, e_block_size, 4711d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling b + b_pos, 4721d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling g); 4731d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling 4740ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong 4750ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong // buffer = E'F. This computation is done by iterating over the 4760ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong // f_blocks for each row in the chunk. 4770ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong for (int c = 1; c < row.cells.size(); ++c) { 4780ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong const int f_block_id = row.cells[c].block_id; 4790ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong const int f_block_size = bs->cols[f_block_id].size; 4800ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong double* buffer_ptr = 4810ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong buffer + FindOrDie(chunk.buffer_layout, f_block_id); 4821d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling MatrixTransposeMatrixMultiply 4831d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling <kRowBlockSize, kEBlockSize, kRowBlockSize, kFBlockSize, 1>( 4841d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling values + e_cell.position, row.block.size, e_block_size, 4851d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling values + row.cells[c].position, row.block.size, f_block_size, 4861d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling buffer_ptr, 0, 0, e_block_size, f_block_size); 4870ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong } 4880ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong b_pos += row.block.size; 4890ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong } 4900ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong} 4910ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong 4920ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// Compute the outer product F'E(E'E)^{-1}E'F and subtract it from the 4930ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// Schur complement matrix, i.e 4940ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// 4950ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// S -= F'E(E'E)^{-1}E'F. 4960ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kongtemplate <int kRowBlockSize, int kEBlockSize, int kFBlockSize> 4970ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kongvoid 4980ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus KongSchurEliminator<kRowBlockSize, kEBlockSize, kFBlockSize>:: 4990ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus KongChunkOuterProduct(const CompressedRowBlockStructure* bs, 5000ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong const Matrix& inverse_ete, 5010ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong const double* buffer, 5020ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong const BufferLayoutType& buffer_layout, 5030ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong BlockRandomAccessMatrix* lhs) { 5040ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong // This is the most computationally expensive part of this 5050ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong // code. Profiling experiments reveal that the bottleneck is not the 5060ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong // computation of the right-hand matrix product, but memory 5070ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong // references to the left hand side. 5080ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong const int e_block_size = inverse_ete.rows(); 5090ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong BufferLayoutType::const_iterator it1 = buffer_layout.begin(); 5101d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling 5111d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling#ifdef CERES_USE_OPENMP 5121d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling int thread_id = omp_get_thread_num(); 5131d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling#else 5141d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling int thread_id = 0; 5151d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling#endif 5161d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling double* b1_transpose_inverse_ete = 5171d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling chunk_outer_product_buffer_.get() + thread_id * buffer_size_; 5181d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling 5190ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong // S(i,j) -= bi' * ete^{-1} b_j 5200ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong for (; it1 != buffer_layout.end(); ++it1) { 5210ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong const int block1 = it1->first - num_eliminate_blocks_; 5220ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong const int block1_size = bs->cols[it1->first].size; 5231d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling MatrixTransposeMatrixMultiply 5241d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling <kEBlockSize, kFBlockSize, kEBlockSize, kEBlockSize, 0>( 5251d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling buffer + it1->second, e_block_size, block1_size, 5261d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling inverse_ete.data(), e_block_size, e_block_size, 5271d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling b1_transpose_inverse_ete, 0, 0, block1_size, e_block_size); 5280ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong 5290ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong BufferLayoutType::const_iterator it2 = it1; 5300ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong for (; it2 != buffer_layout.end(); ++it2) { 5310ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong const int block2 = it2->first - num_eliminate_blocks_; 5320ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong 5330ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong int r, c, row_stride, col_stride; 5340ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong CellInfo* cell_info = lhs->GetCell(block1, block2, 5350ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong &r, &c, 5360ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong &row_stride, &col_stride); 5371d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling if (cell_info != NULL) { 5381d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling const int block2_size = bs->cols[it2->first].size; 5391d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling CeresMutexLock l(&cell_info->m); 5401d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling MatrixMatrixMultiply 5411d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling <kFBlockSize, kEBlockSize, kEBlockSize, kFBlockSize, -1>( 5421d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling b1_transpose_inverse_ete, block1_size, e_block_size, 5431d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling buffer + it2->second, e_block_size, block2_size, 5441d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling cell_info->values, r, c, row_stride, col_stride); 5450ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong } 5460ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong } 5470ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong } 5480ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong} 5490ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong 5500ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// For rows with no e_blocks, the schur complement update reduces to S 5510ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// += F'F. This function iterates over the rows of A with no e_block, 5520ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// and calls NoEBlockRowOuterProduct on each row. 5530ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kongtemplate <int kRowBlockSize, int kEBlockSize, int kFBlockSize> 5540ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kongvoid 5550ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus KongSchurEliminator<kRowBlockSize, kEBlockSize, kFBlockSize>:: 5561d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha HaeberlingNoEBlockRowsUpdate(const BlockSparseMatrix* A, 5570ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong const double* b, 5580ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong int row_block_counter, 5590ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong BlockRandomAccessMatrix* lhs, 5600ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong double* rhs) { 5610ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong const CompressedRowBlockStructure* bs = A->block_structure(); 5621d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling const double* values = A->values(); 5630ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong for (; row_block_counter < bs->rows.size(); ++row_block_counter) { 5640ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong const CompressedRow& row = bs->rows[row_block_counter]; 5650ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong for (int c = 0; c < row.cells.size(); ++c) { 5660ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong const int block_id = row.cells[c].block_id; 5670ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong const int block_size = bs->cols[block_id].size; 5680ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong const int block = block_id - num_eliminate_blocks_; 5691d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling MatrixTransposeVectorMultiply<Eigen::Dynamic, Eigen::Dynamic, 1>( 5701d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling values + row.cells[c].position, row.block.size, block_size, 5711d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling b + row.block.position, 5721d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling rhs + lhs_row_layout_[block]); 5730ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong } 5740ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong NoEBlockRowOuterProduct(A, row_block_counter, lhs); 5750ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong } 5760ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong} 5770ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong 5780ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong 5790ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// A row r of A, which has no e_blocks gets added to the Schur 5800ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// Complement as S += r r'. This function is responsible for computing 5810ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// the contribution of a single row r to the Schur complement. It is 5820ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// very similar in structure to EBlockRowOuterProduct except for 5830ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// one difference. It does not use any of the template 5840ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// parameters. This is because the algorithm used for detecting the 5850ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// static structure of the matrix A only pays attention to rows with 5860ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// e_blocks. This is becase rows without e_blocks are rare and 5870ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// typically arise from regularization terms in the original 5880ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// optimization problem, and have a very different structure than the 5890ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// rows with e_blocks. Including them in the static structure 5900ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// detection will lead to most template parameters being set to 5910ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// dynamic. Since the number of rows without e_blocks is small, the 5920ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// lack of templating is not an issue. 5930ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kongtemplate <int kRowBlockSize, int kEBlockSize, int kFBlockSize> 5940ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kongvoid 5950ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus KongSchurEliminator<kRowBlockSize, kEBlockSize, kFBlockSize>:: 5961d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha HaeberlingNoEBlockRowOuterProduct(const BlockSparseMatrix* A, 5970ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong int row_block_index, 5980ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong BlockRandomAccessMatrix* lhs) { 5990ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong const CompressedRowBlockStructure* bs = A->block_structure(); 6000ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong const CompressedRow& row = bs->rows[row_block_index]; 6011d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling const double* values = A->values(); 6020ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong for (int i = 0; i < row.cells.size(); ++i) { 6030ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong const int block1 = row.cells[i].block_id - num_eliminate_blocks_; 6040ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong DCHECK_GE(block1, 0); 6050ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong 6060ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong const int block1_size = bs->cols[row.cells[i].block_id].size; 6070ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong int r, c, row_stride, col_stride; 6080ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong CellInfo* cell_info = lhs->GetCell(block1, block1, 6090ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong &r, &c, 6100ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong &row_stride, &col_stride); 6110ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong if (cell_info != NULL) { 6120ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong CeresMutexLock l(&cell_info->m); 6131d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling // This multiply currently ignores the fact that this is a 6141d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling // symmetric outer product. 6151d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling MatrixTransposeMatrixMultiply 6161d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling <Eigen::Dynamic, Eigen::Dynamic, Eigen::Dynamic, Eigen::Dynamic, 1>( 6171d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling values + row.cells[i].position, row.block.size, block1_size, 6181d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling values + row.cells[i].position, row.block.size, block1_size, 6191d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling cell_info->values, r, c, row_stride, col_stride); 6200ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong } 6210ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong 6220ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong for (int j = i + 1; j < row.cells.size(); ++j) { 6230ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong const int block2 = row.cells[j].block_id - num_eliminate_blocks_; 6240ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong DCHECK_GE(block2, 0); 6250ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong DCHECK_LT(block1, block2); 6260ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong int r, c, row_stride, col_stride; 6270ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong CellInfo* cell_info = lhs->GetCell(block1, block2, 6280ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong &r, &c, 6290ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong &row_stride, &col_stride); 6301d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling if (cell_info != NULL) { 6311d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling const int block2_size = bs->cols[row.cells[j].block_id].size; 6321d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling CeresMutexLock l(&cell_info->m); 6331d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling MatrixTransposeMatrixMultiply 6341d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling <Eigen::Dynamic, Eigen::Dynamic, Eigen::Dynamic, Eigen::Dynamic, 1>( 6351d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling values + row.cells[i].position, row.block.size, block1_size, 6361d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling values + row.cells[j].position, row.block.size, block2_size, 6371d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling cell_info->values, r, c, row_stride, col_stride); 6380ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong } 6390ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong } 6400ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong } 6410ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong} 6420ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong 6430ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// For a row with an e_block, compute the contribition S += F'F. This 6440ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// function has the same structure as NoEBlockRowOuterProduct, except 6450ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// that this function uses the template parameters. 6460ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kongtemplate <int kRowBlockSize, int kEBlockSize, int kFBlockSize> 6470ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kongvoid 6480ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus KongSchurEliminator<kRowBlockSize, kEBlockSize, kFBlockSize>:: 6491d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha HaeberlingEBlockRowOuterProduct(const BlockSparseMatrix* A, 6500ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong int row_block_index, 6510ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong BlockRandomAccessMatrix* lhs) { 6520ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong const CompressedRowBlockStructure* bs = A->block_structure(); 6530ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong const CompressedRow& row = bs->rows[row_block_index]; 6541d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling const double* values = A->values(); 6550ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong for (int i = 1; i < row.cells.size(); ++i) { 6560ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong const int block1 = row.cells[i].block_id - num_eliminate_blocks_; 6570ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong DCHECK_GE(block1, 0); 6580ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong 6590ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong const int block1_size = bs->cols[row.cells[i].block_id].size; 6601d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling int r, c, row_stride, col_stride; 6611d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling CellInfo* cell_info = lhs->GetCell(block1, block1, 6621d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling &r, &c, 6631d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling &row_stride, &col_stride); 6641d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling if (cell_info != NULL) { 6650ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong CeresMutexLock l(&cell_info->m); 6661d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling // block += b1.transpose() * b1; 6671d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling MatrixTransposeMatrixMultiply 6681d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling <kRowBlockSize, kFBlockSize, kRowBlockSize, kFBlockSize, 1>( 6691d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling values + row.cells[i].position, row.block.size, block1_size, 6701d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling values + row.cells[i].position, row.block.size, block1_size, 6711d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling cell_info->values, r, c, row_stride, col_stride); 6720ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong } 6730ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong 6740ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong for (int j = i + 1; j < row.cells.size(); ++j) { 6750ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong const int block2 = row.cells[j].block_id - num_eliminate_blocks_; 6760ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong DCHECK_GE(block2, 0); 6770ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong DCHECK_LT(block1, block2); 6780ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong const int block2_size = bs->cols[row.cells[j].block_id].size; 6790ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong int r, c, row_stride, col_stride; 6800ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong CellInfo* cell_info = lhs->GetCell(block1, block2, 6810ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong &r, &c, 6820ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong &row_stride, &col_stride); 6831d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling if (cell_info != NULL) { 6841d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling // block += b1.transpose() * b2; 6851d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling CeresMutexLock l(&cell_info->m); 6861d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling MatrixTransposeMatrixMultiply 6871d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling <kRowBlockSize, kFBlockSize, kRowBlockSize, kFBlockSize, 1>( 6881d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling values + row.cells[i].position, row.block.size, block1_size, 6891d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling values + row.cells[j].position, row.block.size, block2_size, 6901d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling cell_info->values, r, c, row_stride, col_stride); 6910ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong } 6920ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong } 6930ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong } 6940ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong} 6950ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong 6960ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong} // namespace internal 6970ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong} // namespace ceres 6980ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong 6990ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong#endif // CERES_INTERNAL_SCHUR_ELIMINATOR_IMPL_H_ 700