10ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// Ceres Solver - A fast non-linear least squares minimizer
20ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// Copyright 2010, 2011, 2012 Google Inc. All rights reserved.
30ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// http://code.google.com/p/ceres-solver/
40ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong//
50ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// Redistribution and use in source and binary forms, with or without
60ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// modification, are permitted provided that the following conditions are met:
70ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong//
80ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// * Redistributions of source code must retain the above copyright notice,
90ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong//   this list of conditions and the following disclaimer.
100ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// * Redistributions in binary form must reproduce the above copyright notice,
110ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong//   this list of conditions and the following disclaimer in the documentation
120ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong//   and/or other materials provided with the distribution.
130ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// * Neither the name of Google Inc. nor the names of its contributors may be
140ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong//   used to endorse or promote products derived from this software without
150ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong//   specific prior written permission.
160ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong//
170ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
180ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
190ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
200ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
210ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
220ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
230ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
240ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
250ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
260ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
270ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// POSSIBILITY OF SUCH DAMAGE.
280ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong//
290ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// Author: sameeragarwal@google.com (Sameer Agarwal)
300ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong//
310ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// TODO(sameeragarwal): row_block_counter can perhaps be replaced by
320ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// Chunk::start ?
330ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong
340ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong#ifndef CERES_INTERNAL_SCHUR_ELIMINATOR_IMPL_H_
350ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong#define CERES_INTERNAL_SCHUR_ELIMINATOR_IMPL_H_
360ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong
370ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// Eigen has an internal threshold switching between different matrix
380ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// multiplication algorithms. In particular for matrices larger than
390ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD it uses a cache friendly
400ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// matrix matrix product algorithm that has a higher setup cost. For
410ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// matrix sizes close to this threshold, especially when the matrices
420ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// are thin and long, the default choice may not be optimal. This is
430ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// the case for us, as the default choice causes a 30% performance
440ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// regression when we moved from Eigen2 to Eigen3.
451d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling
460ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong#define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 10
470ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong
4879397c21138f54fcff6ec067b44b847f1f7e0e98Carlos Hernandez// This include must come before any #ifndef check on Ceres compile options.
4979397c21138f54fcff6ec067b44b847f1f7e0e98Carlos Hernandez#include "ceres/internal/port.h"
5079397c21138f54fcff6ec067b44b847f1f7e0e98Carlos Hernandez
511d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling#ifdef CERES_USE_OPENMP
521d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling#include <omp.h>
531d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling#endif
541d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling
550ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong#include <algorithm>
560ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong#include <map>
570ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong#include "ceres/block_random_access_matrix.h"
580ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong#include "ceres/block_sparse_matrix.h"
590ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong#include "ceres/block_structure.h"
601d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling#include "ceres/internal/eigen.h"
611d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling#include "ceres/internal/fixed_array.h"
621d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling#include "ceres/internal/scoped_ptr.h"
630ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong#include "ceres/map_util.h"
640ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong#include "ceres/schur_eliminator.h"
65399f7d09e0c45af54b77b4ab9508d6f23759b927Scott Ettinger#include "ceres/small_blas.h"
660ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong#include "ceres/stl_util.h"
671d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling#include "Eigen/Dense"
681d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling#include "glog/logging.h"
690ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong
700ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kongnamespace ceres {
710ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kongnamespace internal {
720ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong
730ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kongtemplate <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
740ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus KongSchurEliminator<kRowBlockSize, kEBlockSize, kFBlockSize>::~SchurEliminator() {
750ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong  STLDeleteElements(&rhs_locks_);
760ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong}
770ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong
780ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kongtemplate <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
790ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kongvoid
800ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus KongSchurEliminator<kRowBlockSize, kEBlockSize, kFBlockSize>::
810ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus KongInit(int num_eliminate_blocks, const CompressedRowBlockStructure* bs) {
820ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong  CHECK_GT(num_eliminate_blocks, 0)
830ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong      << "SchurComplementSolver cannot be initialized with "
840ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong      << "num_eliminate_blocks = 0.";
850ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong
860ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong  num_eliminate_blocks_ = num_eliminate_blocks;
870ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong
880ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong  const int num_col_blocks = bs->cols.size();
890ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong  const int num_row_blocks = bs->rows.size();
900ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong
910ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong  buffer_size_ = 1;
920ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong  chunks_.clear();
930ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong  lhs_row_layout_.clear();
940ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong
950ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong  int lhs_num_rows = 0;
960ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong  // Add a map object for each block in the reduced linear system
970ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong  // and build the row/column block structure of the reduced linear
980ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong  // system.
990ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong  lhs_row_layout_.resize(num_col_blocks - num_eliminate_blocks_);
1000ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong  for (int i = num_eliminate_blocks_; i < num_col_blocks; ++i) {
1010ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    lhs_row_layout_[i - num_eliminate_blocks_] = lhs_num_rows;
1020ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    lhs_num_rows += bs->cols[i].size;
1030ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong  }
1040ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong
1050ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong  int r = 0;
1060ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong  // Iterate over the row blocks of A, and detect the chunks. The
1070ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong  // matrix should already have been ordered so that all rows
1080ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong  // containing the same y block are vertically contiguous. Along
1090ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong  // the way also compute the amount of space each chunk will need
1100ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong  // to perform the elimination.
1110ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong  while (r < num_row_blocks) {
1120ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    const int chunk_block_id = bs->rows[r].cells.front().block_id;
1130ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    if (chunk_block_id >= num_eliminate_blocks_) {
1140ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong      break;
1150ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    }
1160ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong
1170ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    chunks_.push_back(Chunk());
1180ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    Chunk& chunk = chunks_.back();
1190ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    chunk.size = 0;
1200ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    chunk.start = r;
1210ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    int buffer_size = 0;
1220ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    const int e_block_size = bs->cols[chunk_block_id].size;
1230ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong
1240ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    // Add to the chunk until the first block in the row is
1250ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    // different than the one in the first row for the chunk.
1260ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    while (r + chunk.size < num_row_blocks) {
1270ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong      const CompressedRow& row = bs->rows[r + chunk.size];
1280ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong      if (row.cells.front().block_id != chunk_block_id) {
1290ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong        break;
1300ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong      }
1310ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong
1320ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong      // Iterate over the blocks in the row, ignoring the first
1330ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong      // block since it is the one to be eliminated.
1340ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong      for (int c = 1; c < row.cells.size(); ++c) {
1350ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong        const Cell& cell = row.cells[c];
1360ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong        if (InsertIfNotPresent(
1370ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong                &(chunk.buffer_layout), cell.block_id, buffer_size)) {
1380ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong          buffer_size += e_block_size * bs->cols[cell.block_id].size;
1390ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong        }
1400ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong      }
1410ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong
1420ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong      buffer_size_ = max(buffer_size, buffer_size_);
1430ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong      ++chunk.size;
1440ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    }
1450ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong
1460ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    CHECK_GT(chunk.size, 0);
1470ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    r += chunk.size;
1480ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong  }
1490ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong  const Chunk& chunk = chunks_.back();
1500ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong
1510ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong  uneliminated_row_begins_ = chunk.start + chunk.size;
1520ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong  if (num_threads_ > 1) {
1530ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    random_shuffle(chunks_.begin(), chunks_.end());
1540ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong  }
1550ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong
1560ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong  buffer_.reset(new double[buffer_size_ * num_threads_]);
1570ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong
1581d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling  // chunk_outer_product_buffer_ only needs to store e_block_size *
1591d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling  // f_block_size, which is always less than buffer_size_, so we just
1601d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling  // allocate buffer_size_ per thread.
1611d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling  chunk_outer_product_buffer_.reset(new double[buffer_size_ * num_threads_]);
1621d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling
1630ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong  STLDeleteElements(&rhs_locks_);
1640ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong  rhs_locks_.resize(num_col_blocks - num_eliminate_blocks_);
1650ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong  for (int i = 0; i < num_col_blocks - num_eliminate_blocks_; ++i) {
1660ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    rhs_locks_[i] = new Mutex;
1670ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong  }
1680ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong}
1690ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong
1700ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kongtemplate <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
1710ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kongvoid
1720ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus KongSchurEliminator<kRowBlockSize, kEBlockSize, kFBlockSize>::
1731d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha HaeberlingEliminate(const BlockSparseMatrix* A,
1740ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong          const double* b,
1750ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong          const double* D,
1760ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong          BlockRandomAccessMatrix* lhs,
1770ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong          double* rhs) {
1780ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong  if (lhs->num_rows() > 0) {
1790ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    lhs->SetZero();
1800ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    VectorRef(rhs, lhs->num_rows()).setZero();
1810ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong  }
1820ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong
1830ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong  const CompressedRowBlockStructure* bs = A->block_structure();
1840ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong  const int num_col_blocks = bs->cols.size();
1850ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong
1860ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong  // Add the diagonal to the schur complement.
1870ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong  if (D != NULL) {
1880ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong#pragma omp parallel for num_threads(num_threads_) schedule(dynamic)
1890ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    for (int i = num_eliminate_blocks_; i < num_col_blocks; ++i) {
1900ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong      const int block_id = i - num_eliminate_blocks_;
1910ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong      int r, c, row_stride, col_stride;
1920ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong      CellInfo* cell_info = lhs->GetCell(block_id, block_id,
1930ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong                                         &r, &c,
1940ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong                                         &row_stride, &col_stride);
1950ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong      if (cell_info != NULL) {
1960ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong        const int block_size = bs->cols[i].size;
1970ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong        typename EigenTypes<kFBlockSize>::ConstVectorRef
1980ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong            diag(D + bs->cols[i].position, block_size);
1990ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong
2000ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong        CeresMutexLock l(&cell_info->m);
2010ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong        MatrixRef m(cell_info->values, row_stride, col_stride);
2020ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong        m.block(r, c, block_size, block_size).diagonal()
2030ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong            += diag.array().square().matrix();
2040ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong      }
2050ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    }
2060ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong  }
2070ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong
2080ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong  // Eliminate y blocks one chunk at a time.  For each chunk,x3
2090ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong  // compute the entries of the normal equations and the gradient
2100ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong  // vector block corresponding to the y block and then apply
2110ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong  // Gaussian elimination to them. The matrix ete stores the normal
2120ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong  // matrix corresponding to the block being eliminated and array
2130ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong  // buffer_ contains the non-zero blocks in the row corresponding
2140ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong  // to this y block in the normal equations. This computation is
2150ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong  // done in ChunkDiagonalBlockAndGradient. UpdateRhs then applies
2160ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong  // gaussian elimination to the rhs of the normal equations,
2170ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong  // updating the rhs of the reduced linear system by modifying rhs
2180ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong  // blocks for all the z blocks that share a row block/residual
2190ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong  // term with the y block. EliminateRowOuterProduct does the
2200ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong  // corresponding operation for the lhs of the reduced linear
2210ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong  // system.
2220ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong#pragma omp parallel for num_threads(num_threads_) schedule(dynamic)
2230ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong  for (int i = 0; i < chunks_.size(); ++i) {
2240ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong#ifdef CERES_USE_OPENMP
2250ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    int thread_id = omp_get_thread_num();
2260ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong#else
2270ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    int thread_id = 0;
2280ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong#endif
2290ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    double* buffer = buffer_.get() + thread_id * buffer_size_;
2300ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    const Chunk& chunk = chunks_[i];
2310ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    const int e_block_id = bs->rows[chunk.start].cells.front().block_id;
2320ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    const int e_block_size = bs->cols[e_block_id].size;
2330ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong
2340ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    VectorRef(buffer, buffer_size_).setZero();
2350ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong
2360ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    typename EigenTypes<kEBlockSize, kEBlockSize>::Matrix
2370ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong        ete(e_block_size, e_block_size);
2380ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong
2390ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    if (D != NULL) {
2400ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong      const typename EigenTypes<kEBlockSize>::ConstVectorRef
2410ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong          diag(D + bs->cols[e_block_id].position, e_block_size);
2420ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong      ete = diag.array().square().matrix().asDiagonal();
2430ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    } else {
2440ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong      ete.setZero();
2450ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    }
2460ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong
2471d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling    FixedArray<double, 8> g(e_block_size);
2481d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling    typename EigenTypes<kEBlockSize>::VectorRef gref(g.get(), e_block_size);
2491d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling    gref.setZero();
2500ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong
2510ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    // We are going to be computing
2520ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    //
2530ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    //   S += F'F - F'E(E'E)^{-1}E'F
2540ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    //
2550ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    // for each Chunk. The computation is broken down into a number of
2560ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    // function calls as below.
2570ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong
2580ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    // Compute the outer product of the e_blocks with themselves (ete
2590ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    // = E'E). Compute the product of the e_blocks with the
2600ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    // corresonding f_blocks (buffer = E'F), the gradient of the terms
2610ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    // in this chunk (g) and add the outer product of the f_blocks to
2620ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    // Schur complement (S += F'F).
2630ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    ChunkDiagonalBlockAndGradient(
2641d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling        chunk, A, b, chunk.start, &ete, g.get(), buffer, lhs);
2650ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong
2660ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    // Normally one wouldn't compute the inverse explicitly, but
2670ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    // e_block_size will typically be a small number like 3, in
2680ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    // which case its much faster to compute the inverse once and
2690ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    // use it to multiply other matrices/vectors instead of doing a
2700ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    // Solve call over and over again.
2710ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    typename EigenTypes<kEBlockSize, kEBlockSize>::Matrix inverse_ete =
2720ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong        ete
2730ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong        .template selfadjointView<Eigen::Upper>()
2741d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling        .llt()
2750ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong        .solve(Matrix::Identity(e_block_size, e_block_size));
2760ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong
2770ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    // For the current chunk compute and update the rhs of the reduced
2780ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    // linear system.
2790ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    //
2800ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    //   rhs = F'b - F'E(E'E)^(-1) E'b
2811d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling
2821d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling    FixedArray<double, 8> inverse_ete_g(e_block_size);
2831d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling    MatrixVectorMultiply<kEBlockSize, kEBlockSize, 0>(
2841d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling        inverse_ete.data(),
2851d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling        e_block_size,
2861d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling        e_block_size,
2871d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling        g.get(),
2881d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling        inverse_ete_g.get());
2891d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling
2901d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling    UpdateRhs(chunk, A, b, chunk.start, inverse_ete_g.get(), rhs);
2910ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong
2920ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    // S -= F'E(E'E)^{-1}E'F
2930ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    ChunkOuterProduct(bs, inverse_ete, buffer, chunk.buffer_layout, lhs);
2940ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong  }
2950ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong
2960ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong  // For rows with no e_blocks, the schur complement update reduces to
2970ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong  // S += F'F.
2980ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong  NoEBlockRowsUpdate(A, b,  uneliminated_row_begins_, lhs, rhs);
2990ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong}
3000ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong
3010ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kongtemplate <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
3020ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kongvoid
3030ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus KongSchurEliminator<kRowBlockSize, kEBlockSize, kFBlockSize>::
3041d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha HaeberlingBackSubstitute(const BlockSparseMatrix* A,
3050ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong               const double* b,
3060ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong               const double* D,
3070ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong               const double* z,
3080ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong               double* y) {
3090ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong  const CompressedRowBlockStructure* bs = A->block_structure();
3100ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong#pragma omp parallel for num_threads(num_threads_) schedule(dynamic)
3110ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong  for (int i = 0; i < chunks_.size(); ++i) {
3120ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    const Chunk& chunk = chunks_[i];
3130ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    const int e_block_id = bs->rows[chunk.start].cells.front().block_id;
3140ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    const int e_block_size = bs->cols[e_block_id].size;
3150ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong
3161d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling    double* y_ptr = y +  bs->cols[e_block_id].position;
3171d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling    typename EigenTypes<kEBlockSize>::VectorRef y_block(y_ptr, e_block_size);
3180ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong
3190ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    typename EigenTypes<kEBlockSize, kEBlockSize>::Matrix
3200ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong        ete(e_block_size, e_block_size);
3210ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    if (D != NULL) {
3220ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong      const typename EigenTypes<kEBlockSize>::ConstVectorRef
3230ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong          diag(D + bs->cols[e_block_id].position, e_block_size);
3240ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong      ete = diag.array().square().matrix().asDiagonal();
3250ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    } else {
3260ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong      ete.setZero();
3270ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    }
3280ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong
3291d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling    const double* values = A->values();
3300ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    for (int j = 0; j < chunk.size; ++j) {
3310ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong      const CompressedRow& row = bs->rows[chunk.start + j];
3320ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong      const Cell& e_cell = row.cells.front();
3330ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong      DCHECK_EQ(e_block_id, e_cell.block_id);
3340ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong
3351d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling      FixedArray<double, 8> sj(row.block.size);
3361d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling
3371d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling      typename EigenTypes<kRowBlockSize>::VectorRef(sj.get(), row.block.size) =
3380ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong          typename EigenTypes<kRowBlockSize>::ConstVectorRef
3391d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling          (b + bs->rows[chunk.start + j].block.position, row.block.size);
3400ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong
3410ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong      for (int c = 1; c < row.cells.size(); ++c) {
3420ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong        const int f_block_id = row.cells[c].block_id;
3430ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong        const int f_block_size = bs->cols[f_block_id].size;
3440ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong        const int r_block = f_block_id - num_eliminate_blocks_;
3450ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong
3461d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling        MatrixVectorMultiply<kRowBlockSize, kFBlockSize, -1>(
3471d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling            values + row.cells[c].position, row.block.size, f_block_size,
3481d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling            z + lhs_row_layout_[r_block],
3491d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling            sj.get());
3500ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong      }
3510ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong
3521d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling      MatrixTransposeVectorMultiply<kRowBlockSize, kEBlockSize, 1>(
3531d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling          values + e_cell.position, row.block.size, e_block_size,
3541d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling          sj.get(),
3551d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling          y_ptr);
3561d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling
3571d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling      MatrixTransposeMatrixMultiply
3581d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling          <kRowBlockSize, kEBlockSize, kRowBlockSize, kEBlockSize, 1>(
3591d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling              values + e_cell.position, row.block.size, e_block_size,
3601d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling              values + e_cell.position, row.block.size, e_block_size,
3611d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling              ete.data(), 0, 0, e_block_size, e_block_size);
3620ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    }
3630ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong
3641d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling    ete.llt().solveInPlace(y_block);
3650ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong  }
3660ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong}
3670ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong
3680ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// Update the rhs of the reduced linear system. Compute
3690ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong//
3700ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong//   F'b - F'E(E'E)^(-1) E'b
3710ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kongtemplate <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
3720ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kongvoid
3730ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus KongSchurEliminator<kRowBlockSize, kEBlockSize, kFBlockSize>::
3740ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus KongUpdateRhs(const Chunk& chunk,
3751d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling          const BlockSparseMatrix* A,
3760ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong          const double* b,
3770ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong          int row_block_counter,
3781d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling          const double* inverse_ete_g,
3790ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong          double* rhs) {
3800ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong  const CompressedRowBlockStructure* bs = A->block_structure();
3811d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling  const int e_block_id = bs->rows[chunk.start].cells.front().block_id;
3821d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling  const int e_block_size = bs->cols[e_block_id].size;
3831d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling
3840ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong  int b_pos = bs->rows[row_block_counter].block.position;
3851d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling  const double* values = A->values();
3860ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong  for (int j = 0; j < chunk.size; ++j) {
3870ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    const CompressedRow& row = bs->rows[row_block_counter + j];
3880ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    const Cell& e_cell = row.cells.front();
3890ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong
3901d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling    typename EigenTypes<kRowBlockSize>::Vector sj =
3910ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong        typename EigenTypes<kRowBlockSize>::ConstVectorRef
3921d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling        (b + b_pos, row.block.size);
3931d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling
3941d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling    MatrixVectorMultiply<kRowBlockSize, kEBlockSize, -1>(
3951d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling        values + e_cell.position, row.block.size, e_block_size,
3961d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling        inverse_ete_g, sj.data());
3970ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong
3980ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    for (int c = 1; c < row.cells.size(); ++c) {
3990ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong      const int block_id = row.cells[c].block_id;
4000ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong      const int block_size = bs->cols[block_id].size;
4010ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong      const int block = block_id - num_eliminate_blocks_;
4020ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong      CeresMutexLock l(rhs_locks_[block]);
4031d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling      MatrixTransposeVectorMultiply<kRowBlockSize, kFBlockSize, 1>(
4041d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling          values + row.cells[c].position,
4051d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling          row.block.size, block_size,
4061d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling          sj.data(), rhs + lhs_row_layout_[block]);
4070ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    }
4080ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    b_pos += row.block.size;
4090ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong  }
4100ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong}
4110ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong
4120ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// Given a Chunk - set of rows with the same e_block, e.g. in the
4130ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// following Chunk with two rows.
4140ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong//
4150ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong//                E                   F
4160ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong//      [ y11   0   0   0 |  z11     0    0   0    z51]
4170ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong//      [ y12   0   0   0 |  z12   z22    0   0      0]
4180ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong//
4190ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// this function computes twp matrices. The diagonal block matrix
4200ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong//
4210ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong//   ete = y11 * y11' + y12 * y12'
4220ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong//
4230ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// and the off diagonal blocks in the Guass Newton Hessian.
4240ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong//
4250ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong//   buffer = [y11'(z11 + z12), y12' * z22, y11' * z51]
4260ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong//
4270ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// which are zero compressed versions of the block sparse matrices E'E
4280ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// and E'F.
4290ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong//
4300ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// and the gradient of the e_block, E'b.
4310ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kongtemplate <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
4320ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kongvoid
4330ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus KongSchurEliminator<kRowBlockSize, kEBlockSize, kFBlockSize>::
4340ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus KongChunkDiagonalBlockAndGradient(
4350ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    const Chunk& chunk,
4361d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling    const BlockSparseMatrix* A,
4370ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    const double* b,
4380ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    int row_block_counter,
4390ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    typename EigenTypes<kEBlockSize, kEBlockSize>::Matrix* ete,
4401d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling    double* g,
4410ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    double* buffer,
4420ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    BlockRandomAccessMatrix* lhs) {
4430ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong  const CompressedRowBlockStructure* bs = A->block_structure();
4440ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong
4450ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong  int b_pos = bs->rows[row_block_counter].block.position;
4460ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong  const int e_block_size = ete->rows();
4470ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong
4480ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong  // Iterate over the rows in this chunk, for each row, compute the
4490ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong  // contribution of its F blocks to the Schur complement, the
4500ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong  // contribution of its E block to the matrix EE' (ete), and the
4510ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong  // corresponding block in the gradient vector.
4521d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling  const double* values = A->values();
4530ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong  for (int j = 0; j < chunk.size; ++j) {
4540ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    const CompressedRow& row = bs->rows[row_block_counter + j];
4550ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong
4560ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    if (row.cells.size() > 1) {
4570ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong      EBlockRowOuterProduct(A, row_block_counter + j, lhs);
4580ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    }
4590ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong
4600ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    // Extract the e_block, ETE += E_i' E_i
4610ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    const Cell& e_cell = row.cells.front();
4621d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling    MatrixTransposeMatrixMultiply
4631d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling        <kRowBlockSize, kEBlockSize, kRowBlockSize, kEBlockSize, 1>(
4641d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling            values + e_cell.position, row.block.size, e_block_size,
4651d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling            values + e_cell.position, row.block.size, e_block_size,
4661d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling            ete->data(), 0, 0, e_block_size, e_block_size);
4670ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong
4680ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    // g += E_i' b_i
4691d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling    MatrixTransposeVectorMultiply<kRowBlockSize, kEBlockSize, 1>(
4701d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling        values + e_cell.position, row.block.size, e_block_size,
4711d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling        b + b_pos,
4721d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling        g);
4731d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling
4740ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong
4750ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    // buffer = E'F. This computation is done by iterating over the
4760ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    // f_blocks for each row in the chunk.
4770ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    for (int c = 1; c < row.cells.size(); ++c) {
4780ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong      const int f_block_id = row.cells[c].block_id;
4790ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong      const int f_block_size = bs->cols[f_block_id].size;
4800ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong      double* buffer_ptr =
4810ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong          buffer +  FindOrDie(chunk.buffer_layout, f_block_id);
4821d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling      MatrixTransposeMatrixMultiply
4831d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling          <kRowBlockSize, kEBlockSize, kRowBlockSize, kFBlockSize, 1>(
4841d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling          values + e_cell.position, row.block.size, e_block_size,
4851d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling          values + row.cells[c].position, row.block.size, f_block_size,
4861d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling          buffer_ptr, 0, 0, e_block_size, f_block_size);
4870ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    }
4880ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    b_pos += row.block.size;
4890ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong  }
4900ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong}
4910ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong
4920ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// Compute the outer product F'E(E'E)^{-1}E'F and subtract it from the
4930ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// Schur complement matrix, i.e
4940ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong//
4950ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong//  S -= F'E(E'E)^{-1}E'F.
4960ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kongtemplate <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
4970ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kongvoid
4980ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus KongSchurEliminator<kRowBlockSize, kEBlockSize, kFBlockSize>::
4990ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus KongChunkOuterProduct(const CompressedRowBlockStructure* bs,
5000ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong                  const Matrix& inverse_ete,
5010ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong                  const double* buffer,
5020ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong                  const BufferLayoutType& buffer_layout,
5030ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong                  BlockRandomAccessMatrix* lhs) {
5040ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong  // This is the most computationally expensive part of this
5050ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong  // code. Profiling experiments reveal that the bottleneck is not the
5060ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong  // computation of the right-hand matrix product, but memory
5070ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong  // references to the left hand side.
5080ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong  const int e_block_size = inverse_ete.rows();
5090ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong  BufferLayoutType::const_iterator it1 = buffer_layout.begin();
5101d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling
5111d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling#ifdef CERES_USE_OPENMP
5121d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling  int thread_id = omp_get_thread_num();
5131d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling#else
5141d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling  int thread_id = 0;
5151d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling#endif
5161d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling  double* b1_transpose_inverse_ete =
5171d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling      chunk_outer_product_buffer_.get() + thread_id * buffer_size_;
5181d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling
5190ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong  // S(i,j) -= bi' * ete^{-1} b_j
5200ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong  for (; it1 != buffer_layout.end(); ++it1) {
5210ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    const int block1 = it1->first - num_eliminate_blocks_;
5220ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    const int block1_size = bs->cols[it1->first].size;
5231d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling    MatrixTransposeMatrixMultiply
5241d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling        <kEBlockSize, kFBlockSize, kEBlockSize, kEBlockSize, 0>(
5251d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling        buffer + it1->second, e_block_size, block1_size,
5261d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling        inverse_ete.data(), e_block_size, e_block_size,
5271d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling        b1_transpose_inverse_ete, 0, 0, block1_size, e_block_size);
5280ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong
5290ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    BufferLayoutType::const_iterator it2 = it1;
5300ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    for (; it2 != buffer_layout.end(); ++it2) {
5310ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong      const int block2 = it2->first - num_eliminate_blocks_;
5320ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong
5330ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong      int r, c, row_stride, col_stride;
5340ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong      CellInfo* cell_info = lhs->GetCell(block1, block2,
5350ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong                                         &r, &c,
5360ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong                                         &row_stride, &col_stride);
5371d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling      if (cell_info != NULL) {
5381d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling        const int block2_size = bs->cols[it2->first].size;
5391d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling        CeresMutexLock l(&cell_info->m);
5401d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling        MatrixMatrixMultiply
5411d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling            <kFBlockSize, kEBlockSize, kEBlockSize, kFBlockSize, -1>(
5421d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling                b1_transpose_inverse_ete, block1_size, e_block_size,
5431d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling                buffer  + it2->second, e_block_size, block2_size,
5441d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling                cell_info->values, r, c, row_stride, col_stride);
5450ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong      }
5460ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    }
5470ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong  }
5480ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong}
5490ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong
5500ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// For rows with no e_blocks, the schur complement update reduces to S
5510ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// += F'F. This function iterates over the rows of A with no e_block,
5520ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// and calls NoEBlockRowOuterProduct on each row.
5530ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kongtemplate <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
5540ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kongvoid
5550ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus KongSchurEliminator<kRowBlockSize, kEBlockSize, kFBlockSize>::
5561d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha HaeberlingNoEBlockRowsUpdate(const BlockSparseMatrix* A,
5570ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong                   const double* b,
5580ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong                   int row_block_counter,
5590ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong                   BlockRandomAccessMatrix* lhs,
5600ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong                   double* rhs) {
5610ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong  const CompressedRowBlockStructure* bs = A->block_structure();
5621d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling  const double* values = A->values();
5630ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong  for (; row_block_counter < bs->rows.size(); ++row_block_counter) {
5640ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    const CompressedRow& row = bs->rows[row_block_counter];
5650ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    for (int c = 0; c < row.cells.size(); ++c) {
5660ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong      const int block_id = row.cells[c].block_id;
5670ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong      const int block_size = bs->cols[block_id].size;
5680ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong      const int block = block_id - num_eliminate_blocks_;
5691d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling      MatrixTransposeVectorMultiply<Eigen::Dynamic, Eigen::Dynamic, 1>(
5701d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling          values + row.cells[c].position, row.block.size, block_size,
5711d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling          b + row.block.position,
5721d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling          rhs + lhs_row_layout_[block]);
5730ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    }
5740ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    NoEBlockRowOuterProduct(A, row_block_counter, lhs);
5750ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong  }
5760ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong}
5770ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong
5780ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong
5790ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// A row r of A, which has no e_blocks gets added to the Schur
5800ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// Complement as S += r r'. This function is responsible for computing
5810ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// the contribution of a single row r to the Schur complement. It is
5820ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// very similar in structure to EBlockRowOuterProduct except for
5830ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// one difference. It does not use any of the template
5840ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// parameters. This is because the algorithm used for detecting the
5850ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// static structure of the matrix A only pays attention to rows with
5860ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// e_blocks. This is becase rows without e_blocks are rare and
5870ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// typically arise from regularization terms in the original
5880ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// optimization problem, and have a very different structure than the
5890ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// rows with e_blocks. Including them in the static structure
5900ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// detection will lead to most template parameters being set to
5910ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// dynamic. Since the number of rows without e_blocks is small, the
5920ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// lack of templating is not an issue.
5930ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kongtemplate <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
5940ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kongvoid
5950ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus KongSchurEliminator<kRowBlockSize, kEBlockSize, kFBlockSize>::
5961d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha HaeberlingNoEBlockRowOuterProduct(const BlockSparseMatrix* A,
5970ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong                     int row_block_index,
5980ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong                     BlockRandomAccessMatrix* lhs) {
5990ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong  const CompressedRowBlockStructure* bs = A->block_structure();
6000ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong  const CompressedRow& row = bs->rows[row_block_index];
6011d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling  const double* values = A->values();
6020ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong  for (int i = 0; i < row.cells.size(); ++i) {
6030ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    const int block1 = row.cells[i].block_id - num_eliminate_blocks_;
6040ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    DCHECK_GE(block1, 0);
6050ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong
6060ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    const int block1_size = bs->cols[row.cells[i].block_id].size;
6070ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    int r, c, row_stride, col_stride;
6080ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    CellInfo* cell_info = lhs->GetCell(block1, block1,
6090ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong                                       &r, &c,
6100ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong                                       &row_stride, &col_stride);
6110ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    if (cell_info != NULL) {
6120ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong      CeresMutexLock l(&cell_info->m);
6131d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling      // This multiply currently ignores the fact that this is a
6141d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling      // symmetric outer product.
6151d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling      MatrixTransposeMatrixMultiply
6161d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling          <Eigen::Dynamic, Eigen::Dynamic, Eigen::Dynamic, Eigen::Dynamic, 1>(
6171d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling              values + row.cells[i].position, row.block.size, block1_size,
6181d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling              values + row.cells[i].position, row.block.size, block1_size,
6191d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling              cell_info->values, r, c, row_stride, col_stride);
6200ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    }
6210ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong
6220ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    for (int j = i + 1; j < row.cells.size(); ++j) {
6230ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong      const int block2 = row.cells[j].block_id - num_eliminate_blocks_;
6240ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong      DCHECK_GE(block2, 0);
6250ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong      DCHECK_LT(block1, block2);
6260ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong      int r, c, row_stride, col_stride;
6270ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong      CellInfo* cell_info = lhs->GetCell(block1, block2,
6280ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong                                         &r, &c,
6290ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong                                         &row_stride, &col_stride);
6301d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling      if (cell_info != NULL) {
6311d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling        const int block2_size = bs->cols[row.cells[j].block_id].size;
6321d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling        CeresMutexLock l(&cell_info->m);
6331d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling        MatrixTransposeMatrixMultiply
6341d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling            <Eigen::Dynamic, Eigen::Dynamic, Eigen::Dynamic, Eigen::Dynamic, 1>(
6351d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling                values + row.cells[i].position, row.block.size, block1_size,
6361d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling                values + row.cells[j].position, row.block.size, block2_size,
6371d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling                cell_info->values, r, c, row_stride, col_stride);
6380ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong      }
6390ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    }
6400ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong  }
6410ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong}
6420ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong
6430ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// For a row with an e_block, compute the contribition S += F'F. This
6440ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// function has the same structure as NoEBlockRowOuterProduct, except
6450ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// that this function uses the template parameters.
6460ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kongtemplate <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
6470ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kongvoid
6480ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus KongSchurEliminator<kRowBlockSize, kEBlockSize, kFBlockSize>::
6491d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha HaeberlingEBlockRowOuterProduct(const BlockSparseMatrix* A,
6500ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong                      int row_block_index,
6510ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong                      BlockRandomAccessMatrix* lhs) {
6520ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong  const CompressedRowBlockStructure* bs = A->block_structure();
6530ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong  const CompressedRow& row = bs->rows[row_block_index];
6541d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling  const double* values = A->values();
6550ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong  for (int i = 1; i < row.cells.size(); ++i) {
6560ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    const int block1 = row.cells[i].block_id - num_eliminate_blocks_;
6570ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    DCHECK_GE(block1, 0);
6580ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong
6590ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    const int block1_size = bs->cols[row.cells[i].block_id].size;
6601d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling    int r, c, row_stride, col_stride;
6611d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling    CellInfo* cell_info = lhs->GetCell(block1, block1,
6621d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling                                       &r, &c,
6631d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling                                       &row_stride, &col_stride);
6641d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling    if (cell_info != NULL) {
6650ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong      CeresMutexLock l(&cell_info->m);
6661d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling      // block += b1.transpose() * b1;
6671d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling      MatrixTransposeMatrixMultiply
6681d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling          <kRowBlockSize, kFBlockSize, kRowBlockSize, kFBlockSize, 1>(
6691d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling          values + row.cells[i].position, row.block.size, block1_size,
6701d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling          values + row.cells[i].position, row.block.size, block1_size,
6711d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling          cell_info->values, r, c, row_stride, col_stride);
6720ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    }
6730ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong
6740ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    for (int j = i + 1; j < row.cells.size(); ++j) {
6750ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong      const int block2 = row.cells[j].block_id - num_eliminate_blocks_;
6760ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong      DCHECK_GE(block2, 0);
6770ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong      DCHECK_LT(block1, block2);
6780ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong      const int block2_size = bs->cols[row.cells[j].block_id].size;
6790ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong      int r, c, row_stride, col_stride;
6800ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong      CellInfo* cell_info = lhs->GetCell(block1, block2,
6810ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong                                         &r, &c,
6820ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong                                         &row_stride, &col_stride);
6831d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling      if (cell_info != NULL) {
6841d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling        // block += b1.transpose() * b2;
6851d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling        CeresMutexLock l(&cell_info->m);
6861d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling        MatrixTransposeMatrixMultiply
6871d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling            <kRowBlockSize, kFBlockSize, kRowBlockSize, kFBlockSize, 1>(
6881d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling                values + row.cells[i].position, row.block.size, block1_size,
6891d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling                values + row.cells[j].position, row.block.size, block2_size,
6901d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling                cell_info->values, r, c, row_stride, col_stride);
6910ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong      }
6920ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    }
6930ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong  }
6940ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong}
6950ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong
6960ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong}  // namespace internal
6970ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong}  // namespace ceres
6980ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong
6990ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong#endif  // CERES_INTERNAL_SCHUR_ELIMINATOR_IMPL_H_
700