// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2014 Google Inc. All rights reserved.
// http://code.google.com/p/ceres-solver/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
//   this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
//   this list of conditions and the following disclaimer in the documentation
//   and/or other materials provided with the distribution.
// * Neither the name of Google Inc. nor the names of its contributors may be
//   used to endorse or promote products derived from this software without
//   specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
// Author: sameeragarwal@google.com (Sameer Agarwal)

#include "ceres/internal/port.h"

#include <algorithm>
#include <ctime>
#include <set>
#include <vector>

#include "ceres/block_random_access_dense_matrix.h"
#include "ceres/block_random_access_matrix.h"
#include "ceres/block_random_access_sparse_matrix.h"
#include "ceres/block_sparse_matrix.h"
#include "ceres/block_structure.h"
#include "ceres/cxsparse.h"
#include "ceres/detect_structure.h"
#include "ceres/internal/eigen.h"
#include "ceres/internal/scoped_ptr.h"
#include "ceres/lapack.h"
#include "ceres/linear_solver.h"
#include "ceres/schur_complement_solver.h"
#include "ceres/suitesparse.h"
#include "ceres/triplet_sparse_matrix.h"
#include "ceres/types.h"
#include "ceres/wall_time.h"
#include "Eigen/Dense"
#include "Eigen/SparseCore"

namespace ceres {
namespace internal {

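// Solve the system Ax = b in two stages: eliminate the e blocks (the
// first elimination group) to form the Schur complement of the reduced
// system, solve the reduced system for the f blocks, and then back
// substitute to recover the e blocks.
//
// In block notation, with E denoting the columns corresponding to the
// e blocks, F the columns corresponding to the f blocks, and ignoring
// the regularization diagonal D, the normal equations are
//
//   [ E'E  E'F ] [y]   [ E'b ]
//   [ F'E  F'F ] [z] = [ F'b ]
//
// The eliminator computes the Schur complement
//
//   S = F'F - F'E (E'E)^-1 E'F,   r = F'b - F'E (E'E)^-1 E'b,
//
// SolveReducedLinearSystem solves S z = r, and BackSubstitute recovers
//
//   y = (E'E)^-1 (E'b - E'F z).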
LinearSolver::Summary SchurComplementSolver::SolveImpl(
    BlockSparseMatrix* A,
    const double* b,
    const LinearSolver::PerSolveOptions& per_solve_options,
    double* x) {
  EventLogger event_logger("SchurComplementSolver::Solve");

  if (eliminator_.get() == NULL) {
    InitStorage(A->block_structure());
    DetectStructure(*A->block_structure(),
                    options_.elimination_groups[0],
                    &options_.row_block_size,
                    &options_.e_block_size,
                    &options_.f_block_size);
    eliminator_.reset(CHECK_NOTNULL(SchurEliminatorBase::Create(options_)));
    eliminator_->Init(options_.elimination_groups[0], A->block_structure());
  }
  fill(x, x + A->num_cols(), 0.0);
  event_logger.AddEvent("Setup");

  eliminator_->Eliminate(A, b, per_solve_options.D, lhs_.get(), rhs_.get());
  event_logger.AddEvent("Eliminate");

  double* reduced_solution = x + A->num_cols() - lhs_->num_cols();
  const LinearSolver::Summary summary =
      SolveReducedLinearSystem(reduced_solution);
  event_logger.AddEvent("ReducedSolve");

  if (summary.termination_type == LINEAR_SOLVER_SUCCESS) {
    eliminator_->BackSubstitute(A, b, per_solve_options.D, reduced_solution, x);
    event_logger.AddEvent("BackSubstitute");
  }

  return summary;
}
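
// A minimal usage sketch of how these solvers are reached through the
// public API (assumes a ceres::Problem named problem has already been
// set up; the dispatch from the option to this class happens in the
// linear solver factory, not in this file):
//
//   ceres::Solver::Options options;
//   options.linear_solver_type = ceres::DENSE_SCHUR;  // or ceres::SPARSE_SCHUR
//   ceres::Solver::Summary summary;
//   ceres::Solve(options, &problem, &summary);
//
// DENSE_SCHUR corresponds to DenseSchurComplementSolver below and
// SPARSE_SCHUR to SparseSchurComplementSolver.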

// Initialize a BlockRandomAccessDenseMatrix to store the Schur
// complement.
void DenseSchurComplementSolver::InitStorage(
    const CompressedRowBlockStructure* bs) {
  const int num_eliminate_blocks = options().elimination_groups[0];
  const int num_col_blocks = bs->cols.size();

  vector<int> blocks(num_col_blocks - num_eliminate_blocks, 0);
  for (int i = num_eliminate_blocks, j = 0;
       i < num_col_blocks;
       ++i, ++j) {
    blocks[j] = bs->cols[i].size;
  }

  set_lhs(new BlockRandomAccessDenseMatrix(blocks));
  set_rhs(new double[lhs()->num_rows()]);
}

// Solve the system Sx = r, assuming that the matrix S is stored in a
// BlockRandomAccessDenseMatrix. The linear system is solved using
// Eigen's Cholesky factorization.
LinearSolver::Summary
DenseSchurComplementSolver::SolveReducedLinearSystem(double* solution) {
  LinearSolver::Summary summary;
  summary.num_iterations = 0;
  summary.termination_type = LINEAR_SOLVER_SUCCESS;
  summary.message = "Success.";

  const BlockRandomAccessDenseMatrix* m =
      down_cast<const BlockRandomAccessDenseMatrix*>(lhs());
  const int num_rows = m->num_rows();

  // The case where there are no f blocks, and the system is block
  // diagonal.
  if (num_rows == 0) {
    return summary;
  }

  summary.num_iterations = 1;

  if (options().dense_linear_algebra_library_type == EIGEN) {
    Eigen::LLT<Matrix, Eigen::Upper> llt =
        ConstMatrixRef(m->values(), num_rows, num_rows)
        .selfadjointView<Eigen::Upper>()
        .llt();
    if (llt.info() != Eigen::Success) {
      summary.termination_type = LINEAR_SOLVER_FAILURE;
      summary.message =
          "Eigen failure. Unable to perform dense Cholesky factorization.";
      return summary;
    }

    VectorRef(solution, num_rows) = llt.solve(ConstVectorRef(rhs(), num_rows));
  } else {
    VectorRef(solution, num_rows) = ConstVectorRef(rhs(), num_rows);
    summary.termination_type =
        LAPACK::SolveInPlaceUsingCholesky(num_rows,
                                          m->values(),
                                          solution,
                                          &summary.message);
  }

  return summary;
}

SparseSchurComplementSolver::SparseSchurComplementSolver(
    const LinearSolver::Options& options)
    : SchurComplementSolver(options),
      factor_(NULL),
      cxsparse_factor_(NULL) {
}

SparseSchurComplementSolver::~SparseSchurComplementSolver() {
  if (factor_ != NULL) {
    ss_.Free(factor_);
    factor_ = NULL;
  }

  if (cxsparse_factor_ != NULL) {
    cxsparse_.Free(cxsparse_factor_);
    cxsparse_factor_ = NULL;
  }
}

// Determine the non-zero blocks in the Schur complement matrix, and
// initialize a BlockRandomAccessSparseMatrix object.
void SparseSchurComplementSolver::InitStorage(
    const CompressedRowBlockStructure* bs) {
  const int num_eliminate_blocks = options().elimination_groups[0];
  const int num_col_blocks = bs->cols.size();
  const int num_row_blocks = bs->rows.size();

  blocks_.resize(num_col_blocks - num_eliminate_blocks, 0);
  for (int i = num_eliminate_blocks; i < num_col_blocks; ++i) {
    blocks_[i - num_eliminate_blocks] = bs->cols[i].size;
  }

  set<pair<int, int> > block_pairs;
  for (int i = 0; i < blocks_.size(); ++i) {
    block_pairs.insert(make_pair(i, i));
  }

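  // Each chunk is the set of rows that share the same e block. When that
  // e block is eliminated, every pair of f blocks appearing in the chunk
  // becomes coupled, so the corresponding block of the Schur complement
  // is non-zero. Since S is symmetric, only pairs (i, j) with i <= j are
  // recorded. In bundle adjustment terms: e blocks are points, f blocks
  // are cameras, and two cameras get a non-zero block in S if they
  // observe a common point.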
  int r = 0;
  while (r < num_row_blocks) {
    int e_block_id = bs->rows[r].cells.front().block_id;
    if (e_block_id >= num_eliminate_blocks) {
      break;
    }
    vector<int> f_blocks;

    // Add to the chunk until the first block in the row is
    // different from the one in the first row for the chunk.
    for (; r < num_row_blocks; ++r) {
      const CompressedRow& row = bs->rows[r];
      if (row.cells.front().block_id != e_block_id) {
        break;
      }

      // Iterate over the blocks in the row, ignoring the first
      // block since it is the one to be eliminated.
      for (int c = 1; c < row.cells.size(); ++c) {
        const Cell& cell = row.cells[c];
        f_blocks.push_back(cell.block_id - num_eliminate_blocks);
      }
    }

    sort(f_blocks.begin(), f_blocks.end());
    f_blocks.erase(unique(f_blocks.begin(), f_blocks.end()), f_blocks.end());
    for (int i = 0; i < f_blocks.size(); ++i) {
      for (int j = i + 1; j < f_blocks.size(); ++j) {
        block_pairs.insert(make_pair(f_blocks[i], f_blocks[j]));
      }
    }
  }

  // Remaining rows do not contribute to the chunks and go directly
  // into the Schur complement via an outer product.
  for (; r < num_row_blocks; ++r) {
    const CompressedRow& row = bs->rows[r];
    CHECK_GE(row.cells.front().block_id, num_eliminate_blocks);
    for (int i = 0; i < row.cells.size(); ++i) {
      int r_block1_id = row.cells[i].block_id - num_eliminate_blocks;
      for (int j = 0; j < row.cells.size(); ++j) {
        int r_block2_id = row.cells[j].block_id - num_eliminate_blocks;
        if (r_block1_id <= r_block2_id) {
          block_pairs.insert(make_pair(r_block1_id, r_block2_id));
        }
      }
    }
  }

  set_lhs(new BlockRandomAccessSparseMatrix(blocks_, block_pairs));
  set_rhs(new double[lhs()->num_rows()]);
}

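// Solve the reduced system S x = r using the sparse linear algebra
// library selected in the solver options. All three backends operate on
// the same BlockRandomAccessSparseMatrix built in InitStorage; they
// differ only in the sparse Cholesky implementation used.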
LinearSolver::Summary
SparseSchurComplementSolver::SolveReducedLinearSystem(double* solution) {
  switch (options().sparse_linear_algebra_library_type) {
    case SUITE_SPARSE:
      return SolveReducedLinearSystemUsingSuiteSparse(solution);
    case CX_SPARSE:
      return SolveReducedLinearSystemUsingCXSparse(solution);
    case EIGEN_SPARSE:
      return SolveReducedLinearSystemUsingEigen(solution);
    default:
      LOG(FATAL) << "Unknown sparse linear algebra library: "
                 << options().sparse_linear_algebra_library_type;
  }

  return LinearSolver::Summary();
}

// Solve the system Sx = r, assuming that the matrix S is stored in a
// BlockRandomAccessSparseMatrix.  The linear system is solved using
// CHOLMOD's sparse Cholesky factorization routines.
LinearSolver::Summary
SparseSchurComplementSolver::SolveReducedLinearSystemUsingSuiteSparse(
    double* solution) {
#ifdef CERES_NO_SUITESPARSE

  LinearSolver::Summary summary;
  summary.num_iterations = 0;
  summary.termination_type = LINEAR_SOLVER_FATAL_ERROR;
  summary.message = "Ceres was not built with SuiteSparse support. "
      "Therefore, SPARSE_SCHUR cannot be used with SUITE_SPARSE";
  return summary;

#else

  LinearSolver::Summary summary;
  summary.num_iterations = 0;
  summary.termination_type = LINEAR_SOLVER_SUCCESS;
  summary.message = "Success.";

  TripletSparseMatrix* tsm =
      const_cast<TripletSparseMatrix*>(
          down_cast<const BlockRandomAccessSparseMatrix*>(lhs())->matrix());
  const int num_rows = tsm->num_rows();

  // The case where there are no f blocks, and the system is block
  // diagonal.
  if (num_rows == 0) {
    return summary;
  }

  summary.num_iterations = 1;
  cholmod_sparse* cholmod_lhs = NULL;
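  // CHOLMOD's stype field selects which triangle of a symmetric matrix is
  // stored and used: stype > 0 means the upper triangle, stype < 0 the
  // lower triangle.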
  if (options().use_postordering) {
    // If we are going to do a full symbolic analysis of the Schur
    // complement matrix from scratch and not rely on the
    // pre-ordering, then the fastest path in cholmod_factorize is the
    // one corresponding to upper triangular matrices.

    // Create an upper triangular symmetric matrix.
    cholmod_lhs = ss_.CreateSparseMatrix(tsm);
    cholmod_lhs->stype = 1;

    if (factor_ == NULL) {
      factor_ = ss_.BlockAnalyzeCholesky(cholmod_lhs,
                                         blocks_,
                                         blocks_,
                                         &summary.message);
    }
  } else {
    // If we are going to use the natural ordering (i.e. rely on the
    // pre-ordering computed by solver_impl.cc), then the fastest
    // path in cholmod_factorize is the one corresponding to lower
    // triangular matrices.

    // Create a lower triangular symmetric matrix.
    cholmod_lhs = ss_.CreateSparseMatrixTranspose(tsm);
    cholmod_lhs->stype = -1;

    if (factor_ == NULL) {
      factor_ = ss_.AnalyzeCholeskyWithNaturalOrdering(cholmod_lhs,
                                                       &summary.message);
    }
  }

  if (factor_ == NULL) {
    ss_.Free(cholmod_lhs);
    summary.termination_type = LINEAR_SOLVER_FATAL_ERROR;
    // No need to set message as it has already been set by the
    // symbolic analysis routines above.
    return summary;
  }

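  // Compute the numeric factorization. The symbolic factorization
  // (factor_) computed above is done only once and reused on subsequent
  // calls; the numeric factorization is redone on every call since the
  // values of S change from iteration to iteration.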
  summary.termination_type =
    ss_.Cholesky(cholmod_lhs, factor_, &summary.message);

  ss_.Free(cholmod_lhs);

  if (summary.termination_type != LINEAR_SOLVER_SUCCESS) {
    // No need to set message as it has already been set by the
    // numeric factorization routine above.
    return summary;
  }

  cholmod_dense* cholmod_rhs =
      ss_.CreateDenseVector(const_cast<double*>(rhs()), num_rows, num_rows);
  cholmod_dense* cholmod_solution = ss_.Solve(factor_,
                                              cholmod_rhs,
                                              &summary.message);
  ss_.Free(cholmod_rhs);

  if (cholmod_solution == NULL) {
    summary.message =
        "SuiteSparse failure. Unable to perform triangular solve.";
    summary.termination_type = LINEAR_SOLVER_FAILURE;
    return summary;
  }

  VectorRef(solution, num_rows)
      = VectorRef(static_cast<double*>(cholmod_solution->x), num_rows);
  ss_.Free(cholmod_solution);
  return summary;
#endif  // CERES_NO_SUITESPARSE
}

// Solve the system Sx = r, assuming that the matrix S is stored in a
// BlockRandomAccessSparseMatrix.  The linear system is solved using
// CXSparse's sparse Cholesky factorization routines.
LinearSolver::Summary
SparseSchurComplementSolver::SolveReducedLinearSystemUsingCXSparse(
    double* solution) {
#ifdef CERES_NO_CXSPARSE

  LinearSolver::Summary summary;
  summary.num_iterations = 0;
  summary.termination_type = LINEAR_SOLVER_FATAL_ERROR;
  summary.message = "Ceres was not built with CXSparse support. "
      "Therefore, SPARSE_SCHUR cannot be used with CX_SPARSE";
  return summary;

#else

  LinearSolver::Summary summary;
  summary.num_iterations = 0;
  summary.termination_type = LINEAR_SOLVER_SUCCESS;
  summary.message = "Success.";

  // Extract the TripletSparseMatrix that is used for actually storing S.
  TripletSparseMatrix* tsm =
      const_cast<TripletSparseMatrix*>(
          down_cast<const BlockRandomAccessSparseMatrix*>(lhs())->matrix());
  const int num_rows = tsm->num_rows();

  // The case where there are no f blocks, and the system is block
  // diagonal.
  if (num_rows == 0) {
    return summary;
  }

  cs_di* lhs = CHECK_NOTNULL(cxsparse_.CreateSparseMatrix(tsm));
  VectorRef(solution, num_rows) = ConstVectorRef(rhs(), num_rows);

  // Compute the symbolic factorization if it is not available. As in the
  // SuiteSparse path, the symbolic factorization is computed once and
  // reused across calls.
  if (cxsparse_factor_ == NULL) {
    cxsparse_factor_ = cxsparse_.BlockAnalyzeCholesky(lhs, blocks_, blocks_);
  }

  if (cxsparse_factor_ == NULL) {
    summary.termination_type = LINEAR_SOLVER_FATAL_ERROR;
    summary.message =
        "CXSparse failure. Unable to find symbolic factorization.";
  } else if (!cxsparse_.SolveCholesky(lhs, cxsparse_factor_, solution)) {
    summary.termination_type = LINEAR_SOLVER_FAILURE;
    summary.message = "CXSparse::SolveCholesky failed.";
  }

  cxsparse_.Free(lhs);
  return summary;
#endif  // CERES_NO_CXSPARSE
}

// Solve the system Sx = r, assuming that the matrix S is stored in a
// BlockRandomAccessSparseMatrix.  The linear system is solved using
// Eigen's sparse Cholesky factorization routines.
LinearSolver::Summary
SparseSchurComplementSolver::SolveReducedLinearSystemUsingEigen(
    double* solution) {
#ifndef CERES_USE_EIGEN_SPARSE

  LinearSolver::Summary summary;
  summary.num_iterations = 0;
  summary.termination_type = LINEAR_SOLVER_FATAL_ERROR;
  summary.message =
      "SPARSE_SCHUR cannot be used with EIGEN_SPARSE. "
      "Ceres was not built with support for "
      "Eigen's SimplicialLDLT decomposition. "
      "This requires building Ceres with -DEIGENSPARSE=ON.";
  return summary;

#else
  EventLogger event_logger("SchurComplementSolver::EigenSolve");
  LinearSolver::Summary summary;
  summary.num_iterations = 0;
  summary.termination_type = LINEAR_SOLVER_SUCCESS;
  summary.message = "Success.";

  // Extract the TripletSparseMatrix that is used for actually storing S.
  TripletSparseMatrix* tsm =
      const_cast<TripletSparseMatrix*>(
          down_cast<const BlockRandomAccessSparseMatrix*>(lhs())->matrix());
  const int num_rows = tsm->num_rows();

  // The case where there are no f blocks, and the system is block
  // diagonal.
  if (num_rows == 0) {
    return summary;
  }

  // This is an upper triangular matrix in compressed row (CRS) form.
  CompressedRowSparseMatrix crsm(*tsm);
  // Interpreting the same arrays as a column major (CCS) matrix yields
  // its transpose, i.e. the lower triangle of S.
  Eigen::MappedSparseMatrix<double, Eigen::ColMajor> eigen_lhs(
      crsm.num_rows(),
      crsm.num_rows(),
      crsm.num_nonzeros(),
      crsm.mutable_rows(),
      crsm.mutable_cols(),
      crsm.mutable_values());
  event_logger.AddEvent("ToCompressedRowSparseMatrix");

  // Compute the symbolic factorization if one does not exist.
  if (simplicial_ldlt_.get() == NULL) {
    simplicial_ldlt_.reset(new SimplicialLDLT);
    // The scalar ordering produced by the AMD algorithm here can be an
    // order of magnitude worse than the one computed using the block
    // version of the algorithm.
    simplicial_ldlt_->analyzePattern(eigen_lhs);
    event_logger.AddEvent("Analysis");
    if (simplicial_ldlt_->info() != Eigen::Success) {
      summary.termination_type = LINEAR_SOLVER_FATAL_ERROR;
      summary.message =
          "Eigen failure. Unable to find symbolic factorization.";
      return summary;
    }
  }

  simplicial_ldlt_->factorize(eigen_lhs);
  event_logger.AddEvent("Factorize");
  if (simplicial_ldlt_->info() != Eigen::Success) {
    summary.termination_type = LINEAR_SOLVER_FAILURE;
    summary.message = "Eigen failure. Unable to find numeric factorization.";
    return summary;
  }

  VectorRef(solution, num_rows) =
      simplicial_ldlt_->solve(ConstVectorRef(rhs(), num_rows));
  event_logger.AddEvent("Solve");
  if (simplicial_ldlt_->info() != Eigen::Success) {
    summary.termination_type = LINEAR_SOLVER_FAILURE;
    summary.message = "Eigen failure. Unable to do triangular solve.";
  }

  return summary;
#endif  // CERES_USE_EIGEN_SPARSE
}

}  // namespace internal
}  // namespace ceres