1// Ceres Solver - A fast non-linear least squares minimizer
2// Copyright 2010, 2011, 2012 Google Inc. All rights reserved.
3// http://code.google.com/p/ceres-solver/
4//
5// Redistribution and use in source and binary forms, with or without
6// modification, are permitted provided that the following conditions are met:
7//
8// * Redistributions of source code must retain the above copyright notice,
9//   this list of conditions and the following disclaimer.
10// * Redistributions in binary form must reproduce the above copyright notice,
11//   this list of conditions and the following disclaimer in the documentation
12//   and/or other materials provided with the distribution.
13// * Neither the name of Google Inc. nor the names of its contributors may be
14//   used to endorse or promote products derived from this software without
15//   specific prior written permission.
16//
17// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27// POSSIBILITY OF SUCH DAMAGE.
28//
29// Author: sameeragarwal@google.com (Sameer Agarwal)
30
31#include <algorithm>
32#include <ctime>
33#include <set>
34#include <vector>
35
36#ifndef CERES_NO_CXSPARSE
37#include "cs.h"
38#endif  // CERES_NO_CXSPARSE
39
40#include "Eigen/Dense"
41#include "ceres/block_random_access_dense_matrix.h"
42#include "ceres/block_random_access_matrix.h"
43#include "ceres/block_random_access_sparse_matrix.h"
44#include "ceres/block_sparse_matrix.h"
45#include "ceres/block_structure.h"
46#include "ceres/detect_structure.h"
47#include "ceres/linear_solver.h"
48#include "ceres/schur_complement_solver.h"
49#include "ceres/suitesparse.h"
50#include "ceres/triplet_sparse_matrix.h"
51#include "ceres/internal/eigen.h"
52#include "ceres/internal/port.h"
53#include "ceres/internal/scoped_ptr.h"
54#include "ceres/types.h"
55
56
57namespace ceres {
58namespace internal {
59
60LinearSolver::Summary SchurComplementSolver::SolveImpl(
61    BlockSparseMatrixBase* A,
62    const double* b,
63    const LinearSolver::PerSolveOptions& per_solve_options,
64    double* x) {
65  const time_t start_time = time(NULL);
66  if (eliminator_.get() == NULL) {
67    InitStorage(A->block_structure());
68    DetectStructure(*A->block_structure(),
69                    options_.elimination_groups[0],
70                    &options_.row_block_size,
71                    &options_.e_block_size,
72                    &options_.f_block_size);
73    eliminator_.reset(CHECK_NOTNULL(SchurEliminatorBase::Create(options_)));
74    eliminator_->Init(options_.elimination_groups[0], A->block_structure());
75  };
76  const time_t init_time = time(NULL);
77  fill(x, x + A->num_cols(), 0.0);
78
79  LinearSolver::Summary summary;
80  summary.num_iterations = 1;
81  summary.termination_type = FAILURE;
82  eliminator_->Eliminate(A, b, per_solve_options.D, lhs_.get(), rhs_.get());
83  const time_t eliminate_time = time(NULL);
84
85  double* reduced_solution = x + A->num_cols() - lhs_->num_cols();
86  const bool status = SolveReducedLinearSystem(reduced_solution);
87  const time_t solve_time = time(NULL);
88
89  if (!status) {
90    return summary;
91  }
92
93  eliminator_->BackSubstitute(A, b, per_solve_options.D, reduced_solution, x);
94  const time_t backsubstitute_time = time(NULL);
95  summary.termination_type = TOLERANCE;
96
97  VLOG(2) << "time (sec) total: " << (backsubstitute_time - start_time)
98          << " init: " << (init_time - start_time)
99          << " eliminate: " << (eliminate_time - init_time)
100          << " solve: " << (solve_time - eliminate_time)
101          << " backsubstitute: " << (backsubstitute_time - solve_time);
102  return summary;
103}
104
105// Initialize a BlockRandomAccessDenseMatrix to store the Schur
106// complement.
107void DenseSchurComplementSolver::InitStorage(
108    const CompressedRowBlockStructure* bs) {
109  const int num_eliminate_blocks = options().elimination_groups[0];
110  const int num_col_blocks = bs->cols.size();
111
112  vector<int> blocks(num_col_blocks - num_eliminate_blocks, 0);
113  for (int i = num_eliminate_blocks, j = 0;
114       i < num_col_blocks;
115       ++i, ++j) {
116    blocks[j] = bs->cols[i].size;
117  }
118
119  set_lhs(new BlockRandomAccessDenseMatrix(blocks));
120  set_rhs(new double[lhs()->num_rows()]);
121}
122
123// Solve the system Sx = r, assuming that the matrix S is stored in a
124// BlockRandomAccessDenseMatrix. The linear system is solved using
125// Eigen's Cholesky factorization.
126bool DenseSchurComplementSolver::SolveReducedLinearSystem(double* solution) {
127  const BlockRandomAccessDenseMatrix* m =
128      down_cast<const BlockRandomAccessDenseMatrix*>(lhs());
129  const int num_rows = m->num_rows();
130
131  // The case where there are no f blocks, and the system is block
132  // diagonal.
133  if (num_rows == 0) {
134    return true;
135  }
136
137  // TODO(sameeragarwal): Add proper error handling; this completely ignores
138  // the quality of the solution to the solve.
139  VectorRef(solution, num_rows) =
140      ConstMatrixRef(m->values(), num_rows, num_rows)
141      .selfadjointView<Eigen::Upper>()
142      .ldlt()
143      .solve(ConstVectorRef(rhs(), num_rows));
144
145  return true;
146}
147
148
149SparseSchurComplementSolver::SparseSchurComplementSolver(
150    const LinearSolver::Options& options)
151    : SchurComplementSolver(options) {
152#ifndef CERES_NO_SUITESPARSE
153  factor_ = NULL;
154#endif  // CERES_NO_SUITESPARSE
155
156#ifndef CERES_NO_CXSPARSE
157  cxsparse_factor_ = NULL;
158#endif  // CERES_NO_CXSPARSE
159}
160
161SparseSchurComplementSolver::~SparseSchurComplementSolver() {
162#ifndef CERES_NO_SUITESPARSE
163  if (factor_ != NULL) {
164    ss_.Free(factor_);
165    factor_ = NULL;
166  }
167#endif  // CERES_NO_SUITESPARSE
168
169#ifndef CERES_NO_CXSPARSE
170  if (cxsparse_factor_ != NULL) {
171    cxsparse_.Free(cxsparse_factor_);
172    cxsparse_factor_ = NULL;
173  }
174#endif  // CERES_NO_CXSPARSE
175}
176
177// Determine the non-zero blocks in the Schur Complement matrix, and
178// initialize a BlockRandomAccessSparseMatrix object.
179void SparseSchurComplementSolver::InitStorage(
180    const CompressedRowBlockStructure* bs) {
181  const int num_eliminate_blocks = options().elimination_groups[0];
182  const int num_col_blocks = bs->cols.size();
183  const int num_row_blocks = bs->rows.size();
184
185  blocks_.resize(num_col_blocks - num_eliminate_blocks, 0);
186  for (int i = num_eliminate_blocks; i < num_col_blocks; ++i) {
187    blocks_[i - num_eliminate_blocks] = bs->cols[i].size;
188  }
189
190  set<pair<int, int> > block_pairs;
191  for (int i = 0; i < blocks_.size(); ++i) {
192    block_pairs.insert(make_pair(i, i));
193  }
194
195  int r = 0;
196  while (r < num_row_blocks) {
197    int e_block_id = bs->rows[r].cells.front().block_id;
198    if (e_block_id >= num_eliminate_blocks) {
199      break;
200    }
201    vector<int> f_blocks;
202
203    // Add to the chunk until the first block in the row is
204    // different than the one in the first row for the chunk.
205    for (; r < num_row_blocks; ++r) {
206      const CompressedRow& row = bs->rows[r];
207      if (row.cells.front().block_id != e_block_id) {
208        break;
209      }
210
211      // Iterate over the blocks in the row, ignoring the first
212      // block since it is the one to be eliminated.
213      for (int c = 1; c < row.cells.size(); ++c) {
214        const Cell& cell = row.cells[c];
215        f_blocks.push_back(cell.block_id - num_eliminate_blocks);
216      }
217    }
218
219    sort(f_blocks.begin(), f_blocks.end());
220    f_blocks.erase(unique(f_blocks.begin(), f_blocks.end()), f_blocks.end());
221    for (int i = 0; i < f_blocks.size(); ++i) {
222      for (int j = i + 1; j < f_blocks.size(); ++j) {
223        block_pairs.insert(make_pair(f_blocks[i], f_blocks[j]));
224      }
225    }
226  }
227
228  // Remaing rows do not contribute to the chunks and directly go
229  // into the schur complement via an outer product.
230  for (; r < num_row_blocks; ++r) {
231    const CompressedRow& row = bs->rows[r];
232    CHECK_GE(row.cells.front().block_id, num_eliminate_blocks);
233    for (int i = 0; i < row.cells.size(); ++i) {
234      int r_block1_id = row.cells[i].block_id - num_eliminate_blocks;
235      for (int j = 0; j < row.cells.size(); ++j) {
236        int r_block2_id = row.cells[j].block_id - num_eliminate_blocks;
237        if (r_block1_id <= r_block2_id) {
238          block_pairs.insert(make_pair(r_block1_id, r_block2_id));
239        }
240      }
241    }
242  }
243
244  set_lhs(new BlockRandomAccessSparseMatrix(blocks_, block_pairs));
245  set_rhs(new double[lhs()->num_rows()]);
246}
247
248bool SparseSchurComplementSolver::SolveReducedLinearSystem(double* solution) {
249  switch (options().sparse_linear_algebra_library) {
250    case SUITE_SPARSE:
251      return SolveReducedLinearSystemUsingSuiteSparse(solution);
252    case CX_SPARSE:
253      return SolveReducedLinearSystemUsingCXSparse(solution);
254    default:
255      LOG(FATAL) << "Unknown sparse linear algebra library : "
256                 << options().sparse_linear_algebra_library;
257  }
258
259  LOG(FATAL) << "Unknown sparse linear algebra library : "
260             << options().sparse_linear_algebra_library;
261  return false;
262}
263
264#ifndef CERES_NO_SUITESPARSE
265// Solve the system Sx = r, assuming that the matrix S is stored in a
266// BlockRandomAccessSparseMatrix.  The linear system is solved using
267// CHOLMOD's sparse cholesky factorization routines.
268bool SparseSchurComplementSolver::SolveReducedLinearSystemUsingSuiteSparse(
269    double* solution) {
270  const time_t start_time = time(NULL);
271
272  TripletSparseMatrix* tsm =
273      const_cast<TripletSparseMatrix*>(
274          down_cast<const BlockRandomAccessSparseMatrix*>(lhs())->matrix());
275
276  const int num_rows = tsm->num_rows();
277
278  // The case where there are no f blocks, and the system is block
279  // diagonal.
280  if (num_rows == 0) {
281    return true;
282  }
283
284  cholmod_sparse* cholmod_lhs = ss_.CreateSparseMatrix(tsm);
285  // The matrix is symmetric, and the upper triangular part of the
286  // matrix contains the values.
287  cholmod_lhs->stype = 1;
288  const time_t lhs_time = time(NULL);
289
290  cholmod_dense*  cholmod_rhs =
291      ss_.CreateDenseVector(const_cast<double*>(rhs()), num_rows, num_rows);
292  const time_t rhs_time = time(NULL);
293
294  // Symbolic factorization is computed if we don't already have one handy.
295  if (factor_ == NULL) {
296    if (options().use_block_amd) {
297      factor_ = ss_.BlockAnalyzeCholesky(cholmod_lhs, blocks_, blocks_);
298    } else {
299      factor_ = ss_.AnalyzeCholesky(cholmod_lhs);
300    }
301
302    if (VLOG_IS_ON(2)) {
303      cholmod_print_common("Symbolic Analysis", ss_.mutable_cc());
304    }
305  }
306
307  CHECK_NOTNULL(factor_);
308
309  const time_t symbolic_time = time(NULL);
310  cholmod_dense* cholmod_solution =
311      ss_.SolveCholesky(cholmod_lhs, factor_, cholmod_rhs);
312
313  const time_t solve_time = time(NULL);
314
315  ss_.Free(cholmod_lhs);
316  cholmod_lhs = NULL;
317  ss_.Free(cholmod_rhs);
318  cholmod_rhs = NULL;
319
320  if (cholmod_solution == NULL) {
321    LOG(WARNING) << "CHOLMOD solve failed.";
322    return false;
323  }
324
325  VectorRef(solution, num_rows)
326      = VectorRef(static_cast<double*>(cholmod_solution->x), num_rows);
327  ss_.Free(cholmod_solution);
328  const time_t final_time = time(NULL);
329  VLOG(2) << "time: " << (final_time - start_time)
330          << " lhs : " << (lhs_time - start_time)
331          << " rhs:  " << (rhs_time - lhs_time)
332          << " analyze: " <<  (symbolic_time - rhs_time)
333          << " factor_and_solve: " << (solve_time - symbolic_time)
334          << " cleanup: " << (final_time - solve_time);
335  return true;
336}
337#else
338bool SparseSchurComplementSolver::SolveReducedLinearSystemUsingSuiteSparse(
339    double* solution) {
340  LOG(FATAL) << "No SuiteSparse support in Ceres.";
341  return false;
342}
343#endif  // CERES_NO_SUITESPARSE
344
345#ifndef CERES_NO_CXSPARSE
346// Solve the system Sx = r, assuming that the matrix S is stored in a
347// BlockRandomAccessSparseMatrix.  The linear system is solved using
348// CXSparse's sparse cholesky factorization routines.
349bool SparseSchurComplementSolver::SolveReducedLinearSystemUsingCXSparse(
350    double* solution) {
351  // Extract the TripletSparseMatrix that is used for actually storing S.
352  TripletSparseMatrix* tsm =
353      const_cast<TripletSparseMatrix*>(
354          down_cast<const BlockRandomAccessSparseMatrix*>(lhs())->matrix());
355
356  const int num_rows = tsm->num_rows();
357
358  // The case where there are no f blocks, and the system is block
359  // diagonal.
360  if (num_rows == 0) {
361    return true;
362  }
363
364  cs_di* lhs = CHECK_NOTNULL(cxsparse_.CreateSparseMatrix(tsm));
365  VectorRef(solution, num_rows) = ConstVectorRef(rhs(), num_rows);
366
367  // Compute symbolic factorization if not available.
368  if (cxsparse_factor_ == NULL) {
369    cxsparse_factor_ = CHECK_NOTNULL(cxsparse_.AnalyzeCholesky(lhs));
370  }
371
372  // Solve the linear system.
373  bool ok = cxsparse_.SolveCholesky(lhs, cxsparse_factor_, solution);
374
375  cxsparse_.Free(lhs);
376  return ok;
377}
378#else
379bool SparseSchurComplementSolver::SolveReducedLinearSystemUsingCXSparse(
380    double* solution) {
381  LOG(FATAL) << "No CXSparse support in Ceres.";
382  return false;
383}
384#endif  // CERES_NO_CXPARSE
385
386}  // namespace internal
387}  // namespace ceres
388