10ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// Ceres Solver - A fast non-linear least squares minimizer
20ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// Copyright 2010, 2011, 2012 Google Inc. All rights reserved.
30ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// http://code.google.com/p/ceres-solver/
40ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong//
50ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// Redistribution and use in source and binary forms, with or without
60ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// modification, are permitted provided that the following conditions are met:
70ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong//
80ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// * Redistributions of source code must retain the above copyright notice,
90ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong//   this list of conditions and the following disclaimer.
100ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// * Redistributions in binary form must reproduce the above copyright notice,
110ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong//   this list of conditions and the following disclaimer in the documentation
120ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong//   and/or other materials provided with the distribution.
130ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// * Neither the name of Google Inc. nor the names of its contributors may be
140ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong//   used to endorse or promote products derived from this software without
150ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong//   specific prior written permission.
160ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong//
170ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
180ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
190ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
200ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
210ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
220ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
230ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
240ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
250ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
260ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
270ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// POSSIBILITY OF SUCH DAMAGE.
280ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong//
290ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// Author: keir@google.com (Keir Mierle)
300ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong//
310ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// The ProgramEvaluator runs the cost functions contained in each residual block
320ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// and stores the result into a jacobian. The particular type of jacobian is
330ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// abstracted out using two template parameters:
340ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong//
350ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong//   - An "EvaluatePreparer" that is responsible for creating the array with
360ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong//     pointers to the jacobian blocks where the cost function evaluates to.
370ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong//   - A "JacobianWriter" that is responsible for storing the resulting
380ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong//     jacobian blocks in the passed sparse matrix.
390ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong//
400ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// This abstraction affords an efficient evaluator implementation while still
410ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// supporting writing to multiple sparse matrix formats. For example, when the
420ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// ProgramEvaluator is parameterized for writing to block sparse matrices, the
430ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// residual jacobians are written directly into their final position in the
440ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// block sparse matrix by the user's CostFunction; there is no copying.
450ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong//
460ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// The evaluation is threaded with OpenMP.
470ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong//
480ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// The EvaluatePreparer and JacobianWriter interfaces are as follows:
490ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong//
500ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong//   class EvaluatePreparer {
510ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong//     // Prepare the jacobians array for use as the destination of a call to
520ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong//     // a cost function's evaluate method.
530ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong//     void Prepare(const ResidualBlock* residual_block,
540ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong//                  int residual_block_index,
550ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong//                  SparseMatrix* jacobian,
560ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong//                  double** jacobians);
570ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong//   }
580ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong//
590ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong//   class JacobianWriter {
600ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong//     // Create a jacobian that this writer can write. Same as
610ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong//     // Evaluator::CreateJacobian.
620ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong//     SparseMatrix* CreateJacobian() const;
630ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong//
640ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong//     // Create num_threads evaluate preparers. Caller owns result which must
650ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong//     // be freed with delete[]. Resulting preparers are valid while *this is.
660ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong//     EvaluatePreparer* CreateEvaluatePreparers(int num_threads);
670ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong//
680ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong//     // Write the block jacobians from a residual block evaluation to the
690ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong//     // larger sparse jacobian.
700ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong//     void Write(int residual_id,
710ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong//                int residual_offset,
720ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong//                double** jacobians,
730ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong//                SparseMatrix* jacobian);
740ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong//   }
750ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong//
760ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// Note: The ProgramEvaluator is not thread safe, since internally it maintains
770ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// some per-thread scratch space.
780ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong
790ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong#ifndef CERES_INTERNAL_PROGRAM_EVALUATOR_H_
800ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong#define CERES_INTERNAL_PROGRAM_EVALUATOR_H_
810ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong
8279397c21138f54fcff6ec067b44b847f1f7e0e98Carlos Hernandez// This include must come before any #ifndef check on Ceres compile options.
8379397c21138f54fcff6ec067b44b847f1f7e0e98Carlos Hernandez#include "ceres/internal/port.h"
8479397c21138f54fcff6ec067b44b847f1f7e0e98Carlos Hernandez
850ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong#ifdef CERES_USE_OPENMP
860ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong#include <omp.h>
870ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong#endif
880ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong
891d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling#include <map>
901d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling#include <string>
911d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling#include <vector>
921d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling#include "ceres/execution_summary.h"
931d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling#include "ceres/internal/eigen.h"
941d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling#include "ceres/internal/scoped_ptr.h"
950ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong#include "ceres/parameter_block.h"
960ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong#include "ceres/program.h"
970ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong#include "ceres/residual_block.h"
98399f7d09e0c45af54b77b4ab9508d6f23759b927Scott Ettinger#include "ceres/small_blas.h"
990ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong
1000ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kongnamespace ceres {
1010ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kongnamespace internal {
1020ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong
10379397c21138f54fcff6ec067b44b847f1f7e0e98Carlos Hernandezstruct NullJacobianFinalizer {
10479397c21138f54fcff6ec067b44b847f1f7e0e98Carlos Hernandez  void operator()(SparseMatrix* jacobian, int num_parameters) {}
10579397c21138f54fcff6ec067b44b847f1f7e0e98Carlos Hernandez};
10679397c21138f54fcff6ec067b44b847f1f7e0e98Carlos Hernandez
10779397c21138f54fcff6ec067b44b847f1f7e0e98Carlos Hernandeztemplate<typename EvaluatePreparer,
10879397c21138f54fcff6ec067b44b847f1f7e0e98Carlos Hernandez         typename JacobianWriter,
10979397c21138f54fcff6ec067b44b847f1f7e0e98Carlos Hernandez         typename JacobianFinalizer = NullJacobianFinalizer>
1100ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kongclass ProgramEvaluator : public Evaluator {
1110ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong public:
1120ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong  ProgramEvaluator(const Evaluator::Options &options, Program* program)
1130ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong      : options_(options),
1140ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong        program_(program),
1150ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong        jacobian_writer_(options, program),
1160ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong        evaluate_preparers_(
1170ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong            jacobian_writer_.CreateEvaluatePreparers(options.num_threads)) {
1180ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong#ifndef CERES_USE_OPENMP
1190ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    CHECK_EQ(1, options_.num_threads)
1200ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong        << "OpenMP support is not compiled into this binary; "
1210ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong        << "only options.num_threads=1 is supported.";
1220ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong#endif
1230ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong
1240ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    BuildResidualLayout(*program, &residual_layout_);
1250ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    evaluate_scratch_.reset(CreateEvaluatorScratch(*program,
1260ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong                                                   options.num_threads));
1270ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong  }
1280ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong
1290ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong  // Implementation of Evaluator interface.
1300ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong  SparseMatrix* CreateJacobian() const {
1310ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    return jacobian_writer_.CreateJacobian();
1320ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong  }
1330ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong
1341d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling  bool Evaluate(const Evaluator::EvaluateOptions& evaluate_options,
1351d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling                const double* state,
1360ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong                double* cost,
1370ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong                double* residuals,
1380ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong                double* gradient,
1390ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong                SparseMatrix* jacobian) {
1401d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling    ScopedExecutionTimer total_timer("Evaluator::Total", &execution_summary_);
1411d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling    ScopedExecutionTimer call_type_timer(gradient == NULL && jacobian == NULL
1421d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling                                         ? "Evaluator::Residual"
1431d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling                                         : "Evaluator::Jacobian",
1441d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling                                         &execution_summary_);
1451d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling
1460ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    // The parameters are stateful, so set the state before evaluating.
1470ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    if (!program_->StateVectorToParameterBlocks(state)) {
1480ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong      return false;
1490ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    }
1500ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong
1510ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    if (residuals != NULL) {
1520ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong      VectorRef(residuals, program_->NumResiduals()).setZero();
1530ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    }
1540ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong
1550ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    if (jacobian != NULL) {
1560ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong      jacobian->SetZero();
1570ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    }
1580ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong
1590ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    // Each thread gets it's own cost and evaluate scratch space.
1600ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    for (int i = 0; i < options_.num_threads; ++i) {
1610ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong      evaluate_scratch_[i].cost = 0.0;
1621d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling      if (gradient != NULL) {
1631d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling        VectorRef(evaluate_scratch_[i].gradient.get(),
1641d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling                  program_->NumEffectiveParameters()).setZero();
1651d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling      }
1660ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    }
1670ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong
1680ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    // This bool is used to disable the loop if an error is encountered
1690ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    // without breaking out of it. The remaining loop iterations are still run,
1700ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    // but with an empty body, and so will finish quickly.
1710ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    bool abort = false;
1720ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    int num_residual_blocks = program_->NumResidualBlocks();
1730ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong#pragma omp parallel for num_threads(options_.num_threads)
1740ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    for (int i = 0; i < num_residual_blocks; ++i) {
1750ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// Disable the loop instead of breaking, as required by OpenMP.
1760ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong#pragma omp flush(abort)
1770ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong      if (abort) {
1780ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong        continue;
1790ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong      }
1800ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong
1810ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong#ifdef CERES_USE_OPENMP
1820ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong      int thread_id = omp_get_thread_num();
1830ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong#else
1840ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong      int thread_id = 0;
1850ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong#endif
1860ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong      EvaluatePreparer* preparer = &evaluate_preparers_[thread_id];
1870ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong      EvaluateScratch* scratch = &evaluate_scratch_[thread_id];
1880ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong
1890ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong      // Prepare block residuals if requested.
1900ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong      const ResidualBlock* residual_block = program_->residual_blocks()[i];
1910ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong      double* block_residuals = NULL;
1920ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong      if (residuals != NULL) {
1930ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong        block_residuals = residuals + residual_layout_[i];
1940ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong      } else if (gradient != NULL) {
1950ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong        block_residuals = scratch->residual_block_residuals.get();
1960ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong      }
1970ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong
1980ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong      // Prepare block jacobians if requested.
1990ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong      double** block_jacobians = NULL;
2000ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong      if (jacobian != NULL || gradient != NULL) {
2010ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong        preparer->Prepare(residual_block,
2020ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong                          i,
2030ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong                          jacobian,
2040ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong                          scratch->jacobian_block_ptrs.get());
2050ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong        block_jacobians = scratch->jacobian_block_ptrs.get();
2060ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong      }
2070ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong
2080ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong      // Evaluate the cost, residuals, and jacobians.
2090ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong      double block_cost;
2100ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong      if (!residual_block->Evaluate(
2111d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling              evaluate_options.apply_loss_function,
2120ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong              &block_cost,
2130ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong              block_residuals,
2140ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong              block_jacobians,
2150ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong              scratch->residual_block_evaluate_scratch.get())) {
2160ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong        abort = true;
2170ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// This ensures that the OpenMP threads have a consistent view of 'abort'. Do
2180ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// the flush inside the failure case so that there is usually only one
2190ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong// synchronization point per loop iteration instead of two.
2200ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong#pragma omp flush(abort)
2210ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong        continue;
2220ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong      }
2230ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong
2240ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong      scratch->cost += block_cost;
2250ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong
2260ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong      // Store the jacobians, if they were requested.
2270ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong      if (jacobian != NULL) {
2280ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong        jacobian_writer_.Write(i,
2290ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong                               residual_layout_[i],
2300ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong                               block_jacobians,
2310ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong                               jacobian);
2320ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong      }
2330ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong
2340ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong      // Compute and store the gradient, if it was requested.
2350ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong      if (gradient != NULL) {
2360ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong        int num_residuals = residual_block->NumResiduals();
2370ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong        int num_parameter_blocks = residual_block->NumParameterBlocks();
2380ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong        for (int j = 0; j < num_parameter_blocks; ++j) {
2390ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong          const ParameterBlock* parameter_block =
2400ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong              residual_block->parameter_blocks()[j];
2410ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong          if (parameter_block->IsConstant()) {
2420ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong            continue;
2430ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong          }
2441d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling
2451d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling          MatrixTransposeVectorMultiply<Eigen::Dynamic, Eigen::Dynamic, 1>(
2461d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling              block_jacobians[j],
2471d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling              num_residuals,
2481d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling              parameter_block->LocalSize(),
2491d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling              block_residuals,
2501d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling              scratch->gradient.get() + parameter_block->delta_offset());
2510ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong        }
2520ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong      }
2530ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    }
2540ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong
2550ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    if (!abort) {
25679397c21138f54fcff6ec067b44b847f1f7e0e98Carlos Hernandez      const int num_parameters = program_->NumEffectiveParameters();
25779397c21138f54fcff6ec067b44b847f1f7e0e98Carlos Hernandez
2580ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong      // Sum the cost and gradient (if requested) from each thread.
2590ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong      (*cost) = 0.0;
2600ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong      if (gradient != NULL) {
2610ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong        VectorRef(gradient, num_parameters).setZero();
2620ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong      }
2630ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong      for (int i = 0; i < options_.num_threads; ++i) {
2640ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong        (*cost) += evaluate_scratch_[i].cost;
2650ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong        if (gradient != NULL) {
2660ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong          VectorRef(gradient, num_parameters) +=
2670ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong              VectorRef(evaluate_scratch_[i].gradient.get(), num_parameters);
2680ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong        }
2690ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong      }
27079397c21138f54fcff6ec067b44b847f1f7e0e98Carlos Hernandez
27179397c21138f54fcff6ec067b44b847f1f7e0e98Carlos Hernandez      // Finalize the Jacobian if it is available.
27279397c21138f54fcff6ec067b44b847f1f7e0e98Carlos Hernandez      // `num_parameters` is passed to the finalizer so that additional
27379397c21138f54fcff6ec067b44b847f1f7e0e98Carlos Hernandez      // storage can be reserved for additional diagonal elements if
27479397c21138f54fcff6ec067b44b847f1f7e0e98Carlos Hernandez      // necessary.
27579397c21138f54fcff6ec067b44b847f1f7e0e98Carlos Hernandez      if (jacobian != NULL) {
27679397c21138f54fcff6ec067b44b847f1f7e0e98Carlos Hernandez        JacobianFinalizer f;
27779397c21138f54fcff6ec067b44b847f1f7e0e98Carlos Hernandez        f(jacobian, num_parameters);
27879397c21138f54fcff6ec067b44b847f1f7e0e98Carlos Hernandez      }
2790ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    }
2800ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    return !abort;
2810ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong  }
2820ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong
2830ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong  bool Plus(const double* state,
2840ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong            const double* delta,
2850ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong            double* state_plus_delta) const {
2860ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    return program_->Plus(state, delta, state_plus_delta);
2870ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong  }
2880ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong
2890ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong  int NumParameters() const {
2900ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    return program_->NumParameters();
2910ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong  }
2920ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong  int NumEffectiveParameters() const {
2930ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    return program_->NumEffectiveParameters();
2940ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong  }
2950ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong
2960ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong  int NumResiduals() const {
2970ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    return program_->NumResiduals();
2980ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong  }
2990ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong
3001d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling  virtual map<string, int> CallStatistics() const {
3011d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling    return execution_summary_.calls();
3021d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling  }
3031d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling
3041d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling  virtual map<string, double> TimeStatistics() const {
3051d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling    return execution_summary_.times();
3061d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling  }
3071d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling
3080ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong private:
3090ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong  // Per-thread scratch space needed to evaluate and store each residual block.
3100ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong  struct EvaluateScratch {
3110ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    void Init(int max_parameters_per_residual_block,
3120ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong              int max_scratch_doubles_needed_for_evaluate,
3130ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong              int max_residuals_per_residual_block,
3140ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong              int num_parameters) {
3150ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong      residual_block_evaluate_scratch.reset(
3160ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong          new double[max_scratch_doubles_needed_for_evaluate]);
3170ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong      gradient.reset(new double[num_parameters]);
3180ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong      VectorRef(gradient.get(), num_parameters).setZero();
3190ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong      residual_block_residuals.reset(
3200ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong          new double[max_residuals_per_residual_block]);
3210ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong      jacobian_block_ptrs.reset(
3220ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong          new double*[max_parameters_per_residual_block]);
3230ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    }
3240ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong
3250ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    double cost;
3260ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    scoped_array<double> residual_block_evaluate_scratch;
3270ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    // The gradient in the local parameterization.
3280ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    scoped_array<double> gradient;
3290ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    // Enough space to store the residual for the largest residual block.
3300ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    scoped_array<double> residual_block_residuals;
3310ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    scoped_array<double*> jacobian_block_ptrs;
3320ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong  };
3330ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong
3340ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong  static void BuildResidualLayout(const Program& program,
3350ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong                                  vector<int>* residual_layout) {
3360ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    const vector<ResidualBlock*>& residual_blocks = program.residual_blocks();
3370ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    residual_layout->resize(program.NumResidualBlocks());
3380ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    int residual_pos = 0;
3390ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    for (int i = 0; i < residual_blocks.size(); ++i) {
3400ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong      const int num_residuals = residual_blocks[i]->NumResiduals();
3410ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong      (*residual_layout)[i] = residual_pos;
3420ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong      residual_pos += num_residuals;
3430ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    }
3440ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong  }
3450ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong
3460ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong  // Create scratch space for each thread evaluating the program.
3470ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong  static EvaluateScratch* CreateEvaluatorScratch(const Program& program,
3480ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong                                                 int num_threads) {
3490ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    int max_parameters_per_residual_block =
3500ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong        program.MaxParametersPerResidualBlock();
3510ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    int max_scratch_doubles_needed_for_evaluate =
3520ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong        program.MaxScratchDoublesNeededForEvaluate();
3530ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    int max_residuals_per_residual_block =
3540ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong        program.MaxResidualsPerResidualBlock();
3550ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    int num_parameters = program.NumEffectiveParameters();
3560ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong
3570ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    EvaluateScratch* evaluate_scratch = new EvaluateScratch[num_threads];
3580ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    for (int i = 0; i < num_threads; i++) {
3590ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong      evaluate_scratch[i].Init(max_parameters_per_residual_block,
3600ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong                               max_scratch_doubles_needed_for_evaluate,
3610ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong                               max_residuals_per_residual_block,
3620ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong                               num_parameters);
3630ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    }
3640ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong    return evaluate_scratch;
3650ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong  }
3660ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong
3670ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong  Evaluator::Options options_;
3680ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong  Program* program_;
3690ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong  JacobianWriter jacobian_writer_;
3700ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong  scoped_array<EvaluatePreparer> evaluate_preparers_;
3710ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong  scoped_array<EvaluateScratch> evaluate_scratch_;
3720ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong  vector<int> residual_layout_;
3731d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling  ::ceres::internal::ExecutionSummary execution_summary_;
3740ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong};
3750ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong
3760ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong}  // namespace internal
3770ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong}  // namespace ceres
3780ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong
3790ae28bd5885b5daa526898fcf7c323dc2c3e1963Angus Kong#endif  // CERES_INTERNAL_PROGRAM_EVALUATOR_H_
380