1// Ceres Solver - A fast non-linear least squares minimizer 2// Copyright 2013 Google Inc. All rights reserved. 3// http://code.google.com/p/ceres-solver/ 4// 5// Redistribution and use in source and binary forms, with or without 6// modification, are permitted provided that the following conditions are met: 7// 8// * Redistributions of source code must retain the above copyright notice, 9// this list of conditions and the following disclaimer. 10// * Redistributions in binary form must reproduce the above copyright notice, 11// this list of conditions and the following disclaimer in the documentation 12// and/or other materials provided with the distribution. 13// * Neither the name of Google Inc. nor the names of its contributors may be 14// used to endorse or promote products derived from this software without 15// specific prior written permission. 16// 17// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 18// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 21// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 22// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 23// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 24// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 25// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 26// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 27// POSSIBILITY OF SUCH DAMAGE. 28// 29// Author: sameeragarwal@google.com (Sameer Agarwal) 30// mierle@gmail.com (Keir Mierle) 31// 32// This autodiff implementation differs from the one found in 33// autodiff_cost_function.h by supporting autodiff on cost functions 34// with variable numbers of parameters with variable sizes. With the 35// other implementation, all the sizes (both the number of parameter 36// blocks and the size of each block) must be fixed at compile time. 37// 38// The functor API differs slightly from the API for fixed size 39// autodiff; the expected interface for the cost functors is: 40// 41// struct MyCostFunctor { 42// template<typename T> 43// bool operator()(T const* const* parameters, T* residuals) const { 44// // Use parameters[i] to access the i'th parameter block. 45// } 46// } 47// 48// Since the sizing of the parameters is done at runtime, you must 49// also specify the sizes after creating the dynamic autodiff cost 50// function. For example: 51// 52// DynamicAutoDiffCostFunction<MyCostFunctor, 3> cost_function( 53// new MyCostFunctor()); 54// cost_function.AddParameterBlock(5); 55// cost_function.AddParameterBlock(10); 56// cost_function.SetNumResiduals(21); 57// 58// Under the hood, the implementation evaluates the cost function 59// multiple times, computing a small set of the derivatives (four by 60// default, controlled by the Stride template parameter) with each 61// pass. There is a tradeoff with the size of the passes; you may want 62// to experiment with the stride. 63 64#ifndef CERES_PUBLIC_DYNAMIC_AUTODIFF_COST_FUNCTION_H_ 65#define CERES_PUBLIC_DYNAMIC_AUTODIFF_COST_FUNCTION_H_ 66 67#include <cmath> 68#include <numeric> 69#include <vector> 70 71#include "ceres/cost_function.h" 72#include "ceres/internal/scoped_ptr.h" 73#include "ceres/jet.h" 74#include "glog/logging.h" 75 76namespace ceres { 77 78template <typename CostFunctor, int Stride = 4> 79class DynamicAutoDiffCostFunction : public CostFunction { 80 public: 81 explicit DynamicAutoDiffCostFunction(CostFunctor* functor) 82 : functor_(functor) {} 83 84 virtual ~DynamicAutoDiffCostFunction() {} 85 86 void AddParameterBlock(int size) { 87 mutable_parameter_block_sizes()->push_back(size); 88 } 89 90 void SetNumResiduals(int num_residuals) { 91 set_num_residuals(num_residuals); 92 } 93 94 virtual bool Evaluate(double const* const* parameters, 95 double* residuals, 96 double** jacobians) const { 97 CHECK_GT(num_residuals(), 0) 98 << "You must call DynamicAutoDiffCostFunction::SetNumResiduals() " 99 << "before DynamicAutoDiffCostFunction::Evaluate()."; 100 101 if (jacobians == NULL) { 102 return (*functor_)(parameters, residuals); 103 } 104 105 // The difficulty with Jets, as implemented in Ceres, is that they were 106 // originally designed for strictly compile-sized use. At this point, there 107 // is a large body of code that assumes inside a cost functor it is 108 // acceptable to do e.g. T(1.5) and get an appropriately sized jet back. 109 // 110 // Unfortunately, it is impossible to communicate the expected size of a 111 // dynamically sized jet to the static instantiations that existing code 112 // depends on. 113 // 114 // To work around this issue, the solution here is to evaluate the 115 // jacobians in a series of passes, each one computing Stripe * 116 // num_residuals() derivatives. This is done with small, fixed-size jets. 117 const int num_parameter_blocks = parameter_block_sizes().size(); 118 const int num_parameters = std::accumulate(parameter_block_sizes().begin(), 119 parameter_block_sizes().end(), 120 0); 121 122 // Allocate scratch space for the strided evaluation. 123 vector<Jet<double, Stride> > input_jets(num_parameters); 124 vector<Jet<double, Stride> > output_jets(num_residuals()); 125 126 // Make the parameter pack that is sent to the functor (reused). 127 vector<Jet<double, Stride>* > jet_parameters(num_parameter_blocks, 128 static_cast<Jet<double, Stride>* >(NULL)); 129 int num_active_parameters = 0; 130 131 // To handle constant parameters between non-constant parameter blocks, the 132 // start position --- a raw parameter index --- of each contiguous block of 133 // non-constant parameters is recorded in start_derivative_section. 134 vector<int> start_derivative_section; 135 bool in_derivative_section = false; 136 int parameter_cursor = 0; 137 138 // Discover the derivative sections and set the parameter values. 139 for (int i = 0; i < num_parameter_blocks; ++i) { 140 jet_parameters[i] = &input_jets[parameter_cursor]; 141 142 const int parameter_block_size = parameter_block_sizes()[i]; 143 if (jacobians[i] != NULL) { 144 if (!in_derivative_section) { 145 start_derivative_section.push_back(parameter_cursor); 146 in_derivative_section = true; 147 } 148 149 num_active_parameters += parameter_block_size; 150 } else { 151 in_derivative_section = false; 152 } 153 154 for (int j = 0; j < parameter_block_size; ++j, parameter_cursor++) { 155 input_jets[parameter_cursor].a = parameters[i][j]; 156 } 157 } 158 159 // When `num_active_parameters % Stride != 0` then it can be the case 160 // that `active_parameter_count < Stride` while parameter_cursor is less 161 // than the total number of parameters and with no remaining non-constant 162 // parameter blocks. Pushing parameter_cursor (the total number of 163 // parameters) as a final entry to start_derivative_section is required 164 // because if a constant parameter block is encountered after the 165 // last non-constant block then current_derivative_section is incremented 166 // and would otherwise index an invalid position in 167 // start_derivative_section. Setting the final element to the total number 168 // of parameters means that this can only happen at most once in the loop 169 // below. 170 start_derivative_section.push_back(parameter_cursor); 171 172 // Evaluate all of the strides. Each stride is a chunk of the derivative to 173 // evaluate, typically some size proportional to the size of the SIMD 174 // registers of the CPU. 175 int num_strides = static_cast<int>(ceil(num_active_parameters / 176 static_cast<float>(Stride))); 177 178 int current_derivative_section = 0; 179 int current_derivative_section_cursor = 0; 180 181 for (int pass = 0; pass < num_strides; ++pass) { 182 // Set most of the jet components to zero, except for 183 // non-constant #Stride parameters. 184 const int initial_derivative_section = current_derivative_section; 185 const int initial_derivative_section_cursor = 186 current_derivative_section_cursor; 187 188 int active_parameter_count = 0; 189 parameter_cursor = 0; 190 191 for (int i = 0; i < num_parameter_blocks; ++i) { 192 for (int j = 0; j < parameter_block_sizes()[i]; 193 ++j, parameter_cursor++) { 194 input_jets[parameter_cursor].v.setZero(); 195 if (active_parameter_count < Stride && 196 parameter_cursor >= ( 197 start_derivative_section[current_derivative_section] + 198 current_derivative_section_cursor)) { 199 if (jacobians[i] != NULL) { 200 input_jets[parameter_cursor].v[active_parameter_count] = 1.0; 201 ++active_parameter_count; 202 ++current_derivative_section_cursor; 203 } else { 204 ++current_derivative_section; 205 current_derivative_section_cursor = 0; 206 } 207 } 208 } 209 } 210 211 if (!(*functor_)(&jet_parameters[0], &output_jets[0])) { 212 return false; 213 } 214 215 // Copy the pieces of the jacobians into their final place. 216 active_parameter_count = 0; 217 218 current_derivative_section = initial_derivative_section; 219 current_derivative_section_cursor = initial_derivative_section_cursor; 220 221 for (int i = 0, parameter_cursor = 0; i < num_parameter_blocks; ++i) { 222 for (int j = 0; j < parameter_block_sizes()[i]; 223 ++j, parameter_cursor++) { 224 if (active_parameter_count < Stride && 225 parameter_cursor >= ( 226 start_derivative_section[current_derivative_section] + 227 current_derivative_section_cursor)) { 228 if (jacobians[i] != NULL) { 229 for (int k = 0; k < num_residuals(); ++k) { 230 jacobians[i][k * parameter_block_sizes()[i] + j] = 231 output_jets[k].v[active_parameter_count]; 232 } 233 ++active_parameter_count; 234 ++current_derivative_section_cursor; 235 } else { 236 ++current_derivative_section; 237 current_derivative_section_cursor = 0; 238 } 239 } 240 } 241 } 242 243 // Only copy the residuals over once (even though we compute them on 244 // every loop). 245 if (pass == num_strides - 1) { 246 for (int k = 0; k < num_residuals(); ++k) { 247 residuals[k] = output_jets[k].a; 248 } 249 } 250 } 251 return true; 252 } 253 254 private: 255 internal::scoped_ptr<CostFunctor> functor_; 256}; 257 258} // namespace ceres 259 260#endif // CERES_PUBLIC_DYNAMIC_AUTODIFF_COST_FUNCTION_H_ 261