1/////////////////////////////////////////////////////////////////////////// 2// 3// Copyright (c) 2002, Industrial Light & Magic, a division of Lucas 4// Digital Ltd. LLC 5// 6// All rights reserved. 7// 8// Redistribution and use in source and binary forms, with or without 9// modification, are permitted provided that the following conditions are 10// met: 11// * Redistributions of source code must retain the above copyright 12// notice, this list of conditions and the following disclaimer. 13// * Redistributions in binary form must reproduce the above 14// copyright notice, this list of conditions and the following disclaimer 15// in the documentation and/or other materials provided with the 16// distribution. 17// * Neither the name of Industrial Light & Magic nor the names of 18// its contributors may be used to endorse or promote products derived 19// from this software without specific prior written permission. 20// 21// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32// 33/////////////////////////////////////////////////////////////////////////// 34 35 36 37 38 39//---------------------------------------------------------------------------- 40// 41// Implementation of non-template items declared in ImathMatrixAlgo.h 42// 43//---------------------------------------------------------------------------- 44 45#include "ImathMatrixAlgo.h" 46#include <cmath> 47#include <algorithm> // for std::max() 48 49#if defined(OPENEXR_DLL) 50 #define EXPORT_CONST __declspec(dllexport) 51#else 52 #define EXPORT_CONST const 53#endif 54 55namespace Imath { 56 57EXPORT_CONST M33f identity33f ( 1, 0, 0, 58 0, 1, 0, 59 0, 0, 1); 60 61EXPORT_CONST M33d identity33d ( 1, 0, 0, 62 0, 1, 0, 63 0, 0, 1); 64 65EXPORT_CONST M44f identity44f ( 1, 0, 0, 0, 66 0, 1, 0, 0, 67 0, 0, 1, 0, 68 0, 0, 0, 1); 69 70EXPORT_CONST M44d identity44d ( 1, 0, 0, 0, 71 0, 1, 0, 0, 72 0, 0, 1, 0, 73 0, 0, 0, 1); 74 75namespace 76{ 77 78class KahanSum 79{ 80public: 81 KahanSum() : _total(0), _correction(0) {} 82 83 void 84 operator+= (const double val) 85 { 86 const double y = val - _correction; 87 const double t = _total + y; 88 _correction = (t - _total) - y; 89 _total = t; 90 } 91 92 double get() const 93 { 94 return _total; 95 } 96 97private: 98 double _total; 99 double _correction; 100}; 101 102} 103 104template <typename T> 105M44d 106procrustesRotationAndTranslation (const Vec3<T>* A, const Vec3<T>* B, const T* weights, const size_t numPoints, const bool doScale) 107{ 108 if (numPoints == 0) 109 return M44d(); 110 111 // Always do the accumulation in double precision: 112 V3d Acenter (0.0); 113 V3d Bcenter (0.0); 114 double weightsSum = 0.0; 115 116 if (weights == 0) 117 { 118 for (int i = 0; i < numPoints; ++i) 119 { 120 Acenter += (V3d) A[i]; 121 Bcenter += (V3d) B[i]; 122 } 123 weightsSum = (double) numPoints; 124 } 125 else 126 { 127 for (int i = 0; i < numPoints; ++i) 128 { 129 const double w = weights[i]; 130 weightsSum += w; 131 132 Acenter += w * (V3d) A[i]; 133 Bcenter += w * (V3d) B[i]; 134 } 135 } 136 137 if (weightsSum == 0) 138 return M44d(); 139 140 Acenter /= weightsSum; 141 Bcenter /= weightsSum; 142 143 // 144 // Find Q such that |Q*A - B| (actually A-Acenter and B-Bcenter, weighted) 145 // is minimized in the least squares sense. 146 // From Golub/Van Loan, p.601 147 // 148 // A,B are 3xn 149 // Let C = B A^T (where A is 3xn and B^T is nx3, so C is 3x3) 150 // Compute the SVD: C = U D V^T (U,V rotations, D diagonal). 151 // Throw away the D part, and return Q = U V^T 152 M33d C (0.0); 153 if (weights == 0) 154 { 155 for (int i = 0; i < numPoints; ++i) 156 C += outerProduct ((V3d) B[i] - Bcenter, (V3d) A[i] - Acenter); 157 } 158 else 159 { 160 for (int i = 0; i < numPoints; ++i) 161 { 162 const double w = weights[i]; 163 C += outerProduct (w * ((V3d) B[i] - Bcenter), (V3d) A[i] - Acenter); 164 } 165 } 166 167 M33d U, V; 168 V3d S; 169 jacobiSVD (C, U, S, V, Imath::limits<double>::epsilon(), true); 170 171 // We want Q.transposed() here since we are going to be using it in the 172 // Imath style (multiplying vectors on the right, v' = v*A^T): 173 const M33d Qt = V * U.transposed(); 174 175 double s = 1.0; 176 if (doScale && numPoints > 1) 177 { 178 // Finding a uniform scale: let us assume the Q is completely fixed 179 // at this point (solving for both simultaneously seems much harder). 180 // We are trying to compute (again, per Golub and van Loan) 181 // min || s*A*Q - B ||_F 182 // Notice that we've jammed a uniform scale in front of the Q. 183 // Now, the Frobenius norm (the least squares norm over matrices) 184 // has the neat property that it is equivalent to minimizing the trace 185 // of M^T*M (see your friendly neighborhood linear algebra text for a 186 // derivation). Thus, we can expand this out as 187 // min tr (s*A*Q - B)^T*(s*A*Q - B) 188 // = min tr(Q^T*A^T*s*s*A*Q) + tr(B^T*B) - 2*tr(Q^T*A^T*s*B) by linearity of the trace 189 // = min s^2 tr(A^T*A) + tr(B^T*B) - 2*s*tr(Q^T*A^T*B) using the fact that the trace is invariant 190 // under similarity transforms Q*M*Q^T 191 // If we differentiate w.r.t. s and set this to 0, we get 192 // 0 = 2*s*tr(A^T*A) - 2*tr(Q^T*A^T*B) 193 // so 194 // 2*s*tr(A^T*A) = 2*s*tr(Q^T*A^T*B) 195 // s = tr(Q^T*A^T*B) / tr(A^T*A) 196 197 KahanSum traceATA; 198 if (weights == 0) 199 { 200 for (int i = 0; i < numPoints; ++i) 201 traceATA += ((V3d) A[i] - Acenter).length2(); 202 } 203 else 204 { 205 for (int i = 0; i < numPoints; ++i) 206 traceATA += ((double) weights[i]) * ((V3d) A[i] - Acenter).length2(); 207 } 208 209 KahanSum traceBATQ; 210 for (int i = 0; i < 3; ++i) 211 for (int j = 0; j < 3; ++j) 212 traceBATQ += Qt[j][i] * C[i][j]; 213 214 s = traceBATQ.get() / traceATA.get(); 215 } 216 217 // Q is the rotation part of what we want to return. 218 // The entire transform is: 219 // (translate origin to Bcenter) * Q * (translate Acenter to origin) 220 // last first 221 // The effect of this on a point is: 222 // (translate origin to Bcenter) * Q * (translate Acenter to origin) * point 223 // = (translate origin to Bcenter) * Q * (-Acenter + point) 224 // = (translate origin to Bcenter) * (-Q*Acenter + Q*point) 225 // = (translate origin to Bcenter) * (translate Q*Acenter to origin) * Q*point 226 // = (translate Q*Acenter to Bcenter) * Q*point 227 // So what we want to return is: 228 // (translate Q*Acenter to Bcenter) * Q 229 // 230 // In block form, this is: 231 // [ 1 0 0 | ] [ 0 ] [ 1 0 0 | ] [ 1 0 0 | ] [ | ] [ ] 232 // [ 0 1 0 tb ] [ s*Q 0 ] [ 0 1 0 -ta ] = [ 0 1 0 tb ] [ s*Q -s*Q*ta ] = [ Q tb-s*Q*ta ] 233 // [ 0 0 1 | ] [ 0 ] [ 0 0 1 | ] [ 0 0 1 | ] [ | ] [ ] 234 // [ 0 0 0 1 ] [ 0 0 0 1 ] [ 0 0 0 1 ] [ 0 0 0 1 ] [ 0 0 0 1 ] [ 0 0 0 1 ] 235 // (ofc the whole thing is transposed for Imath). 236 const V3d translate = Bcenter - s*Acenter*Qt; 237 238 return M44d (s*Qt.x[0][0], s*Qt.x[0][1], s*Qt.x[0][2], T(0), 239 s*Qt.x[1][0], s*Qt.x[1][1], s*Qt.x[1][2], T(0), 240 s*Qt.x[2][0], s*Qt.x[2][1], s*Qt.x[2][2], T(0), 241 translate.x, translate.y, translate.z, T(1)); 242} // procrustesRotationAndTranslation 243 244template <typename T> 245M44d 246procrustesRotationAndTranslation (const Vec3<T>* A, const Vec3<T>* B, const size_t numPoints, const bool doScale) 247{ 248 return procrustesRotationAndTranslation (A, B, (const T*) 0, numPoints, doScale); 249} // procrustesRotationAndTranslation 250 251 252template M44d procrustesRotationAndTranslation (const V3d* from, const V3d* to, const size_t numPoints, const bool doScale); 253template M44d procrustesRotationAndTranslation (const V3f* from, const V3f* to, const size_t numPoints, const bool doScale); 254template M44d procrustesRotationAndTranslation (const V3d* from, const V3d* to, const double* weights, const size_t numPoints, const bool doScale); 255template M44d procrustesRotationAndTranslation (const V3f* from, const V3f* to, const float* weights, const size_t numPoints, const bool doScale); 256 257 258namespace 259{ 260 261// Applies the 2x2 Jacobi rotation 262// [ c s 0 ] [ 1 0 0 ] [ c 0 s ] 263// [ -s c 0 ] or [ 0 c s ] or [ 0 1 0 ] 264// [ 0 0 1 ] [ 0 -s c ] [ -s 0 c ] 265// from the right; that is, computes 266// J * A 267// for the Jacobi rotation J and the matrix A. This is efficient because we 268// only need to touch exactly the 2 columns that are affected, so we never 269// need to explicitly construct the J matrix. 270template <typename T, int j, int k> 271void 272jacobiRotateRight (Imath::Matrix33<T>& A, 273 const T c, 274 const T s) 275{ 276 for (int i = 0; i < 3; ++i) 277 { 278 const T tau1 = A[i][j]; 279 const T tau2 = A[i][k]; 280 A[i][j] = c * tau1 - s * tau2; 281 A[i][k] = s * tau1 + c * tau2; 282 } 283} 284 285template <typename T> 286void 287jacobiRotateRight (Imath::Matrix44<T>& A, 288 const int j, 289 const int k, 290 const T c, 291 const T s) 292{ 293 for (int i = 0; i < 4; ++i) 294 { 295 const T tau1 = A[i][j]; 296 const T tau2 = A[i][k]; 297 A[i][j] = c * tau1 - s * tau2; 298 A[i][k] = s * tau1 + c * tau2; 299 } 300} 301 302// This routine solves the 2x2 SVD: 303// [ c1 s1 ] [ w x ] [ c2 s2 ] [ d1 0 ] 304// [ ] [ ] [ ] = [ ] 305// [ -s1 c1 ] [ y z ] [ -s2 c2 ] [ 0 d2 ] 306// where 307// [ w x ] 308// A = [ ] 309// [ y z ] 310// is the subset of A consisting of the [j,k] entries, A([j k], [j k]) in 311// Matlab parlance. The method is the 'USVD' algorithm described in the 312// following paper: 313// 'Computation of the Singular Value Decomposition using Mesh-Connected Processors' 314// by Richard P. Brent, Franklin T. Luk, and Charles Van Loan 315// It breaks the computation into two steps: the first symmetrizes the matrix, 316// and the second diagonalizes the symmetric matrix. 317template <typename T, int j, int k, int l> 318bool 319twoSidedJacobiRotation (Imath::Matrix33<T>& A, 320 Imath::Matrix33<T>& U, 321 Imath::Matrix33<T>& V, 322 const T tol) 323{ 324 // Load everything into local variables to make things easier on the 325 // optimizer: 326 const T w = A[j][j]; 327 const T x = A[j][k]; 328 const T y = A[k][j]; 329 const T z = A[k][k]; 330 331 // We will keep track of whether we're actually performing any rotations, 332 // since if the matrix is already diagonal we'll end up with the identity 333 // as our Jacobi rotation and we can short-circuit. 334 bool changed = false; 335 336 // The first step is to symmetrize the 2x2 matrix, 337 // [ c s ]^T [ w x ] = [ p q ] 338 // [ -s c ] [ y z ] [ q r ] 339 T mu_1 = w + z; 340 T mu_2 = x - y; 341 342 T c, s; 343 if (std::abs(mu_2) <= tol*std::abs(mu_1)) // Already symmetric (to tolerance) 344 { // Note that the <= is important here 345 c = T(1); // because we want to bypass the computation 346 s = T(0); // of rho if mu_1 = mu_2 = 0. 347 348 const T p = w; 349 const T r = z; 350 mu_1 = r - p; 351 mu_2 = x + y; 352 } 353 else 354 { 355 const T rho = mu_1 / mu_2; 356 s = T(1) / std::sqrt (T(1) + rho*rho); // TODO is there a native inverse square root function? 357 if (rho < 0) 358 s = -s; 359 c = s * rho; 360 361 mu_1 = s * (x + y) + c * (z - w); // = r - p 362 mu_2 = T(2) * (c * x - s * z); // = 2*q 363 364 changed = true; 365 } 366 367 // The second stage diagonalizes, 368 // [ c2 s2 ]^T [ p q ] [ c2 s2 ] = [ d1 0 ] 369 // [ -s2 c2 ] [ q r ] [ -s2 c2 ] [ 0 d2 ] 370 T c_2, s_2; 371 if (std::abs(mu_2) <= tol*std::abs(mu_1)) 372 { 373 c_2 = T(1); 374 s_2 = T(0); 375 } 376 else 377 { 378 const T rho_2 = mu_1 / mu_2; 379 T t_2 = T(1) / (std::abs(rho_2) + std::sqrt(1 + rho_2*rho_2)); 380 if (rho_2 < 0) 381 t_2 = -t_2; 382 c_2 = T(1) / std::sqrt (T(1) + t_2*t_2); 383 s_2 = c_2 * t_2; 384 385 changed = true; 386 } 387 388 const T c_1 = c_2 * c - s_2 * s; 389 const T s_1 = s_2 * c + c_2 * s; 390 391 if (!changed) 392 { 393 // We've decided that the off-diagonal entries are already small 394 // enough, so we'll set them to zero. This actually appears to result 395 // in smaller errors than leaving them be, possibly because it prevents 396 // us from trying to do extra rotations later that we don't need. 397 A[k][j] = 0; 398 A[j][k] = 0; 399 return false; 400 } 401 402 const T d_1 = c_1*(w*c_2 - x*s_2) - s_1*(y*c_2 - z*s_2); 403 const T d_2 = s_1*(w*s_2 + x*c_2) + c_1*(y*s_2 + z*c_2); 404 405 // For the entries we just zeroed out, we'll just set them to 0, since 406 // they should be 0 up to machine precision. 407 A[j][j] = d_1; 408 A[k][k] = d_2; 409 A[k][j] = 0; 410 A[j][k] = 0; 411 412 // Rotate the entries that _weren't_ involved in the 2x2 SVD: 413 { 414 // Rotate on the left by 415 // [ c1 s1 0 ]^T [ c1 0 s1 ]^T [ 1 0 0 ]^T 416 // [ -s1 c1 0 ] or [ 0 1 0 ] or [ 0 c1 s1 ] 417 // [ 0 0 1 ] [ -s1 0 c1 ] [ 0 -s1 c1 ] 418 // This has the effect of adding the (weighted) ith and jth _rows_ to 419 // each other. 420 const T tau1 = A[j][l]; 421 const T tau2 = A[k][l]; 422 A[j][l] = c_1 * tau1 - s_1 * tau2; 423 A[k][l] = s_1 * tau1 + c_1 * tau2; 424 } 425 426 { 427 // Rotate on the right by 428 // [ c2 s2 0 ] [ c2 0 s2 ] [ 1 0 0 ] 429 // [ -s2 c2 0 ] or [ 0 1 0 ] or [ 0 c2 s2 ] 430 // [ 0 0 1 ] [ -s2 0 c2 ] [ 0 -s2 c2 ] 431 // This has the effect of adding the (weighted) ith and jth _columns_ to 432 // each other. 433 const T tau1 = A[l][j]; 434 const T tau2 = A[l][k]; 435 A[l][j] = c_2 * tau1 - s_2 * tau2; 436 A[l][k] = s_2 * tau1 + c_2 * tau2; 437 } 438 439 // Now apply the rotations to U and V: 440 // Remember that we have 441 // R1^T * A * R2 = D 442 // This is in the 2x2 case, but after doing a bunch of these 443 // we will get something like this for the 3x3 case: 444 // ... R1b^T * R1a^T * A * R2a * R2b * ... = D 445 // ----------------- --------------- 446 // = U^T = V 447 // So, 448 // U = R1a * R1b * ... 449 // V = R2a * R2b * ... 450 jacobiRotateRight<T, j, k> (U, c_1, s_1); 451 jacobiRotateRight<T, j, k> (V, c_2, s_2); 452 453 return true; 454} 455 456template <typename T> 457bool 458twoSidedJacobiRotation (Imath::Matrix44<T>& A, 459 int j, 460 int k, 461 Imath::Matrix44<T>& U, 462 Imath::Matrix44<T>& V, 463 const T tol) 464{ 465 // Load everything into local variables to make things easier on the 466 // optimizer: 467 const T w = A[j][j]; 468 const T x = A[j][k]; 469 const T y = A[k][j]; 470 const T z = A[k][k]; 471 472 // We will keep track of whether we're actually performing any rotations, 473 // since if the matrix is already diagonal we'll end up with the identity 474 // as our Jacobi rotation and we can short-circuit. 475 bool changed = false; 476 477 // The first step is to symmetrize the 2x2 matrix, 478 // [ c s ]^T [ w x ] = [ p q ] 479 // [ -s c ] [ y z ] [ q r ] 480 T mu_1 = w + z; 481 T mu_2 = x - y; 482 483 T c, s; 484 if (std::abs(mu_2) <= tol*std::abs(mu_1)) // Already symmetric (to tolerance) 485 { // Note that the <= is important here 486 c = T(1); // because we want to bypass the computation 487 s = T(0); // of rho if mu_1 = mu_2 = 0. 488 489 const T p = w; 490 const T r = z; 491 mu_1 = r - p; 492 mu_2 = x + y; 493 } 494 else 495 { 496 const T rho = mu_1 / mu_2; 497 s = T(1) / std::sqrt (T(1) + rho*rho); // TODO is there a native inverse square root function? 498 if (rho < 0) 499 s = -s; 500 c = s * rho; 501 502 mu_1 = s * (x + y) + c * (z - w); // = r - p 503 mu_2 = T(2) * (c * x - s * z); // = 2*q 504 505 changed = true; 506 } 507 508 // The second stage diagonalizes, 509 // [ c2 s2 ]^T [ p q ] [ c2 s2 ] = [ d1 0 ] 510 // [ -s2 c2 ] [ q r ] [ -s2 c2 ] [ 0 d2 ] 511 T c_2, s_2; 512 if (std::abs(mu_2) <= tol*std::abs(mu_1)) 513 { 514 c_2 = T(1); 515 s_2 = T(0); 516 } 517 else 518 { 519 const T rho_2 = mu_1 / mu_2; 520 T t_2 = T(1) / (std::abs(rho_2) + std::sqrt(1 + rho_2*rho_2)); 521 if (rho_2 < 0) 522 t_2 = -t_2; 523 c_2 = T(1) / std::sqrt (T(1) + t_2*t_2); 524 s_2 = c_2 * t_2; 525 526 changed = true; 527 } 528 529 const T c_1 = c_2 * c - s_2 * s; 530 const T s_1 = s_2 * c + c_2 * s; 531 532 if (!changed) 533 { 534 // We've decided that the off-diagonal entries are already small 535 // enough, so we'll set them to zero. This actually appears to result 536 // in smaller errors than leaving them be, possibly because it prevents 537 // us from trying to do extra rotations later that we don't need. 538 A[k][j] = 0; 539 A[j][k] = 0; 540 return false; 541 } 542 543 const T d_1 = c_1*(w*c_2 - x*s_2) - s_1*(y*c_2 - z*s_2); 544 const T d_2 = s_1*(w*s_2 + x*c_2) + c_1*(y*s_2 + z*c_2); 545 546 // For the entries we just zeroed out, we'll just set them to 0, since 547 // they should be 0 up to machine precision. 548 A[j][j] = d_1; 549 A[k][k] = d_2; 550 A[k][j] = 0; 551 A[j][k] = 0; 552 553 // Rotate the entries that _weren't_ involved in the 2x2 SVD: 554 for (int l = 0; l < 4; ++l) 555 { 556 if (l == j || l == k) 557 continue; 558 559 // Rotate on the left by 560 // [ 1 ] 561 // [ . ] 562 // [ c2 s2 ] j 563 // [ 1 ] 564 // [ -s2 c2 ] k 565 // [ . ] 566 // [ 1 ] 567 // j k 568 // 569 // This has the effect of adding the (weighted) ith and jth _rows_ to 570 // each other. 571 const T tau1 = A[j][l]; 572 const T tau2 = A[k][l]; 573 A[j][l] = c_1 * tau1 - s_1 * tau2; 574 A[k][l] = s_1 * tau1 + c_1 * tau2; 575 } 576 577 for (int l = 0; l < 4; ++l) 578 { 579 // We set the A[j/k][j/k] entries already 580 if (l == j || l == k) 581 continue; 582 583 // Rotate on the right by 584 // [ 1 ] 585 // [ . ] 586 // [ c2 s2 ] j 587 // [ 1 ] 588 // [ -s2 c2 ] k 589 // [ . ] 590 // [ 1 ] 591 // j k 592 // 593 // This has the effect of adding the (weighted) ith and jth _columns_ to 594 // each other. 595 const T tau1 = A[l][j]; 596 const T tau2 = A[l][k]; 597 A[l][j] = c_2 * tau1 - s_2 * tau2; 598 A[l][k] = s_2 * tau1 + c_2 * tau2; 599 } 600 601 // Now apply the rotations to U and V: 602 // Remember that we have 603 // R1^T * A * R2 = D 604 // This is in the 2x2 case, but after doing a bunch of these 605 // we will get something like this for the 3x3 case: 606 // ... R1b^T * R1a^T * A * R2a * R2b * ... = D 607 // ----------------- --------------- 608 // = U^T = V 609 // So, 610 // U = R1a * R1b * ... 611 // V = R2a * R2b * ... 612 jacobiRotateRight (U, j, k, c_1, s_1); 613 jacobiRotateRight (V, j, k, c_2, s_2); 614 615 return true; 616} 617 618template <typename T> 619void 620swapColumns (Imath::Matrix33<T>& A, int j, int k) 621{ 622 for (int i = 0; i < 3; ++i) 623 std::swap (A[i][j], A[i][k]); 624} 625 626template <typename T> 627T 628maxOffDiag (const Imath::Matrix33<T>& A) 629{ 630 T result = 0; 631 result = std::max (result, std::abs (A[0][1])); 632 result = std::max (result, std::abs (A[0][2])); 633 result = std::max (result, std::abs (A[1][0])); 634 result = std::max (result, std::abs (A[1][2])); 635 result = std::max (result, std::abs (A[2][0])); 636 result = std::max (result, std::abs (A[2][1])); 637 return result; 638} 639 640template <typename T> 641T 642maxOffDiag (const Imath::Matrix44<T>& A) 643{ 644 T result = 0; 645 for (int i = 0; i < 4; ++i) 646 { 647 for (int j = 0; j < 4; ++j) 648 { 649 if (i != j) 650 result = std::max (result, std::abs (A[i][j])); 651 } 652 } 653 654 return result; 655} 656 657template <typename T> 658void 659twoSidedJacobiSVD (Imath::Matrix33<T> A, 660 Imath::Matrix33<T>& U, 661 Imath::Vec3<T>& S, 662 Imath::Matrix33<T>& V, 663 const T tol, 664 const bool forcePositiveDeterminant) 665{ 666 // The two-sided Jacobi SVD works by repeatedly zeroing out 667 // off-diagonal entries of the matrix, 2 at a time. Basically, 668 // we can take our 3x3 matrix, 669 // [* * *] 670 // [* * *] 671 // [* * *] 672 // and use a pair of orthogonal transforms to zero out, say, the 673 // pair of entries (0, 1) and (1, 0): 674 // [ c1 s1 ] [* * *] [ c2 s2 ] [* *] 675 // [-s1 c1 ] [* * *] [-s2 c2 ] = [ * *] 676 // [ 1] [* * *] [ 1] [* * *] 677 // When we go to zero out the next pair of entries (say, (0, 2) and (2, 0)) 678 // then we don't expect those entries to stay 0: 679 // [ c1 s1 ] [* *] [ c2 s2 ] [* * ] 680 // [-s1 c1 ] [ * *] [-s2 c2 ] = [* * *] 681 // [ 1] [* * *] [ 1] [ * *] 682 // However, if we keep doing this, we'll find that the off-diagonal entries 683 // converge to 0 fairly quickly (convergence should be roughly cubic). The 684 // result is a diagonal A matrix and a bunch of orthogonal transforms: 685 // [* * *] [* ] 686 // L1 L2 ... Ln [* * *] Rn ... R2 R1 = [ * ] 687 // [* * *] [ *] 688 // ------------ ------- ------------ ------- 689 // U^T A V S 690 // This turns out to be highly accurate because (1) orthogonal transforms 691 // are extremely stable to compute and apply (this is why QR factorization 692 // works so well, FWIW) and because (2) by applying everything to the original 693 // matrix A instead of computing (A^T * A) we avoid any precision loss that 694 // would result from that. 695 U.makeIdentity(); 696 V.makeIdentity(); 697 698 const int maxIter = 20; // In case we get really unlucky, prevents infinite loops 699 const T absTol = tol * maxOffDiag (A); // Tolerance is in terms of the maximum 700 if (absTol != 0) // _off-diagonal_ entry. 701 { 702 int numIter = 0; 703 do 704 { 705 ++numIter; 706 bool changed = twoSidedJacobiRotation<T, 0, 1, 2> (A, U, V, tol); 707 changed = twoSidedJacobiRotation<T, 0, 2, 1> (A, U, V, tol) || changed; 708 changed = twoSidedJacobiRotation<T, 1, 2, 0> (A, U, V, tol) || changed; 709 if (!changed) 710 break; 711 } while (maxOffDiag(A) > absTol && numIter < maxIter); 712 } 713 714 // The off-diagonal entries are (effectively) 0, so whatever's left on the 715 // diagonal are the singular values: 716 S.x = A[0][0]; 717 S.y = A[1][1]; 718 S.z = A[2][2]; 719 720 // Nothing thus far has guaranteed that the singular values are positive, 721 // so let's go back through and flip them if not (since by contract we are 722 // supposed to return all positive SVs): 723 for (int i = 0; i < 3; ++i) 724 { 725 if (S[i] < 0) 726 { 727 // If we flip S[i], we need to flip the corresponding column of U 728 // (we could also pick V if we wanted; it doesn't really matter): 729 S[i] = -S[i]; 730 for (int j = 0; j < 3; ++j) 731 U[j][i] = -U[j][i]; 732 } 733 } 734 735 // Order the singular values from largest to smallest; this requires 736 // exactly two passes through the data using bubble sort: 737 for (int i = 0; i < 2; ++i) 738 { 739 for (int j = 0; j < (2 - i); ++j) 740 { 741 // No absolute values necessary since we already ensured that 742 // they're positive: 743 if (S[j] < S[j+1]) 744 { 745 // If we swap singular values we also have to swap 746 // corresponding columns in U and V: 747 std::swap (S[j], S[j+1]); 748 swapColumns (U, j, j+1); 749 swapColumns (V, j, j+1); 750 } 751 } 752 } 753 754 if (forcePositiveDeterminant) 755 { 756 // We want to guarantee that the returned matrices always have positive 757 // determinant. We can do this by adding the appropriate number of 758 // matrices of the form: 759 // [ 1 ] 760 // L = [ 1 ] 761 // [ -1 ] 762 // Note that L' = L and L*L = Identity. Thus we can add: 763 // U*L*L*S*V = (U*L)*(L*S)*V 764 // if U has a negative determinant, and 765 // U*S*L*L*V = U*(S*L)*(L*V) 766 // if V has a neg. determinant. 767 if (U.determinant() < 0) 768 { 769 for (int i = 0; i < 3; ++i) 770 U[i][2] = -U[i][2]; 771 S.z = -S.z; 772 } 773 774 if (V.determinant() < 0) 775 { 776 for (int i = 0; i < 3; ++i) 777 V[i][2] = -V[i][2]; 778 S.z = -S.z; 779 } 780 } 781} 782 783template <typename T> 784void 785twoSidedJacobiSVD (Imath::Matrix44<T> A, 786 Imath::Matrix44<T>& U, 787 Imath::Vec4<T>& S, 788 Imath::Matrix44<T>& V, 789 const T tol, 790 const bool forcePositiveDeterminant) 791{ 792 // Please see the Matrix33 version for a detailed description of the algorithm. 793 U.makeIdentity(); 794 V.makeIdentity(); 795 796 const int maxIter = 20; // In case we get really unlucky, prevents infinite loops 797 const T absTol = tol * maxOffDiag (A); // Tolerance is in terms of the maximum 798 if (absTol != 0) // _off-diagonal_ entry. 799 { 800 int numIter = 0; 801 do 802 { 803 ++numIter; 804 bool changed = twoSidedJacobiRotation (A, 0, 1, U, V, tol); 805 changed = twoSidedJacobiRotation (A, 0, 2, U, V, tol) || changed; 806 changed = twoSidedJacobiRotation (A, 0, 3, U, V, tol) || changed; 807 changed = twoSidedJacobiRotation (A, 1, 2, U, V, tol) || changed; 808 changed = twoSidedJacobiRotation (A, 1, 3, U, V, tol) || changed; 809 changed = twoSidedJacobiRotation (A, 2, 3, U, V, tol) || changed; 810 if (!changed) 811 break; 812 } while (maxOffDiag(A) > absTol && numIter < maxIter); 813 } 814 815 // The off-diagonal entries are (effectively) 0, so whatever's left on the 816 // diagonal are the singular values: 817 S[0] = A[0][0]; 818 S[1] = A[1][1]; 819 S[2] = A[2][2]; 820 S[3] = A[3][3]; 821 822 // Nothing thus far has guaranteed that the singular values are positive, 823 // so let's go back through and flip them if not (since by contract we are 824 // supposed to return all positive SVs): 825 for (int i = 0; i < 4; ++i) 826 { 827 if (S[i] < 0) 828 { 829 // If we flip S[i], we need to flip the corresponding column of U 830 // (we could also pick V if we wanted; it doesn't really matter): 831 S[i] = -S[i]; 832 for (int j = 0; j < 4; ++j) 833 U[j][i] = -U[j][i]; 834 } 835 } 836 837 // Order the singular values from largest to smallest using insertion sort: 838 for (int i = 1; i < 4; ++i) 839 { 840 const Imath::Vec4<T> uCol (U[0][i], U[1][i], U[2][i], U[3][i]); 841 const Imath::Vec4<T> vCol (V[0][i], V[1][i], V[2][i], V[3][i]); 842 const T sVal = S[i]; 843 844 int j = i - 1; 845 while (std::abs (S[j]) < std::abs (sVal)) 846 { 847 for (int k = 0; k < 4; ++k) 848 U[k][j+1] = U[k][j]; 849 for (int k = 0; k < 4; ++k) 850 V[k][j+1] = V[k][j]; 851 S[j+1] = S[j]; 852 853 --j; 854 if (j < 0) 855 break; 856 } 857 858 for (int k = 0; k < 4; ++k) 859 U[k][j+1] = uCol[k]; 860 for (int k = 0; k < 4; ++k) 861 V[k][j+1] = vCol[k]; 862 S[j+1] = sVal; 863 } 864 865 if (forcePositiveDeterminant) 866 { 867 // We want to guarantee that the returned matrices always have positive 868 // determinant. We can do this by adding the appropriate number of 869 // matrices of the form: 870 // [ 1 ] 871 // L = [ 1 ] 872 // [ 1 ] 873 // [ -1 ] 874 // Note that L' = L and L*L = Identity. Thus we can add: 875 // U*L*L*S*V = (U*L)*(L*S)*V 876 // if U has a negative determinant, and 877 // U*S*L*L*V = U*(S*L)*(L*V) 878 // if V has a neg. determinant. 879 if (U.determinant() < 0) 880 { 881 for (int i = 0; i < 4; ++i) 882 U[i][3] = -U[i][3]; 883 S[3] = -S[3]; 884 } 885 886 if (V.determinant() < 0) 887 { 888 for (int i = 0; i < 4; ++i) 889 V[i][3] = -V[i][3]; 890 S[3] = -S[3]; 891 } 892 } 893} 894 895} 896 897template <typename T> 898void 899jacobiSVD (const Imath::Matrix33<T>& A, 900 Imath::Matrix33<T>& U, 901 Imath::Vec3<T>& S, 902 Imath::Matrix33<T>& V, 903 const T tol, 904 const bool forcePositiveDeterminant) 905{ 906 twoSidedJacobiSVD (A, U, S, V, tol, forcePositiveDeterminant); 907} 908 909template <typename T> 910void 911jacobiSVD (const Imath::Matrix44<T>& A, 912 Imath::Matrix44<T>& U, 913 Imath::Vec4<T>& S, 914 Imath::Matrix44<T>& V, 915 const T tol, 916 const bool forcePositiveDeterminant) 917{ 918 twoSidedJacobiSVD (A, U, S, V, tol, forcePositiveDeterminant); 919} 920 921template void jacobiSVD (const Imath::Matrix33<float>& A, 922 Imath::Matrix33<float>& U, 923 Imath::Vec3<float>& S, 924 Imath::Matrix33<float>& V, 925 const float tol, 926 const bool forcePositiveDeterminant); 927template void jacobiSVD (const Imath::Matrix33<double>& A, 928 Imath::Matrix33<double>& U, 929 Imath::Vec3<double>& S, 930 Imath::Matrix33<double>& V, 931 const double tol, 932 const bool forcePositiveDeterminant); 933template void jacobiSVD (const Imath::Matrix44<float>& A, 934 Imath::Matrix44<float>& U, 935 Imath::Vec4<float>& S, 936 Imath::Matrix44<float>& V, 937 const float tol, 938 const bool forcePositiveDeterminant); 939template void jacobiSVD (const Imath::Matrix44<double>& A, 940 Imath::Matrix44<double>& U, 941 Imath::Vec4<double>& S, 942 Imath::Matrix44<double>& V, 943 const double tol, 944 const bool forcePositiveDeterminant); 945 946namespace 947{ 948 949template <int j, int k, typename TM> 950inline 951void 952jacobiRotateRight (TM& A, 953 const typename TM::BaseType s, 954 const typename TM::BaseType tau) 955{ 956 typedef typename TM::BaseType T; 957 958 for (unsigned int i = 0; i < TM::dimensions(); ++i) 959 { 960 const T nu1 = A[i][j]; 961 const T nu2 = A[i][k]; 962 A[i][j] -= s * (nu2 + tau * nu1); 963 A[i][k] += s * (nu1 - tau * nu2); 964 } 965} 966 967template <int j, int k, int l, typename T> 968bool 969jacobiRotation (Matrix33<T>& A, 970 Matrix33<T>& V, 971 Vec3<T>& Z, 972 const T tol) 973{ 974 // Load everything into local variables to make things easier on the 975 // optimizer: 976 const T x = A[j][j]; 977 const T y = A[j][k]; 978 const T z = A[k][k]; 979 980 // The first stage diagonalizes, 981 // [ c s ]^T [ x y ] [ c -s ] = [ d1 0 ] 982 // [ -s c ] [ y z ] [ s c ] [ 0 d2 ] 983 const T mu1 = z - x; 984 const T mu2 = 2 * y; 985 986 if (std::abs(mu2) <= tol*std::abs(mu1)) 987 { 988 // We've decided that the off-diagonal entries are already small 989 // enough, so we'll set them to zero. This actually appears to result 990 // in smaller errors than leaving them be, possibly because it prevents 991 // us from trying to do extra rotations later that we don't need. 992 A[j][k] = 0; 993 return false; 994 } 995 const T rho = mu1 / mu2; 996 const T t = (rho < 0 ? T(-1) : T(1)) / (std::abs(rho) + std::sqrt(1 + rho*rho)); 997 const T c = T(1) / std::sqrt (T(1) + t*t); 998 const T s = t * c; 999 const T tau = s / (T(1) + c); 1000 const T h = t * y; 1001 1002 // Update diagonal elements. 1003 Z[j] -= h; 1004 Z[k] += h; 1005 A[j][j] -= h; 1006 A[k][k] += h; 1007 1008 // For the entries we just zeroed out, we'll just set them to 0, since 1009 // they should be 0 up to machine precision. 1010 A[j][k] = 0; 1011 1012 // We only update upper triagnular elements of A, since 1013 // A is supposed to be symmetric. 1014 T& offd1 = l < j ? A[l][j] : A[j][l]; 1015 T& offd2 = l < k ? A[l][k] : A[k][l]; 1016 const T nu1 = offd1; 1017 const T nu2 = offd2; 1018 offd1 = nu1 - s * (nu2 + tau * nu1); 1019 offd2 = nu2 + s * (nu1 - tau * nu2); 1020 1021 // Apply rotation to V 1022 jacobiRotateRight<j, k> (V, s, tau); 1023 1024 return true; 1025} 1026 1027template <int j, int k, int l1, int l2, typename T> 1028bool 1029jacobiRotation (Matrix44<T>& A, 1030 Matrix44<T>& V, 1031 Vec4<T>& Z, 1032 const T tol) 1033{ 1034 const T x = A[j][j]; 1035 const T y = A[j][k]; 1036 const T z = A[k][k]; 1037 1038 const T mu1 = z - x; 1039 const T mu2 = T(2) * y; 1040 1041 // Let's see if rho^(-1) = mu2 / mu1 is less than tol 1042 // This test also checks if rho^2 will overflow 1043 // when tol^(-1) < sqrt(limits<T>::max()). 1044 if (std::abs(mu2) <= tol*std::abs(mu1)) 1045 { 1046 A[j][k] = 0; 1047 return true; 1048 } 1049 1050 const T rho = mu1 / mu2; 1051 const T t = (rho < 0 ? T(-1) : T(1)) / (std::abs(rho) + std::sqrt(1 + rho*rho)); 1052 const T c = T(1) / std::sqrt (T(1) + t*t); 1053 const T s = c * t; 1054 const T tau = s / (T(1) + c); 1055 const T h = t * y; 1056 1057 Z[j] -= h; 1058 Z[k] += h; 1059 A[j][j] -= h; 1060 A[k][k] += h; 1061 A[j][k] = 0; 1062 1063 { 1064 T& offd1 = l1 < j ? A[l1][j] : A[j][l1]; 1065 T& offd2 = l1 < k ? A[l1][k] : A[k][l1]; 1066 const T nu1 = offd1; 1067 const T nu2 = offd2; 1068 offd1 -= s * (nu2 + tau * nu1); 1069 offd2 += s * (nu1 - tau * nu2); 1070 } 1071 1072 { 1073 T& offd1 = l2 < j ? A[l2][j] : A[j][l2]; 1074 T& offd2 = l2 < k ? A[l2][k] : A[k][l2]; 1075 const T nu1 = offd1; 1076 const T nu2 = offd2; 1077 offd1 -= s * (nu2 + tau * nu1); 1078 offd2 += s * (nu1 - tau * nu2); 1079 } 1080 1081 jacobiRotateRight<j, k> (V, s, tau); 1082 1083 return true; 1084} 1085 1086template <typename TM> 1087inline 1088typename TM::BaseType 1089maxOffDiagSymm (const TM& A) 1090{ 1091 typedef typename TM::BaseType T; 1092 T result = 0; 1093 for (unsigned int i = 0; i < TM::dimensions(); ++i) 1094 for (unsigned int j = i+1; j < TM::dimensions(); ++j) 1095 result = std::max (result, std::abs (A[i][j])); 1096 1097 return result; 1098} 1099 1100} // namespace 1101 1102template <typename T> 1103void 1104jacobiEigenSolver (Matrix33<T>& A, 1105 Vec3<T>& S, 1106 Matrix33<T>& V, 1107 const T tol) 1108{ 1109 V.makeIdentity(); 1110 for(int i = 0; i < 3; ++i) { 1111 S[i] = A[i][i]; 1112 } 1113 1114 const int maxIter = 20; // In case we get really unlucky, prevents infinite loops 1115 const T absTol = tol * maxOffDiagSymm (A); // Tolerance is in terms of the maximum 1116 if (absTol != 0) // _off-diagonal_ entry. 1117 { 1118 int numIter = 0; 1119 do 1120 { 1121 // Z is for accumulating small changes (h) to diagonal entries 1122 // of A for one sweep. Adding h's directly to A might cause 1123 // a cancellation effect when h is relatively very small to 1124 // the corresponding diagonal entry of A and 1125 // this will increase numerical errors 1126 Vec3<T> Z(0, 0, 0); 1127 ++numIter; 1128 bool changed = jacobiRotation<0, 1, 2> (A, V, Z, tol); 1129 changed = jacobiRotation<0, 2, 1> (A, V, Z, tol) || changed; 1130 changed = jacobiRotation<1, 2, 0> (A, V, Z, tol) || changed; 1131 // One sweep passed. Add accumulated changes (Z) to singular values (S) 1132 // Update diagonal elements of A for better accuracy as well. 1133 for(int i = 0; i < 3; ++i) { 1134 A[i][i] = S[i] += Z[i]; 1135 } 1136 if (!changed) 1137 break; 1138 } while (maxOffDiagSymm(A) > absTol && numIter < maxIter); 1139 } 1140} 1141 1142template <typename T> 1143void 1144jacobiEigenSolver (Matrix44<T>& A, 1145 Vec4<T>& S, 1146 Matrix44<T>& V, 1147 const T tol) 1148{ 1149 V.makeIdentity(); 1150 1151 for(int i = 0; i < 4; ++i) { 1152 S[i] = A[i][i]; 1153 } 1154 1155 const int maxIter = 20; // In case we get really unlucky, prevents infinite loops 1156 const T absTol = tol * maxOffDiagSymm (A); // Tolerance is in terms of the maximum 1157 if (absTol != 0) // _off-diagonal_ entry. 1158 { 1159 int numIter = 0; 1160 do 1161 { 1162 ++numIter; 1163 Vec4<T> Z(0, 0, 0, 0); 1164 bool changed = jacobiRotation<0, 1, 2, 3> (A, V, Z, tol); 1165 changed = jacobiRotation<0, 2, 1, 3> (A, V, Z, tol) || changed; 1166 changed = jacobiRotation<0, 3, 1, 2> (A, V, Z, tol) || changed; 1167 changed = jacobiRotation<1, 2, 0, 3> (A, V, Z, tol) || changed; 1168 changed = jacobiRotation<1, 3, 0, 2> (A, V, Z, tol) || changed; 1169 changed = jacobiRotation<2, 3, 0, 1> (A, V, Z, tol) || changed; 1170 for(int i = 0; i < 4; ++i) { 1171 A[i][i] = S[i] += Z[i]; 1172 } 1173 if (!changed) 1174 break; 1175 } while (maxOffDiagSymm(A) > absTol && numIter < maxIter); 1176 } 1177} 1178 1179template <typename TM, typename TV> 1180void 1181maxEigenVector (TM& A, TV& V) 1182{ 1183 TV S; 1184 TM MV; 1185 jacobiEigenSolver(A, S, MV); 1186 1187 int maxIdx(0); 1188 for(unsigned int i = 1; i < TV::dimensions(); ++i) 1189 { 1190 if(std::abs(S[i]) > std::abs(S[maxIdx])) 1191 maxIdx = i; 1192 } 1193 1194 for(unsigned int i = 0; i < TV::dimensions(); ++i) 1195 V[i] = MV[i][maxIdx]; 1196} 1197 1198template <typename TM, typename TV> 1199void 1200minEigenVector (TM& A, TV& V) 1201{ 1202 TV S; 1203 TM MV; 1204 jacobiEigenSolver(A, S, MV); 1205 1206 int minIdx(0); 1207 for(unsigned int i = 1; i < TV::dimensions(); ++i) 1208 { 1209 if(std::abs(S[i]) < std::abs(S[minIdx])) 1210 minIdx = i; 1211 } 1212 1213 for(unsigned int i = 0; i < TV::dimensions(); ++i) 1214 V[i] = MV[i][minIdx]; 1215} 1216 1217template void jacobiEigenSolver (Matrix33<float>& A, 1218 Vec3<float>& S, 1219 Matrix33<float>& V, 1220 const float tol); 1221template void jacobiEigenSolver (Matrix33<double>& A, 1222 Vec3<double>& S, 1223 Matrix33<double>& V, 1224 const double tol); 1225template void jacobiEigenSolver (Matrix44<float>& A, 1226 Vec4<float>& S, 1227 Matrix44<float>& V, 1228 const float tol); 1229template void jacobiEigenSolver (Matrix44<double>& A, 1230 Vec4<double>& S, 1231 Matrix44<double>& V, 1232 const double tol); 1233 1234template void maxEigenVector (Matrix33<float>& A, 1235 Vec3<float>& S); 1236template void maxEigenVector (Matrix44<float>& A, 1237 Vec4<float>& S); 1238template void maxEigenVector (Matrix33<double>& A, 1239 Vec3<double>& S); 1240template void maxEigenVector (Matrix44<double>& A, 1241 Vec4<double>& S); 1242 1243template void minEigenVector (Matrix33<float>& A, 1244 Vec3<float>& S); 1245template void minEigenVector (Matrix44<float>& A, 1246 Vec4<float>& S); 1247template void minEigenVector (Matrix33<double>& A, 1248 Vec3<double>& S); 1249template void minEigenVector (Matrix44<double>& A, 1250 Vec4<double>& S); 1251 1252} // namespace Imath 1253