1///////////////////////////////////////////////////////////////////////////
2//
3// Copyright (c) 2002, Industrial Light & Magic, a division of Lucas
4// Digital Ltd. LLC
5//
6// All rights reserved.
7//
8// Redistribution and use in source and binary forms, with or without
9// modification, are permitted provided that the following conditions are
10// met:
11// *       Redistributions of source code must retain the above copyright
12// notice, this list of conditions and the following disclaimer.
13// *       Redistributions in binary form must reproduce the above
14// copyright notice, this list of conditions and the following disclaimer
15// in the documentation and/or other materials provided with the
16// distribution.
17// *       Neither the name of Industrial Light & Magic nor the names of
18// its contributors may be used to endorse or promote products derived
19// from this software without specific prior written permission.
20//
21// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32//
33///////////////////////////////////////////////////////////////////////////
34
35
36
37
38
39//----------------------------------------------------------------------------
40//
41//	Implementation of non-template items declared in ImathMatrixAlgo.h
42//
43//----------------------------------------------------------------------------
44
45#include "ImathMatrixAlgo.h"
46#include <cmath>
47#include <algorithm> // for std::max()
48
49#if defined(OPENEXR_DLL)
50    #define EXPORT_CONST __declspec(dllexport)
51#else
52    #define EXPORT_CONST const
53#endif
54
55namespace Imath {
56
57EXPORT_CONST M33f identity33f ( 1, 0, 0,
58                0, 1, 0,
59                0, 0, 1);
60
61EXPORT_CONST M33d identity33d ( 1, 0, 0,
62                0, 1, 0,
63                0, 0, 1);
64
65EXPORT_CONST M44f identity44f ( 1, 0, 0, 0,
66                0, 1, 0, 0,
67                0, 0, 1, 0,
68                0, 0, 0, 1);
69
70EXPORT_CONST M44d identity44d ( 1, 0, 0, 0,
71                0, 1, 0, 0,
72                0, 0, 1, 0,
73                0, 0, 0, 1);
74
75namespace
76{
77
78class KahanSum
79{
80public:
81    KahanSum() : _total(0), _correction(0) {}
82
83    void
84    operator+= (const double val)
85    {
86        const double y = val - _correction;
87        const double t = _total + y;
88        _correction = (t - _total) - y;
89        _total = t;
90    }
91
92    double get() const
93    {
94        return _total;
95    }
96
97private:
98    double _total;
99    double _correction;
100};
101
102}
103
104template <typename T>
105M44d
106procrustesRotationAndTranslation (const Vec3<T>* A, const Vec3<T>* B, const T* weights, const size_t numPoints, const bool doScale)
107{
108    if (numPoints == 0)
109        return M44d();
110
111    // Always do the accumulation in double precision:
112    V3d Acenter (0.0);
113    V3d Bcenter (0.0);
114    double weightsSum = 0.0;
115
116    if (weights == 0)
117    {
118        for (int i = 0; i < numPoints; ++i)
119        {
120            Acenter += (V3d) A[i];
121            Bcenter += (V3d) B[i];
122        }
123        weightsSum = (double) numPoints;
124    }
125    else
126    {
127        for (int i = 0; i < numPoints; ++i)
128        {
129            const double w = weights[i];
130            weightsSum += w;
131
132            Acenter += w * (V3d) A[i];
133            Bcenter += w * (V3d) B[i];
134        }
135    }
136
137    if (weightsSum == 0)
138        return M44d();
139
140    Acenter /= weightsSum;
141    Bcenter /= weightsSum;
142
143    //
144    // Find Q such that |Q*A - B|  (actually A-Acenter and B-Bcenter, weighted)
145    // is minimized in the least squares sense.
146    // From Golub/Van Loan, p.601
147    //
148    // A,B are 3xn
149    // Let C = B A^T   (where A is 3xn and B^T is nx3, so C is 3x3)
150    // Compute the SVD: C = U D V^T  (U,V rotations, D diagonal).
151    // Throw away the D part, and return Q = U V^T
152    M33d C (0.0);
153    if (weights == 0)
154    {
155        for (int i = 0; i < numPoints; ++i)
156            C += outerProduct ((V3d) B[i] - Bcenter, (V3d) A[i] - Acenter);
157    }
158    else
159    {
160        for (int i = 0; i < numPoints; ++i)
161        {
162            const double w = weights[i];
163            C += outerProduct (w * ((V3d) B[i] - Bcenter), (V3d) A[i] - Acenter);
164        }
165    }
166
167    M33d U, V;
168    V3d S;
169    jacobiSVD (C, U, S, V, Imath::limits<double>::epsilon(), true);
170
171    // We want Q.transposed() here since we are going to be using it in the
172    // Imath style (multiplying vectors on the right, v' = v*A^T):
173    const M33d Qt = V * U.transposed();
174
175    double s = 1.0;
176    if (doScale && numPoints > 1)
177    {
178        // Finding a uniform scale: let us assume the Q is completely fixed
179        // at this point (solving for both simultaneously seems much harder).
180        // We are trying to compute (again, per Golub and van Loan)
181        //    min || s*A*Q - B ||_F
182        // Notice that we've jammed a uniform scale in front of the Q.
183        // Now, the Frobenius norm (the least squares norm over matrices)
184        // has the neat property that it is equivalent to minimizing the trace
185        // of M^T*M (see your friendly neighborhood linear algebra text for a
186        // derivation).  Thus, we can expand this out as
187        //   min tr (s*A*Q - B)^T*(s*A*Q - B)
188        // = min tr(Q^T*A^T*s*s*A*Q) + tr(B^T*B) - 2*tr(Q^T*A^T*s*B)  by linearity of the trace
189        // = min s^2 tr(A^T*A) + tr(B^T*B) - 2*s*tr(Q^T*A^T*B)        using the fact that the trace is invariant
190        //                                                            under similarity transforms Q*M*Q^T
191        // If we differentiate w.r.t. s and set this to 0, we get
192        // 0 = 2*s*tr(A^T*A) - 2*tr(Q^T*A^T*B)
193        // so
194        // 2*s*tr(A^T*A) = 2*s*tr(Q^T*A^T*B)
195        // s = tr(Q^T*A^T*B) / tr(A^T*A)
196
197        KahanSum traceATA;
198        if (weights == 0)
199        {
200            for (int i = 0; i < numPoints; ++i)
201                traceATA += ((V3d) A[i] - Acenter).length2();
202        }
203        else
204        {
205            for (int i = 0; i < numPoints; ++i)
206                traceATA += ((double) weights[i]) * ((V3d) A[i] - Acenter).length2();
207        }
208
209        KahanSum traceBATQ;
210        for (int i = 0; i < 3; ++i)
211            for (int j = 0; j < 3; ++j)
212                traceBATQ += Qt[j][i] * C[i][j];
213
214        s = traceBATQ.get() / traceATA.get();
215    }
216
217    // Q is the rotation part of what we want to return.
218    // The entire transform is:
219    //    (translate origin to Bcenter) * Q * (translate Acenter to origin)
220    //                last                                first
221    // The effect of this on a point is:
222    //    (translate origin to Bcenter) * Q * (translate Acenter to origin) * point
223    //  = (translate origin to Bcenter) * Q * (-Acenter + point)
224    //  = (translate origin to Bcenter) * (-Q*Acenter + Q*point)
225    //  = (translate origin to Bcenter) * (translate Q*Acenter to origin) * Q*point
226    //  = (translate Q*Acenter to Bcenter) * Q*point
227    // So what we want to return is:
228    //    (translate Q*Acenter to Bcenter) * Q
229    //
230    // In block form, this is:
231    //   [ 1 0 0  | ] [       0 ] [ 1 0 0  |  ]   [ 1 0 0  | ] [           |   ]   [                 ]
232    //   [ 0 1 0 tb ] [  s*Q  0 ] [ 0 1 0 -ta ] = [ 0 1 0 tb ] [  s*Q  -s*Q*ta ] = [   Q   tb-s*Q*ta ]
233    //   [ 0 0 1  | ] [       0 ] [ 0 0 1  |  ]   [ 0 0 1  | ] [           |   ]   [                 ]
234    //   [ 0 0 0  1 ] [ 0 0 0 1 ] [ 0 0 0  1  ]   [ 0 0 0  1 ] [ 0 0 0     1   ]   [ 0 0 0    1      ]
235    // (ofc the whole thing is transposed for Imath).
236    const V3d translate = Bcenter - s*Acenter*Qt;
237
238    return M44d (s*Qt.x[0][0], s*Qt.x[0][1], s*Qt.x[0][2], T(0),
239                 s*Qt.x[1][0], s*Qt.x[1][1], s*Qt.x[1][2], T(0),
240                 s*Qt.x[2][0], s*Qt.x[2][1], s*Qt.x[2][2], T(0),
241                 translate.x, translate.y, translate.z, T(1));
242} // procrustesRotationAndTranslation
243
244template <typename T>
245M44d
246procrustesRotationAndTranslation (const Vec3<T>* A, const Vec3<T>* B, const size_t numPoints, const bool doScale)
247{
248    return procrustesRotationAndTranslation (A, B, (const T*) 0, numPoints, doScale);
249} // procrustesRotationAndTranslation
250
251
252template M44d procrustesRotationAndTranslation (const V3d* from, const V3d* to, const size_t numPoints, const bool doScale);
253template M44d procrustesRotationAndTranslation (const V3f* from, const V3f* to, const size_t numPoints, const bool doScale);
254template M44d procrustesRotationAndTranslation (const V3d* from, const V3d* to, const double* weights, const size_t numPoints, const bool doScale);
255template M44d procrustesRotationAndTranslation (const V3f* from, const V3f* to, const float* weights, const size_t numPoints, const bool doScale);
256
257
258namespace
259{
260
261// Applies the 2x2 Jacobi rotation
262//   [  c s 0 ]    [ 1  0 0 ]    [  c 0 s ]
263//   [ -s c 0 ] or [ 0  c s ] or [  0 1 0 ]
264//   [  0 0 1 ]    [ 0 -s c ]    [ -s 0 c ]
265// from the right; that is, computes
266//   J * A
267// for the Jacobi rotation J and the matrix A.  This is efficient because we
268// only need to touch exactly the 2 columns that are affected, so we never
269// need to explicitly construct the J matrix.
270template <typename T, int j, int k>
271void
272jacobiRotateRight (Imath::Matrix33<T>& A,
273                   const T c,
274                   const T s)
275{
276    for (int i = 0; i < 3; ++i)
277    {
278        const T tau1 = A[i][j];
279        const T tau2 = A[i][k];
280        A[i][j] = c * tau1 - s * tau2;
281        A[i][k] = s * tau1 + c * tau2;
282    }
283}
284
285template <typename T>
286void
287jacobiRotateRight (Imath::Matrix44<T>& A,
288                   const int j,
289                   const int k,
290                   const T c,
291                   const T s)
292{
293    for (int i = 0; i < 4; ++i)
294    {
295        const T tau1 = A[i][j];
296        const T tau2 = A[i][k];
297        A[i][j] = c * tau1 - s * tau2;
298        A[i][k] = s * tau1 + c * tau2;
299    }
300}
301
302// This routine solves the 2x2 SVD:
303//     [  c1   s1 ] [ w   x ] [  c2  s2 ]   [ d1    0 ]
304//     [          ] [       ] [         ] = [         ]
305//     [ -s1   c1 ] [ y   z ] [ -s2  c2 ]   [  0   d2 ]
306// where
307//      [ w   x ]
308//  A = [       ]
309//      [ y   z ]
310// is the subset of A consisting of the [j,k] entries, A([j k], [j k]) in
311// Matlab parlance.  The method is the 'USVD' algorithm described in the
312// following paper:
313//    'Computation of the Singular Value Decomposition using Mesh-Connected Processors'
314//    by Richard P. Brent, Franklin T. Luk, and Charles Van Loan
315// It breaks the computation into two steps: the first symmetrizes the matrix,
316// and the second diagonalizes the symmetric matrix.
317template <typename T, int j, int k, int l>
318bool
319twoSidedJacobiRotation (Imath::Matrix33<T>& A,
320                        Imath::Matrix33<T>& U,
321                        Imath::Matrix33<T>& V,
322                        const T tol)
323{
324    // Load everything into local variables to make things easier on the
325    // optimizer:
326    const T w = A[j][j];
327    const T x = A[j][k];
328    const T y = A[k][j];
329    const T z = A[k][k];
330
331    // We will keep track of whether we're actually performing any rotations,
332    // since if the matrix is already diagonal we'll end up with the identity
333    // as our Jacobi rotation and we can short-circuit.
334    bool changed = false;
335
336    // The first step is to symmetrize the 2x2 matrix,
337    //   [ c  s ]^T [ w x ] = [ p q ]
338    //   [ -s c ]   [ y z ]   [ q r ]
339    T mu_1 = w + z;
340    T mu_2 = x - y;
341
342    T c, s;
343    if (std::abs(mu_2) <= tol*std::abs(mu_1))  // Already symmetric (to tolerance)
344    {                                          // Note that the <= is important here
345        c = T(1);                              // because we want to bypass the computation
346        s = T(0);                              // of rho if mu_1 = mu_2 = 0.
347
348        const T p = w;
349        const T r = z;
350        mu_1 = r - p;
351        mu_2 = x + y;
352    }
353    else
354    {
355        const T rho = mu_1 / mu_2;
356        s = T(1) / std::sqrt (T(1) + rho*rho);  // TODO is there a native inverse square root function?
357        if (rho < 0)
358            s = -s;
359        c = s * rho;
360
361        mu_1 = s * (x + y) + c * (z - w);   // = r - p
362        mu_2 = T(2) * (c * x - s * z);      // = 2*q
363
364        changed = true;
365    }
366
367    // The second stage diagonalizes,
368    //   [ c2   s2 ]^T [ p q ] [ c2  s2 ]  = [ d1   0 ]
369    //   [ -s2  c2 ]   [ q r ] [ -s2 c2 ]    [  0  d2 ]
370    T c_2, s_2;
371    if (std::abs(mu_2) <= tol*std::abs(mu_1))
372    {
373       c_2 = T(1);
374       s_2 = T(0);
375    }
376    else
377    {
378        const T rho_2 = mu_1 / mu_2;
379        T t_2 = T(1) / (std::abs(rho_2) + std::sqrt(1 + rho_2*rho_2));
380        if (rho_2 < 0)
381            t_2 = -t_2;
382        c_2 = T(1) / std::sqrt (T(1) + t_2*t_2);
383        s_2 = c_2 * t_2;
384
385        changed = true;
386    }
387
388    const T c_1 = c_2 * c - s_2 * s;
389    const T s_1 = s_2 * c + c_2 * s;
390
391    if (!changed)
392    {
393        // We've decided that the off-diagonal entries are already small
394        // enough, so we'll set them to zero.  This actually appears to result
395        // in smaller errors than leaving them be, possibly because it prevents
396        // us from trying to do extra rotations later that we don't need.
397        A[k][j] = 0;
398        A[j][k] = 0;
399        return false;
400    }
401
402    const T d_1 = c_1*(w*c_2 - x*s_2) - s_1*(y*c_2 - z*s_2);
403    const T d_2 = s_1*(w*s_2 + x*c_2) + c_1*(y*s_2 + z*c_2);
404
405    // For the entries we just zeroed out, we'll just set them to 0, since
406    // they should be 0 up to machine precision.
407    A[j][j] = d_1;
408    A[k][k] = d_2;
409    A[k][j] = 0;
410    A[j][k] = 0;
411
412    // Rotate the entries that _weren't_ involved in the 2x2 SVD:
413    {
414        // Rotate on the left by
415        //    [  c1 s1 0 ]^T      [  c1 0 s1 ]^T      [ 1   0  0 ]^T
416        //    [ -s1 c1 0 ]    or  [   0 1  0 ]    or  [ 0  c1 s1 ]
417        //    [   0  0 1 ]        [ -s1 0 c1 ]        [ 0 -s1 c1 ]
418        // This has the effect of adding the (weighted) ith and jth _rows_ to
419        // each other.
420        const T tau1 = A[j][l];
421        const T tau2 = A[k][l];
422        A[j][l] = c_1 * tau1 - s_1 * tau2;
423        A[k][l] = s_1 * tau1 + c_1 * tau2;
424    }
425
426    {
427        // Rotate on the right by
428        //    [  c2 s2 0 ]      [  c2 0 s2 ]      [ 1   0  0 ]
429        //    [ -s2 c2 0 ]  or  [   0 1  0 ]  or  [ 0  c2 s2 ]
430        //    [   0  0 1 ]      [ -s2 0 c2 ]      [ 0 -s2 c2 ]
431        // This has the effect of adding the (weighted) ith and jth _columns_ to
432        // each other.
433        const T tau1 = A[l][j];
434        const T tau2 = A[l][k];
435        A[l][j] = c_2 * tau1 - s_2 * tau2;
436        A[l][k] = s_2 * tau1 + c_2 * tau2;
437    }
438
439    // Now apply the rotations to U and V:
440    // Remember that we have
441    //    R1^T * A * R2 = D
442    // This is in the 2x2 case, but after doing a bunch of these
443    // we will get something like this for the 3x3 case:
444    //   ... R1b^T * R1a^T * A * R2a * R2b * ... = D
445    //   -----------------       ---------------
446    //        = U^T                    = V
447    // So,
448    //   U = R1a * R1b * ...
449    //   V = R2a * R2b * ...
450    jacobiRotateRight<T, j, k> (U, c_1, s_1);
451    jacobiRotateRight<T, j, k> (V, c_2, s_2);
452
453    return true;
454}
455
456template <typename T>
457bool
458twoSidedJacobiRotation (Imath::Matrix44<T>& A,
459                        int j,
460                        int k,
461                        Imath::Matrix44<T>& U,
462                        Imath::Matrix44<T>& V,
463                        const T tol)
464{
465    // Load everything into local variables to make things easier on the
466    // optimizer:
467    const T w = A[j][j];
468    const T x = A[j][k];
469    const T y = A[k][j];
470    const T z = A[k][k];
471
472    // We will keep track of whether we're actually performing any rotations,
473    // since if the matrix is already diagonal we'll end up with the identity
474    // as our Jacobi rotation and we can short-circuit.
475    bool changed = false;
476
477    // The first step is to symmetrize the 2x2 matrix,
478    //   [ c  s ]^T [ w x ] = [ p q ]
479    //   [ -s c ]   [ y z ]   [ q r ]
480    T mu_1 = w + z;
481    T mu_2 = x - y;
482
483    T c, s;
484    if (std::abs(mu_2) <= tol*std::abs(mu_1))  // Already symmetric (to tolerance)
485    {                                          // Note that the <= is important here
486        c = T(1);                              // because we want to bypass the computation
487        s = T(0);                              // of rho if mu_1 = mu_2 = 0.
488
489        const T p = w;
490        const T r = z;
491        mu_1 = r - p;
492        mu_2 = x + y;
493    }
494    else
495    {
496        const T rho = mu_1 / mu_2;
497        s = T(1) / std::sqrt (T(1) + rho*rho);  // TODO is there a native inverse square root function?
498        if (rho < 0)
499            s = -s;
500        c = s * rho;
501
502        mu_1 = s * (x + y) + c * (z - w);   // = r - p
503        mu_2 = T(2) * (c * x - s * z);      // = 2*q
504
505        changed = true;
506    }
507
508    // The second stage diagonalizes,
509    //   [ c2   s2 ]^T [ p q ] [ c2  s2 ]  = [ d1   0 ]
510    //   [ -s2  c2 ]   [ q r ] [ -s2 c2 ]    [  0  d2 ]
511    T c_2, s_2;
512    if (std::abs(mu_2) <= tol*std::abs(mu_1))
513    {
514       c_2 = T(1);
515       s_2 = T(0);
516    }
517    else
518    {
519        const T rho_2 = mu_1 / mu_2;
520        T t_2 = T(1) / (std::abs(rho_2) + std::sqrt(1 + rho_2*rho_2));
521        if (rho_2 < 0)
522            t_2 = -t_2;
523        c_2 = T(1) / std::sqrt (T(1) + t_2*t_2);
524        s_2 = c_2 * t_2;
525
526        changed = true;
527    }
528
529    const T c_1 = c_2 * c - s_2 * s;
530    const T s_1 = s_2 * c + c_2 * s;
531
532    if (!changed)
533    {
534        // We've decided that the off-diagonal entries are already small
535        // enough, so we'll set them to zero.  This actually appears to result
536        // in smaller errors than leaving them be, possibly because it prevents
537        // us from trying to do extra rotations later that we don't need.
538        A[k][j] = 0;
539        A[j][k] = 0;
540        return false;
541    }
542
543    const T d_1 = c_1*(w*c_2 - x*s_2) - s_1*(y*c_2 - z*s_2);
544    const T d_2 = s_1*(w*s_2 + x*c_2) + c_1*(y*s_2 + z*c_2);
545
546    // For the entries we just zeroed out, we'll just set them to 0, since
547    // they should be 0 up to machine precision.
548    A[j][j] = d_1;
549    A[k][k] = d_2;
550    A[k][j] = 0;
551    A[j][k] = 0;
552
553    // Rotate the entries that _weren't_ involved in the 2x2 SVD:
554    for (int l = 0; l < 4; ++l)
555    {
556        if (l == j || l == k)
557            continue;
558
559        // Rotate on the left by
560        //    [ 1               ]
561        //    [   .             ]
562        //    [     c2   s2     ]  j
563        //    [        1        ]
564        //    [    -s2   c2     ]  k
565        //    [             .   ]
566        //    [               1 ]
567        //          j    k
568        //
569        // This has the effect of adding the (weighted) ith and jth _rows_ to
570        // each other.
571        const T tau1 = A[j][l];
572        const T tau2 = A[k][l];
573        A[j][l] = c_1 * tau1 - s_1 * tau2;
574        A[k][l] = s_1 * tau1 + c_1 * tau2;
575    }
576
577    for (int l = 0; l < 4; ++l)
578    {
579        // We set the A[j/k][j/k] entries already
580        if (l == j || l == k)
581            continue;
582
583        // Rotate on the right by
584        //    [ 1               ]
585        //    [   .             ]
586        //    [     c2   s2     ]  j
587        //    [        1        ]
588        //    [    -s2   c2     ]  k
589        //    [             .   ]
590        //    [               1 ]
591        //          j    k
592        //
593        // This has the effect of adding the (weighted) ith and jth _columns_ to
594        // each other.
595        const T tau1 = A[l][j];
596        const T tau2 = A[l][k];
597        A[l][j] = c_2 * tau1 - s_2 * tau2;
598        A[l][k] = s_2 * tau1 + c_2 * tau2;
599    }
600
601    // Now apply the rotations to U and V:
602    // Remember that we have
603    //    R1^T * A * R2 = D
604    // This is in the 2x2 case, but after doing a bunch of these
605    // we will get something like this for the 3x3 case:
606    //   ... R1b^T * R1a^T * A * R2a * R2b * ... = D
607    //   -----------------       ---------------
608    //        = U^T                    = V
609    // So,
610    //   U = R1a * R1b * ...
611    //   V = R2a * R2b * ...
612    jacobiRotateRight (U, j, k, c_1, s_1);
613    jacobiRotateRight (V, j, k, c_2, s_2);
614
615    return true;
616}
617
618template <typename T>
619void
620swapColumns (Imath::Matrix33<T>& A, int j, int k)
621{
622    for (int i = 0; i < 3; ++i)
623        std::swap (A[i][j], A[i][k]);
624}
625
626template <typename T>
627T
628maxOffDiag (const Imath::Matrix33<T>& A)
629{
630    T result = 0;
631    result = std::max (result, std::abs (A[0][1]));
632    result = std::max (result, std::abs (A[0][2]));
633    result = std::max (result, std::abs (A[1][0]));
634    result = std::max (result, std::abs (A[1][2]));
635    result = std::max (result, std::abs (A[2][0]));
636    result = std::max (result, std::abs (A[2][1]));
637    return result;
638}
639
640template <typename T>
641T
642maxOffDiag (const Imath::Matrix44<T>& A)
643{
644    T result = 0;
645    for (int i = 0; i < 4; ++i)
646    {
647        for (int j = 0; j < 4; ++j)
648        {
649            if (i != j)
650                result = std::max (result, std::abs (A[i][j]));
651        }
652    }
653
654    return result;
655}
656
657template <typename T>
658void
659twoSidedJacobiSVD (Imath::Matrix33<T> A,
660                   Imath::Matrix33<T>& U,
661                   Imath::Vec3<T>& S,
662                   Imath::Matrix33<T>& V,
663                   const T tol,
664                   const bool forcePositiveDeterminant)
665{
666    // The two-sided Jacobi SVD works by repeatedly zeroing out
667    // off-diagonal entries of the matrix, 2 at a time.  Basically,
668    // we can take our 3x3 matrix,
669    //    [* * *]
670    //    [* * *]
671    //    [* * *]
672    // and use a pair of orthogonal transforms to zero out, say, the
673    // pair of entries (0, 1) and (1, 0):
674    //  [ c1 s1  ] [* * *] [ c2 s2  ]   [*   *]
675    //  [-s1 c1  ] [* * *] [-s2 c2  ] = [  * *]
676    //  [       1] [* * *] [       1]   [* * *]
677    // When we go to zero out the next pair of entries (say, (0, 2) and (2, 0))
678    // then we don't expect those entries to stay 0:
679    //  [ c1 s1  ] [*   *] [ c2 s2  ]   [* *  ]
680    //  [-s1 c1  ] [  * *] [-s2 c2  ] = [* * *]
681    //  [       1] [* * *] [       1]   [  * *]
682    // However, if we keep doing this, we'll find that the off-diagonal entries
683    // converge to 0 fairly quickly (convergence should be roughly cubic).  The
684    // result is a diagonal A matrix and a bunch of orthogonal transforms:
685    //               [* * *]                [*    ]
686    //  L1 L2 ... Ln [* * *] Rn ... R2 R1 = [  *  ]
687    //               [* * *]                [    *]
688    //  ------------ ------- ------------   -------
689    //      U^T         A         V            S
690    // This turns out to be highly accurate because (1) orthogonal transforms
691    // are extremely stable to compute and apply (this is why QR factorization
692    // works so well, FWIW) and because (2) by applying everything to the original
693    // matrix A instead of computing (A^T * A) we avoid any precision loss that
694    // would result from that.
695    U.makeIdentity();
696    V.makeIdentity();
697
698    const int maxIter = 20;  // In case we get really unlucky, prevents infinite loops
699    const T absTol = tol * maxOffDiag (A);  // Tolerance is in terms of the maximum
700    if (absTol != 0)                        // _off-diagonal_ entry.
701    {
702        int numIter = 0;
703        do
704        {
705            ++numIter;
706            bool changed = twoSidedJacobiRotation<T, 0, 1, 2> (A, U, V, tol);
707            changed = twoSidedJacobiRotation<T, 0, 2, 1> (A, U, V, tol) || changed;
708            changed = twoSidedJacobiRotation<T, 1, 2, 0> (A, U, V, tol) || changed;
709            if (!changed)
710                break;
711        } while (maxOffDiag(A) > absTol && numIter < maxIter);
712    }
713
714    // The off-diagonal entries are (effectively) 0, so whatever's left on the
715    // diagonal are the singular values:
716    S.x = A[0][0];
717    S.y = A[1][1];
718    S.z = A[2][2];
719
720    // Nothing thus far has guaranteed that the singular values are positive,
721    // so let's go back through and flip them if not (since by contract we are
722    // supposed to return all positive SVs):
723    for (int i = 0; i < 3; ++i)
724    {
725        if (S[i] < 0)
726        {
727            // If we flip S[i], we need to flip the corresponding column of U
728            // (we could also pick V if we wanted; it doesn't really matter):
729            S[i] = -S[i];
730            for (int j = 0; j < 3; ++j)
731                U[j][i] = -U[j][i];
732        }
733    }
734
735    // Order the singular values from largest to smallest; this requires
736    // exactly two passes through the data using bubble sort:
737    for (int i = 0; i < 2; ++i)
738    {
739        for (int j = 0; j < (2 - i); ++j)
740        {
741            // No absolute values necessary since we already ensured that
742            // they're positive:
743            if (S[j] < S[j+1])
744            {
745                // If we swap singular values we also have to swap
746                // corresponding columns in U and V:
747                std::swap (S[j], S[j+1]);
748                swapColumns (U, j, j+1);
749                swapColumns (V, j, j+1);
750            }
751        }
752    }
753
754    if (forcePositiveDeterminant)
755    {
756        // We want to guarantee that the returned matrices always have positive
757        // determinant.  We can do this by adding the appropriate number of
758        // matrices of the form:
759        //       [ 1       ]
760        //  L =  [    1    ]
761        //       [      -1 ]
762        // Note that L' = L and L*L = Identity.  Thus we can add:
763        //   U*L*L*S*V = (U*L)*(L*S)*V
764        // if U has a negative determinant, and
765        //   U*S*L*L*V = U*(S*L)*(L*V)
766        // if V has a neg. determinant.
767        if (U.determinant() < 0)
768        {
769            for (int i = 0; i < 3; ++i)
770                U[i][2] = -U[i][2];
771            S.z = -S.z;
772        }
773
774        if (V.determinant() < 0)
775        {
776            for (int i = 0; i < 3; ++i)
777                V[i][2] = -V[i][2];
778            S.z = -S.z;
779        }
780    }
781}
782
783template <typename T>
784void
785twoSidedJacobiSVD (Imath::Matrix44<T> A,
786                   Imath::Matrix44<T>& U,
787                   Imath::Vec4<T>& S,
788                   Imath::Matrix44<T>& V,
789                   const T tol,
790                   const bool forcePositiveDeterminant)
791{
792    // Please see the Matrix33 version for a detailed description of the algorithm.
793    U.makeIdentity();
794    V.makeIdentity();
795
796    const int maxIter = 20;  // In case we get really unlucky, prevents infinite loops
797    const T absTol = tol * maxOffDiag (A);  // Tolerance is in terms of the maximum
798    if (absTol != 0)                        // _off-diagonal_ entry.
799    {
800        int numIter = 0;
801        do
802        {
803            ++numIter;
804            bool changed = twoSidedJacobiRotation (A, 0, 1, U, V, tol);
805            changed = twoSidedJacobiRotation (A, 0, 2, U, V, tol) || changed;
806            changed = twoSidedJacobiRotation (A, 0, 3, U, V, tol) || changed;
807            changed = twoSidedJacobiRotation (A, 1, 2, U, V, tol) || changed;
808            changed = twoSidedJacobiRotation (A, 1, 3, U, V, tol) || changed;
809            changed = twoSidedJacobiRotation (A, 2, 3, U, V, tol) || changed;
810            if (!changed)
811                break;
812        } while (maxOffDiag(A) > absTol && numIter < maxIter);
813    }
814
815    // The off-diagonal entries are (effectively) 0, so whatever's left on the
816    // diagonal are the singular values:
817    S[0] = A[0][0];
818    S[1] = A[1][1];
819    S[2] = A[2][2];
820    S[3] = A[3][3];
821
822    // Nothing thus far has guaranteed that the singular values are positive,
823    // so let's go back through and flip them if not (since by contract we are
824    // supposed to return all positive SVs):
825    for (int i = 0; i < 4; ++i)
826    {
827        if (S[i] < 0)
828        {
829            // If we flip S[i], we need to flip the corresponding column of U
830            // (we could also pick V if we wanted; it doesn't really matter):
831            S[i] = -S[i];
832            for (int j = 0; j < 4; ++j)
833                U[j][i] = -U[j][i];
834        }
835    }
836
837    // Order the singular values from largest to smallest using insertion sort:
838    for (int i = 1; i < 4; ++i)
839    {
840        const Imath::Vec4<T> uCol (U[0][i], U[1][i], U[2][i], U[3][i]);
841        const Imath::Vec4<T> vCol (V[0][i], V[1][i], V[2][i], V[3][i]);
842        const T sVal = S[i];
843
844        int j = i - 1;
845        while (std::abs (S[j]) < std::abs (sVal))
846        {
847            for (int k = 0; k < 4; ++k)
848                U[k][j+1] = U[k][j];
849            for (int k = 0; k < 4; ++k)
850                V[k][j+1] = V[k][j];
851            S[j+1] = S[j];
852
853            --j;
854            if (j < 0)
855                break;
856        }
857
858        for (int k = 0; k < 4; ++k)
859            U[k][j+1] = uCol[k];
860        for (int k = 0; k < 4; ++k)
861            V[k][j+1] = vCol[k];
862        S[j+1] = sVal;
863    }
864
865    if (forcePositiveDeterminant)
866    {
867        // We want to guarantee that the returned matrices always have positive
868        // determinant.  We can do this by adding the appropriate number of
869        // matrices of the form:
870        //       [ 1          ]
871        //  L =  [    1       ]
872        //       [       1    ]
873        //       [         -1 ]
874        // Note that L' = L and L*L = Identity.  Thus we can add:
875        //   U*L*L*S*V = (U*L)*(L*S)*V
876        // if U has a negative determinant, and
877        //   U*S*L*L*V = U*(S*L)*(L*V)
878        // if V has a neg. determinant.
879        if (U.determinant() < 0)
880        {
881            for (int i = 0; i < 4; ++i)
882                U[i][3] = -U[i][3];
883            S[3] = -S[3];
884        }
885
886        if (V.determinant() < 0)
887        {
888            for (int i = 0; i < 4; ++i)
889                V[i][3] = -V[i][3];
890            S[3] = -S[3];
891        }
892    }
893}
894
895}
896
897template <typename T>
898void
899jacobiSVD (const Imath::Matrix33<T>& A,
900           Imath::Matrix33<T>& U,
901           Imath::Vec3<T>& S,
902           Imath::Matrix33<T>& V,
903           const T tol,
904           const bool forcePositiveDeterminant)
905{
906    twoSidedJacobiSVD (A, U, S, V, tol, forcePositiveDeterminant);
907}
908
909template <typename T>
910void
911jacobiSVD (const Imath::Matrix44<T>& A,
912           Imath::Matrix44<T>& U,
913           Imath::Vec4<T>& S,
914           Imath::Matrix44<T>& V,
915           const T tol,
916           const bool forcePositiveDeterminant)
917{
918    twoSidedJacobiSVD (A, U, S, V, tol, forcePositiveDeterminant);
919}
920
921template void jacobiSVD (const Imath::Matrix33<float>& A,
922                         Imath::Matrix33<float>& U,
923                         Imath::Vec3<float>& S,
924                         Imath::Matrix33<float>& V,
925                         const float tol,
926                         const bool forcePositiveDeterminant);
927template void jacobiSVD (const Imath::Matrix33<double>& A,
928                         Imath::Matrix33<double>& U,
929                         Imath::Vec3<double>& S,
930                         Imath::Matrix33<double>& V,
931                         const double tol,
932                         const bool forcePositiveDeterminant);
933template void jacobiSVD (const Imath::Matrix44<float>& A,
934                         Imath::Matrix44<float>& U,
935                         Imath::Vec4<float>& S,
936                         Imath::Matrix44<float>& V,
937                         const float tol,
938                         const bool forcePositiveDeterminant);
939template void jacobiSVD (const Imath::Matrix44<double>& A,
940                         Imath::Matrix44<double>& U,
941                         Imath::Vec4<double>& S,
942                         Imath::Matrix44<double>& V,
943                         const double tol,
944                         const bool forcePositiveDeterminant);
945
946namespace
947{
948
949template <int j, int k, typename TM>
950inline
951void
952jacobiRotateRight (TM& A,
953                   const typename TM::BaseType s,
954                   const typename TM::BaseType tau)
955{
956    typedef typename TM::BaseType T;
957
958    for (unsigned int i = 0; i < TM::dimensions(); ++i)
959    {
960        const T nu1 = A[i][j];
961        const T nu2 = A[i][k];
962        A[i][j] -= s * (nu2 + tau * nu1);
963        A[i][k] += s * (nu1 - tau * nu2);
964   }
965}
966
967template <int j, int k, int l, typename T>
968bool
969jacobiRotation (Matrix33<T>& A,
970                Matrix33<T>& V,
971                Vec3<T>& Z,
972                const T tol)
973{
974    // Load everything into local variables to make things easier on the
975    // optimizer:
976    const T x = A[j][j];
977    const T y = A[j][k];
978    const T z = A[k][k];
979
980    // The first stage diagonalizes,
981    //   [ c  s ]^T [ x y ] [ c -s ]  = [ d1   0 ]
982    //   [ -s c ]   [ y z ] [ s  c ]    [  0  d2 ]
983    const T mu1 = z - x;
984    const T mu2 = 2 * y;
985
986    if (std::abs(mu2) <= tol*std::abs(mu1))
987    {
988        // We've decided that the off-diagonal entries are already small
989        // enough, so we'll set them to zero.  This actually appears to result
990        // in smaller errors than leaving them be, possibly because it prevents
991        // us from trying to do extra rotations later that we don't need.
992        A[j][k] = 0;
993        return false;
994    }
995    const T rho = mu1 / mu2;
996    const T t = (rho < 0 ? T(-1) : T(1)) / (std::abs(rho) + std::sqrt(1 + rho*rho));
997    const T c = T(1) / std::sqrt (T(1) + t*t);
998    const T s = t * c;
999    const T tau = s / (T(1) + c);
1000    const T h = t * y;
1001
1002    // Update diagonal elements.
1003    Z[j] -= h;
1004    Z[k] += h;
1005    A[j][j] -= h;
1006    A[k][k] += h;
1007
1008    // For the entries we just zeroed out, we'll just set them to 0, since
1009    // they should be 0 up to machine precision.
1010    A[j][k] = 0;
1011
1012    // We only update upper triagnular elements of A, since
1013    // A is supposed to be symmetric.
1014    T& offd1 = l < j ? A[l][j] : A[j][l];
1015    T& offd2 = l < k ? A[l][k] : A[k][l];
1016    const T nu1 = offd1;
1017    const T nu2 = offd2;
1018    offd1 = nu1 - s * (nu2 + tau * nu1);
1019    offd2 = nu2 + s * (nu1 - tau * nu2);
1020
1021    // Apply rotation to V
1022    jacobiRotateRight<j, k> (V, s, tau);
1023
1024    return true;
1025}
1026
1027template <int j, int k, int l1, int l2, typename T>
1028bool
1029jacobiRotation (Matrix44<T>& A,
1030                Matrix44<T>& V,
1031                Vec4<T>& Z,
1032                const T tol)
1033{
1034    const T x = A[j][j];
1035    const T y = A[j][k];
1036    const T z = A[k][k];
1037
1038    const T mu1 = z - x;
1039    const T mu2 = T(2) * y;
1040
1041    // Let's see if rho^(-1) = mu2 / mu1 is less than tol
1042    // This test also checks if rho^2 will overflow
1043    // when tol^(-1) < sqrt(limits<T>::max()).
1044    if (std::abs(mu2) <= tol*std::abs(mu1))
1045    {
1046        A[j][k] = 0;
1047        return true;
1048    }
1049
1050    const T rho = mu1 / mu2;
1051    const T t = (rho < 0 ? T(-1) : T(1)) / (std::abs(rho) + std::sqrt(1 + rho*rho));
1052    const T c = T(1) / std::sqrt (T(1) + t*t);
1053    const T s = c * t;
1054    const T tau = s / (T(1) + c);
1055    const T h = t * y;
1056
1057    Z[j] -= h;
1058    Z[k] += h;
1059    A[j][j] -= h;
1060    A[k][k] += h;
1061    A[j][k] = 0;
1062
1063    {
1064        T& offd1 = l1 < j ? A[l1][j] : A[j][l1];
1065        T& offd2 = l1 < k ? A[l1][k] : A[k][l1];
1066        const T nu1 = offd1;
1067        const T nu2 = offd2;
1068        offd1 -= s * (nu2 + tau * nu1);
1069        offd2 += s * (nu1 - tau * nu2);
1070    }
1071
1072    {
1073        T& offd1 = l2 < j ? A[l2][j] : A[j][l2];
1074        T& offd2 = l2 < k ? A[l2][k] : A[k][l2];
1075        const T nu1 = offd1;
1076        const T nu2 = offd2;
1077        offd1 -= s * (nu2 + tau * nu1);
1078        offd2 += s * (nu1 - tau * nu2);
1079    }
1080
1081    jacobiRotateRight<j, k> (V, s, tau);
1082
1083    return true;
1084}
1085
1086template <typename TM>
1087inline
1088typename TM::BaseType
1089maxOffDiagSymm (const TM& A)
1090{
1091    typedef typename TM::BaseType T;
1092    T result = 0;
1093    for (unsigned int i = 0; i < TM::dimensions(); ++i)
1094        for (unsigned int j = i+1; j < TM::dimensions(); ++j)
1095            result = std::max (result, std::abs (A[i][j]));
1096
1097   return result;
1098}
1099
1100} // namespace
1101
1102template <typename T>
1103void
1104jacobiEigenSolver (Matrix33<T>& A,
1105                   Vec3<T>& S,
1106                   Matrix33<T>& V,
1107                   const T tol)
1108{
1109    V.makeIdentity();
1110    for(int i = 0; i < 3; ++i) {
1111        S[i] = A[i][i];
1112    }
1113
1114    const int maxIter = 20;  // In case we get really unlucky, prevents infinite loops
1115    const T absTol = tol * maxOffDiagSymm (A);  // Tolerance is in terms of the maximum
1116    if (absTol != 0)                        // _off-diagonal_ entry.
1117    {
1118        int numIter = 0;
1119        do
1120        {
1121            // Z is for accumulating small changes (h) to diagonal entries
1122            // of A for one sweep. Adding h's directly to A might cause
1123            // a cancellation effect when h is relatively very small to
1124            // the corresponding diagonal entry of A and
1125            // this will increase numerical errors
1126            Vec3<T> Z(0, 0, 0);
1127            ++numIter;
1128            bool changed = jacobiRotation<0, 1, 2> (A, V, Z, tol);
1129            changed = jacobiRotation<0, 2, 1> (A, V, Z, tol) || changed;
1130            changed = jacobiRotation<1, 2, 0> (A, V, Z, tol) || changed;
1131            // One sweep passed. Add accumulated changes (Z) to singular values (S)
1132            // Update diagonal elements of A for better accuracy as well.
1133            for(int i = 0; i < 3; ++i) {
1134                A[i][i] = S[i] += Z[i];
1135            }
1136            if (!changed)
1137                break;
1138        } while (maxOffDiagSymm(A) > absTol && numIter < maxIter);
1139    }
1140}
1141
1142template <typename T>
1143void
1144jacobiEigenSolver (Matrix44<T>& A,
1145                   Vec4<T>& S,
1146                   Matrix44<T>& V,
1147                   const T tol)
1148{
1149    V.makeIdentity();
1150
1151    for(int i = 0; i < 4; ++i) {
1152        S[i] = A[i][i];
1153    }
1154
1155    const int maxIter = 20;  // In case we get really unlucky, prevents infinite loops
1156    const T absTol = tol * maxOffDiagSymm (A);  // Tolerance is in terms of the maximum
1157    if (absTol != 0)                        // _off-diagonal_ entry.
1158    {
1159        int numIter = 0;
1160        do
1161        {
1162            ++numIter;
1163            Vec4<T> Z(0, 0, 0, 0);
1164            bool changed = jacobiRotation<0, 1, 2, 3> (A, V, Z, tol);
1165            changed = jacobiRotation<0, 2, 1, 3> (A, V, Z, tol) || changed;
1166            changed = jacobiRotation<0, 3, 1, 2> (A, V, Z, tol) || changed;
1167            changed = jacobiRotation<1, 2, 0, 3> (A, V, Z, tol) || changed;
1168            changed = jacobiRotation<1, 3, 0, 2> (A, V, Z, tol) || changed;
1169            changed = jacobiRotation<2, 3, 0, 1> (A, V, Z, tol) || changed;
1170            for(int i = 0; i < 4; ++i) {
1171                A[i][i] = S[i] += Z[i];
1172            }
1173           if (!changed)
1174                break;
1175        } while (maxOffDiagSymm(A) > absTol && numIter < maxIter);
1176    }
1177}
1178
1179template <typename TM, typename TV>
1180void
1181maxEigenVector (TM& A, TV& V)
1182{
1183    TV S;
1184    TM MV;
1185    jacobiEigenSolver(A, S, MV);
1186
1187    int maxIdx(0);
1188    for(unsigned int i = 1; i < TV::dimensions(); ++i)
1189    {
1190        if(std::abs(S[i]) > std::abs(S[maxIdx]))
1191            maxIdx = i;
1192    }
1193
1194    for(unsigned int i = 0; i < TV::dimensions(); ++i)
1195        V[i] = MV[i][maxIdx];
1196}
1197
1198template <typename TM, typename TV>
1199void
1200minEigenVector (TM& A, TV& V)
1201{
1202    TV S;
1203    TM MV;
1204    jacobiEigenSolver(A, S, MV);
1205
1206    int minIdx(0);
1207    for(unsigned int i = 1; i < TV::dimensions(); ++i)
1208    {
1209        if(std::abs(S[i]) < std::abs(S[minIdx]))
1210            minIdx = i;
1211    }
1212
1213   for(unsigned int i = 0; i < TV::dimensions(); ++i)
1214        V[i] = MV[i][minIdx];
1215}
1216
1217template void jacobiEigenSolver (Matrix33<float>& A,
1218                                 Vec3<float>& S,
1219                                 Matrix33<float>& V,
1220                                 const float tol);
1221template void jacobiEigenSolver (Matrix33<double>& A,
1222                                 Vec3<double>& S,
1223                                 Matrix33<double>& V,
1224                                 const double tol);
1225template void jacobiEigenSolver (Matrix44<float>& A,
1226                                 Vec4<float>& S,
1227                                 Matrix44<float>& V,
1228                                 const float tol);
1229template void jacobiEigenSolver (Matrix44<double>& A,
1230                                 Vec4<double>& S,
1231                                 Matrix44<double>& V,
1232                                 const double tol);
1233
1234template void maxEigenVector (Matrix33<float>& A,
1235                              Vec3<float>& S);
1236template void maxEigenVector (Matrix44<float>& A,
1237                              Vec4<float>& S);
1238template void maxEigenVector (Matrix33<double>& A,
1239                              Vec3<double>& S);
1240template void maxEigenVector (Matrix44<double>& A,
1241                              Vec4<double>& S);
1242
1243template void minEigenVector (Matrix33<float>& A,
1244                              Vec3<float>& S);
1245template void minEigenVector (Matrix44<float>& A,
1246                              Vec4<float>& S);
1247template void minEigenVector (Matrix33<double>& A,
1248                              Vec3<double>& S);
1249template void minEigenVector (Matrix44<double>& A,
1250                              Vec4<double>& S);
1251
1252} // namespace Imath
1253