1/***********************************************************************
2Copyright (c) 2006-2011, Skype Limited. All rights reserved.
3Redistribution and use in source and binary forms, with or without
4modification, are permitted provided that the following conditions
5are met:
6- Redistributions of source code must retain the above copyright notice,
7this list of conditions and the following disclaimer.
8- Redistributions in binary form must reproduce the above copyright
9notice, this list of conditions and the following disclaimer in the
10documentation and/or other materials provided with the distribution.
11- Neither the name of Internet Society, IETF or IETF Trust, nor the
12names of specific contributors, may be used to endorse or promote
13products derived from this software without specific prior written
14permission.
15THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
16AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
19LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
20CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
21SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
22INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
24ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
25POSSIBILITY OF SUCH DAMAGE.
26***********************************************************************/
27
28#ifdef HAVE_CONFIG_H
29#include "config.h"
30#endif
31
32#include "main_FIX.h"
33#include "stack_alloc.h"
34#include "tuning_parameters.h"
35
36/*****************************/
37/* Internal function headers */
38/*****************************/
39
40typedef struct {
41    opus_int32 Q36_part;
42    opus_int32 Q48_part;
43} inv_D_t;
44
45/* Factorize square matrix A into LDL form */
46static OPUS_INLINE void silk_LDL_factorize_FIX(
47    opus_int32          *A,         /* I/O Pointer to Symetric Square Matrix                            */
48    opus_int            M,          /* I   Size of Matrix                                               */
49    opus_int32          *L_Q16,     /* I/O Pointer to Square Upper triangular Matrix                    */
50    inv_D_t             *inv_D      /* I/O Pointer to vector holding inverted diagonal elements of D    */
51);
52
53/* Solve Lx = b, when L is lower triangular and has ones on the diagonal */
54static OPUS_INLINE void silk_LS_SolveFirst_FIX(
55    const opus_int32    *L_Q16,     /* I    Pointer to Lower Triangular Matrix                          */
56    opus_int            M,          /* I    Dim of Matrix equation                                      */
57    const opus_int32    *b,         /* I    b Vector                                                    */
58    opus_int32          *x_Q16      /* O    x Vector                                                    */
59);
60
61/* Solve L^t*x = b, where L is lower triangular with ones on the diagonal */
62static OPUS_INLINE void silk_LS_SolveLast_FIX(
63    const opus_int32    *L_Q16,     /* I    Pointer to Lower Triangular Matrix                          */
64    const opus_int      M,          /* I    Dim of Matrix equation                                      */
65    const opus_int32    *b,         /* I    b Vector                                                    */
66    opus_int32          *x_Q16      /* O    x Vector                                                    */
67);
68
69static OPUS_INLINE void silk_LS_divide_Q16_FIX(
70    opus_int32          T[],        /* I/O  Numenator vector                                            */
71    inv_D_t             *inv_D,     /* I    1 / D vector                                                */
72    opus_int            M           /* I    dimension                                                   */
73);
74
75/* Solves Ax = b, assuming A is symmetric */
76void silk_solve_LDL_FIX(
77    opus_int32                      *A,                                     /* I    Pointer to symetric square matrix A                                         */
78    opus_int                        M,                                      /* I    Size of matrix                                                              */
79    const opus_int32                *b,                                     /* I    Pointer to b vector                                                         */
80    opus_int32                      *x_Q16                                  /* O    Pointer to x solution vector                                                */
81)
82{
83    VARDECL( opus_int32, L_Q16 );
84    opus_int32 Y[      MAX_MATRIX_SIZE ];
85    inv_D_t   inv_D[  MAX_MATRIX_SIZE ];
86    SAVE_STACK;
87
88    silk_assert( M <= MAX_MATRIX_SIZE );
89    ALLOC( L_Q16, M * M, opus_int32 );
90
91    /***************************************************
92    Factorize A by LDL such that A = L*D*L',
93    where L is lower triangular with ones on diagonal
94    ****************************************************/
95    silk_LDL_factorize_FIX( A, M, L_Q16, inv_D );
96
97    /****************************************************
98    * substitute D*L'*x = Y. ie:
99    L*D*L'*x = b => L*Y = b <=> Y = inv(L)*b
100    ******************************************************/
101    silk_LS_SolveFirst_FIX( L_Q16, M, b, Y );
102
103    /****************************************************
104    D*L'*x = Y <=> L'*x = inv(D)*Y, because D is
105    diagonal just multiply with 1/d_i
106    ****************************************************/
107    silk_LS_divide_Q16_FIX( Y, inv_D, M );
108
109    /****************************************************
110    x = inv(L') * inv(D) * Y
111    *****************************************************/
112    silk_LS_SolveLast_FIX( L_Q16, M, Y, x_Q16 );
113    RESTORE_STACK;
114}
115
116static OPUS_INLINE void silk_LDL_factorize_FIX(
117    opus_int32          *A,         /* I/O Pointer to Symetric Square Matrix                            */
118    opus_int            M,          /* I   Size of Matrix                                               */
119    opus_int32          *L_Q16,     /* I/O Pointer to Square Upper triangular Matrix                    */
120    inv_D_t             *inv_D      /* I/O Pointer to vector holding inverted diagonal elements of D    */
121)
122{
123    opus_int   i, j, k, status, loop_count;
124    const opus_int32 *ptr1, *ptr2;
125    opus_int32 diag_min_value, tmp_32, err;
126    opus_int32 v_Q0[ MAX_MATRIX_SIZE ], D_Q0[ MAX_MATRIX_SIZE ];
127    opus_int32 one_div_diag_Q36, one_div_diag_Q40, one_div_diag_Q48;
128
129    silk_assert( M <= MAX_MATRIX_SIZE );
130
131    status = 1;
132    diag_min_value = silk_max_32( silk_SMMUL( silk_ADD_SAT32( A[ 0 ], A[ silk_SMULBB( M, M ) - 1 ] ), SILK_FIX_CONST( FIND_LTP_COND_FAC, 31 ) ), 1 << 9 );
133    for( loop_count = 0; loop_count < M && status == 1; loop_count++ ) {
134        status = 0;
135        for( j = 0; j < M; j++ ) {
136            ptr1 = matrix_adr( L_Q16, j, 0, M );
137            tmp_32 = 0;
138            for( i = 0; i < j; i++ ) {
139                v_Q0[ i ] = silk_SMULWW(         D_Q0[ i ], ptr1[ i ] ); /* Q0 */
140                tmp_32    = silk_SMLAWW( tmp_32, v_Q0[ i ], ptr1[ i ] ); /* Q0 */
141            }
142            tmp_32 = silk_SUB32( matrix_ptr( A, j, j, M ), tmp_32 );
143
144            if( tmp_32 < diag_min_value ) {
145                tmp_32 = silk_SUB32( silk_SMULBB( loop_count + 1, diag_min_value ), tmp_32 );
146                /* Matrix not positive semi-definite, or ill conditioned */
147                for( i = 0; i < M; i++ ) {
148                    matrix_ptr( A, i, i, M ) = silk_ADD32( matrix_ptr( A, i, i, M ), tmp_32 );
149                }
150                status = 1;
151                break;
152            }
153            D_Q0[ j ] = tmp_32;                         /* always < max(Correlation) */
154
155            /* two-step division */
156            one_div_diag_Q36 = silk_INVERSE32_varQ( tmp_32, 36 );                    /* Q36 */
157            one_div_diag_Q40 = silk_LSHIFT( one_div_diag_Q36, 4 );                   /* Q40 */
158            err = silk_SUB32( (opus_int32)1 << 24, silk_SMULWW( tmp_32, one_div_diag_Q40 ) );     /* Q24 */
159            one_div_diag_Q48 = silk_SMULWW( err, one_div_diag_Q40 );                 /* Q48 */
160
161            /* Save 1/Ds */
162            inv_D[ j ].Q36_part = one_div_diag_Q36;
163            inv_D[ j ].Q48_part = one_div_diag_Q48;
164
165            matrix_ptr( L_Q16, j, j, M ) = 65536; /* 1.0 in Q16 */
166            ptr1 = matrix_adr( A, j, 0, M );
167            ptr2 = matrix_adr( L_Q16, j + 1, 0, M );
168            for( i = j + 1; i < M; i++ ) {
169                tmp_32 = 0;
170                for( k = 0; k < j; k++ ) {
171                    tmp_32 = silk_SMLAWW( tmp_32, v_Q0[ k ], ptr2[ k ] ); /* Q0 */
172                }
173                tmp_32 = silk_SUB32( ptr1[ i ], tmp_32 ); /* always < max(Correlation) */
174
175                /* tmp_32 / D_Q0[j] : Divide to Q16 */
176                matrix_ptr( L_Q16, i, j, M ) = silk_ADD32( silk_SMMUL( tmp_32, one_div_diag_Q48 ),
177                    silk_RSHIFT( silk_SMULWW( tmp_32, one_div_diag_Q36 ), 4 ) );
178
179                /* go to next column */
180                ptr2 += M;
181            }
182        }
183    }
184
185    silk_assert( status == 0 );
186}
187
188static OPUS_INLINE void silk_LS_divide_Q16_FIX(
189    opus_int32          T[],        /* I/O  Numenator vector                                            */
190    inv_D_t             *inv_D,     /* I    1 / D vector                                                */
191    opus_int            M           /* I    dimension                                                   */
192)
193{
194    opus_int   i;
195    opus_int32 tmp_32;
196    opus_int32 one_div_diag_Q36, one_div_diag_Q48;
197
198    for( i = 0; i < M; i++ ) {
199        one_div_diag_Q36 = inv_D[ i ].Q36_part;
200        one_div_diag_Q48 = inv_D[ i ].Q48_part;
201
202        tmp_32 = T[ i ];
203        T[ i ] = silk_ADD32( silk_SMMUL( tmp_32, one_div_diag_Q48 ), silk_RSHIFT( silk_SMULWW( tmp_32, one_div_diag_Q36 ), 4 ) );
204    }
205}
206
207/* Solve Lx = b, when L is lower triangular and has ones on the diagonal */
208static OPUS_INLINE void silk_LS_SolveFirst_FIX(
209    const opus_int32    *L_Q16,     /* I    Pointer to Lower Triangular Matrix                          */
210    opus_int            M,          /* I    Dim of Matrix equation                                      */
211    const opus_int32    *b,         /* I    b Vector                                                    */
212    opus_int32          *x_Q16      /* O    x Vector                                                    */
213)
214{
215    opus_int i, j;
216    const opus_int32 *ptr32;
217    opus_int32 tmp_32;
218
219    for( i = 0; i < M; i++ ) {
220        ptr32 = matrix_adr( L_Q16, i, 0, M );
221        tmp_32 = 0;
222        for( j = 0; j < i; j++ ) {
223            tmp_32 = silk_SMLAWW( tmp_32, ptr32[ j ], x_Q16[ j ] );
224        }
225        x_Q16[ i ] = silk_SUB32( b[ i ], tmp_32 );
226    }
227}
228
229/* Solve L^t*x = b, where L is lower triangular with ones on the diagonal */
230static OPUS_INLINE void silk_LS_SolveLast_FIX(
231    const opus_int32    *L_Q16,     /* I    Pointer to Lower Triangular Matrix                          */
232    const opus_int      M,          /* I    Dim of Matrix equation                                      */
233    const opus_int32    *b,         /* I    b Vector                                                    */
234    opus_int32          *x_Q16      /* O    x Vector                                                    */
235)
236{
237    opus_int i, j;
238    const opus_int32 *ptr32;
239    opus_int32 tmp_32;
240
241    for( i = M - 1; i >= 0; i-- ) {
242        ptr32 = matrix_adr( L_Q16, 0, i, M );
243        tmp_32 = 0;
244        for( j = M - 1; j > i; j-- ) {
245            tmp_32 = silk_SMLAWW( tmp_32, ptr32[ silk_SMULBB( j, M ) ], x_Q16[ j ] );
246        }
247        x_Q16[ i ] = silk_SUB32( b[ i ], tmp_32 );
248    }
249}
250