qmf_arm.cpp revision 2228e360595641dd906bf1773307f43d304f5b2e
1
2/* -----------------------------------------------------------------------------------------------------------
3Software License for The Fraunhofer FDK AAC Codec Library for Android
4
5� Copyright  1995 - 2012 Fraunhofer-Gesellschaft zur F�rderung der angewandten Forschung e.V.
6  All rights reserved.
7
8 1.    INTRODUCTION
9The Fraunhofer FDK AAC Codec Library for Android ("FDK AAC Codec") is software that implements
10the MPEG Advanced Audio Coding ("AAC") encoding and decoding scheme for digital audio.
11This FDK AAC Codec software is intended to be used on a wide variety of Android devices.
12
13AAC's HE-AAC and HE-AAC v2 versions are regarded as today's most efficient general perceptual
14audio codecs. AAC-ELD is considered the best-performing full-bandwidth communications codec by
15independent studies and is widely deployed. AAC has been standardized by ISO and IEC as part
16of the MPEG specifications.
17
18Patent licenses for necessary patent claims for the FDK AAC Codec (including those of Fraunhofer)
19may be obtained through Via Licensing (www.vialicensing.com) or through the respective patent owners
20individually for the purpose of encoding or decoding bit streams in products that are compliant with
21the ISO/IEC MPEG audio standards. Please note that most manufacturers of Android devices already license
22these patent claims through Via Licensing or directly from the patent owners, and therefore FDK AAC Codec
23software may already be covered under those patent licenses when it is used for those licensed purposes only.
24
25Commercially-licensed AAC software libraries, including floating-point versions with enhanced sound quality,
26are also available from Fraunhofer. Users are encouraged to check the Fraunhofer website for additional
27applications information and documentation.
28
292.    COPYRIGHT LICENSE
30
31Redistribution and use in source and binary forms, with or without modification, are permitted without
32payment of copyright license fees provided that you satisfy the following conditions:
33
34You must retain the complete text of this software license in redistributions of the FDK AAC Codec or
35your modifications thereto in source code form.
36
37You must retain the complete text of this software license in the documentation and/or other materials
38provided with redistributions of the FDK AAC Codec or your modifications thereto in binary form.
39You must make available free of charge copies of the complete source code of the FDK AAC Codec and your
40modifications thereto to recipients of copies in binary form.
41
42The name of Fraunhofer may not be used to endorse or promote products derived from this library without
43prior written permission.
44
45You may not charge copyright license fees for anyone to use, copy or distribute the FDK AAC Codec
46software or your modifications thereto.
47
48Your modified versions of the FDK AAC Codec must carry prominent notices stating that you changed the software
49and the date of any change. For modified versions of the FDK AAC Codec, the term
50"Fraunhofer FDK AAC Codec Library for Android" must be replaced by the term
51"Third-Party Modified Version of the Fraunhofer FDK AAC Codec Library for Android."
52
533.    NO PATENT LICENSE
54
55NO EXPRESS OR IMPLIED LICENSES TO ANY PATENT CLAIMS, including without limitation the patents of Fraunhofer,
56ARE GRANTED BY THIS SOFTWARE LICENSE. Fraunhofer provides no warranty of patent non-infringement with
57respect to this software.
58
59You may use this FDK AAC Codec software or modifications thereto only for purposes that are authorized
60by appropriate patent licenses.
61
624.    DISCLAIMER
63
64This FDK AAC Codec software is provided by Fraunhofer on behalf of the copyright holders and contributors
65"AS IS" and WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES, including but not limited to the implied warranties
66of merchantability and fitness for a particular purpose. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
67CONTRIBUTORS BE LIABLE for any direct, indirect, incidental, special, exemplary, or consequential damages,
68including but not limited to procurement of substitute goods or services; loss of use, data, or profits,
69or business interruption, however caused and on any theory of liability, whether in contract, strict
70liability, or tort (including negligence), arising in any way out of the use of this software, even if
71advised of the possibility of such damage.
72
735.    CONTACT INFORMATION
74
75Fraunhofer Institute for Integrated Circuits IIS
76Attention: Audio and Multimedia Departments - FDK AAC LL
77Am Wolfsmantel 33
7891058 Erlangen, Germany
79
80www.iis.fraunhofer.de/amm
81amm-info@iis.fraunhofer.de
82----------------------------------------------------------------------------------------------------------- */
83
84#if (QMF_NO_POLY==5)
85
86#define FUNCTION_qmfForwardModulationLP_odd
87
88#ifdef FUNCTION_qmfForwardModulationLP_odd
89static void
90qmfForwardModulationLP_odd( HANDLE_QMF_FILTER_BANK anaQmf, /*!< Handle of Qmf Analysis Bank  */
91                            const FIXP_QMF *timeIn,        /*!< Time Signal */
92                            FIXP_QMF *rSubband )           /*!< Real Output */
93{
94  int i;
95  int L = anaQmf->no_channels;
96  int M = L>>1;
97  int shift = (anaQmf->no_channels>>6) + 1;
98  int rSubband_e = 0;
99
100  FIXP_QMF *rSubbandPtr0 = &rSubband[M+0];                /* runs with increment */
101  FIXP_QMF *rSubbandPtr1 = &rSubband[M-1];                /* runs with decrement */
102  FIXP_QMF *timeIn0 = (FIXP_DBL *) &timeIn[0];            /* runs with increment */
103  FIXP_QMF *timeIn1 = (FIXP_DBL *) &timeIn[L];            /* runs with increment */
104  FIXP_QMF *timeIn2 = (FIXP_DBL *) &timeIn[L-1];          /* runs with decrement */
105  FIXP_QMF *timeIn3 = (FIXP_DBL *) &timeIn[2*L-1];        /* runs with decrement */
106
107  for (i = 0; i < M; i++)
108  {
109    *rSubbandPtr0++ = (*timeIn2-- >> 1) - (*timeIn0++ >> shift);
110    *rSubbandPtr1-- = (*timeIn1++ >> 1) + (*timeIn3-- >> shift);
111  }
112
113  dct_IV(rSubband,L, &rSubband_e);
114}
115#endif /* FUNCTION_qmfForwardModulationLP_odd */
116
117
118/* NEON optimized QMF currently builts only with RVCT toolchain */
119
120#if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_5TE__)
121
122#if (SAMPLE_BITS == 16)
123#define FUNCTION_qmfAnaPrototypeFirSlot
124#endif
125
126#ifdef FUNCTION_qmfAnaPrototypeFirSlot
127
128#if defined(__GNUC__)	/* cppp replaced: elif */
129
130inline INT SMULBB (const SHORT a, const LONG b)
131{
132  INT result ;
133  __asm__ ("smulbb %0, %1, %2"
134     : "=r" (result)
135     : "r" (a), "r" (b)) ;
136  return result ;
137}
138inline INT SMULBT (const SHORT a, const LONG b)
139{
140  INT result ;
141  __asm__ ("smulbt %0, %1, %2"
142     : "=r" (result)
143     : "r" (a), "r" (b)) ;
144  return result ;
145}
146
147inline INT SMLABB(const LONG accu, const SHORT a, const LONG b)
148{
149  INT result ;
150  __asm__ ("smlabb %0, %1, %2,%3"
151     : "=r" (result)
152     : "r" (a), "r" (b), "r" (accu)) ;
153  return result;
154}
155inline INT SMLABT(const LONG accu, const SHORT a, const LONG b)
156{
157  INT result ;
158  __asm__ ("smlabt %0, %1, %2,%3"
159     : "=r" (result)
160     : "r" (a), "r" (b), "r" (accu)) ;
161  return result;
162}
163#endif /* compiler selection  */
164
165
166void qmfAnaPrototypeFirSlot( FIXP_QMF *analysisBuffer,
167                             int       no_channels,             /*!< Number channels of analysis filter */
168                             const FIXP_PFT *p_filter,
169                             int       p_stride,                /*!< Stide of analysis filter    */
170                             FIXP_QAS *RESTRICT pFilterStates
171                            )
172{
173  LONG *p_flt = (LONG *) p_filter;
174  LONG flt;
175  FIXP_QMF *RESTRICT pData_0 = analysisBuffer + 2*no_channels - 1;
176  FIXP_QMF *RESTRICT pData_1 = analysisBuffer;
177
178  FIXP_QAS *RESTRICT sta_0 = (FIXP_QAS *)pFilterStates;
179  FIXP_QAS *RESTRICT sta_1 = (FIXP_QAS *)pFilterStates + (2*QMF_NO_POLY*no_channels) - 1;
180
181  FIXP_DBL accu0, accu1;
182  FIXP_QAS sta0, sta1;
183
184  int staStep1 =  no_channels<<1;
185  int staStep2 = (no_channels<<3) - 1; /* Rewind one less */
186
187  if (p_stride == 1)
188  {
189    /* FIR filter 0 */
190    flt = *p_flt++;
191    sta1 = *sta_1;  sta_1 -= staStep1;
192    accu1 = SMULBB(        sta1, flt);
193    sta1 = *sta_1;  sta_1 -= staStep1;
194    accu1 = SMLABT( accu1, sta1, flt);
195
196    flt = *p_flt++;
197    sta1 = *sta_1;  sta_1 -= staStep1;
198    accu1 = SMLABB( accu1, sta1, flt);
199    sta1 = *sta_1;  sta_1 -= staStep1;
200    accu1 = SMLABT( accu1, sta1, flt);
201
202    flt = *p_flt++;
203    sta1 = *sta_1;  sta_1 += staStep2;
204    accu1 = SMLABB( accu1, sta1, flt);
205    *pData_1++ = FX_DBL2FX_QMF(accu1<<1);
206
207    /* FIR filters 1..63 127..65 or 1..31 63..33 */
208    no_channels >>= 1;
209    for (; --no_channels; )
210    {
211      sta0 = *sta_0; sta_0 += staStep1;  /* 1,3,5, ... 29/61 */
212      sta1 = *sta_1; sta_1 -= staStep1;
213      accu0 = SMULBT(        sta0, flt);
214      accu1 = SMULBT(        sta1, flt);
215
216      flt = *p_flt++;
217      sta0 = *sta_0; sta_0 += staStep1;
218      sta1 = *sta_1; sta_1 -= staStep1;
219      accu0 = SMLABB( accu0, sta0, flt);
220      accu1 = SMLABB( accu1, sta1, flt);
221
222      sta0 = *sta_0; sta_0 += staStep1;
223      sta1 = *sta_1; sta_1 -= staStep1;
224      accu0 = SMLABT( accu0, sta0, flt);
225      accu1 = SMLABT( accu1, sta1, flt);
226
227      flt = *p_flt++;
228      sta0 = *sta_0; sta_0 += staStep1;
229      sta1 = *sta_1; sta_1 -= staStep1;
230      accu0 = SMLABB( accu0, sta0, flt);
231      accu1 = SMLABB( accu1, sta1, flt);
232
233      sta0 = *sta_0; sta_0 -= staStep2;
234      sta1 = *sta_1; sta_1 += staStep2;
235      accu0 = SMLABT( accu0, sta0, flt);
236      accu1 = SMLABT( accu1, sta1, flt);
237
238      *pData_0-- = FX_DBL2FX_QMF(accu0<<1);
239      *pData_1++ = FX_DBL2FX_QMF(accu1<<1);
240
241      /* Same sequence as above, but mix B=bottom with T=Top */
242
243      flt = *p_flt++;
244      sta0 = *sta_0; sta_0 += staStep1;  /* 2,4,6, ... 30/62 */
245      sta1 = *sta_1; sta_1 -= staStep1;
246      accu0 = SMULBB(        sta0, flt);
247      accu1 = SMULBB(        sta1, flt);
248
249      sta0 = *sta_0; sta_0 += staStep1;
250      sta1 = *sta_1; sta_1 -= staStep1;
251      accu0 = SMLABT( accu0, sta0, flt);
252      accu1 = SMLABT( accu1, sta1, flt);
253
254      flt = *p_flt++;
255      sta0 = *sta_0; sta_0 += staStep1;
256      sta1 = *sta_1; sta_1 -= staStep1;
257      accu0 = SMLABB( accu0, sta0, flt);
258      accu1 = SMLABB( accu1, sta1, flt);
259
260      sta0 = *sta_0; sta_0 += staStep1;
261      sta1 = *sta_1; sta_1 -= staStep1;
262      accu0 = SMLABT( accu0, sta0, flt);
263      accu1 = SMLABT( accu1, sta1, flt);
264
265      flt = *p_flt++;
266      sta0 = *sta_0; sta_0 -= staStep2;
267      sta1 = *sta_1; sta_1 += staStep2;
268      accu0 = SMLABB( accu0, sta0, flt);
269      accu1 = SMLABB( accu1, sta1, flt);
270
271      *pData_0-- = FX_DBL2FX_QMF(accu0<<1);
272      *pData_1++ = FX_DBL2FX_QMF(accu1<<1);
273    }
274
275    /* FIR filter 31/63 and 33/65 */
276    sta0 = *sta_0; sta_0 += staStep1;
277    sta1 = *sta_1; sta_1 -= staStep1;
278    accu0 = SMULBT(        sta0, flt);
279    accu1 = SMULBT(        sta1, flt);
280
281    flt = *p_flt++;
282    sta0 = *sta_0; sta_0 += staStep1;
283    sta1 = *sta_1; sta_1 -= staStep1;
284    accu0 = SMLABB( accu0, sta0, flt);
285    accu1 = SMLABB( accu1, sta1, flt);
286
287    sta0 = *sta_0; sta_0 += staStep1;
288    sta1 = *sta_1; sta_1 -= staStep1;
289    accu0 = SMLABT( accu0, sta0, flt);
290    accu1 = SMLABT( accu1, sta1, flt);
291
292    flt = *p_flt++;
293    sta0 = *sta_0; sta_0 += staStep1;
294    sta1 = *sta_1; sta_1 -= staStep1;
295    accu0 = SMLABB( accu0, sta0, flt);
296    accu1 = SMLABB( accu1, sta1, flt);
297
298    sta0 = *sta_0; sta_0 -= staStep2;
299    sta1 = *sta_1; sta_1 += staStep2;
300    accu0 = SMLABT( accu0, sta0, flt);
301    accu1 = SMLABT( accu1, sta1, flt);
302
303    *pData_0-- = FX_DBL2FX_QMF(accu0<<1);
304    *pData_1++ = FX_DBL2FX_QMF(accu1<<1);
305
306    /* FIR filter 32/64 */
307    flt = *p_flt++;
308    sta0 = *sta_0; sta_0 += staStep1;
309    sta1 = *sta_1; sta_1 -= staStep1;
310    accu0 = SMULBB(        sta0, flt);
311    accu1 = SMULBB(        sta1, flt);
312
313    sta0 = *sta_0; sta_0 += staStep1;
314    sta1 = *sta_1; sta_1 -= staStep1;
315    accu0 = SMLABT( accu0, sta0, flt);
316    accu1 = SMLABT( accu1, sta1, flt);
317
318    flt = *p_flt++;
319    sta0 = *sta_0; sta_0 += staStep1;
320    sta1 = *sta_1; sta_1 -= staStep1;
321    accu0 = SMLABB( accu0, sta0, flt);
322    accu1 = SMLABB( accu1, sta1, flt);
323
324    sta0 = *sta_0; sta_0 += staStep1;
325    sta1 = *sta_1; sta_1 -= staStep1;
326    accu0 = SMLABT( accu0, sta0, flt);
327    accu1 = SMLABT( accu1, sta1, flt);
328
329    flt = *p_flt;
330    sta0 = *sta_0;
331    sta1 = *sta_1;
332    accu0 = SMLABB( accu0, sta0, flt);
333    accu1 = SMLABB( accu1, sta1, flt);
334
335    *pData_0-- = FX_DBL2FX_QMF(accu0<<1);
336    *pData_1++ = FX_DBL2FX_QMF(accu1<<1);
337  }
338  else
339  {
340    int pfltStep = QMF_NO_POLY * (p_stride-1);
341
342    flt = p_flt[0];
343    sta1 = *sta_1;  sta_1 -= staStep1;
344    accu1 = SMULBB(        sta1, flt);
345    sta1 = *sta_1;  sta_1 -= staStep1;
346    accu1 = SMLABT( accu1, sta1, flt);
347
348    flt = p_flt[1];
349    sta1 = *sta_1;  sta_1 -= staStep1;
350    accu1 = SMLABB( accu1, sta1, flt);
351    sta1 = *sta_1;  sta_1 -= staStep1;
352    accu1 = SMLABT( accu1, sta1, flt);
353
354    flt = p_flt[2]; p_flt += pfltStep;
355    sta1 = *sta_1;  sta_1 += staStep2;
356    accu1 = SMLABB( accu1, sta1, flt);
357    *pData_1++ = FX_DBL2FX_QMF(accu1<<1);
358
359    /* FIR filters 1..63 127..65 or 1..31 63..33 */
360    for (; --no_channels; )
361    {
362      flt = p_flt[0];
363      sta0 = *sta_0; sta_0 += staStep1;
364      sta1 = *sta_1; sta_1 -= staStep1;
365      accu0 = SMULBB(        sta0, flt);
366      accu1 = SMULBB(        sta1, flt);
367
368      sta0 = *sta_0; sta_0 += staStep1;
369      sta1 = *sta_1; sta_1 -= staStep1;
370      accu0 = SMLABT( accu0, sta0, flt);
371      accu1 = SMLABT( accu1, sta1, flt);
372
373      flt = p_flt[1];
374      sta0 = *sta_0; sta_0 += staStep1;
375      sta1 = *sta_1; sta_1 -= staStep1;
376      accu0 = SMLABB( accu0, sta0, flt);
377      accu1 = SMLABB( accu1, sta1, flt);
378
379      sta0 = *sta_0; sta_0 += staStep1;
380      sta1 = *sta_1; sta_1 -= staStep1;
381      accu0 = SMLABT( accu0, sta0, flt);
382      accu1 = SMLABT( accu1, sta1, flt);
383
384      flt = p_flt[2]; p_flt += pfltStep;
385      sta0 = *sta_0; sta_0 -= staStep2;
386      sta1 = *sta_1; sta_1 += staStep2;
387      accu0 = SMLABB( accu0, sta0, flt);
388      accu1 = SMLABB( accu1, sta1, flt);
389
390      *pData_0-- = FX_DBL2FX_QMF(accu0<<1);
391      *pData_1++ = FX_DBL2FX_QMF(accu1<<1);
392    }
393
394    /* FIR filter 32/64 */
395    flt = p_flt[0];
396    sta0 = *sta_0; sta_0 += staStep1;
397    accu0 = SMULBB(        sta0, flt);
398    sta0 = *sta_0; sta_0 += staStep1;
399    accu0 = SMLABT( accu0, sta0, flt);
400
401    flt = p_flt[1];
402    sta0 = *sta_0; sta_0 += staStep1;
403    accu0 = SMLABB( accu0, sta0, flt);
404    sta0 = *sta_0; sta_0 += staStep1;
405    accu0 = SMLABT( accu0, sta0, flt);
406
407    flt = p_flt[2];
408    sta0 = *sta_0;
409    accu0 = SMLABB( accu0, sta0, flt);
410    *pData_0-- = FX_DBL2FX_QMF(accu0<<1);
411  }
412}
413#endif /* FUNCTION_qmfAnaPrototypeFirSlot */
414#endif /* #if defined(__CC_ARM) && defined(__ARM_ARCH_6__) */
415
416#if ( defined(__ARM_ARCH_5TE__) && (SAMPLE_BITS == 16) ) && !defined(QMF_TABLE_FULL)
417
418#define FUNCTION_qmfSynPrototypeFirSlot
419
420#if defined(FUNCTION_qmfSynPrototypeFirSlot)
421
422#if defined(__GNUC__)	/* cppp replaced: elif */
423
424inline INT SMULWB (const LONG a, const LONG b)
425{
426  INT result ;
427  __asm__ ("smulwb %0, %1, %2"
428    : "=r" (result)
429    : "r" (a), "r" (b)) ;
430
431  return result ;
432}
433inline INT SMULWT (const LONG a, const LONG b)
434{
435  INT result ;
436  __asm__ ("smulwt %0, %1, %2"
437    : "=r" (result)
438    : "r" (a), "r" (b)) ;
439
440  return result ;
441}
442
443inline INT SMLAWB(const LONG accu, const LONG a, const LONG b)
444{
445  INT result;
446  asm("smlawb %0, %1, %2, %3 "
447        : "=r" (result)
448        : "r" (a), "r" (b), "r" (accu) );
449  return result ;
450}
451
452inline INT SMLAWT(const LONG accu, const LONG a, const LONG b)
453{
454  INT result;
455  asm("smlawt %0, %1, %2, %3 "
456        : "=r" (result)
457        : "r" (a), "r" (b), "r" (accu) );
458  return result ;
459}
460
461#endif /* ARM compiler selector */
462
463
464static void qmfSynPrototypeFirSlot1_filter(FIXP_QMF *RESTRICT realSlot,
465                                           FIXP_QMF *RESTRICT imagSlot,
466                                           const FIXP_DBL *RESTRICT p_flt,
467                                           FIXP_QSS *RESTRICT sta,
468                                           FIXP_DBL *pMyTimeOut,
469                                           int no_channels)
470{
471  /* This code was the base for the above listed assembler sequence */
472  /* It can be used for debugging purpose or further optimizations  */
473  const FIXP_DBL *RESTRICT p_fltm = p_flt + 155;
474
475  do
476  {
477     FIXP_DBL result;
478     FIXP_DBL A, B, real, imag, sta0;
479
480     real = *--realSlot;
481     imag = *--imagSlot;
482     B = p_flt[4];                        /* Bottom=[8] Top=[9]     */
483     A = p_fltm[3];                       /* Bottom=[316] Top=[317] */
484     sta0 = sta[0];                       /* save state[0]          */
485     *sta++ = SMLAWT( sta[1], imag, B );  /* index=9...........319  */
486     *sta++ = SMLAWB( sta[1], real, A );  /* index=316...........6  */
487     *sta++ = SMLAWB( sta[1], imag, B );  /* index=8,18,    ...318  */
488     B = p_flt[3];                        /* Bottom=[6] Top=[7]     */
489     *sta++ = SMLAWT( sta[1], real, A );  /* index=317...........7  */
490     A = p_fltm[4];                       /* Bottom=[318] Top=[319] */
491     *sta++ = SMLAWT( sta[1], imag, B );  /* index=7...........317  */
492     *sta++ = SMLAWB( sta[1], real, A );  /* index=318...........8  */
493     *sta++ = SMLAWB( sta[1], imag, B );  /* index=6...........316  */
494     B = p_flt[2];                        /* Bottom=[X] Top=[5]     */
495     *sta++ = SMLAWT( sta[1], real, A );  /* index=9...........319  */
496     A = p_fltm[2];                       /* Bottom=[X] Top=[315]   */
497     *sta++ =         SMULWT( imag, B );  /* index=5,15, ...   315  */
498     result = SMLAWT( sta0,   real, A );  /* index=315...........5  */
499
500     *pMyTimeOut++ = result;
501
502     real = *--realSlot;
503     imag = *--imagSlot;
504     A = p_fltm[0];                       /* Bottom=[310] Top=[311] */
505     B = p_flt[7];                        /* Bottom=[14]  Top=[15]  */
506     result = SMLAWB( sta[0], real, A );  /* index=310...........0  */
507     *sta++ = SMLAWB( sta[1], imag, B );  /* index=14..........324  */
508     *pMyTimeOut++ = result;
509     B = p_flt[6];                        /* Bottom=[12]  Top=[13]  */
510     *sta++ = SMLAWT( sta[1], real, A );  /* index=311...........1  */
511     A = p_fltm[1];                       /* Bottom=[312] Top=[313] */
512     *sta++ = SMLAWT( sta[1], imag, B );  /* index=13..........323  */
513     *sta++ = SMLAWB( sta[1], real, A );  /* index=312...........2  */
514     *sta++ = SMLAWB( sta[1], imag, B );  /* index=12..........322  */
515     *sta++ = SMLAWT( sta[1], real, A );  /* index=313...........3  */
516     A = p_fltm[2];                       /* Bottom=[314] Top=[315] */
517     B = p_flt[5];                        /* Bottom=[10]  Top=[11]  */
518     *sta++ = SMLAWT( sta[1], imag, B );  /* index=11..........321  */
519     *sta++ = SMLAWB( sta[1], real, A );  /* index=314...........4  */
520     *sta++ =         SMULWB( imag, B );  /* index=10..........320  */
521
522
523     p_flt    += 5;
524     p_fltm   -= 5;
525  }
526  while ((--no_channels) != 0);
527
528}
529
530
531
532INT qmfSynPrototypeFirSlot2(
533                             HANDLE_QMF_FILTER_BANK qmf,
534                             FIXP_QMF *RESTRICT realSlot,            /*!< Input: Pointer to real Slot */
535                             FIXP_QMF *RESTRICT imagSlot,            /*!< Input: Pointer to imag Slot */
536                             INT_PCM  *RESTRICT timeOut,             /*!< Time domain data */
537                             INT       stride                        /*!< Time output buffer stride factor*/
538                            )
539{
540  FIXP_QSS *RESTRICT sta = (FIXP_QSS*)qmf->FilterStates;
541  int no_channels = qmf->no_channels;
542  int scale = ((DFRACT_BITS-SAMPLE_BITS)-1-qmf->outScalefactor);
543
544  /* We map an arry of 16-bit values upon an array of 2*16-bit values to read 2 values in one shot */
545  const FIXP_DBL *RESTRICT p_flt  = (FIXP_DBL *) qmf->p_filter;           /* low=[0],   high=[1]   */
546  const FIXP_DBL *RESTRICT p_fltm = (FIXP_DBL *) qmf->p_filter + 155;     /* low=[310], high=[311] */
547
548  FDK_ASSERT(SAMPLE_BITS-1-qmf->outScalefactor >= 0); //   (DFRACT_BITS-SAMPLE_BITS)-1-qmf->outScalefactor >= 0);
549  FDK_ASSERT(qmf->p_stride==2 && qmf->no_channels == 32);
550
551  FDK_ASSERT((no_channels&3) == 0);  /* should be a multiple of 4 */
552
553  realSlot += no_channels-1;    // ~~"~~
554  imagSlot += no_channels-1;    // no_channels-1 .. 0
555
556  FIXP_DBL MyTimeOut[32];
557  FIXP_DBL *pMyTimeOut = &MyTimeOut[0];
558
559  for (no_channels = no_channels; no_channels--;)
560  {
561     FIXP_DBL result;
562     FIXP_DBL A, B, real, imag;
563
564     real = *realSlot--;
565     imag = *imagSlot--;
566     A = p_fltm[0];                       /* Bottom=[310] Top=[311] */
567     B = p_flt[7];                        /* Bottom=[14]  Top=[15]  */
568     result = SMLAWB( sta[0], real, A );  /* index=310...........0  */
569     *sta++ = SMLAWB( sta[1], imag, B );  /* index=14..........324  */
570     B = p_flt[6];                        /* Bottom=[12]  Top=[13]  */
571     *sta++ = SMLAWT( sta[1], real, A );  /* index=311...........1  */
572     A = p_fltm[1];                       /* Bottom=[312] Top=[313] */
573     *sta++ = SMLAWT( sta[1], imag, B );  /* index=13..........323  */
574     *sta++ = SMLAWB( sta[1], real, A );  /* index=312...........2  */
575     *sta++ = SMLAWB( sta[1], imag, B );  /* index=12..........322  */
576     *sta++ = SMLAWT( sta[1], real, A );  /* index=313...........3  */
577     A = p_fltm[2];                       /* Bottom=[314] Top=[315] */
578     B = p_flt[5];                        /* Bottom=[10]  Top=[11]  */
579     *sta++ = SMLAWT( sta[1], imag, B );  /* index=11..........321  */
580     *sta++ = SMLAWB( sta[1], real, A );  /* index=314...........4  */
581     *sta++ =         SMULWB( imag, B );  /* index=10..........320  */
582
583     *pMyTimeOut++ = result;
584
585     p_fltm   -= 5;
586     p_flt    += 5;
587  }
588
589  pMyTimeOut = &MyTimeOut[0];
590#if (SAMPLE_BITS == 16)
591  const FIXP_DBL max_pos = (FIXP_DBL) 0x00007FFF << scale;
592  const FIXP_DBL max_neg = (FIXP_DBL) 0xFFFF8001 << scale;
593#else
594  scale = -scale;
595  const FIXP_DBL max_pos = (FIXP_DBL) 0x7FFFFFFF >> scale;
596  const FIXP_DBL max_neg = (FIXP_DBL) 0x80000001 >> scale;
597#endif
598  const FIXP_DBL add_neg = (1 << scale) - 1;
599
600  no_channels = qmf->no_channels;
601
602  timeOut += no_channels*stride;
603
604  FDK_ASSERT(scale >= 0);
605
606  if (qmf->outGain != 0x80000000)
607  {
608    FIXP_DBL gain = qmf->outGain;
609    for (no_channels>>=2; no_channels--;)
610    {
611      FIXP_DBL result1, result2;
612
613      result1 = *pMyTimeOut++;
614      result2 = *pMyTimeOut++;
615
616      result1 = fMult(result1,gain);
617      timeOut -= stride;
618      if (result1 < 0)        result1 += add_neg;
619      if (result1 < max_neg)  result1 = max_neg;
620      if (result1 > max_pos)  result1 = max_pos;
621#if (SAMPLE_BITS == 16)
622      timeOut[0] = result1 >> scale;
623#else
624      timeOut[0] = result1 << scale;
625#endif
626
627      result2 = fMult(result2,gain);
628      timeOut -= stride;
629      if (result2 < 0)        result2 += add_neg;
630      if (result2 < max_neg)  result2 = max_neg;
631      if (result2 > max_pos)  result2 = max_pos;
632#if (SAMPLE_BITS == 16)
633      timeOut[0] = result2 >> scale;
634#else
635      timeOut[0] = result2 << scale;
636#endif
637
638      result1 = *pMyTimeOut++;
639      result2 = *pMyTimeOut++;
640
641      result1 = fMult(result1,gain);
642      timeOut -= stride;
643      if (result1 < 0)        result1 += add_neg;
644      if (result1 < max_neg)  result1 = max_neg;
645      if (result1 > max_pos)  result1 = max_pos;
646#if (SAMPLE_BITS == 16)
647      timeOut[0] = result1 >> scale;
648#else
649      timeOut[0] = result1 << scale;
650#endif
651
652      result2 = fMult(result2,gain);
653      timeOut -= stride;
654      if (result2 < 0)        result2 += add_neg;
655      if (result2 < max_neg)  result2 = max_neg;
656      if (result2 > max_pos)  result2 = max_pos;
657#if (SAMPLE_BITS == 16)
658      timeOut[0] = result2 >> scale;
659#else
660      timeOut[0] = result2 << scale;
661#endif
662    }
663  }
664  else
665  {
666    for (no_channels>>=2; no_channels--;)
667    {
668      FIXP_DBL result1, result2;
669      result1 = *pMyTimeOut++;
670      result2 = *pMyTimeOut++;
671      timeOut -= stride;
672      if (result1 < 0)        result1 += add_neg;
673      if (result1 < max_neg)  result1 = max_neg;
674      if (result1 > max_pos)  result1 = max_pos;
675#if (SAMPLE_BITS == 16)
676      timeOut[0] = result1 >> scale;
677#else
678      timeOut[0] = result1 << scale;
679#endif
680
681      timeOut -= stride;
682      if (result2 < 0)        result2 += add_neg;
683      if (result2 < max_neg)  result2 = max_neg;
684      if (result2 > max_pos)  result2 = max_pos;
685#if (SAMPLE_BITS == 16)
686      timeOut[0] = result2 >> scale;
687#else
688      timeOut[0] = result2 << scale;
689#endif
690
691      result1 = *pMyTimeOut++;
692      result2 = *pMyTimeOut++;
693      timeOut -= stride;
694      if (result1 < 0)        result1 += add_neg;
695      if (result1 < max_neg)  result1 = max_neg;
696      if (result1 > max_pos)  result1 = max_pos;
697#if (SAMPLE_BITS == 16)
698      timeOut[0] = result1 >> scale;
699#else
700      timeOut[0] = result1 << scale;
701#endif
702
703      timeOut -= stride;
704      if (result2 < 0)        result2 += add_neg;
705      if (result2 < max_neg)  result2 = max_neg;
706      if (result2 > max_pos)  result2 = max_pos;
707#if (SAMPLE_BITS == 16)
708      timeOut[0] = result2 >> scale;
709#else
710      timeOut[0] = result2 << scale;
711#endif
712    }
713  }
714  return 0;
715}
716
717static
718void qmfSynPrototypeFirSlot_fallback( HANDLE_QMF_FILTER_BANK qmf,
719                             FIXP_DBL *realSlot,      /*!< Input: Pointer to real Slot */
720                             FIXP_DBL *imagSlot,      /*!< Input: Pointer to imag Slot */
721                             INT_PCM  *timeOut,             /*!< Time domain data */
722                             const int       stride
723                            );
724
725/*!
726  \brief Perform Synthesis Prototype Filtering on a single slot of input data.
727
728  The filter takes 2 * #MAX_SYNTHESIS_CHANNELS of input data and
729  generates #MAX_SYNTHESIS_CHANNELS time domain output samples.
730*/
731
732static
733void qmfSynPrototypeFirSlot( HANDLE_QMF_FILTER_BANK qmf,
734                             FIXP_DBL *realSlot,      /*!< Input: Pointer to real Slot */
735                             FIXP_DBL *imagSlot,      /*!< Input: Pointer to imag Slot */
736                             INT_PCM  *timeOut,             /*!< Time domain data */
737                             const int       stride
738                            )
739{
740    INT err = -1;
741
742    switch (qmf->p_stride) {
743    case 2:
744      err = qmfSynPrototypeFirSlot2(qmf, realSlot, imagSlot, timeOut, stride);
745      break;
746    default:
747      err = -1;
748    }
749
750    /* fallback if configuration not available or failed */
751    if(err!=0) {
752        qmfSynPrototypeFirSlot_fallback(qmf, realSlot, imagSlot, timeOut, stride);
753    }
754}
755#endif /* FUNCTION_qmfSynPrototypeFirSlot */
756
757#endif  /*  ( defined(__CC_ARM) && defined(__ARM_ARCH_5TE__) && (SAMPLE_BITS == 16) ) && !defined(QMF_TABLE_FULL) */
758
759
760
761/* #####################################################################################*/
762
763
764
765#endif  /* (QMF_NO_POLY==5) */
766
767