1
2/* -----------------------------------------------------------------------------------------------------------
3Software License for The Fraunhofer FDK AAC Codec Library for Android
4
5� Copyright  1995 - 2013 Fraunhofer-Gesellschaft zur F�rderung der angewandten Forschung e.V.
6  All rights reserved.
7
8 1.    INTRODUCTION
9The Fraunhofer FDK AAC Codec Library for Android ("FDK AAC Codec") is software that implements
10the MPEG Advanced Audio Coding ("AAC") encoding and decoding scheme for digital audio.
11This FDK AAC Codec software is intended to be used on a wide variety of Android devices.
12
13AAC's HE-AAC and HE-AAC v2 versions are regarded as today's most efficient general perceptual
14audio codecs. AAC-ELD is considered the best-performing full-bandwidth communications codec by
15independent studies and is widely deployed. AAC has been standardized by ISO and IEC as part
16of the MPEG specifications.
17
18Patent licenses for necessary patent claims for the FDK AAC Codec (including those of Fraunhofer)
19may be obtained through Via Licensing (www.vialicensing.com) or through the respective patent owners
20individually for the purpose of encoding or decoding bit streams in products that are compliant with
21the ISO/IEC MPEG audio standards. Please note that most manufacturers of Android devices already license
22these patent claims through Via Licensing or directly from the patent owners, and therefore FDK AAC Codec
23software may already be covered under those patent licenses when it is used for those licensed purposes only.
24
25Commercially-licensed AAC software libraries, including floating-point versions with enhanced sound quality,
26are also available from Fraunhofer. Users are encouraged to check the Fraunhofer website for additional
27applications information and documentation.
28
292.    COPYRIGHT LICENSE
30
31Redistribution and use in source and binary forms, with or without modification, are permitted without
32payment of copyright license fees provided that you satisfy the following conditions:
33
34You must retain the complete text of this software license in redistributions of the FDK AAC Codec or
35your modifications thereto in source code form.
36
37You must retain the complete text of this software license in the documentation and/or other materials
38provided with redistributions of the FDK AAC Codec or your modifications thereto in binary form.
39You must make available free of charge copies of the complete source code of the FDK AAC Codec and your
40modifications thereto to recipients of copies in binary form.
41
42The name of Fraunhofer may not be used to endorse or promote products derived from this library without
43prior written permission.
44
45You may not charge copyright license fees for anyone to use, copy or distribute the FDK AAC Codec
46software or your modifications thereto.
47
48Your modified versions of the FDK AAC Codec must carry prominent notices stating that you changed the software
49and the date of any change. For modified versions of the FDK AAC Codec, the term
50"Fraunhofer FDK AAC Codec Library for Android" must be replaced by the term
51"Third-Party Modified Version of the Fraunhofer FDK AAC Codec Library for Android."
52
533.    NO PATENT LICENSE
54
55NO EXPRESS OR IMPLIED LICENSES TO ANY PATENT CLAIMS, including without limitation the patents of Fraunhofer,
56ARE GRANTED BY THIS SOFTWARE LICENSE. Fraunhofer provides no warranty of patent non-infringement with
57respect to this software.
58
59You may use this FDK AAC Codec software or modifications thereto only for purposes that are authorized
60by appropriate patent licenses.
61
624.    DISCLAIMER
63
64This FDK AAC Codec software is provided by Fraunhofer on behalf of the copyright holders and contributors
65"AS IS" and WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES, including but not limited to the implied warranties
66of merchantability and fitness for a particular purpose. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
67CONTRIBUTORS BE LIABLE for any direct, indirect, incidental, special, exemplary, or consequential damages,
68including but not limited to procurement of substitute goods or services; loss of use, data, or profits,
69or business interruption, however caused and on any theory of liability, whether in contract, strict
70liability, or tort (including negligence), arising in any way out of the use of this software, even if
71advised of the possibility of such damage.
72
735.    CONTACT INFORMATION
74
75Fraunhofer Institute for Integrated Circuits IIS
76Attention: Audio and Multimedia Departments - FDK AAC LL
77Am Wolfsmantel 33
7891058 Erlangen, Germany
79
80www.iis.fraunhofer.de/amm
81amm-info@iis.fraunhofer.de
82----------------------------------------------------------------------------------------------------------- */
83
84/*!
85  \file   dct.cpp
86  \brief  DCT Implementations
87  Library functions to calculate standard DCTs. This will most likely be replaced by hand-optimized
88  functions for the specific target processor.
89
90  Three different implementations of the dct type II and the dct type III transforms are provided.
91
92  By default implementations which are based on a single, standard complex FFT-kernel are used (dctII_f() and dctIII_f()).
93  These are specifically helpful in cases where optimized FFT libraries are already available. The FFT used in these
94  implementation is FFT rad2 from FDK_tools.
95
96  Of course, one might also use DCT-libraries should they be available. The DCT and DST
97  type IV implementations are only available in a version based on a complex FFT kernel.
98*/
99
100#include "dct.h"
101
102
103#include "FDK_tools_rom.h"
104#include "fft.h"
105
106
107#if defined(__arm__)
108#include "arm/dct_arm.cpp"
109#endif
110
111
112#if !defined(FUNCTION_dct_III)
113void dct_III(FIXP_DBL *pDat, /*!< pointer to input/output */
114             FIXP_DBL *tmp,  /*!< pointer to temporal working buffer */
115             int L,          /*!< lenght of transform */
116             int *pDat_e
117             )
118{
119  FDK_ASSERT(L == 64 || L == 32);
120  int  i;
121  FIXP_DBL xr, accu1, accu2;
122  int inc;
123  int M = L>>1;
124  int ld_M;
125
126  if (L == 64)  ld_M = 5;
127  else          ld_M = 4;
128
129  /* This loop performs multiplication for index i (i*inc) */
130  inc = (64/2) >> ld_M; /* 64/L */
131
132  FIXP_DBL *pTmp_0 = &tmp[2];
133  FIXP_DBL *pTmp_1 = &tmp[(M-1)*2];
134
135  for(i=1; i<M>>1; i++,pTmp_0+=2,pTmp_1-=2) {
136
137    FIXP_DBL accu3,accu4,accu5,accu6;
138
139    cplxMultDiv2(&accu2, &accu1, pDat[L - i], pDat[i], sin_twiddle_L64[i*inc]);
140    cplxMultDiv2(&accu4, &accu3, pDat[M+i], pDat[M-i], sin_twiddle_L64[(M-i)*inc]);
141    accu3 >>= 1; accu4 >>= 1;
142
143    /* This method is better for ARM926, that uses operand2 shifted right by 1 always */
144    cplxMultDiv2(&accu6, &accu5, (accu3 - (accu1>>1)), ((accu2>>1) + accu4), sin_twiddle_L64[(4*i)*inc]);
145    xr = (accu1>>1) + accu3;
146    pTmp_0[0] = (xr>>1) - accu5;
147    pTmp_1[0] = (xr>>1) + accu5;
148
149    xr = (accu2>>1) - accu4;
150    pTmp_0[1] =  (xr>>1) - accu6;
151    pTmp_1[1] = -((xr>>1) + accu6);
152
153  }
154
155  xr     = fMultDiv2(pDat[M], sin_twiddle_L64[64/2].v.re );/* cos((PI/(2*L))*M); */
156  tmp[0] = ((pDat[0]>>1) + xr)>>1;
157  tmp[1] = ((pDat[0]>>1) - xr)>>1;
158
159  cplxMultDiv2(&accu2, &accu1, pDat[L - (M/2)], pDat[M/2], sin_twiddle_L64[64/4]);
160  tmp[M]   = accu1>>1;
161  tmp[M+1] = accu2>>1;
162
163  /* dit_fft expects 1 bit scaled input values */
164  fft(M, tmp, pDat_e);
165
166  /* ARM926: 12 cycles per 2-iteration, no overhead code by compiler */
167  pTmp_1 = &tmp[L];
168  for (i = M>>1; i--;)
169  {
170    FIXP_DBL tmp1, tmp2, tmp3, tmp4;
171    tmp1 = *tmp++;
172    tmp2 = *tmp++;
173    tmp3 = *--pTmp_1;
174    tmp4 = *--pTmp_1;
175    *pDat++ = tmp1;
176    *pDat++ = tmp3;
177    *pDat++ = tmp2;
178    *pDat++ = tmp4;
179  }
180
181  *pDat_e += 2;
182}
183#endif
184
185#if !defined(FUNCTION_dct_II)
186void dct_II(FIXP_DBL *pDat, /*!< pointer to input/output */
187            FIXP_DBL *tmp,  /*!< pointer to temporal working buffer */
188            int L,          /*!< lenght of transform */
189            int *pDat_e
190            )
191{
192    FDK_ASSERT(L == 64 || L == 32);
193    FIXP_DBL accu1,accu2;
194    FIXP_DBL *pTmp_0, *pTmp_1;
195
196    int i;
197    int inc;
198    int M =  L>>1;
199    int ld_M;
200
201    FDK_ASSERT(L == 64 || L == 32);
202    ld_M = 4 + (L >> 6);  /* L=64: 5,  L=32: 4 */
203
204    inc = (64/2) >> ld_M; /* L=64: 1,  L=32: 2 */
205
206    FIXP_DBL *pdat  = &pDat[0];
207    FIXP_DBL accu3, accu4;
208    pTmp_0 = &tmp[0];
209    pTmp_1 = &tmp[L-1];
210    for (i = M>>1; i--; )
211    {
212      accu1 = *pdat++;
213      accu2 = *pdat++;
214      accu3 = *pdat++;
215      accu4 = *pdat++;
216      accu1 >>= 1;
217      accu2 >>= 1;
218      accu3 >>= 1;
219      accu4 >>= 1;
220      *pTmp_0++ = accu1;
221      *pTmp_0++ = accu3;
222      *pTmp_1-- = accu2;
223      *pTmp_1-- = accu4;
224    }
225
226
227    fft(M, tmp, pDat_e);
228
229    pTmp_0 = &tmp[2];
230    pTmp_1 = &tmp[(M-1)*2];
231
232    for (i=1; i<M>>1; i++,pTmp_0+=2,pTmp_1-=2) {
233
234      FIXP_DBL a1,a2;
235      FIXP_DBL accu3, accu4;
236
237      a1 = ((pTmp_0[1]>>1) + (pTmp_1[1]>>1));
238      a2 = ((pTmp_1[0]>>1) - (pTmp_0[0]>>1));
239
240      cplxMultDiv2(&accu1, &accu2, a2, a1, sin_twiddle_L64[(4*i)*inc]);
241      accu1<<=1; accu2<<=1;
242
243      a1 = ((pTmp_0[0]>>1) + (pTmp_1[0]>>1));
244      a2 = ((pTmp_0[1]>>1) - (pTmp_1[1]>>1));
245
246      cplxMultDiv2(&accu3, &accu4, (a1 + accu2), -(accu1 + a2), sin_twiddle_L64[i*inc]);
247      pDat[L - i] = accu4;
248      pDat[i]     = accu3;
249
250      cplxMultDiv2(&accu3, &accu4, (a1 - accu2), -(accu1 - a2), sin_twiddle_L64[(M-i)*inc]);
251      pDat[M + i] = accu4;
252      pDat[M - i] = accu3;
253
254    }
255
256    cplxMultDiv2(&accu1, &accu2, tmp[M], tmp[M+1], sin_twiddle_L64[(M/2)*inc]);
257    pDat[L - (M/2)] = accu2;
258    pDat[M/2]       = accu1;
259
260    pDat[0] = (tmp[0]>>1)+(tmp[1]>>1);
261    pDat[M] =  fMult(((tmp[0]>>1)-(tmp[1]>>1)), sin_twiddle_L64[64/2].v.re);/* cos((PI/(2*L))*M); */
262
263    *pDat_e += 2;
264}
265#endif
266
267static
268void getTables(const FIXP_WTP **twiddle, const FIXP_STP **sin_twiddle, int *sin_step, int length)
269{
270  int ld2_length;
271
272 /* Get ld2 of length - 2 + 1
273     -2: because first table entry is window of size 4
274     +1: because we already include +1 because of ceil(log2(length)) */
275  ld2_length = DFRACT_BITS-1-fNormz((FIXP_DBL)length) - 1;
276
277  /* Extract sort of "eigenvalue" (the 4 left most bits) of length. */
278  switch ( (length) >> (ld2_length-1) ) {
279    case 0x4: /* radix 2 */
280      *sin_twiddle = SineTable512;
281      *sin_step = 1<<(9 - ld2_length);
282      *twiddle = windowSlopes[0][0][ld2_length-1];
283      break;
284    case 0x7: /* 10 ms */
285      *sin_twiddle = SineTable480;
286      *sin_step = 1<<(8 - ld2_length);
287      *twiddle = windowSlopes[0][1][ld2_length];
288      break;
289    default:
290      *sin_twiddle = NULL;
291      *sin_step = 0;
292      *twiddle = NULL;
293      break;
294  }
295
296  FDK_ASSERT(*twiddle != NULL);
297
298  FDK_ASSERT(*sin_step > 0);
299
300}
301
302#if !defined(FUNCTION_dct_IV)
303
304void dct_IV(FIXP_DBL *pDat,
305            int L,
306            int *pDat_e)
307{
308  int sin_step = 0;
309  int M = L >> 1;
310
311  const FIXP_WTP *twiddle;
312  const FIXP_STP *sin_twiddle;
313
314  FDK_ASSERT(L >= 4);
315
316  getTables(&twiddle, &sin_twiddle, &sin_step, L);
317
318#ifdef FUNCTION_dct_IV_func1
319  if (M>=4 && (M&3) == 0) {
320     /* ARM926: 44 cycles for 2 iterations = 22 cycles/iteration */
321    dct_IV_func1(M>>2, twiddle,  &pDat[0], &pDat[L-1]);
322  } else
323#endif /* FUNCTION_dct_IV_func1 */
324  {
325    FIXP_DBL *RESTRICT pDat_0 = &pDat[0];
326    FIXP_DBL *RESTRICT pDat_1 = &pDat[L - 2];
327    register int i;
328
329    /* 29 cycles on ARM926 */
330    for (i = 0; i < M-1; i+=2,pDat_0+=2,pDat_1-=2)
331    {
332      register FIXP_DBL accu1,accu2,accu3,accu4;
333
334      accu1 = pDat_1[1]; accu2 = pDat_0[0];
335      accu3 = pDat_0[1]; accu4 = pDat_1[0];
336
337      cplxMultDiv2(&accu1, &accu2, accu1, accu2, twiddle[i]);
338      cplxMultDiv2(&accu3, &accu4, accu4, accu3, twiddle[i+1]);
339
340      pDat_0[0] = accu2; pDat_0[1] = accu1;
341      pDat_1[0] = accu4; pDat_1[1] = -accu3;
342    }
343    if (M&1)
344    {
345      register FIXP_DBL accu1,accu2;
346
347      accu1 = pDat_1[1]; accu2 = pDat_0[0];
348
349      cplxMultDiv2(&accu1, &accu2, accu1, accu2, twiddle[i]);
350
351      pDat_0[0] = accu2; pDat_0[1] = accu1;
352    }
353  }
354
355  fft(M, pDat, pDat_e);
356
357#ifdef FUNCTION_dct_IV_func2
358  if (M>=4 && (M&3) == 0) {
359     /* ARM926: 42 cycles for 2 iterations = 21 cycles/iteration */
360    dct_IV_func2(M>>2, sin_twiddle, &pDat[0], &pDat[L], sin_step);
361  } else
362#endif /* FUNCTION_dct_IV_func2 */
363  {
364    FIXP_DBL *RESTRICT pDat_0 = &pDat[0];
365    FIXP_DBL *RESTRICT pDat_1 = &pDat[L - 2];
366    register FIXP_DBL accu1,accu2,accu3,accu4;
367    int idx, i;
368
369    /* Sin and Cos values are 0.0f and 1.0f */
370    accu1 = pDat_1[0];
371    accu2 = pDat_1[1];
372
373    pDat_1[1] = -(pDat_0[1]>>1);
374    pDat_0[0] = (pDat_0[0]>>1);
375
376
377    /* 28 cycles for ARM926 */
378    for (idx = sin_step,i=1; i<(M+1)>>1; i++, idx+=sin_step)
379    {
380      FIXP_STP twd = sin_twiddle[idx];
381      cplxMultDiv2(&accu3, &accu4, accu1, accu2, twd);
382      pDat_0[1] =  accu3;
383      pDat_1[0] =  accu4;
384
385      pDat_0+=2;
386      pDat_1-=2;
387
388      cplxMultDiv2(&accu3, &accu4, pDat_0[1], pDat_0[0], twd);
389
390      accu1 = pDat_1[0];
391      accu2 = pDat_1[1];
392
393      pDat_1[1] = -accu3;
394      pDat_0[0] =  accu4;
395    }
396
397    if ( (M&1) == 0 )
398    {
399      /* Last Sin and Cos value pair are the same */
400      accu1 = fMultDiv2(accu1, WTC(0x5a82799a));
401      accu2 = fMultDiv2(accu2, WTC(0x5a82799a));
402
403      pDat_1[0] = accu1 + accu2;
404      pDat_0[1] = accu1 - accu2;
405    }
406  }
407
408  /* Add twiddeling scale. */
409  *pDat_e += 2;
410}
411#endif /* defined (FUNCTION_dct_IV) */
412
413#if !defined(FUNCTION_dst_IV)
414void dst_IV(FIXP_DBL *pDat,
415            int L,
416            int *pDat_e )
417{
418  int sin_step = 0;
419  int M = L >> 1;
420
421  const FIXP_WTP *twiddle;
422  const FIXP_STP *sin_twiddle;
423
424#ifdef DSTIV2_ENABLE
425  if (L == 2) {
426    const FIXP_STP tab = STCP(0x7641AF3D, 0x30FB9452);
427    FIXP_DBL tmp1, tmp2;
428
429    cplxMultDiv2(&tmp2, &tmp1, pDat[0], pDat[1], tab);
430
431    pDat[0] = tmp1;
432    pDat[1] = tmp2;
433
434    *pDat_e += 1;
435
436    return;
437  }
438#else
439  FDK_ASSERT(L >= 4);
440#endif
441
442  getTables(&twiddle, &sin_twiddle, &sin_step, L);
443
444#ifdef FUNCTION_dst_IV_func1
445  if ( (M>=4) && ((M&3) == 0) ) {
446    dst_IV_func1(M, twiddle, &pDat[0], &pDat[L]);
447  } else
448#endif
449  {
450    FIXP_DBL *RESTRICT pDat_0 = &pDat[0];
451    FIXP_DBL *RESTRICT pDat_1 = &pDat[L - 2];
452
453    register int i;
454
455    /* 34 cycles on ARM926 */
456    for (i = 0; i < M-1; i+=2,pDat_0+=2,pDat_1-=2)
457    {
458      register FIXP_DBL accu1,accu2,accu3,accu4;
459
460      accu1 =  pDat_1[1]; accu2 = -pDat_0[0];
461      accu3 =  pDat_0[1]; accu4 = -pDat_1[0];
462
463      cplxMultDiv2(&accu1, &accu2, accu1, accu2, twiddle[i]);
464      cplxMultDiv2(&accu3, &accu4, accu4, accu3, twiddle[i+1]);
465
466      pDat_0[0] = accu2; pDat_0[1] = accu1;
467      pDat_1[0] = accu4; pDat_1[1] = -accu3;
468    }
469    if (M&1)
470    {
471      register FIXP_DBL accu1,accu2;
472
473      accu1 =  pDat_1[1]; accu2 = -pDat_0[0];
474
475      cplxMultDiv2(&accu1, &accu2, accu1, accu2, twiddle[i]);
476
477      pDat_0[0] = accu2; pDat_0[1] = accu1;
478    }
479  }
480
481  fft(M, pDat, pDat_e);
482
483#ifdef FUNCTION_dst_IV_func2
484  if ( (M>=4) && ((M&3) == 0) ) {
485    dst_IV_func2(M>>2, sin_twiddle + sin_step, &pDat[0], &pDat[L - 1], sin_step);
486  } else
487#endif /* FUNCTION_dst_IV_func2 */
488  {
489    FIXP_DBL *RESTRICT pDat_0;
490    FIXP_DBL *RESTRICT pDat_1;
491    register FIXP_DBL accu1,accu2,accu3,accu4;
492    int idx, i;
493
494    pDat_0 = &pDat[0];
495    pDat_1 = &pDat[L - 2];
496
497    /* Sin and Cos values are 0.0f and 1.0f */
498    accu1 = pDat_1[0];
499    accu2 = pDat_1[1];
500
501    pDat_1[1] = -(pDat_0[0]>>1);
502    pDat_0[0] = (pDat_0[1]>>1);
503
504    for (idx = sin_step,i=1; i<(M+1)>>1; i++, idx+=sin_step)
505    {
506      FIXP_STP twd = sin_twiddle[idx];
507
508      cplxMultDiv2(&accu3, &accu4, accu1, accu2, twd);
509      pDat_1[0] =  -accu3;
510      pDat_0[1] =  -accu4;
511
512      pDat_0+=2;
513      pDat_1-=2;
514
515      cplxMultDiv2(&accu3, &accu4, pDat_0[1], pDat_0[0], twd);
516
517      accu1 = pDat_1[0];
518      accu2 = pDat_1[1];
519
520      pDat_0[0] =  accu3;
521      pDat_1[1] = -accu4;
522    }
523
524    if ( (M&1) == 0 )
525    {
526      /* Last Sin and Cos value pair are the same */
527      accu1 = fMultDiv2(accu1, WTC(0x5a82799a));
528      accu2 = fMultDiv2(accu2, WTC(0x5a82799a));
529
530      pDat_0[1] = - accu1 - accu2;
531      pDat_1[0] =   accu2 - accu1;
532    }
533  }
534
535  /* Add twiddeling scale. */
536  *pDat_e += 2;
537}
538#endif /* !defined(FUNCTION_dst_IV) */
539
540
541