/* -----------------------------------------------------------------------------------------------------------
3Software License for The Fraunhofer FDK AAC Codec Library for Android
4
© Copyright  1995 - 2013 Fraunhofer-Gesellschaft zur Förderung der angewandten Forschung e.V.
6  All rights reserved.
7
8 1.    INTRODUCTION
9The Fraunhofer FDK AAC Codec Library for Android ("FDK AAC Codec") is software that implements
10the MPEG Advanced Audio Coding ("AAC") encoding and decoding scheme for digital audio.
11This FDK AAC Codec software is intended to be used on a wide variety of Android devices.
12
13AAC's HE-AAC and HE-AAC v2 versions are regarded as today's most efficient general perceptual
14audio codecs. AAC-ELD is considered the best-performing full-bandwidth communications codec by
15independent studies and is widely deployed. AAC has been standardized by ISO and IEC as part
16of the MPEG specifications.
17
18Patent licenses for necessary patent claims for the FDK AAC Codec (including those of Fraunhofer)
19may be obtained through Via Licensing (www.vialicensing.com) or through the respective patent owners
20individually for the purpose of encoding or decoding bit streams in products that are compliant with
21the ISO/IEC MPEG audio standards. Please note that most manufacturers of Android devices already license
22these patent claims through Via Licensing or directly from the patent owners, and therefore FDK AAC Codec
23software may already be covered under those patent licenses when it is used for those licensed purposes only.
24
25Commercially-licensed AAC software libraries, including floating-point versions with enhanced sound quality,
26are also available from Fraunhofer. Users are encouraged to check the Fraunhofer website for additional
27applications information and documentation.
28
292.    COPYRIGHT LICENSE
30
31Redistribution and use in source and binary forms, with or without modification, are permitted without
32payment of copyright license fees provided that you satisfy the following conditions:
33
34You must retain the complete text of this software license in redistributions of the FDK AAC Codec or
35your modifications thereto in source code form.
36
37You must retain the complete text of this software license in the documentation and/or other materials
38provided with redistributions of the FDK AAC Codec or your modifications thereto in binary form.
39You must make available free of charge copies of the complete source code of the FDK AAC Codec and your
40modifications thereto to recipients of copies in binary form.
41
42The name of Fraunhofer may not be used to endorse or promote products derived from this library without
43prior written permission.
44
45You may not charge copyright license fees for anyone to use, copy or distribute the FDK AAC Codec
46software or your modifications thereto.
47
48Your modified versions of the FDK AAC Codec must carry prominent notices stating that you changed the software
49and the date of any change. For modified versions of the FDK AAC Codec, the term
50"Fraunhofer FDK AAC Codec Library for Android" must be replaced by the term
51"Third-Party Modified Version of the Fraunhofer FDK AAC Codec Library for Android."
52
533.    NO PATENT LICENSE
54
55NO EXPRESS OR IMPLIED LICENSES TO ANY PATENT CLAIMS, including without limitation the patents of Fraunhofer,
56ARE GRANTED BY THIS SOFTWARE LICENSE. Fraunhofer provides no warranty of patent non-infringement with
57respect to this software.
58
59You may use this FDK AAC Codec software or modifications thereto only for purposes that are authorized
60by appropriate patent licenses.
61
624.    DISCLAIMER
63
64This FDK AAC Codec software is provided by Fraunhofer on behalf of the copyright holders and contributors
65"AS IS" and WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES, including but not limited to the implied warranties
66of merchantability and fitness for a particular purpose. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
67CONTRIBUTORS BE LIABLE for any direct, indirect, incidental, special, exemplary, or consequential damages,
68including but not limited to procurement of substitute goods or services; loss of use, data, or profits,
69or business interruption, however caused and on any theory of liability, whether in contract, strict
70liability, or tort (including negligence), arising in any way out of the use of this software, even if
71advised of the possibility of such damage.
72
735.    CONTACT INFORMATION
74
75Fraunhofer Institute for Integrated Circuits IIS
76Attention: Audio and Multimedia Departments - FDK AAC LL
77Am Wolfsmantel 33
7891058 Erlangen, Germany
79
80www.iis.fraunhofer.de/amm
81amm-info@iis.fraunhofer.de
82----------------------------------------------------------------------------------------------------------- */
83
84
85
#ifdef FUNCTION_dct_IV_func1

/*
   Note: This assembler routine is here, because the ARM926 compiler does
         not encode the inline assembler with optimal speed.
         With this version, we save 2 cycles per loop iteration.
*/

/*
   dct_IV_func1 - hand-scheduled in-place twiddle rotation (embedded assembler).

   The loop body is unrolled twice. Each half
     - reads two 32-bit twiddle words from 'twiddle' (post-incremented),
     - combines pDat_0[0] with pDat_1[0] using the first word and
       pDat_0[1] with pDat_1[-1] using the second word,
     - writes the four results back to those same locations, then advances
       pDat_0 by two elements and moves pDat_1 back by two elements.
   One full pass therefore consumes four twiddle words and rewrites four
   elements at each pointer.

   Parameters (passed in r0-r3, see register map below):
     i        number of loop passes. The counter is decremented and tested
              only after the body, so it must be >= 1 on entry; a value of 0
              is not guarded against.
     twiddle  table of packed twiddle words, read sequentially.
     pDat_0   pointer walking upwards through the data.
     pDat_1   pointer walking downwards through the data.

   r4-r9 are saved on entry and restored before returning.

   Comment notation: ".l" is the halfword of val_tw selected by the T (top)
   instruction variants, ".h" the halfword selected by the B (bottom)
   variants. The instruction order is tuned for cycle count; do not reorder.
*/
__asm  void dct_IV_func1(
    int i,
    const FIXP_SPK *twiddle,
    FIXP_DBL *RESTRICT pDat_0,
    FIXP_DBL *RESTRICT pDat_1)
{
    /* Register map:
       r0   i
       r1   twiddle
       r2   pDat_0
       r3   pDat_1
       r4   accu1
       r5   accu2
       r6   accu3
       r7   accu4
       r8   val_tw
       r9   accuX
    */
    PUSH    {r4-r9}

     /* 44 cycles for 2 iterations = 22 cycles/iteration */
dct_IV_loop1_start
/*  First iteration */
    LDR     r8, [r1], #4    // val_tw = *twiddle++;
    LDR     r5, [r2, #0]    // accu2 = pDat_0[0]
    LDR     r4, [r3, #0]    // accu1 = pDat_1[0]

    SMULWT  r9, r5, r8      // accuX = accu2*val_tw.l
    SMULWB  r5, r5, r8      // accu2 = accu2*val_tw.h
    RSB     r9, r9, #0      // accuX =-accu2*val_tw.l
    SMLAWT  r5, r4, r8, r5  // accu2 = accu2*val_tw.h + accu1*val_tw.l
    SMLAWB  r4, r4, r8, r9  // accu1 = accu1*val_tw.h - accu2*val_tw.l

    LDR     r8, [r1], #4    // val_tw = *twiddle++;
    LDR     r7, [r3, #-4]   // accu4 = pDat_1[-1]
    LDR     r6, [r2, #4]    // accu3 = pDat_0[1]

    SMULWB  r9, r7, r8      // accuX = accu4*val_tw.h
    SMULWT  r7, r7, r8      // accu4 = accu4*val_tw.l
    RSB     r9, r9, #0      // accuX =-accu4*val_tw.h
    SMLAWB  r7, r6, r8, r7  // accu4 = accu4*val_tw.l+accu3*val_tw.h
    SMLAWT  r6, r6, r8, r9  // accu3 = accu3*val_tw.l-accu4*val_tw.h

    STR     r5, [r2], #4    // *pDat_0++ = accu2
    STR     r4, [r2], #4    // *pDat_0++ = accu1
    STR     r6, [r3], #-4   // *pDat_1-- = accu3
    STR     r7, [r3], #-4   // *pDat_1-- = accu4

/*  Second iteration */
    LDR     r8, [r1], #4    // val_tw = *twiddle++;
    LDR     r5, [r2, #0]    // accu2 = pDat_0[0]
    LDR     r4, [r3, #0]    // accu1 = pDat_1[0]

    SMULWT  r9, r5, r8      // accuX = accu2*val_tw.l
    SMULWB  r5, r5, r8      // accu2 = accu2*val_tw.h
    RSB     r9, r9, #0      // accuX =-accu2*val_tw.l
    SMLAWT  r5, r4, r8, r5  // accu2 = accu2*val_tw.h + accu1*val_tw.l
    SMLAWB  r4, r4, r8, r9  // accu1 = accu1*val_tw.h - accu2*val_tw.l

    LDR     r8, [r1], #4    // val_tw = *twiddle++;
    LDR     r7, [r3, #-4]   // accu4 = pDat_1[-1]
    LDR     r6, [r2, #4]    // accu3 = pDat_0[1]

    SMULWB  r9, r7, r8      // accuX = accu4*val_tw.h
    SMULWT  r7, r7, r8      // accu4 = accu4*val_tw.l
    RSB     r9, r9, #0      // accuX =-accu4*val_tw.h
    SMLAWB  r7, r6, r8, r7  // accu4 = accu4*val_tw.l+accu3*val_tw.h
    SMLAWT  r6, r6, r8, r9  // accu3 = accu3*val_tw.l-accu4*val_tw.h

    STR     r5, [r2], #4    // *pDat_0++ = accu2
    STR     r4, [r2], #4    // *pDat_0++ = accu1
    STR     r6, [r3], #-4   // *pDat_1-- = accu3
    STR     r7, [r3], #-4   // *pDat_1-- = accu4

    SUBS    r0, r0, #1      // one pass (two unrolled iterations) done
    BNE     dct_IV_loop1_start

    POP     {r4-r9}

    BX      lr
}

#endif /* FUNCTION_dct_IV_func1 */
177
178
#ifdef FUNCTION_dct_IV_func2

/*
   dct_IV_func2 - in-place twiddle rotation working inwards from both ends.

   Prologue (C): remembers the pair pDat_1[-2], pDat_1[-1] in accu1/accu2,
   stores -(pDat_0[1]>>1) at *--pDat_1, halves pDat_0[0] in place and steps
   pDat_0 forward, then skips the first 'inc' twiddle entries.

   Assembler loop: the loop body is unrolled twice and is entered at its
   second half, so 2*i - 1 "steps" are executed in total. Each step
     - rotates the pending pair (accu1, accu2) with the current twiddle word
       and stores one result at *pDat_0++ and the other at *--pDat_1,
     - rotates the pair pDat_0[0], pDat_0[1] with the same twiddle word and
       stores one result at *--pDat_1 and the other at *pDat_0++,
     - in between, loads the next pending pair from just below pDat_1,
     - fetches the next twiddle word, advancing 'twiddle' by 'inc' entries
       (inc << 2 bytes).
   The twiddle fetched after the final step is never used.
   NOTE(review): that last fetch reads one stride past the last twiddle
   actually consumed - confirm the caller's table covers it.

   Epilogue (C): the last pending pair is scaled with fMultDiv2() by
   WTC(0x5a82799a) (0x5a82799a / 2^31 is approximately 0.7071); their sum
   goes to *--pDat_1 and their difference to *pDat_0.

   Parameters:
     i        loop count, must be >= 1 (decremented and tested after the body).
     twiddle  table of packed twiddle words, read with stride 'inc'.
     pDat_0   pointer walking upwards through the data.
     pDat_1   pointer walking downwards through the data.
     inc      twiddle table stride in entries.

   The instruction order inside the asm block is tuned for cycle count.
*/
FDK_INLINE
/* __attribute__((noinline)) */
static void dct_IV_func2(
    int i,
    const FIXP_SPK *twiddle,
    FIXP_DBL *pDat_0,
    FIXP_DBL *pDat_1,
    int inc)
{
  FIXP_DBL accu1, accu2, accu3, accu4, accuX;
  LONG val_tw;

  accu1 = pDat_1[-2];
  accu2 = pDat_1[-1];

  *--pDat_1 = -(pDat_0[1]>>1);
  /* Store first, then advance: reading pDat_0[0] and post-incrementing
     pDat_0 within one expression is unsequenced (undefined behaviour). */
  *pDat_0 = (pDat_0[0]>>1);
  pDat_0++;

  twiddle += inc;

__asm
  {
    LDR     val_tw, [twiddle], inc, LSL #2    // val_tw = *twiddle; twiddle += inc
    B       dct_IV_loop2_2nd_part             // first pass runs the second half only

    /* 42 cycles for 2 iterations = 21 cycles/iteration */
dct_IV_loop2:
    SMULWT  accuX, accu2, val_tw
    SMULWB  accu2, accu2, val_tw
    RSB     accuX, accuX, #0
    SMLAWB  accuX, accu1, val_tw, accuX
    SMLAWT  accu2, accu1, val_tw, accu2
    STR     accuX, [pDat_0], #4               // *pDat_0++ = accuX
    STR     accu2, [pDat_1, #-4] !            // *--pDat_1 = accu2

    LDR     accu4, [pDat_0, #4]               // accu4 = pDat_0[1]
    LDR     accu3, [pDat_0]                   // accu3 = pDat_0[0]
    SMULWB  accuX, accu4, val_tw
    SMULWT  accu4, accu4, val_tw
    RSB     accuX, accuX, #0
    SMLAWT  accuX, accu3, val_tw, accuX
    SMLAWB  accu4, accu3, val_tw, accu4

    LDR     accu1, [pDat_1, #-8]              // next pending pair: accu1 = pDat_1[-2]
    LDR     accu2, [pDat_1, #-4]              //                    accu2 = pDat_1[-1]

    LDR     val_tw, [twiddle], inc, LSL #2    // val_tw = *twiddle; twiddle += inc

    STR     accuX, [pDat_1, #-4] !            // *--pDat_1 = accuX
    STR     accu4, [pDat_0], #4               // *pDat_0++ = accu4

dct_IV_loop2_2nd_part:
    SMULWT  accuX, accu2, val_tw
    SMULWB  accu2, accu2, val_tw
    RSB     accuX, accuX, #0
    SMLAWB  accuX, accu1, val_tw, accuX
    SMLAWT  accu2, accu1, val_tw, accu2
    STR     accuX, [pDat_0], #4               // *pDat_0++ = accuX
    STR     accu2, [pDat_1, #-4] !            // *--pDat_1 = accu2

    LDR     accu4, [pDat_0, #4]               // accu4 = pDat_0[1]
    LDR     accu3, [pDat_0]                   // accu3 = pDat_0[0]
    SMULWB  accuX, accu4, val_tw
    SMULWT  accu4, accu4, val_tw
    RSB     accuX, accuX, #0
    SMLAWT  accuX, accu3, val_tw, accuX
    SMLAWB  accu4, accu3, val_tw, accu4

    LDR     accu1, [pDat_1, #-8]              // next pending pair: accu1 = pDat_1[-2]
    LDR     accu2, [pDat_1, #-4]              //                    accu2 = pDat_1[-1]

    STR     accuX, [pDat_1, #-4] !            // *--pDat_1 = accuX
    STR     accu4, [pDat_0], #4               // *pDat_0++ = accu4

    LDR     val_tw, [twiddle], inc, LSL #2    // val_tw = *twiddle; twiddle += inc

    SUBS    i, i, #1
    BNE     dct_IV_loop2
  }

  /* Last Sin and Cos value pair are the same */
  accu1 = fMultDiv2(accu1, WTC(0x5a82799a));
  accu2 = fMultDiv2(accu2, WTC(0x5a82799a));

  *--pDat_1 = accu1 + accu2;
  *pDat_0++ = accu1 - accu2;
}
#endif /* FUNCTION_dct_IV_func2 */
269
270
#ifdef FUNCTION_dst_IV_func1

/*
   dst_IV_func1 - hand-scheduled in-place twiddle rotation (embedded assembler).

   The loop body is unrolled twice. Each half
     - reads two 32-bit twiddle words from 'twiddle' (post-incremented),
     - combines the negated pDat_0[0] with pDat_1[-1] using the first word
       and stores the two results at *pDat_0++ (twice),
     - combines pDat_0[1] (as read before the stores above) with the negated
       pDat_1[-2] using the second word and stores the two results at
       *--pDat_1 (twice).
   One full pass therefore consumes four twiddle words, advances pDat_0 by
   four elements and moves pDat_1 back by four elements.

   Parameters (passed in r0-r3, see register map below):
     i        element counter; reduced by 4 per pass and compared against
              zero only after the body, so it must be a positive multiple
              of 4 for the loop to terminate as intended.
     twiddle  table of packed twiddle words, read sequentially.
     pDat_0   pointer walking upwards through the data.
     pDat_1   pointer walking downwards through the data (pre-decremented
              on store).

   r4-r9 are saved on entry and restored before returning.

   Comment notation: ".l" is the halfword of val_tw selected by the T (top)
   instruction variants, ".h" the halfword selected by the B (bottom)
   variants. Register names in the comments follow the register map; r4 and
   r5 are reused as scratch in the second part of each half.
   The instruction order is tuned for cycle count; do not reorder.
*/
__asm void dst_IV_func1(
    int i,
    const FIXP_SPK *twiddle,
    FIXP_DBL *pDat_0,
    FIXP_DBL *pDat_1)
{
    /* Register map:
       r0   i
       r1   twiddle
       r2   pDat_0
       r3   pDat_1
       r4   accu1
       r5   accu2
       r6   accu3
       r7   accu4
       r8   val_tw
       r9   accuX
    */
    PUSH    {r4-r9}

dst_IV_loop1
    LDR     r8, [r1], #4               // val_tw = *twiddle++
    LDR     r5, [r2]                   // accu2 = pDat_0[0]
    LDR     r6, [r2, #4]               // accu3 = pDat_0[1]
    RSB     r5, r5, #0                 // accu2 = -accu2
    SMULWT  r9, r5, r8                 // accuX = (-accu2)*val_tw.l
    LDR     r4, [r3, #-4]              // accu1 = pDat_1[-1]
    RSB     r9, r9, #0                 // accuX = -(-accu2)*val_tw.l
    SMLAWB  r9, r4, r8, r9             // accuX = accu1*val_tw.h-(-accu2)*val_tw.l
    SMULWT  r4, r4, r8                 // accu1 = accu1*val_tw.l
    LDR     r7, [r3, #-8]              // accu4 = pDat_1[-2]
    SMLAWB  r5, r5, r8, r4             // accu2 = (-accu2)*val_tw.h+accu1*val_tw.l
    LDR     r8, [r1], #4               // val_tw = *twiddle++
    STR     r5, [r2], #4               // *pDat_0++ = accu2
    STR     r9, [r2], #4               // *pDat_0++ = accu1 (accuX)
    RSB     r7, r7, #0                 // accu4 = -accu4
    SMULWB  r5, r7, r8                 // r5 (scratch) = (-accu4)*val_tw.h
    SMULWB  r4, r6, r8                 // r4 (scratch) = accu3*val_tw.h
    RSB     r5, r5, #0                 // r5 = -(-accu4)*val_tw.h
    SMLAWT  r6, r6, r8, r5             // accu3 = accu3*val_tw.l-(-accu4)*val_tw.h
    SMLAWT  r7, r7, r8, r4             // accu4 = (-accu4)*val_tw.l+accu3*val_tw.h
    STR     r6, [r3, #-4] !            // *--pDat_1 = accu3
    STR     r7, [r3, #-4] !            // *--pDat_1 = accu4

    LDR     r8, [r1], #4               // val_tw = *twiddle++
    LDR     r5, [r2]                   // accu2 = pDat_0[0]
    LDR     r6, [r2, #4]               // accu3 = pDat_0[1]
    RSB     r5, r5, #0                 // accu2 = -accu2
    SMULWT  r9, r5, r8                 // accuX = (-accu2)*val_tw.l
    LDR     r4, [r3, #-4]              // accu1 = pDat_1[-1]
    RSB     r9, r9, #0                 // accuX = -(-accu2)*val_tw.l
    SMLAWB  r9, r4, r8, r9             // accuX = accu1*val_tw.h-(-accu2)*val_tw.l
    SMULWT  r4, r4, r8                 // accu1 = accu1*val_tw.l
    LDR     r7, [r3, #-8]              // accu4 = pDat_1[-2]
    SMLAWB  r5, r5, r8, r4             // accu2 = (-accu2)*val_tw.h+accu1*val_tw.l
    LDR     r8, [r1], #4               // val_tw = *twiddle++
    STR     r5, [r2], #4               // *pDat_0++ = accu2
    STR     r9, [r2], #4               // *pDat_0++ = accu1 (accuX)
    RSB     r7, r7, #0                 // accu4 = -accu4
    SMULWB  r5, r7, r8                 // r5 (scratch) = (-accu4)*val_tw.h
    SMULWB  r4, r6, r8                 // r4 (scratch) = accu3*val_tw.h
    RSB     r5, r5, #0                 // r5 = -(-accu4)*val_tw.h
    SMLAWT  r6, r6, r8, r5             // accu3 = accu3*val_tw.l-(-accu4)*val_tw.h
    SMLAWT  r7, r7, r8, r4             // accu4 = (-accu4)*val_tw.l+accu3*val_tw.h
    STR     r6, [r3, #-4] !            // *--pDat_1 = accu3
    STR     r7, [r3, #-4] !            // *--pDat_1 = accu4

    SUBS    r0, r0, #4                 // i-= 4
    BNE     dst_IV_loop1

    POP     {r4-r9}
    BX      lr
}
#endif /* FUNCTION_dst_IV_func1 */
347
#ifdef FUNCTION_dst_IV_func2

/*
   dst_IV_func2 - in-place twiddle rotation working inwards from both ends.

   Prologue (C): reads pDat_0[0] and pDat_0[1], halves both and negates the
   first; remembers the pair pDat_1[-1], pDat_1[0] in accu1/accu2; then
   stores the halved pDat_0[1] at *pDat_0++ and the negated, halved
   pDat_0[0] at *pDat_1--.

   Assembler loop: the loop body is unrolled twice and is entered at its
   second half, so 2*i - 1 "steps" are executed in total. Each step
     - fetches one twiddle word, advancing 'twiddle' by 'inc' entries
       (inc << 2 bytes),
     - negates the pending pair (accu1, accu2), rotates it with the twiddle
       word and stores the results at *pDat_1-- and *pDat_0++,
     - loads pDat_0[0] and pDat_0[1], negates and rotates them with the same
       twiddle word,
     - loads the next pending pair from pDat_1[-1], pDat_1[0],
     - stores the rotated pair at *pDat_0++ and *pDat_1--.

   Epilogue (C): the negated last pending pair is scaled with fMultDiv2() by
   WTC(0x5a82799a) (0x5a82799a / 2^31 is approximately 0.7071); their sum is
   written to *pDat_0 and their difference to *pDat_1.

   Parameters:
     i        loop count, must be >= 1 (decremented and tested after the body).
     twiddle  table of packed twiddle words, read with stride 'inc'.
     pDat_0   pointer walking upwards through the data.
     pDat_1   pointer walking downwards through the data.
     inc      twiddle table stride in entries.

   Comment notation: ".l" is the halfword of val_tw selected by the T (top)
   instruction variants, ".h" the halfword selected by the B (bottom)
   variants. The instruction order is tuned for cycle count.
*/
FDK_INLINE
/* __attribute__((noinline)) */
static void dst_IV_func2(
    int i,
    const FIXP_SPK *twiddle,
    FIXP_DBL *RESTRICT pDat_0,
    FIXP_DBL *RESTRICT pDat_1,
    int inc)
{
  FIXP_DBL accu1,accu2,accu3,accu4;
  LONG val_tw;

  accu4 = pDat_0[0];
  accu3 = pDat_0[1];
  accu4 >>= 1;
  accu3 >>= 1;
  accu4 = -accu4;

  accu1 = pDat_1[-1];
  accu2 = pDat_1[0];

  *pDat_0++ = accu3;
  *pDat_1-- = accu4;


  __asm
  {
    B       dst_IV_loop2_2nd_part             // first pass runs the second half only

    /* 50 cycles for 2 iterations = 25 cycles/iteration */

dst_IV_loop2:

    LDR     val_tw, [twiddle], inc, LSL #2    // val_tw = *twiddle; twiddle += inc

    RSB     accu2, accu2, #0                  // accu2 = -accu2
    RSB     accu1, accu1, #0                  // accu1 = -accu1
    SMULWT  accu3, accu2, val_tw              // accu3 = (-accu2)*val_tw.l
    SMULWT  accu4, accu1, val_tw              // accu4 = (-accu1)*val_tw.l
    RSB     accu3, accu3, #0                  // accu3 = -accu2*val_tw.l
    SMLAWB  accu1, accu1, val_tw, accu3       // accu1 = -accu1*val_tw.h-(-accu2)*val_tw.l
    SMLAWB  accu2, accu2, val_tw, accu4       // accu2 = (-accu1)*val_tw.l+(-accu2)*val_tw.h
    STR     accu1, [pDat_1], #-4              // *pDat_1-- = accu1
    STR     accu2, [pDat_0], #4               // *pDat_0++ = accu2

    LDR     accu4, [pDat_0]                   // accu4 = pDat_0[0]
    LDR     accu3, [pDat_0, #4]               // accu3 = pDat_0[1]

    RSB     accu4, accu4, #0                  // accu4 = -accu4
    RSB     accu3, accu3, #0                  // accu3 = -accu3

    SMULWB  accu1, accu3, val_tw              // accu1 = (-accu3)*val_tw.h
    SMULWT  accu2, accu3, val_tw              // accu2 = (-accu3)*val_tw.l
    RSB     accu1, accu1, #0                  // accu1 = -(-accu3)*val_tw.h
    SMLAWT  accu3, accu4, val_tw, accu1       // accu3 = (-accu4)*val_tw.l-(-accu3)*val_tw.h
    SMLAWB  accu4, accu4, val_tw, accu2       // accu4 = (-accu3)*val_tw.l+(-accu4)*val_tw.h

    LDR     accu1, [pDat_1, #-4]              // accu1 = pDat_1[-1]
    LDR     accu2, [pDat_1]                   // accu2 = pDat_1[0]

    STR     accu3, [pDat_0], #4               // *pDat_0++ = accu3
    STR     accu4, [pDat_1], #-4              // *pDat_1-- = accu4

dst_IV_loop2_2nd_part:

    LDR     val_tw, [twiddle], inc, LSL #2    // val_tw = *twiddle; twiddle += inc

    RSB     accu2, accu2, #0                  // accu2 = -accu2
    RSB     accu1, accu1, #0                  // accu1 = -accu1
    SMULWT  accu3, accu2, val_tw              // accu3 = (-accu2)*val_tw.l
    SMULWT  accu4, accu1, val_tw              // accu4 = (-accu1)*val_tw.l
    RSB     accu3, accu3, #0                  // accu3 = -accu2*val_tw.l
    SMLAWB  accu1, accu1, val_tw, accu3       // accu1 = -accu1*val_tw.h-(-accu2)*val_tw.l
    SMLAWB  accu2, accu2, val_tw, accu4       // accu2 = (-accu1)*val_tw.l+(-accu2)*val_tw.h
    STR     accu1, [pDat_1], #-4              // *pDat_1-- = accu1
    STR     accu2, [pDat_0], #4               // *pDat_0++ = accu2

    LDR     accu4, [pDat_0]                   // accu4 = pDat_0[0]
    LDR     accu3, [pDat_0, #4]               // accu3 = pDat_0[1]

    RSB     accu4, accu4, #0                  // accu4 = -accu4
    RSB     accu3, accu3, #0                  // accu3 = -accu3

    SMULWB  accu1, accu3, val_tw              // accu1 = (-accu3)*val_tw.h
    SMULWT  accu2, accu3, val_tw              // accu2 = (-accu3)*val_tw.l
    RSB     accu1, accu1, #0                  // accu1 = -(-accu3)*val_tw.h
    SMLAWT  accu3, accu4, val_tw, accu1       // accu3 = (-accu4)*val_tw.l-(-accu3)*val_tw.h
    SMLAWB  accu4, accu4, val_tw, accu2       // accu4 = (-accu3)*val_tw.l+(-accu4)*val_tw.h

    LDR     accu1, [pDat_1, #-4]              // accu1 = pDat_1[-1]
    LDR     accu2, [pDat_1]                   // accu2 = pDat_1[0]

    STR     accu3, [pDat_0], #4               // *pDat_0++ = accu3
    STR     accu4, [pDat_1], #-4              // *pDat_1-- = accu4

    SUBS    i, i, #1
    BNE     dst_IV_loop2
  }

  /* Last Sin and Cos value pair are the same */
  accu1 = fMultDiv2(-accu1, WTC(0x5a82799a));
  accu2 = fMultDiv2(-accu2, WTC(0x5a82799a));

  *pDat_0 = accu1 + accu2;
  *pDat_1 = accu1 - accu2;
}
#endif /* FUNCTION_dst_IV_func2 */
457