1/* -----------------------------------------------------------------------------
2Software License for The Fraunhofer FDK AAC Codec Library for Android
3
4© Copyright  1995 - 2018 Fraunhofer-Gesellschaft zur Förderung der angewandten
5Forschung e.V. All rights reserved.
6
7 1.    INTRODUCTION
8The Fraunhofer FDK AAC Codec Library for Android ("FDK AAC Codec") is software
9that implements the MPEG Advanced Audio Coding ("AAC") encoding and decoding
10scheme for digital audio. This FDK AAC Codec software is intended to be used on
11a wide variety of Android devices.
12
13AAC's HE-AAC and HE-AAC v2 versions are regarded as today's most efficient
14general perceptual audio codecs. AAC-ELD is considered the best-performing
15full-bandwidth communications codec by independent studies and is widely
16deployed. AAC has been standardized by ISO and IEC as part of the MPEG
17specifications.
18
19Patent licenses for necessary patent claims for the FDK AAC Codec (including
20those of Fraunhofer) may be obtained through Via Licensing
21(www.vialicensing.com) or through the respective patent owners individually for
22the purpose of encoding or decoding bit streams in products that are compliant
23with the ISO/IEC MPEG audio standards. Please note that most manufacturers of
24Android devices already license these patent claims through Via Licensing or
25directly from the patent owners, and therefore FDK AAC Codec software may
26already be covered under those patent licenses when it is used for those
27licensed purposes only.
28
29Commercially-licensed AAC software libraries, including floating-point versions
30with enhanced sound quality, are also available from Fraunhofer. Users are
31encouraged to check the Fraunhofer website for additional applications
32information and documentation.
33
342.    COPYRIGHT LICENSE
35
36Redistribution and use in source and binary forms, with or without modification,
37are permitted without payment of copyright license fees provided that you
38satisfy the following conditions:
39
40You must retain the complete text of this software license in redistributions of
41the FDK AAC Codec or your modifications thereto in source code form.
42
43You must retain the complete text of this software license in the documentation
44and/or other materials provided with redistributions of the FDK AAC Codec or
45your modifications thereto in binary form. You must make available free of
46charge copies of the complete source code of the FDK AAC Codec and your
47modifications thereto to recipients of copies in binary form.
48
49The name of Fraunhofer may not be used to endorse or promote products derived
50from this library without prior written permission.
51
52You may not charge copyright license fees for anyone to use, copy or distribute
53the FDK AAC Codec software or your modifications thereto.
54
55Your modified versions of the FDK AAC Codec must carry prominent notices stating
56that you changed the software and the date of any change. For modified versions
57of the FDK AAC Codec, the term "Fraunhofer FDK AAC Codec Library for Android"
58must be replaced by the term "Third-Party Modified Version of the Fraunhofer FDK
59AAC Codec Library for Android."
60
613.    NO PATENT LICENSE
62
63NO EXPRESS OR IMPLIED LICENSES TO ANY PATENT CLAIMS, including without
64limitation the patents of Fraunhofer, ARE GRANTED BY THIS SOFTWARE LICENSE.
65Fraunhofer provides no warranty of patent non-infringement with respect to this
66software.
67
68You may use this FDK AAC Codec software or modifications thereto only for
69purposes that are authorized by appropriate patent licenses.
70
714.    DISCLAIMER
72
73This FDK AAC Codec software is provided by Fraunhofer on behalf of the copyright
74holders and contributors "AS IS" and WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES,
75including but not limited to the implied warranties of merchantability and
76fitness for a particular purpose. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
77CONTRIBUTORS BE LIABLE for any direct, indirect, incidental, special, exemplary,
78or consequential damages, including but not limited to procurement of substitute
79goods or services; loss of use, data, or profits, or business interruption,
80however caused and on any theory of liability, whether in contract, strict
81liability, or tort (including negligence), arising in any way out of the use of
82this software, even if advised of the possibility of such damage.
83
845.    CONTACT INFORMATION
85
86Fraunhofer Institute for Integrated Circuits IIS
87Attention: Audio and Multimedia Departments - FDK AAC LL
88Am Wolfsmantel 33
8991058 Erlangen, Germany
90
91www.iis.fraunhofer.de/amm
92amm-info@iis.fraunhofer.de
93----------------------------------------------------------------------------- */
94
95/******************* Library for basic calculation routines ********************
96
97   Author(s):
98
99   Description:
100
101*******************************************************************************/
102
103#ifdef FUNCTION_dct_IV_func1
104
105/*
106   Note: This assembler routine is here, because the ARM926 compiler does
107         not encode the inline assembler with optimal speed.
108         With this version, we save 2 cycles per loop iteration.
109*/
110
111__asm void dct_IV_func1(int i, const FIXP_SPK *twiddle,
112                        FIXP_DBL *RESTRICT pDat_0, FIXP_DBL *RESTRICT pDat_1) {
113  /* Register map:
114     r0   i
115     r1   twiddle
116     r2   pDat_0
117     r3   pDat_1
118     r4   accu1
119     r5   accu2
120     r6   accu3
121     r7   accu4
122     r8   val_tw
123     r9   accuX
124  */
125  PUSH{r4 - r9}
126
127  /* 44 cycles for 2 iterations = 22 cycles/iteration */
128  dct_IV_loop1_start
129      /*  First iteration */
130      LDR r8,
131      [r1],
132# 4 // val_tw = *twiddle++;
133      LDR r5,
134      [ r2, #0 ]  // accu2 = pDat_0[0]
135      LDR r4,
136      [ r3, #0 ]  // accu1 = pDat_1[0]
137
138      SMULWT r9,
139      r5,
140      r8  // accuX = accu2*val_tw.l
141          SMULWB r5,
142      r5,
143      r8  // accu2 = accu2*val_tw.h
144          RSB r9,
145      r9,
146# 0 // accuX =-accu2*val_tw.l
147      SMLAWT r5, r4, r8,
148      r5  // accu2 = accu2*val_tw.h + accu1*val_tw.l
149          SMLAWB r4,
150      r4, r8,
151      r9  // accu1 = accu1*val_tw.h - accu2*val_tw.l
152
153          LDR r8,
154      [r1],
155# 4 // val_tw = *twiddle++;
156      LDR r7,
157      [ r3, # - 4 ]  // accu4 = pDat_1[-1]
158      LDR r6,
159      [ r2, #4 ]  // accu3 = pDat_0[1]
160
161      SMULWB r9,
162      r7,
163      r8  // accuX = accu4*val_tw.h
164          SMULWT r7,
165      r7,
166      r8  // accu4 = accu4*val_tw.l
167          RSB r9,
168      r9,
169# 0 // accuX =-accu4*val_tw.h
170      SMLAWB r7, r6, r8,
171      r7  // accu4 = accu4*val_tw.l+accu3*val_tw.h
172          SMLAWT r6,
173      r6, r8,
174      r9  // accu3 = accu3*val_tw.l-accu4*val_tw.h
175
176          STR r5,
177      [r2],
178# 4 // *pDat_0++ = accu2
179      STR r4, [r2],
180# 4 // *pDat_0++ = accu1
181      STR r6, [r3],
182#- 4 // *pDat_1-- = accu3
183      STR r7, [r3],
184#- 4 // *pDat_1-- = accu4
185
186      /*  Second iteration */
187      LDR r8, [r1],
188# 4 // val_tw = *twiddle++;
189      LDR r5,
190      [ r2, #0 ]  // accu2 = pDat_0[0]
191      LDR r4,
192      [ r3, #0 ]  // accu1 = pDat_1[0]
193
194      SMULWT r9,
195      r5,
196      r8  // accuX = accu2*val_tw.l
197          SMULWB r5,
198      r5,
199      r8  // accu2 = accu2*val_tw.h
200          RSB r9,
201      r9,
202# 0 // accuX =-accu2*val_tw.l
203      SMLAWT r5, r4, r8,
204      r5  // accu2 = accu2*val_tw.h + accu1*val_tw.l
205          SMLAWB r4,
206      r4, r8,
207      r9  // accu1 = accu1*val_tw.h - accu2*val_tw.l
208
209          LDR r8,
210      [r1],
211# 4 // val_tw = *twiddle++;
212      LDR r7,
213      [ r3, # - 4 ]  // accu4 = pDat_1[-1]
214      LDR r6,
215      [ r2, #4 ]  // accu3 = pDat_0[1]
216
217      SMULWB r9,
218      r7,
219      r8  // accuX = accu4*val_tw.h
220          SMULWT r7,
221      r7,
222      r8  // accu4 = accu4*val_tw.l
223          RSB r9,
224      r9,
225# 0 // accuX =-accu4*val_tw.h
226      SMLAWB r7, r6, r8,
227      r7  // accu4 = accu4*val_tw.l+accu3*val_tw.h
228          SMLAWT r6,
229      r6, r8,
230      r9  // accu3 = accu3*val_tw.l-accu4*val_tw.h
231
232          STR r5,
233      [r2],
234# 4 // *pDat_0++ = accu2
235      STR r4, [r2],
236# 4 // *pDat_0++ = accu1
237      STR r6, [r3],
238#- 4 // *pDat_1-- = accu3
239      STR r7, [r3],
240#- 4 // *pDat_1-- = accu4
241
242      SUBS r0, r0,
243# 1 BNE dct_IV_loop1_start
244
245      POP { r4 - r9 }
246
247  BX lr
248}
249
250#endif /* FUNCTION_dct_IV_func1 */
251
252#ifdef FUNCTION_dct_IV_func2
253
254/* __attribute__((noinline)) */
255static inline void dct_IV_func2(int i, const FIXP_SPK *twiddle,
256                                FIXP_DBL *pDat_0, FIXP_DBL *pDat_1, int inc) {
257  FIXP_DBL accu1, accu2, accu3, accu4, accuX;
258  LONG val_tw;
259
260  accu1 = pDat_1[-2];
261  accu2 = pDat_1[-1];
262
263  *--pDat_1 = -(pDat_0[1] >> 1);
264  *pDat_0++ = (pDat_0[0] >> 1);
265
266  twiddle += inc;
267
268  __asm {
269    LDR     val_tw, [twiddle], inc, LSL #2  // val_tw = *twiddle; twiddle += inc
270    B       dct_IV_loop2_2nd_part
271
272        /* 42 cycles for 2 iterations = 21 cycles/iteration */
273dct_IV_loop2:
274    SMULWT  accuX, accu2, val_tw
275    SMULWB  accu2, accu2, val_tw
276    RSB     accuX, accuX, #0
277    SMLAWB  accuX, accu1, val_tw, accuX
278    SMLAWT  accu2, accu1, val_tw, accu2
279    STR     accuX, [pDat_0], #4
280    STR     accu2, [pDat_1, #-4] !
281
282    LDR     accu4, [pDat_0, #4]
283    LDR     accu3, [pDat_0]
284    SMULWB  accuX, accu4, val_tw
285    SMULWT  accu4, accu4, val_tw
286    RSB     accuX, accuX, #0
287    SMLAWT  accuX, accu3, val_tw, accuX
288    SMLAWB  accu4, accu3, val_tw, accu4
289
290    LDR     accu1, [pDat_1, #-8]
291    LDR     accu2, [pDat_1, #-4]
292
293    LDR     val_tw, [twiddle], inc, LSL #2  // val_tw = *twiddle; twiddle += inc
294
295    STR     accuX, [pDat_1, #-4] !
296    STR     accu4, [pDat_0], #4
297
298dct_IV_loop2_2nd_part:
299    SMULWT  accuX, accu2, val_tw
300    SMULWB  accu2, accu2, val_tw
301    RSB     accuX, accuX, #0
302    SMLAWB  accuX, accu1, val_tw, accuX
303    SMLAWT  accu2, accu1, val_tw, accu2
304    STR     accuX, [pDat_0], #4
305    STR     accu2, [pDat_1, #-4] !
306
307    LDR     accu4, [pDat_0, #4]
308    LDR     accu3, [pDat_0]
309    SMULWB  accuX, accu4, val_tw
310    SMULWT  accu4, accu4, val_tw
311    RSB     accuX, accuX, #0
312    SMLAWT  accuX, accu3, val_tw, accuX
313    SMLAWB  accu4, accu3, val_tw, accu4
314
315    LDR     accu1, [pDat_1, #-8]
316    LDR     accu2, [pDat_1, #-4]
317
318    STR     accuX, [pDat_1, #-4] !
319    STR     accu4, [pDat_0], #4
320
321    LDR     val_tw, [twiddle], inc, LSL #2  // val_tw = *twiddle; twiddle += inc
322
323    SUBS    i, i, #1
324    BNE     dct_IV_loop2
325  }
326
327  /* Last Sin and Cos value pair are the same */
328  accu1 = fMultDiv2(accu1, WTC(0x5a82799a));
329  accu2 = fMultDiv2(accu2, WTC(0x5a82799a));
330
331  *--pDat_1 = accu1 + accu2;
332  *pDat_0++ = accu1 - accu2;
333}
334#endif /* FUNCTION_dct_IV_func2 */
335
336#ifdef FUNCTION_dst_IV_func1
337
338__asm void dst_IV_func1(int i, const FIXP_SPK *twiddle, FIXP_DBL *pDat_0,
339                        FIXP_DBL *pDat_1) {
340  /* Register map:
341     r0   i
342     r1   twiddle
343     r2   pDat_0
344     r3   pDat_1
345     r4   accu1
346     r5   accu2
347     r6   accu3
348     r7   accu4
349     r8   val_tw
350     r9   accuX
351  */
352  PUSH{r4 - r9}
353
354  dst_IV_loop1 LDR r8,
355      [r1],
356# 4 // val_tw = *twiddle++
357      LDR r5,
358      [r2]  // accu2 = pDat_0[0]
359      LDR r6,
360      [ r2, #4 ]  // accu3 = pDat_0[1]
361      RSB r5,
362      r5,
363# 0 // accu2 = -accu2
364      SMULWT r9, r5,
365      r8  // accuX = (-accu2)*val_tw.l
366          LDR r4,
367      [ r3, # - 4 ]  // accu1 = pDat_1[-1]
368      RSB r9,
369      r9,
370# 0 // accuX = -(-accu2)*val_tw.l
371      SMLAWB r9, r4, r8,
372      r9  // accuX = accu1*val_tw.h-(-accu2)*val_tw.l
373          SMULWT r4,
374      r4,
375      r8  // accu1 = accu1*val_tw.l
376          LDR r7,
377      [ r3, # - 8 ]  // accu4 = pDat_1[-2]
378      SMLAWB r5,
379      r5, r8,
380      r4  // accu2 = (-accu2)*val_tw.t+accu1*val_tw.l
381          LDR r8,
382      [r1],
383# 4 // val_tw = *twiddle++
384      STR r5, [r2],
385# 4 // *pDat_0++ = accu2
386      STR r9, [r2],
387# 4 // *pDat_0++ = accu1 (accuX)
388      RSB r7, r7,
389# 0 // accu4 = -accu4
390      SMULWB r5, r7,
391      r8  // accu2 = (-accu4)*val_tw.h
392          SMULWB r4,
393      r6,
394      r8  // accu1 = (-accu4)*val_tw.l
395          RSB r5,
396      r5,
397# 0 // accu2 = -(-accu4)*val_tw.h
398      SMLAWT r6, r6, r8,
399      r5  // accu3 = (-accu4)*val_tw.l-(-accu3)*val_tw.h
400          SMLAWT r7,
401      r7, r8,
402      r4  // accu4 = (-accu3)*val_tw.l+(-accu4)*val_tw.h
403          STR r6,
404      [ r3, # - 4 ] !  // *--pDat_1 = accu3
405      STR r7,
406      [ r3, # - 4 ] !  // *--pDat_1 = accu4
407
408      LDR r8,
409      [r1],
410# 4 // val_tw = *twiddle++
411      LDR r5,
412      [r2]  // accu2 = pDat_0[0]
413      LDR r6,
414      [ r2, #4 ]  // accu3 = pDat_0[1]
415      RSB r5,
416      r5,
417# 0 // accu2 = -accu2
418      SMULWT r9, r5,
419      r8  // accuX = (-accu2)*val_tw.l
420          LDR r4,
421      [ r3, # - 4 ]  // accu1 = pDat_1[-1]
422      RSB r9,
423      r9,
424# 0 // accuX = -(-accu2)*val_tw.l
425      SMLAWB r9, r4, r8,
426      r9  // accuX = accu1*val_tw.h-(-accu2)*val_tw.l
427          SMULWT r4,
428      r4,
429      r8  // accu1 = accu1*val_tw.l
430          LDR r7,
431      [ r3, # - 8 ]  // accu4 = pDat_1[-2]
432      SMLAWB r5,
433      r5, r8,
434      r4  // accu2 = (-accu2)*val_tw.t+accu1*val_tw.l
435          LDR r8,
436      [r1],
437# 4 // val_tw = *twiddle++
438      STR r5, [r2],
439# 4 // *pDat_0++ = accu2
440      STR r9, [r2],
441# 4 // *pDat_0++ = accu1 (accuX)
442      RSB r7, r7,
443# 0 // accu4 = -accu4
444      SMULWB r5, r7,
445      r8  // accu2 = (-accu4)*val_tw.h
446          SMULWB r4,
447      r6,
448      r8  // accu1 = (-accu4)*val_tw.l
449          RSB r5,
450      r5,
451# 0 // accu2 = -(-accu4)*val_tw.h
452      SMLAWT r6, r6, r8,
453      r5  // accu3 = (-accu4)*val_tw.l-(-accu3)*val_tw.h
454          SMLAWT r7,
455      r7, r8,
456      r4  // accu4 = (-accu3)*val_tw.l+(-accu4)*val_tw.h
457          STR r6,
458      [ r3, # - 4 ] !  // *--pDat_1 = accu3
459      STR r7,
460      [ r3, # - 4 ] !  // *--pDat_1 = accu4
461
462      SUBS r0,
463      r0,
464# 4 // i-= 4
465      BNE dst_IV_loop1
466
467          POP{r4 - r9} BX lr
468}
469#endif /* FUNCTION_dst_IV_func1 */
470
471#ifdef FUNCTION_dst_IV_func2
472
473/* __attribute__((noinline)) */
474static inline void dst_IV_func2(int i, const FIXP_SPK *twiddle,
475                                FIXP_DBL *RESTRICT pDat_0,
476                                FIXP_DBL *RESTRICT pDat_1, int inc) {
477  FIXP_DBL accu1, accu2, accu3, accu4;
478  LONG val_tw;
479
480  accu4 = pDat_0[0];
481  accu3 = pDat_0[1];
482  accu4 >>= 1;
483  accu3 >>= 1;
484  accu4 = -accu4;
485
486  accu1 = pDat_1[-1];
487  accu2 = pDat_1[0];
488
489  *pDat_0++ = accu3;
490  *pDat_1-- = accu4;
491
492  __asm {
493    B       dst_IV_loop2_2nd_part
494
495        /* 50 cycles for 2 iterations = 25 cycles/iteration */
496
497dst_IV_loop2:
498
499    LDR     val_tw, [twiddle], inc, LSL #2  // val_tw = *twiddle; twiddle += inc
500
501    RSB     accu2, accu2, #0  // accu2 = -accu2
502    RSB     accu1, accu1, #0  // accu1 = -accu1
503    SMULWT  accu3, accu2, val_tw  // accu3 = (-accu2)*val_tw.l
504    SMULWT  accu4, accu1, val_tw  // accu4 = (-accu1)*val_tw.l
505    RSB     accu3, accu3, #0  // accu3 = -accu2*val_tw.l
506    SMLAWB  accu1, accu1, val_tw, accu3  // accu1 = -accu1*val_tw.h-(-accu2)*val_tw.l
507    SMLAWB  accu2, accu2, val_tw, accu4  // accu2 = (-accu1)*val_tw.l+(-accu2)*val_tw.h
508    STR     accu1, [pDat_1], #-4  // *pDat_1-- = accu1
509    STR     accu2, [pDat_0], #4  // *pDat_0++ = accu2
510
511    LDR     accu4, [pDat_0]  // accu4 = pDat_0[0]
512    LDR     accu3, [pDat_0, #4]  // accu3 = pDat_0[1]
513
514    RSB     accu4, accu4, #0  // accu4 = -accu4
515    RSB     accu3, accu3, #0  // accu3 = -accu3
516
517    SMULWB  accu1, accu3, val_tw  // accu1 = (-accu3)*val_tw.h
518    SMULWT  accu2, accu3, val_tw  // accu2 = (-accu3)*val_tw.l
519    RSB     accu1, accu1, #0  // accu1 = -(-accu3)*val_tw.h
520    SMLAWT  accu3, accu4, val_tw, accu1  // accu3 = (-accu4)*val_tw.l-(-accu3)*val_tw.h
521    SMLAWB  accu4, accu4, val_tw, accu2  // accu4 = (-accu3)*val_tw.l+(-accu4)*val_tw.h
522
523    LDR     accu1, [pDat_1, #-4]  // accu1 = pDat_1[-1]
524    LDR     accu2, [pDat_1]  // accu2 = pDat_1[0]
525
526    STR     accu3, [pDat_0], #4  // *pDat_0++ = accu3
527    STR     accu4, [pDat_1], #-4  // *pDat_1-- = accu4
528
529dst_IV_loop2_2nd_part:
530
531    LDR     val_tw, [twiddle], inc, LSL #2  // val_tw = *twiddle; twiddle += inc
532
533    RSB     accu2, accu2, #0  // accu2 = -accu2
534    RSB     accu1, accu1, #0  // accu1 = -accu1
535    SMULWT  accu3, accu2, val_tw  // accu3 = (-accu2)*val_tw.l
536    SMULWT  accu4, accu1, val_tw  // accu4 = (-accu1)*val_tw.l
537    RSB     accu3, accu3, #0  // accu3 = -accu2*val_tw.l
538    SMLAWB  accu1, accu1, val_tw, accu3  // accu1 = -accu1*val_tw.h-(-accu2)*val_tw.l
539    SMLAWB  accu2, accu2, val_tw, accu4  // accu2 = (-accu1)*val_tw.l+(-accu2)*val_tw.h
540    STR     accu1, [pDat_1], #-4  // *pDat_1-- = accu1
541    STR     accu2, [pDat_0], #4  // *pDat_0++ = accu2
542
543    LDR     accu4, [pDat_0]  // accu4 = pDat_0[0]
544    LDR     accu3, [pDat_0, #4]  // accu3 = pDat_0[1]
545
546    RSB     accu4, accu4, #0  // accu4 = -accu4
547    RSB     accu3, accu3, #0  // accu3 = -accu3
548
549    SMULWB  accu1, accu3, val_tw  // accu1 = (-accu3)*val_tw.h
550    SMULWT  accu2, accu3, val_tw  // accu2 = (-accu3)*val_tw.l
551    RSB     accu1, accu1, #0  // accu1 = -(-accu3)*val_tw.h
552    SMLAWT  accu3, accu4, val_tw, accu1  // accu3 = (-accu4)*val_tw.l-(-accu3)*val_tw.h
553    SMLAWB  accu4, accu4, val_tw, accu2  // accu4 = (-accu3)*val_tw.l+(-accu4)*val_tw.h
554
555    LDR     accu1, [pDat_1, #-4]  // accu1 = pDat_1[-1]
556    LDR     accu2, [pDat_1]  // accu2 = pDat_1[0]
557
558    STR     accu3, [pDat_0], #4  // *pDat_0++ = accu3
559    STR     accu4, [pDat_1], #-4  // *pDat_1-- = accu4
560
561    SUBS    i, i, #1
562    BNE     dst_IV_loop2
563  }
564
565  /* Last Sin and Cos value pair are the same */
566  accu1 = fMultDiv2(-accu1, WTC(0x5a82799a));
567  accu2 = fMultDiv2(-accu2, WTC(0x5a82799a));
568
569  *pDat_0 = accu1 + accu2;
570  *pDat_1 = accu1 - accu2;
571}
572#endif /* FUNCTION_dst_IV_func2 */
573