188b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com@//
288b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com@//  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
388b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com@//
488b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com@//  Use of this source code is governed by a BSD-style license
588b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com@//  that can be found in the LICENSE file in the root of the source
688b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com@//  tree. An additional intellectual property rights grant can be found
788b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com@//  in the file PATENTS.  All contributing project authors may
888b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com@//  be found in the AUTHORS file in the root of the source tree.
988b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com@//
1088b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com@//  This is a modification of omxSP_FFTFwd_RToCCS_S32_Sfs_s.s
1188b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com@//  to support float instead of SC32.
1288b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com@//
1388b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com
1488b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com@//
1588b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com@// Description:
1688b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com@// Compute FFT for a real signal
1788b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com@//
1888b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com@//
1988b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com
2088b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com
2188b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com@// Include standard headers
2288b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com
2388b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com#include "dl/api/arm/armCOMM_s.h"
2488b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com#include "dl/api/arm/omxtypes_s.h"
2588b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com
2688b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com@//        M_VARIANTS ARM1136JS
2788b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com
2888b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com@// Import symbols required from other files
2988b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com@// (For example tables)
3088b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com
3188b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        .extern  armSP_FFTFwd_CToC_FC32_Radix2_fs_OutOfPlace_unsafe_vfp
3288b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        .extern  armSP_FFTFwd_CToC_FC32_Radix4_fs_OutOfPlace_unsafe_vfp
3388b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        .extern  armSP_FFTFwd_CToC_FC32_Radix8_fs_OutOfPlace_unsafe_vfp
3488b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        .extern  armSP_FFTFwd_CToC_FC32_Radix4_OutOfPlace_unsafe_vfp
3588b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com
3688b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com@// Set debugging level
3788b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com@//DEBUG_ON    SETL {TRUE}
3888b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com
3988b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com
4088b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com
4188b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com@// Guarding implementation by the processor name
4288b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com
4388b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com@//    IF  ARM1136JS
4488b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com
4588b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com@//Input Registers
@// All names below are C-preprocessor aliases for ARM core registers (rN) or
@// single-precision VFP registers (sN).  Several aliases map to the same
@// physical register, so only one meaning of a register is valid at a time.
@// Comments placed on a #define line use /* */ so that they are removed by the
@// preprocessor instead of becoming part of the macro expansion.
4688b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com
4788b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com#define pSrc            r0
4888b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com#define pDst            r1
4988b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com#define pFFTSpec        r2
5088b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com
5188b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com
5288b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com@// Output registers
5388b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com#define result          r0
5488b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com
5588b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com@//Local Scratch Registers
5688b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com
5788b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com@// N=1 case
@// (scaleMinusOne, rnd, zero and Zero are not referenced by the code in this file.)
5888b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com#define scaleMinusOne   r2
5988b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com#define rnd             r2
6088b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com#define zero            r8
6188b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com#define Zero            r9
6288b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com
6388b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com
@// Registers used around the FFT stage calls.  Note the overlaps:
@// argTwiddle shares r1 with pDst, argDst shares r2 with pFFTSpec,
@// N shares r6 with subFFTNum, and order shares r14 with pTwiddleTmp.
@// argScale, diff, count, diffMinusOne and round are not referenced by the
@// code in this file.
6488b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com#define argTwiddle      r1
6588b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com#define argDst          r2
6688b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com#define argScale        r4
6788b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com#define pTwiddle        r4
6888b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com#define pOut            r5
6988b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com#define subFFTSize      r7
7088b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com#define subFFTNum       r6
7188b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com#define N               r6
7288b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com#define order           r14
7388b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com#define diff            r9
7488b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com#define count           r8
7588b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com#define diffMinusOne    r10
7688b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com#define round           r3
7788b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com
@// Registers used by the final complex-to-real fix-up.  step1 reuses r6
@// (N/subFFTNum) and twStep/t0 share r12.  Of t0..t5 only t0 is referenced
@// by the code in this file.
7888b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com#define step            r3
7988b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com#define step1           r6
8088b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com#define twStep          r12
8188b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com#define pTwiddleTmp     r14
8288b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com#define t0              r12
8388b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com#define t1              r14              /*@// pTwiddleTmp*/
8488b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com#define t2              r0
8588b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com#define t3              r1               /*@// pSrc,argTwiddle*/
8688b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com#define t4              r6
8788b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com#define t5              r7               /*@// step1,subFFTSize*/
8888b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com
@// Single-precision VFP registers.  s2/s3 carry three different meanings
@// (y0r/y0i, w1r/w1i and y1r/y1i); half (s15) is loaded with 0.5 at function
@// entry.
8988b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com#define x0r     s0
9088b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com#define x0i     s1
9188b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com#define y0r     s2
9288b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com#define y0i     s3
9388b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com#define x1r     s4
9488b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com#define x1i     s5
9588b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com#define w1r     s2
9688b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com#define w1i     s3
9788b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com#define w0r     s6
9888b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com#define w0i     s7
9988b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com#define y1r     s2              /*@// w1r,w1i*/
10088b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com#define y1i     s3
10188b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com#define st0     s8
10288b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com#define st1     s9
10388b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com#define st2     s10
10488b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com#define st3     s11
10588b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com#define st4     s12
10688b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com#define st5     s13
10788b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com#define half    s15
10888b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com
10988b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com
11088b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com
11188b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com
11288b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com    @// Allocate stack memory required by the function
11388b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com
11488b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com
11588b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com
11688b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com    @// Write function header
        @//
        @// omxSP_FFTFwd_RToCCS_F32_Sfs_vfp
        @//
        @// Forward FFT of a real float signal.  The N real inputs are run
        @// through an N/2-point complex FFT (external radix-2/4/8 stage
        @// routines) followed by the complex-to-real fix-up pass at
        @// finalComplexToRealFixup.
        @//
        @// Inputs:
        @//   r0 (pSrc)     - pointer to the input samples
        @//   r1 (pDst)     - pointer to the output buffer
        @//   r2 (pFFTSpec) - pointer to the spec structure; the fields read
        @//                   here are N (offset 0), pTwiddle (offset 8) and
        @//                   pBuf (offset 12)
        @// Output:
        @//   r0 (result)   - OMX_Sts_NoErr on every path
        @//
        @// Notes:
        @//   - For N <= 1 the routine returns without writing to pDst.
        @//   - M_START/M_END come from armCOMM_s.h; presumably they save and
        @//     restore the core registers up to r11 plus the return address
        @//     - TODO confirm against the macro definition.
        @//   - half (s15) is set once at entry and read again in
        @//     evenOddButterflyLoop after the FFT stage calls; this relies on
        @//     the callees not changing s15 - verify against the callees.
        @//   - Several steps depend on condition flags surviving across
        @//     non-flag-setting instructions; see the comments at each site
        @//     before reordering anything.
11788b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        M_START     omxSP_FFTFwd_RToCCS_F32_Sfs_vfp,r11
11888b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com
11988b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com@ Structure offsets for FFTSpec (bytes; pBitRev is not read by this routine)
12088b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        .set    ARMsFFTSpec_N, 0
12188b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        .set    ARMsFFTSpec_pBitRev, 4
12288b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        .set    ARMsFFTSpec_pTwiddle, 8
12388b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        .set    ARMsFFTSpec_pBuf, 12
12488b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com
12588b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        @// Define stack arguments
12688b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com
12788b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        @// Setup half value: half = 0.5f (IEEE-754 bit pattern 0x3f000000)
12888b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        movw    N, #0                   @// Use N as a temp.
12988b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        movt    N, #0x3f00
13088b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        vmov.f32 half, N
13188b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com
13288b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        @// Read the size from structure (its log2 is taken further down)
13388b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        LDR     N, [pFFTSpec, #ARMsFFTSpec_N]
13488b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com
13588b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        @// Read other structure parameters
13688b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        LDR     pTwiddle, [pFFTSpec, #ARMsFFTSpec_pTwiddle]
13788b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        LDR     pOut, [pFFTSpec, #ARMsFFTSpec_pBuf]
13888b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com
13988b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        @//  N<=1 is treated separately
14088b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        CMP     N,#1
14188b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        BGT     sizeGreaterThanOne
14288b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        // N<=1 is not supported
14388b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        @// Set return value
14488b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        MOV     result, #OMX_Sts_NoErr
14588b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        B       FunctionEnd
14688b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com
14788b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.comsizeGreaterThanOne:
14888b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        @// Do a N/2 point complex FFT including the scaling
14988b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com
15088b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        MOV     N,N,ASR #1              @// N/2 point complex FFT
15188b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        CLZ     order,N                 @// N = 2^order
15288b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        RSB     order,order,#31         @// order = 31 - clz(N/2) = floor(log2(N/2))
15388b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        MOV     subFFTSize,#1
        @// subFFTNum and N are both r6, so r6 already holds N/2 here.
15488b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        @//MOV     subFFTNum,N
15588b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com
15688b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com
        @// order == 0 (N/2 == 1): no FFT stage is run.  The LT-conditional
        @// instructions below copy the single complex input to the pBuf
        @// scratch, point pSrc at that copy, select pDst as the fix-up
        @// destination and jump straight to the fix-up.
        @// order == 1 falls through to the radix-2 first stage.
15788b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        CMP     order,#1
15888b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        BGT     orderGreaterthan1       @// order > 1
15988b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        vldmlt.f32 pSrc, {x0r, x0i}
16088b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        vstmlt.f32 pOut, {x0r, x0i}
16188b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        MOVLT   pSrc,pOut
16288b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        MOVLT   argDst,pDst
16388b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        BLT     FFTEnd
16488b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com
16588b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        MOV     argDst,pOut             @// Set input args to fft stages
16688b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        MOV     pOut,pDst               @// Set input args to fft stages
16788b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        MOV     argTwiddle,pTwiddle
16888b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com
16988b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        BL    armSP_FFTFwd_CToC_FC32_Radix2_fs_OutOfPlace_unsafe_vfp
17088b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        B     finalComplexToRealFixup
17188b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com
17288b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.comorderGreaterthan1:
17388b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com
        @// Bit 1 of order selects the buffers handed to the stages: when it
        @// is clear argDst = pDst; when it is set argDst = pBuf scratch and
        @// pDst is passed in pOut instead.
17488b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        TST     order, #2               @// Set input args to fft stages
17588b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        MOVEQ   argDst,pDst
17688b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        MOVNE   argDst,pOut
17788b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        MOVNE   pOut,pDst               @// Pass the first stage dest in RN5
17888b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        MOV     argTwiddle,pTwiddle
17988b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com
18088b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        @//check for even or odd order (even: radix-4 first stage, odd: radix-8)
18188b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com
18288b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        @// NOTE: The following combination of BL's would work fine
18388b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        @// even though the first BL would corrupt the flags. This is
18488b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        @// because the end of the "grpZeroSetLoop" loop inside
18588b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        @// armSP_FFTFwd_CToC_FC32_Radix4_fs_OutOfPlace_unsafe_vfp sets
18688b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        @// the Z flag to EQ
18788b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com
18888b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        TST     order,#0x00000001
18988b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        BLEQ    armSP_FFTFwd_CToC_FC32_Radix4_fs_OutOfPlace_unsafe_vfp
19088b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        BLNE    armSP_FFTFwd_CToC_FC32_Radix8_fs_OutOfPlace_unsafe_vfp
19188b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com
        @// Run general radix-4 stages until subFFTNum == 1.  Nothing in this
        @// file changes subFFTNum (r6) inside the loop, so the called stage
        @// is expected to update it - verify against the callee.
19288b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.comunscaledRadix4Loop:
19388b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        CMP        subFFTNum,#1
19488b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com         BEQ        FFTEnd
19588b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com         BL        armSP_FFTFwd_CToC_FC32_Radix4_OutOfPlace_unsafe_vfp
19688b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com         B        unscaledRadix4Loop
19788b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com
19888b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.comFFTEnd:
19988b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.comfinalComplexToRealFixup:
20088b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com
        @// From here on pSrc is read as the complex FFT result Z and the
        @// real-FFT output F is written through argDst.  The size comments
        @// below assume subFFTSize holds N/2 at this point (it is exactly 1
        @// on the order == 0 path; on the other paths it is presumably
        @// updated by the stage routines - confirm in the callees).
20188b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        @// step = N/2 * 8 bytes
20288b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        MOV     step,subFFTSize,LSL #3
20388b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        @// twStep = 3N/8 * 8 bytes pointing to W^1
20488b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        SUB     twStep,step,subFFTSize,LSL #1
20588b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        @// step1 = N/4 * 8 = N/2*4 bytes
20688b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        MOV     step1,subFFTSize,LSL #2
20788b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        @// (N/4-1)*8 bytes
20888b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        SUB     step1,step1,#8
20988b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com
21088b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        @// F(0) = 1/2 [Z(0) + Z'(0)] - j [Z(0) - Z'(0)]
21188b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        @// 1/2 [(a+jb) + (a-jb)] - j  [(a+jb) - (a-jb)]
21288b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        @// 1/2 [2a+j0] - j [0+j2b]
21388b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        @// (a+b, 0)
21488b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com
21588b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        @// F(N/2) =1/2 [Z(0) + Z'(0)] + j [Z(0) - Z'(0)]
21688b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        @// 1/2 [(a+jb) + (a-jb)] + j  [(a+jb) - (a-jb)]
21788b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        @// 1/2 [2a+j0] + j [0+j2b]
21888b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        @// (a-b, 0)
21988b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com
22088b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        @// F(0) and F(N/2)
22188b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        vldm.f32 pSrc!, {x0r, x0i}
22288b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        vadd.f32 y0r,x0r,x0i            @// F(0) = (Z0.r+Z0.i , 0)
22388b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        vsub.f32 x0r,x0r,x0i            @// F(N/2) = (Z0.r-Z0.i , 0)
        @// Set both imaginary parts to exactly 0.0 by moving an integer zero
        @// (the bit pattern of +0.0f) into them.  The earlier "x - x" form
        @// yields NaN when x is NaN/Inf, and y0i (s3) is not written by this
        @// routine before this point, so its old contents are arbitrary.
        @// r14 is used as the scratch: it is not read again before
        @// pTwiddleTmp (also r14) is assigned below.  MOV without S leaves
        @// the flags untouched.
        MOV     pTwiddleTmp,#0
22488b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        vmov.f32 y0i, pTwiddleTmp       @ y0i and x0i set to 0.0
22588b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        vmov.f32 x0i, pTwiddleTmp
22688b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com
22788b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        add      argDst, step
22888b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        vstm.f32 argDst, {x0r, x0i}     @// {x0r,x0i}->[argDst, step]
22988b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        sub      argDst, step
23088b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        vstm.f32 argDst!, {y0r, y0i}
23188b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com
        @// Remaining elements = N/2 - 2.  The flags set here are consumed by
        @// the BLT/BEQ below; the two ADDs in between do not set flags.
23288b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        SUBS    subFFTSize,subFFTSize,#2
23388b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com
23488b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        ADD     pTwiddleTmp,argTwiddle,#8       @// W^2
23588b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        ADD     argTwiddle,argTwiddle,twStep    @// W^1
23688b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        BLT     End                     @// N/2 == 1: only F(0), F(N/2)
23788b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        BEQ     lastElement             @// N/2 == 2: only the middle element is left
23888b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com
23988b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com
24088b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        @// F(k) = 1/2 [Z(k) +  Z'(N/2-k)] -j*W^(k) [Z(k) -  Z'(N/2-k)]
24188b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        @// Process 2 elements at a time. E.g: F(1) and F(N/2-1) since
24288b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        @// both of them require Z(1) and Z(N/2-1)
24388b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com
24488b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        ASR     subFFTSize,subFFTSize,#1        @// loop count = (N/2-2)/2 pairs
24588b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.comevenOddButterflyLoop:
24688b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com
24788b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        SUB     step,step,#16           @// (N/2-2)*8 bytes
24888b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com
        @// x1 = (c,d) is read step bytes ahead of pSrc, x0 = (a,b) at pSrc
        @// (post-incremented).  The twiddles are read the same way: w1 at
        @// argTwiddle + step1, w0 at argTwiddle (post-incremented).
24988b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        add      pSrc, step
25088b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        vldm.f32 pSrc, {x1r, x1i}       @// {x1r, x1i} = [pSrc, step]
25188b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        sub      pSrc, step
25288b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        vldm.f32 pSrc!, {x0r, x0i}
25388b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        add      argTwiddle, step1
25488b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        vldm.f32 argTwiddle, {w1r, w1i}  @// {w1r, w1i} = [argTwiddle, step1]
25588b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        sub      argTwiddle, step1
25688b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        vldm.f32 argTwiddle!, {w0r, w0i} @// {w0r, w0i} = [argTwiddle], #8
25788b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com
        @// The flags from this SUBS drive the BGT at the bottom of the loop;
        @// none of the instructions in between (VFP arithmetic, vstm,
        @// add/sub/MOV without S) set the flags.
25888b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        SUB     step1,step1,#8
25988b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        SUBS    subFFTSize,subFFTSize,#1
26088b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com
26188b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        vsub.f32 st2,x0r,x1r            @// a-c
26288b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        vadd.f32 st3,x0i,x1i            @// b+d
26388b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        vadd.f32 st0,x0r,x1r            @// a+c
26488b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        vsub.f32 st1,x0i,x1i            @// b-d
26588b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com
26688b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        vmul.f32 x1r,w1r,st2
26788b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        vmul.f32 x1i,w1r,st3
26888b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        vmla.f32 x1r,w1i,st3            @// x1r = w1r*st2 + w1i*st3
26988b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        @//RSB     x1r,x1r,#0
27088b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        vmls.f32 x1i,w1i,st2            @// x1i = w1r*st3 - w1i*st2
27188b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com
        @// y1 = (st0 - x1i, -(x1r + st1)); y1 overwrites w1 (both s2/s3),
        @// which is no longer needed at this point.
27288b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        vsub.f32 y1r, st0, x1i
27388b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        vadd.f32 y1i, x1r, st1
27488b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        vneg.f32 y1i, y1i
27588b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com
27688b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        vmul.f32  x0r,w0r,st2
27788b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        vmul.f32  x0i,w0r,st3
27888b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        vmls.f32  x0r,w0i,st3           @// x0r = w0r*st2 - w0i*st3
27988b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        vmla.f32  x0i,w0i,st2           @// x0i = w0r*st3 + w0i*st2
28088b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com
28188b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        vsub.f32   st4,st0,x0i          @// F(1)
28288b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        vadd.f32   st5,x0r,st1
28388b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com
28488b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com
        @// Apply the 1/2 factor of the F(k) formula to both results.
28588b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        vmul.f32 y1r, half
28688b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        vmul.f32 y1i, half
28788b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        vmul.f32 st4, half
28888b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        vmul.f32 st5, half
28988b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com
29088b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        add      argDst, step
29188b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        vstm.f32 argDst, {y1r, y1i}     @// {y1r,y1i} -> [argDst,step]
29288b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        sub      argDst, step
29388b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        vstm.f32 argDst!, {st4, st5}
29488b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com
29588b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com
29688b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        MOV     t0,argTwiddle           @// swap ptr for even and odd twiddles
29788b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        MOV     argTwiddle,pTwiddleTmp
29888b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        MOV     pTwiddleTmp,t0
29988b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com
30088b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        BGT     evenOddButterflyLoop
30188b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com
30288b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        @// Last element can be expanded as follows
30388b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        @// 1/2[Z(k) + Z'(k)] + j w^k [Z(k) - Z'(k)]
30488b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        @// 1/2[(a+jb) + (a-jb)] + j w^k [(a+jb) - (a-jb)]
30588b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        @// 1/2[2a+j0] + j (c+jd) [0+j2b]
30688b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        @// (a-bc, -bd)
        @// The code below stores (a, -b), i.e. the expression above
        @// evaluated with c = 0 and d = 1; no twiddle is loaded.
30788b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com
30888b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.comlastElement:
30988b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        vldm.f32 pSrc, {x0r, x0i}
31088b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        vneg.f32 x0i, x0i
31188b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        vstm.f32 argDst, {x0r, x0i}
31288b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com
31388b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.comEnd:
31488b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        @// Set return value
31588b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        MOV     result, #OMX_Sts_NoErr
31688b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com
31788b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.comFunctionEnd:
31888b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        @// Write function tail
31988b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com        M_END
32088b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com
32188b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com@//    ENDIF                                           @//ARM1136JS
32288b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com
32388b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com
32488b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com    @// Guarding implementation by the processor name
32588b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com
32688b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com
32788b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com
32888b02237dfdb25a9b602a7b6dd49665c96e8e890rtoy@google.com    .end
329