@//
@//  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
@//
@//  Use of this source code is governed by a BSD-style license
@//  that can be found in the LICENSE file in the root of the source
@//  tree. An additional intellectual property rights grant can be found
@//  in the file PATENTS.  All contributing project authors may
@//  be found in the AUTHORS file in the root of the source tree.
@//
@//  This is a modification of omxSP_FFTFwd_RToCCS_S32_Sfs_s.s
@//  to support float instead of SC32.
@//

@//
@// Description:
@// Compute FFT for a real signal
@//
@//


@// Include standard headers

#include "dl/api/arm/armCOMM_s.h"
#include "dl/api/arm/omxtypes_s.h"


@// Import symbols required from other files
@// (For example tables)
@//
@// Complex-to-complex FC32 stage kernels called from this file. Each symbol
@// is declared exactly once. In the call sequences below a "_fs" kernel is
@// always called first, unsuffixed kernels in the middle, and a "_ls" kernel
@// last.

        .extern  armSP_FFTFwd_CToC_FC32_Radix2_fs_OutOfPlace_unsafe
        .extern  armSP_FFTFwd_CToC_FC32_Radix4_fs_OutOfPlace_unsafe
        .extern  armSP_FFTFwd_CToC_FC32_Radix8_fs_OutOfPlace_unsafe
        .extern  armSP_FFTFwd_CToC_FC32_Radix2_OutOfPlace_unsafe
        .extern  armSP_FFTFwd_CToC_FC32_Radix4_OutOfPlace_unsafe
        .extern  armSP_FFTFwd_CToC_FC32_Radix2_ls_OutOfPlace_unsafe
        .extern  armSP_FFTFwd_CToC_FC32_Radix4_ls_OutOfPlace_unsafe

@// Set debugging level
@//DEBUG_ON    SETL {TRUE}



@// Guarding implementation by the processor name


@// Register aliases
@//
@// These are plain preprocessor substitutions. Several aliases map to the
@// same physical register (for example pDst and argTwiddle are both r1, and
@// pFFTSpec, argDst and diffMinusOne are all r2), so writing one alias
@// overwrites every other alias of that register.

@//Input Registers

#define pSrc            r0
#define pDst            r1
#define pFFTSpec        r2
#define scale           r3


@// Output registers
#define result          r0

@//Local Scratch Registers

#define argTwiddle      r1
#define argDst          r2
#define argScale        r4
#define tmpOrder        r4
#define pTwiddle        r4
#define pOut            r5
#define subFFTSize      r7
#define subFFTNum       r6
#define N               r6
#define order           r14
#define diff            r9
@// Total num of radix stages required to complete the FFT
#define count           r8
#define x0r             r4
#define x0i             r5
#define diffMinusOne    r2
#define subFFTSizeTmp   r6
#define step            r3
#define step1           r4
#define twStep          r8
#define zero            r9
#define pTwiddleTmp     r5
#define t0              r10

@// Neon registers
@//
@// All NEON aliases are 64-bit D registers viewed as .f32 (two lanes), except
@// the two .s64 views. Note that qT0..qT3 are D-register aliases despite the
@// "q" prefix in their names.

#define dX0       d0.f32
#define dzero     d1.f32
#define dZero     d2.f32
#define dShift    d3.f32
#define dX0r      d2.f32
#define dX0i      d3.f32
#define dX1r      d4.f32
#define dX1i      d5.f32
#define dT0       d6.f32
#define dT1       d7.f32
#define dT2       d8.f32
#define dT3       d9.f32
#define qT0       d10.f32
#define qT1       d12.f32
#define dW0r      d14.f32
#define dW0i      d15.f32
#define dW1r      d16.f32
#define dW1i      d17.f32
#define dY0r      d14.f32
#define dY0i      d15.f32
#define dY1r      d16.f32
#define dY1i      d17.f32
#define dY0rS64   d14.s64
#define dY0iS64   d15.s64
#define qT2       d18.f32
#define qT3       d20.f32
@// Last three elements
#define dX1       d3.f32
#define dW0       d4.f32
#define dW1       d5.f32
#define dY0       d10.f32
#define dY1       d11.f32
#define dY2       d12.f32
#define dY3       d13.f32

@// Holds the constant 0.5 in the butterfly loop (same register as dX0)
#define half      d0.f32

13199b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com    @// Allocate stack memory required by the function
13299b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
13399b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com    @// Write function header
13499b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        M_START     omxSP_FFTFwd_RToCCS_F32_Sfs,r11,d15
13599b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
13699b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com@ Structure offsets for the FFTSpec
13799b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        .set    ARMsFFTSpec_N, 0
13899b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        .set    ARMsFFTSpec_pBitRev, 4
13999b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        .set    ARMsFFTSpec_pTwiddle, 8
14099b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        .set    ARMsFFTSpec_pBuf, 12
14199b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
14299b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        @// Define stack arguments
14399b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
14499b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        @// Read the size from structure and take log
14599b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        LDR     N, [pFFTSpec, #ARMsFFTSpec_N]
14699b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
14799b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        @// Read other structure parameters
14899b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        LDR     pTwiddle, [pFFTSpec, #ARMsFFTSpec_pTwiddle]
14999b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        LDR     pOut, [pFFTSpec, #ARMsFFTSpec_pBuf]
15099b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
15199b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        @//  N=1 Treat seperately
15299b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        CMP     N,#1
15399b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        BGT     sizeGreaterThanOne
15499b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        VLD1    dX0[0],[pSrc]
15599b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        MOV     zero,#0
15699b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        VMOV    dzero[0],zero
15799b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        VMOV    dZero[0],zero
15899b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        VST3    {dX0[0],dzero[0],dZero[0]},[pDst]
15999b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
16099b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        B       End
16199b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
16299b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
16399b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
16499b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.comsizeGreaterThanOne:
16599b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        @// Do a N/2 point complex FFT including the scaling
16699b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
16799b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        MOV     N,N,ASR #1                          @// N/2 point complex FFT
16899b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
16999b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        CLZ     order,N                             @// N = 2^order
17099b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        RSB     order,order,#31
17199b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        MOV     subFFTSize,#1
17299b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        @//MOV     subFFTNum,N
17399b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
17499b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        CMP     order,#3
17599b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        BGT     orderGreaterthan3                   @// order > 3
17699b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
17799b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        CMP     order,#1
17899b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        BGE     orderGreaterthan0                   @// order > 0
17999b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        VLD1    dX0,[pSrc]
18099b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        VST1    dX0,[pOut]
18199b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        MOV     pSrc,pOut
18299b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        MOV     argDst,pDst
18399b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        BLT     FFTEnd
18499b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
18599b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.comorderGreaterthan0:
18699b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        @// set the buffers appropriately for various orders
18799b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        CMP     order,#2
18899b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        MOVEQ   argDst,pDst
18999b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        MOVNE   argDst,pOut
19099b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        @// Pass the first stage destination in RN5
19199b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        MOVNE   pOut,pDst
19299b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        MOV     argTwiddle,pTwiddle
19399b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
19499b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        CMP     order,#1
19599b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        BGT     orderGreaterthan1
19699b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        @// order = 1
19799b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        BL      armSP_FFTFwd_CToC_FC32_Radix2_fs_OutOfPlace_unsafe
19899b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        B       FFTEnd
19999b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
20099b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.comorderGreaterthan1:
20199b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        CMP     order,#2
20299b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        BGT     orderGreaterthan2
20399b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        @// order =2
20499b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        BL      armSP_FFTFwd_CToC_FC32_Radix2_fs_OutOfPlace_unsafe
20599b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        BL      armSP_FFTFwd_CToC_FC32_Radix2_ls_OutOfPlace_unsafe
20699b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        B       FFTEnd
20799b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
20899b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.comorderGreaterthan2:@// order =3
20999b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        BL      armSP_FFTFwd_CToC_FC32_Radix2_fs_OutOfPlace_unsafe
21099b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        BL      armSP_FFTFwd_CToC_FC32_Radix2_OutOfPlace_unsafe
21199b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        BL      armSP_FFTFwd_CToC_FC32_Radix2_ls_OutOfPlace_unsafe
21299b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
21399b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        B       FFTEnd
21499b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
21599b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
21699b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
21799b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.comorderGreaterthan3:
21899b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.comspecialScaleCase:
21999b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
22099b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        @// Set input args to fft stages
22199b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        TST     order, #2
22299b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        MOVEQ   argDst,pDst
22399b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        MOVNE   argDst,pOut
22499b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        @// Pass the first stage destination in RN5
22599b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        MOVNE   pOut,pDst
22699b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        MOV     argTwiddle,pTwiddle
22799b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
22899b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        @//check for even or odd order
22999b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        @// NOTE: The following combination of BL's would work fine even though
23099b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        @// the first BL would corrupt the flags. This is because the end of
23199b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        @// the "grpZeroSetLoop" loop inside
23299b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        @// armSP_FFTFwd_CToC_FC32_Radix4_fs_OutOfPlace_unsafe sets the Z flag
23399b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        @// to EQ
23499b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
23599b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        TST     order,#0x00000001
23699b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        BLEQ    armSP_FFTFwd_CToC_FC32_Radix4_fs_OutOfPlace_unsafe
23799b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        BLNE    armSP_FFTFwd_CToC_FC32_Radix8_fs_OutOfPlace_unsafe
23899b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
23999b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        CMP        subFFTNum,#4
24099b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        BLT     FFTEnd
24199b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
24299b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
24399b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.comunscaledRadix4Loop:
24499b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        BEQ        lastStageUnscaledRadix4
24599b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com         BL        armSP_FFTFwd_CToC_FC32_Radix4_OutOfPlace_unsafe
24699b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com         CMP        subFFTNum,#4
24799b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com         B        unscaledRadix4Loop
24899b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
24999b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.comlastStageUnscaledRadix4:
25099b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        BL      armSP_FFTFwd_CToC_FC32_Radix4_ls_OutOfPlace_unsafe
25199b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        B        FFTEnd
25299b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
25399b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
25499b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.comFFTEnd:
25599b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.comfinalComplexToRealFixup:
25699b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
25799b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
25899b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        @// F(0) = 1/2[Z(0) + Z'(0)] - j [Z(0) - Z'(0)]
25999b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        @// 1/2[(a+jb) + (a-jb)] - j  [(a+jb) - (a-jb)]
26099b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        @// 1/2[2a+j0] - j [0+j2b]
26199b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        @// (a+b, 0)
26299b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
26399b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        @// F(N/2) = 1/2[Z(0) + Z'(0)] + j [Z(0) - Z'(0)]
26499b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        @// 1/2[(a+jb) + (a-jb)] + j  [(a+jb) - (a-jb)]
26599b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        @// 1/2[2a+j0] + j [0+j2b]
26699b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        @// (a-b, 0)
26799b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
26899b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        @// F(0) and F(N/2)
26999b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        VLD2    {dX0r[0],dX0i[0]},[pSrc]!
27099b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        MOV     zero,#0
27199b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        VMOV    dX0r[1],zero
27299b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        MOV     step,subFFTSize,LSL #3            @// step = N/2 * 8 bytes
27399b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        VMOV    dX0i[1],zero
27499b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        @// twStep = 3N/8 * 8 bytes pointing to W^1
27599b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        SUB     twStep,step,subFFTSize,LSL #1
27699b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
27799b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        VADD    dY0r,dX0r,dX0i                    @// F(0) = ((Z0.r+Z0.i) , 0)
27899b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        MOV     step1,subFFTSize,LSL #2           @// step1 = N/2 * 4 bytes
27999b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        VSUB    dY0i,dX0r,dX0i                    @// F(N/2) = ((Z0.r-Z0.i) , 0)
28099b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        SUBS    subFFTSize,subFFTSize,#2
28199b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
28299b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        VST1    dY0r,[argDst],step
28399b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        ADD     pTwiddleTmp,argTwiddle,#8         @// W^2
28499b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        VST1    dY0i,[argDst]!
28599b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        ADD     argTwiddle,argTwiddle,twStep      @// W^1
28699b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
28799b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        VDUP    dzero,zero
28899b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        SUB     argDst,argDst,step
28999b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
29099b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        BLT     End
29199b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        BEQ     lastElement
29299b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        SUB     step,step,#24
29399b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        SUB     step1,step1,#8                    @// (N/4-1)*8 bytes
29499b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
29599b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        @// F(k) = 1/2[Z(k) +  Z'(N/2-k)] -j*W^(k) [Z(k) -  Z'(N/2-k)]
29699b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        @// Note: W^k is stored as negative values in the table
29799b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        @// Process 4 elements at a time. E.g: F(1),F(2) and F(N/2-2),F(N/2-1)
29899b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        @// since both of them require Z(1),Z(2) and Z(N/2-2),Z(N/2-1)
29999b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
3006b2bf4b577035e2be7e5b096a7148171e5ffadd2kma@webrtc.org        VMOV    half, #0.5
30199b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
30299b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.comevenOddButterflyLoop:
30399b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
30499b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
30599b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        VLD1    dW0r,[argTwiddle],step1
30699b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        VLD1    dW1r,[argTwiddle]!
30799b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
30899b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        VLD2    {dX0r,dX0i},[pSrc],step
30999b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        SUB     argTwiddle,argTwiddle,step1
31099b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        VLD2    {dX1r,dX1i},[pSrc]!
31199b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
31299b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
31399b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
31499b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        SUB     step1,step1,#8                    @// (N/4-2)*8 bytes
31599b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        VLD1    dW0i,[pTwiddleTmp],step1
31699b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        VLD1    dW1i,[pTwiddleTmp]!
31799b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        SUB     pSrc,pSrc,step
31899b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
31999b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        SUB     pTwiddleTmp,pTwiddleTmp,step1
32099b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        VREV64  dX1r,dX1r
32199b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        VREV64  dX1i,dX1i
32299b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        SUBS    subFFTSize,subFFTSize,#4
32399b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
32499b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
32599b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
32699b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        VSUB    dT2,dX0r,dX1r                     @// a-c
32799b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        SUB     step1,step1,#8
32899b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        VADD    dT0,dX0r,dX1r                     @// a+c
32999b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        VSUB    dT1,dX0i,dX1i                     @// b-d
33099b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        VADD    dT3,dX0i,dX1i                     @// b+d
33199b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        VMUL   dT0,dT0,half[0]
33299b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        VMUL   dT1,dT1,half[0]
33399b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        VZIP    dW1r,dW1i
33499b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        VZIP    dW0r,dW0i
33599b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
33699b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
33799b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        VMUL   qT0,dW1r,dT2
33899b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        VMUL   qT1,dW1r,dT3
33999b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        VMUL   qT2,dW0r,dT2
34099b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        VMUL   qT3,dW0r,dT3
34199b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
34299b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        VMLA   qT0,dW1i,dT3
34399b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        VMLS   qT1,dW1i,dT2
34499b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
34599b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        VMLS   qT2,dW0i,dT3
34699b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        VMLA   qT3,dW0i,dT2
34799b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
34899b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
34999b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        VMUL  dX1r,qT0,half[0]
35099b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        VMUL  dX1i,qT1,half[0]
35199b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
35299b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        VSUB    dY1r,dT0,dX1i                     @// F(N/2 -1)
35399b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        VADD    dY1i,dT1,dX1r
35499b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        VNEG    dY1i,dY1i
35599b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
35699b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        VREV64  dY1r,dY1r
35799b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        VREV64  dY1i,dY1i
35899b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
35999b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
36099b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        VMUL  dX0r,qT2,half[0]
36199b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        VMUL  dX0i,qT3,half[0]
36299b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
36399b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        VSUB    dY0r,dT0,dX0i                     @// F(1)
36499b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        VADD    dY0i,dT1,dX0r
36599b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
36699b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
36799b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        VST2    {dY0r,dY0i},[argDst],step
36899b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        VST2    {dY1r,dY1i},[argDst]!
36999b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        SUB     argDst,argDst,step
37099b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        SUB     step,step,#32                     @// (N/2-4)*8 bytes
37199b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
37299b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
37399b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        BGT     evenOddButterflyLoop
37499b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
37599b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        @// set both the ptrs to the last element
37699b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        SUB     pSrc,pSrc,#8
37799b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        SUB     argDst,argDst,#8
37899b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
37999b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
38099b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
38199b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        @// Last element can be expanded as follows
38299b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        @// 1/2[Z(k) + Z'(k)] + j w^k [Z(k) - Z'(k)]
38399b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        @// 1/2[(a+jb) + (a-jb)] + j w^k [(a+jb) - (a-jb)]
38499b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        @// 1/2[2a+j0] + j (c+jd) [0+j2b]
38599b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        @// (a-bc, -bd)
38699b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        @// Since (c,d) = (0,1) for the last element, result is just (a,-b)
38799b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
38899b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.comlastElement:
38999b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        VLD1    dX0r,[pSrc]
39099b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
39199b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        VST1    dX0r[0],[argDst]!
39299b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        VNEG    dX0r,dX0r
39399b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        VST1    dX0r[1],[argDst]!
39499b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
39599b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.comEnd:
39699b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        @// Set return value
39799b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        MOV     result, #OMX_Sts_NoErr
39899b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
39999b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        @// Write function tail
40099b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        M_END
40199b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
40299b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        .end
403