199b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com@//
299b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com@//  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
399b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com@//
499b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com@//  Use of this source code is governed by a BSD-style license
599b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com@//  that can be found in the LICENSE file in the root of the source
699b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com@//  tree. An additional intellectual property rights grant can be found
799b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com@//  in the file PATENTS.  All contributing project authors may
899b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com@//  be found in the AUTHORS file in the root of the source tree.
999b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com@//
1099b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com@//
1199b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com@//  This is a modification of armSP_FFT_CToC_SC32_Radix4_unsafe_s.s
1299b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com@//  to support float instead of SC32.
1399b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com@//
1499b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
1599b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com@//
1699b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com@// Description:
1799b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com@// Compute a Radix 4 FFT stage for a N point complex signal
1899b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com@//
1999b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com@//
2099b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
2199b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
2299b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com@// Include standard headers
2399b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
24bdf981cb383b7ec472ee86d2fedb53937285f894rtoy@google.com#include "dl/api/arm/armCOMM_s.h"
25bdf981cb383b7ec472ee86d2fedb53937285f894rtoy@google.com#include "dl/api/arm/omxtypes_s.h"
2699b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
2799b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
2899b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com@// Import symbols required from other files
2999b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com@// (For example tables)
3099b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
3199b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
3299b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
3399b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
3499b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com@// Set debugging level
3599b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com@//DEBUG_ON    SETL {TRUE}
3699b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
3799b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
3899b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
3999b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com@// Guarding implementation by the processor name
4099b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
4199b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
4299b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
4399b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
4499b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com@// Guarding implementation by the processor name
4599b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
4699b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
4799b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com@// Import symbols required from other files
4899b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com@// (For example tables)
4999b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
5099b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
5199b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com@//Input Registers
5299b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
@// Core-register aliases for the values FFTSTAGE reads on entry.
@// pSrc, pDst, subFFTNum and subFFTSize are also rewritten by the macro;
@// pTwiddle is advanced inside the group loop and restored by the end of it.
5399b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com#define pSrc            r0
5499b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com#define pDst            r2
5599b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com#define pTwiddle        r1
5699b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com#define subFFTNum       r6
5799b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com#define subFFTSize      r7
5899b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
5999b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
6099b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
6199b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com@//Output Registers
6299b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
6399b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
6499b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com@//Local Scratch Registers
6599b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
@// Core-register aliases used as temporaries inside FFTSTAGE.
@// Note that t1 and grpCount both name r3: t1 is only written after the
@// group loop has finished with grpCount.
@// setCount is r14 (the link register), so the return address must have been
@// saved before the macro body runs.
@// NOTE(review): presumably M_START/M_END take care of that - confirm in
@// armCOMM_s.h.
6699b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com#define grpCount        r3
6799b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com#define pointStep       r4
6899b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com#define outPointStep    r5
6999b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com#define stepTwiddle     r12
7099b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com#define setCount        r14
7199b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com#define srcStep         r8
7299b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com#define setStep         r9
7399b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com#define dstStep         r10
7499b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com#define twStep          r11
7599b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com#define t1              r3
7699b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
7799b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com@// Neon Registers
7899b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
@// NEON register aliases, all typed as F32.
@//   dW1..dW3          twiddle factors for inputs 1..3 (lane 0 / lane 1 are
@//                     used as the two components of each factor)
@//   dXr*/dXi*         real / imaginary parts of the four butterfly inputs
@//   dYr*/dYi*         intermediate sums and differences
@//   dZr*/dZi*         twiddled inputs, later reused for the outputs
@// A Q register overlays two consecutive D registers (Qn = D2n:D2n+1), so the
@// Q aliases further down name real/imaginary pairs of the D aliases:
@//   qX0 = {dXr0,dXi0}, qYk = {dYrk,dYik}, qZk = {dZrk,dZik}.
@// NOTE(review): D8-D15 are callee-saved under the AAPCS; confirm that the
@// callers of these "unsafe" routines (or M_START) preserve them.
7999b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com#define dW1     D0.F32
8099b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com#define dW2     D1.F32
8199b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com#define dW3     D2.F32
8299b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
8399b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com#define dXr0    D4.F32
8499b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com#define dXi0    D5.F32
8599b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com#define dXr1    D6.F32
8699b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com#define dXi1    D7.F32
8799b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com#define dXr2    D8.F32
8899b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com#define dXi2    D9.F32
8999b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com#define dXr3    D10.F32
9099b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com#define dXi3    D11.F32
9199b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com#define dYr0    D12.F32
9299b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com#define dYi0    D13.F32
9399b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com#define dYr1    D14.F32
9499b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com#define dYi1    D15.F32
9599b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com#define dYr2    D16.F32
9699b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com#define dYi2    D17.F32
9799b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com#define dYr3    D18.F32
9899b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com#define dYi3    D19.F32
@// qT0..qT3 carry Q-style names but expand to D registers, and nothing in
@// the visible source references them.
9999b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com#define qT0     d16.f32
10099b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com#define qT1     d18.f32
10199b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com#define qT2     d12.f32
10299b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com#define qT3     d14.f32
10399b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com#define dZr0    D20.F32
10499b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com#define dZi0    D21.F32
10599b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com#define dZr1    D22.F32
10699b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com#define dZi1    D23.F32
10799b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com#define dZr2    D24.F32
10899b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com#define dZi2    D25.F32
10999b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com#define dZr3    D26.F32
11099b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com#define dZi3    D27.F32
11199b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
@// Quad-register views of the pairs defined above.
11299b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com#define qY0     Q6.F32
11399b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com#define qY1     Q7.F32
11499b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com#define qY2     Q8.F32
11599b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com#define qY3     Q9.F32
11699b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com#define qX0     Q2.F32
11799b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com#define qZ0     Q10.F32
11899b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com#define qZ1     Q11.F32
11999b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com#define qZ2     Q12.F32
12099b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com#define qZ3     Q13.F32
12199b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
@// FFTSTAGE scaled, inverse, name
@//
@// Body of one radix-4 FFT stage on interleaved complex F32 data, read from
@// pSrc and written to pDst.
@//
@// Macro arguments:
@//   scaled   - accepted but never referenced anywhere in this macro.
@//   inverse  - the string "TRUE" selects the inverse code paths (.ifeqs);
@//              any other value selects the forward paths.
@//   name     - suffix pasted onto the local labels so that each expansion
@//              gets its own label names.
@//
@// Register inputs: pSrc, pDst, pTwiddle, subFFTNum, subFFTSize.
@// On exit: subFFTSize has been multiplied by 4, subFFTNum divided by 4,
@// pSrc/pDst have been recomputed for the next stage, and pTwiddle is back at
@// its entry value (the per-group offsets applied to it cancel out).
@// Clobbers: r3-r5, r8-r12, r14, the condition flags, D0-D2 and D4-D27.
@//
@// NOTE(review): the signs used in the butterfly below only make sense
@// together with the contents of the twiddle table, which is not visible in
@// this file - check the table before changing any VADD/VSUB.
12299b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        .MACRO FFTSTAGE scaled, inverse , name
12399b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
12499b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        @// Define stack arguments
12599b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
12699b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
12799b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        @// Update grpCount and grpSize right away in order to reuse
12899b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        @// pGrpCount and pGrpSize regs
12999b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
        @// grpCount = 4 * subFFTSize. The group counter is kept pre-multiplied
        @// by 4 and counted down with SUBS #4 at the bottom of the group loop.
        @// subFFTNum is divided by 4 and subFFTSize takes the new grpCount.
13099b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        LSL     grpCount,subFFTSize,#2
13199b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        LSR     subFFTNum,subFFTNum,#2
13299b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        MOV     subFFTSize,grpCount
13399b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
        @// dW1, dW2 and dW3 are all primed from the same table entry at
        @// pTwiddle (no writeback); later groups reload them at the bottom of
        @// the group loop.
13499b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        VLD1     dW1,[pTwiddle]                    @//[wi | wr]
13599b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        @// pT0+1 increments pT0 by 8 bytes
13699b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        @// pT0+pointStep = increment of 8*pointStep bytes = 2*grpSize bytes
        @// pointStep = 2 * subFFTNum for now; it is scaled to a byte stride
        @// (8 * subFFTNum) a few instructions further down.
13799b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        MOV     pointStep,subFFTNum,LSL #1
13899b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
13999b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
14099b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        @// pOut0+1 increments pOut0 by 8 bytes
14199b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        @// pOut0+outPointStep == increment of 8*outPointStep bytes
14299b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        @//   = 2*size bytes
14399b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
14499b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        MOV     stepTwiddle,#0
14599b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        VLD1     dW2,[pTwiddle]                    @//[wi | wr]
        @// SMULBB multiplies the signed bottom halfwords only, so the product
        @// equals grpCount * pointStep only while both fit in 16 bits.
14699b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        SMULBB  outPointStep,grpCount,pointStep
14799b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        LSL     pointStep,pointStep,#2             @// 2*grpSize
14899b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
14999b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        VLD1     dW3,[pTwiddle]                    @//[wi | wr]
15099b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        MOV     srcStep,pointStep,LSL #1           @// srcStep = 2*pointStep
15199b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        ADD     setStep,srcStep,pointStep          @// setStep = 3*pointStep
15299b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
15399b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        RSB     setStep,setStep,#0                 @// setStep = - 3*pointStep
15499b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        SUB     srcStep,srcStep,#16                @// srcStep = 2*pointStep-16
15599b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
15699b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        MOV     dstStep,outPointStep,LSL #1
15799b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        ADD     dstStep,dstStep,outPointStep       @// dstStep = 3*outPointStep
15899b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        @// dstStep = - 3*outPointStep+16
15999b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        RSB     dstStep,dstStep,#16
16099b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
16199b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
16299b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
        @// ---- Group loop: one pass per group, grpCount steps down by 4 ----
        @// Each VLD2 below fetches 16 bytes (two complex values) and splits
        @// them into a real register and an imaginary register. The four
        @// loads prime X0..X3 for the first set of this group; the scalar
        @// instructions in between advance the twiddle offset by pointStep
        @// and derive twStep, which is used to rewind pTwiddle at the bottom
        @// of the loop.
16399b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.comradix4GrpLoop\name :
16499b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
16599b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        VLD2    {dXr0,dXi0},[pSrc],pointStep       @//  data[0]
16699b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        ADD      stepTwiddle,stepTwiddle,pointStep
16799b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        VLD2    {dXr1,dXi1},[pSrc],pointStep       @//  data[1]
16899b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        @// set pTwiddle to the first point
16999b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        ADD      pTwiddle,pTwiddle,stepTwiddle
17099b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        VLD2    {dXr2,dXi2},[pSrc],pointStep       @//  data[2]
17199b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        MOV      twStep,stepTwiddle,LSL #2
17299b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
17399b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        @//  data[3] & update pSrc for the next set
17499b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        VLD2    {dXr3,dXi3},[pSrc],setStep
17599b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        SUB      twStep,stepTwiddle,twStep         @// twStep = -3*stepTwiddle
17699b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
        @// setCount = pointStep / 8. It is decremented by 2 per set-loop pass
        @// because every pass handles two complex values per input.
17799b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        MOV      setCount,pointStep,LSR #3
17899b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        @// set pSrc to data[0] of the next set
17999b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        ADD     pSrc,pSrc,#16
18099b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        @// increment to data[1] of the next set
18199b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        ADD     pSrc,pSrc,pointStep
18299b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
18399b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
18499b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        @// Loop on the sets
18599b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
        @// Software-pipelined: each pass consumes the X registers loaded by
        @// the previous pass (or by the group prologue) and, interleaved with
        @// the arithmetic, reloads them for the next pass.
18699b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.comradix4SetLoop\name :
18799b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
18899b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
18999b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
        @// Twiddle multiply for inputs 1..3, writing the Z registers.
        @// Inverse: Zr = Xr*W[0] + Xi*W[1], Zi = Xi*W[0] - Xr*W[1]
        @// Forward: Zr = Xr*W[0] - Xi*W[1], Zi = Xi*W[0] + Xr*W[1]
        @// i.e. X times (W[0] + j*W[1]) in the forward case and X times its
        @// conjugate in the inverse case. An X pair is only reloaded after
        @// both of its products have been accumulated.
19099b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        .ifeqs  "\inverse", "TRUE"
19199b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com            VMUL   dZr1,dXr1,dW1[0]
19299b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com            VMUL   dZi1,dXi1,dW1[0]
19399b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com            VMUL   dZr2,dXr2,dW2[0]
19499b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com            VMUL   dZi2,dXi2,dW2[0]
19599b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com            VMUL   dZr3,dXr3,dW3[0]
19699b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com            VMUL   dZi3,dXi3,dW3[0]
19799b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
19899b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com            VMLA   dZr1,dXi1,dW1[1]                @// real part
19999b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com            VMLS   dZi1,dXr1,dW1[1]                @// imag part
20099b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
20199b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com            @//  data[1] for next iteration
20299b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com            VLD2    {dXr1,dXi1},[pSrc],pointStep
20399b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
20499b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com            VMLA   dZr2,dXi2,dW2[1]                @// real part
20599b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com            VMLS   dZi2,dXr2,dW2[1]                @// imag part
20699b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
20799b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com            @//  data[2] for next iteration
20899b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com            VLD2    {dXr2,dXi2},[pSrc],pointStep
20999b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
21099b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com            VMLA   dZr3,dXi3,dW3[1]                @// real part
21199b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com            VMLS   dZi3,dXr3,dW3[1]                @// imag part
21299b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        .else
21399b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com            VMUL   dZr1,dXr1,dW1[0]
21499b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com            VMUL   dZi1,dXi1,dW1[0]
21599b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com            VMUL   dZr2,dXr2,dW2[0]
21699b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com            VMUL   dZi2,dXi2,dW2[0]
21799b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com            VMUL   dZr3,dXr3,dW3[0]
21899b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com            VMUL   dZi3,dXi3,dW3[0]
21999b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
22099b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com            VMLS   dZr1,dXi1,dW1[1]                @// real part
22199b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com            VMLA   dZi1,dXr1,dW1[1]                @// imag part
22299b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
22399b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com            @//  data[1] for next iteration
22499b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com            VLD2    {dXr1,dXi1},[pSrc],pointStep
22599b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
22699b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com            VMLS   dZr2,dXi2,dW2[1]                @// real part
22799b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com            VMLA   dZi2,dXr2,dW2[1]                @// imag part
22899b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
22999b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com            @//  data[2] for next iteration
23099b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com            VLD2    {dXr2,dXi2},[pSrc],pointStep
23199b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
23299b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com            VMLS   dZr3,dXi3,dW3[1]                @// real part
23399b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com            VMLA   dZi3,dXr3,dW3[1]                @// imag part
23499b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        .endif
23599b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
23699b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        @//  data[3] & update pSrc to data[0]
2376ae14cb23aa5d478d267519f29d05558fd8f2f0drtoy@google.com        @// But don't read on the very last iteration because that reads past
2386ae14cb23aa5d478d267519f29d05558fd8f2f0drtoy@google.com	@// the end of pSrc. The last iteration is grpCount = 4, setCount = 2.
2396ae14cb23aa5d478d267519f29d05558fd8f2f0drtoy@google.com        cmp     grpCount, #4
2406ae14cb23aa5d478d267519f29d05558fd8f2f0drtoy@google.com        cmpeq   setCount, #2                      @// Test setCount if grpCount = 4
2416ae14cb23aa5d478d267519f29d05558fd8f2f0drtoy@google.com        @// These are executed only if both grpCount = 4 and setCount = 2
2426ae14cb23aa5d478d267519f29d05558fd8f2f0drtoy@google.com        addeq   pSrc, pSrc, setStep
2436ae14cb23aa5d478d267519f29d05558fd8f2f0drtoy@google.com        beq     radix4SkipRead\name
24499b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        VLD2    {dXr3,dXi3},[pSrc],setStep
        @// The SUBS below produces the flags tested by "BGT radix4SetLoop" at
        @// the end of the loop body; the NEON instructions and the plain ADD
        @// in between leave the flags untouched.
2456ae14cb23aa5d478d267519f29d05558fd8f2f0drtoy@google.comradix4SkipRead\name:
2466ae14cb23aa5d478d267519f29d05558fd8f2f0drtoy@google.com        SUBS    setCount,setCount,#2
24799b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
24899b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        @// finish first stage of 4 point FFT
        @// Y0 = X0 + Z2, Y2 = X0 - Z2, Y1 = Z1 + Z3, Y3 = Z1 - Z3.
        @// qX0 must be consumed by both instructions below before the VLD2
        @// that follows overwrites dXr0/dXi0 for the next pass.
24999b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        VADD    qY0,qX0,qZ2
25099b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        VSUB    qY2,qX0,qZ2
25199b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
25299b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        @//  data[0] for next iteration
        @// The :128 qualifier asserts that pSrc is 16-byte aligned here; the
        @// "!" post-increments pSrc by the 16 bytes transferred.
25399b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        VLD2    {dXr0,dXi0},[pSrc :128]!
25499b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        VADD    qY1,qZ1,qZ3
25599b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        VSUB    qY3,qZ1,qZ3
25699b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
25799b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        @// finish second stage of 4 point FFT
        @// Z0 = Y2 - Y1 and Z2 = Y2 + Y1 (full complex add/sub on Q registers).
        @// Z1 = (Yr0 - Yi3, Yi0 + Yr3) and Z3 = (Yr0 + Yi3, Yi0 - Yr3).
        @// Both branches compute the same four values; they differ only in the
        @// order of the stores: Z0,Z3,Z2,Z1 for inverse, Z0,Z1,Z2,Z3 for forward.
        @// Each VST2 writes two complex values; the first three advance pDst
        @// by outPointStep and the last one by dstStep (16 - 3*outPointStep),
        @// so pDst moves forward by 16 bytes per pass overall.
25899b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
25999b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        VSUB    qZ0,qY2,qY1
26099b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
26199b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
26299b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        .ifeqs  "\inverse", "TRUE"
26399b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
26499b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com            VADD    dZr3,dYr0,dYi3
26599b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com            VST2    {dZr0,dZi0},[pDst :128],outPointStep
26699b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com            VSUB    dZi3,dYi0,dYr3
26799b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
26899b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com            VADD    qZ2,qY2,qY1
26999b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com            VST2    {dZr3,dZi3},[pDst :128],outPointStep
27099b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
27199b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com            VSUB    dZr1,dYr0,dYi3
27299b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com            VST2    {dZr2,dZi2},[pDst :128],outPointStep
27399b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com            VADD    dZi1,dYi0,dYr3
27499b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
27599b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com            VST2    {dZr1,dZi1},[pDst :128],dstStep
27699b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
27799b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
27899b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        .else
27999b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
28099b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com            VSUB    dZr1,dYr0,dYi3
28199b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com            VST2    {dZr0,dZi0},[pDst :128],outPointStep
28299b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com            VADD    dZi1,dYi0,dYr3
28399b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
28499b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com            VADD    qZ2,qY2,qY1
28599b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com            VST2    {dZr1,dZi1},[pDst :128],outPointStep
28699b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
28799b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com            VADD    dZr3,dYr0,dYi3
28899b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com            VST2    {dZr2,dZi2},[pDst :128],outPointStep
28999b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com            VSUB    dZi3,dYi0,dYr3
29099b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
29199b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com            VST2    {dZr3,dZi3},[pDst :128],dstStep
29299b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
29399b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
29499b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        .endif
29599b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
29699b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        @// increment to data[1] of the next set
29799b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        ADD     pSrc,pSrc,pointStep
29899b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        BGT     radix4SetLoop\name
29999b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
30099b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
        @// Load the next group's twiddles from pTwiddle, pTwiddle+stepTwiddle
        @// and pTwiddle+2*stepTwiddle. The last load post-increments by
        @// twStep (-3*stepTwiddle), which brings pTwiddle back to the value it
        @// had before the ADD at the top of the group loop.
        @// SUBS sets the flags for "BGT radix4GrpLoop"; the VLD1s and the
        @// plain ADD that follow do not modify them.
30199b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        VLD1     dW1,[pTwiddle :64],stepTwiddle    @//[wi | wr]
30299b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        @// subtract 4 since grpCount multiplied by 4
30399b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        SUBS    grpCount,grpCount,#4
30499b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        VLD1     dW2,[pTwiddle :64],stepTwiddle    @//[wi | wr]
30599b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        @// increment pSrc for the next grp
30699b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        ADD     pSrc,pSrc,srcStep
30799b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        VLD1     dW3,[pTwiddle :64],twStep         @//[wi | wr]
30899b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        BGT     radix4GrpLoop\name
30999b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
31099b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
31199b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        @// Reset and Swap pSrc and pDst for the next stage
        @// t1 shares r3 with grpCount, which is no longer needed here.
        @// New pDst = old pSrc - 4*outPointStep; new pSrc = old pDst -
        @// outPointStep.
31299b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        MOV     t1,pDst
31399b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        @// pDst -= 2*size; pSrc -= 8*size bytes
31499b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        SUB     pDst,pSrc,outPointStep,LSL #2
31599b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        SUB     pSrc,t1,outPointStep
31699b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
31799b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
31899b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        .endm
31999b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
32099b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
@// Forward-transform entry point: expands FFTSTAGE with inverse = "FALSE"
@// and the label suffix FWD. The "scaled" argument is passed as "FALSE" but
@// FFTSTAGE never reads it.
@// M_START / M_END are not defined in this file.
@// NOTE(review): presumably they come from armCOMM_s.h and emit the function
@// prologue/epilogue; confirm there what the trailing r4 argument saves.
32199b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        M_START armSP_FFTFwd_CToC_FC32_Radix4_OutOfPlace_unsafe,r4
32299b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com            FFTSTAGE "FALSE","FALSE",FWD
32399b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        M_END
32499b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
32599b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
@// Inverse-transform entry point: expands FFTSTAGE with inverse = "TRUE"
@// and the label suffix INV. The "scaled" argument is passed as "FALSE" but
@// FFTSTAGE never reads it.
@// M_START / M_END are not defined in this file.
@// NOTE(review): presumably they come from armCOMM_s.h and emit the function
@// prologue/epilogue; confirm there what the trailing r4 argument saves.
32699b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        M_START armSP_FFTInv_CToC_FC32_Radix4_OutOfPlace_unsafe,r4
32799b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com            FFTSTAGE "FALSE","TRUE",INV
32899b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        M_END
32999b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
33099b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
33199b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        .end
332