@//
@//  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
@//
@//  Use of this source code is governed by a BSD-style license
@//  that can be found in the LICENSE file in the root of the source
@//  tree. An additional intellectual property rights grant can be found
@//  in the file PATENTS.  All contributing project authors may
@//  be found in the AUTHORS file in the root of the source tree.
@//
@//  This is a modification of armSP_FFT_CToC_SC32_Radix2_unsafe_s.s
@//  to support float instead of SC32.
@//
1399b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
@// Description:
@// Compute a Radix 2 DIT in-order out-of-place FFT stage for an N point
@// complex signal.  This handles the general stage, not the first or last
@// stage.
@//
@//
2099b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
2199b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
2299b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com@// Include standard headers
2399b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
24bdf981cb383b7ec472ee86d2fedb53937285f894rtoy@google.com#include "dl/api/arm/armCOMM_s.h"
25bdf981cb383b7ec472ee86d2fedb53937285f894rtoy@google.com#include "dl/api/arm/omxtypes_s.h"
2699b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
2799b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
2899b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com@// Import symbols required from other files
2999b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com@// (For example tables)
3099b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
3199b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
3299b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
3399b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com@// Set debugging level
3499b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com@//DEBUG_ON    SETL {TRUE}
3599b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
3699b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
3799b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
3899b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com@// Guarding implementation by the processor name
3999b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
4099b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
4199b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
4299b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
4399b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com@// Guarding implementation by the processor name
4499b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
4599b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
@// ---------------------------------------------------------------------
@// Register aliases used by the FFTSTAGE macro, listed in register order.
@// ---------------------------------------------------------------------

@// Input registers (read by the stage on entry)

#define pSrc            r0      /* source buffer pointer                */
#define pTwiddle        r1      /* twiddle table pointer                */
#define pDst            r2      /* destination buffer pointer           */
#define subFFTNum       r6      /* halved by the stage                  */
#define subFFTSize      r7      /* doubled by the stage                 */


@// Output registers
@// (none beyond the updated input registers)


@// Local scratch registers

#define outPointStep    r3      /* byte stride between the two outputs  */
#define pointStep       r4      /* byte stride between the two inputs   */
#define grpCount        r5      /* outer (group) loop counter           */
#define setCount        r8      /* inner (set) loop counter             */
@// r9 was named const (RN 9) in the file this one was derived from; here
@// it carries two aliases, pTable and pTmp, for the same register.
#define pTable          r9
#define pTmp            r9
#define step            r10     /* second post-increment applied to pSrc */
#define dstStep         r11     /* second post-increment applied to pDst */


@// NEON registers, all accessed as pairs of 32-bit floats.
@// Note that qT0 and qT1 are D (64-bit) registers despite the q prefix.

#define dW      D0.F32          /* twiddle: lane 0 = wr, lane 1 = wi    */
#define dX0     D2.F32          /* point0, real parts                   */
#define dX1     D3.F32          /* point0, imaginary parts              */
#define dX2     D4.F32          /* point1, real parts                   */
#define dX3     D5.F32          /* point1, imaginary parts              */
#define dY0     D6.F32          /* first output, real parts             */
#define dY1     D7.F32          /* first output, imaginary parts        */
#define dY2     D8.F32          /* second output, real parts            */
#define dY3     D9.F32          /* second output, imaginary parts       */
#define qT0     D10.F32         /* twiddled point1, real parts          */
#define qT1     D11.F32         /* twiddled point1, imaginary parts     */
8399b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
8499b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
@// ---------------------------------------------------------------------
@// FFTSTAGE scaled, inverse, name
@//
@// Expands to one general radix-2 butterfly stage over interleaved
@// single-precision complex data, reading from pSrc and writing to pDst.
@//
@//   scaled   accepted for call compatibility; not referenced in the body
@//   inverse  TRUE multiplies point1 by the conjugate of the twiddle,
@//            any other value multiplies by the twiddle as stored
@//   name     suffix pasted onto the two loop labels so that separate
@//            expansions do not define the same label twice
@//
@// In:    pSrc, pDst, pTwiddle, subFFTNum, subFFTSize
@// Out:   pSrc and pDst exchanged and reset for the next stage,
@//        pTwiddle reset, subFFTNum halved, subFFTSize doubled
@// Clobb: outPointStep, pointStep, grpCount, setCount, step, dstStep,
@//        pTmp, dW, dX0-dX3, dY0-dY3, qT0, qT1, condition flags
@// NOTE(review): which of these registers are preserved for the caller
@//        is decided by M_START / the calling wrapper, not by this
@//        macro - confirm against armCOMM_s.h.
@// ---------------------------------------------------------------------
        .MACRO FFTSTAGE scaled, inverse, name

        @// Update the counts first so the input registers can be reused:
        @//   subFFTNum <- subFFTNum / 2   (grpSize)
        @//   grpCount  <- subFFTSize * 2

        LSR     subFFTNum,subFFTNum,#1                      @//grpSize
        LSL     grpCount,subFFTSize,#1


        @// pointStep = 4*grpSize bytes for now; it is doubled further down
        @// once outPointStep has been derived from it.
        MOV     pointStep,subFFTNum,LSL #2

        @// update subFFTSize for the next stage
        MOV     subFFTSize,grpCount

        @// outPointStep = grpCount * 4*grpSize bytes (4*size bytes).
        @// A full 32-bit MUL is used rather than SMULBB: SMULBB multiplies
        @// only the sign-extended low halfwords, so the product goes wrong
        @// as soon as either operand reaches 0x8000, while every other use
        @// of these registers in this macro is full width.
        MUL     outPointStep,grpCount,pointStep
        LSL     pointStep,pointStep,#1                      @// 8*grpSize bytes


        @// Second post-increments: together with the first one they leave
        @// the pointer 16 bytes (two complex floats) past where it started.
        RSB      step,pointStep,#16                         @// 16 - pointStep
        RSB      dstStep,outPointStep,#16                   @// 16 - outPointStep

        @// Loop on the groups

radix2GrpLoop\name :
        MOV      setCount,pointStep,LSR #3                  @// = grpSize
        @// One twiddle per group; pTwiddle then advances by pointStep bytes.
        VLD1     dW,[pTwiddle],pointStep                @//[wi | wr]


        @// Loop on the sets

radix2SetLoop\name :

        @// point0: dX0 = real parts, dX1 = imaginary parts
        VLD2    {dX0,dX1},[pSrc],pointStep
        @// point1: dX2 = real parts, dX3 = imaginary parts
        VLD2    {dX2,dX3},[pSrc],step

        @// Two complex points are consumed per pass.  The flags set here
        @// are the ones tested by the BGT at the bottom of the set loop;
        @// nothing in between writes them.
        SUBS    setCount,setCount,#2

        @// qT0/qT1 = point1 times the twiddle (or its conjugate).  The two
        @// products by wr are common to both directions; only the sign of
        @// the wi terms differs.
        VMUL    qT0,dX2,dW[0]                               @// xr*wr
        VMUL    qT1,dX3,dW[0]                               @// xi*wr

        .ifeqs  "\inverse", "TRUE"
            VMLA   qT0,dX3,dW[1]                       @// real part: + xi*wi
            VMLS   qT1,dX2,dW[1]                       @// imag part: - xr*wi

        .else

            VMLS   qT0,dX3,dW[1]                       @// real part: - xi*wi
            VMLA   qT1,dX2,dW[1]                       @// imag part: + xr*wi

        .endif

        @// Butterfly: first output = point0 - T, second = point0 + T
        VSUB    dY0,dX0,qT0
        VSUB    dY1,dX1,qT1
        VADD    dY2,dX0,qT0
        VADD    dY3,dX1,qT1

        VST2    {dY0,dY1},[pDst],outPointStep
        @// dstStep = -outPointStep + 16
        VST2    {dY2,dY3},[pDst],dstStep

        BGT     radix2SetLoop\name

        @// Next group: skip the half of the input already read as point1.
        @// ADD leaves the flags from SUBS intact for the BGT.
        SUBS    grpCount,grpCount,#2
        ADD     pSrc,pSrc,pointStep
        BGT     radix2GrpLoop\name


        @// Reset and Swap pSrc and pDst for the next stage
        MOV     pTmp,pDst
        @// pDst -= 4*size; pSrc -= 8*size bytes
        SUB     pDst,pSrc,outPointStep,LSL #1
        SUB     pSrc,pTmp,outPointStep

        @// Reset pTwiddle for the next stage
        @// pTwiddle -= 4*size bytes
        SUB     pTwiddle,pTwiddle,outPointStep


        .endm
17799b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
17899b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
17999b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
@// Forward-direction entry point: one FFTSTAGE expansion with the inverse
@// argument set to FALSE and FWD as the loop-label suffix.  M_START and
@// M_END come from armCOMM_s.h.
        M_START armSP_FFTFwd_CToC_FC32_Radix2_OutOfPlace_unsafe,r4
            FFTSTAGE "FALSE","FALSE",FWD
        M_END
18399b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
18499b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
18599b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
@// Inverse-direction entry point: one FFTSTAGE expansion with the inverse
@// argument set to TRUE and INV as the loop-label suffix.  M_START and
@// M_END come from armCOMM_s.h.
        M_START armSP_FFTInv_CToC_FC32_Radix2_OutOfPlace_unsafe,r4
            FFTSTAGE "FALSE","TRUE",INV
        M_END
18999b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
19099b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com
19199b31ea5d0f4629ceddf4852f96757c9c73654artoy@google.com        .end
192