//
//  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
//
//  Use of this source code is governed by a BSD-style license
//  that can be found in the LICENSE file in the root of the source
//  tree. An additional intellectual property rights grant can be found
//  in the file PATENTS.  All contributing project authors may
//  be found in the AUTHORS file in the root of the source tree.
//
//  This is a modification of armSP_FFT_CToC_SC32_Radix2_ls_unsafe_s.S
//  to support float instead of SC32.
//

//
// Description:
// Compute the last stage of a radix-2 DIT in-order out-of-place FFT
// for an N-point complex signal.
//
//


// Include standard headers

#include "dl/api/arm/arm64COMM_s.h"
#include "dl/api/arm/omxtypes_s.h"


// Import symbols required from other files
// (For example tables)




// Set debugging level
//DEBUG_ON    SETL {TRUE}


// Guarding implementation by the processor name


// Register aliases used by FFTSTAGE below.
//
// These are C-preprocessor aliases. Comments are kept on their own lines
// rather than after a #define so that no comment text can end up inside an
// alias expansion.

// Input registers
//
//   pSrc         input samples, read with post-increment by the stage loop
//   pDst         output samples, written with post-increment by the stage loop
//   pTwiddle     twiddle factors, read with post-increment by the stage loop
//   pSubFFTNum   not referenced by FFTSTAGE in this file
//   pSubFFTSize  dereferenced once at the top of FFTSTAGE to fetch subFFTSize

#define pSrc            x0
#define pDst            x1
#define pTwiddle        x2
#define pSubFFTNum      x3
#define pSubFFTSize     x4


// Output registers
//
// (none)


// Local scratch registers
//
//   subFFTNum     set to 1 by FFTSTAGE (register only)
//   subFFTSize    value loaded from [pSubFFTSize], later overwritten with
//                 2 * subFFTSize (register only)
//   outPointStep  subFFTSize << 3, used as a byte post-increment for pDst
//   grpCount      loop counter, starts at 2 * subFFTSize and drops by 4 per pass
//   dstStep       16 - outPointStep, second byte post-increment for pDst

#define subFFTNum       x5
#define subFFTSize      x6
#define outPointStep    x8
#define grpCount        x9
#define dstStep         x10

// NEON registers
//
// All aliases are two-lane single-precision (.2s) views; this includes qT0 and
// qT1 despite their q prefix.
//
// NOTE(review): v8, v9, v10 and v12 lie in the v8-v15 range whose low 64 bits
// are callee-saved under AAPCS64. The entry points pass d12 to M_START,
// presumably so that the prologue/epilogue preserves them - confirm against
// arm64COMM_s.h before changing either side.

#define dWr     v0.2s
#define dWi     v1.2s
#define dXr0    v2.2s
#define dXi0    v3.2s
#define dXr1    v4.2s
#define dXi1    v5.2s
#define dYr0    v6.2s
#define dYi0    v7.2s
#define dYr1    v8.2s
#define dYi1    v9.2s
#define qT0     v10.2s
#define qT1     v12.2s

775d8507771824df2b96d9c6f2fd55a47fcfd9dec9rtoy@google.com        .macro FFTSTAGE scaled, inverse, name
784d09f5abfba45163d1296272ac7d2ddc2cd5f9fbrtoy@google.com
794d09f5abfba45163d1296272ac7d2ddc2cd5f9fbrtoy@google.com        // Move parameters into our work registers
804d09f5abfba45163d1296272ac7d2ddc2cd5f9fbrtoy@google.com        ldr     subFFTSize, [pSubFFTSize]
814d09f5abfba45163d1296272ac7d2ddc2cd5f9fbrtoy@google.com
824d09f5abfba45163d1296272ac7d2ddc2cd5f9fbrtoy@google.com        lsl     outPointStep, subFFTSize, #3
834d09f5abfba45163d1296272ac7d2ddc2cd5f9fbrtoy@google.com
844d09f5abfba45163d1296272ac7d2ddc2cd5f9fbrtoy@google.com        // Update grpCount and grpSize rightaway
854d09f5abfba45163d1296272ac7d2ddc2cd5f9fbrtoy@google.com
864d09f5abfba45163d1296272ac7d2ddc2cd5f9fbrtoy@google.com        MOV     subFFTNum,#1                          //after the last stage
874d09f5abfba45163d1296272ac7d2ddc2cd5f9fbrtoy@google.com        LSL     grpCount,subFFTSize,#1
884d09f5abfba45163d1296272ac7d2ddc2cd5f9fbrtoy@google.com
894d09f5abfba45163d1296272ac7d2ddc2cd5f9fbrtoy@google.com        // update subFFTSize for the next stage
904d09f5abfba45163d1296272ac7d2ddc2cd5f9fbrtoy@google.com        MOV     subFFTSize,grpCount
914d09f5abfba45163d1296272ac7d2ddc2cd5f9fbrtoy@google.com
924d09f5abfba45163d1296272ac7d2ddc2cd5f9fbrtoy@google.com        rsb     dstStep,outPointStep,#16
934d09f5abfba45163d1296272ac7d2ddc2cd5f9fbrtoy@google.com
944d09f5abfba45163d1296272ac7d2ddc2cd5f9fbrtoy@google.com        // Loop on 2 grps at a time for the last stage
954d09f5abfba45163d1296272ac7d2ddc2cd5f9fbrtoy@google.com
964d09f5abfba45163d1296272ac7d2ddc2cd5f9fbrtoy@google.comradix2lsGrpLoop\name :
974d09f5abfba45163d1296272ac7d2ddc2cd5f9fbrtoy@google.com        // dWr = [pTwiddle[0].Re, pTwiddle[1].Re]
984d09f5abfba45163d1296272ac7d2ddc2cd5f9fbrtoy@google.com        // dWi = [pTwiddle[0].Im, pTwiddle[1].Im]
994d09f5abfba45163d1296272ac7d2ddc2cd5f9fbrtoy@google.com        ld2     {dWr,dWi},[pTwiddle], #16
1004d09f5abfba45163d1296272ac7d2ddc2cd5f9fbrtoy@google.com
1014d09f5abfba45163d1296272ac7d2ddc2cd5f9fbrtoy@google.com        // dXr0 = [pSrc[0].Re, pSrc[2].Re]
1024d09f5abfba45163d1296272ac7d2ddc2cd5f9fbrtoy@google.com        // dXi0 = [pSrc[0].Im, pSrc[2].Im]
1034d09f5abfba45163d1296272ac7d2ddc2cd5f9fbrtoy@google.com        // dXr1 = [pSrc[1].Re, pSrc[3].Re]
1044d09f5abfba45163d1296272ac7d2ddc2cd5f9fbrtoy@google.com        // dXi1 = [pSrc[1].Im, pSrc[3].Im]
1054d09f5abfba45163d1296272ac7d2ddc2cd5f9fbrtoy@google.com        ld4     {dXr0,dXi0,dXr1,dXi1}, [pSrc], #32
1064d09f5abfba45163d1296272ac7d2ddc2cd5f9fbrtoy@google.com
1074d09f5abfba45163d1296272ac7d2ddc2cd5f9fbrtoy@google.com        SUBS    grpCount,grpCount,#4                  // grpCount is multiplied by 2
1084d09f5abfba45163d1296272ac7d2ddc2cd5f9fbrtoy@google.com
1094d09f5abfba45163d1296272ac7d2ddc2cd5f9fbrtoy@google.com        .ifeqs  "\inverse", "TRUE"
1104d09f5abfba45163d1296272ac7d2ddc2cd5f9fbrtoy@google.com            fmul   qT0,dWr,dXr1
1114d09f5abfba45163d1296272ac7d2ddc2cd5f9fbrtoy@google.com            fmla   qT0,dWi,dXi1                       // real part
1124d09f5abfba45163d1296272ac7d2ddc2cd5f9fbrtoy@google.com            fmul   qT1,dWr,dXi1
1134d09f5abfba45163d1296272ac7d2ddc2cd5f9fbrtoy@google.com            fmls   qT1,dWi,dXr1                       // imag part
1144d09f5abfba45163d1296272ac7d2ddc2cd5f9fbrtoy@google.com
1154d09f5abfba45163d1296272ac7d2ddc2cd5f9fbrtoy@google.com        .else
1164d09f5abfba45163d1296272ac7d2ddc2cd5f9fbrtoy@google.com
1174d09f5abfba45163d1296272ac7d2ddc2cd5f9fbrtoy@google.com            fmul   qT0,dWr,dXr1
1184d09f5abfba45163d1296272ac7d2ddc2cd5f9fbrtoy@google.com            fmls   qT0,dWi,dXi1                       // real part
1194d09f5abfba45163d1296272ac7d2ddc2cd5f9fbrtoy@google.com            fmul   qT1,dWr,dXi1
1204d09f5abfba45163d1296272ac7d2ddc2cd5f9fbrtoy@google.com            fmla   qT1,dWi,dXr1                       // imag part
1214d09f5abfba45163d1296272ac7d2ddc2cd5f9fbrtoy@google.com
1224d09f5abfba45163d1296272ac7d2ddc2cd5f9fbrtoy@google.com        .endif
1234d09f5abfba45163d1296272ac7d2ddc2cd5f9fbrtoy@google.com
1244d09f5abfba45163d1296272ac7d2ddc2cd5f9fbrtoy@google.com        fsub    dYr0,dXr0,qT0
1254d09f5abfba45163d1296272ac7d2ddc2cd5f9fbrtoy@google.com        fsub    dYi0,dXi0,qT1
1264d09f5abfba45163d1296272ac7d2ddc2cd5f9fbrtoy@google.com        fadd    dYr1,dXr0,qT0
1274d09f5abfba45163d1296272ac7d2ddc2cd5f9fbrtoy@google.com        fadd    dYi1,dXi0,qT1
1284d09f5abfba45163d1296272ac7d2ddc2cd5f9fbrtoy@google.com
1294d09f5abfba45163d1296272ac7d2ddc2cd5f9fbrtoy@google.com        st2     {dYr0,dYi0},[pDst],outPointStep
1304d09f5abfba45163d1296272ac7d2ddc2cd5f9fbrtoy@google.com        st2     {dYr1,dYi1},[pDst],dstStep            // dstStep =  step = -outPointStep + 16
1314d09f5abfba45163d1296272ac7d2ddc2cd5f9fbrtoy@google.com
1324d09f5abfba45163d1296272ac7d2ddc2cd5f9fbrtoy@google.com        BGT     radix2lsGrpLoop\name
1334d09f5abfba45163d1296272ac7d2ddc2cd5f9fbrtoy@google.com
1344d09f5abfba45163d1296272ac7d2ddc2cd5f9fbrtoy@google.com
1354d09f5abfba45163d1296272ac7d2ddc2cd5f9fbrtoy@google.com        .endm
1364d09f5abfba45163d1296272ac7d2ddc2cd5f9fbrtoy@google.com
1374d09f5abfba45163d1296272ac7d2ddc2cd5f9fbrtoy@google.com
1384d09f5abfba45163d1296272ac7d2ddc2cd5f9fbrtoy@google.com
1394d09f5abfba45163d1296272ac7d2ddc2cd5f9fbrtoy@google.com        M_START armSP_FFTFwd_CToC_FC32_Radix2_ls_OutOfPlace,,d12
1404d09f5abfba45163d1296272ac7d2ddc2cd5f9fbrtoy@google.com        FFTSTAGE "FALSE","FALSE",fwd
1414d09f5abfba45163d1296272ac7d2ddc2cd5f9fbrtoy@google.com        M_END
1424d09f5abfba45163d1296272ac7d2ddc2cd5f9fbrtoy@google.com
1434d09f5abfba45163d1296272ac7d2ddc2cd5f9fbrtoy@google.com
1444d09f5abfba45163d1296272ac7d2ddc2cd5f9fbrtoy@google.com
1454d09f5abfba45163d1296272ac7d2ddc2cd5f9fbrtoy@google.com        M_START armSP_FFTInv_CToC_FC32_Radix2_ls_OutOfPlace,,d12
1464d09f5abfba45163d1296272ac7d2ddc2cd5f9fbrtoy@google.com        FFTSTAGE "FALSE","TRUE",inv
1474d09f5abfba45163d1296272ac7d2ddc2cd5f9fbrtoy@google.com        M_END
1484d09f5abfba45163d1296272ac7d2ddc2cd5f9fbrtoy@google.com
1494d09f5abfba45163d1296272ac7d2ddc2cd5f9fbrtoy@google.com        .end
150