15e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com@//
25e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com@//  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
35e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com@//
45e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com@//  Use of this source code is governed by a BSD-style license
55e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com@//  that can be found in the LICENSE file in the root of the source
65e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com@//  tree. An additional intellectual property rights grant can be found
75e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com@//  in the file PATENTS.  All contributing project authors may
85e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com@//  be found in the AUTHORS file in the root of the source tree.
95e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com@//
105e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com@//  This file was originally licensed as follows. It has been
115e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com@//  relicensed with permission from the copyright holders.
125e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com
135e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com@//
145e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com@//
155e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com@// File Name:  armSP_FFT_CToC_SC16_Radix2_unsafe_s.s
165e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com@// OpenMAX DL: v1.0.2
175e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com@// Last Modified Revision:   5892
185e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com@// Last Modified Date:       Thu, 07 Jun 2007
195e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com@//
205e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com@// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
215e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com@//
225e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com@//
235e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com@//
245e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com@// Description:
255e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com@// Compute a Radix 2 FFT stage for a N point complex signal
265e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com@//
275e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com@//
285e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com
295e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com
305e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com@// Include standard headers
315e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com
32bdf981cb383b7ec472ee86d2fedb53937285f894rtoy@google.com#include "dl/api/arm/armCOMM_s.h"
33bdf981cb383b7ec472ee86d2fedb53937285f894rtoy@google.com#include "dl/api/arm/omxtypes_s.h"
345e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com
355e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com
365e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com@// Import symbols required from other files
375e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com@// (For example tables)
385e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com
395e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com
405e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com
415e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com@// Set debugging level
425e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com@//DEBUG_ON    SETL {TRUE}
435e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com
445e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com
455e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com
465e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com@// Guarding implementation by the processor name
475e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com
485e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com
495e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com
505e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com
515e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com    @// Guarding implementation by the processor name
525e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com
535e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com
545e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com@//Input Registers
@// Core-register names for the values FFTSTAGE reads on entry.  The macro
@// also writes every one of them before it finishes (subFFTNum and
@// subFFTSize at its start, pSrc/pDst/pTwiddle at its end), so they double
@// as the values handed to whatever code follows the expansion.
555e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com
565e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com#define pSrc                            r0
575e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com#define pDst                            r2
585e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com#define pTwiddle                        r1
595e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com#define subFFTNum                       r6
605e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com#define subFFTSize                      r7
615e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com
625e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com
635e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com@//Output Registers
@// No separate output aliases are defined; FFTSTAGE leaves its updated
@// state in the input registers listed above.
645e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com
655e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com
665e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com@//Local Scratch Registers
@// Core registers FFTSTAGE overwrites without restoring: r3-r5 and r8-r11.
675e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com
685e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com#define outPointStep                    r3
695e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com#define pointStep                       r4
705e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com#define grpCount                        r5
715e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com#define setCount                        r8
725e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com#define step                            r10
735e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com#define dstStep                         r11
745e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com#define pTmp                            r9
755e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com
765e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com@// Neon Registers
@// dW holds the twiddle factor, dX0..dX3 the two de-interleaved inputs and
@// dY0..dY3 the two outputs, all as 16-bit signed lanes.
@// qT0/qT1 are the 32-bit product accumulators.  Architecturally Q3 is
@// the register pair D6:D7 and Q4 is D8:D9, so qT0/qT1 occupy the same
@// storage as dY0..dY3; FFTSTAGE narrows the products back into dX2/dX3
@// before it writes any dY register.
@// NOTE(review): D8/D9 fall in the AAPCS callee-saved range d8-d15 and
@// nothing visible in this file saves them - confirm that M_START or the
@// callers of the *_unsafe routines preserve them.
775e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com
785e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com#define dW                              D0.S16
795e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com#define dX0                             D2.S16
805e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com#define dX1                             D3.S16
815e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com#define dX2                             D4.S16
825e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com#define dX3                             D5.S16
835e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com#define dY0                             D6.S16
845e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com#define dY1                             D7.S16
855e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com#define dY2                             D8.S16
865e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com#define dY3                             D9.S16
875e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com#define qT0                             Q3.S32
885e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com#define qT1                             Q4.S32
895e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com
905e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com
915e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com
@// -----------------------------------------------------------------------
@// FFTSTAGE scaled, inverse, name
@//
@// Emits the body of one radix-2 butterfly stage over interleaved
@// (re,im) 16-bit complex data, four butterflies per inner iteration.
@//
@//   scaled  : "TRUE" selects the halving VHADD/VHSUB for the butterfly
@//             add/subtract; any other string selects plain VADD/VSUB.
@//   inverse : "TRUE" selects the product with the twiddle's imaginary
@//             sign flipped; any other string selects the direct product.
@//   name    : suffix pasted onto grpLoop/setLoop so that each expansion
@//             of the macro gets its own pair of labels.
@//
@// On entry  : pSrc, pDst, pTwiddle, subFFTNum, subFFTSize.
@// On exit   : subFFTNum is halved and subFFTSize doubled; pSrc and pDst
@//             have exchanged roles and, like pTwiddle, have had the
@//             distance advanced by the loops subtracted again.
@// Clobbers  : r3-r5, r8-r11, D0, D2-D9 and the condition flags.
@// Assumes   : the halved subFFTNum is a positive multiple of 4 (the
@//             inner loop always handles 4 points and counts down by 4)
@//             - presumably guaranteed by the caller, TODO confirm.
@// -----------------------------------------------------------------------
925d8507771824df2b96d9c6f2fd55a47fcfd9dec9rtoy@google.com        .macro FFTSTAGE scaled, inverse, name
935e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com
945e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com        @// Define stack arguments
955e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com
965e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com
975e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com        @// Update grpCount and grpSize rightaway inorder to reuse pGrpCount and pGrpSize regs
985e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com
        @// subFFTNum becomes grpSize (half its entry value); grpCount becomes
        @// twice the entry value of subFFTSize.
995e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com        LSR     subFFTNum,subFFTNum,#1                      @//grpSize
100a2c02e342750946639b77d1a42f9a9da85a84b82rtoy@google.com        LSL     grpCount,subFFTSize,#1
1015e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com
1025e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com
1035e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com        @// pT0+1 increments pT0 by 8 bytes
1045e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com        @// pT0+pointStep = increment of 4*pointStep bytes = 2*grpSize bytes
        @// pointStep = 2*grpSize for now; it is doubled to 4*grpSize below,
        @// after it has been used to form outPointStep.
105a2c02e342750946639b77d1a42f9a9da85a84b82rtoy@google.com        MOV     pointStep,subFFTNum,LSL #1
1065e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com
1075e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com        @// update subFFTSize for the next stage
108a2c02e342750946639b77d1a42f9a9da85a84b82rtoy@google.com        MOV     subFFTSize,grpCount
1095e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com
1105e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com        @// pOut0+1 increments pOut0 by 8 bytes
1115e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com        @// pOut0+outPointStep == increment of 4*outPointStep bytes = 2*size bytes
        @// outPointStep = grpCount * (2*grpSize).  SMULBB multiplies only the
        @// low halfwords as signed 16-bit values, so both factors must fit in
        @// that range for the product to be meaningful.
1125e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com        SMULBB  outPointStep,grpCount,pointStep
1135e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com        LSL     pointStep,pointStep,#1
1145e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com
1155e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com
        @// step and dstStep are the post-increments for the second load and
        @// second store of each iteration: they undo the first jump
        @// (pointStep / outPointStep) and move on by the 16 bytes just used.
116a2c02e342750946639b77d1a42f9a9da85a84b82rtoy@google.com        RSB      step,pointStep,#16
117a2c02e342750946639b77d1a42f9a9da85a84b82rtoy@google.com        RSB      dstStep,outPointStep,#16
118a2c02e342750946639b77d1a42f9a9da85a84b82rtoy@google.com
1195e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com        @// Loop on the groups
1205e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com
1215e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.comgrpLoop\name:
1225e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com
        @// One twiddle load per group; only lanes 0 and 1 of dW are used
        @// below.  pTwiddle then moves on by pointStep bytes.
        @// setCount = pointStep/4 = grpSize, the points left in this group.
1235e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com        VLD1     dW,[pTwiddle],pointStep                @//[wi | wr]
124a2c02e342750946639b77d1a42f9a9da85a84b82rtoy@google.com        MOV      setCount,pointStep,LSR #2
1255e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com
1265e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com
1275e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com        @// Loop on the sets: 4 at a time
1285e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com
1295e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com
1305e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.comsetLoop\name:
1315e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com
1325e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com
        @// Each VLD2 reads 16 bytes and de-interleaves them into a register
        @// of real parts and a register of imaginary parts.  The two
        @// post-increments add up to +16 on pSrc per iteration.
1335e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com        VLD2    {dX0,dX1},[pSrc],pointStep            @// point0: dX0-real part dX1-img part
1345e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com        VLD2    {dX2,dX3},[pSrc],step                 @// point1: dX2-real part dX3-img part
1355e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com
        @// The flags set here are consumed by "BGT setLoop" further down;
        @// none of the NEON arithmetic or stores in between alters them.
1365e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com        SUBS    setCount,setCount,#4
1375e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com
        @// Complex product of point1 with the twiddle, widened to 32 bits.
        @// Reading dW[0] as wr and dW[1] as wi (per the load comment):
        @//   inverse : re = x.re*wr + x.im*wi ; im = x.im*wr - x.re*wi
        @//   forward : re = x.re*wr - x.im*wi ; im = x.im*wr + x.re*wi
1385e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com        .ifeqs  "\inverse", "TRUE"
139a2c02e342750946639b77d1a42f9a9da85a84b82rtoy@google.com            VMULL   qT0,dX2,dW[0]
1405e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com            VMLAL   qT0,dX3,dW[1]                       @// real part
141a2c02e342750946639b77d1a42f9a9da85a84b82rtoy@google.com            VMULL   qT1,dX3,dW[0]
1425e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com            VMLSL   qT1,dX2,dW[1]                       @// imag part
1435e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com
1445d8507771824df2b96d9c6f2fd55a47fcfd9dec9rtoy@google.com        .else
1455e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com
146a2c02e342750946639b77d1a42f9a9da85a84b82rtoy@google.com            VMULL   qT0,dX2,dW[0]
1475e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com            VMLSL   qT0,dX3,dW[1]                       @// real part
148a2c02e342750946639b77d1a42f9a9da85a84b82rtoy@google.com            VMULL   qT1,dX3,dW[0]
1495e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com            VMLAL   qT1,dX2,dW[1]                       @// imag part
1505e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com
1515d8507771824df2b96d9c6f2fd55a47fcfd9dec9rtoy@google.com        .endif
1525e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com
        @// Rounding shift right by 15 and narrow back to 16-bit lanes.  The
        @// results overwrite dX2/dX3, which frees qT0/qT1 (the same storage
        @// as dY0..dY3) for the butterfly outputs.
153a2c02e342750946639b77d1a42f9a9da85a84b82rtoy@google.com        VRSHRN  dX2,qT0,#15
154a2c02e342750946639b77d1a42f9a9da85a84b82rtoy@google.com        VRSHRN  dX3,qT1,#15
1555e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com
        @// Butterfly: dY0/dY1 = point0 - product, dY2/dY3 = point0 + product.
        @// The scaled variant uses the halving forms of the same operations.
1565e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com        .ifeqs "\scaled", "TRUE"
157a2c02e342750946639b77d1a42f9a9da85a84b82rtoy@google.com            VHSUB    dY0,dX0,dX2
158a2c02e342750946639b77d1a42f9a9da85a84b82rtoy@google.com            VHSUB    dY1,dX1,dX3
159a2c02e342750946639b77d1a42f9a9da85a84b82rtoy@google.com            VHADD    dY2,dX0,dX2
160a2c02e342750946639b77d1a42f9a9da85a84b82rtoy@google.com            VHADD    dY3,dX1,dX3
1615e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com
1625d8507771824df2b96d9c6f2fd55a47fcfd9dec9rtoy@google.com        .else
163a2c02e342750946639b77d1a42f9a9da85a84b82rtoy@google.com            VSUB    dY0,dX0,dX2
164a2c02e342750946639b77d1a42f9a9da85a84b82rtoy@google.com            VSUB    dY1,dX1,dX3
165a2c02e342750946639b77d1a42f9a9da85a84b82rtoy@google.com            VADD    dY2,dX0,dX2
166a2c02e342750946639b77d1a42f9a9da85a84b82rtoy@google.com            VADD    dY3,dX1,dX3
1675e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com
1685d8507771824df2b96d9c6f2fd55a47fcfd9dec9rtoy@google.com        .endif
1695e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com
        @// Re-interleave and store: the difference at pDst, the sum
        @// outPointStep bytes further on.  Net advance of pDst is +16.
170a2c02e342750946639b77d1a42f9a9da85a84b82rtoy@google.com        VST2    {dY0,dY1},[pDst],outPointStep
1715e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com        VST2    {dY2,dY3},[pDst],dstStep              @// dstStep = -outPointStep + 16
1725e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com
        @// Repeat while setCount is still positive (flags from the SUBS above).
1735e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com        BGT     setLoop\name
1745e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com
        @// grpCount counts down in twos.  The ADD (no S suffix) keeps the
        @// SUBS flags intact for the BGT while stepping pSrc over the
        @// point1 half of the group that the inner loop has already read.
1755e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com        SUBS    grpCount,grpCount,#2
176a2c02e342750946639b77d1a42f9a9da85a84b82rtoy@google.com        ADD     pSrc,pSrc,pointStep
1775e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com        BGT     grpLoop\name
1785e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com
1795e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com
1805e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com        @// Reset and Swap pSrc and pDst for the next stage
        @// New pDst = old pSrc - 2*outPointStep; new pSrc = old pDst -
        @// outPointStep.  pTmp carries the old pDst across the exchange.
181a2c02e342750946639b77d1a42f9a9da85a84b82rtoy@google.com        MOV     pTmp,pDst
1825e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com        SUB     pDst,pSrc,outPointStep,LSL #1       @// pDst -= 2*size; pSrc -= 4*size bytes
183a2c02e342750946639b77d1a42f9a9da85a84b82rtoy@google.com        SUB     pSrc,pTmp,outPointStep
1845e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com
1855e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com        @// Reset pTwiddle for the next stage
1865e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com        SUB     pTwiddle,pTwiddle,outPointStep      @// pTwiddle -= 2*size bytes
1875e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com
1885e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com
1895e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com        .endm
1905e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com
1915e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com
1925e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com
@// Forward, unscaled stage: FFTSTAGE with scaled="FALSE", inverse="FALSE";
@// loop labels get the suffix FWD.
@// M_START/M_END are not defined in this file (presumably they come from
@// armCOMM_s.h - confirm); r4 is the register argument passed to M_START.
193a2c02e342750946639b77d1a42f9a9da85a84b82rtoy@google.com        M_START armSP_FFTFwd_CToC_SC16_Radix2_OutOfPlace_unsafe,r4
1945e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com        FFTSTAGE "FALSE","FALSE",FWD
195a2c02e342750946639b77d1a42f9a9da85a84b82rtoy@google.com        M_END
196a2c02e342750946639b77d1a42f9a9da85a84b82rtoy@google.com
1975e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com
1985e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com
@// Inverse, unscaled stage: FFTSTAGE with scaled="FALSE", inverse="TRUE";
@// loop labels get the suffix INV.
@// M_START/M_END are not defined in this file (presumably they come from
@// armCOMM_s.h - confirm); r4 is the register argument passed to M_START.
199a2c02e342750946639b77d1a42f9a9da85a84b82rtoy@google.com        M_START armSP_FFTInv_CToC_SC16_Radix2_OutOfPlace_unsafe,r4
2005e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com        FFTSTAGE "FALSE","TRUE",INV
201a2c02e342750946639b77d1a42f9a9da85a84b82rtoy@google.com        M_END
2025e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com
2035e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com
2045e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com
@// Forward, scaled stage: FFTSTAGE with scaled="TRUE", inverse="FALSE",
@// so the butterfly uses the halving VHADD/VHSUB; loop labels get the
@// suffix FWDSFS.
@// M_START/M_END are not defined in this file (presumably they come from
@// armCOMM_s.h - confirm); r4 is the register argument passed to M_START.
205a2c02e342750946639b77d1a42f9a9da85a84b82rtoy@google.com        M_START armSP_FFTFwd_CToC_SC16_Sfs_Radix2_OutOfPlace_unsafe,r4
2065e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com        FFTSTAGE "TRUE","FALSE",FWDSFS
207a2c02e342750946639b77d1a42f9a9da85a84b82rtoy@google.com        M_END
208a2c02e342750946639b77d1a42f9a9da85a84b82rtoy@google.com
2095e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com
2105e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com
@// Inverse, scaled stage: FFTSTAGE with scaled="TRUE", inverse="TRUE",
@// so the butterfly uses the halving VHADD/VHSUB; loop labels get the
@// suffix INVSFS.
@// M_START/M_END are not defined in this file (presumably they come from
@// armCOMM_s.h - confirm); r4 is the register argument passed to M_START.
211a2c02e342750946639b77d1a42f9a9da85a84b82rtoy@google.com        M_START armSP_FFTInv_CToC_SC16_Sfs_Radix2_OutOfPlace_unsafe,r4
2125e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com        FFTSTAGE "TRUE","TRUE",INVSFS
213a2c02e342750946639b77d1a42f9a9da85a84b82rtoy@google.com        M_END
214a2c02e342750946639b77d1a42f9a9da85a84b82rtoy@google.com
215a2c02e342750946639b77d1a42f9a9da85a84b82rtoy@google.com
2165e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com
2175e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com
2185e49351d609862d862bd3d3b8dbd133205079b84rtoy@google.com
2195d8507771824df2b96d9c6f2fd55a47fcfd9dec9rtoy@google.com    .end
220