@//
@//  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
@//
@//  Use of this source code is governed by a BSD-style license
@//  that can be found in the LICENSE file in the root of the source
@//  tree. An additional intellectual property rights grant can be found
@//  in the file PATENTS.  All contributing project authors may
@//  be found in the AUTHORS file in the root of the source tree.
@//
@//  This file was originally licensed as follows. It has been
@//  relicensed with permission from the copyright holders.

@//
@//
@// File Name:  armSP_FFT_CToC_SC16_Radix2_ps_unsafe_s.s
@// OpenMAX DL: v1.0.2
@// Last Modified Revision:   6740
@// Last Modified Date:       Wed, 18 Jul 2007
@//
@// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
@//
@//
@//
@// Description:
@// Compute a Radix 2 FFT stage for a N point complex signal
@//
@//


@// Include standard headers
@// NOTE(review): M_START and M_END used at the bottom of this file are not
@// defined here; presumably they come from armCOMM_s.h - confirm.

#include "dl/api/arm/armCOMM_s.h"
#include "dl/api/arm/omxtypes_s.h"


@// Import symbols required from other files
@// (For example tables)
@// (none are imported by this file)



@// Set debugging level
@//DEBUG_ON    SETL {TRUE}




@// Guarding implementation by the processor name


@//Input Registers
@// These are read by FFTSTAGE on entry. All five are also rewritten by the
@// macro before it finishes (pointers are reset and swapped, sizes are updated).

#define pSrc                            r0
#define pDst                            r2
#define pTwiddle                        r1
#define subFFTNum                       r6
#define subFFTSize                      r7


@//Output Registers
@// (no dedicated output registers: results are returned in the input registers)


@//Local Scratch Registers
@// grpCount and pTmp deliberately share r4: pTmp is only used after the group
@// loop has finished with grpCount.

#define outPointStep                    r3
#define grpCount                        r4
#define dstStep                         r5
#define twStep                          r8
#define pTmp                            r4

@// Neon Registers
@// dW1S32/dW1 and dW2S32/dW2 name the same D registers viewed as 32-bit and
@// 16-bit lanes respectively.

#define dW1S32                          D0.S32
#define dW2S32                          D1.S32
#define dW1                             D0.S16
#define dW2                             D1.S16

#define dX0                             D2.S16
#define dX1                             D3.S16
#define dX2                             D4.S16
#define dX3                             D5.S16
#define dY0                             D6.S16
#define dY1                             D7.S16
#define dY2                             D8.S16
#define dY3                             D9.S16
@// Q5 overlaps D10/D11 and Q6 overlaps D12/D13, so the 32-bit product
@// accumulators do not collide with any of the D registers named above.
#define qT0                             Q5.S32
#define qT1                             Q6.S32


@// FFTSTAGE scaled, inverse, name
@//
@// Expands to one out-of-place radix-2 butterfly pass over 16-bit complex data.
@//
@// Macro arguments:
@//   scaled  - "TRUE" selects halving add/sub (VHADD/VHSUB) for the butterfly,
@//             any other value selects plain VADD/VSUB.
@//   inverse - "TRUE" multiplies the second point by the conjugate of the
@//             twiddle factor, any other value multiplies by the twiddle as
@//             loaded.
@//   name    - suffix appended to the loop label so that every expansion gets
@//             a unique label.
@//
@// Register interface (aliases are the #defines earlier in this file):
@//   in      : pSrc, pDst, pTwiddle, subFFTNum, subFFTSize
@//   out     : subFFTSize is doubled, subFFTNum is halved, pSrc and pDst are
@//             reset and exchanged, pTwiddle is moved back by outPointStep bytes
@//   scratch : outPointStep, grpCount/pTmp, dstStep, twStep, D0-D9, Q5, Q6 and
@//             the condition flags
@//
@// NOTE(review): r5, r8 and D8-D13 are written here and nothing in this macro
@// saves them; presumably the callers of the _unsafe entry points preserve
@// them - confirm against the calling code.

        .MACRO FFTSTAGE scaled, inverse, name

        @// Define stack arguments
        @// (none)


        @// Derive the loop count and the next-stage sizes right away so that
        @// the input registers can be updated in place.


        LSL     grpCount,subFFTSize,#1                      @// loop counter = 2*subFFTSize


        @// update subFFTSize for the next stage (doubled)
        MOV     subFFTSize,grpCount

        @// outPointStep is a byte offset: it is used directly as the VST1
        @// post-increment below (2*size bytes according to the original notes).
        @// It is formed from the low 16 bits of grpCount and of the incoming
        @// (not yet halved) subFFTNum.
        SMULBB  outPointStep,grpCount,subFFTNum
        MOV     twStep,subFFTNum,LSL #1                     @// twiddle post-increment in bytes
        LSR     subFFTNum,subFFTNum,#1                      @//grpSize


        @// dstStep = 8 - outPointStep: undoes the outPointStep jump and moves
        @// on to the next 8-byte output slot.
        RSB      dstStep,outPointStep,#8


        @// Note: the source point step is 8 bytes, which is exactly the
        @// write-back of a one-D-register VLD1, so no extra register is needed.
        @// Loop on the groups: 2 groups at a time

grpLoop\name:

        @// One 32-bit twiddle per group, replicated into both lanes of the
        @// D register.
        VLD1     dW1S32[],[pTwiddle],twStep                @//[wi | wr]
        VLD1     dW2S32[],[pTwiddle],twStep

        @// Process the sets for each grp:  2 sets at a time (no set looping required)

        VLD1    dX0,[pSrc]!            @// point0: of set0,set1 of grp0
        VLD1    dX1,[pSrc]!            @// point1: of set0,set1 of grp0
        VLD1    dX2,[pSrc]!            @// point0: of set0,set1 of grp1
        VLD1    dX3,[pSrc]!            @// point1: of set0,set1 of grp1

        @// The flags set here are consumed by the BGT at the bottom of the
        @// loop; none of the NEON instructions in between alters them.
        SUBS    grpCount,grpCount,#4              @// decrement the loop counter
        VUZP    dW1,dW2                           @// dW1 = real parts, dW2 = imag parts
        VUZP    dX1,dX3                           @// dX1 = real parts, dX3 = imag parts

        .ifeqs  "\inverse", "TRUE"
            @// point1 * conj(w)
            VMULL   qT0,dX1,dW1
            VMLAL   qT0,dX3,dW2                       @// real part
            VMULL   qT1,dX3,dW1
            VMLSL   qT1,dX1,dW2                       @// imag part

        .ELSE
            @// point1 * w
            VMULL   qT0,dX1,dW1
            VMLSL   qT0,dX3,dW2                       @// real part
            VMULL   qT1,dX3,dW1
            VMLAL   qT1,dX1,dW2                       @// imag part

        .ENDIF

        @// Round and narrow the 32-bit products back to 16 bits (shift by 15;
        @// presumably the twiddles are Q15 - confirm).
        VRSHRN  dX1,qT0,#15
        VRSHRN  dX3,qT1,#15

        @// Re-interleave real/imag so that dX1 holds grp0 and dX3 holds grp1 again
        VZIP    dX1,dX3


        .ifeqs "\scaled", "TRUE"

            @// halving butterfly
            VHSUB    dY0,dX0,dX1
            VHADD    dY1,dX0,dX1
            VHSUB    dY2,dX2,dX3
            VHADD    dY3,dX2,dX3

        .ELSE

            @// plain butterfly
            VSUB    dY0,dX0,dX1
            VADD    dY1,dX0,dX1
            VSUB    dY2,dX2,dX3
            VADD    dY3,dX2,dX3



        .ENDIF

        VST1    dY0,[pDst],outPointStep             @// point0: of set0,set1 of grp0
        VST1    dY1,[pDst],dstStep                  @// dstStep = -outPointStep + 8
        VST1    dY2,[pDst],outPointStep             @// point0: of set0,set1 of grp1
        VST1    dY3,[pDst],dstStep                  @// point1: of set0,set1 of grp1


        BGT     grpLoop\name


        @// Reset and Swap pSrc and pDst for the next stage
        @// (pTmp reuses r4, which is free now that the loop counter is done)
        MOV     pTmp,pDst
        SUB     pDst,pSrc,outPointStep,LSL #1       @// pDst -= 2*size; pSrc -= 4*size bytes
        SUB     pSrc,pTmp,outPointStep

        @// Reset pTwiddle for the next stage
        SUB     pTwiddle,pTwiddle,outPointStep      @// pTwiddle -= 2*size bytes

        .endm



@// Forward transform stage, unscaled: FFTSTAGE with scaled = "FALSE" and
@// inverse = "FALSE". Operands are passed in the registers read by FFTSTAGE.
@// NOTE(review): M_START/M_END are defined outside this file; the r4 argument
@// presumably names the highest core register to preserve - confirm.
        M_START armSP_FFTFwd_CToC_SC16_Radix2_ps_OutOfPlace_unsafe,r4
        FFTSTAGE "FALSE","FALSE",FWD
        M_END



@// Inverse transform stage, unscaled: FFTSTAGE with scaled = "FALSE" and
@// inverse = "TRUE" (conjugated twiddle multiply). Operands are passed in the
@// registers read by FFTSTAGE.
@// NOTE(review): M_START/M_END are defined outside this file; the r4 argument
@// presumably names the highest core register to preserve - confirm.
        M_START armSP_FFTInv_CToC_SC16_Radix2_ps_OutOfPlace_unsafe,r4
        FFTSTAGE "FALSE","TRUE",INV
        M_END



@// Forward transform stage, scaled: FFTSTAGE with scaled = "TRUE" (halving
@// butterfly) and inverse = "FALSE". Operands are passed in the registers read
@// by FFTSTAGE.
@// NOTE(review): M_START/M_END are defined outside this file; the r4 argument
@// presumably names the highest core register to preserve - confirm.
        M_START armSP_FFTFwd_CToC_SC16_Sfs_Radix2_ps_OutOfPlace_unsafe,r4
        FFTSTAGE "TRUE","FALSE",FWDSFS
        M_END



@// Inverse transform stage, scaled: FFTSTAGE with scaled = "TRUE" (halving
@// butterfly) and inverse = "TRUE" (conjugated twiddle multiply). Operands are
@// passed in the registers read by FFTSTAGE.
@// NOTE(review): M_START/M_END are defined outside this file; the r4 argument
@// presumably names the highest core register to preserve - confirm.
        M_START armSP_FFTInv_CToC_SC16_Sfs_Radix2_ps_OutOfPlace_unsafe,r4
        FFTSTAGE "TRUE","TRUE",INVSFS
        M_END



@// End of the assembly source.
    .END