@//
@//  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
@//
@//  Use of this source code is governed by a BSD-style license
@//  that can be found in the LICENSE file in the root of the source
@//  tree. An additional intellectual property rights grant can be found
@//  in the file PATENTS.  All contributing project authors may
@//  be found in the AUTHORS file in the root of the source tree.
@//
@//  This file was originally licensed as follows. It has been
@//  relicensed with permission from the copyright holders.
@//

@//
@// File Name:  armSP_FFT_CToC_SC32_Radix8_fs_unsafe_s.s
@// OpenMAX DL: v1.0.2
@// Last Modified Revision:   7770
@// Last Modified Date:       Thu, 27 Sep 2007
@//
@// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
@//
@//
@//
@// Description:
@// Compute the first-stage radix-8 FFT pass for an N-point complex signal.
@//




@// Include standard headers

#include "dl/api/arm/armCOMM_s.h"
#include "dl/api/arm/omxtypes_s.h"

@// Import symbols required from other files
@// (For example tables)


@// Set debugging level
@//DEBUG_ON    SETL {TRUE}



@// Guarding implementation by the processor name
@// NOTE(review): no processor guard directive actually follows in this file.


@// ---------------------------------------------------------------------------
@// Register aliases.  These are C-preprocessor names for core and NEON
@// registers; several names deliberately share one physical register, so
@// the lifetime of each alias matters when reading the macro below.
@// ---------------------------------------------------------------------------

@//Input Registers

#define pSrc            r0
#define pDst            r2
@// pTwiddle is defined for symmetry with other stages; this file never uses it.
#define pTwiddle        r1
#define subFFTNum       r6
#define subFFTSize      r7
@// dest buffer for the next stage (not pSrc for first stage)
#define pPingPongBuf    r5


@//Output Registers


@//Local Scratch Registers

#define grpSize         r3
@// Reuse grpSize as setCount (same register, r3)
#define setCount        r3
#define pointStep       r4
@// outPointStep shares r4 with pointStep
#define outPointStep    r4
#define setStep         r8
#define step1           r9
#define step2           r10
#define t0              r11


@// Neon Registers

@// Inputs X0..X7: real parts in even D registers, imaginary parts in odd ones;
@// qXn is the Q register that covers the pair dXrn/dXin.
#define dXr0    D0.S32
#define dXi0    D1.S32
#define dXr1    D2.S32
#define dXi1    D3.S32
#define dXr2    D4.S32
#define dXi2    D5.S32
#define dXr3    D6.S32
#define dXi3    D7.S32
#define dXr4    D8.S32
#define dXi4    D9.S32
#define dXr5    D10.S32
#define dXi5    D11.S32
#define dXr6    D12.S32
#define dXi6    D13.S32
#define dXr7    D14.S32
#define dXi7    D15.S32
#define qX0     Q0.S32
#define qX1     Q1.S32
#define qX2     Q2.S32
#define qX3     Q3.S32
#define qX4     Q4.S32
#define qX5     Q5.S32
#define qX6     Q6.S32
#define qX7     Q7.S32

@// First-stage results U0..U7: even indices in D16-D23, odd indices in D24-D31.
#define dUr0    D16.S32
#define dUi0    D17.S32
#define dUr2    D18.S32
#define dUi2    D19.S32
#define dUr4    D20.S32
#define dUi4    D21.S32
#define dUr6    D22.S32
#define dUi6    D23.S32
#define dUr1    D24.S32
#define dUi1    D25.S32
#define dUr3    D26.S32
#define dUi3    D27.S32
#define dUr5    D28.S32
#define dUi5    D29.S32
#define dUr7    D30.S32
#define dUi7    D31.S32
#define qU0     Q8.S32
#define qU1     Q12.S32
#define qU2     Q9.S32
#define qU3     Q13.S32
#define qU4     Q10.S32
#define qU5     Q14.S32
#define qU6     Q11.S32
#define qU7     Q15.S32



@// Second-stage results V0..V7 swap banks relative to U: even indices occupy
@// D24-D31 (the odd-U registers) and odd indices occupy D16-D23 (the even-U
@// registers).
#define dVr0    D24.S32
#define dVi0    D25.S32
#define dVr2    D26.S32
#define dVi2    D27.S32
#define dVr4    D28.S32
#define dVi4    D29.S32
#define dVr6    D30.S32
#define dVi6    D31.S32
#define dVr1    D16.S32
#define dVi1    D17.S32
#define dVr3    D18.S32
#define dVi3    D19.S32
#define dVr5    D20.S32
#define dVi5    D21.S32
#define dVr7    D22.S32
#define dVi7    D23.S32
#define qV0     Q12.S32
#define qV1     Q8.S32
#define qV2     Q13.S32
#define qV3     Q9.S32
#define qV4     Q14.S32
#define qV5     Q10.S32
#define qV6     Q15.S32
#define qV7     Q11.S32



@// Outputs Y0..Y7 use the same register layout as U0..U7.
#define dYr0    D16.S32
#define dYi0    D17.S32
#define dYr2    D18.S32
#define dYi2    D19.S32
#define dYr4    D20.S32
#define dYi4    D21.S32
#define dYr6    D22.S32
#define dYi6    D23.S32
#define dYr1    D24.S32
#define dYi1    D25.S32
#define dYr3    D26.S32
#define dYi3    D27.S32
#define dYr5    D28.S32
#define dYi5    D29.S32
#define dYr7    D30.S32
#define dYi7    D31.S32
#define qY0     Q8.S32
#define qY1     Q12.S32
#define qY2     Q9.S32
#define qY3     Q13.S32
#define qY4     Q10.S32
#define qY5     Q14.S32
#define qY6     Q11.S32
#define qY7     Q15.S32


@// Temporaries: dT0/dT1 reuse dXr7/dXi7 (D14/D15), so they are only valid
@// between the last read of X7 and the next load of data[7].
#define dT0     D14.S32
#define dT1     D15.S32

@// ---------------------------------------------------------------------------
@// FFTSTAGE scaled, inverse, name
@//
@// First-stage radix-8 butterfly pass over 32-bit complex data, software
@// pipelined so that the eight inputs of the next pass are loaded while the
@// current pass is being computed.
@//
@//   scaled  : when "TRUE" the halving forms VHADD/VHSUB are assembled for
@//             the butterflies, otherwise plain VADD/VSUB.
@//   inverse : when "TRUE" the inverse sign pattern is assembled, otherwise
@//             the forward one.
@//   name    : suffix appended to the loop label so that each expansion
@//             defines a distinct symbol.
@//
@// Read on entry : pSrc, pDst, subFFTNum, pPingPongBuf.
@// Written       : pSrc, pDst, subFFTNum, subFFTSize, grpSize/setCount,
@//                 pointStep, setStep, step1, step2, t0, the condition flags
@//                 and NEON D0-D31.
@// On exit       : subFFTSize = 8, subFFTNum = entry subFFTNum / 8,
@//                 pSrc = pDst - pointStep, pDst = pPingPongBuf.
@//
@// Every VLD2/VST2 here moves two complex points (16 bytes), so one loop pass
@// handles two sets and setCount is decremented by 2.
@//
@// NOTE(review): the loop body loads the next eight inputs unconditionally,
@// so the last pass reads one group beyond the final set - confirm that the
@// callers tolerate this over-read.
@// NOTE(review): all accesses carry a :128 alignment qualifier; presumably
@// the buffers are 16-byte aligned and subFFTNum/8 is even - confirm.
@// ---------------------------------------------------------------------------
        .macro FFTSTAGE scaled, inverse, name

        @// Define stack arguments

        @// Update pSubFFTSize and pSubFFTNum regs
        MOVW    t0, 0x799A               @// Low half word of 1/sqrt(2) in Q31.
        MOV     subFFTSize,#8
        MOVT    t0, 0x5A82               @// High half word of 1/sqrt(2) in Q31.


        @// Note: setCount = subFFTNum/8 (reuse the grpSize reg for setCount)
        LSR     grpSize,subFFTNum,#3
        MOV     subFFTNum,grpSize


        @// pointStep = 8*grpSize, the byte distance between data[k] and data[k+1]
        @// Note: outPointStep = pointStep for firststage

        MOV     pointStep,grpSize,LSL #3


        @// Calculate the step of input data for the next set
        @// (interleaved with the first loads of the pipeline prologue)
        VLD2    {dXr0,dXi0},[pSrc :128],pointStep          @//  data[0]
        MOV     step1,grpSize,LSL #4                           @// step1 = 16*grpSize = 2*pointStep

        MOV     step2,pointStep,LSL #3
        VLD2    {dXr1,dXi1},[pSrc :128],pointStep          @//  data[1]
        SUB     step2,step2,pointStep                          @// step2 = 7*pointStep
        RSB     setStep,step2,#16                              @// setStep = - 7*pointStep+16

        VLD2    {dXr2,dXi2},[pSrc :128],pointStep          @//  data[2]
        VLD2    {dXr3,dXi3},[pSrc :128],pointStep          @//  data[3]
        VLD2    {dXr4,dXi4},[pSrc :128],pointStep          @//  data[4]
        VLD2    {dXr5,dXi5},[pSrc :128],pointStep          @//  data[5]
        VLD2    {dXr6,dXi6},[pSrc :128],pointStep          @//  data[6]
        VLD2    {dXr7,dXi7},[pSrc :128],setStep            @//  data[7] & update pSrc for the next set
                                                      @//  setStep = -7*pointStep + 16
        @// grp = 0 a special case since all the twiddle factors are 1
        @// Loop on the sets

grpZeroSetLoop\name :

        @// Decrement setcount.  The flags set here are consumed by the BGT at
        @// the bottom of the loop; nothing in between writes the flags.
        SUBS    setCount,setCount,#2                    @// decrement the set loop counter


        .ifeqs  "\scaled", "TRUE"
            @// finish first stage of 8 point FFT

            VHADD    qU0,qX0,qX4
            VHADD    qU2,qX1,qX5
            VHADD    qU4,qX2,qX6
            VHADD    qU6,qX3,qX7

            @// finish second stage of 8 point FFT

            VHADD    qV0,qU0,qU4
            VHSUB    qV2,qU0,qU4
            VHADD    qV4,qU2,qU6
            VHSUB    qV6,qU2,qU6

            @// finish third stage of 8 point FFT

            VHADD    qY0,qV0,qV4
            VHSUB    qY4,qV0,qV4
            VST2    {dYr0,dYi0},[pDst :128],step1                    @// store y0

            .ifeqs  "\inverse", "TRUE"

                VHSUB    dYr2,dVr2,dVi6
                VHADD    dYi2,dVi2,dVr6

                VHADD    dYr6,dVr2,dVi6
                VST2    {dYr2,dYi2},[pDst :128],step1                    @// store y2
                VHSUB    dYi6,dVi2,dVr6

                VHSUB    qU1,qX0,qX4
                VST2    {dYr4,dYi4},[pDst :128],step1                    @// store y4

                VHSUB    qU3,qX1,qX5
                VHSUB    qU5,qX2,qX6
                VST2    {dYr6,dYi6},[pDst :128],step1                    @// store y6

            .else

                @// Forward case: the same two values are produced, but the one
                @// held in the dY6 registers goes to the y2 slot and the one
                @// held in the dY2 registers goes to the y6 slot.
                VHADD    dYr6,dVr2,dVi6
                VHSUB    dYi6,dVi2,dVr6

                VHSUB    dYr2,dVr2,dVi6
                VST2    {dYr6,dYi6},[pDst :128],step1                    @// store y2 (from dY6 regs)
                VHADD    dYi2,dVi2,dVr6


                VHSUB    qU1,qX0,qX4
                VST2    {dYr4,dYi4},[pDst :128],step1                    @// store y4
                VHSUB    qU3,qX1,qX5
                VHSUB    qU5,qX2,qX6
                VST2    {dYr2,dYi2},[pDst :128],step1                    @// store y6 (from dY2 regs)


            .endif

            @// finish first stage of 8 point FFT

            VHSUB    qU7,qX3,qX7
            VMOV    dT0[0],t0                                   @// dT0 aliases dXr7; X7 was last read just above

            @// finish second stage of 8 point FFT
            @// (loads for the next pass are interleaved with the arithmetic)

            VHSUB    dVr1,dUr1,dUi5
            VLD2    {dXr0,dXi0},[pSrc :128],pointStep          @//  data[0] for next iteration
            VHADD    dVi1,dUi1,dUr5
            VHADD    dVr3,dUr1,dUi5
            VLD2    {dXr1,dXi1},[pSrc :128],pointStep          @//  data[1]
            VHSUB    dVi3,dUi1,dUr5

            VHSUB    dVr5,dUr3,dUi7
            VLD2    {dXr2,dXi2},[pSrc :128],pointStep          @//  data[2]
            VHADD    dVi5,dUi3,dUr7
            VHADD    dVr7,dUr3,dUi7
            VLD2    {dXr3,dXi3},[pSrc :128],pointStep          @//  data[3]
            VHSUB    dVi7,dUi3,dUr7

            @// finish third stage of 8 point FFT

            .ifeqs  "\inverse", "TRUE"

                @// calculate a*v5, a = (1+j)/sqrt(2)
                VQRDMULH    dT1,dVr5,dT0[0]                         @// dT1 = Re(v5)/sqrt(2); dT1 aliases dXi7
                VLD2    {dXr4,dXi4},[pSrc :128],pointStep          @//  data[4]
                VQRDMULH    dVi5,dVi5,dT0[0]

                VLD2    {dXr5,dXi5},[pSrc :128],pointStep          @//  data[5]
                VSUB    dVr5,dT1,dVi5                               @// a * V5
                VADD    dVi5,dT1,dVi5

                VLD2    {dXr6,dXi6},[pSrc :128],pointStep          @//  data[6]

                @// calculate  b*v7, b = (1-j)/sqrt(2)
                VQRDMULH    dT1,dVr7,dT0[0]
                VQRDMULH    dVi7,dVi7,dT0[0]

                VHADD    qY1,qV1,qV5
                VHSUB    qY5,qV1,qV5


                VADD    dVr7,dT1,dVi7                               @// b * V7
                VSUB    dVi7,dVi7,dT1
                SUB     pDst, pDst, step2                           @// set pDst to y1

                @// dT0/dT1 are dead from here on, so X7 may be reloaded over them
                VLD2    {dXr7,dXi7},[pSrc :128],setStep            @//  data[7]


                VHSUB    dYr3,dVr3,dVr7
                VHSUB    dYi3,dVi3,dVi7
                VST2    {dYr1,dYi1},[pDst :128],step1                    @// store y1
                VHADD    dYr7,dVr3,dVr7
                VHADD    dYi7,dVi3,dVi7


                VST2    {dYr3,dYi3},[pDst :128],step1                    @// store y3
                VST2    {dYr5,dYi5},[pDst :128],step1                    @// store y5
                VST2    {dYr7,dYi7},[pDst :128]!                      @// store y7 (pDst += 16)

            .else

                @// calculate  b*v7, b = (1-j)/sqrt(2)
                VQRDMULH    dT1,dVr7,dT0[0]
                VLD2    {dXr4,dXi4},[pSrc :128],pointStep          @//  data[4]
                VQRDMULH    dVi7,dVi7,dT0[0]

                VLD2    {dXr5,dXi5},[pSrc :128],pointStep          @//  data[5]
                VADD    dVr7,dT1,dVi7                               @// b * V7
                VSUB    dVi7,dVi7,dT1

                VLD2    {dXr6,dXi6},[pSrc :128],pointStep          @//  data[6]

                @// calculate a*v5, a = (1+j)/sqrt(2)
                VQRDMULH    dT1,dVr5,dT0[0]                         @// dT1 = Re(v5)/sqrt(2); dT1 aliases dXi7
                VQRDMULH    dVi5,dVi5,dT0[0]

                VHADD    dYr7,dVr3,dVr7
                VHADD    dYi7,dVi3,dVi7
                SUB     pDst, pDst, step2                           @// set pDst to y1

                VSUB    dVr5,dT1,dVi5                               @// a * V5
                VADD    dVi5,dT1,dVi5
                @// dT0/dT1 are dead from here on, so X7 may be reloaded over them
                VLD2    {dXr7,dXi7},[pSrc :128],setStep            @//  data[7]

                VHSUB    qY5,qV1,qV5

                @// Forward case: the odd outputs are written in reversed register
                @// order (dY7, dY5, dY3, dY1) into the y1, y3, y5, y7 slots.
                VHSUB    dYr3,dVr3,dVr7
                VST2    {dYr7,dYi7},[pDst :128],step1                    @// store y1 (from dY7 regs)
                VHSUB    dYi3,dVi3,dVi7
                VHADD    qY1,qV1,qV5


                VST2    {dYr5,dYi5},[pDst :128],step1                    @// store y3 (from dY5 regs)
                VST2    {dYr3,dYi3},[pDst :128],step1                    @// store y5 (from dY3 regs)
                VST2    {dYr1,dYi1},[pDst :128]!                      @// store y7 (from dY1 regs; pDst += 16)

            .endif



        .else
            @// Unscaled variant: same schedule as above with VADD/VSUB in place
            @// of VHADD/VHSUB.
            @// finish first stage of 8 point FFT

            VADD    qU0,qX0,qX4
            VADD    qU2,qX1,qX5
            VADD    qU4,qX2,qX6
            VADD    qU6,qX3,qX7

            @// finish second stage of 8 point FFT

            VADD    qV0,qU0,qU4
            VSUB    qV2,qU0,qU4
            VADD    qV4,qU2,qU6
            VSUB    qV6,qU2,qU6

            @// finish third stage of 8 point FFT

            VADD    qY0,qV0,qV4
            VSUB    qY4,qV0,qV4
            VST2    {dYr0,dYi0},[pDst :128],step1                    @// store y0

            .ifeqs  "\inverse", "TRUE"

                VSUB    dYr2,dVr2,dVi6
                VADD    dYi2,dVi2,dVr6

                VADD    dYr6,dVr2,dVi6
                VST2    {dYr2,dYi2},[pDst :128],step1                    @// store y2
                VSUB    dYi6,dVi2,dVr6

                VSUB    qU1,qX0,qX4
                VST2    {dYr4,dYi4},[pDst :128],step1                    @// store y4

                VSUB    qU3,qX1,qX5
                VSUB    qU5,qX2,qX6
                VST2    {dYr6,dYi6},[pDst :128],step1                    @// store y6

            .else

                @// Forward case: dY6 regs go to the y2 slot, dY2 regs to the y6 slot.
                VADD    dYr6,dVr2,dVi6
                VSUB    dYi6,dVi2,dVr6

                VSUB    dYr2,dVr2,dVi6
                VST2    {dYr6,dYi6},[pDst :128],step1                    @// store y2 (from dY6 regs)
                VADD    dYi2,dVi2,dVr6


                VSUB    qU1,qX0,qX4
                VST2    {dYr4,dYi4},[pDst :128],step1                    @// store y4
                VSUB    qU3,qX1,qX5
                VSUB    qU5,qX2,qX6
                VST2    {dYr2,dYi2},[pDst :128],step1                    @// store y6 (from dY2 regs)


            .endif

            @// finish first stage of 8 point FFT

            VSUB    qU7,qX3,qX7
            VMOV    dT0[0],t0                                   @// dT0 aliases dXr7; X7 was last read just above

            @// finish second stage of 8 point FFT
            @// (loads for the next pass are interleaved with the arithmetic)

            VSUB    dVr1,dUr1,dUi5
            VLD2    {dXr0,dXi0},[pSrc :128],pointStep          @//  data[0] for next iteration
            VADD    dVi1,dUi1,dUr5
            VADD    dVr3,dUr1,dUi5
            VLD2    {dXr1,dXi1},[pSrc :128],pointStep          @//  data[1]
            VSUB    dVi3,dUi1,dUr5

            VSUB    dVr5,dUr3,dUi7
            VLD2    {dXr2,dXi2},[pSrc :128],pointStep          @//  data[2]
            VADD    dVi5,dUi3,dUr7
            VADD    dVr7,dUr3,dUi7
            VLD2    {dXr3,dXi3},[pSrc :128],pointStep          @//  data[3]
            VSUB    dVi7,dUi3,dUr7

            @// finish third stage of 8 point FFT

            .ifeqs  "\inverse", "TRUE"

                @// calculate a*v5, a = (1+j)/sqrt(2)
                VQRDMULH    dT1,dVr5,dT0[0]                         @// dT1 = Re(v5)/sqrt(2); dT1 aliases dXi7
                VLD2    {dXr4,dXi4},[pSrc :128],pointStep          @//  data[4]
                VQRDMULH    dVi5,dVi5,dT0[0]

                VLD2    {dXr5,dXi5},[pSrc :128],pointStep          @//  data[5]
                VSUB    dVr5,dT1,dVi5                               @// a * V5
                VADD    dVi5,dT1,dVi5

                VLD2    {dXr6,dXi6},[pSrc :128],pointStep          @//  data[6]

                @// calculate  b*v7, b = (1-j)/sqrt(2)
                VQRDMULH    dT1,dVr7,dT0[0]
                VQRDMULH    dVi7,dVi7,dT0[0]

                VADD    qY1,qV1,qV5
                VSUB    qY5,qV1,qV5


                VADD    dVr7,dT1,dVi7                               @// b * V7
                VSUB    dVi7,dVi7,dT1
                SUB     pDst, pDst, step2                           @// set pDst to y1

                @// dT0/dT1 are dead from here on, so X7 may be reloaded over them
                VLD2    {dXr7,dXi7},[pSrc :128],setStep            @//  data[7]


                VSUB    dYr3,dVr3,dVr7
                VSUB    dYi3,dVi3,dVi7
                VST2    {dYr1,dYi1},[pDst :128],step1                    @// store y1
                VADD    dYr7,dVr3,dVr7
                VADD    dYi7,dVi3,dVi7


                VST2    {dYr3,dYi3},[pDst :128],step1                    @// store y3
                VST2    {dYr5,dYi5},[pDst :128],step1                    @// store y5
                VST2    {dYr7,dYi7},[pDst :128]!                      @// store y7 (pDst += 16)

            .else

                @// calculate  b*v7, b = (1-j)/sqrt(2)
                VQRDMULH    dT1,dVr7,dT0[0]
                VLD2    {dXr4,dXi4},[pSrc :128],pointStep          @//  data[4]
                VQRDMULH    dVi7,dVi7,dT0[0]

                VLD2    {dXr5,dXi5},[pSrc :128],pointStep          @//  data[5]
                VADD    dVr7,dT1,dVi7                               @// b * V7
                VSUB    dVi7,dVi7,dT1

                VLD2    {dXr6,dXi6},[pSrc :128],pointStep          @//  data[6]

                @// calculate a*v5, a = (1+j)/sqrt(2)
                VQRDMULH    dT1,dVr5,dT0[0]                         @// dT1 = Re(v5)/sqrt(2); dT1 aliases dXi7
                VQRDMULH    dVi5,dVi5,dT0[0]

                VADD    dYr7,dVr3,dVr7
                VADD    dYi7,dVi3,dVi7
                SUB     pDst, pDst, step2                           @// set pDst to y1

                VSUB    dVr5,dT1,dVi5                               @// a * V5
                VADD    dVi5,dT1,dVi5
                @// dT0/dT1 are dead from here on, so X7 may be reloaded over them
                VLD2    {dXr7,dXi7},[pSrc :128],setStep            @//  data[7]

                VSUB    qY5,qV1,qV5

                @// Forward case: the odd outputs are written in reversed register
                @// order (dY7, dY5, dY3, dY1) into the y1, y3, y5, y7 slots.
                VSUB    dYr3,dVr3,dVr7
                VST2    {dYr7,dYi7},[pDst :128],step1                    @// store y1 (from dY7 regs)
                VSUB    dYi3,dVi3,dVi7
                VADD    qY1,qV1,qV5


                VST2    {dYr5,dYi5},[pDst :128],step1                    @// store y3 (from dY5 regs)
                VST2    {dYr3,dYi3},[pDst :128],step1                    @// store y5 (from dY3 regs)
                VST2    {dYr1,dYi1},[pDst :128]!                      @// store y7 (from dY1 regs; pDst += 16)

            .endif


        .endif

        @// pDst is 7*pointStep+16 past this pass y0; stepping back by step2
        @// leaves it 16 bytes (two complex points) further on.
        SUB     pDst, pDst, step2                               @// update pDst for the next set
        BGT     grpZeroSetLoop\name                             @// flags come from the SUBS at the loop top


        @// reset pSrc to pDst for the next stage
        SUB     pSrc,pDst,pointStep                             @// pSrc = pDst - pointStep (pDst itself is replaced below)
        MOV     pDst,pPingPongBuf



        .endm
568
569
570        @// Allocate stack memory required by the function
571
572
        @// Forward, unscaled entry point.  Expands FFTSTAGE with scaled="FALSE"
        @// and inverse="FALSE", so the .else arm of both .ifeqs tests is
        @// assembled (plain VADD/VSUB butterflies, forward output ordering).
        @// The loop label of this expansion is grpZeroSetLoopFWD.
        @// M_START/M_END are macros defined outside this file (presumably in
        @// armCOMM_s.h); the meaning of the r4 argument is not visible here.
        M_START armSP_FFTFwd_CToC_SC32_Radix8_fs_OutOfPlace_unsafe,r4
            FFTSTAGE "FALSE","FALSE",FWD
        M_END
576
577
        @// Inverse, unscaled entry point.  Expands FFTSTAGE with scaled="FALSE"
        @// and inverse="TRUE": plain VADD/VSUB butterflies with the inverse
        @// arm of the inner .ifeqs tests.
        @// The loop label of this expansion is grpZeroSetLoopINV.
        @// M_START/M_END are macros defined outside this file (presumably in
        @// armCOMM_s.h); the meaning of the r4 argument is not visible here.
        M_START armSP_FFTInv_CToC_SC32_Radix8_fs_OutOfPlace_unsafe,r4
            FFTSTAGE "FALSE","TRUE",INV
        M_END
581
582
        @// Forward, scaled entry point.  Expands FFTSTAGE with scaled="TRUE"
        @// and inverse="FALSE": halving VHADD/VHSUB butterflies with the
        @// forward (.else) arm of the inner .ifeqs tests.
        @// The loop label of this expansion is grpZeroSetLoopFWDSFS.
        @// M_START/M_END are macros defined outside this file (presumably in
        @// armCOMM_s.h); the meaning of the r4 argument is not visible here.
        M_START armSP_FFTFwd_CToC_SC32_Sfs_Radix8_fs_OutOfPlace_unsafe,r4
            FFTSTAGE "TRUE","FALSE",FWDSFS
        M_END
586
587
        @// Inverse, scaled entry point.  Expands FFTSTAGE with scaled="TRUE"
        @// and inverse="TRUE": halving VHADD/VHSUB butterflies with the
        @// inverse arm of the inner .ifeqs tests.
        @// The loop label of this expansion is grpZeroSetLoopINVSFS.
        @// M_START/M_END are macros defined outside this file (presumably in
        @// armCOMM_s.h); the meaning of the r4 argument is not visible here.
        M_START armSP_FFTInv_CToC_SC32_Sfs_Radix8_fs_OutOfPlace_unsafe,r4
            FFTSTAGE "TRUE","TRUE",INVSFS
        M_END
591
592
593	.end
594