@//
@//  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
@//
@//  Use of this source code is governed by a BSD-style license
@//  that can be found in the LICENSE file in the root of the source
@//  tree. An additional intellectual property rights grant can be found
@//  in the file PATENTS.  All contributing project authors may
@//  be found in the AUTHORS file in the root of the source tree.
@//
@//  This file was originally licensed as follows. It has been
@//  relicensed with permission from the copyright holders.
@//

@//
@// File Name:  armSP_FFT_CToC_SC32_Radix8_fs_unsafe_s.s
@// OpenMAX DL: v1.0.2
@// Last Modified Revision:   7770
@// Last Modified Date:       Thu, 27 Sep 2007
@//
@// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
@//
@//
@//
@// Description:
@// Compute a first stage Radix 8 FFT stage for a N point complex signal
@//
@// Syntax / target: GNU as (ARM, A32/T32 with NEON), run through the C
@// preprocessor.  "@" starts a comment; the register names below are
@// preprocessor aliases, the NEON ones carrying an explicit .S32 type.
@//


@// Include standard headers

#include "dl/api/arm/armCOMM_s.h"
#include "dl/api/arm/omxtypes_s.h"

@// Import symbols required from other files
@// (For example tables)
@// (none are imported by this file)


@// Set debugging level
@//DEBUG_ON    SETL {TRUE}


@// Guarding implementation by the processor name
@// (no guard directive is present in this file)


@//Input Registers

#define pSrc            r0
#define pDst            r2
#define pTwiddle        r1
#define subFFTNum       r6
#define subFFTSize      r7
@// dest buffer for the next stage (not pSrc for first stage)
#define pPingPongBuf    r5


@//Output Registers
@// (none beyond the updated input registers; see the FFTSTAGE header)


@//Local Scratch Registers

#define grpSize         r3
@// Reuse grpSize as setCount
#define setCount        r3
#define pointStep       r4
#define outPointStep    r4
#define setStep         r8
#define step1           r9
#define step2           r10
#define t0              r11


@// Neon Registers
@//
@// Naming: X = loaded input points, U/V = results of the first/second
@// butterfly stage, Y = final outputs.  "r"/"i" are the real/imaginary
@// halves produced by the de-interleaving VLD2.  The U, V and Y sets
@// deliberately share Q8-Q15: even-indexed U/Y live in Q8-Q11 and
@// even-indexed V in Q12-Q15, while the odd-indexed ones are the other way
@// round.  Instruction order inside FFTSTAGE depends on this aliasing.

#define dXr0    D0.S32
#define dXi0    D1.S32
#define dXr1    D2.S32
#define dXi1    D3.S32
#define dXr2    D4.S32
#define dXi2    D5.S32
#define dXr3    D6.S32
#define dXi3    D7.S32
#define dXr4    D8.S32
#define dXi4    D9.S32
#define dXr5    D10.S32
#define dXi5    D11.S32
#define dXr6    D12.S32
#define dXi6    D13.S32
#define dXr7    D14.S32
#define dXi7    D15.S32
#define qX0     Q0.S32
#define qX1     Q1.S32
#define qX2     Q2.S32
#define qX3     Q3.S32
#define qX4     Q4.S32
#define qX5     Q5.S32
#define qX6     Q6.S32
#define qX7     Q7.S32

#define dUr0    D16.S32
#define dUi0    D17.S32
#define dUr2    D18.S32
#define dUi2    D19.S32
#define dUr4    D20.S32
#define dUi4    D21.S32
#define dUr6    D22.S32
#define dUi6    D23.S32
#define dUr1    D24.S32
#define dUi1    D25.S32
#define dUr3    D26.S32
#define dUi3    D27.S32
#define dUr5    D28.S32
#define dUi5    D29.S32
@// dUr7/dUi7 are D30/D31 (the halves of Q15); they do not alias dXr7/dXi7.
#define dUr7    D30.S32
#define dUi7    D31.S32
#define qU0     Q8.S32
#define qU1     Q12.S32
#define qU2     Q9.S32
#define qU3     Q13.S32
#define qU4     Q10.S32
#define qU5     Q14.S32
#define qU6     Q11.S32
#define qU7     Q15.S32



#define dVr0    D24.S32
#define dVi0    D25.S32
#define dVr2    D26.S32
#define dVi2    D27.S32
#define dVr4    D28.S32
#define dVi4    D29.S32
#define dVr6    D30.S32
#define dVi6    D31.S32
#define dVr1    D16.S32
#define dVi1    D17.S32
#define dVr3    D18.S32
#define dVi3    D19.S32
#define dVr5    D20.S32
#define dVi5    D21.S32
#define dVr7    D22.S32
#define dVi7    D23.S32
#define qV0     Q12.S32
#define qV1     Q8.S32
#define qV2     Q13.S32
#define qV3     Q9.S32
#define qV4     Q14.S32
#define qV5     Q10.S32
#define qV6     Q15.S32
#define qV7     Q11.S32



#define dYr0    D16.S32
#define dYi0    D17.S32
#define dYr2    D18.S32
#define dYi2    D19.S32
#define dYr4    D20.S32
#define dYi4    D21.S32
#define dYr6    D22.S32
#define dYi6    D23.S32
#define dYr1    D24.S32
#define dYi1    D25.S32
#define dYr3    D26.S32
#define dYi3    D27.S32
#define dYr5    D28.S32
#define dYi5    D29.S32
#define dYr7    D30.S32
#define dYi7    D31.S32
#define qY0     Q8.S32
#define qY1     Q12.S32
#define qY2     Q9.S32
#define qY3     Q13.S32
#define qY4     Q10.S32
#define qY5     Q14.S32
#define qY6     Q11.S32
#define qY7     Q15.S32


@// Temporaries.  These alias dXr7/dXi7 (D14/D15): dT0 lane 0 holds the
@// 1/sqrt(2) constant and dT1 is scratch.  Both are only valid between the
@// last read of qX7 and the reload of data[7], so the constant is re-inserted
@// into dT0 on every loop iteration.
#define dT0     D14.S32
#define dT1     D15.S32

@//-----------------------------------------------------------------------
@// FFTSTAGE scaled, inverse, name
@//
@// Emits the body of a first-stage radix-8 butterfly pass over interleaved
@// 32-bit complex data (one de-interleaving VLD2/VST2 moves two complex
@// points, 16 bytes).
@//
@// Macro arguments:
@//   scaled   "TRUE"  -> every butterfly add/sub is a halving VHADD/VHSUB,
@//                       so the three stages scale the outputs by 1/8.
@//            other   -> plain VADD/VSUB, no scaling.
@//   inverse  "TRUE"  -> outputs are stored in the slot matching their
@//                       register name (y1..y7).
@//            other   -> the same arithmetic is performed but results are
@//                       stored in mirrored slots (k <-> 8-k), which yields
@//                       the conjugate rotation direction.
@//   name     suffix appended to the loop label so that each expansion gets
@//            a unique label.
@//
@// Registers in:
@//   pSrc (r0)          source buffer; accessed with a :128 alignment
@//                      qualifier
@//   pDst (r2)          destination buffer; accessed with a :128 alignment
@//                      qualifier
@//   subFFTNum (r6)     number of points handled by this stage
@//   pPingPongBuf (r5)  buffer that becomes pDst on exit
@//   pTwiddle (r1)      not referenced by this macro
@//
@// Registers out:
@//   subFFTSize (r7) = 8
@//   subFFTNum  (r6) = subFFTNum / 8
@//   pSrc       (r0) = pDst on entry, provided setCount (= subFFTNum/8) is a
@//                     positive even number (each loop pass advances pDst by
@//                     16 bytes and consumes two sets)
@//   pDst       (r2) = pPingPongBuf
@//
@// Clobbers: r3, r4, r8-r11, D0-D31, condition flags.
@//
@// NOTE(review): D8-D15 are written here and are callee-saved under the
@// AAPCS; presumably the caller of these *_unsafe entry points preserves
@// them - confirm against M_START and the calling code.
@// NOTE(review): loads for the following pass are issued inside the loop
@// body, so the final pass also issues a full set of loads beyond the last
@// processed set - confirm the source buffer tolerates that.
@//-----------------------------------------------------------------------

        .macro FFTSTAGE scaled, inverse, name

        @// Define stack arguments
        @// (none)

        @// Update pSubFFTSize and pSubFFTNum regs
        @// t0 = 0x5A82799A, i.e. 1/sqrt(2) in Q31, built in two halves.
        MOVW    t0, 0x799A                      @// Low half word of 1/sqrt(2) in Q31.
        MOV     subFFTSize,#8
        MOVT    t0, 0x5A82                      @// High half word of 1/sqrt(2) in Q31.


        @// Note: setCount = subFFTNum/8 (reuse the grpSize reg for setCount)
        LSR     grpSize,subFFTNum,#3
        MOV     subFFTNum,grpSize


        @// pointStep is the byte distance between consecutive butterfly
        @// inputs: grpSize complex points of 8 bytes each.
        @// Note: outPointStep = pointStep for the first stage (same register).

        MOV     pointStep,grpSize,LSL #3


        @// Calculate the steps used to walk the input and output buffers.
        @// The initial loads are interleaved with that arithmetic.
        VLD2    {dXr0,dXi0},[pSrc :128],pointStep       @// data[0]
        MOV     step1,grpSize,LSL #4                    @// step1 = 2*pointStep

        MOV     step2,pointStep,LSL #3
        VLD2    {dXr1,dXi1},[pSrc :128],pointStep       @// data[1]
        SUB     step2,step2,pointStep                   @// step2 = 7*pointStep
        RSB     setStep,step2,#16                       @// setStep = -7*pointStep + 16

        VLD2    {dXr2,dXi2},[pSrc :128],pointStep       @// data[2]
        VLD2    {dXr3,dXi3},[pSrc :128],pointStep       @// data[3]
        VLD2    {dXr4,dXi4},[pSrc :128],pointStep       @// data[4]
        VLD2    {dXr5,dXi5},[pSrc :128],pointStep       @// data[5]
        VLD2    {dXr6,dXi6},[pSrc :128],pointStep       @// data[6]
        VLD2    {dXr7,dXi7},[pSrc :128],setStep         @// data[7] & update pSrc for the next set
                                                        @// setStep = -7*pointStep + 16
        @// grp = 0 a special case since all the twiddle factors are 1
        @// Loop on the sets

grpZeroSetLoop\name :

        @// Decrement setcount.  Two sets are consumed per pass.  The flags
        @// set here are tested by the BGT at the bottom of the loop; nothing
        @// in between writes the condition flags.
        SUBS    setCount,setCount,#2                    @// decrement the set loop counter


        .ifeqs  "\scaled", "TRUE"
        @// ---------------- scaled variant (halving arithmetic) ----------------

        @// finish first stage of 8 point FFT (even half)

        VHADD   qU0,qX0,qX4
        VHADD   qU2,qX1,qX5
        VHADD   qU4,qX2,qX6
        VHADD   qU6,qX3,qX7

        @// finish second stage of 8 point FFT

        VHADD   qV0,qU0,qU4
        VHSUB   qV2,qU0,qU4
        VHADD   qV4,qU2,qU6
        VHSUB   qV6,qU2,qU6

        @// finish third stage of 8 point FFT

        VHADD   qY0,qV0,qV4
        VHSUB   qY4,qV0,qV4
        VST2    {dYr0,dYi0},[pDst :128],step1           @// store y0

        .ifeqs  "\inverse", "TRUE"

        @// Y2 = V2 + j*V6, Y6 = V2 - j*V6
        VHSUB   dYr2,dVr2,dVi6
        VHADD   dYi2,dVi2,dVr6

        VHADD   dYr6,dVr2,dVi6
        VST2    {dYr2,dYi2},[pDst :128],step1           @// store y2
        VHSUB   dYi6,dVi2,dVr6

        @// Start the odd half; qU1/qU3/qU5 overwrite qV0/qV2/qV4, which are
        @// no longer needed at this point.
        VHSUB   qU1,qX0,qX4
        VST2    {dYr4,dYi4},[pDst :128],step1           @// store y4

        VHSUB   qU3,qX1,qX5
        VHSUB   qU5,qX2,qX6
        VST2    {dYr6,dYi6},[pDst :128],step1           @// store y6

        .else

        @// Same expressions as above; the registers are stored in swapped
        @// slots (dY6 -> slot 2, dY2 -> slot 6).
        VHADD   dYr6,dVr2,dVi6
        VHSUB   dYi6,dVi2,dVr6

        VHSUB   dYr2,dVr2,dVi6
        VST2    {dYr6,dYi6},[pDst :128],step1           @// store y2 (held in dY6)
        VHADD   dYi2,dVi2,dVr6


        VHSUB   qU1,qX0,qX4
        VST2    {dYr4,dYi4},[pDst :128],step1           @// store y4
        VHSUB   qU3,qX1,qX5
        VHSUB   qU5,qX2,qX6
        VST2    {dYr2,dYi2},[pDst :128],step1           @// store y6 (held in dY2)


        .endif

        @// finish first stage of 8 point FFT (odd half)

        VHSUB   qU7,qX3,qX7
        VMOV    dT0[0],t0                               @// qX7 is dead: reuse D14 for 1/sqrt(2)

        @// finish second stage of 8 point FFT
        @// (loads for the next pass are interleaved from here on)

        VHSUB   dVr1,dUr1,dUi5
        VLD2    {dXr0,dXi0},[pSrc :128],pointStep       @// data[0] for next iteration
        VHADD   dVi1,dUi1,dUr5
        VHADD   dVr3,dUr1,dUi5
        VLD2    {dXr1,dXi1},[pSrc :128],pointStep       @// data[1]
        VHSUB   dVi3,dUi1,dUr5

        VHSUB   dVr5,dUr3,dUi7
        VLD2    {dXr2,dXi2},[pSrc :128],pointStep       @// data[2]
        VHADD   dVi5,dUi3,dUr7
        VHADD   dVr7,dUr3,dUi7
        VLD2    {dXr3,dXi3},[pSrc :128],pointStep       @// data[3]
        VHSUB   dVi7,dUi3,dUr7

        @// finish third stage of 8 point FFT

        .ifeqs  "\inverse", "TRUE"

        @// calculate a*v5, a = (1 + j)/sqrt(2)
        VQRDMULH    dT1,dVr5,dT0[0]                     @// dT1 (D15) is scratch
        VLD2    {dXr4,dXi4},[pSrc :128],pointStep       @// data[4]
        VQRDMULH    dVi5,dVi5,dT0[0]

        VLD2    {dXr5,dXi5},[pSrc :128],pointStep       @// data[5]
        VSUB    dVr5,dT1,dVi5                           @// a * V5
        VADD    dVi5,dT1,dVi5

        VLD2    {dXr6,dXi6},[pSrc :128],pointStep       @// data[6]

        @// calculate b*v7, b = (1 - j)/sqrt(2)
        VQRDMULH    dT1,dVr7,dT0[0]
        VQRDMULH    dVi7,dVi7,dT0[0]

        VHADD   qY1,qV1,qV5
        VHSUB   qY5,qV1,qV5


        VADD    dVr7,dT1,dVi7                           @// b * V7
        VSUB    dVi7,dVi7,dT1
        SUB     pDst, pDst, step2                       @// set pDst to y1

        @// Last use of dT0/dT1 is above; D14/D15 may be reloaded now.
        VLD2    {dXr7,dXi7},[pSrc :128],setStep         @// data[7]


        VHSUB   dYr3,dVr3,dVr7
        VHSUB   dYi3,dVi3,dVi7
        VST2    {dYr1,dYi1},[pDst :128],step1           @// store y1
        VHADD   dYr7,dVr3,dVr7
        VHADD   dYi7,dVi3,dVi7


        VST2    {dYr3,dYi3},[pDst :128],step1           @// store y3
        VST2    {dYr5,dYi5},[pDst :128],step1           @// store y5
        VST2    {dYr7,dYi7},[pDst :128]!                @// store y7, pDst += 16

        .else

        @// calculate b*v7, b = (1 - j)/sqrt(2)
        VQRDMULH    dT1,dVr7,dT0[0]
        VLD2    {dXr4,dXi4},[pSrc :128],pointStep       @// data[4]
        VQRDMULH    dVi7,dVi7,dT0[0]

        VLD2    {dXr5,dXi5},[pSrc :128],pointStep       @// data[5]
        VADD    dVr7,dT1,dVi7                           @// b * V7
        VSUB    dVi7,dVi7,dT1

        VLD2    {dXr6,dXi6},[pSrc :128],pointStep       @// data[6]

        @// calculate a*v5, a = (1 + j)/sqrt(2)
        VQRDMULH    dT1,dVr5,dT0[0]                     @// dT1 (D15) is scratch
        VQRDMULH    dVi5,dVi5,dT0[0]

        VHADD   dYr7,dVr3,dVr7
        VHADD   dYi7,dVi3,dVi7
        SUB     pDst, pDst, step2                       @// set pDst to y1

        VSUB    dVr5,dT1,dVi5                           @// a * V5
        VADD    dVi5,dT1,dVi5
        @// Last use of dT0/dT1 is above; D14/D15 may be reloaded now.
        VLD2    {dXr7,dXi7},[pSrc :128],setStep         @// data[7]

        VHSUB   qY5,qV1,qV5

        VHSUB   dYr3,dVr3,dVr7
        VST2    {dYr7,dYi7},[pDst :128],step1           @// store y1 (held in dY7)
        VHSUB   dYi3,dVi3,dVi7
        VHADD   qY1,qV1,qV5


        VST2    {dYr5,dYi5},[pDst :128],step1           @// store y3 (held in dY5)
        VST2    {dYr3,dYi3},[pDst :128],step1           @// store y5 (held in dY3)
        VST2    {dYr1,dYi1},[pDst :128]!                @// store y7 (held in dY1), pDst += 16

        .endif



        .else
        @// --------------- unscaled variant (plain add/sub) ----------------
        @// Same schedule as the scaled variant with VADD/VSUB in place of
        @// VHADD/VHSUB.

        @// finish first stage of 8 point FFT (even half)

        VADD    qU0,qX0,qX4
        VADD    qU2,qX1,qX5
        VADD    qU4,qX2,qX6
        VADD    qU6,qX3,qX7

        @// finish second stage of 8 point FFT

        VADD    qV0,qU0,qU4
        VSUB    qV2,qU0,qU4
        VADD    qV4,qU2,qU6
        VSUB    qV6,qU2,qU6

        @// finish third stage of 8 point FFT

        VADD    qY0,qV0,qV4
        VSUB    qY4,qV0,qV4
        VST2    {dYr0,dYi0},[pDst :128],step1           @// store y0

        .ifeqs  "\inverse", "TRUE"

        @// Y2 = V2 + j*V6, Y6 = V2 - j*V6
        VSUB    dYr2,dVr2,dVi6
        VADD    dYi2,dVi2,dVr6

        VADD    dYr6,dVr2,dVi6
        VST2    {dYr2,dYi2},[pDst :128],step1           @// store y2
        VSUB    dYi6,dVi2,dVr6

        VSUB    qU1,qX0,qX4
        VST2    {dYr4,dYi4},[pDst :128],step1           @// store y4

        VSUB    qU3,qX1,qX5
        VSUB    qU5,qX2,qX6
        VST2    {dYr6,dYi6},[pDst :128],step1           @// store y6

        .else

        @// Same expressions as above; the registers are stored in swapped
        @// slots (dY6 -> slot 2, dY2 -> slot 6).
        VADD    dYr6,dVr2,dVi6
        VSUB    dYi6,dVi2,dVr6

        VSUB    dYr2,dVr2,dVi6
        VST2    {dYr6,dYi6},[pDst :128],step1           @// store y2 (held in dY6)
        VADD    dYi2,dVi2,dVr6


        VSUB    qU1,qX0,qX4
        VST2    {dYr4,dYi4},[pDst :128],step1           @// store y4
        VSUB    qU3,qX1,qX5
        VSUB    qU5,qX2,qX6
        VST2    {dYr2,dYi2},[pDst :128],step1           @// store y6 (held in dY2)


        .endif

        @// finish first stage of 8 point FFT (odd half)

        VSUB    qU7,qX3,qX7
        VMOV    dT0[0],t0                               @// qX7 is dead: reuse D14 for 1/sqrt(2)

        @// finish second stage of 8 point FFT
        @// (loads for the next pass are interleaved from here on)

        VSUB    dVr1,dUr1,dUi5
        VLD2    {dXr0,dXi0},[pSrc :128],pointStep       @// data[0] for next iteration
        VADD    dVi1,dUi1,dUr5
        VADD    dVr3,dUr1,dUi5
        VLD2    {dXr1,dXi1},[pSrc :128],pointStep       @// data[1]
        VSUB    dVi3,dUi1,dUr5

        VSUB    dVr5,dUr3,dUi7
        VLD2    {dXr2,dXi2},[pSrc :128],pointStep       @// data[2]
        VADD    dVi5,dUi3,dUr7
        VADD    dVr7,dUr3,dUi7
        VLD2    {dXr3,dXi3},[pSrc :128],pointStep       @// data[3]
        VSUB    dVi7,dUi3,dUr7

        @// finish third stage of 8 point FFT

        .ifeqs  "\inverse", "TRUE"

        @// calculate a*v5, a = (1 + j)/sqrt(2)
        VQRDMULH    dT1,dVr5,dT0[0]                     @// dT1 (D15) is scratch
        VLD2    {dXr4,dXi4},[pSrc :128],pointStep       @// data[4]
        VQRDMULH    dVi5,dVi5,dT0[0]

        VLD2    {dXr5,dXi5},[pSrc :128],pointStep       @// data[5]
        VSUB    dVr5,dT1,dVi5                           @// a * V5
        VADD    dVi5,dT1,dVi5

        VLD2    {dXr6,dXi6},[pSrc :128],pointStep       @// data[6]

        @// calculate b*v7, b = (1 - j)/sqrt(2)
        VQRDMULH    dT1,dVr7,dT0[0]
        VQRDMULH    dVi7,dVi7,dT0[0]

        VADD    qY1,qV1,qV5
        VSUB    qY5,qV1,qV5


        VADD    dVr7,dT1,dVi7                           @// b * V7
        VSUB    dVi7,dVi7,dT1
        SUB     pDst, pDst, step2                       @// set pDst to y1

        @// Last use of dT0/dT1 is above; D14/D15 may be reloaded now.
        VLD2    {dXr7,dXi7},[pSrc :128],setStep         @// data[7]


        VSUB    dYr3,dVr3,dVr7
        VSUB    dYi3,dVi3,dVi7
        VST2    {dYr1,dYi1},[pDst :128],step1           @// store y1
        VADD    dYr7,dVr3,dVr7
        VADD    dYi7,dVi3,dVi7


        VST2    {dYr3,dYi3},[pDst :128],step1           @// store y3
        VST2    {dYr5,dYi5},[pDst :128],step1           @// store y5
        VST2    {dYr7,dYi7},[pDst :128]!                @// store y7, pDst += 16

        .else

        @// calculate b*v7, b = (1 - j)/sqrt(2)
        VQRDMULH    dT1,dVr7,dT0[0]
        VLD2    {dXr4,dXi4},[pSrc :128],pointStep       @// data[4]
        VQRDMULH    dVi7,dVi7,dT0[0]

        VLD2    {dXr5,dXi5},[pSrc :128],pointStep       @// data[5]
        VADD    dVr7,dT1,dVi7                           @// b * V7
        VSUB    dVi7,dVi7,dT1

        VLD2    {dXr6,dXi6},[pSrc :128],pointStep       @// data[6]

        @// calculate a*v5, a = (1 + j)/sqrt(2)
        VQRDMULH    dT1,dVr5,dT0[0]                     @// dT1 (D15) is scratch
        VQRDMULH    dVi5,dVi5,dT0[0]

        VADD    dYr7,dVr3,dVr7
        VADD    dYi7,dVi3,dVi7
        SUB     pDst, pDst, step2                       @// set pDst to y1

        VSUB    dVr5,dT1,dVi5                           @// a * V5
        VADD    dVi5,dT1,dVi5
        @// Last use of dT0/dT1 is above; D14/D15 may be reloaded now.
        VLD2    {dXr7,dXi7},[pSrc :128],setStep         @// data[7]

        VSUB    qY5,qV1,qV5

        VSUB    dYr3,dVr3,dVr7
        VST2    {dYr7,dYi7},[pDst :128],step1           @// store y1 (held in dY7)
        VSUB    dYi3,dVi3,dVi7
        VADD    qY1,qV1,qV5


        VST2    {dYr5,dYi5},[pDst :128],step1           @// store y3 (held in dY5)
        VST2    {dYr3,dYi3},[pDst :128],step1           @// store y5 (held in dY3)
        VST2    {dYr1,dYi1},[pDst :128]!                @// store y7 (held in dY1), pDst += 16

        .endif


        .endif

        @// pDst is 16 bytes past slot 7 here; step back 7*pointStep so it
        @// points at slot 0 of the next pair of sets.  SUB (no S suffix)
        @// leaves the flags from the SUBS above intact for the BGT.
        SUB     pDst, pDst, step2                       @// update pDst for the next set
        BGT     grpZeroSetLoop\name


        @// reset pSrc to pDst for the next stage
        SUB     pSrc,pDst,pointStep                     @// pSrc = pDst - pointStep
        MOV     pDst,pPingPongBuf



        .endm


        @// Entry points.  M_START / M_END are defined outside this file
        @// (presumably in armCOMM_s.h and emitting the function prologue and
        @// epilogue, with r4 naming the saved-register range - confirm
        @// there).  Each function is one expansion of FFTSTAGE; the third
        @// macro argument only makes the loop label unique.


        @// Forward transform, unscaled.
        M_START armSP_FFTFwd_CToC_SC32_Radix8_fs_OutOfPlace_unsafe,r4
            FFTSTAGE "FALSE","FALSE",FWD
        M_END


        @// Inverse transform, unscaled.
        M_START armSP_FFTInv_CToC_SC32_Radix8_fs_OutOfPlace_unsafe,r4
            FFTSTAGE "FALSE","TRUE",INV
        M_END


        @// Forward transform, scaled (halving butterflies).
        M_START armSP_FFTFwd_CToC_SC32_Sfs_Radix8_fs_OutOfPlace_unsafe,r4
            FFTSTAGE "TRUE","FALSE",FWDSFS
        M_END


        @// Inverse transform, scaled (halving butterflies).
        M_START armSP_FFTInv_CToC_SC32_Sfs_Radix8_fs_OutOfPlace_unsafe,r4
            FFTSTAGE "TRUE","TRUE",INVSFS
        M_END


        .end