@//
@//  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
@//
@//  Use of this source code is governed by a BSD-style license
@//  that can be found in the LICENSE file in the root of the source
@//  tree. An additional intellectual property rights grant can be found
@//  in the file PATENTS.  All contributing project authors may
@//  be found in the AUTHORS file in the root of the source tree.
@//
@//  This file was originally licensed as follows. It has been
@//  relicensed with permission from the copyright holders.

@//
@//
@// File Name:  armSP_FFT_CToC_SC16_Radix2_ps_unsafe_s.s
@// OpenMAX DL: v1.0.2
@// Last Modified Revision:   6740
@// Last Modified Date:       Wed, 18 Jul 2007
@//
@// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
@//
@//
@//
@// Description:
@// Compute a Radix 2 FFT stage for a N point complex signal
@//
@//


@// Include standard headers

#include "dl/api/arm/armCOMM_s.h"
#include "dl/api/arm/omxtypes_s.h"


@// Import symbols required from other files
@// (For example tables)




@// Set debugging level
@//DEBUG_ON    SETL {TRUE}




@// Guarding implementation by the processor name


@//Input Registers

#define pSrc            r0
#define pDst            r2
#define pTwiddle        r1
#define subFFTNum       r6
#define subFFTSize      r7


@//Output Registers


@//Local Scratch Registers
@// Note: grpCount and pTmp deliberately share r4. grpCount is only live
@// up to the end of the group loop; pTmp is only used after it.

#define outPointStep    r3
#define grpCount        r4
#define dstStep         r5
#define twStep          r8
#define pTmp            r4

@// Neon Registers
@// dW1S32/dW1 and dW2S32/dW2 are 32-bit and 16-bit views of the same
@// D registers (D0 and D1). qT0 (Q5) and qT1 (Q6) overlay D10-D13.

#define dW1S32  D0.S32
#define dW2S32  D1.S32
#define dW1     D0.S16
#define dW2     D1.S16

#define dX0     D2.S16
#define dX1     D3.S16
#define dX2     D4.S16
#define dX3     D5.S16
#define dY0     D6.S16
#define dY1     D7.S16
#define dY2     D8.S16
#define dY3     D9.S16
#define qT0     Q5.S32
#define qT1     Q6.S32


@// ---------------------------------------------------------------------
@// FFTSTAGE scaled, inverse, name
@//
@// Emits one radix-2 stage that handles two groups (two sets each) per
@// loop iteration, reading 16-bit complex data from pSrc and writing to
@// pDst (out of place).
@//
@// Macro arguments:
@//   scaled  - "TRUE" selects the halving butterfly (VHSUB/VHADD); any
@//             other value selects the plain VSUB/VADD butterfly.
@//   inverse - "TRUE" multiplies by the conjugate of the twiddle
@//             (re = a*wr + b*wi, im = b*wr - a*wi); any other value
@//             uses the plain complex product
@//             (re = a*wr - b*wi, im = b*wr + a*wi).
@//   name    - suffix appended to the loop label so that every expansion
@//             of the macro gets a distinct label.
@//
@// Registers read on entry:
@//   pSrc (r0), pTwiddle (r1), pDst (r2), subFFTNum (r6), subFFTSize (r7)
@//
@// Register state after the macro:
@//   subFFTSize = 2 * (entry subFFTSize)
@//   subFFTNum  = (entry subFFTNum) >> 1
@//   pDst       = (pSrc after the loop) - 2*outPointStep
@//   pSrc       = (pDst after the loop) - outPointStep
@//   pTwiddle   = (pTwiddle after the loop) - outPointStep
@//   i.e. source and destination roles are exchanged for the next stage.
@//
@// Clobbers: r3, r4, r5, r8, condition flags, D0-D13.
@//
@// NOTE(review): D8-D13 are callee-saved under the AAPCS and no save or
@// restore is visible in this file. Presumably the callers of these
@// "unsafe" helpers take care of that - confirm against armCOMM_s.h and
@// the calling code.
@// NOTE(review): SMULBB multiplies only the bottom signed halfwords, so
@// grpCount and subFFTNum are assumed to fit in 16 bits - confirm.
@// ---------------------------------------------------------------------

        .macro FFTSTAGE scaled, inverse, name

        @// Define stack arguments


        @// Update grpCount and grpSize right away in order to reuse
        @// pGrpCount and pGrpSize regs


        LSL     grpCount,subFFTSize,#1


        @// update subFFTSize for the next stage
        MOV     subFFTSize,grpCount

        @// pOut0+1 increments pOut0 by 8 bytes
        @// pOut0+outPointStep == increment of 4*outPointStep bytes = 2*size bytes
        SMULBB  outPointStep,grpCount,subFFTNum
        MOV     twStep,subFFTNum,LSL #1
        LSR     subFFTNum,subFFTNum,#1                      @//grpSize


        @// dstStep = 8 - outPointStep: steps back from the second output
        @// point to the slot following the first one.
        RSB     dstStep,outPointStep,#8


        @// Note: pointStep is 8 in this case, so no extra register is
        @// needed (the post-increment on the loads below supplies it).
        @// Loop on the groups: 2 groups at a time

grpLoop\name:

        VLD1    dW1S32[],[pTwiddle],twStep                  @//[wi | wr]
        VLD1    dW2S32[],[pTwiddle],twStep

        @// Process the sets for each grp: 2 sets at a time (no set looping required)

        VLD1    dX0,[pSrc]!             @// point0: of set0,set1 of grp0
        VLD1    dX1,[pSrc]!             @// point1: of set0,set1 of grp0
        VLD1    dX2,[pSrc]!             @// point0: of set0,set1 of grp1
        VLD1    dX3,[pSrc]!             @// point1: of set0,set1 of grp1

        @// The flags set here are consumed by the BGT at the bottom of
        @// the loop; none of the NEON instructions in between alter them.
        SUBS    grpCount,grpCount,#4    @// decrement the loop counter

        @// De-interleave: after these, dW1/dX1 hold the even (first)
        @// halfwords and dW2/dX3 the odd (second) halfwords.
        VUZP    dW1,dW2
        VUZP    dX1,dX3

        .ifeqs  "\inverse", "TRUE"
            VMULL   qT0,dX1,dW1
            VMLAL   qT0,dX3,dW2                             @// real part
            VMULL   qT1,dX3,dW1
            VMLSL   qT1,dX1,dW2                             @// imag part

        .else
            VMULL   qT0,dX1,dW1
            VMLSL   qT0,dX3,dW2                             @// real part
            VMULL   qT1,dX3,dW1
            VMLAL   qT1,dX1,dW2                             @// imag part

        .endif

        @// Round and narrow the 32-bit products back to 16 bits.
        VRSHRN  dX1,qT0,#15
        VRSHRN  dX3,qT1,#15

        @// Re-interleave the two halves into the layout used by dX0/dX2.
        VZIP    dX1,dX3


        .ifeqs "\scaled", "TRUE"

            VHSUB   dY0,dX0,dX1
            VHADD   dY1,dX0,dX1
            VHSUB   dY2,dX2,dX3
            VHADD   dY3,dX2,dX3

        .else

            VSUB    dY0,dX0,dX1
            VADD    dY1,dX0,dX1
            VSUB    dY2,dX2,dX3
            VADD    dY3,dX2,dX3



        .endif

        VST1    dY0,[pDst],outPointStep @// point0: of set0,set1 of grp0
        VST1    dY1,[pDst],dstStep      @// point1: of set0,set1 of grp0; dstStep = -outPointStep + 8
        VST1    dY2,[pDst],outPointStep @// point0: of set0,set1 of grp1
        VST1    dY3,[pDst],dstStep      @// point1: of set0,set1 of grp1


        BGT     grpLoop\name


        @// Reset and Swap pSrc and pDst for the next stage
        @// (pTmp reuses r4; grpCount is no longer needed here)
        MOV     pTmp,pDst
        SUB     pDst,pSrc,outPointStep,LSL #1   @// new pDst = pSrc - 2*outPointStep
        SUB     pSrc,pTmp,outPointStep          @// new pSrc = old pDst - outPointStep

        @// Reset pTwiddle for the next stage
        SUB     pTwiddle,pTwiddle,outPointStep  @// pTwiddle -= 2*size bytes

        .endm


@// Entry points: one expansion of FFTSTAGE per (scaled, inverse) pair.
@// The third macro argument only has to be unique per expansion.

        @// Forward transform, unscaled
        M_START armSP_FFTFwd_CToC_SC16_Radix2_ps_OutOfPlace_unsafe,r4
        FFTSTAGE "FALSE","FALSE",FWD
        M_END


        @// Inverse transform, unscaled
        M_START armSP_FFTInv_CToC_SC16_Radix2_ps_OutOfPlace_unsafe,r4
        FFTSTAGE "FALSE","TRUE",INV
        M_END


        @// Forward transform, scaled (halving butterfly)
        M_START armSP_FFTFwd_CToC_SC16_Sfs_Radix2_ps_OutOfPlace_unsafe,r4
        FFTSTAGE "TRUE","FALSE",FWDSFS
        M_END


        @// Inverse transform, scaled (halving butterfly)
        M_START armSP_FFTInv_CToC_SC16_Sfs_Radix2_ps_OutOfPlace_unsafe,r4
        FFTSTAGE "TRUE","TRUE",INVSFS
        M_END



        .end