@//
@//  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
@//
@//  Use of this source code is governed by a BSD-style license
@//  that can be found in the LICENSE file in the root of the source
@//  tree. An additional intellectual property rights grant can be found
@//  in the file PATENTS.  All contributing project authors may
@//  be found in the AUTHORS file in the root of the source tree.
@//
@//  This file was originally licensed as follows. It has been
@//  relicensed with permission from the copyright holders.
@//

@//
@// File Name:  armSP_FFT_CToC_SC32_Radix4_fs_unsafe_s.s
@// OpenMAX DL: v1.0.2
@// Last Modified Revision:   7767
@// Last Modified Date:       Thu, 27 Sep 2007
@//
@// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
@//
@//
@//
@// Description:
@// Compute a first stage Radix 4 FFT stage for a N point complex signal
@//
@// Syntax: GNU as (ARM, unified NEON mnemonics), run through the C
@// preprocessor for the include and define lines below.
@//



@// Include standard headers

#include "dl/api/arm/armCOMM_s.h"
#include "dl/api/arm/omxtypes_s.h"

@// Import symbols required from other files
@// (For example tables)




@// Set debugging level
@//DEBUG_ON    SETL {TRUE}



@// Guarding implementation by the processor name


@//Input Registers

#define pSrc            r0
#define pDst            r2
#define pTwiddle        r1
#define pPingPongBuf    r5
#define subFFTNum       r6
#define subFFTSize      r7


@//Output Registers


@//Local Scratch Registers

#define grpSize         r3
@// Reuse grpSize as setCount
#define setCount        r3
#define pointStep       r4
#define outPointStep    r4
#define setStep         r8
#define step1           r9
#define step3           r10

@// Neon Registers
@// dX* / qX* : input points, dY* / qY* : first butterfly stage,
@// dZ* / qZ* : output points. Each Q register overlays a real (even D)
@// and an imaginary (odd D) half, as produced by the VLD2 de-interleave.

#define dXr0    D0.S32
#define dXi0    D1.S32
#define dXr1    D2.S32
#define dXi1    D3.S32
#define dXr2    D4.S32
#define dXi2    D5.S32
#define dXr3    D6.S32
#define dXi3    D7.S32
#define dYr0    D8.S32
#define dYi0    D9.S32
#define dYr1    D10.S32
#define dYi1    D11.S32
#define dYr2    D12.S32
#define dYi2    D13.S32
#define dYr3    D14.S32
#define dYi3    D15.S32
#define qX0     Q0.S32
#define qX1     Q1.S32
#define qX2     Q2.S32
#define qX3     Q3.S32
#define qY0     Q4.S32
#define qY1     Q5.S32
#define qY2     Q6.S32
#define qY3     Q7.S32
#define dZr0    D16.S32
#define dZi0    D17.S32
#define dZr1    D18.S32
#define dZi1    D19.S32
#define dZr2    D20.S32
#define dZi2    D21.S32
#define dZr3    D22.S32
#define dZi3    D23.S32
#define qZ0     Q8.S32
#define qZ1     Q9.S32
#define qZ2     Q10.S32
#define qZ3     Q11.S32


@//----------------------------------------------------------------------
@// FFTSTAGE scaled, inverse, name
@//
@// Emits the body of a first-stage radix-4 butterfly pass. No twiddle
@// multiplication is performed (pTwiddle is not referenced).
@//
@// Macro arguments:
@//   scaled  : "TRUE" selects the halving forms VHADD/VHSUB,
@//             anything else selects VADD/VSUB.
@//   inverse : "TRUE" selects Y2 + j*Y3 for the second stored point and
@//             Y2 - j*Y3 for the fourth; otherwise the two are swapped.
@//   name    : suffix appended to the loop label so that every expansion
@//             gets its own label.
@//
@// In:    pSrc (r0)         input pointer, used with a :128 alignment hint
@//        pDst (r2)         output pointer, used with a :128 alignment hint
@//        pPingPongBuf (r5) copied into pDst on exit
@//        subFFTNum (r6)    pointStep is derived as 2*subFFTNum bytes
@// Out:   subFFTSize (r7) = 4
@//        subFFTNum  (r6) = input subFFTNum / 4
@//        pSrc = value pDst had on entry, pDst = pPingPongBuf
@// Clobb: r3, r4, r8, r9, r10, flags, Q0-Q11 (D0-D23)
@//
@// NOTE(review): r8-r10 and D8-D15 are written here while the M_START
@// invocations below name only r4; confirm that callers of these
@// _unsafe entry points preserve those registers.
@//----------------------------------------------------------------------

        .macro FFTSTAGE scaled, inverse, name

        @// Define stack arguments

        @// pT0+1 increments pT0 by 8 bytes
        @// pT0+pointStep = increment of 8*pointStep bytes = 2*grpSize bytes
        @// Note: outPointStep = pointStep for firststage

        MOV     pointStep,subFFTNum,LSL #1                  @// pointStep = 2*subFFTNum (byte stride between data[k] and data[k+1])


        @// Update pSubFFTSize and pSubFFTNum regs
        VLD2    {dXr0,dXi0},[pSrc :128],pointStep           @//  data[0]
        MOV     subFFTSize,#4                               @// subFFTSize = 4 after the radix-4 first stage

        @// Note: setCount = subFFTNum/4 (reuse the grpSize reg for setCount)
        LSR     grpSize,subFFTNum,#2
        VLD2    {dXr1,dXi1},[pSrc :128],pointStep           @//  data[1]
        MOV     subFFTNum,grpSize


        @// Calculate the step of input data for the next set
        @//MOV     setStep,pointStep,LSL #1
        MOV     setStep,grpSize,LSL #4                      @// setStep = 16*grpSize = 2*pointStep
        VLD2    {dXr2,dXi2},[pSrc :128],pointStep           @//  data[2]
        ADD     setStep,setStep,pointStep                   @// setStep = 3*pointStep
        RSB     setStep,setStep,#16                         @// setStep = - 3*pointStep+16

        VLD2    {dXr3,dXi3},[pSrc :128],setStep             @//  data[3] & update pSrc for the next set
        MOV     step1,pointStep,LSL #1                      @// step1 = 2*pointStep

        @// Pipeline prologue: u0 = X0 + X2 for the first loop iteration
        .ifeqs  "\scaled", "TRUE"
            VHADD    qY0,qX0,qX2
        .else
            VADD     qY0,qX0,qX2
        .endif

        RSB     step3,pointStep,#0                          @// step3 = -pointStep

        @// grp = 0 a special case since all the twiddle factors are 1
        @// Loop on the sets : 2 sets at a time
        @//
        @// The loop is software pipelined: while the points already held
        @// in qX0-qX3 are combined and stored, the same registers are
        @// reloaded with the following set. The loads visit data[0],
        @// data[2], data[1], data[3] using the steps +2*pointStep,
        @// -pointStep, +2*pointStep and setStep.
        @//
        @// NOTE(review): the loads also execute on the final iteration,
        @// where the loaded values are not consumed within this macro.
        @// If the input holds exactly 4*pointStep bytes, the last data[3]
        @// load reads 16 bytes beyond it - confirm callers allow this.

grpZeroSetLoop\name :



        @// Decrement setcount
        @// The flags set here are consumed by the BGT at the bottom of
        @// the loop; nothing in between writes the APSR flags.
        SUBS    setCount,setCount,#2                        @// decrement the set loop counter

        .ifeqs  "\scaled", "TRUE"

            @// finish first stage of 4 point FFT

            VHSUB    qY2,qX0,qX2

            VLD2    {dXr0,dXi0},[pSrc :128],step1           @//  data[0]
            VHADD    qY1,qX1,qX3
            VLD2    {dXr2,dXi2},[pSrc :128],step3           @//  data[2]
            VHSUB    qY3,qX1,qX3


            @// finish second stage of 4 point FFT
            @// Store order is Y0+Y1, Y2 -/+ j*Y3, Y0-Y1, Y2 +/- j*Y3

            .ifeqs  "\inverse", "TRUE"

                VLD2    {dXr1,dXi1},[pSrc :128],step1       @//  data[1]
                VHADD    qZ0,qY0,qY1

                VLD2    {dXr3,dXi3},[pSrc :128],setStep     @//  data[3] & update pSrc for the next set
                VHSUB    dZr3,dYr2,dYi3                     @// re(Y2 + j*Y3)

                VST2    {dZr0,dZi0},[pDst :128],outPointStep
                VHADD    dZi3,dYi2,dYr3                     @// im(Y2 + j*Y3)

                VHSUB    qZ1,qY0,qY1
                VST2    {dZr3,dZi3},[pDst :128],outPointStep

                VHADD    dZr2,dYr2,dYi3                     @// re(Y2 - j*Y3)
                VST2    {dZr1,dZi1},[pDst :128],outPointStep
                VHSUB    dZi2,dYi2,dYr3                     @// im(Y2 - j*Y3)

                VHADD    qY0,qX0,qX2                        @// u0 for next iteration
                VST2    {dZr2,dZi2},[pDst :128],setStep     @// setStep moves pDst to the next set


            .else

                VLD2    {dXr1,dXi1},[pSrc :128],step1       @//  data[1]
                VHADD    qZ0,qY0,qY1

                VLD2    {dXr3,dXi3},[pSrc :128],setStep     @//  data[3] & update pSrc for the next set
                VHADD    dZr2,dYr2,dYi3                     @// re(Y2 - j*Y3)

                VST2    {dZr0,dZi0},[pDst :128],outPointStep
                VHSUB    dZi2,dYi2,dYr3                     @// im(Y2 - j*Y3)

                VHSUB    qZ1,qY0,qY1
                VST2    {dZr2,dZi2},[pDst :128],outPointStep

                VHSUB    dZr3,dYr2,dYi3                     @// re(Y2 + j*Y3)
                VST2    {dZr1,dZi1},[pDst :128],outPointStep
                VHADD    dZi3,dYi2,dYr3                     @// im(Y2 + j*Y3)

                VHADD    qY0,qX0,qX2                        @// u0 for next iteration
                VST2    {dZr3,dZi3},[pDst :128],setStep     @// setStep moves pDst to the next set

            .endif



        .else

            @// finish first stage of 4 point FFT


            VSUB    qY2,qX0,qX2

            VLD2    {dXr0,dXi0},[pSrc :128],step1           @//  data[0]
            VADD    qY1,qX1,qX3
            VLD2    {dXr2,dXi2},[pSrc :128],step3           @//  data[2]
            VSUB    qY3,qX1,qX3


            @// finish second stage of 4 point FFT
            @// Store order is Y0+Y1, Y2 -/+ j*Y3, Y0-Y1, Y2 +/- j*Y3

            .ifeqs  "\inverse", "TRUE"

                VLD2    {dXr1,dXi1},[pSrc :128],step1       @//  data[1]
                VADD    qZ0,qY0,qY1

                VLD2    {dXr3,dXi3},[pSrc :128],setStep     @//  data[3] & update pSrc for the next set
                VSUB    dZr3,dYr2,dYi3                      @// re(Y2 + j*Y3)

                VST2    {dZr0,dZi0},[pDst :128],outPointStep
                VADD    dZi3,dYi2,dYr3                      @// im(Y2 + j*Y3)

                VSUB    qZ1,qY0,qY1
                VST2    {dZr3,dZi3},[pDst :128],outPointStep

                VADD    dZr2,dYr2,dYi3                      @// re(Y2 - j*Y3)
                VST2    {dZr1,dZi1},[pDst :128],outPointStep
                VSUB    dZi2,dYi2,dYr3                      @// im(Y2 - j*Y3)

                VADD    qY0,qX0,qX2                         @// u0 for next iteration
                VST2    {dZr2,dZi2},[pDst :128],setStep     @// setStep moves pDst to the next set


            .else

                VLD2    {dXr1,dXi1},[pSrc :128],step1       @//  data[1]
                VADD    qZ0,qY0,qY1

                VLD2    {dXr3,dXi3},[pSrc :128],setStep     @//  data[3] & update pSrc for the next set
                VADD    dZr2,dYr2,dYi3                      @// re(Y2 - j*Y3)

                VST2    {dZr0,dZi0},[pDst :128],outPointStep
                VSUB    dZi2,dYi2,dYr3                      @// im(Y2 - j*Y3)

                VSUB    qZ1,qY0,qY1
                VST2    {dZr2,dZi2},[pDst :128],outPointStep

                VSUB    dZr3,dYr2,dYi3                      @// re(Y2 + j*Y3)
                VST2    {dZr1,dZi1},[pDst :128],outPointStep
                VADD    dZi3,dYi2,dYr3                      @// im(Y2 + j*Y3)

                VADD    qY0,qX0,qX2                         @// u0 for next iteration
                VST2    {dZr3,dZi3},[pDst :128],setStep     @// setStep moves pDst to the next set

            .endif

        .endif

        BGT     grpZeroSetLoop\name

        @// reset pSrc to pDst for the next stage
        @// Each iteration advances pDst by 3*outPointStep + setStep = 16
        @// bytes, so stepping back by pointStep yields the entry value of
        @// pDst when the loop has run pointStep/16 times.
        SUB     pSrc,pDst,pointStep                         @// pSrc = pDst - pointStep
        MOV     pDst,pPingPongBuf


        .endm


        @// Entry points: one expansion of FFTSTAGE per (scaled, inverse)
        @// combination. M_START and M_END come from armCOMM_s.h.

        M_START armSP_FFTFwd_CToC_SC32_Radix4_fs_OutOfPlace_unsafe,r4
        FFTSTAGE "FALSE","FALSE",fwd
        M_END



        M_START armSP_FFTInv_CToC_SC32_Radix4_fs_OutOfPlace_unsafe,r4
        FFTSTAGE "FALSE","TRUE",inv
        M_END


        M_START armSP_FFTFwd_CToC_SC32_Sfs_Radix4_fs_OutOfPlace_unsafe,r4
        FFTSTAGE "TRUE","FALSE",fwdsfs
        M_END


        M_START armSP_FFTInv_CToC_SC32_Sfs_Radix4_fs_OutOfPlace_unsafe,r4
        FFTSTAGE "TRUE","TRUE",invsfs
        M_END

        .end