@//
@//  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
@//
@//  Use of this source code is governed by a BSD-style license
@//  that can be found in the LICENSE file in the root of the source
@//  tree. An additional intellectual property rights grant can be found
@//  in the file PATENTS.  All contributing project authors may
@//  be found in the AUTHORS file in the root of the source tree.
@//
@//  This file was originally licensed as follows. It has been
@//  relicensed with permission from the copyright holders.

@//
@//
@// File Name:  armSP_FFT_CToC_SC16_Radix4_fs_unsafe_s.s
@// OpenMAX DL: v1.0.2
@// Last Modified Revision:   7761
@// Last Modified Date:       Wed, 26 Sep 2007
@//
@// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
@//
@//
@//
@// Description:
@// Compute a first stage Radix 4 FFT stage for a N point complex signal
@//
@// Syntax / target: GNU as (ARM, NEON), run through the C preprocessor
@// (the file relies on #include and #define).
@//


@// Include standard headers

#include "dl/api/arm/armCOMM_s.h"
#include "dl/api/arm/omxtypes_s.h"

@// Import symbols required from other files
@// (For example tables)
@// None are needed here: the macro below never reads pTwiddle.


@// Set debugging level
@//DEBUG_ON    SETL {TRUE}


@// Guarding implementation by the processor name
@// (no guard is present in this version of the file)


@//Input Registers

#define pSrc            r0
#define pDst            r2
#define pTwiddle        r1
#define pPingPongBuf    r5
#define subFFTNum       r6
#define subFFTSize      r7


@//Output Registers
@// pSrc, pDst, subFFTNum and subFFTSize are rewritten by FFTSTAGE;
@// see the macro header below.


@//Local Scratch Registers

#define grpSize         r3
@// Reuse grpSize as setCount
#define setCount        r3
#define pointStep       r4
@// outPointStep aliases pointStep: input and output use the same stride
#define outPointStep    r4
#define setStep         r8
#define step1           r9
#define step3           r10

@// Neon Registers
@//   dX* / qX* : input points x0..x3      (D0-D7,   Q0-Q3)
@//   dY* / qY* : intermediates u0..u3     (D8-D15,  Q4-Q7)
@//   dZ* / qZ* : output points            (D16-D23, Q8-Q9 plus D20-D23)
@// Each qN alias covers the (real, imag) D pair of the same index,
@// e.g. qX0 = {dXr0, dXi0}.

#define dXr0    D0.S16
#define dXi0    D1.S16
#define dXr1    D2.S16
#define dXi1    D3.S16
#define dXr2    D4.S16
#define dXi2    D5.S16
#define dXr3    D6.S16
#define dXi3    D7.S16
#define dYr0    D8.S16
#define dYi0    D9.S16
#define dYr1    D10.S16
#define dYi1    D11.S16
#define dYr2    D12.S16
#define dYi2    D13.S16
#define dYr3    D14.S16
#define dYi3    D15.S16
#define dZr0    D16.S16
#define dZi0    D17.S16
#define dZr1    D18.S16
#define dZi1    D19.S16
#define dZr2    D20.S16
#define dZi2    D21.S16
#define dZr3    D22.S16
#define dZi3    D23.S16
#define qY0     Q4.S16
#define qY2     Q6.S16
#define qX0     Q0.S16
#define qX2     Q2.S16

#define qY1     Q5.S16
#define qY3     Q7.S16
#define qX1     Q1.S16
#define qX3     Q3.S16
#define qZ0     Q8.S16
#define qZ1     Q9.S16


@//---------------------------------------------------------------------
@// FFTSTAGE scaled, inverse, name
@//
@// First radix-4 stage (all twiddle factors equal to 1) on 16-bit
@// complex data, out of place, four butterflies per loop iteration.
@//
@// Macro arguments:
@//   scaled  : "TRUE" selects halving adds/subs (VHADD/VHSUB), so every
@//             add or subtract result is halved; anything else selects
@//             plain VADD/VSUB.
@//   inverse : "TRUE" swaps the +j / -j outputs of the butterfly.
@//   name    : suffix that makes the loop label unique per expansion.
@//
@// In:
@//   pSrc (r0)         input buffer; accessed with a :128 alignment hint
@//   pDst (r2)         output buffer; accessed with a :128 alignment hint
@//   pPingPongBuf (r5) copied into pDst on exit
@//   subFFTNum (r6)    used directly as the byte stride between the four
@//                     butterfly inputs. Presumably the FFT length N, so
@//                     that the stride is N/4 points * 4 bytes - TODO
@//                     confirm against the caller.
@//
@// Out:
@//   pSrc        = final pDst - pointStep (per the original comment,
@//                 pSrc is reset to pDst for the next stage)
@//   pDst        = pPingPongBuf
@//   subFFTNum   = subFFTNum / 4
@//   subFFTSize  = 4
@//
@// Clobbers: r3, r4, r8, r9, r10, flags, D0-D23.
@//   NOTE(review): D8-D15 are overwritten and not saved by this macro.
@//   Whether M_START/M_END or the callers preserve them cannot be seen
@//   from this file - confirm before calling from code that relies on
@//   them being preserved.
@//
@// The loop counts down by 4 and exits when the counter is <= 0, so
@// subFFTNum/4 is expected to be a positive multiple of 4; any other
@// value still runs a whole iteration of 4 sets.
@//
@// Instruction order is hand-scheduled: loads for the next iteration
@// are interleaved with the arithmetic of the current one. Do not
@// reorder without re-checking register lifetimes.
@//---------------------------------------------------------------------

        .macro FFTSTAGE scaled, inverse, name

        @// Define stack arguments
        @// (none: everything is passed in registers)

        MOV     pointStep,subFFTNum                     @// byte stride between x0,x1,x2,x3
        @// Update pSubFFTSize and pSubFFTNum regs


        VLD2    {dXr0,dXi0},[pSrc :128],pointStep       @//  data[0]
        @// Note: setCount = subFFTNum/4 (reuse the grpSize reg for setCount)
        LSR     grpSize,subFFTNum,#2
        MOV     subFFTNum,grpSize                       @// subFFTNum register now holds subFFTNum/4


        @// pointStep is a byte stride (post-increment of pSrc / pDst).
        @// Each VLD2 / VST2 below moves 16 bytes: four points of
        @// 16-bit real + 16-bit imaginary, de-interleaved into a
        @// real D register and an imaginary D register.
        @// Note: outPointStep = pointStep for firststage
        VLD2    {dXr1,dXi1},[pSrc :128],pointStep       @//  data[1]


        @// Calculate the step of input data for the next set
        @//MOV     setStep,pointStep,LSL #1
        MOV     setStep,grpSize,LSL #3                  @// 8*grpSize = 2*pointStep when subFFTNum is a multiple of 4
        VLD2    {dXr2,dXi2},[pSrc :128],pointStep       @//  data[2]
        MOV     step1,setStep                           @// step1 = 2*pointStep
        ADD     setStep,setStep,pointStep               @// setStep = 3*pointStep
        RSB     setStep,setStep,#16                     @// setStep = - 3*pointStep+16


        MOV     subFFTSize,#4                           @// subFFTSize = 4 on exit from the first stage


        @// u0 for the first iteration is computed ahead of the loop;
        @// later iterations compute it at the bottom of the loop body.
        .ifeqs  "\scaled", "TRUE"
            VHADD   qY0,qX0,qX2                         @// u0
        .else
            VADD    qY0,qX0,qX2                         @// u0
        .endif
        RSB     step3,pointStep,#0                      @// step3 = -pointStep

        @// grp = 0 a special case since all the twiddle factors are 1
        @// Loop on the sets: 4 sets at a time
        @//
        @// Butterfly computed per point:
        @//   u0 = x0 + x2     u1 = x0 - x2
        @//   u2 = x1 + x3     u3 = x1 - x3
        @//   qZ0           = u0 + u2
        @//   qZ1           = u0 - u2
        @//   (dZr2, dZi2)  = (u1.re + u3.im, u1.im - u3.re)
        @//   (dZr3, dZi3)  = (u1.re - u3.im, u1.im + u3.re)
        @// Store order, forward: Z0, Z2, Z1, Z3
        @// Store order, inverse: Z0, Z3, Z1, Z2
        @//
        @// pSrc walk inside one iteration (relative to x0 of this set):
        @//   x3 --(setStep)--> next x0 --(step1)--> next x2
        @//      --(step3)--> next x1 --(step1)--> next x3

grpZeroSetLoop\name:

        VLD2    {dXr3,dXi3},[pSrc :128],setStep         @//  data[3]

        .ifeqs  "\scaled", "TRUE"

            @// finish first stage of 4 point FFT

            VHSUB   qY2,qX0,qX2                         @// u1
            SUBS    setCount,setCount,#4                @// decrement the set loop counter

            VLD2    {dXr0,dXi0},[pSrc :128],step1       @//  data[0] (next iteration)
            VHADD   qY1,qX1,qX3                         @// u2
            VLD2    {dXr2,dXi2},[pSrc :128],step3       @//  data[2] (next iteration)
            VHSUB   qY3,qX1,qX3                         @// u3



            @// finish second stage of 4 point FFT

            VLD2    {dXr1,dXi1},[pSrc :128],step1       @//  data[1] (next iteration)
            VHADD   qZ0,qY0,qY1                         @// y0

            .ifeqs  "\inverse", "TRUE"

                VHSUB   dZr3,dYr2,dYi3                  @// y3
                VHADD   dZi3,dYi2,dYr3
                VST2    {dZr0,dZi0},[pDst :128],outPointStep

                VHSUB   qZ1,qY0,qY1                     @// y2
                VST2    {dZr3,dZi3},[pDst :128],outPointStep

                VHADD   dZr2,dYr2,dYi3                  @// y1
                VST2    {dZr1,dZi1},[pDst :128],outPointStep
                VHSUB   dZi2,dYi2,dYr3

                VHADD   qY0,qX0,qX2                     @// u0 (next loop)
                VST2    {dZr2,dZi2},[pDst :128],setStep


            .else

                VHADD   dZr2,dYr2,dYi3                  @// y1
                VHSUB   dZi2,dYi2,dYr3

                VST2    {dZr0,dZi0},[pDst :128],outPointStep
                VHSUB   qZ1,qY0,qY1                     @// y2

                VST2    {dZr2,dZi2},[pDst :128],outPointStep
                VHSUB   dZr3,dYr2,dYi3                  @// y3
                VHADD   dZi3,dYi2,dYr3
                VST2    {dZr1,dZi1},[pDst :128],outPointStep
                VHADD   qY0,qX0,qX2                     @// u0 (next loop)
                VST2    {dZr3,dZi3},[pDst :128],setStep

            .endif


        .else

            @// finish first stage of 4 point FFT

            VSUB    qY2,qX0,qX2                         @// u1
            SUBS    setCount,setCount,#4                @// decrement the set loop counter

            VLD2    {dXr0,dXi0},[pSrc :128],step1       @//  data[0] (next iteration)
            VADD    qY1,qX1,qX3                         @// u2
            VLD2    {dXr2,dXi2},[pSrc :128],step3       @//  data[2] (next iteration)
            VSUB    qY3,qX1,qX3                         @// u3



            @// finish second stage of 4 point FFT

            VLD2    {dXr1,dXi1},[pSrc :128],step1       @//  data[1] (next iteration)
            VADD    qZ0,qY0,qY1                         @// y0

            .ifeqs  "\inverse", "TRUE"

                VSUB    dZr3,dYr2,dYi3                  @// y3
                VADD    dZi3,dYi2,dYr3
                VST2    {dZr0,dZi0},[pDst :128],outPointStep

                VSUB    qZ1,qY0,qY1                     @// y2
                VST2    {dZr3,dZi3},[pDst :128],outPointStep

                VADD    dZr2,dYr2,dYi3                  @// y1
                VST2    {dZr1,dZi1},[pDst :128],outPointStep
                VSUB    dZi2,dYi2,dYr3

                VADD    qY0,qX0,qX2                     @// u0 (next loop)
                VST2    {dZr2,dZi2},[pDst :128],setStep


            .else

                VADD    dZr2,dYr2,dYi3                  @// y1
                VSUB    dZi2,dYi2,dYr3

                VST2    {dZr0,dZi0},[pDst :128],outPointStep
                VSUB    qZ1,qY0,qY1                     @// y2

                VST2    {dZr2,dZi2},[pDst :128],outPointStep
                VSUB    dZr3,dYr2,dYi3                  @// y3
                VADD    dZi3,dYi2,dYr3
                VST2    {dZr1,dZi1},[pDst :128],outPointStep
                VADD    qY0,qX0,qX2                     @// u0 (next loop)
                VST2    {dZr3,dZi3},[pDst :128],setStep

            .endif


        .endif

        @// Flags still come from the SUBS above: the NEON instructions
        @// in between do not write the condition flags.
        BGT     grpZeroSetLoop\name


        @// reset pSrc to pDst for the next stage
        @// Each iteration advances pDst by a net 16 bytes
        @// (3*outPointStep + setStep), so pDst has moved forward by
        @// pointStep bytes in total when subFFTNum/4 is a multiple of 4.
        SUB     pSrc,pDst,pointStep                     @// pSrc = pDst - pointStep
        MOV     pDst,pPingPongBuf                       @// pDst = pPingPongBuf


        .endm


        @// Entry points: one expansion of FFTSTAGE per
        @// (scaled, inverse) combination. M_START / M_END come from
        @// armCOMM_s.h.

        @// Forward, unscaled
        M_START armSP_FFTFwd_CToC_SC16_Radix4_fs_OutOfPlace_unsafe,r4
        FFTSTAGE "FALSE","FALSE",FWD
        M_END


        @// Inverse, unscaled
        M_START armSP_FFTInv_CToC_SC16_Radix4_fs_OutOfPlace_unsafe,r4
        FFTSTAGE "FALSE","TRUE",INV
        M_END


        @// Forward, scaled
        M_START armSP_FFTFwd_CToC_SC16_Sfs_Radix4_fs_OutOfPlace_unsafe,r4
        FFTSTAGE "TRUE","FALSE",FWDSFS
        M_END


        @// Inverse, scaled
        M_START armSP_FFTInv_CToC_SC16_Sfs_Radix4_fs_OutOfPlace_unsafe,r4
        FFTSTAGE "TRUE","TRUE",INVSFS
        M_END


        .end