@//
@//  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
@//
@//  Use of this source code is governed by a BSD-style license
@//  that can be found in the LICENSE file in the root of the source
@//  tree. An additional intellectual property rights grant can be found
@//  in the file PATENTS.  All contributing project authors may
@//  be found in the AUTHORS file in the root of the source tree.
@//
@//  This file was originally licensed as follows. It has been
@//  relicensed with permission from the copyright holders.

@//
@//
@// File Name:  armSP_FFT_CToC_SC16_Radix2_ls_unsafe_s.s
@// OpenMAX DL: v1.0.2
@// Last Modified Revision:   6741
@// Last Modified Date:       Wed, 18 Jul 2007
@//
@// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
@//
@//
@//
@// Description:
@// Compute a Radix 2 FFT stage for a N point complex signal
@//
@//


@// Include standard headers

#include "dl/api/arm/armCOMM_s.h"
#include "dl/api/arm/omxtypes_s.h"


@// No symbols are imported from other files and no debug level is set here.


@// ---------------------------------------------------------------------
@// Register aliases.
@// Explanatory text is kept on separate lines from the defines so that
@// nothing but the register name ends up in the preprocessor expansion.
@// ---------------------------------------------------------------------

@// Input registers: source buffer, twiddle table, destination buffer,
@// and the two stage counters that are also rewritten for the next stage.

#define pSrc            r0
#define pTwiddle        r1
#define pDst            r2
#define subFFTNum       r6
#define subFFTSize      r7


@// Local scratch registers.
@// grpCount and pTmp share r4: grpCount is only live inside the group
@// loop, pTmp only in the pointer swap that follows it.

#define outPointStep    r3
#define grpCount        r4
#define pTmp            r4
#define dstStep         r5
#define step            r8


@// NEON registers: twiddles (W), inputs (X0, X1), outputs (Y0, Y1) as
@// 16-bit lanes, plus two 32-bit accumulators for the twiddle product.

#define dWr             D0.S16
#define dWi             D1.S16
#define dXr0            D2.S16
#define dXi0            D3.S16
#define dXr1            D4.S16
#define dXi1            D5.S16
#define dYr0            D6.S16
#define dYi0            D7.S16
#define dYr1            D8.S16
#define dYi1            D9.S16
#define qT0             Q5.S32
#define qT1             Q6.S32


@// ---------------------------------------------------------------------
@// FFTSTAGE scaled, inverse, name
@//
@// Emits one radix-2 stage that handles two groups per loop iteration.
@//
@//   scaled   "TRUE"  selects halving add/sub (VHADD/VHSUB),
@//            anything else selects plain VADD/VSUB.
@//   inverse  "TRUE"  multiplies X1 by the conjugate of the twiddle,
@//            anything else multiplies by the twiddle itself.
@//   name     suffix appended to the loop label so that every expansion
@//            gets its own label.
@//
@// On entry : pSrc, pDst, pTwiddle, subFFTSize.
@// On exit  : pSrc and pDst exchanged and rewound, pTwiddle rewound,
@//            subFFTSize doubled, subFFTNum set to 1.
@// Writes   : r3, r4, r5, r8, D0-D9, Q5, Q6 and the condition flags.
@// ---------------------------------------------------------------------

        .macro FFTSTAGE scaled, inverse, name

        @// Distance in bytes between the two outputs of one butterfly
        @// (subFFTSize points of 4 bytes each).
        LSL     outPointStep, subFFTSize, #2

        @// Stage bookkeeping, done up front: subFFTSize is doubled for
        @// the next stage and grpCount starts at that doubled value.
        LSL     subFFTSize, subFFTSize, #1
        MOV     grpCount, subFFTSize
        MOV     subFFTNum, #1                   @// value after the last stage

        @// Post-index strides for the second store of each output pair.
        @// The first store of a pair already advanced pDst by 4.
        SUB     step, outPointStep, #4          @// step    = outPointStep - 4
        RSB     dstStep, outPointStep, #4       @// dstStep = 4 - outPointStep = -step


grpLoop\name:
        @// One twiddle per group, loaded into lanes 0 and 1.
        VLD2    {dWr[0],dWi[0]},[pTwiddle]!     @// group 0
        VLD2    {dWr[1],dWi[1]},[pTwiddle]!     @// group 1

        @// Two complex input points (X0, X1) per group.
        VLD4    {dXr0[0],dXi0[0],dXr1[0],dXi1[0]},[pSrc]!       @// group 0
        VLD4    {dXr0[1],dXi0[1],dXr1[1],dXi1[1]},[pSrc]!       @// group 1

        @// Two groups consumed and the counter is kept doubled, hence 4.
        @// The flags set here are consumed by the BGT at the loop tail.
        SUBS    grpCount, grpCount, #4

        @// Twiddle product T = X1 * W (forward) or X1 * conj(W) (inverse).
        @// The Wr terms are common to both directions.
        VMULL   qT0, dXr1, dWr
        VMULL   qT1, dXi1, dWr

        .ifeqs  "\inverse", "TRUE"
            VMLAL   qT0, dXi1, dWi              @// real: Xr1*Wr + Xi1*Wi
            VMLSL   qT1, dXr1, dWi              @// imag: Xi1*Wr - Xr1*Wi
        .else
            VMLSL   qT0, dXi1, dWi              @// real: Xr1*Wr - Xi1*Wi
            VMLAL   qT1, dXr1, dWi              @// imag: Xi1*Wr + Xr1*Wi
        .endif

        @// Round, shift right by 15 and narrow back to 16 bits; T
        @// replaces X1 in place.
        VRSHRN  dXr1, qT0, #15
        VRSHRN  dXi1, qT1, #15

        @// Butterfly: Y0 = X0 - T, Y1 = X0 + T.
        .ifeqs  "\scaled", "TRUE"
            VHADD   dYr1, dXr0, dXr1
            VHADD   dYi1, dXi0, dXi1
            VHSUB   dYr0, dXr0, dXr1
            VHSUB   dYi0, dXi0, dXi1
        .else
            VADD    dYr1, dXr0, dXr1
            VADD    dYi1, dXi0, dXi1
            VSUB    dYr0, dXr0, dXr1
            VSUB    dYi0, dXi0, dXi1
        .endif

        @// Y0 of both groups goes to pDst, Y1 of both groups goes
        @// outPointStep bytes further on. After the four stores pDst has
        @// moved forward by 8 bytes in total.
        VST2    {dYr0[0],dYi0[0]},[pDst]!
        VST2    {dYr0[1],dYi0[1]},[pDst],step           @// jump to the Y1 slot

        VST2    {dYr1[0],dYi1[0]},[pDst]!
        VST2    {dYr1[1],dYi1[1]},[pDst],dstStep        @// jump back to the next Y0 slot

        BGT     grpLoop\name


        @// Exchange the roles of the two buffers for the next stage and
        @// rewind both to their starting addresses. The loop moved pSrc
        @// forward by 2*outPointStep bytes and pDst by outPointStep bytes.
        MOV     pTmp, pDst
        SUB     pDst, pSrc, outPointStep, LSL #1
        SUB     pSrc, pTmp, outPointStep

        @// The loop moved pTwiddle forward by outPointStep bytes.
        SUB     pTwiddle, pTwiddle, outPointStep

        .endm


@// ---------------------------------------------------------------------
@// Entry points: one expansion of FFTSTAGE per direction and scaling mode.
@// M_START and M_END are supplied by the included headers.
@// NOTE(review): the r4 argument presumably names the highest core
@// register to be preserved - confirm against armCOMM_s.h.
@// ---------------------------------------------------------------------

        @// Forward transform, unscaled
        M_START armSP_FFTFwd_CToC_SC16_Radix2_ls_OutOfPlace_unsafe,r4
        FFTSTAGE "FALSE","FALSE",FWD
        M_END


        @// Inverse transform, unscaled
        M_START armSP_FFTInv_CToC_SC16_Radix2_ls_OutOfPlace_unsafe,r4
        FFTSTAGE "FALSE","TRUE",INV
        M_END


        @// Forward transform, halving butterflies
        M_START armSP_FFTFwd_CToC_SC16_Sfs_Radix2_ls_OutOfPlace_unsafe,r4
        FFTSTAGE "TRUE","FALSE",FWDSFS
        M_END


        @// Inverse transform, halving butterflies
        M_START armSP_FFTInv_CToC_SC16_Sfs_Radix2_ls_OutOfPlace_unsafe,r4
        FFTSTAGE "TRUE","TRUE",INVSFS
        M_END


        .end