@//
@//  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
@//
@//  Use of this source code is governed by a BSD-style license
@//  that can be found in the LICENSE file in the root of the source
@//  tree. An additional intellectual property rights grant can be found
@//  in the file PATENTS.  All contributing project authors may
@//  be found in the AUTHORS file in the root of the source tree.
@//
@//  This is a modification of armSP_FFT_CToC_SC32_Radix2_ls_unsafe_s.S
@//  to support float instead of SC32.
@//

@//
@// Description:
@// Compute the last stage of a Radix 2 DIT in-order out-of-place FFT
@// stage for a N point complex signal.
@//
@// Syntax: GNU as for ARM with NEON, passed through the C preprocessor.
@// Every comment uses the "@//" form so that the preprocessor removes the
@// text and the assembler only ever sees a bare "@".
@//


@// Project headers (these supply the M_START and M_END macros used below)

#include "dl/api/arm/armCOMM_s.h"
#include "dl/api/arm/omxtypes_s.h"


@// ---------------------------------------------------------------------
@// Core register roles
@// ---------------------------------------------------------------------

@// Read by the stage; pSrc and pDst are exchanged on exit.
#define pSrc            r0
#define pTwiddle        r1
#define pDst            r2

@// subFFTSize is read on entry and doubled on exit.
@// subFFTNum is not read here; it is simply set to 1.
#define subFFTNum       r6
#define subFFTSize      r7

@// Scratch. loopCount and pSwap deliberately share r4: the counter is
@// dead once the loop has finished, and only then is pSwap needed.
#define outStride       r3
#define loopCount       r4
#define pSwap           r4
#define dstRewind       r5


@// ---------------------------------------------------------------------
@// NEON register roles (each D register holds two 32-bit float lanes)
@// ---------------------------------------------------------------------

#define dWr     d0.f32          /* twiddle, real parts               */
#define dWi     d1.f32          /* twiddle, imaginary parts          */
#define dXr0    d2.f32          /* first butterfly input, real       */
#define dXi0    d3.f32          /* first butterfly input, imaginary  */
#define dXr1    d4.f32          /* second butterfly input, real      */
#define dXi1    d5.f32          /* second butterfly input, imaginary */
#define dYr0    d6.f32          /* difference output, real           */
#define dYi0    d7.f32          /* difference output, imaginary      */
#define dYr1    d8.f32          /* sum output, real                  */
#define dYi1    d9.f32          /* sum output, imaginary             */
#define dTr     d10.f32         /* twiddled input, real              */
#define dTi     d12.f32         /* twiddled input, imaginary         */


@// ---------------------------------------------------------------------
@// FFTSTAGE scaled, inverse, name
@//
@//   scaled   not referenced anywhere in the macro body
@//   inverse  the string TRUE selects multiplication by the conjugated
@//            twiddle; any other value selects the plain twiddle
@//   name     suffix appended to the loop label so that the macro can be
@//            expanded more than once in this file
@//
@//   In:      pSrc, pTwiddle, pDst, subFFTSize
@//   Out:     subFFTNum  = 1
@//            subFFTSize = twice its value on entry
@//            pSrc and pDst exchanged and rewound (see the end of the
@//            macro), pTwiddle rewound
@//   Writes:  r3, r4, r5, d0-d10, d12 and the condition flags
@//
@//   One pass of the loop consumes four complex inputs and two complex
@//   twiddles and produces two butterflies. With
@//       T = W * X1          (inverse is not TRUE)
@//       T = conj(W) * X1    (inverse is TRUE)
@//   it stores X0 - T at pDst and X0 + T outStride bytes after it.
@// ---------------------------------------------------------------------

        .macro FFTSTAGE scaled, inverse, name

        @// Byte distance between the two results of one butterfly
        @// (8 * subFFTSize), and the adjustment that takes pDst from the
        @// second result back to 16 bytes past the first.
        MOV     outStride, subFFTSize, LSL #3
        RSB     dstRewind, outStride, #16

        @// The counter starts at 2 * subFFTSize and loses 4 per pass.
        @// The same doubled value is the subFFTSize handed back.
        LSL     loopCount, subFFTSize, #1
        MOV     subFFTSize, loopCount
        MOV     subFFTNum, #1


radix2lsGrpLoop\name :
        @// De-interleaving load of two twiddles (16 bytes, post-increment):
        @//   dWr = [W0.re, W1.re]      dWi = [W0.im, W1.im]
        VLD2    {dWr,dWi},[pTwiddle :64]!

        @// De-interleaving load of four inputs (32 bytes, post-increment):
        @//   dXr0 = [src0.re, src2.re]   dXi0 = [src0.im, src2.im]
        @//   dXr1 = [src1.re, src3.re]   dXi1 = [src1.im, src3.im]
        VLD4    {dXr0,dXi0,dXr1,dXi1},[pSrc :128]!

        @// Flags for the BGT at the bottom are produced here; none of the
        @// instructions in between is a flag-setting form.
        SUBS    loopCount, loopCount, #4

        @// Products shared by both directions.
        VMUL    dTr, dWr, dXr1
        VMUL    dTi, dWr, dXi1

        .ifeqs  "\inverse", "TRUE"

        VMLA    dTr, dWi, dXi1          @// re: Wr*Xr1 + Wi*Xi1
        VMLS    dTi, dWi, dXr1          @// im: Wr*Xi1 - Wi*Xr1

        .else

        VMLS    dTr, dWi, dXi1          @// re: Wr*Xr1 - Wi*Xi1
        VMLA    dTi, dWi, dXr1          @// im: Wr*Xi1 + Wi*Xr1

        .endif

        @// Butterfly: difference into Y0, sum into Y1.
        VSUB    dYr0, dXr0, dTr
        VSUB    dYi0, dXi0, dTi
        VADD    dYr1, dXr0, dTr
        VADD    dYi1, dXi0, dTi

        @// Interleaving stores of 16 bytes each. The first moves pDst
        @// forward by outStride, the second by 16 - outStride, so pDst
        @// ends the pass 16 bytes beyond where it started.
        VST2    {dYr0,dYi0},[pDst],outStride
        VST2    {dYr1,dYi1},[pDst],dstRewind

        BGT     radix2lsGrpLoop\name


        @// Exchange the buffers for the caller. When the subFFTSize seen
        @// on entry is even, the loop has moved pSrc by 2 * outStride and
        @// pDst by outStride, so both pointers land on the start of the
        @// buffer they are swapped to.
        MOV     pSwap, pDst
        SUB     pDst, pSrc, outStride, LSL #1
        SUB     pSrc, pSwap, outStride

        @// pTwiddle advanced 16 bytes per pass, outStride bytes in total
        @// under the same assumption; put it back.
        SUB     pTwiddle, pTwiddle, outStride

        .endm


@// ---------------------------------------------------------------------
@// Entry points. Both expand the same stage and differ only in the
@// inverse flag. r4 is the register list handed to M_START; the third,
@// empty argument on the forward variant is kept exactly as found.
@// ---------------------------------------------------------------------

        M_START armSP_FFTFwd_CToC_FC32_Radix2_ls_OutOfPlace_unsafe,r4,""
        FFTSTAGE "FALSE","FALSE",fwd
        M_END


        M_START armSP_FFTInv_CToC_FC32_Radix2_ls_OutOfPlace_unsafe,r4
        FFTSTAGE "FALSE","TRUE",inv
        M_END

        .end