@//
@//  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
@//
@//  Use of this source code is governed by a BSD-style license
@//  that can be found in the LICENSE file in the root of the source
@//  tree. An additional intellectual property rights grant can be found
@//  in the file PATENTS.  All contributing project authors may
@//  be found in the AUTHORS file in the root of the source tree.
@//
@//  This file was originally licensed as follows. It has been
@//  relicensed with permission from the copyright holders.
@//

@//
@// File Name:  armSP_FFT_CToC_SC32_Radix2_ls_unsafe_s.s
@// OpenMAX DL: v1.0.2
@// Last Modified Revision:   7493
@// Last Modified Date:       Mon, 24 Sep 2007
@//
@// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
@//
@//
@//
@// Description:
@// Compute the last stage of a Radix 2 DIT in-order out-of-place FFT
@// stage for a N point complex signal.
@//
@// Syntax: GNU as (ARM/NEON), passed through the C preprocessor first.
@// Comments are written as "@//" so that both the preprocessor and the
@// assembler discard them.  Do NOT append a comment to a #define line:
@// the preprocessor would leave a bare "@" in the expansion and comment
@// out the remainder of every line that uses the name.
@//
@// This file provides one macro (FFTSTAGE) and four entry points that
@// expand it with different scaled/inverse settings.
@//



@// Include standard headers

#include "dl/api/arm/armCOMM_s.h"
#include "dl/api/arm/omxtypes_s.h"


@// Import symbols required from other files
@// (For example tables)




@// Set debugging level
@//DEBUG_ON    SETL {TRUE}


@// Guarding implementation by the processor name


@//Input Registers
@//   pSrc        r0   read pointer, post-incremented by the loop loads
@//   pDst        r2   write pointer, post-incremented by the loop stores
@//   pTwiddle    r1   twiddle read pointer, post-incremented by the loop
@//   subFFTNum   r6   overwritten with 1 by FFTSTAGE
@//   subFFTSize  r7   read on entry, doubled by FFTSTAGE

#define pSrc            r0
#define pDst            r2
#define pTwiddle        r1
#define subFFTNum       r6
#define subFFTSize      r7


@//Output Registers


@//Local Scratch Registers
@//   outPointStep r3  8 * (subFFTSize on entry), used as a byte step
@//   grpCount     r4  loop counter, 2 * (subFFTSize on entry), minus 4 per pass
@//   dstStep      r5  16 - outPointStep
@//   pTmp         r4  shares r4 with grpCount; only used after the loop


#define outPointStep    r3
#define grpCount        r4
#define dstStep         r5
#define pTmp            r4

@// Neon Registers
@//   dWr/dWi      real/imaginary lanes of two twiddles
@//   dXr0/dXi0    real/imaginary lanes of the first input of two butterflies
@//   dXr1/dXi1    real/imaginary lanes of the second input; reused to hold
@//                the narrowed twiddle product
@//   dYr0/dYi0    difference outputs, dYr1/dYi1 sum outputs
@//   qT0/qT1      64-bit accumulators for the real/imaginary products
@//                (q5 = d10:d11, q6 = d12:d13)

#define dWr     D0.S32
#define dWi     d1.s32
#define dXr0    d2.s32
#define dXi0    d3.s32
#define dXr1    d4.s32
#define dXi1    d5.s32
#define dYr0    d6.s32
#define dYi0    d7.s32
#define dYr1    d8.s32
#define dYi1    d9.s32
#define qT0     q5.s64
#define qT1     q6.s64

@//
@// FFTSTAGE scaled, inverse, name
@//
@//   scaled   "TRUE" selects the halving forms VHADD/VHSUB for the
@//            butterfly (each output is halved); any other value selects
@//            plain VADD/VSUB.
@//   inverse  "TRUE" multiplies the second input by the conjugate of the
@//            twiddle (Wr*Xr + Wi*Xi, Wr*Xi - Wi*Xr); any other value
@//            multiplies by the twiddle itself (Wr*Xr - Wi*Xi,
@//            Wr*Xi + Wi*Xr).
@//   name     suffix appended to the loop label so that every expansion
@//            gets its own label.
@//
@// On exit:
@//   subFFTNum  = 1
@//   subFFTSize = 2 * entry value
@//   pSrc       = entry pDst, pDst = entry pSrc, pTwiddle = entry pTwiddle
@//                (holds when the entry subFFTSize is a multiple of 2, see
@//                the pointer reset at the end of the macro)
@// Writes r3, r4, r5, d0-d13 and the condition flags.
@//
@// NOTE(review): r5-r7 and d8-d13 are callee-saved under the ARM procedure
@// call standard, while the entry points below name only r4 in M_START.
@// Presumably the callers of these "_unsafe" routines save them - confirm.
@//
        .macro FFTSTAGE scaled, inverse, name


        @// Byte step between the two output points of one butterfly.
        @// NOTE(review): the factor 8 matches one complex point made of two
        @// 32-bit words - confirm against the callers.
        MOV     outPointStep,subFFTSize,LSL #3
        @// Update grpCount and grpSize rightaway

        MOV     subFFTNum,#1                          @//after the last stage
        LSL     grpCount,subFFTSize,#1

        @// update subFFTSize for the next stage
        MOV     subFFTSize,grpCount

        @// dstStep + outPointStep = 16, so the two stores in the loop move
        @// pDst forward by 16 bytes in total per pass.
        RSB      dstStep,outPointStep,#16


        @// Loop on 2 grps at a time for the last stage
        @// Each pass consumes 16 bytes of twiddles and 32 bytes of input and
        @// produces 32 bytes of output (two butterflies).

grpLoop\name :
        @// Load four 32-bit words, de-interleaved: words 0,2 to dWr and
        @// words 1,3 to dWi.  The :64 qualifier states 8-byte alignment.
        VLD2    {dWr,dWi},[pTwiddle :64]!

        @// Load eight 32-bit words, de-interleaved by four: words 0,4 to
        @// dXr0, 1,5 to dXi0, 2,6 to dXr1, 3,7 to dXi1.  The two inputs of
        @// a butterfly are therefore adjacent in the source buffer.
        @// The :128 qualifier states 16-byte alignment.
        VLD4    {dXr0,dXi0,dXr1,dXi1},[pSrc :128]!
        @// Flags set here are consumed by the bgt at the bottom of the loop;
        @// none of the NEON instructions in between write the flags.
        SUBS    grpCount,grpCount,#4                   @// grpCount is multiplied by 2

        @// Complex multiply of the second input by the twiddle, kept as
        @// 64-bit products in qT0 (real) and qT1 (imaginary).
        .ifeqs  "\inverse", "TRUE"
            VMULL   qT0,dWr,dXr1
            VMLAL   qT0,dWi,dXi1                       @// real part
            VMULL   qT1,dWr,dXi1
            VMLSL   qT1,dWi,dXr1                       @// imag part

        .else

            VMULL   qT0,dWr,dXr1
            VMLSL   qT0,dWi,dXi1                       @// real part
            VMULL   qT1,dWr,dXi1
            VMLAL   qT1,dWi,dXr1                       @// imag part

        .endif

        @// Rounding shift right by 31 and narrow back to 32 bits, reusing
        @// the dXr1/dXi1 registers for the product.
        @// NOTE(review): the shift of 31 is consistent with Q31 twiddles -
        @// confirm against the twiddle table format.
        VRSHRN  dXr1,qT0,#31
        VRSHRN  dXi1,qT1,#31


        @// Butterfly: Y0 = X0 - product, Y1 = X0 + product.
        .ifeqs "\scaled", "TRUE"

            VHSUB    dYr0,dXr0,dXr1
            VHSUB    dYi0,dXi0,dXi1
            VHADD    dYr1,dXr0,dXr1
            VHADD    dYi1,dXi0,dXi1

        .else

            VSUB    dYr0,dXr0,dXr1
            VSUB    dYi0,dXi0,dXi1
            VADD    dYr1,dXr0,dXr1
            VADD    dYi1,dXi0,dXi1


        .endif

        @// Store the difference pair at pDst (re/im interleaved again), then
        @// the sum pair outPointStep bytes further on.
        VST2    {dYr0,dYi0},[pDst],outPointStep
        VST2    {dYr1,dYi1},[pDst],dstStep                  @// dstStep =  step = -outPointStep + 16

        @// Signed test on the SUBS result: repeat while grpCount > 0.
        bgt     grpLoop\name


        @// Reset and Swap pSrc and pDst for the next stage
        @// With an entry subFFTSize that is a multiple of 2 the loop moved
        @// pSrc forward by 2*outPointStep and pDst and pTwiddle forward by
        @// outPointStep, so the subtractions below rewind all three.
        @// In the comments below "size" is the updated subFFTSize, i.e.
        @// outPointStep = 4*size bytes.
        @// pTmp shares r4 with grpCount, which is dead once the loop exits.
        MOV     pTmp,pDst
        SUB     pDst,pSrc,outPointStep,LSL #1       @// pDst -= 4*size; pSrc -= 8*size bytes
        SUB     pSrc,pTmp,outPointStep

        @// Reset pTwiddle for the next stage
        SUB     pTwiddle,pTwiddle,outPointStep      @// pTwiddle -= 4*size bytes

        .endm


@// Entry points.  Each one expands FFTSTAGE once between M_START and M_END.
@// M_START and M_END are not defined in this file; presumably they come
@// from armCOMM_s.h and emit the function prologue and epilogue, with r4
@// naming the registers to preserve - confirm there.
@// NOTE(review): only the first M_START passes a third, empty argument.


        @// Forward transform, unscaled.
        M_START armSP_FFTFwd_CToC_SC32_Radix2_ls_OutOfPlace_unsafe,r4,""
        FFTSTAGE "FALSE","FALSE",fwd
        M_END



        @// Inverse transform, unscaled.
        M_START armSP_FFTInv_CToC_SC32_Radix2_ls_OutOfPlace_unsafe,r4
        FFTSTAGE "FALSE","TRUE",inv
        M_END



        @// Forward transform, outputs halved.
        M_START armSP_FFTFwd_CToC_SC32_Sfs_Radix2_ls_OutOfPlace_unsafe,r4
        FFTSTAGE "TRUE","FALSE",fwdsfs
        M_END



        @// Inverse transform, outputs halved.
        M_START armSP_FFTInv_CToC_SC32_Sfs_Radix2_ls_OutOfPlace_unsafe,r4
        FFTSTAGE "TRUE","TRUE",invsfs
        M_END


        .end