@//
@//  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
@//
@//  Use of this source code is governed by a BSD-style license
@//  that can be found in the LICENSE file in the root of the source
@//  tree. An additional intellectual property rights grant can be found
@//  in the file PATENTS.  All contributing project authors may
@//  be found in the AUTHORS file in the root of the source tree.
@//
@//  This is a modification of armSP_FFT_CToC_SC32_Radix2_ls_unsafe_s.S
@//  to support float instead of SC32.
@//

@//
@// Description:
@// Compute the last stage of a radix-2 DIT in-order out-of-place FFT
@// for an N-point complex signal.
@//
@//


@// Include standard headers

#include "dl/api/arm/armCOMM_s.h"
#include "dl/api/arm/omxtypes_s.h"


@// Import symbols required from other files
@// (For example tables)


@// Set debugging level
@//DEBUG_ON    SETL {TRUE}


@// Guarding implementation by the processor name


@// NOTE: keep comments off the #define lines themselves. The C
@// preprocessor removes only the "//" part of an "@//" comment, so a
@// trailing comment would leave a stray "@" inside the alias expansion.

@//Input Registers

#define pSrc            r0
#define pDst            r2
#define pTwiddle        r1
#define subFFTNum       r6
#define subFFTSize      r7


@//Output Registers


@//Local Scratch Registers
@// grpCount and pTmp are both r4, so only one of the two can be live at
@// any given point.

#define outPointStep    r3
#define grpCount        r4
#define dstStep         r5
#define pTmp            r4

@// Neon Registers
@// Every alias below is a 64-bit D register viewed as .f32 elements.
@// This includes qT0 and qT1: despite the "q" prefix they are d10 and
@// d12, not Q registers.

#define dWr     d0.f32
#define dWi     d1.f32
#define dXr0    d2.f32
#define dXi0    d3.f32
#define dXr1    d4.f32
#define dXi1    d5.f32
#define dYr0    d6.f32
#define dYi0    d7.f32
#define dYr1    d8.f32
#define dYi1    d9.f32
#define qT0     d10.f32
#define qT1     d12.f32
@// ---------------------------------------------------------------------
@// FFTSTAGE scaled, inverse, name
@//
@// Emits the last radix-2 stage of the out-of-place float complex FFT.
@//
@// Macro arguments:
@//   scaled  - accepted but never referenced in the macro body.
@//   inverse - "TRUE" selects the conjugated twiddle product; any other
@//             string selects the plain twiddle product.
@//   name    - suffix appended to the loop label so that each expansion
@//             of the macro gets a distinct label.
@//
@// Register inputs (aliases are #defined earlier in this file):
@//   pSrc, pDst, pTwiddle, subFFTSize
@//
@// On exit:
@//   subFFTNum  = 1
@//   subFFTSize = 2 * (subFFTSize on entry)
@//   pTwiddle is stepped back by outPointStep bytes.
@//   pSrc and pDst are stepped back and swapped. They land exactly on
@//   the start of the other buffer when subFFTSize on entry is even, so
@//   that the loop consumes exactly 2*outPointStep bytes of source.
@//
@// Also written: outPointStep, grpCount/pTmp (r4), dstStep, d0-d10, d12
@// and the condition flags (SUBS).
@//
@// NOTE(review): d8-d10 and d12 are callee-saved under AAPCS and nothing
@// in this macro preserves them. Confirm that M_START or the caller of
@// these "_unsafe" routines takes care of that.
@// ---------------------------------------------------------------------

        .macro FFTSTAGE scaled, inverse, name

        @// Byte distance between the two outputs of one butterfly:
        @// subFFTSize complex floats of 8 bytes each.
        MOV     outPointStep,subFFTSize,LSL #3

        @// Update grpCount and grpSize right away
        MOV     subFFTNum,#1                        @// after the last stage
        LSL     grpCount,subFFTSize,#1

        @// update subFFTSize for the next stage
        MOV     subFFTSize,grpCount

        @// Step applied after the second store of each iteration: undo
        @// outPointStep and advance past the 16 bytes just written.
        RSB     dstStep,outPointStep,#16


        @// Loop on 2 grps at a time for the last stage

radix2lsGrpLoop\name :
        @// dWr = [pTwiddle[0].Re, pTwiddle[1].Re]
        @// dWi = [pTwiddle[0].Im, pTwiddle[1].Im]
        VLD2    {dWr,dWi},[pTwiddle :64]!

        @// dXr0 = [pSrc[0].Re, pSrc[2].Re]
        @// dXi0 = [pSrc[0].Im, pSrc[2].Im]
        @// dXr1 = [pSrc[1].Re, pSrc[3].Re]
        @// dXi1 = [pSrc[1].Im, pSrc[3].Im]
        VLD4    {dXr0,dXi0,dXr1,dXi1},[pSrc :128]!

        @// The flags set here are consumed by the BGT at the bottom of the
        @// loop; no instruction in between writes the condition flags.
        SUBS    grpCount,grpCount,#4                @// grpCount is multiplied by 2

        .ifeqs  "\inverse", "TRUE"
        @// T = conj(W) * X1
        VMUL    qT0,dWr,dXr1
        VMLA    qT0,dWi,dXi1                        @// real part
        VMUL    qT1,dWr,dXi1
        VMLS    qT1,dWi,dXr1                        @// imag part

        .else

        @// T = W * X1
        VMUL    qT0,dWr,dXr1
        VMLS    qT0,dWi,dXi1                        @// real part
        VMUL    qT1,dWr,dXi1
        VMLA    qT1,dWi,dXr1                        @// imag part

        .endif

        @// Y0 = X0 - T, Y1 = X0 + T
        VSUB    dYr0,dXr0,qT0
        VSUB    dYi0,dXi0,qT1
        VADD    dYr1,dXr0,qT0
        VADD    dYi1,dXi0,qT1

        @// Y0 goes to pDst, Y1 goes outPointStep bytes further on.
        VST2    {dYr0,dYi0},[pDst],outPointStep
        VST2    {dYr1,dYi1},[pDst],dstStep          @// dstStep =  step = -outPointStep + 16

        BGT     radix2lsGrpLoop\name


        @// Reset and Swap pSrc and pDst for the next stage
        @// grpCount is no longer needed here, so r4 is reused as pTmp.
        MOV     pTmp,pDst
        SUB     pDst,pSrc,outPointStep,LSL #1       @// pDst -= 4*size; pSrc -= 8*size bytes
        SUB     pSrc,pTmp,outPointStep

        @// Reset pTwiddle for the next stage
        SUB     pTwiddle,pTwiddle,outPointStep      @// pTwiddle -= 4*size bytes

        .endm


@// Forward transform entry point: FFTSTAGE expanded with inverse = "FALSE".
        M_START armSP_FFTFwd_CToC_FC32_Radix2_ls_OutOfPlace_unsafe,r4,""
            FFTSTAGE "FALSE","FALSE",fwd
        M_END

@// Inverse transform entry point: FFTSTAGE expanded with inverse = "TRUE",
@// which selects the conjugated twiddle product inside the macro.

        M_START armSP_FFTInv_CToC_FC32_Radix2_ls_OutOfPlace_unsafe,r4
            FFTSTAGE "FALSE","TRUE",inv
        M_END

        .end