//
//  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
//
//  Use of this source code is governed by a BSD-style license
//  that can be found in the LICENSE file in the root of the source
//  tree. An additional intellectual property rights grant can be found
//  in the file PATENTS.  All contributing project authors may
//  be found in the AUTHORS file in the root of the source tree.
//
//  This is a modification of armSP_FFT_CToC_SC32_Radix2_ls_unsafe_s.S
//  to support float instead of SC32.
//

//
// Description:
// Compute the last stage of a Radix 2 DIT in-order out-of-place FFT
// stage for an N point complex signal.
//
//


// Include standard headers

#include "dl/api/arm/arm64COMM_s.h"
#include "dl/api/arm/omxtypes_s.h"


// Import symbols required from other files
// (For example tables)




// Set debugging level
//DEBUG_ON    SETL {TRUE}


// Guarding implementation by the processor name


// Input Registers
//
// x0..x4 are the first five integer argument registers. pSubFFTNum is
// declared for completeness but is never dereferenced in this file.

#define pSrc            x0
#define pDst            x1
#define pTwiddle        x2
#define pSubFFTNum      x3
#define pSubFFTSize     x4


// Output Registers


// Local Scratch Registers


#define subFFTNum       x5
#define subFFTSize      x6
#define outPointStep    x8
#define grpCount        x9
#define dstStep         x10

// Neon Registers
//
// Every vector alias is a 2 x 32-bit float view (.2s), so each register
// carries one component (real or imaginary) of two complex values.
// v8, v9, v10 and v12 are used below; see the note on M_START further
// down regarding their preservation.

#define dWr     v0.2s
#define dWi     v1.2s
#define dXr0    v2.2s
#define dXi0    v3.2s
#define dXr1    v4.2s
#define dXi1    v5.2s
#define dYr0    v6.2s
#define dYi0    v7.2s
#define dYr1    v8.2s
#define dYi1    v9.2s
#define qT0     v10.2s
#define qT1     v12.2s

        // FFTSTAGE scaled, inverse, name
        //
        // Emits the body of the last radix-2 stage.
        //
        //   scaled   not referenced anywhere in this macro body.
        //   inverse  the string "TRUE" selects the conjugated twiddle
        //            product; any other value selects the plain product.
        //   name     suffix appended to the loop label so that the macro
        //            can be expanded more than once in the same file.
        //
        // Per loop iteration the macro reads two twiddles (16 bytes) from
        // pTwiddle and four complex inputs (32 bytes) from pSrc, and
        // writes two complex results at pDst and two more at
        // pDst + outPointStep. pDst then ends up 16 bytes further on.
        //
        // The loop is bottom-tested, so the body always runs at least once.
        // NOTE(review): this presumably relies on *pSubFFTSize being a
        // positive even value - confirm against the caller.
        .macro FFTSTAGE scaled, inverse, name

        // Move parameters into our work registers.
        // subFFTSize is an X register, so this is a 64-bit load.
        ldr     subFFTSize, [pSubFFTSize]

        // Byte distance between the two output halves: subFFTSize * 8.
        lsl     outPointStep, subFFTSize, #3

        // Update grpCount and grpSize right away

        MOV     subFFTNum,#1                            //after the last stage
        LSL     grpCount,subFFTSize,#1

        // update subFFTSize for the next stage
        // (subFFTNum and subFFTSize are only updated in registers here;
        // nothing in this macro stores them back through the pointers.)
        MOV     subFFTSize,grpCount

        // dstStep = 16 - outPointStep, used to step pDst back to the first
        // output half and on to the next pair of results.
        // NOTE(review): rsb is not a native A64 mnemonic; it is presumably
        // supplied as a macro by arm64COMM_s.h - confirm.
        rsb     dstStep,outPointStep,#16

        // Loop on 2 grps at a time for the last stage

radix2lsGrpLoop\name :
        // dWr = [pTwiddle[0].Re, pTwiddle[1].Re]
        // dWi = [pTwiddle[0].Im, pTwiddle[1].Im]
        ld2     {dWr,dWi},[pTwiddle], #16

        // dXr0 = [pSrc[0].Re, pSrc[2].Re]
        // dXi0 = [pSrc[0].Im, pSrc[2].Im]
        // dXr1 = [pSrc[1].Re, pSrc[3].Re]
        // dXi1 = [pSrc[1].Im, pSrc[3].Im]
        ld4     {dXr0,dXi0,dXr1,dXi1}, [pSrc], #32

        // The flags set here are consumed by the BGT at the bottom of the
        // loop; none of the vector arithmetic or stores in between writes
        // the condition flags.
        SUBS    grpCount,grpCount,#4                    // grpCount is multiplied by 2

        .ifeqs  "\inverse", "TRUE"
        // T = conj(W) * X1
        fmul    qT0,dWr,dXr1
        fmla    qT0,dWi,dXi1                            // real part
        fmul    qT1,dWr,dXi1
        fmls    qT1,dWi,dXr1                            // imag part

        .else

        // T = W * X1
        fmul    qT0,dWr,dXr1
        fmls    qT0,dWi,dXi1                            // real part
        fmul    qT1,dWr,dXi1
        fmla    qT1,dWi,dXr1                            // imag part

        .endif

        // Butterfly: Y0 = X0 - T, Y1 = X0 + T
        fsub    dYr0,dXr0,qT0
        fsub    dYi0,dXi0,qT1
        fadd    dYr1,dXr0,qT0
        fadd    dYi1,dXi0,qT1

        // Y0 goes to the current position, Y1 goes outPointStep bytes
        // further; the second post-increment leaves pDst 16 bytes past
        // where this iteration started.
        st2     {dYr0,dYi0},[pDst],outPointStep
        st2     {dYr1,dYi1},[pDst],dstStep              // dstStep =  step = -outPointStep + 16

        BGT     radix2lsGrpLoop\name


        .endm


        // Forward transform entry point.
        // NOTE(review): M_START/M_END come from arm64COMM_s.h; the third
        // argument (d12) presumably makes the prologue/epilogue preserve
        // the callee-saved FP registers up to d12, which would cover the
        // v8, v9, v10 and v12 used by FFTSTAGE - confirm.
        M_START armSP_FFTFwd_CToC_FC32_Radix2_ls_OutOfPlace,,d12
        FFTSTAGE "FALSE","FALSE",fwd
        M_END


        // Inverse transform entry point; identical to the forward one
        // except that FFTSTAGE is expanded with inverse = "TRUE".
        M_START armSP_FFTInv_CToC_FC32_Radix2_ls_OutOfPlace,,d12
        FFTSTAGE "FALSE","TRUE",inv
        M_END


        .end