@//
@//  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
@//
@//  Use of this source code is governed by a BSD-style license
@//  that can be found in the LICENSE file in the root of the source
@//  tree. An additional intellectual property rights grant can be found
@//  in the file PATENTS.  All contributing project authors may
@//  be found in the AUTHORS file in the root of the source tree.
@//
@//  This is a modification of omxSP_FFTFwd_RToCCS_S32_Sfs_s.s
@//  to support float instead of SC32.
@//

@//
@// Description:
@// Compute FFT for a real signal
@//
@// Syntax: GNU as (ARM, unified syntax) run through the C preprocessor.
@// Comments are written as "@//" so that both cpp and the assembler
@// discard them.
@//


@// Include standard headers

#include "dl/api/arm/armCOMM_s.h"
#include "dl/api/arm/omxtypes_s.h"

@//        M_VARIANTS ARM1136JS

@// Import symbols required from other files
@// (For example tables)

        .extern  armSP_FFTFwd_CToC_FC32_Radix2_fs_OutOfPlace_unsafe_vfp
        .extern  armSP_FFTFwd_CToC_FC32_Radix4_fs_OutOfPlace_unsafe_vfp
        .extern  armSP_FFTFwd_CToC_FC32_Radix8_fs_OutOfPlace_unsafe_vfp
        .extern  armSP_FFTFwd_CToC_FC32_Radix4_OutOfPlace_unsafe_vfp

@// Set debugging level
@//DEBUG_ON    SETL {TRUE}



@// Guarding implementation by the processor name

@//    IF  ARM1136JS

@// Input Registers

#define pSrc            r0
#define pDst            r1
#define pFFTSpec        r2


@// Output registers
#define result          r0

@// Local Scratch Registers
@// NOTE: several aliases below deliberately share one physical register;
@// each alias is only live in a different phase of the function.

@// N=1 case
#define scaleMinusOne   r2
#define rnd             r2
#define zero            r8
#define Zero            r9


#define argTwiddle      r1
#define argDst          r2
#define argScale        r4
#define pTwiddle        r4
#define pOut            r5
#define subFFTSize      r7
#define subFFTNum       r6
#define N               r6
#define order           r14
#define diff            r9
#define count           r8
#define diffMinusOne    r10
#define round           r3

#define step            r3
#define step1           r6
#define twStep          r12
#define pTwiddleTmp     r14
#define t0              r12
#define t1              r14             /*@// pTwiddleTmp*/
#define t2              r0
#define t3              r1              /*@// pSrc,argTwiddle*/
#define t4              r6
#define t5              r7              /*@// step1,subFFTSize*/

@// VFP single-precision registers
#define x0r     s0
#define x0i     s1
#define y0r     s2
#define y0i     s3
#define x1r     s4
#define x1i     s5
#define w1r     s2
#define w1i     s3
#define w0r     s6
#define w0i     s7
#define y1r     s2                      /*@// w1r,w1i*/
#define y1i     s3
#define st0     s8
#define st1     s9
#define st2     s10
#define st3     s11
#define st4     s12
#define st5     s13
#define half    s15



@//--------------------------------------------------------------------
@// omxSP_FFTFwd_RToCCS_F32_Sfs_vfp
@//
@// Forward FFT of a real signal: runs an N/2-point complex FFT through
@// the external armSP_FFTFwd_CToC_FC32_* stage routines, then applies
@// the complex-to-real fixup below to produce F(0) .. F(N/2).
@//
@// In:    r0 = pSrc      source buffer
@//        r1 = pDst      destination buffer
@//        r2 = pFFTSpec  spec structure; fields read here are N (+0),
@//                       pTwiddle (+8) and pBuf (+12)
@// Out:   r0 = OMX_Sts_NoErr (also returned, with no work done, when
@//             N <= 1, which is not supported)
@// Uses:  r1-r3, r12, r14, s0-s13, s15 and flags directly; r4-r7 are
@//        also written.  Register saving is done by M_START/M_END
@//        (macros from armCOMM_s.h; presumably r4-r11 and lr - confirm).
@// NOTE(review): "half" (s15) is loaded before the stage routines are
@// called and read after them, so they are assumed to preserve s15 -
@// confirm against those routines.
@//--------------------------------------------------------------------

        @// Allocate stack memory required by the function



        @// Write function header
        M_START     omxSP_FFTFwd_RToCCS_F32_Sfs_vfp,r11

@// Structure offsets for FFTSpec
        .set    ARMsFFTSpec_N, 0
        .set    ARMsFFTSpec_pBitRev, 4
        .set    ARMsFFTSpec_pTwiddle, 8
        .set    ARMsFFTSpec_pBuf, 12

        @// Define stack arguments

        @// Setup half value: 0x3f000000 is the IEEE-754 bit pattern of 0.5f
        movw    N, #0                   @// Use N as a temp.
        movt    N, #0x3f00
        vmov.f32 half, N

        @// Read the size from structure and take log
        LDR     N, [pFFTSpec, #ARMsFFTSpec_N]

        @// Read other structure parameters
        LDR     pTwiddle, [pFFTSpec, #ARMsFFTSpec_pTwiddle]
        LDR     pOut, [pFFTSpec, #ARMsFFTSpec_pBuf]

        @//  N=1 Treat separately
        CMP     N,#1
        BGT     sizeGreaterThanOne
        @// N<=1 is not supported
        @// Set return value
        MOV     result, #OMX_Sts_NoErr
        B       FunctionEnd

sizeGreaterThanOne:
        @// Do a N/2 point complex FFT including the scaling

        MOV     N,N,ASR #1              @// N/2 point complex FFT
        CLZ     order,N                 @// N = 2^order
        RSB     order,order,#31
        MOV     subFFTSize,#1
        @//MOV     subFFTNum,N          (N and subFFTNum are both r6)


        CMP     order,#1
        BGT     orderGreaterthan1       @// order > 1
        @// order < 1: single complex point, just copy it into pOut
        vldmlt.f32 pSrc, {x0r, x0i}
        vstmlt.f32 pOut, {x0r, x0i}
        MOVLT   pSrc,pOut
        MOVLT   argDst,pDst
        BLT     FFTEnd

        @// order == 1: one radix-2 first stage
        MOV     argDst,pOut             @// Set input args to fft stages
        MOV     pOut,pDst               @// Set input args to fft stages
        MOV     argTwiddle,pTwiddle

        BL      armSP_FFTFwd_CToC_FC32_Radix2_fs_OutOfPlace_unsafe_vfp
        B       finalComplexToRealFixup

orderGreaterthan1:

        TST     order, #2               @// Set input args to fft stages
        MOVEQ   argDst,pDst
        MOVNE   argDst,pOut
        MOVNE   pOut,pDst               @// Pass the first stage dest in RN5
        MOV     argTwiddle,pTwiddle

        @//check for even or odd order

        @// NOTE: The following combination of BL's would work fine
        @// even though the first BL would corrupt the flags. This is
        @// because the end of the "grpZeroSetLoop" loop inside
        @// armSP_FFTFwd_CToC_FC32_Radix4_fs_OutOfPlace_unsafe_vfp sets
        @// the Z flag to EQ

        TST     order,#0x00000001
        BLEQ    armSP_FFTFwd_CToC_FC32_Radix4_fs_OutOfPlace_unsafe_vfp
        BLNE    armSP_FFTFwd_CToC_FC32_Radix8_fs_OutOfPlace_unsafe_vfp

unscaledRadix4Loop:
        @// Keep running radix-4 stages until subFFTNum reaches 1
        CMP     subFFTNum,#1
        BEQ     FFTEnd
        BL      armSP_FFTFwd_CToC_FC32_Radix4_OutOfPlace_unsafe_vfp
        B       unscaledRadix4Loop

FFTEnd:
finalComplexToRealFixup:

        @// step  = N/2 * 8 bytes
        MOV     step,subFFTSize,LSL #3
        @// twStep = 3N/8 * 8 bytes pointing to W^1
        SUB     twStep,step,subFFTSize,LSL #1
        @// step1 = N/4 * 8 = N/2*4 bytes
        MOV     step1,subFFTSize,LSL #2
        @// (N/4-1)*8 bytes
        SUB     step1,step1,#8

        @// F(0) = 1/2 [Z(0) + Z'(0)] - j [Z(0) - Z'(0)]
        @//        1/2 [(a+jb) + (a-jb)] - j [(a+jb) - (a-jb)]
        @//        1/2 [2a+j0] - j [0+j2b]
        @//        (a+b, 0)

        @// F(N/2) =1/2 [Z(0) + Z'(0)] + j [Z(0) - Z'(0)]
        @//        1/2 [(a+jb) + (a-jb)] + j [(a+jb) - (a-jb)]
        @//        1/2 [2a+j0] + j [0+j2b]
        @//        (a-b, 0)

        @// F(0) and F(N/2)
        vldm.f32 pSrc!, {x0r, x0i}
        vadd.f32 y0r,x0r,x0i            @// F(0)   = (Z0.r+Z0.i , 0)
        vsub.f32 x0r,x0r,x0i            @// F(N/2) = (Z0.r-Z0.i , 0)

        @// Set both imaginary parts to exactly 0.0.  Do NOT use
        @// "vsub.f32 y0i, y0i": y0i holds stale data here and x - x is
        @// NaN when x is NaN or +/-Inf.  t1 (r14) is dead at this point;
        @// it is reloaded as pTwiddleTmp further down.
        MOV     t1, #0
        vmov    y0i, t1
        vmov    x0i, t1

        add     argDst, step
        vstm.f32 argDst, {x0r, x0i}     @// {x0r,x0i}->[argDst, step]
        sub     argDst, step
        vstm.f32 argDst!, {y0r, y0i}

        @// Flags set here are consumed by the BLT/BEQ below; the two
        @// ADDs in between do not update flags.
        SUBS    subFFTSize,subFFTSize,#2

        ADD     pTwiddleTmp,argTwiddle,#8       @// W^2
        ADD     argTwiddle,argTwiddle,twStep    @// W^1
        BLT     End
        BEQ     lastElement


        @// F(k) = 1/2 [Z(k) + Z'(N/2-k)] -j*W^(k) [Z(k) - Z'(N/2-k)]
        @// Process 2 elements at a time. E.g: F(1) and F(N/2-1) since
        @// both of them require Z(1) and Z(N/2-1)

        ASR     subFFTSize,subFFTSize,#1
evenOddButterflyLoop:

        SUB     step,step,#16           @// (N/2-2)*8 bytes

        add     pSrc, step
        vldm.f32 pSrc, {x1r, x1i}       @// {x1r, x1i} = [pSrc, step]
        sub     pSrc, step
        vldm.f32 pSrc!, {x0r, x0i}
        add     argTwiddle, step1
        vldm.f32 argTwiddle, {w1r, w1i} @// {w1r, w1i} = [argTwiddle, step1]
        sub     argTwiddle, step1
        vldm.f32 argTwiddle!, {w0r, w0i} @// {w0r, w0i} = [argTwiddle], #8

        SUB     step1,step1,#8
        @// Loop-counter flags: nothing below updates the flags, so
        @// they are still valid at the BGT that closes the loop.
        SUBS    subFFTSize,subFFTSize,#1

        vsub.f32 st2,x0r,x1r            @// a-c
        vadd.f32 st3,x0i,x1i            @// b+d
        vadd.f32 st0,x0r,x1r            @// a+c
        vsub.f32 st1,x0i,x1i            @// b-d

        vmul.f32 x1r,w1r,st2
        vmul.f32 x1i,w1r,st3
        vmla.f32 x1r,w1i,st3            @// x1r = w1r*st2 + w1i*st3
        @//RSB     x1r,x1r,#0
        vmls.f32 x1i,w1i,st2            @// x1i = w1r*st3 - w1i*st2

        @// y1r/y1i alias w1r/w1i (s2/s3); the twiddle is dead from here
        vsub.f32 y1r, st0, x1i
        vadd.f32 y1i, x1r, st1
        vneg.f32 y1i, y1i

        vmul.f32 x0r,w0r,st2
        vmul.f32 x0i,w0r,st3
        vmls.f32 x0r,w0i,st3            @// x0r = w0r*st2 - w0i*st3
        vmla.f32 x0i,w0i,st2            @// x0i = w0r*st3 + w0i*st2

        vsub.f32 st4,st0,x0i            @// F(1)
        vadd.f32 st5,x0r,st1


        @// Apply the 1/2 factor to both outputs
        vmul.f32 y1r, half
        vmul.f32 y1i, half
        vmul.f32 st4, half
        vmul.f32 st5, half

        add     argDst, step
        vstm.f32 argDst, {y1r, y1i}     @// {y1r,y1i} -> [argDst,step]
        sub     argDst, step
        vstm.f32 argDst!, {st4, st5}


        MOV     t0,argTwiddle           @// swap ptr for even and odd twiddles
        MOV     argTwiddle,pTwiddleTmp
        MOV     pTwiddleTmp,t0

        BGT     evenOddButterflyLoop

        @// Last element can be expanded as follows
        @// 1/2[Z(k) + Z'(k)] + j w^k [Z(k) - Z'(k)]
        @// 1/2[(a+jb) + (a-jb)] + j w^k [(a+jb) - (a-jb)]
        @// 1/2[2a+j0] + j (c+jd) [0+j2b]
        @// (a-bc, -bd)

lastElement:
        @// The code copies the element with its imaginary part negated
        vldm.f32 pSrc, {x0r, x0i}
        vneg.f32 x0i, x0i
        vstm.f32 argDst, {x0r, x0i}

End:
        @// Set return value
        MOV     result, #OMX_Sts_NoErr

FunctionEnd:
        @// Write function tail
        M_END

@//    ENDIF                            @//ARM1136JS


        @// Guarding implementation by the processor name



        .end