@//
@//  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
@//
@//  Use of this source code is governed by a BSD-style license
@//  that can be found in the LICENSE file in the root of the source
@//  tree. An additional intellectual property rights grant can be found
@//  in the file PATENTS.  All contributing project authors may
@//  be found in the AUTHORS file in the root of the source tree.
@//
@//  This is a modification of omxSP_FFTInv_CToC_SC32_Sfs_s.s
@//  to support float instead of SC32.
@//

@//
@// Description:
@// Compute an inverse FFT for a complex signal
@//
@//


@// Include standard headers
@// (armCOMM_s.h is expected to supply the M_START / M_END macros and
@// omxtypes_s.h the OMX_Sts_* constants used further down -- neither
@// definition is visible in this file.)

#include "dl/api/arm/armCOMM_s.h"
#include "dl/api/arm/omxtypes_s.h"

@//        M_VARIANTS ARM1136JS

@// Import symbols required from other files
@// (For example tables)
@//
@// The Radix2_fs, Radix4_fs, Radix8_fs and Radix4 helpers are called from
@// omxSP_FFTInv_CToC_FC32_Sfs_vfp.  The Sfs_Radix2_fs symbol is declared
@// but never referenced in this file; it is kept so the import list is
@// unchanged.

        .extern  armSP_FFTInv_CToC_FC32_Sfs_Radix2_fs_OutOfPlace_unsafe_vfp
        .extern  armSP_FFTInv_CToC_FC32_Radix2_fs_OutOfPlace_unsafe_vfp
        .extern  armSP_FFTInv_CToC_FC32_Radix4_fs_OutOfPlace_unsafe_vfp
        .extern  armSP_FFTInv_CToC_FC32_Radix8_fs_OutOfPlace_unsafe_vfp
        .extern  armSP_FFTInv_CToC_FC32_Radix4_OutOfPlace_unsafe_vfp

@// Set debugging level
@//DEBUG_ON    SETL {TRUE}



@// Guarding implementation by the processor name

@//    IF  ARM1136JS

@//Input Registers
@// r0-r2 carry the three arguments on entry.

#define pSrc            r0
#define pDst            r1
#define pFFTSpec        r2


@// Output registers
@// result shares r0 with pSrc; it is only written once pSrc is dead.
#define result          r0

@//Local Scratch Registers
@//
@// Several names below deliberately share one physical register:
@//   r1  pDst / argTwiddle
@//   r2  pFFTSpec / argDst / diffMinusOne
@//   r4  pTwiddle / argScale
@//   r6  N / subFFTNum
@// order lives in r14 (lr), so its value does not survive a BL.
@// Keep comments on their own lines: text that follows a #define on the
@// same line becomes part of the macro expansion.

#define argTwiddle      r1
#define argDst          r2
#define argScale        r4
#define pTwiddle        r4
#define pOut            r5
#define subFFTSize      r7
#define subFFTNum       r6
#define N               r6
#define order           r14
#define diff            r9
#define count           r8
#define diffMinusOne    r2
#define round           r3

@// VFP single-precision registers used by the copy and scaling code.
#define x0r     s0
#define x0i     s1
#define fone    s2
#define fscale  s3


@// ---------------------------------------------------------------------
@// omxSP_FFTInv_CToC_FC32_Sfs_vfp
@//
@// Inverse FFT of a complex signal held as interleaved 32-bit floats
@// (real, imag), followed by a pass that multiplies every output value
@// by 1/N.
@//
@// In:    r0 (pSrc)      source samples
@//        r1 (pDst)      destination buffer
@//        r2 (pFFTSpec)  FFT spec; N, pTwiddle and pBuf are read from it
@// Out:   r0 (result)    OMX_Sts_NoErr
@//
@// Written here: r0-r7, r14, s0-s3, flags.  r14 (lr) doubles as "order",
@//        so returning correctly depends on M_START / M_END saving and
@//        restoring lr -- NOTE(review): confirm in armCOMM_s.h.
@//
@// NOTE(review): the stage helpers are external.  This routine assumes
@//        that each helper returns with pSrc (r0) pointing at the data
@//        it just produced, subFFTSize (r7) updated to the size reached
@//        so far and subFFTNum (r6) updated to the number of sub-FFTs
@//        left -- verify against the helper sources.
@// ---------------------------------------------------------------------

    @// Allocate stack memory required by the function

    @// Write function header
        M_START     omxSP_FFTInv_CToC_FC32_Sfs_vfp,r11

@// Structure offsets for FFTSpec
        .set    ARMsFFTSpec_N, 0
        .set    ARMsFFTSpec_pBitRev, 4
        .set    ARMsFFTSpec_pTwiddle, 8
        .set    ARMsFFTSpec_pBuf, 12

        @// Define stack arguments

        @// Read the size from structure and take log
        LDR     N, [pFFTSpec, #ARMsFFTSpec_N]

        @// Read other structure parameters
        @// (all three fields are fetched before r2 is reused as argDst)
        LDR     pTwiddle, [pFFTSpec, #ARMsFFTSpec_pTwiddle]
        LDR     pOut, [pFFTSpec, #ARMsFFTSpec_pBuf]

        CLZ     order,N                 @// N = 2^order
        RSB     order,order,#31         @// order = 31 - clz(N)
        MOV     subFFTSize,#1
        @// subFFTNum and N are both r6, so no copy is required:
        @//MOV     subFFTNum,N

        CMP     order,#1
        BGT     orderGreaterthan1       @// order > 1
        @// Order = 0 or 1
        @// Order = 0 (LT): copy the single complex value to pDst and go
        @// straight to the scaling pass, which then runs over pDst.
        vldmlt.f32 pSrc, {x0r, x0i}
        vstmlt.f32 pDst, {x0r, x0i}

        MOVLT   pSrc,pDst
        BLT     FFTEnd

        @// Handle order = 1
        @// argDst must be set first: argTwiddle is r1, the same register
        @// as pDst, so the next MOV overwrites the destination pointer.
        MOV     argDst,pDst
        MOV     argTwiddle,pTwiddle

        BL      armSP_FFTInv_CToC_FC32_Radix2_fs_OutOfPlace_unsafe_vfp
        B       FFTEnd

orderGreaterthan1:

        @// Set input args to fft stages.
        @// Bit 1 of order picks the buffer the first stage writes to:
        @//   set   -> argDst = pDst, r5 keeps pBuf
        @//   clear -> argDst = pBuf, r5 = pDst
        @// Either way r2 and r5 end up holding the two distinct buffers.
        TST     order, #2
        MOVNE   argDst,pDst
        MOVEQ   argDst,pOut
        MOVEQ   pOut,pDst               @// Pass the first stage dest in RN5
        MOV     argTwiddle,pTwiddle


        @//check for even or odd order
        @// NOTE: The BLEQ / BLNE pair below works even though the first
        @// call corrupts the flags.  The end of the "grpZeroSetLoop" loop
        @// inside armSP_FFTInv_CToC_FC32_Radix4_fs_OutOfPlace_unsafe_vfp
        @// leaves the Z flag set (EQ), so the BLNE is still skipped after
        @// the Radix4 first stage has run.

        TST     order,#0x00000001
        BLEQ    armSP_FFTInv_CToC_FC32_Radix4_fs_OutOfPlace_unsafe_vfp
        BLNE    armSP_FFTInv_CToC_FC32_Radix8_fs_OutOfPlace_unsafe_vfp

        @// Run generic radix-4 stages until one sub-FFT is left.
unscaledRadix4Loop:
        CMP        subFFTNum,#1
         BEQ        FFTEnd
         BL        armSP_FFTInv_CToC_FC32_Radix4_OutOfPlace_unsafe_vfp
         B        unscaledRadix4Loop


FFTEnd:

        @// Preload the first complex value for the scaling loop.
        vldm.f32 pSrc, {x0r, x0i}

        vmov.f32     fscale, subFFTSize         @// raw integer bits into s3
        vcvt.f32.s32 fscale, fscale             @// fscale = N as a float
        movw         round, #0
        movt         round, #0x3f80             @// round = 1.0
        vmov.f32     fone, round
        vdiv.f32     fscale, fone, fscale       @// fscale = 1/N

        @// Scale subFFTSize complex values in place, walking pSrc forward
        @// 8 bytes (one real/imag pair) per pass.  The flags set by SUBS
        @// are still intact at the vldmgt / bgt below because none of the
        @// instructions in between update them.
scaleFFTData:                                   @// N = subFFTSize
        SUBS    subFFTSize,subFFTSize,#1
        vmul.f32 x0r, x0r, fscale
        vmul.f32 x0i, x0i, fscale
        vstm.f32 pSrc, {x0r, x0i}
        add      pSrc, #8
        @// Fetch the next pair only if one remains, so nothing is read
        @// past the last element.
        vldmgt.f32 pSrc, {x0r, x0i}

        bgt     scaleFFTData


        @// Set return value
        MOV     result, #OMX_Sts_NoErr

        @// Write function tail
        M_END

@//    ENDIF                                           @//ARM1136JS


      @// Guarding implementation by the processor name



    .end