10c1bc742181ded4930842b46e9507372f0b1b963James Dong;//
20c1bc742181ded4930842b46e9507372f0b1b963James Dong;//
30c1bc742181ded4930842b46e9507372f0b1b963James Dong;// File Name:  omxVCM4P10_InterpolateLuma_s.s
40c1bc742181ded4930842b46e9507372f0b1b963James Dong;// OpenMAX DL: v1.0.2
50c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Revision:   12290
60c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Date:       Wednesday, April 9, 2008
70c1bc742181ded4930842b46e9507372f0b1b963James Dong;//
80c1bc742181ded4930842b46e9507372f0b1b963James Dong;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
90c1bc742181ded4930842b46e9507372f0b1b963James Dong;//
100c1bc742181ded4930842b46e9507372f0b1b963James Dong;//
110c1bc742181ded4930842b46e9507372f0b1b963James Dong;//
120c1bc742181ded4930842b46e9507372f0b1b963James Dong
130c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Function:
140c1bc742181ded4930842b46e9507372f0b1b963James Dong;//     omxVCM4P10_InterpolateLuma
150c1bc742181ded4930842b46e9507372f0b1b963James Dong;//
160c1bc742181ded4930842b46e9507372f0b1b963James Dong;// This function implements omxVCM4P10_InterpolateLuma in v6 assembly.
170c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Performs quarter pel interpolation of inter luma MB.
180c1bc742181ded4930842b46e9507372f0b1b963James Dong;// It's assumed that the frame is already padded when calling this function.
190c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Parameters:
200c1bc742181ded4930842b46e9507372f0b1b963James Dong;// [in]    pSrc        Pointer to the source reference frame buffer
210c1bc742181ded4930842b46e9507372f0b1b963James Dong;// [in]    srcStep     Reference frame step in byte
220c1bc742181ded4930842b46e9507372f0b1b963James Dong;// [in]    dstStep     Destination frame step in byte. Must be multiple of roi.width
230c1bc742181ded4930842b46e9507372f0b1b963James Dong;// [in]    dx          Fractional part of horizontal motion vector
240c1bc742181ded4930842b46e9507372f0b1b963James Dong;//                         component in 1/4 pixel unit; valid in the range [0,3]
250c1bc742181ded4930842b46e9507372f0b1b963James Dong;// [in]    dy          Fractional part of vertical motion vector
260c1bc742181ded4930842b46e9507372f0b1b963James Dong;//                         component in 1/4 pixel unit; valid in the range [0,3]
270c1bc742181ded4930842b46e9507372f0b1b963James Dong;// [in]    roi         Dimension of the interpolation region; the parameters roi.width and roi.height must
280c1bc742181ded4930842b46e9507372f0b1b963James Dong;//                         each be equal to either 4, 8, or 16.
290c1bc742181ded4930842b46e9507372f0b1b963James Dong;// [out]   pDst        Pointer to the destination frame buffer.
300c1bc742181ded4930842b46e9507372f0b1b963James Dong;//                   if roi.width==4,  4-byte alignment required
310c1bc742181ded4930842b46e9507372f0b1b963James Dong;//                   if roi.width==8,  8-byte alignment required
320c1bc742181ded4930842b46e9507372f0b1b963James Dong;//                   if roi.width==16, 16-byte alignment required
330c1bc742181ded4930842b46e9507372f0b1b963James Dong;//
340c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Return Value:
350c1bc742181ded4930842b46e9507372f0b1b963James Dong;// If the function runs without error, it returns OMX_Sts_NoErr.
360c1bc742181ded4930842b46e9507372f0b1b963James Dong;// It is assumed that the following conditions are satisfied before calling this function:
370c1bc742181ded4930842b46e9507372f0b1b963James Dong;//  pSrc and pDst are not NULL.
380c1bc742181ded4930842b46e9507372f0b1b963James Dong;//  srcStep and dstStep are >= roi.width.
390c1bc742181ded4930842b46e9507372f0b1b963James Dong;//     dx and dy are in the range [0-3].
400c1bc742181ded4930842b46e9507372f0b1b963James Dong;//     roi.width and roi.height are each one of {4, 8, 16}.
410c1bc742181ded4930842b46e9507372f0b1b963James Dong;//     If roi.width is equal to 4, pDst is 4 byte aligned.
420c1bc742181ded4930842b46e9507372f0b1b963James Dong;//     If roi.width is equal to 8, pDst is 8 byte aligned.
430c1bc742181ded4930842b46e9507372f0b1b963James Dong;//     If roi.width is equal to 16, pDst is 16 byte aligned.
440c1bc742181ded4930842b46e9507372f0b1b963James Dong;//     srcStep and dstStep are multiples of 8.
450c1bc742181ded4930842b46e9507372f0b1b963James Dong;//
460c1bc742181ded4930842b46e9507372f0b1b963James Dong;//
470c1bc742181ded4930842b46e9507372f0b1b963James Dong
480c1bc742181ded4930842b46e9507372f0b1b963James Dong
490c1bc742181ded4930842b46e9507372f0b1b963James Dong        INCLUDE omxtypes_s.h
500c1bc742181ded4930842b46e9507372f0b1b963James Dong        INCLUDE armCOMM_s.h
510c1bc742181ded4930842b46e9507372f0b1b963James Dong
520c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_VARIANTS CortexA8
530c1bc742181ded4930842b46e9507372f0b1b963James Dong
540c1bc742181ded4930842b46e9507372f0b1b963James Dong        EXPORT omxVCM4P10_InterpolateLuma
550c1bc742181ded4930842b46e9507372f0b1b963James Dong
560c1bc742181ded4930842b46e9507372f0b1b963James Dong
570c1bc742181ded4930842b46e9507372f0b1b963James Dong    IF CortexA8
580c1bc742181ded4930842b46e9507372f0b1b963James Dong        IMPORT armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
590c1bc742181ded4930842b46e9507372f0b1b963James Dong        IMPORT armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
600c1bc742181ded4930842b46e9507372f0b1b963James Dong        IMPORT armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe
610c1bc742181ded4930842b46e9507372f0b1b963James Dong        IMPORT armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe
620c1bc742181ded4930842b46e9507372f0b1b963James Dong    ENDIF
630c1bc742181ded4930842b46e9507372f0b1b963James Dong
640c1bc742181ded4930842b46e9507372f0b1b963James Dong
650c1bc742181ded4930842b46e9507372f0b1b963James Dong
660c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Declare input registers (r0-r3 are saved as {pSrc,srcStep,pDst,dstStep} at the top of every 4x4 block; iHeight/iWidth are loaded from stack arguments)
670c1bc742181ded4930842b46e9507372f0b1b963James DongpSrc            RN 0
680c1bc742181ded4930842b46e9507372f0b1b963James DongsrcStep         RN 1
690c1bc742181ded4930842b46e9507372f0b1b963James DongpDst            RN 2
700c1bc742181ded4930842b46e9507372f0b1b963James DongdstStep         RN 3
710c1bc742181ded4930842b46e9507372f0b1b963James DongiHeight         RN 4
720c1bc742181ded4930842b46e9507372f0b1b963James DongiWidth          RN 5
730c1bc742181ded4930842b46e9507372f0b1b963James Dong
740c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Declare other intermediate registers (note: idx and index name the same register)
750c1bc742181ded4930842b46e9507372f0b1b963James Dongidx             RN 6
760c1bc742181ded4930842b46e9507372f0b1b963James Dongidy             RN 7
770c1bc742181ded4930842b46e9507372f0b1b963James Dongindex           RN 6        ;// same register as idx: writing index overwrites idx
780c1bc742181ded4930842b46e9507372f0b1b963James DongTemp            RN 12       ;// scratch row pointer used by the load/store sequences
790c1bc742181ded4930842b46e9507372f0b1b963James DongpArgs           RN 11       ;// set with "M_ADR pArgs, ppArgs"; base address for the STM of r0-r3
800c1bc742181ded4930842b46e9507372f0b1b963James Dong
810c1bc742181ded4930842b46e9507372f0b1b963James Dong
820c1bc742181ded4930842b46e9507372f0b1b963James Dong    IF CortexA8
830c1bc742181ded4930842b46e9507372f0b1b963James Dong
840c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;//
850c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Interpolation of luma is implemented by processing block of pixels, size 4x4 at a time.
860c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;//
870c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ALLOC4    ppArgs, 16
880c1bc742181ded4930842b46e9507372f0b1b963James Dong
890c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Function header
900c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_START omxVCM4P10_InterpolateLuma, r11, d15
910c1bc742181ded4930842b46e9507372f0b1b963James Dong
920c1bc742181ded4930842b46e9507372f0b1b963James DongpSrcBK          RN 8        ;// holds a copy of pSrc across a helper call (see Case_5 and Case_7)
930c1bc742181ded4930842b46e9507372f0b1b963James Dong
940c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Declare Neon registers (the names below are typed views; several names share one physical register)
950c1bc742181ded4930842b46e9507372f0b1b963James DongdCoeff5         DN 30.S16
960c1bc742181ded4930842b46e9507372f0b1b963James DongdCoeff20        DN 31.S16
970c1bc742181ded4930842b46e9507372f0b1b963James Dong
980c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Registers used for implementing Horizontal interpolation (dAccHn and dResultHn are the U8 and U32 views of the same D register)
990c1bc742181ded4930842b46e9507372f0b1b963James DongdSrc0c          DN 14.U8
1000c1bc742181ded4930842b46e9507372f0b1b963James DongdSrc1c          DN 16.U8
1010c1bc742181ded4930842b46e9507372f0b1b963James DongdSrc2c          DN 18.U8
1020c1bc742181ded4930842b46e9507372f0b1b963James DongdSrc3c          DN 20.U8
1030c1bc742181ded4930842b46e9507372f0b1b963James DongdSrc0d          DN 15.U8
1040c1bc742181ded4930842b46e9507372f0b1b963James DongdSrc1d          DN 17.U8
1050c1bc742181ded4930842b46e9507372f0b1b963James DongdSrc2d          DN 19.U8
1060c1bc742181ded4930842b46e9507372f0b1b963James DongdSrc3d          DN 21.U8
1070c1bc742181ded4930842b46e9507372f0b1b963James DongdAccH0          DN 22.U8
1080c1bc742181ded4930842b46e9507372f0b1b963James DongdAccH1          DN 24.U8
1090c1bc742181ded4930842b46e9507372f0b1b963James DongdAccH2          DN 26.U8
1100c1bc742181ded4930842b46e9507372f0b1b963James DongdAccH3          DN 28.U8
1110c1bc742181ded4930842b46e9507372f0b1b963James DongdResultH0       DN 22.U32
1120c1bc742181ded4930842b46e9507372f0b1b963James DongdResultH1       DN 24.U32
1130c1bc742181ded4930842b46e9507372f0b1b963James DongdResultH2       DN 26.U32
1140c1bc742181ded4930842b46e9507372f0b1b963James DongdResultH3       DN 28.U32
1150c1bc742181ded4930842b46e9507372f0b1b963James Dong
1160c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Registers used for implementing Vertical interpolation (dAccVn and dResultVn are the U8 and U32 views of the same D register)
1170c1bc742181ded4930842b46e9507372f0b1b963James DongdSrc0           DN 9.U8
1180c1bc742181ded4930842b46e9507372f0b1b963James DongdSrc1           DN 10.U8
1190c1bc742181ded4930842b46e9507372f0b1b963James DongdSrc2           DN 11.U8
1200c1bc742181ded4930842b46e9507372f0b1b963James DongdSrc3           DN 12.U8
1210c1bc742181ded4930842b46e9507372f0b1b963James DongdSrc4           DN 13.U8
1220c1bc742181ded4930842b46e9507372f0b1b963James DongdAccV0          DN 0.U8
1230c1bc742181ded4930842b46e9507372f0b1b963James DongdAccV1          DN 2.U8
1240c1bc742181ded4930842b46e9507372f0b1b963James DongdAccV2          DN 4.U8
1250c1bc742181ded4930842b46e9507372f0b1b963James DongdAccV3          DN 6.U8
1260c1bc742181ded4930842b46e9507372f0b1b963James DongdResultV0       DN 0.U32
1270c1bc742181ded4930842b46e9507372f0b1b963James DongdResultV1       DN 2.U32
1280c1bc742181ded4930842b46e9507372f0b1b963James DongdResultV2       DN 4.U32
1290c1bc742181ded4930842b46e9507372f0b1b963James DongdResultV3       DN 6.U32
1300c1bc742181ded4930842b46e9507372f0b1b963James Dong
1310c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Registers used for implementing Diagonal interpolation (dTAccn/dTResn reuse d0,d2,d4,d6; dTResultn reuse d14,d16,d18,d20)
1320c1bc742181ded4930842b46e9507372f0b1b963James DongdTAcc0          DN 0.U8
1330c1bc742181ded4930842b46e9507372f0b1b963James DongdTAcc1          DN 2.U8
1340c1bc742181ded4930842b46e9507372f0b1b963James DongdTAcc2          DN 4.U8
1350c1bc742181ded4930842b46e9507372f0b1b963James DongdTAcc3          DN 6.U8
1360c1bc742181ded4930842b46e9507372f0b1b963James DongdTRes0          DN 0.32
1370c1bc742181ded4930842b46e9507372f0b1b963James DongdTRes1          DN 2.32
1380c1bc742181ded4930842b46e9507372f0b1b963James DongdTRes2          DN 4.32
1390c1bc742181ded4930842b46e9507372f0b1b963James DongdTRes3          DN 6.32
1400c1bc742181ded4930842b46e9507372f0b1b963James DongdTResult0       DN 14.U8
1410c1bc742181ded4930842b46e9507372f0b1b963James DongdTResult1       DN 16.U8
1420c1bc742181ded4930842b46e9507372f0b1b963James DongdTResult2       DN 18.U8    ;// same D register as dTempP0 (low half of qTempP01)
1430c1bc742181ded4930842b46e9507372f0b1b963James DongdTResult3       DN 20.U8    ;// same D register as dTempQ0 (low half of qTempQ01)
1440c1bc742181ded4930842b46e9507372f0b1b963James DongdTempP0         DN 18.S16
1450c1bc742181ded4930842b46e9507372f0b1b963James DongdTempP1         DN 19.S16
1460c1bc742181ded4930842b46e9507372f0b1b963James DongdTempQ0         DN 20.S16
1470c1bc742181ded4930842b46e9507372f0b1b963James DongdTempQ1         DN 21.S16
1480c1bc742181ded4930842b46e9507372f0b1b963James DongdTempR0         DN 22.S16
1490c1bc742181ded4930842b46e9507372f0b1b963James DongdTempR1         DN 23.S16
1500c1bc742181ded4930842b46e9507372f0b1b963James DongdTempS0         DN 24.S16
1510c1bc742181ded4930842b46e9507372f0b1b963James DongdTempS1         DN 25.S16
1520c1bc742181ded4930842b46e9507372f0b1b963James DongqTempP01        QN 9.S16    ;// q9  = d18:d19 = dTempP0:dTempP1
1530c1bc742181ded4930842b46e9507372f0b1b963James DongqTempQ01        QN 10.S16   ;// q10 = d20:d21 = dTempQ0:dTempQ1
1540c1bc742181ded4930842b46e9507372f0b1b963James DongqTempR01        QN 11.S16   ;// q11 = d22:d23 = dTempR0:dTempR1
1550c1bc742181ded4930842b46e9507372f0b1b963James DongqTempS01        QN 12.S16   ;// q12 = d24:d25 = dTempS0:dTempS1
1560c1bc742181ded4930842b46e9507372f0b1b963James Dong
1570c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Intermediate values for averaging (qRes4..qRes6 are the same Q registers as qTempP01..qTempR01)
1580c1bc742181ded4930842b46e9507372f0b1b963James DongqRes2           QN 7.S16    ;// q7 = d14:d15; its low half is dTResult0
1590c1bc742181ded4930842b46e9507372f0b1b963James DongqRes3           QN 8.S16    ;// q8 = d16:d17; its low half is dTResult1
1600c1bc742181ded4930842b46e9507372f0b1b963James DongqRes4           QN 9.S16
1610c1bc742181ded4930842b46e9507372f0b1b963James DongqRes5           QN 10.S16
1620c1bc742181ded4930842b46e9507372f0b1b963James DongqRes6           QN 11.S16
1630c1bc742181ded4930842b46e9507372f0b1b963James Dong
1640c1bc742181ded4930842b46e9507372f0b1b963James Dong;// For implementing copy (dDstn are 32-bit views of d9..d12, the same registers as dSrc0..dSrc3)
1650c1bc742181ded4930842b46e9507372f0b1b963James DongdDst0            DN 9.32
1660c1bc742181ded4930842b46e9507372f0b1b963James DongdDst1            DN 10.32
1670c1bc742181ded4930842b46e9507372f0b1b963James DongdDst2            DN 11.32
1680c1bc742181ded4930842b46e9507372f0b1b963James DongdDst3            DN 12.32
1690c1bc742181ded4930842b46e9507372f0b1b963James Dong
1700c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Define stack arguments (four 4-byte arguments, declared in this order)
1710c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ARG       ptridx, 4
1720c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ARG       ptridy, 4
1730c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ARG       ptrWidth, 4
1740c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ARG       ptrHeight, 4
1750c1bc742181ded4930842b46e9507372f0b1b963James Dong
1760c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Load idx, idy and the roi width/height from the stack arguments
1770c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDR       idx, ptridx
1780c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDR       idy, ptridy
1790c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDR       iWidth, ptrWidth
1800c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDR       iHeight, ptrHeight
1810c1bc742181ded4930842b46e9507372f0b1b963James Dong
1820c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD         index, idx, idy, LSL #2                 ;//  index = idx + 4*idy, i.e. [index] = [idy][idx]; index and idx are both r6, so idx is overwritten here
1830c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ADR       pArgs, ppArgs                           ;// pArgs = address of the ppArgs area declared with "M_ALLOC4 ppArgs, 16"
1840c1bc742181ded4930842b46e9507372f0b1b963James Dong
1850c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Move coefficients 20 and 5 into Neon registers d31/d30 (not read by the cases visible here; presumably consumed by the *_unsafe helpers - confirm against the helpers)
1860c1bc742181ded4930842b46e9507372f0b1b963James Dong        VMOV        dCoeff20, #20
1870c1bc742181ded4930842b46e9507372f0b1b963James Dong        VMOV        dCoeff5, #5
1880c1bc742181ded4930842b46e9507372f0b1b963James Dong
1890c1bc742181ded4930842b46e9507372f0b1b963James DongBlock4x4WidthLoop
1900c1bc742181ded4930842b46e9507372f0b1b963James DongBlock4x4HeightLoop
1910c1bc742181ded4930842b46e9507372f0b1b963James Dong
1920c1bc742181ded4930842b46e9507372f0b1b963James Dong        STM         pArgs, {pSrc,srcStep,pDst,dstStep}      ;// save r0-r3 for this 4x4 block at [pArgs]; the cases below modify pSrc and pDst
1930c1bc742181ded4930842b46e9507372f0b1b963James Dong
1940c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// switch table using motion vector as index (index = idx + 4*idy; one 4-byte branch per entry)
1950c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD         pc, pc, index, LSL #2                   ;// relies on ARM state, where PC reads as this instruction + 8: index 0 lands on "B Case_0"
1960c1bc742181ded4930842b46e9507372f0b1b963James Dong        B           Case_f                                  ;// padding slot skipped by the PC+8 offset; not selected by any index
1970c1bc742181ded4930842b46e9507372f0b1b963James Dong        B           Case_0
1980c1bc742181ded4930842b46e9507372f0b1b963James Dong        B           Case_1
1990c1bc742181ded4930842b46e9507372f0b1b963James Dong        B           Case_2
2000c1bc742181ded4930842b46e9507372f0b1b963James Dong        B           Case_3
2010c1bc742181ded4930842b46e9507372f0b1b963James Dong        B           Case_4
2020c1bc742181ded4930842b46e9507372f0b1b963James Dong        B           Case_5
2030c1bc742181ded4930842b46e9507372f0b1b963James Dong        B           Case_6
2040c1bc742181ded4930842b46e9507372f0b1b963James Dong        B           Case_7
2050c1bc742181ded4930842b46e9507372f0b1b963James Dong        B           Case_8
2060c1bc742181ded4930842b46e9507372f0b1b963James Dong        B           Case_9
2070c1bc742181ded4930842b46e9507372f0b1b963James Dong        B           Case_a
2080c1bc742181ded4930842b46e9507372f0b1b963James Dong        B           Case_b
2090c1bc742181ded4930842b46e9507372f0b1b963James Dong        B           Case_c
2100c1bc742181ded4930842b46e9507372f0b1b963James Dong        B           Case_d
2110c1bc742181ded4930842b46e9507372f0b1b963James Dong        B           Case_e
2120c1bc742181ded4930842b46e9507372f0b1b963James Dong        B           Case_f
2130c1bc742181ded4930842b46e9507372f0b1b963James Dong
2140c1bc742181ded4930842b46e9507372f0b1b963James DongCase_0
2150c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Case G: index 0 (idx=0, idy=0). No helper call; the 4x4 block is copied straight from pSrc to pDst.
2160c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_PRINTF "Case 0 \n"
2170c1bc742181ded4930842b46e9507372f0b1b963James Dong
2180c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Loads a 4x4 block of .8 and stores as .32 (each VLD1 fills a whole D register; only lane 0, i.e. 4 bytes, is stored per row)
2190c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD         Temp, pSrc, srcStep, LSL #1             ;// Temp = pSrc + 2*srcStep (source row 2)
2200c1bc742181ded4930842b46e9507372f0b1b963James Dong        VLD1        dSrc0, [pSrc], srcStep                  ;// row 0, then pSrc += srcStep
2210c1bc742181ded4930842b46e9507372f0b1b963James Dong        VLD1        dSrc2, [Temp], srcStep                  ;// row 2, then Temp += srcStep
2220c1bc742181ded4930842b46e9507372f0b1b963James Dong        VLD1        dSrc1, [pSrc]                           ;// row 1
2230c1bc742181ded4930842b46e9507372f0b1b963James Dong        VLD1        dSrc3, [Temp]                           ;// row 3
2240c1bc742181ded4930842b46e9507372f0b1b963James Dong
2250c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD         Temp, pDst, dstStep, LSL #1             ;// Temp = pDst + 2*dstStep (destination row 2)
2260c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dDst0[0], [pDst], dstStep               ;// dDstN is the 32-bit view of dSrcN; row 0, then pDst += dstStep
2270c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dDst2[0], [Temp], dstStep               ;// row 2, then Temp += dstStep
2280c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dDst1[0], [pDst]                        ;// row 1
2290c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dDst3[0], [Temp]                        ;// row 3
2300c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ADR       pArgs, ppArgs                           ;// re-point pArgs at the ppArgs area before leaving the case
2310c1bc742181ded4930842b46e9507372f0b1b963James Dong        B           Block4x4LoopEnd
2320c1bc742181ded4930842b46e9507372f0b1b963James DongCase_1
2330c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Case a: index 1 (idx=1, idy=0). Averages the HalfHor helper output (dAccH0..3) with dSrc0c..3c and stores 4 bytes per row.
2340c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_PRINTF "Case 1 \n"
2350c1bc742181ded4930842b46e9507372f0b1b963James Dong
2360c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB         pSrc, pSrc, #2                          ;// helper is entered with pSrc 2 bytes left of the block (presumably its left filter context - confirm in helper)
2370c1bc742181ded4930842b46e9507372f0b1b963James Dong        BL          armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
2380c1bc742181ded4930842b46e9507372f0b1b963James Dong        VRHADD      dAccH0, dAccH0, dSrc0c                  ;// rounding halving add per byte: (a + b + 1) >> 1
2390c1bc742181ded4930842b46e9507372f0b1b963James Dong        VRHADD      dAccH2, dAccH2, dSrc2c
2400c1bc742181ded4930842b46e9507372f0b1b963James Dong        VRHADD      dAccH1, dAccH1, dSrc1c
2410c1bc742181ded4930842b46e9507372f0b1b963James Dong        VRHADD      dAccH3, dAccH3, dSrc3c
2420c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD         Temp, pDst, dstStep, LSL #1             ;// Temp = pDst + 2*dstStep (destination row 2)
2430c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dResultH0[0], [pDst], dstStep           ;// dResultHn is the U32 view of dAccHn; row 0, then pDst += dstStep
2440c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dResultH2[0], [Temp], dstStep           ;// row 2, then Temp += dstStep
2450c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dResultH1[0], [pDst]                    ;// row 1
2460c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dResultH3[0], [Temp]                    ;// row 3
2470c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ADR       pArgs, ppArgs                           ;// re-point pArgs at the ppArgs area before leaving the case
2480c1bc742181ded4930842b46e9507372f0b1b963James Dong        B           Block4x4LoopEnd
2490c1bc742181ded4930842b46e9507372f0b1b963James DongCase_2
2500c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Case b: index 2 (idx=2, idy=0). Stores the HalfHor helper output (dAccH0..3, via the dResultH views) unchanged.
2510c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_PRINTF "Case 2 \n"
2520c1bc742181ded4930842b46e9507372f0b1b963James Dong
2530c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB         pSrc, pSrc, #2                          ;// helper is entered with pSrc 2 bytes left of the block
2540c1bc742181ded4930842b46e9507372f0b1b963James Dong        BL          armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
2550c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD         Temp, pDst, dstStep, LSL #1             ;// Temp = pDst + 2*dstStep (destination row 2)
2560c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dResultH0[0], [pDst], dstStep           ;// row 0, then pDst += dstStep
2570c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dResultH2[0], [Temp], dstStep           ;// row 2, then Temp += dstStep
2580c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dResultH1[0], [pDst]                    ;// row 1
2590c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dResultH3[0], [Temp]                    ;// row 3
2600c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ADR       pArgs, ppArgs                           ;// re-point pArgs at the ppArgs area before leaving the case
2610c1bc742181ded4930842b46e9507372f0b1b963James Dong        B           Block4x4LoopEnd
2620c1bc742181ded4930842b46e9507372f0b1b963James DongCase_3
2630c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Case c: index 3 (idx=3, idy=0). Same as Case_1 except the helper output is averaged with dSrc0d..3d instead of dSrc0c..3c.
2640c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_PRINTF "Case 3 \n"
2650c1bc742181ded4930842b46e9507372f0b1b963James Dong
2660c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB         pSrc, pSrc, #2                          ;// helper is entered with pSrc 2 bytes left of the block
2670c1bc742181ded4930842b46e9507372f0b1b963James Dong        BL          armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
2680c1bc742181ded4930842b46e9507372f0b1b963James Dong        VRHADD      dAccH0, dAccH0, dSrc0d                  ;// rounding halving add per byte: (a + b + 1) >> 1
2690c1bc742181ded4930842b46e9507372f0b1b963James Dong        VRHADD      dAccH2, dAccH2, dSrc2d
2700c1bc742181ded4930842b46e9507372f0b1b963James Dong        VRHADD      dAccH1, dAccH1, dSrc1d
2710c1bc742181ded4930842b46e9507372f0b1b963James Dong        VRHADD      dAccH3, dAccH3, dSrc3d
2720c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD         Temp, pDst, dstStep, LSL #1             ;// Temp = pDst + 2*dstStep (destination row 2)
2730c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dResultH0[0], [pDst], dstStep           ;// row 0, then pDst += dstStep
2740c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dResultH2[0], [Temp], dstStep           ;// row 2, then Temp += dstStep
2750c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dResultH1[0], [pDst]                    ;// row 1
2760c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dResultH3[0], [Temp]                    ;// row 3
2770c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ADR       pArgs, ppArgs                           ;// re-point pArgs at the ppArgs area before leaving the case
2780c1bc742181ded4930842b46e9507372f0b1b963James Dong        B           Block4x4LoopEnd
2790c1bc742181ded4930842b46e9507372f0b1b963James DongCase_4
2800c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Case d: index 4 (idx=0, idy=1). Averages the HalfVer helper output (dAccV0..3) with dSrc0..3 and stores 4 bytes per row.
2810c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_PRINTF "Case 4 \n"
2820c1bc742181ded4930842b46e9507372f0b1b963James Dong
2830c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB         pSrc, pSrc, srcStep, LSL #1             ;// helper is entered with pSrc 2 rows above the block (presumably its upper filter context - confirm in helper)
2840c1bc742181ded4930842b46e9507372f0b1b963James Dong        BL          armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
2850c1bc742181ded4930842b46e9507372f0b1b963James Dong        VRHADD      dAccV0, dAccV0, dSrc0                   ;// rounding halving add per byte: (a + b + 1) >> 1
2860c1bc742181ded4930842b46e9507372f0b1b963James Dong        VRHADD      dAccV2, dAccV2, dSrc2
2870c1bc742181ded4930842b46e9507372f0b1b963James Dong        VRHADD      dAccV1, dAccV1, dSrc1
2880c1bc742181ded4930842b46e9507372f0b1b963James Dong        VRHADD      dAccV3, dAccV3, dSrc3
2890c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD         Temp, pDst, dstStep, LSL #1             ;// Temp = pDst + 2*dstStep (destination row 2)
2900c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dResultV0[0], [pDst], dstStep           ;// dResultVn is the U32 view of dAccVn; row 0, then pDst += dstStep
2910c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dResultV2[0], [Temp], dstStep           ;// row 2, then Temp += dstStep
2920c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dResultV1[0], [pDst]                    ;// row 1
2930c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dResultV3[0], [Temp]                    ;// row 3
2940c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ADR       pArgs, ppArgs                           ;// re-point pArgs at the ppArgs area before leaving the case
2950c1bc742181ded4930842b46e9507372f0b1b963James Dong        B           Block4x4LoopEnd
2960c1bc742181ded4930842b46e9507372f0b1b963James DongCase_5
2970c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Case e: index 5 (idx=1, idy=1). Runs the HalfVer helper, then the HalfHor helper, and averages their outputs (dAccV with dAccH).
2980c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_PRINTF "Case 5 \n"
2990c1bc742181ded4930842b46e9507372f0b1b963James Dong
3000c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV         pSrcBK, pSrc                            ;// keep the block origin; pSrc is re-derived from it after the first helper (assumes the helper preserves r8 - confirm)
3010c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB         pSrc, pSrc, srcStep, LSL #1             ;// vertical helper is entered 2 rows above the block
3020c1bc742181ded4930842b46e9507372f0b1b963James Dong        BL          armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
3030c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB         pSrc, pSrcBK, #2                        ;// horizontal helper is entered 2 bytes left of the block origin
3040c1bc742181ded4930842b46e9507372f0b1b963James Dong        BL          armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
3050c1bc742181ded4930842b46e9507372f0b1b963James Dong        VRHADD      dAccH0, dAccH0, dAccV0                  ;// dAccV0..3 must still hold the first helper's output here (assumes HalfHor leaves d0,d2,d4,d6 intact - confirm)
3060c1bc742181ded4930842b46e9507372f0b1b963James Dong        VRHADD      dAccH2, dAccH2, dAccV2
3070c1bc742181ded4930842b46e9507372f0b1b963James Dong        VRHADD      dAccH1, dAccH1, dAccV1
3080c1bc742181ded4930842b46e9507372f0b1b963James Dong        VRHADD      dAccH3, dAccH3, dAccV3
3090c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD         Temp, pDst, dstStep, LSL #1             ;// Temp = pDst + 2*dstStep (destination row 2)
3100c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dResultH0[0], [pDst], dstStep           ;// row 0, then pDst += dstStep
3110c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dResultH2[0], [Temp], dstStep           ;// row 2, then Temp += dstStep
3120c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dResultH1[0], [pDst]                    ;// row 1
3130c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dResultH3[0], [Temp]                    ;// row 3
3140c1bc742181ded4930842b46e9507372f0b1b963James Dong
3150c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ADR       pArgs, ppArgs                           ;// re-point pArgs at the ppArgs area before leaving the case
3160c1bc742181ded4930842b46e9507372f0b1b963James Dong        B       Block4x4LoopEnd
3170c1bc742181ded4930842b46e9507372f0b1b963James DongCase_6
3180c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Case f: index 6 (idx=2, idy=1). Calls the HalfDiagHorVer helper, narrows qRes2..5 to bytes and averages them with dTAcc0..3.
3190c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_PRINTF "Case 6 \n"
3200c1bc742181ded4930842b46e9507372f0b1b963James Dong
3210c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB         pSrc, pSrc, srcStep, LSL #1             ;// helper is entered 2 rows above ...
3220c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB         pSrc, pSrc, #2                          ;// ... and 2 bytes left of the block
3230c1bc742181ded4930842b46e9507372f0b1b963James Dong        BL          armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe
3240c1bc742181ded4930842b46e9507372f0b1b963James Dong        VQRSHRUN    dTResult0, qRes2, #5                    ;// saturating rounding shift right by 5, S16 -> U8; each dTResultN is the low half of its own source Q register
3250c1bc742181ded4930842b46e9507372f0b1b963James Dong        VQRSHRUN    dTResult1, qRes3, #5
3260c1bc742181ded4930842b46e9507372f0b1b963James Dong        VQRSHRUN    dTResult2, qRes4, #5
3270c1bc742181ded4930842b46e9507372f0b1b963James Dong        VQRSHRUN    dTResult3, qRes5, #5
3280c1bc742181ded4930842b46e9507372f0b1b963James Dong        VRHADD      dTAcc0, dTAcc0, dTResult0               ;// rounding halving add per byte: (a + b + 1) >> 1
3290c1bc742181ded4930842b46e9507372f0b1b963James Dong        VRHADD      dTAcc2, dTAcc2, dTResult2
3300c1bc742181ded4930842b46e9507372f0b1b963James Dong        VRHADD      dTAcc1, dTAcc1, dTResult1
3310c1bc742181ded4930842b46e9507372f0b1b963James Dong        VRHADD      dTAcc3, dTAcc3, dTResult3
3320c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD         Temp, pDst, dstStep, LSL #1             ;// Temp = pDst + 2*dstStep (destination row 2)
3330c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dTRes0[0], [pDst], dstStep              ;// dTResN is the 32-bit view of dTAccN; row 0, then pDst += dstStep
3340c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dTRes2[0], [Temp], dstStep              ;// row 2, then Temp += dstStep
3350c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dTRes1[0], [pDst]                       ;// row 1
3360c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dTRes3[0], [Temp]                       ;// row 3
3370c1bc742181ded4930842b46e9507372f0b1b963James Dong
3380c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ADR       pArgs, ppArgs                           ;// re-point pArgs at the ppArgs area before leaving the case
3390c1bc742181ded4930842b46e9507372f0b1b963James Dong        B       Block4x4LoopEnd
3400c1bc742181ded4930842b46e9507372f0b1b963James DongCase_7
3410c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Case g: index 7 (idx=3, idy=1). Like Case_5, but the HalfVer helper is run one byte to the right of the block origin.
3420c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_PRINTF "Case 7 \n"
3430c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV         pSrcBK, pSrc                            ;// keep the block origin; pSrc is re-derived from it after the first helper (assumes the helper preserves r8 - confirm)
3440c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD         pSrc, pSrc, #1                          ;// vertical helper works on the column starting 1 byte to the right ...
3450c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB         pSrc, pSrc, srcStep, LSL #1             ;// ... and is entered 2 rows above the block
3460c1bc742181ded4930842b46e9507372f0b1b963James Dong        BL          armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
3470c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB         pSrc, pSrcBK, #2                        ;// horizontal helper is entered 2 bytes left of the block origin
3480c1bc742181ded4930842b46e9507372f0b1b963James Dong        BL          armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
3490c1bc742181ded4930842b46e9507372f0b1b963James Dong        VRHADD      dAccH0, dAccH0, dAccV0                  ;// dAccV0..3 must still hold the first helper's output here (assumes HalfHor leaves d0,d2,d4,d6 intact - confirm)
3500c1bc742181ded4930842b46e9507372f0b1b963James Dong        VRHADD      dAccH2, dAccH2, dAccV2
3510c1bc742181ded4930842b46e9507372f0b1b963James Dong        VRHADD      dAccH1, dAccH1, dAccV1
3520c1bc742181ded4930842b46e9507372f0b1b963James Dong        VRHADD      dAccH3, dAccH3, dAccV3
3530c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD         Temp, pDst, dstStep, LSL #1             ;// Temp = pDst + 2*dstStep (destination row 2)
3540c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dResultH0[0], [pDst], dstStep           ;// row 0, then pDst += dstStep
3550c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dResultH2[0], [Temp], dstStep           ;// row 2, then Temp += dstStep
3560c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dResultH1[0], [pDst]                    ;// row 1
3570c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dResultH3[0], [Temp]                    ;// row 3
3580c1bc742181ded4930842b46e9507372f0b1b963James Dong
3590c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ADR       pArgs, ppArgs                           ;// re-point pArgs at the ppArgs area before leaving the case
3600c1bc742181ded4930842b46e9507372f0b1b963James Dong        B       Block4x4LoopEnd
3610c1bc742181ded4930842b46e9507372f0b1b963James DongCase_8
3620c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Case h: index 8 (idx=0, idy=2). Stores the HalfVer helper output (dAccV0..3, via the dResultV views) unchanged.
3630c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_PRINTF "Case 8 \n"
3640c1bc742181ded4930842b46e9507372f0b1b963James Dong
3650c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB         pSrc, pSrc, srcStep, LSL #1             ;// helper is entered with pSrc 2 rows above the block
3660c1bc742181ded4930842b46e9507372f0b1b963James Dong        BL          armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
3670c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD         Temp, pDst, dstStep, LSL #1             ;// Temp = pDst + 2*dstStep (destination row 2)
3680c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dResultV0[0], [pDst], dstStep           ;// row 0, then pDst += dstStep
3690c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dResultV2[0], [Temp], dstStep           ;// row 2, then Temp += dstStep
3700c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dResultV1[0], [pDst]                    ;// row 1
3710c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dResultV3[0], [Temp]                    ;// row 3
3720c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ADR       pArgs, ppArgs                           ;// re-point pArgs at the ppArgs area before leaving the case
3730c1bc742181ded4930842b46e9507372f0b1b963James Dong        B           Block4x4LoopEnd
3740c1bc742181ded4930842b46e9507372f0b1b963James DongCase_9
3750c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Case i: HalfDiagVerHor helper, then each output row is the rounding average of dTAcc* with a value rebuilt from the dTemp* registers at element offset 2. Same shape as Case_b, which uses offset 3. NOTE(review): "i" presumably names the H.264 fractional sample position - confirm.
3760c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_PRINTF "Case 9 \n"
3770c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB         pSrc, pSrc, srcStep, LSL #1  ;// pSrc -= 2*srcStep (two rows up)
3780c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB         pSrc, pSrc, #2  ;// pSrc -= 2 bytes (two columns left)
3790c1bc742181ded4930842b46e9507372f0b1b963James Dong        BL          armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe  ;// helper body not visible here; presumably fills dTAcc0..3 and dTempP/Q/R/S - confirm
3800c1bc742181ded4930842b46e9507372f0b1b963James Dong        VEXT        dTempP0, dTempP0, dTempP1, #2  ;// dTempP0 = elements 2.. of dTempP0:dTempP1 (element size comes from the alias type, not visible here)
3810c1bc742181ded4930842b46e9507372f0b1b963James Dong        VEXT        dTempQ0, dTempQ0, dTempQ1, #2  ;// same extraction for the Q pair
3820c1bc742181ded4930842b46e9507372f0b1b963James Dong        VEXT        dTempR0, dTempR0, dTempR1, #2  ;// same extraction for the R pair
3830c1bc742181ded4930842b46e9507372f0b1b963James Dong        VEXT        dTempS0, dTempS0, dTempS1, #2  ;// same extraction for the S pair
3840c1bc742181ded4930842b46e9507372f0b1b963James Dong
3850c1bc742181ded4930842b46e9507372f0b1b963James Dong        VQRSHRUN    dTResult0, qTempP01, #5  ;// rounding shift right by 5, narrowed with unsigned saturation; qTempP01 presumably overlays dTempP0/dTempP1 - confirm
3860c1bc742181ded4930842b46e9507372f0b1b963James Dong        VQRSHRUN    dTResult1, qTempQ01, #5
3870c1bc742181ded4930842b46e9507372f0b1b963James Dong        VQRSHRUN    dTResult2, qTempR01, #5
3880c1bc742181ded4930842b46e9507372f0b1b963James Dong        VQRSHRUN    dTResult3, qTempS01, #5
3890c1bc742181ded4930842b46e9507372f0b1b963James Dong
3900c1bc742181ded4930842b46e9507372f0b1b963James Dong        VRHADD      dTAcc0, dTAcc0, dTResult0  ;// dTAcc0 = (dTAcc0 + dTResult0 + 1) >> 1 per element
3910c1bc742181ded4930842b46e9507372f0b1b963James Dong        VRHADD      dTAcc2, dTAcc2, dTResult2
3920c1bc742181ded4930842b46e9507372f0b1b963James Dong        VRHADD      dTAcc1, dTAcc1, dTResult1
3930c1bc742181ded4930842b46e9507372f0b1b963James Dong        VRHADD      dTAcc3, dTAcc3, dTResult3
3940c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD         Temp, pDst, dstStep, LSL #1  ;// Temp = pDst + 2*dstStep (address of output row 2)
3950c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dTRes0[0], [pDst], dstStep  ;// row 0 <- lane 0; pDst post-incremented. dTRes* presumably alias the dTAcc* registers - confirm
3960c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dTRes2[0], [Temp], dstStep  ;// row 2 <- lane 0; Temp post-incremented
3970c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dTRes1[0], [pDst]  ;// row 1 <- lane 0
3980c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dTRes3[0], [Temp]  ;// row 3 <- lane 0
3990c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ADR       pArgs, ppArgs  ;// pArgs = address of ppArgs, consumed by the LDM at Block4x4LoopEnd
4000c1bc742181ded4930842b46e9507372f0b1b963James Dong        B       Block4x4LoopEnd
4010c1bc742181ded4930842b46e9507372f0b1b963James DongCase_a
4020c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Case j: one HalfDiagHorVer helper call starting at pSrc - 2*srcStep - 2; dTRes0..3 are stored directly, with no extra averaging. NOTE(review): "j" presumably names the H.264 fractional sample position - confirm.
4030c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_PRINTF "Case a \n"
4040c1bc742181ded4930842b46e9507372f0b1b963James Dong
4050c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB         pSrc, pSrc, srcStep, LSL #1  ;// pSrc -= 2*srcStep (two rows up)
4060c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB         pSrc, pSrc, #2  ;// pSrc -= 2 bytes (two columns left)
4070c1bc742181ded4930842b46e9507372f0b1b963James Dong        BL          armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe  ;// helper body not visible here; presumably leaves the final rows in dTRes0..3 - confirm
4080c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD         Temp, pDst, dstStep, LSL #1  ;// Temp = pDst + 2*dstStep (address of output row 2)
4090c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dTRes0[0], [pDst], dstStep  ;// row 0 <- lane 0; pDst post-incremented by dstStep
4100c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dTRes2[0], [Temp], dstStep  ;// row 2 <- lane 0; Temp post-incremented by dstStep
4110c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dTRes1[0], [pDst]  ;// row 1 <- lane 0
4120c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dTRes3[0], [Temp]  ;// row 3 <- lane 0
4130c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ADR       pArgs, ppArgs  ;// pArgs = address of ppArgs, consumed by the LDM at Block4x4LoopEnd
4140c1bc742181ded4930842b46e9507372f0b1b963James Dong        B       Block4x4LoopEnd
4150c1bc742181ded4930842b46e9507372f0b1b963James DongCase_b
4160c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Case k: HalfDiagVerHor helper, then each output row is the rounding average of dTAcc* with a value rebuilt from the dTemp* registers at element offset 3. Same shape as Case_9, which uses offset 2. NOTE(review): "k" presumably names the H.264 fractional sample position - confirm.
4170c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_PRINTF "Case b \n"
4180c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB         pSrc, pSrc, srcStep, LSL #1  ;// pSrc -= 2*srcStep (two rows up)
4190c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB         pSrc, pSrc, #2  ;// pSrc -= 2 bytes (two columns left)
4200c1bc742181ded4930842b46e9507372f0b1b963James Dong        BL          armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe  ;// helper body not visible here; presumably fills dTAcc0..3 and dTempP/Q/R/S - confirm
4210c1bc742181ded4930842b46e9507372f0b1b963James Dong        VEXT        dTempP0, dTempP0, dTempP1, #3  ;// dTempP0 = elements 3.. of dTempP0:dTempP1 (element size comes from the alias type, not visible here)
4220c1bc742181ded4930842b46e9507372f0b1b963James Dong        VEXT        dTempQ0, dTempQ0, dTempQ1, #3  ;// same extraction for the Q pair
4230c1bc742181ded4930842b46e9507372f0b1b963James Dong        VEXT        dTempR0, dTempR0, dTempR1, #3  ;// same extraction for the R pair
4240c1bc742181ded4930842b46e9507372f0b1b963James Dong        VEXT        dTempS0, dTempS0, dTempS1, #3  ;// same extraction for the S pair
4250c1bc742181ded4930842b46e9507372f0b1b963James Dong
4260c1bc742181ded4930842b46e9507372f0b1b963James Dong        VQRSHRUN    dTResult0, qTempP01, #5  ;// rounding shift right by 5, narrowed with unsigned saturation; qTempP01 presumably overlays dTempP0/dTempP1 - confirm
4270c1bc742181ded4930842b46e9507372f0b1b963James Dong        VQRSHRUN    dTResult1, qTempQ01, #5
4280c1bc742181ded4930842b46e9507372f0b1b963James Dong        VQRSHRUN    dTResult2, qTempR01, #5
4290c1bc742181ded4930842b46e9507372f0b1b963James Dong        VQRSHRUN    dTResult3, qTempS01, #5
4300c1bc742181ded4930842b46e9507372f0b1b963James Dong
4310c1bc742181ded4930842b46e9507372f0b1b963James Dong        VRHADD      dTAcc0, dTAcc0, dTResult0  ;// dTAcc0 = (dTAcc0 + dTResult0 + 1) >> 1 per element
4320c1bc742181ded4930842b46e9507372f0b1b963James Dong        VRHADD      dTAcc2, dTAcc2, dTResult2
4330c1bc742181ded4930842b46e9507372f0b1b963James Dong        VRHADD      dTAcc1, dTAcc1, dTResult1
4340c1bc742181ded4930842b46e9507372f0b1b963James Dong        VRHADD      dTAcc3, dTAcc3, dTResult3
4350c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD         Temp, pDst, dstStep, LSL #1  ;// Temp = pDst + 2*dstStep (address of output row 2)
4360c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dTRes0[0], [pDst], dstStep  ;// row 0 <- lane 0; pDst post-incremented. dTRes* presumably alias the dTAcc* registers - confirm
4370c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dTRes2[0], [Temp], dstStep  ;// row 2 <- lane 0; Temp post-incremented
4380c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dTRes1[0], [pDst]  ;// row 1 <- lane 0
4390c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dTRes3[0], [Temp]  ;// row 3 <- lane 0
4400c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ADR       pArgs, ppArgs  ;// pArgs = address of ppArgs, consumed by the LDM at Block4x4LoopEnd
4410c1bc742181ded4930842b46e9507372f0b1b963James Dong        B       Block4x4LoopEnd
4420c1bc742181ded4930842b46e9507372f0b1b963James DongCase_c
4430c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Case n: HalfVer helper, then each dAccV row is rounding-averaged with dSrc1..dSrc4 (one index higher than the row number) before being stored. NOTE(review): "n" presumably names the H.264 fractional sample position - confirm.
4440c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_PRINTF "Case c \n"
4450c1bc742181ded4930842b46e9507372f0b1b963James Dong
4460c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB         pSrc, pSrc, srcStep, LSL #1  ;// pSrc -= 2*srcStep (two rows up)
4470c1bc742181ded4930842b46e9507372f0b1b963James Dong        BL          armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe  ;// helper body not visible here; presumably fills dAccV0..3 and leaves the loaded source rows in dSrc* - confirm
4480c1bc742181ded4930842b46e9507372f0b1b963James Dong        VRHADD      dAccV0, dAccV0, dSrc1  ;// dAccV0 = (dAccV0 + dSrc1 + 1) >> 1 per element
4490c1bc742181ded4930842b46e9507372f0b1b963James Dong        VRHADD      dAccV2, dAccV2, dSrc3  ;// row 2 pairs with dSrc3
4500c1bc742181ded4930842b46e9507372f0b1b963James Dong        VRHADD      dAccV1, dAccV1, dSrc2  ;// row 1 pairs with dSrc2
4510c1bc742181ded4930842b46e9507372f0b1b963James Dong        VRHADD      dAccV3, dAccV3, dSrc4  ;// row 3 pairs with dSrc4
4520c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD         Temp, pDst, dstStep, LSL #1  ;// Temp = pDst + 2*dstStep (address of output row 2)
4530c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dResultV0[0], [pDst], dstStep  ;// row 0 <- lane 0; pDst post-incremented. dResultV* presumably alias the dAccV* registers - confirm
4540c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dResultV2[0], [Temp], dstStep  ;// row 2 <- lane 0; Temp post-incremented
4550c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dResultV1[0], [pDst]  ;// row 1 <- lane 0
4560c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dResultV3[0], [Temp]  ;// row 3 <- lane 0
4570c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ADR       pArgs, ppArgs  ;// pArgs = address of ppArgs, consumed by the LDM at Block4x4LoopEnd
4580c1bc742181ded4930842b46e9507372f0b1b963James Dong        B           Block4x4LoopEnd
4590c1bc742181ded4930842b46e9507372f0b1b963James DongCase_d
4600c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Case p: two helper calls. HalfVer runs from pSrc - 2*srcStep, HalfHor runs from pSrc + srcStep - 2, and the stored rows are the rounding average of the two results. NOTE(review): "p" presumably names the H.264 fractional sample position - confirm.
4610c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_PRINTF "Case d \n"
4620c1bc742181ded4930842b46e9507372f0b1b963James Dong
4630c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV         pSrcBK, pSrc  ;// keep the unmodified block pointer for the second helper call
4640c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB         pSrc, pSrc, srcStep, LSL #1  ;// pSrc -= 2*srcStep (two rows up)
4650c1bc742181ded4930842b46e9507372f0b1b963James Dong        BL          armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe  ;// helper body not visible here; presumably fills dAccV0..3 and preserves pSrcBK - confirm
4660c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD         pSrc, pSrcBK, srcStep  ;// pSrc = saved pointer + srcStep (one row down)
4670c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB         pSrc, pSrc, #2  ;// pSrc -= 2 bytes (two columns left)
4680c1bc742181ded4930842b46e9507372f0b1b963James Dong        BL          armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe  ;// helper body not visible here; presumably fills dAccH0..3 without touching dAccV0..3 - confirm
4690c1bc742181ded4930842b46e9507372f0b1b963James Dong        VRHADD      dAccH0, dAccH0, dAccV0  ;// dAccH0 = (dAccH0 + dAccV0 + 1) >> 1 per element
4700c1bc742181ded4930842b46e9507372f0b1b963James Dong        VRHADD      dAccH2, dAccH2, dAccV2
4710c1bc742181ded4930842b46e9507372f0b1b963James Dong        VRHADD      dAccH1, dAccH1, dAccV1
4720c1bc742181ded4930842b46e9507372f0b1b963James Dong        VRHADD      dAccH3, dAccH3, dAccV3
4730c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD         Temp, pDst, dstStep, LSL #1  ;// Temp = pDst + 2*dstStep (address of output row 2)
4740c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dResultH0[0], [pDst], dstStep  ;// row 0 <- lane 0; pDst post-incremented. dResultH* presumably alias the dAccH* registers - confirm
4750c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dResultH2[0], [Temp], dstStep  ;// row 2 <- lane 0; Temp post-incremented
4760c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dResultH1[0], [pDst]  ;// row 1 <- lane 0
4770c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dResultH3[0], [Temp]  ;// row 3 <- lane 0
4780c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ADR       pArgs, ppArgs  ;// pArgs = address of ppArgs, consumed by the LDM at Block4x4LoopEnd
4790c1bc742181ded4930842b46e9507372f0b1b963James Dong        B       Block4x4LoopEnd
4800c1bc742181ded4930842b46e9507372f0b1b963James DongCase_e
4810c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Case q: HalfDiagHorVer helper, then each dTAcc row is rounding-averaged with a value narrowed from qRes3..qRes6 before being stored. NOTE(review): "q" presumably names the H.264 fractional sample position - confirm.
4820c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_PRINTF "Case e \n"
4830c1bc742181ded4930842b46e9507372f0b1b963James Dong
4840c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB         pSrc, pSrc, srcStep, LSL #1  ;// pSrc -= 2*srcStep (two rows up)
4850c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB         pSrc, pSrc, #2  ;// pSrc -= 2 bytes (two columns left)
4860c1bc742181ded4930842b46e9507372f0b1b963James Dong        BL          armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe  ;// helper body not visible here; presumably fills dTAcc0..3 and leaves wide intermediates in qRes3..qRes6 - confirm
4870c1bc742181ded4930842b46e9507372f0b1b963James Dong        VQRSHRUN    dTResult0, qRes3, #5  ;// rounding shift right by 5, narrowed with unsigned saturation; output row 0 uses qRes3
4880c1bc742181ded4930842b46e9507372f0b1b963James Dong        VQRSHRUN    dTResult1, qRes4, #5  ;// output row 1 uses qRes4
4890c1bc742181ded4930842b46e9507372f0b1b963James Dong        VQRSHRUN    dTResult2, qRes5, #5  ;// output row 2 uses qRes5
4900c1bc742181ded4930842b46e9507372f0b1b963James Dong        VQRSHRUN    dTResult3, qRes6, #5  ;// output row 3 uses qRes6
4910c1bc742181ded4930842b46e9507372f0b1b963James Dong
4920c1bc742181ded4930842b46e9507372f0b1b963James Dong        VRHADD      dTAcc0, dTAcc0, dTResult0  ;// dTAcc0 = (dTAcc0 + dTResult0 + 1) >> 1 per element
4930c1bc742181ded4930842b46e9507372f0b1b963James Dong        VRHADD      dTAcc2, dTAcc2, dTResult2
4940c1bc742181ded4930842b46e9507372f0b1b963James Dong        VRHADD      dTAcc1, dTAcc1, dTResult1
4950c1bc742181ded4930842b46e9507372f0b1b963James Dong        VRHADD      dTAcc3, dTAcc3, dTResult3
4960c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD         Temp, pDst, dstStep, LSL #1  ;// Temp = pDst + 2*dstStep (address of output row 2)
4970c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dTRes0[0], [pDst], dstStep  ;// row 0 <- lane 0; pDst post-incremented. dTRes* presumably alias the dTAcc* registers - confirm
4980c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dTRes2[0], [Temp], dstStep  ;// row 2 <- lane 0; Temp post-incremented
4990c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dTRes1[0], [pDst]  ;// row 1 <- lane 0
5000c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dTRes3[0], [Temp]  ;// row 3 <- lane 0
5010c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ADR       pArgs, ppArgs  ;// pArgs = address of ppArgs, consumed by the LDM at Block4x4LoopEnd
5020c1bc742181ded4930842b46e9507372f0b1b963James Dong        B       Block4x4LoopEnd
5030c1bc742181ded4930842b46e9507372f0b1b963James DongCase_f
5040c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Case r: like Case_d, but HalfVer runs one column to the right (pSrc + 1 - 2*srcStep); HalfHor still runs from pSrc + srcStep - 2. Last case: no branch, execution falls through into Block4x4LoopEnd. NOTE(review): "r" presumably names the H.264 fractional sample position - confirm.
5050c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_PRINTF "Case f \n"
5060c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV         pSrcBK, pSrc  ;// keep the unmodified block pointer for the second helper call
5070c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD         pSrc, pSrc, #1  ;// pSrc += 1 byte (one column right)
5080c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB         pSrc, pSrc, srcStep, LSL #1  ;// pSrc -= 2*srcStep (two rows up)
5090c1bc742181ded4930842b46e9507372f0b1b963James Dong        BL          armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe  ;// helper body not visible here; presumably fills dAccV0..3 and preserves pSrcBK - confirm
5100c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD         pSrc, pSrcBK, srcStep  ;// pSrc = saved pointer + srcStep (one row down; the +1 column offset is dropped)
5110c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB         pSrc, pSrc, #2  ;// pSrc -= 2 bytes (two columns left)
5120c1bc742181ded4930842b46e9507372f0b1b963James Dong        BL          armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe  ;// helper body not visible here; presumably fills dAccH0..3 without touching dAccV0..3 - confirm
5130c1bc742181ded4930842b46e9507372f0b1b963James Dong        VRHADD      dAccH0, dAccH0, dAccV0  ;// dAccH0 = (dAccH0 + dAccV0 + 1) >> 1 per element
5140c1bc742181ded4930842b46e9507372f0b1b963James Dong        VRHADD      dAccH2, dAccH2, dAccV2
5150c1bc742181ded4930842b46e9507372f0b1b963James Dong        VRHADD      dAccH1, dAccH1, dAccV1
5160c1bc742181ded4930842b46e9507372f0b1b963James Dong        VRHADD      dAccH3, dAccH3, dAccV3
5170c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD         Temp, pDst, dstStep, LSL #1  ;// Temp = pDst + 2*dstStep (address of output row 2)
5180c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dResultH0[0], [pDst], dstStep  ;// row 0 <- lane 0; pDst post-incremented. dResultH* presumably alias the dAccH* registers - confirm
5190c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dResultH2[0], [Temp], dstStep  ;// row 2 <- lane 0; Temp post-incremented
5200c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dResultH1[0], [pDst]  ;// row 1 <- lane 0
5210c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dResultH3[0], [Temp]  ;// row 3 <- lane 0
5220c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ADR       pArgs, ppArgs  ;// pArgs = address of ppArgs, consumed by the LDM at Block4x4LoopEnd just below
5230c1bc742181ded4930842b46e9507372f0b1b963James Dong
5240c1bc742181ded4930842b46e9507372f0b1b963James Dong
5250c1bc742181ded4930842b46e9507372f0b1b963James DongBlock4x4LoopEnd
5260c1bc742181ded4930842b46e9507372f0b1b963James Dong
5270c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Width Loop: common tail of every case. Reloads the pointers the case code modified, steps 4 columns right and repeats while iWidth > 0.
5280c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;//M_ADR       pArgs, ppArgs
5290c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDM         pArgs, {pSrc,srcStep,pDst,dstStep}  ;// Load arguments: restores the four values saved at pArgs (each case sets pArgs itself, hence the disabled M_ADR above)
5300c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUBS        iWidth, iWidth, #4  ;// iWidth -= 4 and set flags for the BGT below
5310c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD         pSrc, pSrc, #4  ;// next 4x4 block: 4 bytes right in the source (flags untouched)
5320c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD         pDst, pDst, #4  ;// and 4 bytes right in the destination (flags untouched)
5330c1bc742181ded4930842b46e9507372f0b1b963James Dong        BGT         Block4x4WidthLoop  ;// loop while iWidth > 0; the target presumably writes pSrc/pDst back to ppArgs - not visible here, confirm
5340c1bc742181ded4930842b46e9507372f0b1b963James Dong
5350c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Height Loop: runs once a row of 4x4 blocks is finished. Restores iWidth, moves both pointers 4 rows down and iWidth bytes back, and repeats while iHeight > 0.
5360c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUBS        iHeight, iHeight, #4  ;// iHeight -= 4 and set flags for the BGT at the end; assumes M_LDR/M_ADR below leave flags alone - confirm
5370c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDR       iWidth, ptrWidth  ;// reload the width counter from ptrWidth
5380c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ADR       pArgs, ppArgs  ;// pArgs = address of ppArgs for the next iteration
5390c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD         pSrc, pSrc, srcStep, LSL #2  ;// pSrc += 4*srcStep (four rows down)
5400c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD         pDst, pDst, dstStep, LSL #2  ;// pDst += 4*dstStep (four rows down)
5410c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB         pSrc, pSrc, iWidth  ;// undo the horizontal advance accumulated by the width loop
5420c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB         pDst, pDst, iWidth  ;// same for the destination pointer
5430c1bc742181ded4930842b46e9507372f0b1b963James Dong        BGT         Block4x4HeightLoop  ;// loop while iHeight > 0, using the flags from the SUBS above
5440c1bc742181ded4930842b46e9507372f0b1b963James Dong
5450c1bc742181ded4930842b46e9507372f0b1b963James DongEndOfInterpolation
5460c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV         r0, #0  ;// return value 0 in r0 (presumably OMX_Sts_NoErr - confirm against the OMX headers)
5470c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_END  ;// function-end macro defined outside this view; presumably emits the epilogue and return
5480c1bc742181ded4930842b46e9507372f0b1b963James Dong
5490c1bc742181ded4930842b46e9507372f0b1b963James Dong    ENDIF
5500c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// End of CortexA8
5510c1bc742181ded4930842b46e9507372f0b1b963James Dong
5520c1bc742181ded4930842b46e9507372f0b1b963James Dong    END
5530c1bc742181ded4930842b46e9507372f0b1b963James Dong
554