10c1bc742181ded4930842b46e9507372f0b1b963James Dong;//
278e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// Copyright (C) 2007-2008 ARM Limited
378e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;//
478e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// Licensed under the Apache License, Version 2.0 (the "License");
578e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// you may not use this file except in compliance with the License.
678e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// You may obtain a copy of the License at
778e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;//
878e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;//      http://www.apache.org/licenses/LICENSE-2.0
978e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;//
1078e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// Unless required by applicable law or agreed to in writing, software
1178e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// distributed under the License is distributed on an "AS IS" BASIS,
1278e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1378e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// See the License for the specific language governing permissions and
1478e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// limitations under the License.
1578e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;//
1678e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;//
170c1bc742181ded4930842b46e9507372f0b1b963James Dong;//
180c1bc742181ded4930842b46e9507372f0b1b963James Dong;// File Name:  omxVCM4P10_InterpolateLuma_s.s
190c1bc742181ded4930842b46e9507372f0b1b963James Dong;// OpenMAX DL: v1.0.2
200c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Revision:   12290
210c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Date:       Wednesday, April 9, 2008
220c1bc742181ded4930842b46e9507372f0b1b963James Dong;//
230c1bc742181ded4930842b46e9507372f0b1b963James Dong;//
240c1bc742181ded4930842b46e9507372f0b1b963James Dong;//
250c1bc742181ded4930842b46e9507372f0b1b963James Dong;//
260c1bc742181ded4930842b46e9507372f0b1b963James Dong
270c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Function:
280c1bc742181ded4930842b46e9507372f0b1b963James Dong;//     omxVCM4P10_InterpolateLuma
290c1bc742181ded4930842b46e9507372f0b1b963James Dong;//
300c1bc742181ded4930842b46e9507372f0b1b963James Dong;// This function implements omxVCM4P10_InterpolateLuma in NEON assembly (CortexA8 variant).
310c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Performs quarter pel interpolation of inter luma MB.
320c1bc742181ded4930842b46e9507372f0b1b963James Dong;// It's assumed that the frame is already padded when calling this function.
330c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Parameters:
340c1bc742181ded4930842b46e9507372f0b1b963James Dong;// [in]    pSrc        Pointer to the source reference frame buffer
350c1bc742181ded4930842b46e9507372f0b1b963James Dong;// [in]    srcStep     Reference frame step in bytes
360c1bc742181ded4930842b46e9507372f0b1b963James Dong;// [in]    dstStep     Destination frame step in bytes. Must be a multiple of roi.width
370c1bc742181ded4930842b46e9507372f0b1b963James Dong;// [in]    dx          Fractional part of horizontal motion vector
380c1bc742181ded4930842b46e9507372f0b1b963James Dong;//                         component in 1/4 pixel unit; valid in the range [0,3]
390c1bc742181ded4930842b46e9507372f0b1b963James Dong;// [in]    dy          Fractional part of vertical motion vector
400c1bc742181ded4930842b46e9507372f0b1b963James Dong;//                         component in 1/4 pixel unit; valid in the range [0,3]
410c1bc742181ded4930842b46e9507372f0b1b963James Dong;// [in]    roi         Dimensions of the interpolation region; the parameters roi.width and roi.height must
420c1bc742181ded4930842b46e9507372f0b1b963James Dong;//                         be equal to either 4, 8, or 16.
430c1bc742181ded4930842b46e9507372f0b1b963James Dong;// [out]   pDst        Pointer to the destination frame buffer.
440c1bc742181ded4930842b46e9507372f0b1b963James Dong;//                   if roi.width==4,  4-byte alignment required
450c1bc742181ded4930842b46e9507372f0b1b963James Dong;//                   if roi.width==8,  8-byte alignment required
460c1bc742181ded4930842b46e9507372f0b1b963James Dong;//                   if roi.width==16, 16-byte alignment required
470c1bc742181ded4930842b46e9507372f0b1b963James Dong;//
480c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Return Value:
490c1bc742181ded4930842b46e9507372f0b1b963James Dong;// If the function runs without error, it returns OMX_Sts_NoErr.
500c1bc742181ded4930842b46e9507372f0b1b963James Dong;// It is assumed that the following conditions are satisfied before calling this function:
510c1bc742181ded4930842b46e9507372f0b1b963James Dong;//     pSrc and pDst are not NULL.
520c1bc742181ded4930842b46e9507372f0b1b963James Dong;//     srcStep and dstStep are >= roi.width.
530c1bc742181ded4930842b46e9507372f0b1b963James Dong;//     dx and dy are in the range [0-3].
540c1bc742181ded4930842b46e9507372f0b1b963James Dong;//     roi.width and roi.height are each one of {4, 8, 16}.
550c1bc742181ded4930842b46e9507372f0b1b963James Dong;//     If roi.width is equal to 4, pDst is 4 byte aligned.
560c1bc742181ded4930842b46e9507372f0b1b963James Dong;//     If roi.width is equal to 8, pDst is 8 byte aligned.
570c1bc742181ded4930842b46e9507372f0b1b963James Dong;//     If roi.width is equal to 16, pDst is 16 byte aligned.
580c1bc742181ded4930842b46e9507372f0b1b963James Dong;//     srcStep and dstStep are multiples of 8.
590c1bc742181ded4930842b46e9507372f0b1b963James Dong;//
600c1bc742181ded4930842b46e9507372f0b1b963James Dong;//
610c1bc742181ded4930842b46e9507372f0b1b963James Dong
620c1bc742181ded4930842b46e9507372f0b1b963James Dong
630c1bc742181ded4930842b46e9507372f0b1b963James Dong        INCLUDE omxtypes_s.h
640c1bc742181ded4930842b46e9507372f0b1b963James Dong        INCLUDE armCOMM_s.h
650c1bc742181ded4930842b46e9507372f0b1b963James Dong
660c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_VARIANTS CortexA8
670c1bc742181ded4930842b46e9507372f0b1b963James Dong
680c1bc742181ded4930842b46e9507372f0b1b963James Dong        EXPORT omxVCM4P10_InterpolateLuma
690c1bc742181ded4930842b46e9507372f0b1b963James Dong
700c1bc742181ded4930842b46e9507372f0b1b963James Dong
710c1bc742181ded4930842b46e9507372f0b1b963James Dong    IF CortexA8
720c1bc742181ded4930842b46e9507372f0b1b963James Dong        IMPORT armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
730c1bc742181ded4930842b46e9507372f0b1b963James Dong        IMPORT armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
740c1bc742181ded4930842b46e9507372f0b1b963James Dong        IMPORT armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe
750c1bc742181ded4930842b46e9507372f0b1b963James Dong        IMPORT armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe
760c1bc742181ded4930842b46e9507372f0b1b963James Dong    ENDIF
770c1bc742181ded4930842b46e9507372f0b1b963James Dong
780c1bc742181ded4930842b46e9507372f0b1b963James Dong
790c1bc742181ded4930842b46e9507372f0b1b963James Dong
800c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Declare input registers
;// pSrc, srcStep, pDst and dstStep are bound to r0-r3. iHeight (r4) and
;// iWidth (r5) are filled in the function body by M_LDR from the
;// ptrHeight / ptrWidth stack arguments.
810c1bc742181ded4930842b46e9507372f0b1b963James DongpSrc            RN 0
820c1bc742181ded4930842b46e9507372f0b1b963James DongsrcStep         RN 1
830c1bc742181ded4930842b46e9507372f0b1b963James DongpDst            RN 2
840c1bc742181ded4930842b46e9507372f0b1b963James DongdstStep         RN 3
850c1bc742181ded4930842b46e9507372f0b1b963James DongiHeight         RN 4
860c1bc742181ded4930842b46e9507372f0b1b963James DongiWidth          RN 5
870c1bc742181ded4930842b46e9507372f0b1b963James Dong
880c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Declare other intermediate registers
;// idx and index are deliberately bound to the same register (r6): the body
;// computes index = idx + (idy << 2) in place, after which r6 is used as the
;// switch-table selector and the original idx value is no longer available.
;// Temp (r12) is used as a second row pointer when loading/storing 4x4 blocks.
;// pArgs (r11) holds the address of the ppArgs area where
;// {pSrc, srcStep, pDst, dstStep} are saved with STM at the top of the loop.
890c1bc742181ded4930842b46e9507372f0b1b963James Dongidx             RN 6
900c1bc742181ded4930842b46e9507372f0b1b963James Dongidy             RN 7
910c1bc742181ded4930842b46e9507372f0b1b963James Dongindex           RN 6
920c1bc742181ded4930842b46e9507372f0b1b963James DongTemp            RN 12
930c1bc742181ded4930842b46e9507372f0b1b963James DongpArgs           RN 11
940c1bc742181ded4930842b46e9507372f0b1b963James Dong
950c1bc742181ded4930842b46e9507372f0b1b963James Dong
960c1bc742181ded4930842b46e9507372f0b1b963James Dong    IF CortexA8
970c1bc742181ded4930842b46e9507372f0b1b963James Dong
980c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;//
990c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Interpolation of luma is implemented by processing block of pixels, size 4x4 at a time.
1000c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;//
1010c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ALLOC4    ppArgs, 16
1020c1bc742181ded4930842b46e9507372f0b1b963James Dong
1030c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Function header
1040c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_START omxVCM4P10_InterpolateLuma, r11, d15
1050c1bc742181ded4930842b46e9507372f0b1b963James Dong
1060c1bc742181ded4930842b46e9507372f0b1b963James DongpSrcBK          RN 8
1070c1bc742181ded4930842b46e9507372f0b1b963James Dong
1080c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Declare Neon registers
1090c1bc742181ded4930842b46e9507372f0b1b963James DongdCoeff5         DN 30.S16
1100c1bc742181ded4930842b46e9507372f0b1b963James DongdCoeff20        DN 31.S16
1110c1bc742181ded4930842b46e9507372f0b1b963James Dong
1120c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Registers used for implementing Horizontal interpolation
1130c1bc742181ded4930842b46e9507372f0b1b963James DongdSrc0c          DN 14.U8
1140c1bc742181ded4930842b46e9507372f0b1b963James DongdSrc1c          DN 16.U8
1150c1bc742181ded4930842b46e9507372f0b1b963James DongdSrc2c          DN 18.U8
1160c1bc742181ded4930842b46e9507372f0b1b963James DongdSrc3c          DN 20.U8
1170c1bc742181ded4930842b46e9507372f0b1b963James DongdSrc0d          DN 15.U8
1180c1bc742181ded4930842b46e9507372f0b1b963James DongdSrc1d          DN 17.U8
1190c1bc742181ded4930842b46e9507372f0b1b963James DongdSrc2d          DN 19.U8
1200c1bc742181ded4930842b46e9507372f0b1b963James DongdSrc3d          DN 21.U8
1210c1bc742181ded4930842b46e9507372f0b1b963James DongdAccH0          DN 22.U8
1220c1bc742181ded4930842b46e9507372f0b1b963James DongdAccH1          DN 24.U8
1230c1bc742181ded4930842b46e9507372f0b1b963James DongdAccH2          DN 26.U8
1240c1bc742181ded4930842b46e9507372f0b1b963James DongdAccH3          DN 28.U8
1250c1bc742181ded4930842b46e9507372f0b1b963James DongdResultH0       DN 22.U32
1260c1bc742181ded4930842b46e9507372f0b1b963James DongdResultH1       DN 24.U32
1270c1bc742181ded4930842b46e9507372f0b1b963James DongdResultH2       DN 26.U32
1280c1bc742181ded4930842b46e9507372f0b1b963James DongdResultH3       DN 28.U32
1290c1bc742181ded4930842b46e9507372f0b1b963James Dong
1300c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Registers used for implementing Vertical interpolation
1310c1bc742181ded4930842b46e9507372f0b1b963James DongdSrc0           DN 9.U8
1320c1bc742181ded4930842b46e9507372f0b1b963James DongdSrc1           DN 10.U8
1330c1bc742181ded4930842b46e9507372f0b1b963James DongdSrc2           DN 11.U8
1340c1bc742181ded4930842b46e9507372f0b1b963James DongdSrc3           DN 12.U8
1350c1bc742181ded4930842b46e9507372f0b1b963James DongdSrc4           DN 13.U8
1360c1bc742181ded4930842b46e9507372f0b1b963James DongdAccV0          DN 0.U8
1370c1bc742181ded4930842b46e9507372f0b1b963James DongdAccV1          DN 2.U8
1380c1bc742181ded4930842b46e9507372f0b1b963James DongdAccV2          DN 4.U8
1390c1bc742181ded4930842b46e9507372f0b1b963James DongdAccV3          DN 6.U8
1400c1bc742181ded4930842b46e9507372f0b1b963James DongdResultV0       DN 0.U32
1410c1bc742181ded4930842b46e9507372f0b1b963James DongdResultV1       DN 2.U32
1420c1bc742181ded4930842b46e9507372f0b1b963James DongdResultV2       DN 4.U32
1430c1bc742181ded4930842b46e9507372f0b1b963James DongdResultV3       DN 6.U32
1440c1bc742181ded4930842b46e9507372f0b1b963James Dong
1450c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Registers used for implementing Diagonal interpolation
1460c1bc742181ded4930842b46e9507372f0b1b963James DongdTAcc0          DN 0.U8
1470c1bc742181ded4930842b46e9507372f0b1b963James DongdTAcc1          DN 2.U8
1480c1bc742181ded4930842b46e9507372f0b1b963James DongdTAcc2          DN 4.U8
1490c1bc742181ded4930842b46e9507372f0b1b963James DongdTAcc3          DN 6.U8
1500c1bc742181ded4930842b46e9507372f0b1b963James DongdTRes0          DN 0.32
1510c1bc742181ded4930842b46e9507372f0b1b963James DongdTRes1          DN 2.32
1520c1bc742181ded4930842b46e9507372f0b1b963James DongdTRes2          DN 4.32
1530c1bc742181ded4930842b46e9507372f0b1b963James DongdTRes3          DN 6.32
1540c1bc742181ded4930842b46e9507372f0b1b963James DongdTResult0       DN 14.U8
1550c1bc742181ded4930842b46e9507372f0b1b963James DongdTResult1       DN 16.U8
1560c1bc742181ded4930842b46e9507372f0b1b963James DongdTResult2       DN 18.U8
1570c1bc742181ded4930842b46e9507372f0b1b963James DongdTResult3       DN 20.U8
1580c1bc742181ded4930842b46e9507372f0b1b963James DongdTempP0         DN 18.S16
1590c1bc742181ded4930842b46e9507372f0b1b963James DongdTempP1         DN 19.S16
1600c1bc742181ded4930842b46e9507372f0b1b963James DongdTempQ0         DN 20.S16
1610c1bc742181ded4930842b46e9507372f0b1b963James DongdTempQ1         DN 21.S16
1620c1bc742181ded4930842b46e9507372f0b1b963James DongdTempR0         DN 22.S16
1630c1bc742181ded4930842b46e9507372f0b1b963James DongdTempR1         DN 23.S16
1640c1bc742181ded4930842b46e9507372f0b1b963James DongdTempS0         DN 24.S16
1650c1bc742181ded4930842b46e9507372f0b1b963James DongdTempS1         DN 25.S16
1660c1bc742181ded4930842b46e9507372f0b1b963James DongqTempP01        QN 9.S16
1670c1bc742181ded4930842b46e9507372f0b1b963James DongqTempQ01        QN 10.S16
1680c1bc742181ded4930842b46e9507372f0b1b963James DongqTempR01        QN 11.S16
1690c1bc742181ded4930842b46e9507372f0b1b963James DongqTempS01        QN 12.S16
1700c1bc742181ded4930842b46e9507372f0b1b963James Dong
1710c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Intermediate values for averaging
1720c1bc742181ded4930842b46e9507372f0b1b963James DongqRes2           QN 7.S16
1730c1bc742181ded4930842b46e9507372f0b1b963James DongqRes3           QN 8.S16
1740c1bc742181ded4930842b46e9507372f0b1b963James DongqRes4           QN 9.S16
1750c1bc742181ded4930842b46e9507372f0b1b963James DongqRes5           QN 10.S16
1760c1bc742181ded4930842b46e9507372f0b1b963James DongqRes6           QN 11.S16
1770c1bc742181ded4930842b46e9507372f0b1b963James Dong
1780c1bc742181ded4930842b46e9507372f0b1b963James Dong;// For implementing copy
1790c1bc742181ded4930842b46e9507372f0b1b963James DongdDst0            DN 9.32
1800c1bc742181ded4930842b46e9507372f0b1b963James DongdDst1            DN 10.32
1810c1bc742181ded4930842b46e9507372f0b1b963James DongdDst2            DN 11.32
1820c1bc742181ded4930842b46e9507372f0b1b963James DongdDst3            DN 12.32
1830c1bc742181ded4930842b46e9507372f0b1b963James Dong
1840c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Define stack arguments
1850c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ARG       ptridx, 4
1860c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ARG       ptridy, 4
1870c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ARG       ptrWidth, 4
1880c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ARG       ptrHeight, 4
1890c1bc742181ded4930842b46e9507372f0b1b963James Dong
1900c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Load structure elements of roi
1910c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDR       idx, ptridx
1920c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDR       idy, ptridy
1930c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDR       iWidth, ptrWidth
1940c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDR       iHeight, ptrHeight
1950c1bc742181ded4930842b46e9507372f0b1b963James Dong
1960c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD         index, idx, idy, LSL #2                 ;//  [index] = [idy][idx]
1970c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ADR       pArgs, ppArgs
1980c1bc742181ded4930842b46e9507372f0b1b963James Dong
1990c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Move coefficients Neon registers
2000c1bc742181ded4930842b46e9507372f0b1b963James Dong        VMOV        dCoeff20, #20
2010c1bc742181ded4930842b46e9507372f0b1b963James Dong        VMOV        dCoeff5, #5
2020c1bc742181ded4930842b46e9507372f0b1b963James Dong
2030c1bc742181ded4930842b46e9507372f0b1b963James DongBlock4x4WidthLoop
2040c1bc742181ded4930842b46e9507372f0b1b963James DongBlock4x4HeightLoop
2050c1bc742181ded4930842b46e9507372f0b1b963James Dong
2060c1bc742181ded4930842b46e9507372f0b1b963James Dong        STM         pArgs, {pSrc,srcStep,pDst,dstStep}
2070c1bc742181ded4930842b46e9507372f0b1b963James Dong
2080c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// switch table using motion vector as index
2090c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD         pc, pc, index, LSL #2
2100c1bc742181ded4930842b46e9507372f0b1b963James Dong        B           Case_f
2110c1bc742181ded4930842b46e9507372f0b1b963James Dong        B           Case_0
2120c1bc742181ded4930842b46e9507372f0b1b963James Dong        B           Case_1
2130c1bc742181ded4930842b46e9507372f0b1b963James Dong        B           Case_2
2140c1bc742181ded4930842b46e9507372f0b1b963James Dong        B           Case_3
2150c1bc742181ded4930842b46e9507372f0b1b963James Dong        B           Case_4
2160c1bc742181ded4930842b46e9507372f0b1b963James Dong        B           Case_5
2170c1bc742181ded4930842b46e9507372f0b1b963James Dong        B           Case_6
2180c1bc742181ded4930842b46e9507372f0b1b963James Dong        B           Case_7
2190c1bc742181ded4930842b46e9507372f0b1b963James Dong        B           Case_8
2200c1bc742181ded4930842b46e9507372f0b1b963James Dong        B           Case_9
2210c1bc742181ded4930842b46e9507372f0b1b963James Dong        B           Case_a
2220c1bc742181ded4930842b46e9507372f0b1b963James Dong        B           Case_b
2230c1bc742181ded4930842b46e9507372f0b1b963James Dong        B           Case_c
2240c1bc742181ded4930842b46e9507372f0b1b963James Dong        B           Case_d
2250c1bc742181ded4930842b46e9507372f0b1b963James Dong        B           Case_e
2260c1bc742181ded4930842b46e9507372f0b1b963James Dong        B           Case_f
2270c1bc742181ded4930842b46e9507372f0b1b963James Dong
2280c1bc742181ded4930842b46e9507372f0b1b963James DongCase_0
2290c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Case G
2300c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_PRINTF "Case 0 \n"
2310c1bc742181ded4930842b46e9507372f0b1b963James Dong
2320c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Loads a 4x4 block of .8 and stores as .32
2330c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD         Temp, pSrc, srcStep, LSL #1
2340c1bc742181ded4930842b46e9507372f0b1b963James Dong        VLD1        dSrc0, [pSrc], srcStep
2350c1bc742181ded4930842b46e9507372f0b1b963James Dong        VLD1        dSrc2, [Temp], srcStep
2360c1bc742181ded4930842b46e9507372f0b1b963James Dong        VLD1        dSrc1, [pSrc]
2370c1bc742181ded4930842b46e9507372f0b1b963James Dong        VLD1        dSrc3, [Temp]
2380c1bc742181ded4930842b46e9507372f0b1b963James Dong
2390c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD         Temp, pDst, dstStep, LSL #1
2400c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dDst0[0], [pDst], dstStep
2410c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dDst2[0], [Temp], dstStep
2420c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dDst1[0], [pDst]
2430c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dDst3[0], [Temp]
2440c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ADR       pArgs, ppArgs
2450c1bc742181ded4930842b46e9507372f0b1b963James Dong        B           Block4x4LoopEnd
2460c1bc742181ded4930842b46e9507372f0b1b963James DongCase_1
2470c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Case a
2480c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_PRINTF "Case 1 \n"
2490c1bc742181ded4930842b46e9507372f0b1b963James Dong
2500c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB         pSrc, pSrc, #2
2510c1bc742181ded4930842b46e9507372f0b1b963James Dong        BL          armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
2520c1bc742181ded4930842b46e9507372f0b1b963James Dong        VRHADD      dAccH0, dAccH0, dSrc0c
2530c1bc742181ded4930842b46e9507372f0b1b963James Dong        VRHADD      dAccH2, dAccH2, dSrc2c
2540c1bc742181ded4930842b46e9507372f0b1b963James Dong        VRHADD      dAccH1, dAccH1, dSrc1c
2550c1bc742181ded4930842b46e9507372f0b1b963James Dong        VRHADD      dAccH3, dAccH3, dSrc3c
2560c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD         Temp, pDst, dstStep, LSL #1
2570c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dResultH0[0], [pDst], dstStep
2580c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dResultH2[0], [Temp], dstStep
2590c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dResultH1[0], [pDst]
2600c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dResultH3[0], [Temp]
2610c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ADR       pArgs, ppArgs
2620c1bc742181ded4930842b46e9507372f0b1b963James Dong        B           Block4x4LoopEnd
2630c1bc742181ded4930842b46e9507372f0b1b963James DongCase_2
2640c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Case b
2650c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_PRINTF "Case 2 \n"
2660c1bc742181ded4930842b46e9507372f0b1b963James Dong
2670c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB         pSrc, pSrc, #2
2680c1bc742181ded4930842b46e9507372f0b1b963James Dong        BL          armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
2690c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD         Temp, pDst, dstStep, LSL #1
2700c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dResultH0[0], [pDst], dstStep
2710c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dResultH2[0], [Temp], dstStep
2720c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dResultH1[0], [pDst]
2730c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dResultH3[0], [Temp]
2740c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ADR       pArgs, ppArgs
2750c1bc742181ded4930842b46e9507372f0b1b963James Dong        B           Block4x4LoopEnd
2760c1bc742181ded4930842b46e9507372f0b1b963James DongCase_3
2770c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Case c
2780c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_PRINTF "Case 3 \n"
2790c1bc742181ded4930842b46e9507372f0b1b963James Dong
2800c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB         pSrc, pSrc, #2
2810c1bc742181ded4930842b46e9507372f0b1b963James Dong        BL          armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
2820c1bc742181ded4930842b46e9507372f0b1b963James Dong        VRHADD      dAccH0, dAccH0, dSrc0d
2830c1bc742181ded4930842b46e9507372f0b1b963James Dong        VRHADD      dAccH2, dAccH2, dSrc2d
2840c1bc742181ded4930842b46e9507372f0b1b963James Dong        VRHADD      dAccH1, dAccH1, dSrc1d
2850c1bc742181ded4930842b46e9507372f0b1b963James Dong        VRHADD      dAccH3, dAccH3, dSrc3d
2860c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD         Temp, pDst, dstStep, LSL #1
2870c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dResultH0[0], [pDst], dstStep
2880c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dResultH2[0], [Temp], dstStep
2890c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dResultH1[0], [pDst]
2900c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dResultH3[0], [Temp]
2910c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ADR       pArgs, ppArgs
2920c1bc742181ded4930842b46e9507372f0b1b963James Dong        B           Block4x4LoopEnd
2930c1bc742181ded4930842b46e9507372f0b1b963James DongCase_4
2940c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Case d
2950c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_PRINTF "Case 4 \n"
2960c1bc742181ded4930842b46e9507372f0b1b963James Dong
2970c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB         pSrc, pSrc, srcStep, LSL #1
2980c1bc742181ded4930842b46e9507372f0b1b963James Dong        BL          armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
2990c1bc742181ded4930842b46e9507372f0b1b963James Dong        VRHADD      dAccV0, dAccV0, dSrc0
3000c1bc742181ded4930842b46e9507372f0b1b963James Dong        VRHADD      dAccV2, dAccV2, dSrc2
3010c1bc742181ded4930842b46e9507372f0b1b963James Dong        VRHADD      dAccV1, dAccV1, dSrc1
3020c1bc742181ded4930842b46e9507372f0b1b963James Dong        VRHADD      dAccV3, dAccV3, dSrc3
3030c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD         Temp, pDst, dstStep, LSL #1
3040c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dResultV0[0], [pDst], dstStep
3050c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dResultV2[0], [Temp], dstStep
3060c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dResultV1[0], [pDst]
3070c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dResultV3[0], [Temp]
3080c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ADR       pArgs, ppArgs
3090c1bc742181ded4930842b46e9507372f0b1b963James Dong        B           Block4x4LoopEnd
3100c1bc742181ded4930842b46e9507372f0b1b963James DongCase_5
3110c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Case e
3120c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_PRINTF "Case 5 \n"
3130c1bc742181ded4930842b46e9507372f0b1b963James Dong
3140c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV         pSrcBK, pSrc
3150c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB         pSrc, pSrc, srcStep, LSL #1
3160c1bc742181ded4930842b46e9507372f0b1b963James Dong        BL          armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
3170c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB         pSrc, pSrcBK, #2
3180c1bc742181ded4930842b46e9507372f0b1b963James Dong        BL          armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
3190c1bc742181ded4930842b46e9507372f0b1b963James Dong        VRHADD      dAccH0, dAccH0, dAccV0
3200c1bc742181ded4930842b46e9507372f0b1b963James Dong        VRHADD      dAccH2, dAccH2, dAccV2
3210c1bc742181ded4930842b46e9507372f0b1b963James Dong        VRHADD      dAccH1, dAccH1, dAccV1
3220c1bc742181ded4930842b46e9507372f0b1b963James Dong        VRHADD      dAccH3, dAccH3, dAccV3
3230c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD         Temp, pDst, dstStep, LSL #1
3240c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dResultH0[0], [pDst], dstStep
3250c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dResultH2[0], [Temp], dstStep
3260c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dResultH1[0], [pDst]
3270c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dResultH3[0], [Temp]
3280c1bc742181ded4930842b46e9507372f0b1b963James Dong
3290c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ADR       pArgs, ppArgs
3300c1bc742181ded4930842b46e9507372f0b1b963James Dong        B       Block4x4LoopEnd
3310c1bc742181ded4930842b46e9507372f0b1b963James DongCase_6
3320c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Case f
3330c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_PRINTF "Case 6 \n"
3340c1bc742181ded4930842b46e9507372f0b1b963James Dong
3350c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB         pSrc, pSrc, srcStep, LSL #1
3360c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB         pSrc, pSrc, #2
3370c1bc742181ded4930842b46e9507372f0b1b963James Dong        BL          armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe
3380c1bc742181ded4930842b46e9507372f0b1b963James Dong        VQRSHRUN    dTResult0, qRes2, #5
3390c1bc742181ded4930842b46e9507372f0b1b963James Dong        VQRSHRUN    dTResult1, qRes3, #5
3400c1bc742181ded4930842b46e9507372f0b1b963James Dong        VQRSHRUN    dTResult2, qRes4, #5
3410c1bc742181ded4930842b46e9507372f0b1b963James Dong        VQRSHRUN    dTResult3, qRes5, #5
3420c1bc742181ded4930842b46e9507372f0b1b963James Dong        VRHADD      dTAcc0, dTAcc0, dTResult0
3430c1bc742181ded4930842b46e9507372f0b1b963James Dong        VRHADD      dTAcc2, dTAcc2, dTResult2
3440c1bc742181ded4930842b46e9507372f0b1b963James Dong        VRHADD      dTAcc1, dTAcc1, dTResult1
3450c1bc742181ded4930842b46e9507372f0b1b963James Dong        VRHADD      dTAcc3, dTAcc3, dTResult3
3460c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD         Temp, pDst, dstStep, LSL #1
3470c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dTRes0[0], [pDst], dstStep
3480c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dTRes2[0], [Temp], dstStep
3490c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dTRes1[0], [pDst]
3500c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dTRes3[0], [Temp]
3510c1bc742181ded4930842b46e9507372f0b1b963James Dong
3520c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ADR       pArgs, ppArgs
3530c1bc742181ded4930842b46e9507372f0b1b963James Dong        B       Block4x4LoopEnd
3540c1bc742181ded4930842b46e9507372f0b1b963James DongCase_7
3550c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Case g
3560c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_PRINTF "Case 7 \n"
3570c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV         pSrcBK, pSrc
3580c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD         pSrc, pSrc, #1
3590c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB         pSrc, pSrc, srcStep, LSL #1
3600c1bc742181ded4930842b46e9507372f0b1b963James Dong        BL          armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
3610c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB         pSrc, pSrcBK, #2
3620c1bc742181ded4930842b46e9507372f0b1b963James Dong        BL          armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
3630c1bc742181ded4930842b46e9507372f0b1b963James Dong        VRHADD      dAccH0, dAccH0, dAccV0
3640c1bc742181ded4930842b46e9507372f0b1b963James Dong        VRHADD      dAccH2, dAccH2, dAccV2
3650c1bc742181ded4930842b46e9507372f0b1b963James Dong        VRHADD      dAccH1, dAccH1, dAccV1
3660c1bc742181ded4930842b46e9507372f0b1b963James Dong        VRHADD      dAccH3, dAccH3, dAccV3
3670c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD         Temp, pDst, dstStep, LSL #1
3680c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dResultH0[0], [pDst], dstStep
3690c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dResultH2[0], [Temp], dstStep
3700c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dResultH1[0], [pDst]
3710c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dResultH3[0], [Temp]
3720c1bc742181ded4930842b46e9507372f0b1b963James Dong
3730c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ADR       pArgs, ppArgs
3740c1bc742181ded4930842b46e9507372f0b1b963James Dong        B       Block4x4LoopEnd
3750c1bc742181ded4930842b46e9507372f0b1b963James DongCase_8
3760c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Case h: run the HalfVer helper and store its dResultV0..3 output unmodified as four destination rows
3770c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_PRINTF "Case 8 \n"
3780c1bc742181ded4930842b46e9507372f0b1b963James Dong
3790c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB         pSrc, pSrc, srcStep, LSL #1 ;// pSrc -= 2*srcStep before calling the vertical helper
3800c1bc742181ded4930842b46e9507372f0b1b963James Dong        BL          armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe ;// helper is defined elsewhere; the stores below read dResultV0..3
3810c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD         Temp, pDst, dstStep, LSL #1 ;// Temp = pDst + 2*dstStep (address of row 2)
3820c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dResultV0[0], [pDst], dstStep ;// row 0; pDst post-incremented by dstStep (lane width comes from the DN alias, not visible here)
3830c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dResultV2[0], [Temp], dstStep ;// row 2; Temp post-incremented by dstStep
3840c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dResultV1[0], [pDst] ;// row 1
3850c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dResultV3[0], [Temp] ;// row 3
3860c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ADR       pArgs, ppArgs ;// pArgs is the base for the LDM at Block4x4LoopEnd that reloads pSrc/srcStep/pDst/dstStep
3870c1bc742181ded4930842b46e9507372f0b1b963James Dong        B           Block4x4LoopEnd
3880c1bc742181ded4930842b46e9507372f0b1b963James DongCase_9
3890c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Case i: rounding average of the helper's dTAcc0..3 with values narrowed from the helper's qTemp{P,Q,R,S}01 registers
3900c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_PRINTF "Case 9 \n"
3910c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB         pSrc, pSrc, srcStep, LSL #1 ;// pSrc -= 2*srcStep
3920c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB         pSrc, pSrc, #2 ;// pSrc -= 2
3930c1bc742181ded4930842b46e9507372f0b1b963James Dong        BL          armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe ;// helper defined elsewhere; code below consumes dTAcc0..3 and dTemp*/qTemp* from it
3940c1bc742181ded4930842b46e9507372f0b1b963James Dong        VEXT        dTempP0, dTempP0, dTempP1, #2 ;// dTempP0 = dTempP0:dTempP1 pair starting at element 2 (element size set by the alias, not visible here)
3950c1bc742181ded4930842b46e9507372f0b1b963James Dong        VEXT        dTempQ0, dTempQ0, dTempQ1, #2
3960c1bc742181ded4930842b46e9507372f0b1b963James Dong        VEXT        dTempR0, dTempR0, dTempR1, #2
3970c1bc742181ded4930842b46e9507372f0b1b963James Dong        VEXT        dTempS0, dTempS0, dTempS1, #2
3980c1bc742181ded4930842b46e9507372f0b1b963James Dong
3990c1bc742181ded4930842b46e9507372f0b1b963James Dong        VQRSHRUN    dTResult0, qTempP01, #5 ;// rounding >>5 with unsigned-saturating narrow; qTempP01 presumably overlays dTempP0/dTempP1 - confirm
4000c1bc742181ded4930842b46e9507372f0b1b963James Dong        VQRSHRUN    dTResult1, qTempQ01, #5
4010c1bc742181ded4930842b46e9507372f0b1b963James Dong        VQRSHRUN    dTResult2, qTempR01, #5
4020c1bc742181ded4930842b46e9507372f0b1b963James Dong        VQRSHRUN    dTResult3, qTempS01, #5
4030c1bc742181ded4930842b46e9507372f0b1b963James Dong
4040c1bc742181ded4930842b46e9507372f0b1b963James Dong        VRHADD      dTAcc0, dTAcc0, dTResult0 ;// per element: dTAcc0 = (dTAcc0 + dTResult0 + 1) >> 1
4050c1bc742181ded4930842b46e9507372f0b1b963James Dong        VRHADD      dTAcc2, dTAcc2, dTResult2
4060c1bc742181ded4930842b46e9507372f0b1b963James Dong        VRHADD      dTAcc1, dTAcc1, dTResult1
4070c1bc742181ded4930842b46e9507372f0b1b963James Dong        VRHADD      dTAcc3, dTAcc3, dTResult3
4080c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD         Temp, pDst, dstStep, LSL #1 ;// Temp = pDst + 2*dstStep (address of row 2)
4090c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dTRes0[0], [pDst], dstStep ;// row 0; dTRes* presumably alias the dTAcc* registers written above - confirm against the DN declarations
4100c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dTRes2[0], [Temp], dstStep ;// row 2
4110c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dTRes1[0], [pDst] ;// row 1
4120c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dTRes3[0], [Temp] ;// row 3
4130c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ADR       pArgs, ppArgs ;// pArgs is the base for the argument reload at Block4x4LoopEnd
4140c1bc742181ded4930842b46e9507372f0b1b963James Dong        B       Block4x4LoopEnd
4150c1bc742181ded4930842b46e9507372f0b1b963James DongCase_a
4160c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Case j: run the HalfDiagHorVer helper and store dTRes0..3 as four destination rows with no further arithmetic
4170c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_PRINTF "Case a \n"
4180c1bc742181ded4930842b46e9507372f0b1b963James Dong
4190c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB         pSrc, pSrc, srcStep, LSL #1 ;// pSrc -= 2*srcStep
4200c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB         pSrc, pSrc, #2 ;// pSrc -= 2
4210c1bc742181ded4930842b46e9507372f0b1b963James Dong        BL          armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe ;// helper defined elsewhere; the stores below read dTRes0..3
4220c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD         Temp, pDst, dstStep, LSL #1 ;// Temp = pDst + 2*dstStep (address of row 2)
4230c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dTRes0[0], [pDst], dstStep ;// row 0; pDst post-incremented by dstStep
4240c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dTRes2[0], [Temp], dstStep ;// row 2; Temp post-incremented by dstStep
4250c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dTRes1[0], [pDst] ;// row 1
4260c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dTRes3[0], [Temp] ;// row 3
4270c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ADR       pArgs, ppArgs ;// pArgs is the base for the argument reload at Block4x4LoopEnd
4280c1bc742181ded4930842b46e9507372f0b1b963James Dong        B       Block4x4LoopEnd
4290c1bc742181ded4930842b46e9507372f0b1b963James DongCase_b
4300c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Case k: same sequence as Case_9 except that the VEXT element offset is 3 instead of 2
4310c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_PRINTF "Case b \n"
4320c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB         pSrc, pSrc, srcStep, LSL #1 ;// pSrc -= 2*srcStep
4330c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB         pSrc, pSrc, #2 ;// pSrc -= 2
4340c1bc742181ded4930842b46e9507372f0b1b963James Dong        BL          armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe ;// helper defined elsewhere; code below consumes dTAcc0..3 and dTemp*/qTemp* from it
4350c1bc742181ded4930842b46e9507372f0b1b963James Dong        VEXT        dTempP0, dTempP0, dTempP1, #3 ;// dTempP0 = dTempP0:dTempP1 pair starting at element 3 (element size set by the alias, not visible here)
4360c1bc742181ded4930842b46e9507372f0b1b963James Dong        VEXT        dTempQ0, dTempQ0, dTempQ1, #3
4370c1bc742181ded4930842b46e9507372f0b1b963James Dong        VEXT        dTempR0, dTempR0, dTempR1, #3
4380c1bc742181ded4930842b46e9507372f0b1b963James Dong        VEXT        dTempS0, dTempS0, dTempS1, #3
4390c1bc742181ded4930842b46e9507372f0b1b963James Dong
4400c1bc742181ded4930842b46e9507372f0b1b963James Dong        VQRSHRUN    dTResult0, qTempP01, #5 ;// rounding >>5 with unsigned-saturating narrow; qTempP01 presumably overlays dTempP0/dTempP1 - confirm
4410c1bc742181ded4930842b46e9507372f0b1b963James Dong        VQRSHRUN    dTResult1, qTempQ01, #5
4420c1bc742181ded4930842b46e9507372f0b1b963James Dong        VQRSHRUN    dTResult2, qTempR01, #5
4430c1bc742181ded4930842b46e9507372f0b1b963James Dong        VQRSHRUN    dTResult3, qTempS01, #5
4440c1bc742181ded4930842b46e9507372f0b1b963James Dong
4450c1bc742181ded4930842b46e9507372f0b1b963James Dong        VRHADD      dTAcc0, dTAcc0, dTResult0 ;// per element: dTAcc0 = (dTAcc0 + dTResult0 + 1) >> 1
4460c1bc742181ded4930842b46e9507372f0b1b963James Dong        VRHADD      dTAcc2, dTAcc2, dTResult2
4470c1bc742181ded4930842b46e9507372f0b1b963James Dong        VRHADD      dTAcc1, dTAcc1, dTResult1
4480c1bc742181ded4930842b46e9507372f0b1b963James Dong        VRHADD      dTAcc3, dTAcc3, dTResult3
4490c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD         Temp, pDst, dstStep, LSL #1 ;// Temp = pDst + 2*dstStep (address of row 2)
4500c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dTRes0[0], [pDst], dstStep ;// row 0; dTRes* presumably alias the dTAcc* registers written above - confirm against the DN declarations
4510c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dTRes2[0], [Temp], dstStep ;// row 2
4520c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dTRes1[0], [pDst] ;// row 1
4530c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dTRes3[0], [Temp] ;// row 3
4540c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ADR       pArgs, ppArgs ;// pArgs is the base for the argument reload at Block4x4LoopEnd
4550c1bc742181ded4930842b46e9507372f0b1b963James Dong        B       Block4x4LoopEnd
4560c1bc742181ded4930842b46e9507372f0b1b963James DongCase_c
4570c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Case n: rounding average of the HalfVer helper output (dAccV0..3) with dSrc1..dSrc4, then store four rows
4580c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_PRINTF "Case c \n"
4590c1bc742181ded4930842b46e9507372f0b1b963James Dong
4600c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB         pSrc, pSrc, srcStep, LSL #1 ;// pSrc -= 2*srcStep
4610c1bc742181ded4930842b46e9507372f0b1b963James Dong        BL          armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe ;// helper defined elsewhere; dSrc1..4 are presumably source rows it leaves in registers - confirm
4620c1bc742181ded4930842b46e9507372f0b1b963James Dong        VRHADD      dAccV0, dAccV0, dSrc1 ;// per element: dAccV0 = (dAccV0 + dSrc1 + 1) >> 1
4630c1bc742181ded4930842b46e9507372f0b1b963James Dong        VRHADD      dAccV2, dAccV2, dSrc3
4640c1bc742181ded4930842b46e9507372f0b1b963James Dong        VRHADD      dAccV1, dAccV1, dSrc2
4650c1bc742181ded4930842b46e9507372f0b1b963James Dong        VRHADD      dAccV3, dAccV3, dSrc4
4660c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD         Temp, pDst, dstStep, LSL #1 ;// Temp = pDst + 2*dstStep (address of row 2)
4670c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dResultV0[0], [pDst], dstStep ;// row 0; dResultV* presumably alias the dAccV* registers written above - confirm against the DN declarations
4680c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dResultV2[0], [Temp], dstStep ;// row 2
4690c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dResultV1[0], [pDst] ;// row 1
4700c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dResultV3[0], [Temp] ;// row 3
4710c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ADR       pArgs, ppArgs ;// pArgs is the base for the argument reload at Block4x4LoopEnd
4720c1bc742181ded4930842b46e9507372f0b1b963James Dong        B           Block4x4LoopEnd
4730c1bc742181ded4930842b46e9507372f0b1b963James DongCase_d
4740c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Case p: rounding average of HalfVer (source at pSrc - 2*srcStep) and HalfHor (source at pSrc + srcStep - 2)
4750c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_PRINTF "Case d \n"
4760c1bc742181ded4930842b46e9507372f0b1b963James Dong
4770c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV         pSrcBK, pSrc ;// keep the unmodified pSrc for the second helper call
4780c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB         pSrc, pSrc, srcStep, LSL #1 ;// pSrc -= 2*srcStep
4790c1bc742181ded4930842b46e9507372f0b1b963James Dong        BL          armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe ;// helper defined elsewhere; its dAccV0..3 are read by the VRHADDs below
4800c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD         pSrc, pSrcBK, srcStep ;// pSrc = saved pSrc + srcStep
4810c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB         pSrc, pSrc, #2 ;// pSrc -= 2
4820c1bc742181ded4930842b46e9507372f0b1b963James Dong        BL          armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe ;// helper defined elsewhere; dAccV0..3 are read afterwards, so it is relied on not to overwrite them
4830c1bc742181ded4930842b46e9507372f0b1b963James Dong        VRHADD      dAccH0, dAccH0, dAccV0 ;// per element: dAccH0 = (dAccH0 + dAccV0 + 1) >> 1
4840c1bc742181ded4930842b46e9507372f0b1b963James Dong        VRHADD      dAccH2, dAccH2, dAccV2
4850c1bc742181ded4930842b46e9507372f0b1b963James Dong        VRHADD      dAccH1, dAccH1, dAccV1
4860c1bc742181ded4930842b46e9507372f0b1b963James Dong        VRHADD      dAccH3, dAccH3, dAccV3
4870c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD         Temp, pDst, dstStep, LSL #1 ;// Temp = pDst + 2*dstStep (address of row 2)
4880c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dResultH0[0], [pDst], dstStep ;// row 0; dResultH* presumably alias the dAccH* registers written above - confirm against the DN declarations
4890c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dResultH2[0], [Temp], dstStep ;// row 2
4900c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dResultH1[0], [pDst] ;// row 1
4910c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dResultH3[0], [Temp] ;// row 3
4920c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ADR       pArgs, ppArgs ;// pArgs is the base for the argument reload at Block4x4LoopEnd
4930c1bc742181ded4930842b46e9507372f0b1b963James Dong        B       Block4x4LoopEnd
4940c1bc742181ded4930842b46e9507372f0b1b963James DongCase_e
4950c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Case q: rounding average of the HalfDiagHorVer helper's dTAcc0..3 with values narrowed from qRes3..qRes6
4960c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_PRINTF "Case e \n"
4970c1bc742181ded4930842b46e9507372f0b1b963James Dong
4980c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB         pSrc, pSrc, srcStep, LSL #1 ;// pSrc -= 2*srcStep
4990c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB         pSrc, pSrc, #2 ;// pSrc -= 2
5000c1bc742181ded4930842b46e9507372f0b1b963James Dong        BL          armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe ;// helper defined elsewhere; code below consumes dTAcc0..3 and qRes3..6 from it
5010c1bc742181ded4930842b46e9507372f0b1b963James Dong        VQRSHRUN    dTResult0, qRes3, #5 ;// rounding >>5 with unsigned-saturating narrow; qRes3..6 presumably hold the helper's intermediate rows - confirm
5020c1bc742181ded4930842b46e9507372f0b1b963James Dong        VQRSHRUN    dTResult1, qRes4, #5
5030c1bc742181ded4930842b46e9507372f0b1b963James Dong        VQRSHRUN    dTResult2, qRes5, #5
5040c1bc742181ded4930842b46e9507372f0b1b963James Dong        VQRSHRUN    dTResult3, qRes6, #5
5050c1bc742181ded4930842b46e9507372f0b1b963James Dong
5060c1bc742181ded4930842b46e9507372f0b1b963James Dong        VRHADD      dTAcc0, dTAcc0, dTResult0 ;// per element: dTAcc0 = (dTAcc0 + dTResult0 + 1) >> 1
5070c1bc742181ded4930842b46e9507372f0b1b963James Dong        VRHADD      dTAcc2, dTAcc2, dTResult2
5080c1bc742181ded4930842b46e9507372f0b1b963James Dong        VRHADD      dTAcc1, dTAcc1, dTResult1
5090c1bc742181ded4930842b46e9507372f0b1b963James Dong        VRHADD      dTAcc3, dTAcc3, dTResult3
5100c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD         Temp, pDst, dstStep, LSL #1 ;// Temp = pDst + 2*dstStep (address of row 2)
5110c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dTRes0[0], [pDst], dstStep ;// row 0; dTRes* presumably alias the dTAcc* registers written above - confirm against the DN declarations
5120c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dTRes2[0], [Temp], dstStep ;// row 2
5130c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dTRes1[0], [pDst] ;// row 1
5140c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dTRes3[0], [Temp] ;// row 3
5150c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ADR       pArgs, ppArgs ;// pArgs is the base for the argument reload at Block4x4LoopEnd
5160c1bc742181ded4930842b46e9507372f0b1b963James Dong        B       Block4x4LoopEnd
5170c1bc742181ded4930842b46e9507372f0b1b963James DongCase_f
5180c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Case r: rounding average of HalfVer (source at pSrc + 1 - 2*srcStep) and HalfHor (source at pSrc + srcStep - 2)
5190c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_PRINTF "Case f \n"
5200c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV         pSrcBK, pSrc ;// keep the unmodified pSrc for the second helper call
5210c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD         pSrc, pSrc, #1 ;// pSrc += 1
5220c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB         pSrc, pSrc, srcStep, LSL #1 ;// pSrc -= 2*srcStep
5230c1bc742181ded4930842b46e9507372f0b1b963James Dong        BL          armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe ;// helper defined elsewhere; its dAccV0..3 are read by the VRHADDs below
5240c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD         pSrc, pSrcBK, srcStep ;// pSrc = saved pSrc + srcStep
5250c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB         pSrc, pSrc, #2 ;// pSrc -= 2
5260c1bc742181ded4930842b46e9507372f0b1b963James Dong        BL          armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe ;// helper defined elsewhere; dAccV0..3 are read afterwards, so it is relied on not to overwrite them
5270c1bc742181ded4930842b46e9507372f0b1b963James Dong        VRHADD      dAccH0, dAccH0, dAccV0 ;// per element: dAccH0 = (dAccH0 + dAccV0 + 1) >> 1
5280c1bc742181ded4930842b46e9507372f0b1b963James Dong        VRHADD      dAccH2, dAccH2, dAccV2
5290c1bc742181ded4930842b46e9507372f0b1b963James Dong        VRHADD      dAccH1, dAccH1, dAccV1
5300c1bc742181ded4930842b46e9507372f0b1b963James Dong        VRHADD      dAccH3, dAccH3, dAccV3
5310c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD         Temp, pDst, dstStep, LSL #1 ;// Temp = pDst + 2*dstStep (address of row 2)
5320c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dResultH0[0], [pDst], dstStep ;// row 0; dResultH* presumably alias the dAccH* registers written above - confirm against the DN declarations
5330c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dResultH2[0], [Temp], dstStep ;// row 2
5340c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dResultH1[0], [pDst] ;// row 1
5350c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1        dResultH3[0], [Temp] ;// row 3
5360c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ADR       pArgs, ppArgs ;// last case: no branch, execution falls through into Block4x4LoopEnd
5370c1bc742181ded4930842b46e9507372f0b1b963James Dong
5380c1bc742181ded4930842b46e9507372f0b1b963James Dong
5390c1bc742181ded4930842b46e9507372f0b1b963James DongBlock4x4LoopEnd
5400c1bc742181ded4930842b46e9507372f0b1b963James Dong
5410c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Width Loop: every case arrives here with pArgs already set; step 4 columns right and repeat while iWidth > 0
5420c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;//M_ADR       pArgs, ppArgs
5430c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDM         pArgs, {pSrc,srcStep,pDst,dstStep}  ;// Load arguments (the case code above modified pSrc and pDst)
5440c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUBS        iWidth, iWidth, #4 ;// iWidth -= 4; sets the flags tested by BGT below
5450c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD         pSrc, pSrc, #4 ;// advance source by 4 (ADD without S leaves the flags intact)
5460c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD         pDst, pDst, #4 ;// advance destination by 4
5470c1bc742181ded4930842b46e9507372f0b1b963James Dong        BGT         Block4x4WidthLoop ;// loop head is outside this chunk; presumably it stores the updated pointers back - confirm
5480c1bc742181ded4930842b46e9507372f0b1b963James Dong
5490c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Height Loop: rewind to the start of the row of blocks, move down 4 rows, repeat while iHeight > 0
5500c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUBS        iHeight, iHeight, #4 ;// iHeight -= 4; flags must survive until the BGT (M_LDR/M_ADR assumed not to set flags - confirm)
5510c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDR       iWidth, ptrWidth ;// reload iWidth from ptrWidth for the next row of blocks
5520c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ADR       pArgs, ppArgs
5530c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD         pSrc, pSrc, srcStep, LSL #2 ;// pSrc += 4*srcStep
5540c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD         pDst, pDst, dstStep, LSL #2 ;// pDst += 4*dstStep
5550c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB         pSrc, pSrc, iWidth ;// undo the +4 per block added by the width loop
5560c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB         pDst, pDst, iWidth
5570c1bc742181ded4930842b46e9507372f0b1b963James Dong        BGT         Block4x4HeightLoop ;// loop head is outside this chunk
5580c1bc742181ded4930842b46e9507372f0b1b963James Dong
5590c1bc742181ded4930842b46e9507372f0b1b963James DongEndOfInterpolation
5600c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV         r0, #0 ;// return value 0 in r0 (presumably the OMX no-error status - confirm)
5610c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_END
5620c1bc742181ded4930842b46e9507372f0b1b963James Dong
5630c1bc742181ded4930842b46e9507372f0b1b963James Dong    ENDIF
5640c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// End of CortexA8
5650c1bc742181ded4930842b46e9507372f0b1b963James Dong
5660c1bc742181ded4930842b46e9507372f0b1b963James Dong    END
5670c1bc742181ded4930842b46e9507372f0b1b963James Dong
568