10c1bc742181ded4930842b46e9507372f0b1b963James Dong;//
278e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// Copyright (C) 2007-2008 ARM Limited
378e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;//
478e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// Licensed under the Apache License, Version 2.0 (the "License");
578e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// you may not use this file except in compliance with the License.
678e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// You may obtain a copy of the License at
778e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;//
878e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;//      http://www.apache.org/licenses/LICENSE-2.0
978e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;//
1078e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// Unless required by applicable law or agreed to in writing, software
1178e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// distributed under the License is distributed on an "AS IS" BASIS,
1278e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1378e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// See the License for the specific language governing permissions and
1478e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// limitations under the License.
1578e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;//
1678e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;//
170c1bc742181ded4930842b46e9507372f0b1b963James Dong;//
180c1bc742181ded4930842b46e9507372f0b1b963James Dong;// File Name:  omxVCM4P10_InterpolateLuma_s.s
190c1bc742181ded4930842b46e9507372f0b1b963James Dong;// OpenMAX DL: v1.0.2
200c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Revision:   9641
210c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Date:       Thursday, February 7, 2008
220c1bc742181ded4930842b46e9507372f0b1b963James Dong;//
230c1bc742181ded4930842b46e9507372f0b1b963James Dong;//
240c1bc742181ded4930842b46e9507372f0b1b963James Dong;//
250c1bc742181ded4930842b46e9507372f0b1b963James Dong;//
260c1bc742181ded4930842b46e9507372f0b1b963James Dong
270c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Function:
280c1bc742181ded4930842b46e9507372f0b1b963James Dong;//     omxVCM4P10_InterpolateLuma
290c1bc742181ded4930842b46e9507372f0b1b963James Dong;//
300c1bc742181ded4930842b46e9507372f0b1b963James Dong;// This function implements omxVCM4P10_InterpolateLuma in v6 assembly.
310c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Performs quarter pel interpolation of inter luma MB.
320c1bc742181ded4930842b46e9507372f0b1b963James Dong;// It's assumed that the frame is already padded when calling this function.
330c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Parameters:
340c1bc742181ded4930842b46e9507372f0b1b963James Dong;// [in]    pSrc        Pointer to the source reference frame buffer
350c1bc742181ded4930842b46e9507372f0b1b963James Dong;// [in]    srcStep     Reference frame step in bytes
360c1bc742181ded4930842b46e9507372f0b1b963James Dong;// [in]    dstStep     Destination frame step in bytes. Must be a multiple of roi.width
370c1bc742181ded4930842b46e9507372f0b1b963James Dong;// [in]    dx          Fractional part of horizontal motion vector
380c1bc742181ded4930842b46e9507372f0b1b963James Dong;//                         component in 1/4 pixel unit; valid in the range [0,3]
390c1bc742181ded4930842b46e9507372f0b1b963James Dong;// [in]    dy          Fractional part of vertical motion vector
400c1bc742181ded4930842b46e9507372f0b1b963James Dong;//                         component in 1/4 pixel unit; valid in the range [0,3]
410c1bc742181ded4930842b46e9507372f0b1b963James Dong;// [in]    roi         Dimension of the interpolation region; the parameters roi.width and roi.height must
420c1bc742181ded4930842b46e9507372f0b1b963James Dong;//                         be equal to either 4, 8, or 16.
430c1bc742181ded4930842b46e9507372f0b1b963James Dong;// [out]   pDst        Pointer to the destination frame buffer.
440c1bc742181ded4930842b46e9507372f0b1b963James Dong;//                   if roi.width==4,  4-byte alignment required
450c1bc742181ded4930842b46e9507372f0b1b963James Dong;//                   if roi.width==8,  8-byte alignment required
460c1bc742181ded4930842b46e9507372f0b1b963James Dong;//                   if roi.width==16, 16-byte alignment required
470c1bc742181ded4930842b46e9507372f0b1b963James Dong;//
480c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Return Value:
490c1bc742181ded4930842b46e9507372f0b1b963James Dong;// If the function runs without error, it returns OMX_Sts_NoErr.
500c1bc742181ded4930842b46e9507372f0b1b963James Dong;// It is assumed that the following conditions are satisfied before calling this function:
510c1bc742181ded4930842b46e9507372f0b1b963James Dong;//     pSrc and pDst are not NULL.
520c1bc742181ded4930842b46e9507372f0b1b963James Dong;//     srcStep and dstStep are >= roi.width.
530c1bc742181ded4930842b46e9507372f0b1b963James Dong;//     dx and dy are in the range [0,3].
540c1bc742181ded4930842b46e9507372f0b1b963James Dong;//     roi.width and roi.height are each one of {4, 8, 16}.
550c1bc742181ded4930842b46e9507372f0b1b963James Dong;//     If roi.width is equal to 4, pDst is 4 byte aligned.
560c1bc742181ded4930842b46e9507372f0b1b963James Dong;//     If roi.width is equal to 8, pDst is 8 byte aligned.
570c1bc742181ded4930842b46e9507372f0b1b963James Dong;//     If roi.width is equal to 16, pDst is 16 byte aligned.
580c1bc742181ded4930842b46e9507372f0b1b963James Dong;//     srcStep and dstStep are multiples of 8.
590c1bc742181ded4930842b46e9507372f0b1b963James Dong;//
600c1bc742181ded4930842b46e9507372f0b1b963James Dong;//
610c1bc742181ded4930842b46e9507372f0b1b963James Dong
620c1bc742181ded4930842b46e9507372f0b1b963James Dong
630c1bc742181ded4930842b46e9507372f0b1b963James Dong        INCLUDE omxtypes_s.h
640c1bc742181ded4930842b46e9507372f0b1b963James Dong        INCLUDE armCOMM_s.h
650c1bc742181ded4930842b46e9507372f0b1b963James Dong
660c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_VARIANTS ARM1136JS
670c1bc742181ded4930842b46e9507372f0b1b963James Dong
680c1bc742181ded4930842b46e9507372f0b1b963James Dong        EXPORT omxVCM4P10_InterpolateLuma
690c1bc742181ded4930842b46e9507372f0b1b963James Dong
700c1bc742181ded4930842b46e9507372f0b1b963James Dong    IF ARM1136JS
710c1bc742181ded4930842b46e9507372f0b1b963James Dong        IMPORT armVCM4P10_InterpolateLuma_Copy4x4_unsafe
720c1bc742181ded4930842b46e9507372f0b1b963James Dong        IMPORT armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
730c1bc742181ded4930842b46e9507372f0b1b963James Dong        IMPORT armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
740c1bc742181ded4930842b46e9507372f0b1b963James Dong        IMPORT armVCM4P10_Average_4x4_Align0_unsafe
750c1bc742181ded4930842b46e9507372f0b1b963James Dong        IMPORT armVCM4P10_Average_4x4_Align2_unsafe
760c1bc742181ded4930842b46e9507372f0b1b963James Dong        IMPORT armVCM4P10_Average_4x4_Align3_unsafe
770c1bc742181ded4930842b46e9507372f0b1b963James Dong        IMPORT armVCM4P10_InterpolateLuma_HorDiagCopy_unsafe
780c1bc742181ded4930842b46e9507372f0b1b963James Dong        IMPORT armVCM4P10_InterpolateLuma_VerDiagCopy_unsafe
790c1bc742181ded4930842b46e9507372f0b1b963James Dong    ENDIF
800c1bc742181ded4930842b46e9507372f0b1b963James Dong
810c1bc742181ded4930842b46e9507372f0b1b963James Dong    IF ARM1136JS
820c1bc742181ded4930842b46e9507372f0b1b963James Dong        IMPORT armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
830c1bc742181ded4930842b46e9507372f0b1b963James Dong        IMPORT armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
840c1bc742181ded4930842b46e9507372f0b1b963James Dong        IMPORT armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe
850c1bc742181ded4930842b46e9507372f0b1b963James Dong        IMPORT armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe
860c1bc742181ded4930842b46e9507372f0b1b963James Dong    ENDIF
870c1bc742181ded4930842b46e9507372f0b1b963James Dong
880c1bc742181ded4930842b46e9507372f0b1b963James Dong
890c1bc742181ded4930842b46e9507372f0b1b963James Dong
900c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Declare input registers (symbolic names for r0-r5 used throughout the function body)
910c1bc742181ded4930842b46e9507372f0b1b963James DongpSrc            RN 0                            ;// r0: pointer to the source reference frame buffer
920c1bc742181ded4930842b46e9507372f0b1b963James DongsrcStep         RN 1                            ;// r1: reference frame step
930c1bc742181ded4930842b46e9507372f0b1b963James DongpDst            RN 2                            ;// r2: pointer to the destination frame buffer
940c1bc742181ded4930842b46e9507372f0b1b963James DongdstStep         RN 3                            ;// r3: destination frame step
950c1bc742181ded4930842b46e9507372f0b1b963James DongiHeight         RN 4                            ;// r4: roi.height, loaded from the ptrHeight stack argument
960c1bc742181ded4930842b46e9507372f0b1b963James DongiWidth          RN 5                            ;// r5: roi.width, loaded from the ptrWidth stack argument
970c1bc742181ded4930842b46e9507372f0b1b963James Dong
980c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Declare other intermediate registers
990c1bc742181ded4930842b46e9507372f0b1b963James Dongidx             RN 6                            ;// r6: loaded from the ptridx stack argument
1000c1bc742181ded4930842b46e9507372f0b1b963James Dongidy             RN 7                            ;// r7: loaded from the ptridy stack argument
1010c1bc742181ded4930842b46e9507372f0b1b963James Dongindex           RN 6                            ;// r6 again (same register as idx): index = idx + (idy << 2), the M_SWITCH selector
1020c1bc742181ded4930842b46e9507372f0b1b963James DongTemp            RN 12                           ;// r12: NOTE(review): presumably scratch; no use is visible in this part of the file
1030c1bc742181ded4930842b46e9507372f0b1b963James DongpArgs           RN 11                           ;// r11: address of the ppArgs stack area holding {pSrc, srcStep, pDst, dstStep}
1040c1bc742181ded4930842b46e9507372f0b1b963James Dong
1050c1bc742181ded4930842b46e9507372f0b1b963James Dong
1060c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// End of CortexA8
1070c1bc742181ded4930842b46e9507372f0b1b963James Dong
1080c1bc742181ded4930842b46e9507372f0b1b963James Dong;//-------------------------------------------------------------------------------------------------------------------------
1090c1bc742181ded4930842b46e9507372f0b1b963James Dong;//-------------------------------------------------------------------------------------------------------------------------
1100c1bc742181ded4930842b46e9507372f0b1b963James Dong    IF ARM1136JS
1110c1bc742181ded4930842b46e9507372f0b1b963James Dong
1120c1bc742181ded4930842b46e9507372f0b1b963James Dong
1130c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ALLOC4 ppDst, 8
1140c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ALLOC4 ppSrc, 8
1150c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ALLOC4 ppArgs, 16
1160c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ALLOC4 pBuffer, 120                           ;// 120 = 12x10
1170c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ALLOC8 pInterBuf, 120                         ;// 120 = 12*5*2
1180c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ALLOC8 pTempBuf, 32                           ;// 32 =  8*4
1190c1bc742181ded4930842b46e9507372f0b1b963James Dong
1200c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Function header
1210c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Interpolation of luma is implemented by processing block of pixels, size 4x4 at a time.
1220c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Depending on the values of motion vector fractional parts (dx,dy), one out of 16 cases will be processed.
1230c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Registers r4, r5, r6 to be preserved by internal unsafe functions
1240c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// r4 - iHeight
1250c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// r5 - iWidth
1260c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// r6 - index
1270c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_START omxVCM4P10_InterpolateLuma, r11
1280c1bc742181ded4930842b46e9507372f0b1b963James Dong
1290c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Declare other intermediate registers
1300c1bc742181ded4930842b46e9507372f0b1b963James Dongidx             RN 6
1310c1bc742181ded4930842b46e9507372f0b1b963James Dongidy             RN 7
1320c1bc742181ded4930842b46e9507372f0b1b963James Dongindex           RN 6
1330c1bc742181ded4930842b46e9507372f0b1b963James DongTemp            RN 12
1340c1bc742181ded4930842b46e9507372f0b1b963James DongpArgs           RN 11
1350c1bc742181ded4930842b46e9507372f0b1b963James Dong
1360c1bc742181ded4930842b46e9507372f0b1b963James DongpBuf            RN 8
1370c1bc742181ded4930842b46e9507372f0b1b963James DongHeight          RN 9
1380c1bc742181ded4930842b46e9507372f0b1b963James DongbufStep         RN 9
1390c1bc742181ded4930842b46e9507372f0b1b963James Dong
1400c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Define stack arguments
1410c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ARG   ptridx, 4
1420c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ARG   ptridy, 4
1430c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ARG   ptrWidth, 4
1440c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ARG   ptrHeight, 4
1450c1bc742181ded4930842b46e9507372f0b1b963James Dong
1460c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Load the stack arguments: dx, dy (idx, idy) and the roi structure elements (width, height)
1470c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDR   idx, ptridx
1480c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDR   idy, ptridy
1490c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDR   iWidth, ptrWidth
1500c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDR   iHeight, ptrHeight
1510c1bc742181ded4930842b46e9507372f0b1b963James Dong
1520c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_PRINTF "roi.width %d\n", iWidth
1530c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_PRINTF "roi.height %d\n", iHeight
1540c1bc742181ded4930842b46e9507372f0b1b963James Dong
1550c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD     index, idx, idy, LSL #2                 ;//  [index] = [idy][idx]
1560c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ADR   pArgs, ppArgs
1570c1bc742181ded4930842b46e9507372f0b1b963James Dong
1580c1bc742181ded4930842b46e9507372f0b1b963James DongInterpolateLuma
1590c1bc742181ded4930842b46e9507372f0b1b963James DongBlock4x4WidthLoop
1600c1bc742181ded4930842b46e9507372f0b1b963James DongBlock4x4HeightLoop
1610c1bc742181ded4930842b46e9507372f0b1b963James Dong
1620c1bc742181ded4930842b46e9507372f0b1b963James Dong        STM     pArgs, {pSrc,srcStep,pDst,dstStep}
1630c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ADR   pBuf, pBuffer
1640c1bc742181ded4930842b46e9507372f0b1b963James Dong
1650c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// switch table using motion vector as index
1660c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_SWITCH index, L
1670c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_CASE  Case_0
1680c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_CASE  Case_1
1690c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_CASE  Case_2
1700c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_CASE  Case_3
1710c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_CASE  Case_4
1720c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_CASE  Case_5
1730c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_CASE  Case_6
1740c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_CASE  Case_7
1750c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_CASE  Case_8
1760c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_CASE  Case_9
1770c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_CASE  Case_a
1780c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_CASE  Case_b
1790c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_CASE  Case_c
1800c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_CASE  Case_d
1810c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_CASE  Case_e
1820c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_CASE  Case_f
1830c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ENDSWITCH
1840c1bc742181ded4930842b46e9507372f0b1b963James Dong
1850c1bc742181ded4930842b46e9507372f0b1b963James DongCase_0
1860c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Case G
1870c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_PRINTF "Case 0 \n"
1880c1bc742181ded4930842b46e9507372f0b1b963James Dong
1890c1bc742181ded4930842b46e9507372f0b1b963James Dong        BL      armVCM4P10_InterpolateLuma_Copy4x4_unsafe
1900c1bc742181ded4930842b46e9507372f0b1b963James Dong        B       Block4x4LoopEnd
1910c1bc742181ded4930842b46e9507372f0b1b963James Dong
1920c1bc742181ded4930842b46e9507372f0b1b963James DongCase_1
1930c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Case a
1940c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_PRINTF "Case 1 \n"
1950c1bc742181ded4930842b46e9507372f0b1b963James Dong
1960c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB     pSrc, pSrc, #2
1970c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV     Height, #4
1980c1bc742181ded4930842b46e9507372f0b1b963James Dong        BL      armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
1990c1bc742181ded4930842b46e9507372f0b1b963James Dong        BL      armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
2000c1bc742181ded4930842b46e9507372f0b1b963James Dong        BL      armVCM4P10_Average_4x4_Align2_unsafe
2010c1bc742181ded4930842b46e9507372f0b1b963James Dong        B       Block4x4LoopEnd
2020c1bc742181ded4930842b46e9507372f0b1b963James DongCase_2
2030c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Case b
2040c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_PRINTF "Case 2 \n"
2050c1bc742181ded4930842b46e9507372f0b1b963James Dong
2060c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB     pSrc, pSrc, #2
2070c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV     Height, #4
2080c1bc742181ded4930842b46e9507372f0b1b963James Dong        BL      armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
2090c1bc742181ded4930842b46e9507372f0b1b963James Dong        BL      armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
2100c1bc742181ded4930842b46e9507372f0b1b963James Dong        B       Block4x4LoopEnd
2110c1bc742181ded4930842b46e9507372f0b1b963James DongCase_3
2120c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Case c
2130c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_PRINTF "Case 3 \n"
2140c1bc742181ded4930842b46e9507372f0b1b963James Dong
2150c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB     pSrc, pSrc, #2
2160c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV     Height, #4
2170c1bc742181ded4930842b46e9507372f0b1b963James Dong        BL      armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
2180c1bc742181ded4930842b46e9507372f0b1b963James Dong        BL      armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
2190c1bc742181ded4930842b46e9507372f0b1b963James Dong        BL      armVCM4P10_Average_4x4_Align3_unsafe
2200c1bc742181ded4930842b46e9507372f0b1b963James Dong        B       Block4x4LoopEnd
2210c1bc742181ded4930842b46e9507372f0b1b963James DongCase_4
2220c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Case d
2230c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_PRINTF "Case 4 \n"
2240c1bc742181ded4930842b46e9507372f0b1b963James Dong
2250c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB     pSrc, pSrc, srcStep, LSL #1
2260c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV     Height, #9
2270c1bc742181ded4930842b46e9507372f0b1b963James Dong        BL      armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
2280c1bc742181ded4930842b46e9507372f0b1b963James Dong        BL      armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
2290c1bc742181ded4930842b46e9507372f0b1b963James Dong        BL      armVCM4P10_Average_4x4_Align0_unsafe
2300c1bc742181ded4930842b46e9507372f0b1b963James Dong
2310c1bc742181ded4930842b46e9507372f0b1b963James Dong        B       Block4x4LoopEnd
2320c1bc742181ded4930842b46e9507372f0b1b963James DongCase_5
2330c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Case e
2340c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_PRINTF "Case 5 \n"
2350c1bc742181ded4930842b46e9507372f0b1b963James Dong
2360c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB     pSrc, pSrc, #2
2370c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV     Height, #4
2380c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ADR   pDst, pTempBuf
2390c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV     dstStep, #4
2400c1bc742181ded4930842b46e9507372f0b1b963James Dong        BL      armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
2410c1bc742181ded4930842b46e9507372f0b1b963James Dong        BL      armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
2420c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ADR   pArgs, ppArgs
2430c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDM     pArgs, {pSrc, srcStep, pDst, dstStep}
2440c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB     pSrc, pSrc, srcStep, LSL #1
2450c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ADR   pBuf, pBuffer
2460c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV     Height, #9
2470c1bc742181ded4930842b46e9507372f0b1b963James Dong        BL      armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
2480c1bc742181ded4930842b46e9507372f0b1b963James Dong        BL      armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
2490c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ADR   pSrc, pTempBuf
2500c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV     srcStep, #4
2510c1bc742181ded4930842b46e9507372f0b1b963James Dong        BL      armVCM4P10_Average_4x4_Align0_unsafe
2520c1bc742181ded4930842b46e9507372f0b1b963James Dong
2530c1bc742181ded4930842b46e9507372f0b1b963James Dong
2540c1bc742181ded4930842b46e9507372f0b1b963James Dong        B       Block4x4LoopEnd
2550c1bc742181ded4930842b46e9507372f0b1b963James DongCase_6
2560c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Case f
2570c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_PRINTF "Case 6 \n"
2580c1bc742181ded4930842b46e9507372f0b1b963James Dong
2590c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB     pSrc, pSrc, #2
2600c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB     pSrc, pSrc, srcStep, LSL #1
2610c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV     Height, #9
2620c1bc742181ded4930842b46e9507372f0b1b963James Dong        BL      armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
2630c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ADR   pBuf, pInterBuf
2640c1bc742181ded4930842b46e9507372f0b1b963James Dong        BL      armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe
2650c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ADR   idy, pTempBuf
2660c1bc742181ded4930842b46e9507372f0b1b963James Dong        BL      armVCM4P10_InterpolateLuma_VerDiagCopy_unsafe
2670c1bc742181ded4930842b46e9507372f0b1b963James Dong        BL      armVCM4P10_Average_4x4_Align0_unsafe
2680c1bc742181ded4930842b46e9507372f0b1b963James Dong        B       Block4x4LoopEnd
2690c1bc742181ded4930842b46e9507372f0b1b963James DongCase_7
2700c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Case g
2710c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_PRINTF "Case 7 \n"
2720c1bc742181ded4930842b46e9507372f0b1b963James Dong
2730c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB     pSrc, pSrc, #2
2740c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV     Height, #4
2750c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ADR   pDst, pTempBuf
2760c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV     dstStep, #4
2770c1bc742181ded4930842b46e9507372f0b1b963James Dong        BL      armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
2780c1bc742181ded4930842b46e9507372f0b1b963James Dong        BL      armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
2790c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ADR   pArgs, ppArgs
2800c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDM     pArgs, {pSrc, srcStep, pDst, dstStep}
2810c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB     pSrc, pSrc, srcStep, LSL #1
2820c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD     pSrc, pSrc, #1
2830c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ADR   pBuf, pBuffer
2840c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV     Height, #9
2850c1bc742181ded4930842b46e9507372f0b1b963James Dong        BL      armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
2860c1bc742181ded4930842b46e9507372f0b1b963James Dong        BL      armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
2870c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ADR   pSrc, pTempBuf
2880c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV     srcStep, #4
2890c1bc742181ded4930842b46e9507372f0b1b963James Dong        BL      armVCM4P10_Average_4x4_Align0_unsafe
2900c1bc742181ded4930842b46e9507372f0b1b963James Dong
2910c1bc742181ded4930842b46e9507372f0b1b963James Dong        B       Block4x4LoopEnd
2920c1bc742181ded4930842b46e9507372f0b1b963James DongCase_8
2930c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Case h
2940c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_PRINTF "Case 8 \n"
2950c1bc742181ded4930842b46e9507372f0b1b963James Dong
2960c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB     pSrc, pSrc, srcStep, LSL #1
2970c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV     Height, #9
2980c1bc742181ded4930842b46e9507372f0b1b963James Dong        BL      armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
2990c1bc742181ded4930842b46e9507372f0b1b963James Dong        BL      armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
3000c1bc742181ded4930842b46e9507372f0b1b963James Dong        B       Block4x4LoopEnd
3010c1bc742181ded4930842b46e9507372f0b1b963James DongCase_9
3020c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Case i
3030c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_PRINTF "Case 9 \n"
3040c1bc742181ded4930842b46e9507372f0b1b963James Dong
3050c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB     pSrc, pSrc, #2
3060c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB     pSrc, pSrc, srcStep, LSL #1
3070c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV     Height, #9
3080c1bc742181ded4930842b46e9507372f0b1b963James Dong        BL      armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
3090c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD     pSrc, pSrc, srcStep, LSL #1
3100c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ADR   pBuf, pInterBuf
3110c1bc742181ded4930842b46e9507372f0b1b963James Dong        BL      armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe
3120c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ADR   idy, pTempBuf
3130c1bc742181ded4930842b46e9507372f0b1b963James Dong        BL      armVCM4P10_InterpolateLuma_HorDiagCopy_unsafe
3140c1bc742181ded4930842b46e9507372f0b1b963James Dong        BL      armVCM4P10_Average_4x4_Align2_unsafe
3150c1bc742181ded4930842b46e9507372f0b1b963James Dong        B       Block4x4LoopEnd
3160c1bc742181ded4930842b46e9507372f0b1b963James DongCase_a
3170c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Case j
3180c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_PRINTF "Case a \n"
3190c1bc742181ded4930842b46e9507372f0b1b963James Dong
3200c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB     pSrc, pSrc, #2
3210c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB     pSrc, pSrc, srcStep, LSL #1
3220c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV     Height, #9
3230c1bc742181ded4930842b46e9507372f0b1b963James Dong        BL      armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
3240c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD     pSrc, pSrc, srcStep, LSL #1
3250c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ADR   pBuf, pInterBuf
3260c1bc742181ded4930842b46e9507372f0b1b963James Dong        BL      armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe
3270c1bc742181ded4930842b46e9507372f0b1b963James Dong        B       Block4x4LoopEnd
3280c1bc742181ded4930842b46e9507372f0b1b963James DongCase_b
3290c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Case k
3300c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_PRINTF "Case b \n"
3310c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB     pSrc, pSrc, #2
3320c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB     pSrc, pSrc, srcStep, LSL #1
3330c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV     Height, #9
3340c1bc742181ded4930842b46e9507372f0b1b963James Dong        BL      armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
3350c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD     pSrc, pSrc, srcStep, LSL #1
3360c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ADR   pBuf, pInterBuf
3370c1bc742181ded4930842b46e9507372f0b1b963James Dong        BL      armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe
3380c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ADR   idy, pTempBuf
3390c1bc742181ded4930842b46e9507372f0b1b963James Dong        BL      armVCM4P10_InterpolateLuma_HorDiagCopy_unsafe
3400c1bc742181ded4930842b46e9507372f0b1b963James Dong        BL      armVCM4P10_Average_4x4_Align3_unsafe
3410c1bc742181ded4930842b46e9507372f0b1b963James Dong        B       Block4x4LoopEnd
3420c1bc742181ded4930842b46e9507372f0b1b963James DongCase_c
3430c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Case n
3440c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_PRINTF "Case c \n"
3450c1bc742181ded4930842b46e9507372f0b1b963James Dong
3460c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB     pSrc, pSrc, srcStep, LSL #1
3470c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV     Height, #9
3480c1bc742181ded4930842b46e9507372f0b1b963James Dong        BL      armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
3490c1bc742181ded4930842b46e9507372f0b1b963James Dong        BL      armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
3500c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD     pSrc, pSrc, srcStep                     ;// Update pSrc to one row down
3510c1bc742181ded4930842b46e9507372f0b1b963James Dong        BL      armVCM4P10_Average_4x4_Align0_unsafe
3520c1bc742181ded4930842b46e9507372f0b1b963James Dong        B       Block4x4LoopEnd
3530c1bc742181ded4930842b46e9507372f0b1b963James DongCase_d
        ;// Handler entered at label Case_d (the branch that selects it is outside
        ;// this section). It runs the Hor* helpers with output directed at
        ;// pTempBuf, then the Ver* helpers on the reloaded source pointer, and
        ;// finally calls the Average helper with pSrc pointing at pTempBuf.
        ;// NOTE(review): all *_unsafe helpers are defined elsewhere; their implicit
        ;// register inputs/outputs are assumed here, not verified.
3540c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Case p
        ;// NOTE(review): "p" presumably names the H.264 fractional sample position
        ;// served by this handler (label suffix "d" looks like a hex index) - confirm.
3550c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_PRINTF "Case d \n"
3560c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB     pSrc, pSrc, #2                          ;// pSrc -= 2
3570c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD     pSrc, pSrc, srcStep                     ;// pSrc += srcStep (one row down)
3580c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV     Height, #4                              ;// Height = 4 for the HorAlign call
3590c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ADR   pDst, pTempBuf                          ;// pDst = pTempBuf instead of the caller's destination
3600c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV     dstStep, #4                             ;// dstStep = 4
3610c1bc742181ded4930842b46e9507372f0b1b963James Dong        BL      armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
3620c1bc742181ded4930842b46e9507372f0b1b963James Dong        BL      armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
3630c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ADR   pArgs, ppArgs
3640c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDM     pArgs, {pSrc, srcStep, pDst, dstStep}   ;// Reload the four saved arguments from ppArgs
3650c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB     pSrc, pSrc, srcStep, LSL #1             ;// pSrc -= 2*srcStep
3660c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ADR   pBuf, pBuffer                           ;// pBuf = pBuffer
3670c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV     Height, #9                              ;// Height = 9 for the VerAlign call
3680c1bc742181ded4930842b46e9507372f0b1b963James Dong        BL      armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
3690c1bc742181ded4930842b46e9507372f0b1b963James Dong        BL      armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
3700c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ADR   pSrc, pTempBuf                          ;// pSrc = pTempBuf (written via pDst above)
3710c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV     srcStep, #4                             ;// srcStep = 4, matching the dstStep used above
3720c1bc742181ded4930842b46e9507372f0b1b963James Dong        BL      armVCM4P10_Average_4x4_Align0_unsafe    ;// NOTE(review): presumably averages pTempBuf with the HalfVer result - confirm
3730c1bc742181ded4930842b46e9507372f0b1b963James Dong        B       Block4x4LoopEnd                         ;// Continue with the width/height loop bookkeeping
3740c1bc742181ded4930842b46e9507372f0b1b963James DongCase_e
        ;// Handler entered at label Case_e (the branch that selects it is outside
        ;// this section). It calls HorAlign9x with Height = 9, then the
        ;// HalfDiagHorVer4x4 and VerDiagCopy helpers using pInterBuf and pTempBuf,
        ;// and finally the Average helper.
        ;// NOTE(review): all *_unsafe helpers are defined elsewhere; their implicit
        ;// register inputs/outputs are assumed here, not verified.
3750c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Case q
        ;// NOTE(review): "q" presumably names the H.264 fractional sample position
        ;// served by this handler (label suffix "e" looks like a hex index) - confirm.
3760c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_PRINTF "Case e \n"
3770c1bc742181ded4930842b46e9507372f0b1b963James Dong
3780c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB     pSrc, pSrc, #2                          ;// pSrc -= 2
3790c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB     pSrc, pSrc, srcStep, LSL #1             ;// pSrc -= 2*srcStep
3800c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV     Height, #9                              ;// Height = 9 for the HorAlign call
3810c1bc742181ded4930842b46e9507372f0b1b963James Dong        BL      armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
3820c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ADR   pBuf, pInterBuf                         ;// pBuf = pInterBuf
3830c1bc742181ded4930842b46e9507372f0b1b963James Dong        BL      armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe
3840c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ADR   idy, pTempBuf                           ;// idy = pTempBuf
3850c1bc742181ded4930842b46e9507372f0b1b963James Dong        BL      armVCM4P10_InterpolateLuma_VerDiagCopy_unsafe
3860c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD     pSrc, pSrc, #4                          ;// pSrc += 4; NOTE(review): relies on the pSrc value left by VerDiagCopy - confirm
3870c1bc742181ded4930842b46e9507372f0b1b963James Dong        BL      armVCM4P10_Average_4x4_Align0_unsafe
3880c1bc742181ded4930842b46e9507372f0b1b963James Dong
3890c1bc742181ded4930842b46e9507372f0b1b963James Dong        B       Block4x4LoopEnd                         ;// Continue with the width/height loop bookkeeping
3900c1bc742181ded4930842b46e9507372f0b1b963James DongCase_f
        ;// Handler entered at label Case_f (the branch that selects it is outside
        ;// this section). The instruction sequence matches Case_d except for one
        ;// extra "ADD pSrc, pSrc, #1" before the Ver* helpers. There is no branch at
        ;// the end: execution falls through into Block4x4LoopEnd.
        ;// NOTE(review): all *_unsafe helpers are defined elsewhere; their implicit
        ;// register inputs/outputs are assumed here, not verified.
3910c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Case r
        ;// NOTE(review): "r" presumably names the H.264 fractional sample position
        ;// served by this handler (label suffix "f" looks like a hex index) - confirm.
3920c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_PRINTF "Case f \n"
3930c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB     pSrc, pSrc, #2                          ;// pSrc -= 2
3940c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD     pSrc, pSrc, srcStep                     ;// pSrc += srcStep (one row down)
3950c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV     Height, #4                              ;// Height = 4 for the HorAlign call
3960c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ADR   pDst, pTempBuf                          ;// pDst = pTempBuf instead of the caller's destination
3970c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV     dstStep, #4                             ;// dstStep = 4
3980c1bc742181ded4930842b46e9507372f0b1b963James Dong        BL      armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
3990c1bc742181ded4930842b46e9507372f0b1b963James Dong        BL      armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
4000c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ADR   pArgs, ppArgs
4010c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDM     pArgs, {pSrc, srcStep, pDst, dstStep}   ;// Reload the four saved arguments from ppArgs
4020c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB     pSrc, pSrc, srcStep, LSL #1             ;// pSrc -= 2*srcStep
4030c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD     pSrc, pSrc, #1                          ;// pSrc += 1 (the only difference from Case_d)
4040c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ADR   pBuf, pBuffer                           ;// pBuf = pBuffer
4050c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV     Height, #9                              ;// Height = 9 for the VerAlign call
4060c1bc742181ded4930842b46e9507372f0b1b963James Dong        BL      armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
4070c1bc742181ded4930842b46e9507372f0b1b963James Dong        BL      armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
4080c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ADR   pSrc, pTempBuf                          ;// pSrc = pTempBuf (written via pDst above)
4090c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV     srcStep, #4                             ;// srcStep = 4, matching the dstStep used above
4100c1bc742181ded4930842b46e9507372f0b1b963James Dong        BL      armVCM4P10_Average_4x4_Align0_unsafe    ;// NOTE(review): presumably averages pTempBuf with the HalfVer result - confirm
4110c1bc742181ded4930842b46e9507372f0b1b963James Dong
4120c1bc742181ded4930842b46e9507372f0b1b963James DongBlock4x4LoopEnd
        ;// Common tail of every case handler: advance to the next 4-column block,
        ;// and once a row of blocks is finished, advance to the next 4 rows.
        ;// The loop heads (Block4x4WidthLoop / Block4x4HeightLoop) are outside this
        ;// section. In both loops the flags set by SUBS are consumed by the BGT a few
        ;// instructions later; the LDM/ADD/SUB in between do not update flags.
        ;// NOTE(review): assumes the M_ADR / M_LDR macros also leave flags intact.
4130c1bc742181ded4930842b46e9507372f0b1b963James Dong
4140c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Width Loop
4150c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUBS    iWidth, iWidth, #4                  ;// iWidth -= 4; flags feed the BGT below
4160c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ADR   pArgs, ppArgs
4170c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDM     pArgs, {pSrc,srcStep,pDst,dstStep}  ;// Load arguments
4180c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD     pSrc, pSrc, #4                      ;// pSrc += 4 (next block of 4 columns)
4190c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD     pDst, pDst, #4                      ;// pDst += 4
4200c1bc742181ded4930842b46e9507372f0b1b963James Dong        BGT     Block4x4WidthLoop                   ;// Loop while iWidth > 0
4210c1bc742181ded4930842b46e9507372f0b1b963James Dong
4220c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Height Loop
4230c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUBS    iHeight, iHeight, #4                ;// iHeight -= 4; flags feed the BGT below
4240c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDR   iWidth, ptrWidth                    ;// Reload iWidth from ptrWidth for the next row of blocks
4250c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ADR   pArgs, ppArgs
4260c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD     pSrc, pSrc, srcStep, LSL #2         ;// pSrc += 4*srcStep
4270c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD     pDst, pDst, dstStep, LSL #2         ;// pDst += 4*dstStep
4280c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB     pSrc, pSrc, iWidth                  ;// pSrc -= iWidth (undo the per-block +4 steps of the width loop)
4290c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB     pDst, pDst, iWidth                  ;// pDst -= iWidth
4300c1bc742181ded4930842b46e9507372f0b1b963James Dong        BGT     Block4x4HeightLoop                  ;// Loop while iHeight > 0
4310c1bc742181ded4930842b46e9507372f0b1b963James Dong
4320c1bc742181ded4930842b46e9507372f0b1b963James DongEndOfInterpolation
        ;// Exit path: reached by falling out of the height loop above.
        ;// NOTE(review): may also be a branch target from code outside this section.
4330c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV     r0, #0                              ;// r0 = 0; NOTE(review): presumably the success status return value - confirm
4340c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_END                                       ;// NOTE(review): macro defined elsewhere; presumably emits the function epilogue/return
4350c1bc742181ded4930842b46e9507372f0b1b963James Dong
4360c1bc742181ded4930842b46e9507372f0b1b963James Dong    ENDIF
4370c1bc742181ded4930842b46e9507372f0b1b963James Dong
4380c1bc742181ded4930842b46e9507372f0b1b963James Dong
4390c1bc742181ded4930842b46e9507372f0b1b963James Dong    END
44078e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar
441