;//
;// Copyright (C) 2007-2008 ARM Limited
;//
;// Licensed under the Apache License, Version 2.0 (the "License");
;// you may not use this file except in compliance with the License.
;// You may obtain a copy of the License at
;//
;//      http://www.apache.org/licenses/LICENSE-2.0
;//
;// Unless required by applicable law or agreed to in writing, software
;// distributed under the License is distributed on an "AS IS" BASIS,
;// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
;// See the License for the specific language governing permissions and
;// limitations under the License.
;//
;//
;//
;// File Name:  omxVCM4P10_InterpolateLuma_s.s
;// OpenMAX DL: v1.0.2
;// Revision:   9641
;// Date:       Thursday, February 7, 2008
;//
;//
;//
;//

;// Function:
;//     omxVCM4P10_InterpolateLuma
;//
;// This function implements omxVCM4P10_InterpolateLuma in v6 assembly.
;// Performs quarter pel interpolation of inter luma MB.
;// It's assumed that the frame is already padded when calling this function.
;// Parameters:
;//     [in]    pSrc        Pointer to the source reference frame buffer
;//     [in]    srcStep     Reference frame step in byte
;//     [in]    dstStep     Destination frame step in byte. Must be multiple of roi.width
;//     [in]    dx          Fractional part of horizontal motion vector
;//                         component in 1/4 pixel unit; valid in the range [0,3]
;//     [in]    dy          Fractional part of vertical motion vector
;//                         component in 1/4 pixel unit; valid in the range [0,3]
;//     [in]    roi         Dimension of the interpolation region; the parameters roi.width and roi.height must
;//                         be equal to either 4, 8, or 16.
;//     [out]   pDst        Pointer to the destination frame buffer.
;//                   if roi.width==4,  4-byte alignment required
;//                   if roi.width==8,  8-byte alignment required
;//                   if roi.width==16, 16-byte alignment required
;//
;// Return Value:
;//     If the function runs without error, it returns OMX_Sts_NoErr.
;//     It is assumed that the following cases are satisfied before calling this function:
;//     pSrc or pDst is not NULL.
;//     srcStep or dstStep >= roi.width.
;//     dx or dy is in the range [0-3].
;//     roi.width or roi.height is not out of range {4, 8, 16}.
;//     If roi.width is equal to 4, pDst is 4 byte aligned.
;//     If roi.width is equal to 8, pDst is 8 byte aligned.
;//     If roi.width is equal to 16, pDst is 16 byte aligned.
;//     srcStep and dstStep are multiples of 8.
;//
;//


        INCLUDE omxtypes_s.h
        INCLUDE armCOMM_s.h

        M_VARIANTS ARM1136JS

        EXPORT omxVCM4P10_InterpolateLuma

    IF ARM1136JS
        IMPORT armVCM4P10_InterpolateLuma_Copy4x4_unsafe
        IMPORT armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
        IMPORT armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
        IMPORT armVCM4P10_Average_4x4_Align0_unsafe
        IMPORT armVCM4P10_Average_4x4_Align2_unsafe
        IMPORT armVCM4P10_Average_4x4_Align3_unsafe
        IMPORT armVCM4P10_InterpolateLuma_HorDiagCopy_unsafe
        IMPORT armVCM4P10_InterpolateLuma_VerDiagCopy_unsafe
    ENDIF

    IF ARM1136JS
        IMPORT armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
        IMPORT armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
        IMPORT armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe
        IMPORT armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe
    ENDIF



;// Declare input registers
pSrc            RN 0
srcStep         RN 1
pDst            RN 2
dstStep         RN 3
iHeight         RN 4
iWidth          RN 5

;// Declare other intermediate registers (declared once for the whole file)
idx             RN 6
idy             RN 7
index           RN 6            ;// shares r6 with idx: index overwrites idx once formed
Temp            RN 12           ;// not referenced anywhere in this file
pArgs           RN 11


;//-------------------------------------------------------------------------------------------------------------------------
;//-------------------------------------------------------------------------------------------------------------------------
    IF ARM1136JS


        ;// Stack-frame locals (must be declared before M_START)
        M_ALLOC4    ppDst, 8
        M_ALLOC4    ppSrc, 8
        M_ALLOC4    ppArgs, 16          ;// holds {pSrc, srcStep, pDst, dstStep} of the current 4x4 block
        M_ALLOC4    pBuffer, 120        ;// 120 = 12x10
        M_ALLOC8    pInterBuf, 120      ;// 120 = 12*5*2
        M_ALLOC8    pTempBuf, 32        ;// 32 = 8*4

        ;// Function header
        ;// Interpolation of luma is implemented by processing block of pixels, size 4x4 at a time.
        ;// Depending on the values of motion vector fractional parts (dx,dy), one out of 16 cases will be processed.
        ;// Registers r4, r5, r6 to be preserved by internal unsafe functions
        ;// r4 - iHeight
        ;// r5 - iWidth
        ;// r6 - index
        ;//
        ;// In:   r0 = pSrc, r1 = srcStep, r2 = pDst, r3 = dstStep,
        ;//       stack = dx, dy, roi.width, roi.height (in that order)
        ;// Out:  r0 = 0 (documented above as OMX_Sts_NoErr)
        ;// NOTE(review): register save/restore is done by M_START/M_END from armCOMM_s.h;
        ;//               the "r11" operand is presumably the highest register to preserve - confirm.
        M_START     omxVCM4P10_InterpolateLuma, r11

;// Registers used only inside the function body
pBuf            RN 8
Height          RN 9
bufStep         RN 9            ;// alias of Height; not referenced in this file

        ;// Define stack arguments
        M_ARG       ptridx, 4
        M_ARG       ptridy, 4
        M_ARG       ptrWidth, 4
        M_ARG       ptrHeight, 4

        ;// Load structure elements of roi
        M_LDR       idx, ptridx
        M_LDR       idy, ptridy
        M_LDR       iWidth, ptrWidth
        M_LDR       iHeight, ptrHeight

        M_PRINTF    "roi.width %d\n", iWidth
        M_PRINTF    "roi.height %d\n", iHeight

        ADD         index, idx, idy, LSL #2     ;// [index] = [idy][idx], i.e. index = dx + 4*dy
        M_ADR       pArgs, ppArgs

        ;// Both loops re-enter at the same address; the three labels are aliases.
InterpolateLuma
Block4x4WidthLoop
Block4x4HeightLoop

        ;// Save the block's arguments: several cases overwrite r0-r3 and reload them later.
        STM         pArgs, {pSrc,srcStep,pDst,dstStep}
        M_ADR       pBuf, pBuffer

        ;// switch table using motion vector as index
        M_SWITCH    index, L
        M_CASE      Case_0
        M_CASE      Case_1
        M_CASE      Case_2
        M_CASE      Case_3
        M_CASE      Case_4
        M_CASE      Case_5
        M_CASE      Case_6
        M_CASE      Case_7
        M_CASE      Case_8
        M_CASE      Case_9
        M_CASE      Case_a
        M_CASE      Case_b
        M_CASE      Case_c
        M_CASE      Case_d
        M_CASE      Case_e
        M_CASE      Case_f
        M_ENDSWITCH

Case_0
        ;// Case G  (dx=0, dy=0)
        M_PRINTF    "Case 0 \n"

        BL          armVCM4P10_InterpolateLuma_Copy4x4_unsafe
        B           Block4x4LoopEnd

Case_1
        ;// Case a  (dx=1, dy=0)
        M_PRINTF    "Case 1 \n"

        SUB         pSrc, pSrc, #2                  ;// start two columns to the left
        MOV         Height, #4
        BL          armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
        BL          armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
        BL          armVCM4P10_Average_4x4_Align2_unsafe
        B           Block4x4LoopEnd

Case_2
        ;// Case b  (dx=2, dy=0)
        M_PRINTF    "Case 2 \n"

        SUB         pSrc, pSrc, #2                  ;// start two columns to the left
        MOV         Height, #4
        BL          armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
        BL          armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
        B           Block4x4LoopEnd

Case_3
        ;// Case c  (dx=3, dy=0)
        M_PRINTF    "Case 3 \n"

        SUB         pSrc, pSrc, #2                  ;// start two columns to the left
        MOV         Height, #4
        BL          armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
        BL          armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
        BL          armVCM4P10_Average_4x4_Align3_unsafe
        B           Block4x4LoopEnd

Case_4
        ;// Case d  (dx=0, dy=1)
        M_PRINTF    "Case 4 \n"

        SUB         pSrc, pSrc, srcStep, LSL #1     ;// start two rows above
        MOV         Height, #9
        BL          armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
        BL          armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
        BL          armVCM4P10_Average_4x4_Align0_unsafe

        B           Block4x4LoopEnd

Case_5
        ;// Case e  (dx=1, dy=1)
        M_PRINTF    "Case 5 \n"

        ;// Pass 1: horizontal half-pel, redirected into pTempBuf (step 4)
        SUB         pSrc, pSrc, #2
        MOV         Height, #4
        M_ADR       pDst, pTempBuf
        MOV         dstStep, #4
        BL          armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
        BL          armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe

        ;// Pass 2: reload the original arguments, vertical half-pel
        M_ADR       pArgs, ppArgs
        LDM         pArgs, {pSrc, srcStep, pDst, dstStep}
        SUB         pSrc, pSrc, srcStep, LSL #1
        M_ADR       pBuf, pBuffer
        MOV         Height, #9
        BL          armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
        BL          armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe

        ;// Average with the pass-1 result held in pTempBuf
        M_ADR       pSrc, pTempBuf
        MOV         srcStep, #4
        BL          armVCM4P10_Average_4x4_Align0_unsafe


        B           Block4x4LoopEnd

Case_6
        ;// Case f  (dx=2, dy=1)
        M_PRINTF    "Case 6 \n"

        SUB         pSrc, pSrc, #2
        SUB         pSrc, pSrc, srcStep, LSL #1
        MOV         Height, #9
        BL          armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
        M_ADR       pBuf, pInterBuf
        BL          armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe
        ;// idy (r7) is no longer needed once index is formed; it is reused here to
        ;// carry the pTempBuf address (presumably consumed by the DiagCopy helper - confirm).
        M_ADR       idy, pTempBuf
        BL          armVCM4P10_InterpolateLuma_VerDiagCopy_unsafe
        BL          armVCM4P10_Average_4x4_Align0_unsafe
        B           Block4x4LoopEnd

Case_7
        ;// Case g  (dx=3, dy=1)
        M_PRINTF    "Case 7 \n"

        ;// Pass 1: horizontal half-pel, redirected into pTempBuf (step 4)
        SUB         pSrc, pSrc, #2
        MOV         Height, #4
        M_ADR       pDst, pTempBuf
        MOV         dstStep, #4
        BL          armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
        BL          armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe

        ;// Pass 2: reload the original arguments, vertical half-pel one column to the right
        M_ADR       pArgs, ppArgs
        LDM         pArgs, {pSrc, srcStep, pDst, dstStep}
        SUB         pSrc, pSrc, srcStep, LSL #1
        ADD         pSrc, pSrc, #1
        M_ADR       pBuf, pBuffer
        MOV         Height, #9
        BL          armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
        BL          armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe

        ;// Average with the pass-1 result held in pTempBuf
        M_ADR       pSrc, pTempBuf
        MOV         srcStep, #4
        BL          armVCM4P10_Average_4x4_Align0_unsafe

        B           Block4x4LoopEnd

Case_8
        ;// Case h  (dx=0, dy=2)
        M_PRINTF    "Case 8 \n"

        SUB         pSrc, pSrc, srcStep, LSL #1     ;// start two rows above
        MOV         Height, #9
        BL          armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
        BL          armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
        B           Block4x4LoopEnd

Case_9
        ;// Case i  (dx=1, dy=2)
        M_PRINTF    "Case 9 \n"

        SUB         pSrc, pSrc, #2
        SUB         pSrc, pSrc, srcStep, LSL #1
        MOV         Height, #9
        BL          armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
        ADD         pSrc, pSrc, srcStep, LSL #1
        M_ADR       pBuf, pInterBuf
        BL          armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe
        M_ADR       idy, pTempBuf                   ;// r7 reused as scratch (see Case_6)
        BL          armVCM4P10_InterpolateLuma_HorDiagCopy_unsafe
        BL          armVCM4P10_Average_4x4_Align2_unsafe
        B           Block4x4LoopEnd

Case_a
        ;// Case j  (dx=2, dy=2)
        M_PRINTF    "Case a \n"

        SUB         pSrc, pSrc, #2
        SUB         pSrc, pSrc, srcStep, LSL #1
        MOV         Height, #9
        BL          armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
        ADD         pSrc, pSrc, srcStep, LSL #1
        M_ADR       pBuf, pInterBuf
        BL          armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe
        B           Block4x4LoopEnd

Case_b
        ;// Case k  (dx=3, dy=2)
        M_PRINTF    "Case b \n"
        SUB         pSrc, pSrc, #2
        SUB         pSrc, pSrc, srcStep, LSL #1
        MOV         Height, #9
        BL          armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
        ADD         pSrc, pSrc, srcStep, LSL #1
        M_ADR       pBuf, pInterBuf
        BL          armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe
        M_ADR       idy, pTempBuf                   ;// r7 reused as scratch (see Case_6)
        BL          armVCM4P10_InterpolateLuma_HorDiagCopy_unsafe
        BL          armVCM4P10_Average_4x4_Align3_unsafe
        B           Block4x4LoopEnd

Case_c
        ;// Case n  (dx=0, dy=3)
        M_PRINTF    "Case c \n"

        SUB         pSrc, pSrc, srcStep, LSL #1     ;// start two rows above
        MOV         Height, #9
        BL          armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
        BL          armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
        ADD         pSrc, pSrc, srcStep             ;// Update pSrc to one row down
        BL          armVCM4P10_Average_4x4_Align0_unsafe
        B           Block4x4LoopEnd

Case_d
        ;// Case p  (dx=1, dy=3)
        M_PRINTF    "Case d \n"

        ;// Pass 1: horizontal half-pel on the row below, redirected into pTempBuf (step 4)
        SUB         pSrc, pSrc, #2
        ADD         pSrc, pSrc, srcStep
        MOV         Height, #4
        M_ADR       pDst, pTempBuf
        MOV         dstStep, #4
        BL          armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
        BL          armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe

        ;// Pass 2: reload the original arguments, vertical half-pel
        M_ADR       pArgs, ppArgs
        LDM         pArgs, {pSrc, srcStep, pDst, dstStep}
        SUB         pSrc, pSrc, srcStep, LSL #1
        M_ADR       pBuf, pBuffer
        MOV         Height, #9
        BL          armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
        BL          armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe

        ;// Average with the pass-1 result held in pTempBuf
        M_ADR       pSrc, pTempBuf
        MOV         srcStep, #4
        BL          armVCM4P10_Average_4x4_Align0_unsafe
        B           Block4x4LoopEnd

Case_e
        ;// Case q  (dx=2, dy=3)
        M_PRINTF    "Case e \n"

        SUB         pSrc, pSrc, #2
        SUB         pSrc, pSrc, srcStep, LSL #1
        MOV         Height, #9
        BL          armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
        M_ADR       pBuf, pInterBuf
        BL          armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe
        M_ADR       idy, pTempBuf                   ;// r7 reused as scratch (see Case_6)
        BL          armVCM4P10_InterpolateLuma_VerDiagCopy_unsafe
        ADD         pSrc, pSrc, #4
        BL          armVCM4P10_Average_4x4_Align0_unsafe

        B           Block4x4LoopEnd

Case_f
        ;// Case r  (dx=3, dy=3)
        M_PRINTF    "Case f \n"

        ;// Pass 1: horizontal half-pel on the row below, redirected into pTempBuf (step 4)
        SUB         pSrc, pSrc, #2
        ADD         pSrc, pSrc, srcStep
        MOV         Height, #4
        M_ADR       pDst, pTempBuf
        MOV         dstStep, #4
        BL          armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
        BL          armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe

        ;// Pass 2: reload the original arguments, vertical half-pel one column to the right
        M_ADR       pArgs, ppArgs
        LDM         pArgs, {pSrc, srcStep, pDst, dstStep}
        SUB         pSrc, pSrc, srcStep, LSL #1
        ADD         pSrc, pSrc, #1
        M_ADR       pBuf, pBuffer
        MOV         Height, #9
        BL          armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
        BL          armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe

        ;// Average with the pass-1 result held in pTempBuf
        M_ADR       pSrc, pTempBuf
        MOV         srcStep, #4
        BL          armVCM4P10_Average_4x4_Align0_unsafe
        ;// last case: falls through to Block4x4LoopEnd

Block4x4LoopEnd

        ;// Width Loop
        ;// Flags set by SUBS are consumed by BGT below; the instructions in between
        ;// are written without the S suffix so they leave the flags alone.
        SUBS        iWidth, iWidth, #4
        M_ADR       pArgs, ppArgs
        LDM         pArgs, {pSrc,srcStep,pDst,dstStep}  ;// Load arguments
        ADD         pSrc, pSrc, #4                      ;// next 4x4 block to the right
        ADD         pDst, pDst, #4
        BGT         Block4x4WidthLoop

        ;// Height Loop
        ;// pSrc/pDst already point one block past the end of the row: move down four
        ;// lines and back by roi.width to reach the first block of the next row.
        SUBS        iHeight, iHeight, #4
        M_LDR       iWidth, ptrWidth                    ;// restart the width counter
        M_ADR       pArgs, ppArgs
        ADD         pSrc, pSrc, srcStep, LSL #2
        ADD         pDst, pDst, dstStep, LSL #2
        SUB         pSrc, pSrc, iWidth
        SUB         pDst, pDst, iWidth
        BGT         Block4x4HeightLoop

EndOfInterpolation
        MOV         r0, #0
        M_END

    ENDIF


        END