10c1bc742181ded4930842b46e9507372f0b1b963James Dong;// 278e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// Copyright (C) 2007-2008 ARM Limited 378e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// 478e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// Licensed under the Apache License, Version 2.0 (the "License"); 578e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// you may not use this file except in compliance with the License. 678e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// You may obtain a copy of the License at 778e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// 878e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// http://www.apache.org/licenses/LICENSE-2.0 978e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// 1078e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// Unless required by applicable law or agreed to in writing, software 1178e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// distributed under the License is distributed on an "AS IS" BASIS, 1278e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 1378e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// See the License for the specific language governing permissions and 1478e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// limitations under the License. 
1578e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// 1678e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// 170c1bc742181ded4930842b46e9507372f0b1b963James Dong;// 180c1bc742181ded4930842b46e9507372f0b1b963James Dong;// File Name: omxVCM4P10_InterpolateLuma_s.s 190c1bc742181ded4930842b46e9507372f0b1b963James Dong;// OpenMAX DL: v1.0.2 200c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Revision: 12290 210c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Date: Wednesday, April 9, 2008 220c1bc742181ded4930842b46e9507372f0b1b963James Dong;// 230c1bc742181ded4930842b46e9507372f0b1b963James Dong;// 240c1bc742181ded4930842b46e9507372f0b1b963James Dong;// 250c1bc742181ded4930842b46e9507372f0b1b963James Dong;// 260c1bc742181ded4930842b46e9507372f0b1b963James Dong 270c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Function: 280c1bc742181ded4930842b46e9507372f0b1b963James Dong;// omxVCM4P10_InterpolateLuma 290c1bc742181ded4930842b46e9507372f0b1b963James Dong;// 300c1bc742181ded4930842b46e9507372f0b1b963James Dong;// This function implements omxVCM4P10_InterpolateLuma in v6 assembly. 310c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Performs quarter pel interpolation of inter luma MB. 320c1bc742181ded4930842b46e9507372f0b1b963James Dong;// It's assumed that the frame is already padded when calling this function. 330c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Parameters: 340c1bc742181ded4930842b46e9507372f0b1b963James Dong;// [in] pSrc Pointer to the source reference frame buffer 350c1bc742181ded4930842b46e9507372f0b1b963James Dong;// [in] srcStep Reference frame step in byte 360c1bc742181ded4930842b46e9507372f0b1b963James Dong;// [in] dstStep Destination frame step in byte. 
Must be multiple of roi.width 370c1bc742181ded4930842b46e9507372f0b1b963James Dong;// [in] dx Fractional part of horizontal motion vector 380c1bc742181ded4930842b46e9507372f0b1b963James Dong;// component in 1/4 pixel unit; valid in the range [0,3] 390c1bc742181ded4930842b46e9507372f0b1b963James Dong;// [in] dy Fractional part of vertical motion vector 400c1bc742181ded4930842b46e9507372f0b1b963James Dong;// component in 1/4 pixel unit; valid in the range [0,3] 410c1bc742181ded4930842b46e9507372f0b1b963James Dong;// [in] roi Dimension of the interpolation region; the parameters roi.width and roi.height must 420c1bc742181ded4930842b46e9507372f0b1b963James Dong;// be equal to either 4, 8, or 16. 430c1bc742181ded4930842b46e9507372f0b1b963James Dong;// [out] pDst Pointer to the destination frame buffer. 440c1bc742181ded4930842b46e9507372f0b1b963James Dong;// if roi.width==4, 4-byte alignment required 450c1bc742181ded4930842b46e9507372f0b1b963James Dong;// if roi.width==8, 8-byte alignment required 460c1bc742181ded4930842b46e9507372f0b1b963James Dong;// if roi.width==16, 16-byte alignment required 470c1bc742181ded4930842b46e9507372f0b1b963James Dong;// 480c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Return Value: 490c1bc742181ded4930842b46e9507372f0b1b963James Dong;// If the function runs without error, it returns OMX_Sts_NoErr. 500c1bc742181ded4930842b46e9507372f0b1b963James Dong;// It is assumed that the following cases are satisfied before calling this function: 510c1bc742181ded4930842b46e9507372f0b1b963James Dong;// pSrc or pDst is not NULL. 520c1bc742181ded4930842b46e9507372f0b1b963James Dong;// srcStep or dstStep >= roi.width. 530c1bc742181ded4930842b46e9507372f0b1b963James Dong;// dx or dy is in the range [0-3]. 540c1bc742181ded4930842b46e9507372f0b1b963James Dong;// roi.width or roi.height is not out of range {4, 8, 16}. 550c1bc742181ded4930842b46e9507372f0b1b963James Dong;// If roi.width is equal to 4, pDst is 4 byte aligned. 
560c1bc742181ded4930842b46e9507372f0b1b963James Dong;// If roi.width is equal to 8, pDst is 8 byte aligned. 570c1bc742181ded4930842b46e9507372f0b1b963James Dong;// If roi.width is equal to 16, pDst is 16 byte aligned. 580c1bc742181ded4930842b46e9507372f0b1b963James Dong;// srcStep and dstStep is multiple of 8. 590c1bc742181ded4930842b46e9507372f0b1b963James Dong;// 600c1bc742181ded4930842b46e9507372f0b1b963James Dong;// 610c1bc742181ded4930842b46e9507372f0b1b963James Dong 620c1bc742181ded4930842b46e9507372f0b1b963James Dong 630c1bc742181ded4930842b46e9507372f0b1b963James Dong INCLUDE omxtypes_s.h 640c1bc742181ded4930842b46e9507372f0b1b963James Dong INCLUDE armCOMM_s.h 650c1bc742181ded4930842b46e9507372f0b1b963James Dong 660c1bc742181ded4930842b46e9507372f0b1b963James Dong M_VARIANTS CortexA8 670c1bc742181ded4930842b46e9507372f0b1b963James Dong 680c1bc742181ded4930842b46e9507372f0b1b963James Dong EXPORT omxVCM4P10_InterpolateLuma 690c1bc742181ded4930842b46e9507372f0b1b963James Dong 700c1bc742181ded4930842b46e9507372f0b1b963James Dong 710c1bc742181ded4930842b46e9507372f0b1b963James Dong IF CortexA8 720c1bc742181ded4930842b46e9507372f0b1b963James Dong IMPORT armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe 730c1bc742181ded4930842b46e9507372f0b1b963James Dong IMPORT armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe 740c1bc742181ded4930842b46e9507372f0b1b963James Dong IMPORT armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe 750c1bc742181ded4930842b46e9507372f0b1b963James Dong IMPORT armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe 760c1bc742181ded4930842b46e9507372f0b1b963James Dong ENDIF 770c1bc742181ded4930842b46e9507372f0b1b963James Dong 780c1bc742181ded4930842b46e9507372f0b1b963James Dong 790c1bc742181ded4930842b46e9507372f0b1b963James Dong 800c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Declare input registers 810c1bc742181ded4930842b46e9507372f0b1b963James DongpSrc RN 0 820c1bc742181ded4930842b46e9507372f0b1b963James DongsrcStep RN 1 
830c1bc742181ded4930842b46e9507372f0b1b963James DongpDst RN 2 840c1bc742181ded4930842b46e9507372f0b1b963James DongdstStep RN 3 850c1bc742181ded4930842b46e9507372f0b1b963James DongiHeight RN 4 860c1bc742181ded4930842b46e9507372f0b1b963James DongiWidth RN 5 870c1bc742181ded4930842b46e9507372f0b1b963James Dong 880c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Declare other intermediate registers 890c1bc742181ded4930842b46e9507372f0b1b963James Dongidx RN 6 900c1bc742181ded4930842b46e9507372f0b1b963James Dongidy RN 7 910c1bc742181ded4930842b46e9507372f0b1b963James Dongindex RN 6 920c1bc742181ded4930842b46e9507372f0b1b963James DongTemp RN 12 930c1bc742181ded4930842b46e9507372f0b1b963James DongpArgs RN 11 940c1bc742181ded4930842b46e9507372f0b1b963James Dong 950c1bc742181ded4930842b46e9507372f0b1b963James Dong 960c1bc742181ded4930842b46e9507372f0b1b963James Dong IF CortexA8 970c1bc742181ded4930842b46e9507372f0b1b963James Dong 980c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// 990c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// Interpolation of luma is implemented by processing block of pixels, size 4x4 at a time. 
1000c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// 1010c1bc742181ded4930842b46e9507372f0b1b963James Dong M_ALLOC4 ppArgs, 16 1020c1bc742181ded4930842b46e9507372f0b1b963James Dong 1030c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// Function header 1040c1bc742181ded4930842b46e9507372f0b1b963James Dong M_START omxVCM4P10_InterpolateLuma, r11, d15 1050c1bc742181ded4930842b46e9507372f0b1b963James Dong 1060c1bc742181ded4930842b46e9507372f0b1b963James DongpSrcBK RN 8 1070c1bc742181ded4930842b46e9507372f0b1b963James Dong 1080c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Declare Neon registers 1090c1bc742181ded4930842b46e9507372f0b1b963James DongdCoeff5 DN 30.S16 1100c1bc742181ded4930842b46e9507372f0b1b963James DongdCoeff20 DN 31.S16 1110c1bc742181ded4930842b46e9507372f0b1b963James Dong 1120c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Registers used for implementing Horizontal interpolation 1130c1bc742181ded4930842b46e9507372f0b1b963James DongdSrc0c DN 14.U8 1140c1bc742181ded4930842b46e9507372f0b1b963James DongdSrc1c DN 16.U8 1150c1bc742181ded4930842b46e9507372f0b1b963James DongdSrc2c DN 18.U8 1160c1bc742181ded4930842b46e9507372f0b1b963James DongdSrc3c DN 20.U8 1170c1bc742181ded4930842b46e9507372f0b1b963James DongdSrc0d DN 15.U8 1180c1bc742181ded4930842b46e9507372f0b1b963James DongdSrc1d DN 17.U8 1190c1bc742181ded4930842b46e9507372f0b1b963James DongdSrc2d DN 19.U8 1200c1bc742181ded4930842b46e9507372f0b1b963James DongdSrc3d DN 21.U8 1210c1bc742181ded4930842b46e9507372f0b1b963James DongdAccH0 DN 22.U8 1220c1bc742181ded4930842b46e9507372f0b1b963James DongdAccH1 DN 24.U8 1230c1bc742181ded4930842b46e9507372f0b1b963James DongdAccH2 DN 26.U8 1240c1bc742181ded4930842b46e9507372f0b1b963James DongdAccH3 DN 28.U8 1250c1bc742181ded4930842b46e9507372f0b1b963James DongdResultH0 DN 22.U32 1260c1bc742181ded4930842b46e9507372f0b1b963James DongdResultH1 DN 24.U32 1270c1bc742181ded4930842b46e9507372f0b1b963James DongdResultH2 DN 26.U32 
1280c1bc742181ded4930842b46e9507372f0b1b963James DongdResultH3 DN 28.U32 1290c1bc742181ded4930842b46e9507372f0b1b963James Dong 1300c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Registers used for implementing Vertical interpolation 1310c1bc742181ded4930842b46e9507372f0b1b963James DongdSrc0 DN 9.U8 1320c1bc742181ded4930842b46e9507372f0b1b963James DongdSrc1 DN 10.U8 1330c1bc742181ded4930842b46e9507372f0b1b963James DongdSrc2 DN 11.U8 1340c1bc742181ded4930842b46e9507372f0b1b963James DongdSrc3 DN 12.U8 1350c1bc742181ded4930842b46e9507372f0b1b963James DongdSrc4 DN 13.U8 1360c1bc742181ded4930842b46e9507372f0b1b963James DongdAccV0 DN 0.U8 1370c1bc742181ded4930842b46e9507372f0b1b963James DongdAccV1 DN 2.U8 1380c1bc742181ded4930842b46e9507372f0b1b963James DongdAccV2 DN 4.U8 1390c1bc742181ded4930842b46e9507372f0b1b963James DongdAccV3 DN 6.U8 1400c1bc742181ded4930842b46e9507372f0b1b963James DongdResultV0 DN 0.U32 1410c1bc742181ded4930842b46e9507372f0b1b963James DongdResultV1 DN 2.U32 1420c1bc742181ded4930842b46e9507372f0b1b963James DongdResultV2 DN 4.U32 1430c1bc742181ded4930842b46e9507372f0b1b963James DongdResultV3 DN 6.U32 1440c1bc742181ded4930842b46e9507372f0b1b963James Dong 1450c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Registers used for implementing Diagonal interpolation 1460c1bc742181ded4930842b46e9507372f0b1b963James DongdTAcc0 DN 0.U8 1470c1bc742181ded4930842b46e9507372f0b1b963James DongdTAcc1 DN 2.U8 1480c1bc742181ded4930842b46e9507372f0b1b963James DongdTAcc2 DN 4.U8 1490c1bc742181ded4930842b46e9507372f0b1b963James DongdTAcc3 DN 6.U8 1500c1bc742181ded4930842b46e9507372f0b1b963James DongdTRes0 DN 0.32 1510c1bc742181ded4930842b46e9507372f0b1b963James DongdTRes1 DN 2.32 1520c1bc742181ded4930842b46e9507372f0b1b963James DongdTRes2 DN 4.32 1530c1bc742181ded4930842b46e9507372f0b1b963James DongdTRes3 DN 6.32 1540c1bc742181ded4930842b46e9507372f0b1b963James DongdTResult0 DN 14.U8 1550c1bc742181ded4930842b46e9507372f0b1b963James DongdTResult1 DN 16.U8 
1560c1bc742181ded4930842b46e9507372f0b1b963James DongdTResult2 DN 18.U8 1570c1bc742181ded4930842b46e9507372f0b1b963James DongdTResult3 DN 20.U8 1580c1bc742181ded4930842b46e9507372f0b1b963James DongdTempP0 DN 18.S16 1590c1bc742181ded4930842b46e9507372f0b1b963James DongdTempP1 DN 19.S16 1600c1bc742181ded4930842b46e9507372f0b1b963James DongdTempQ0 DN 20.S16 1610c1bc742181ded4930842b46e9507372f0b1b963James DongdTempQ1 DN 21.S16 1620c1bc742181ded4930842b46e9507372f0b1b963James DongdTempR0 DN 22.S16 1630c1bc742181ded4930842b46e9507372f0b1b963James DongdTempR1 DN 23.S16 1640c1bc742181ded4930842b46e9507372f0b1b963James DongdTempS0 DN 24.S16 1650c1bc742181ded4930842b46e9507372f0b1b963James DongdTempS1 DN 25.S16 1660c1bc742181ded4930842b46e9507372f0b1b963James DongqTempP01 QN 9.S16 1670c1bc742181ded4930842b46e9507372f0b1b963James DongqTempQ01 QN 10.S16 1680c1bc742181ded4930842b46e9507372f0b1b963James DongqTempR01 QN 11.S16 1690c1bc742181ded4930842b46e9507372f0b1b963James DongqTempS01 QN 12.S16 1700c1bc742181ded4930842b46e9507372f0b1b963James Dong 1710c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Intermediate values for averaging 1720c1bc742181ded4930842b46e9507372f0b1b963James DongqRes2 QN 7.S16 1730c1bc742181ded4930842b46e9507372f0b1b963James DongqRes3 QN 8.S16 1740c1bc742181ded4930842b46e9507372f0b1b963James DongqRes4 QN 9.S16 1750c1bc742181ded4930842b46e9507372f0b1b963James DongqRes5 QN 10.S16 1760c1bc742181ded4930842b46e9507372f0b1b963James DongqRes6 QN 11.S16 1770c1bc742181ded4930842b46e9507372f0b1b963James Dong 1780c1bc742181ded4930842b46e9507372f0b1b963James Dong;// For implementing copy 1790c1bc742181ded4930842b46e9507372f0b1b963James DongdDst0 DN 9.32 1800c1bc742181ded4930842b46e9507372f0b1b963James DongdDst1 DN 10.32 1810c1bc742181ded4930842b46e9507372f0b1b963James DongdDst2 DN 11.32 1820c1bc742181ded4930842b46e9507372f0b1b963James DongdDst3 DN 12.32 1830c1bc742181ded4930842b46e9507372f0b1b963James Dong 1840c1bc742181ded4930842b46e9507372f0b1b963James Dong 
;// Define stack arguments 1850c1bc742181ded4930842b46e9507372f0b1b963James Dong M_ARG ptridx, 4 1860c1bc742181ded4930842b46e9507372f0b1b963James Dong M_ARG ptridy, 4 1870c1bc742181ded4930842b46e9507372f0b1b963James Dong M_ARG ptrWidth, 4 1880c1bc742181ded4930842b46e9507372f0b1b963James Dong M_ARG ptrHeight, 4 1890c1bc742181ded4930842b46e9507372f0b1b963James Dong 1900c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// Load structure elements of roi 1910c1bc742181ded4930842b46e9507372f0b1b963James Dong M_LDR idx, ptridx 1920c1bc742181ded4930842b46e9507372f0b1b963James Dong M_LDR idy, ptridy 1930c1bc742181ded4930842b46e9507372f0b1b963James Dong M_LDR iWidth, ptrWidth 1940c1bc742181ded4930842b46e9507372f0b1b963James Dong M_LDR iHeight, ptrHeight 1950c1bc742181ded4930842b46e9507372f0b1b963James Dong 1960c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD index, idx, idy, LSL #2 ;// [index] = [idy][idx] 1970c1bc742181ded4930842b46e9507372f0b1b963James Dong M_ADR pArgs, ppArgs 1980c1bc742181ded4930842b46e9507372f0b1b963James Dong 1990c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// Move coefficients Neon registers 2000c1bc742181ded4930842b46e9507372f0b1b963James Dong VMOV dCoeff20, #20 2010c1bc742181ded4930842b46e9507372f0b1b963James Dong VMOV dCoeff5, #5 2020c1bc742181ded4930842b46e9507372f0b1b963James Dong 2030c1bc742181ded4930842b46e9507372f0b1b963James DongBlock4x4WidthLoop 2040c1bc742181ded4930842b46e9507372f0b1b963James DongBlock4x4HeightLoop 2050c1bc742181ded4930842b46e9507372f0b1b963James Dong 2060c1bc742181ded4930842b46e9507372f0b1b963James Dong STM pArgs, {pSrc,srcStep,pDst,dstStep} 2070c1bc742181ded4930842b46e9507372f0b1b963James Dong 2080c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// switch table using motion vector as index 2090c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD pc, pc, index, LSL #2 2100c1bc742181ded4930842b46e9507372f0b1b963James Dong B Case_f 2110c1bc742181ded4930842b46e9507372f0b1b963James Dong B Case_0 
2120c1bc742181ded4930842b46e9507372f0b1b963James Dong B Case_1 2130c1bc742181ded4930842b46e9507372f0b1b963James Dong B Case_2 2140c1bc742181ded4930842b46e9507372f0b1b963James Dong B Case_3 2150c1bc742181ded4930842b46e9507372f0b1b963James Dong B Case_4 2160c1bc742181ded4930842b46e9507372f0b1b963James Dong B Case_5 2170c1bc742181ded4930842b46e9507372f0b1b963James Dong B Case_6 2180c1bc742181ded4930842b46e9507372f0b1b963James Dong B Case_7 2190c1bc742181ded4930842b46e9507372f0b1b963James Dong B Case_8 2200c1bc742181ded4930842b46e9507372f0b1b963James Dong B Case_9 2210c1bc742181ded4930842b46e9507372f0b1b963James Dong B Case_a 2220c1bc742181ded4930842b46e9507372f0b1b963James Dong B Case_b 2230c1bc742181ded4930842b46e9507372f0b1b963James Dong B Case_c 2240c1bc742181ded4930842b46e9507372f0b1b963James Dong B Case_d 2250c1bc742181ded4930842b46e9507372f0b1b963James Dong B Case_e 2260c1bc742181ded4930842b46e9507372f0b1b963James Dong B Case_f 2270c1bc742181ded4930842b46e9507372f0b1b963James Dong 2280c1bc742181ded4930842b46e9507372f0b1b963James DongCase_0 2290c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// Case G 2300c1bc742181ded4930842b46e9507372f0b1b963James Dong M_PRINTF "Case 0 \n" 2310c1bc742181ded4930842b46e9507372f0b1b963James Dong 2320c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// Loads a 4x4 block of .8 and stores as .32 2330c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD Temp, pSrc, srcStep, LSL #1 2340c1bc742181ded4930842b46e9507372f0b1b963James Dong VLD1 dSrc0, [pSrc], srcStep 2350c1bc742181ded4930842b46e9507372f0b1b963James Dong VLD1 dSrc2, [Temp], srcStep 2360c1bc742181ded4930842b46e9507372f0b1b963James Dong VLD1 dSrc1, [pSrc] 2370c1bc742181ded4930842b46e9507372f0b1b963James Dong VLD1 dSrc3, [Temp] 2380c1bc742181ded4930842b46e9507372f0b1b963James Dong 2390c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD Temp, pDst, dstStep, LSL #1 2400c1bc742181ded4930842b46e9507372f0b1b963James Dong VST1 dDst0[0], [pDst], dstStep 
2410c1bc742181ded4930842b46e9507372f0b1b963James Dong VST1 dDst2[0], [Temp], dstStep 2420c1bc742181ded4930842b46e9507372f0b1b963James Dong VST1 dDst1[0], [pDst] 2430c1bc742181ded4930842b46e9507372f0b1b963James Dong VST1 dDst3[0], [Temp] 2440c1bc742181ded4930842b46e9507372f0b1b963James Dong M_ADR pArgs, ppArgs 2450c1bc742181ded4930842b46e9507372f0b1b963James Dong B Block4x4LoopEnd 2460c1bc742181ded4930842b46e9507372f0b1b963James DongCase_1 2470c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// Case a 2480c1bc742181ded4930842b46e9507372f0b1b963James Dong M_PRINTF "Case 1 \n" 2490c1bc742181ded4930842b46e9507372f0b1b963James Dong 2500c1bc742181ded4930842b46e9507372f0b1b963James Dong SUB pSrc, pSrc, #2 2510c1bc742181ded4930842b46e9507372f0b1b963James Dong BL armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe 2520c1bc742181ded4930842b46e9507372f0b1b963James Dong VRHADD dAccH0, dAccH0, dSrc0c 2530c1bc742181ded4930842b46e9507372f0b1b963James Dong VRHADD dAccH2, dAccH2, dSrc2c 2540c1bc742181ded4930842b46e9507372f0b1b963James Dong VRHADD dAccH1, dAccH1, dSrc1c 2550c1bc742181ded4930842b46e9507372f0b1b963James Dong VRHADD dAccH3, dAccH3, dSrc3c 2560c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD Temp, pDst, dstStep, LSL #1 2570c1bc742181ded4930842b46e9507372f0b1b963James Dong VST1 dResultH0[0], [pDst], dstStep 2580c1bc742181ded4930842b46e9507372f0b1b963James Dong VST1 dResultH2[0], [Temp], dstStep 2590c1bc742181ded4930842b46e9507372f0b1b963James Dong VST1 dResultH1[0], [pDst] 2600c1bc742181ded4930842b46e9507372f0b1b963James Dong VST1 dResultH3[0], [Temp] 2610c1bc742181ded4930842b46e9507372f0b1b963James Dong M_ADR pArgs, ppArgs 2620c1bc742181ded4930842b46e9507372f0b1b963James Dong B Block4x4LoopEnd 2630c1bc742181ded4930842b46e9507372f0b1b963James DongCase_2 2640c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// Case b 2650c1bc742181ded4930842b46e9507372f0b1b963James Dong M_PRINTF "Case 2 \n" 2660c1bc742181ded4930842b46e9507372f0b1b963James Dong 
2670c1bc742181ded4930842b46e9507372f0b1b963James Dong SUB pSrc, pSrc, #2 2680c1bc742181ded4930842b46e9507372f0b1b963James Dong BL armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe 2690c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD Temp, pDst, dstStep, LSL #1 2700c1bc742181ded4930842b46e9507372f0b1b963James Dong VST1 dResultH0[0], [pDst], dstStep 2710c1bc742181ded4930842b46e9507372f0b1b963James Dong VST1 dResultH2[0], [Temp], dstStep 2720c1bc742181ded4930842b46e9507372f0b1b963James Dong VST1 dResultH1[0], [pDst] 2730c1bc742181ded4930842b46e9507372f0b1b963James Dong VST1 dResultH3[0], [Temp] 2740c1bc742181ded4930842b46e9507372f0b1b963James Dong M_ADR pArgs, ppArgs 2750c1bc742181ded4930842b46e9507372f0b1b963James Dong B Block4x4LoopEnd 2760c1bc742181ded4930842b46e9507372f0b1b963James DongCase_3 2770c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// Case c 2780c1bc742181ded4930842b46e9507372f0b1b963James Dong M_PRINTF "Case 3 \n" 2790c1bc742181ded4930842b46e9507372f0b1b963James Dong 2800c1bc742181ded4930842b46e9507372f0b1b963James Dong SUB pSrc, pSrc, #2 2810c1bc742181ded4930842b46e9507372f0b1b963James Dong BL armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe 2820c1bc742181ded4930842b46e9507372f0b1b963James Dong VRHADD dAccH0, dAccH0, dSrc0d 2830c1bc742181ded4930842b46e9507372f0b1b963James Dong VRHADD dAccH2, dAccH2, dSrc2d 2840c1bc742181ded4930842b46e9507372f0b1b963James Dong VRHADD dAccH1, dAccH1, dSrc1d 2850c1bc742181ded4930842b46e9507372f0b1b963James Dong VRHADD dAccH3, dAccH3, dSrc3d 2860c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD Temp, pDst, dstStep, LSL #1 2870c1bc742181ded4930842b46e9507372f0b1b963James Dong VST1 dResultH0[0], [pDst], dstStep 2880c1bc742181ded4930842b46e9507372f0b1b963James Dong VST1 dResultH2[0], [Temp], dstStep 2890c1bc742181ded4930842b46e9507372f0b1b963James Dong VST1 dResultH1[0], [pDst] 2900c1bc742181ded4930842b46e9507372f0b1b963James Dong VST1 dResultH3[0], [Temp] 2910c1bc742181ded4930842b46e9507372f0b1b963James Dong M_ADR 
pArgs, ppArgs 2920c1bc742181ded4930842b46e9507372f0b1b963James Dong B Block4x4LoopEnd 2930c1bc742181ded4930842b46e9507372f0b1b963James DongCase_4 2940c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// Case d 2950c1bc742181ded4930842b46e9507372f0b1b963James Dong M_PRINTF "Case 4 \n" 2960c1bc742181ded4930842b46e9507372f0b1b963James Dong 2970c1bc742181ded4930842b46e9507372f0b1b963James Dong SUB pSrc, pSrc, srcStep, LSL #1 2980c1bc742181ded4930842b46e9507372f0b1b963James Dong BL armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe 2990c1bc742181ded4930842b46e9507372f0b1b963James Dong VRHADD dAccV0, dAccV0, dSrc0 3000c1bc742181ded4930842b46e9507372f0b1b963James Dong VRHADD dAccV2, dAccV2, dSrc2 3010c1bc742181ded4930842b46e9507372f0b1b963James Dong VRHADD dAccV1, dAccV1, dSrc1 3020c1bc742181ded4930842b46e9507372f0b1b963James Dong VRHADD dAccV3, dAccV3, dSrc3 3030c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD Temp, pDst, dstStep, LSL #1 3040c1bc742181ded4930842b46e9507372f0b1b963James Dong VST1 dResultV0[0], [pDst], dstStep 3050c1bc742181ded4930842b46e9507372f0b1b963James Dong VST1 dResultV2[0], [Temp], dstStep 3060c1bc742181ded4930842b46e9507372f0b1b963James Dong VST1 dResultV1[0], [pDst] 3070c1bc742181ded4930842b46e9507372f0b1b963James Dong VST1 dResultV3[0], [Temp] 3080c1bc742181ded4930842b46e9507372f0b1b963James Dong M_ADR pArgs, ppArgs 3090c1bc742181ded4930842b46e9507372f0b1b963James Dong B Block4x4LoopEnd 3100c1bc742181ded4930842b46e9507372f0b1b963James DongCase_5 3110c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// Case e 3120c1bc742181ded4930842b46e9507372f0b1b963James Dong M_PRINTF "Case 5 \n" 3130c1bc742181ded4930842b46e9507372f0b1b963James Dong 3140c1bc742181ded4930842b46e9507372f0b1b963James Dong MOV pSrcBK, pSrc 3150c1bc742181ded4930842b46e9507372f0b1b963James Dong SUB pSrc, pSrc, srcStep, LSL #1 3160c1bc742181ded4930842b46e9507372f0b1b963James Dong BL armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe 3170c1bc742181ded4930842b46e9507372f0b1b963James Dong 
SUB pSrc, pSrcBK, #2 3180c1bc742181ded4930842b46e9507372f0b1b963James Dong BL armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe 3190c1bc742181ded4930842b46e9507372f0b1b963James Dong VRHADD dAccH0, dAccH0, dAccV0 3200c1bc742181ded4930842b46e9507372f0b1b963James Dong VRHADD dAccH2, dAccH2, dAccV2 3210c1bc742181ded4930842b46e9507372f0b1b963James Dong VRHADD dAccH1, dAccH1, dAccV1 3220c1bc742181ded4930842b46e9507372f0b1b963James Dong VRHADD dAccH3, dAccH3, dAccV3 3230c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD Temp, pDst, dstStep, LSL #1 3240c1bc742181ded4930842b46e9507372f0b1b963James Dong VST1 dResultH0[0], [pDst], dstStep 3250c1bc742181ded4930842b46e9507372f0b1b963James Dong VST1 dResultH2[0], [Temp], dstStep 3260c1bc742181ded4930842b46e9507372f0b1b963James Dong VST1 dResultH1[0], [pDst] 3270c1bc742181ded4930842b46e9507372f0b1b963James Dong VST1 dResultH3[0], [Temp] 3280c1bc742181ded4930842b46e9507372f0b1b963James Dong 3290c1bc742181ded4930842b46e9507372f0b1b963James Dong M_ADR pArgs, ppArgs 3300c1bc742181ded4930842b46e9507372f0b1b963James Dong B Block4x4LoopEnd 3310c1bc742181ded4930842b46e9507372f0b1b963James DongCase_6 3320c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// Case f 3330c1bc742181ded4930842b46e9507372f0b1b963James Dong M_PRINTF "Case 6 \n" 3340c1bc742181ded4930842b46e9507372f0b1b963James Dong 3350c1bc742181ded4930842b46e9507372f0b1b963James Dong SUB pSrc, pSrc, srcStep, LSL #1 3360c1bc742181ded4930842b46e9507372f0b1b963James Dong SUB pSrc, pSrc, #2 3370c1bc742181ded4930842b46e9507372f0b1b963James Dong BL armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe 3380c1bc742181ded4930842b46e9507372f0b1b963James Dong VQRSHRUN dTResult0, qRes2, #5 3390c1bc742181ded4930842b46e9507372f0b1b963James Dong VQRSHRUN dTResult1, qRes3, #5 3400c1bc742181ded4930842b46e9507372f0b1b963James Dong VQRSHRUN dTResult2, qRes4, #5 3410c1bc742181ded4930842b46e9507372f0b1b963James Dong VQRSHRUN dTResult3, qRes5, #5 3420c1bc742181ded4930842b46e9507372f0b1b963James Dong 
VRHADD dTAcc0, dTAcc0, dTResult0 3430c1bc742181ded4930842b46e9507372f0b1b963James Dong VRHADD dTAcc2, dTAcc2, dTResult2 3440c1bc742181ded4930842b46e9507372f0b1b963James Dong VRHADD dTAcc1, dTAcc1, dTResult1 3450c1bc742181ded4930842b46e9507372f0b1b963James Dong VRHADD dTAcc3, dTAcc3, dTResult3 3460c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD Temp, pDst, dstStep, LSL #1 3470c1bc742181ded4930842b46e9507372f0b1b963James Dong VST1 dTRes0[0], [pDst], dstStep 3480c1bc742181ded4930842b46e9507372f0b1b963James Dong VST1 dTRes2[0], [Temp], dstStep 3490c1bc742181ded4930842b46e9507372f0b1b963James Dong VST1 dTRes1[0], [pDst] 3500c1bc742181ded4930842b46e9507372f0b1b963James Dong VST1 dTRes3[0], [Temp] 3510c1bc742181ded4930842b46e9507372f0b1b963James Dong 3520c1bc742181ded4930842b46e9507372f0b1b963James Dong M_ADR pArgs, ppArgs 3530c1bc742181ded4930842b46e9507372f0b1b963James Dong B Block4x4LoopEnd 3540c1bc742181ded4930842b46e9507372f0b1b963James DongCase_7 3550c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// Case g 3560c1bc742181ded4930842b46e9507372f0b1b963James Dong M_PRINTF "Case 7 \n" 3570c1bc742181ded4930842b46e9507372f0b1b963James Dong MOV pSrcBK, pSrc 3580c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD pSrc, pSrc, #1 3590c1bc742181ded4930842b46e9507372f0b1b963James Dong SUB pSrc, pSrc, srcStep, LSL #1 3600c1bc742181ded4930842b46e9507372f0b1b963James Dong BL armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe 3610c1bc742181ded4930842b46e9507372f0b1b963James Dong SUB pSrc, pSrcBK, #2 3620c1bc742181ded4930842b46e9507372f0b1b963James Dong BL armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe 3630c1bc742181ded4930842b46e9507372f0b1b963James Dong VRHADD dAccH0, dAccH0, dAccV0 3640c1bc742181ded4930842b46e9507372f0b1b963James Dong VRHADD dAccH2, dAccH2, dAccV2 3650c1bc742181ded4930842b46e9507372f0b1b963James Dong VRHADD dAccH1, dAccH1, dAccV1 3660c1bc742181ded4930842b46e9507372f0b1b963James Dong VRHADD dAccH3, dAccH3, dAccV3 3670c1bc742181ded4930842b46e9507372f0b1b963James 
;// ---------------------------------------------------------------------------
;// Tail of the per-4x4-block case dispatch, followed by the block loop control.
;//
;// Every case handler below has the same shape:
;//   1. adjust pSrc, then call one or two armVCM4P10_InterpolateLuma_*_unsafe
;//      helpers;
;//   2. optionally narrow and/or average the helper outputs;
;//   3. store lane 0 of four result registers to four destination rows:
;//      Temp = pDst + 2*dstStep, and the stores go to row 0 (pDst), row 2
;//      (Temp), row 1 (pDst + dstStep) and row 3 (Temp + dstStep);
;//   4. point pArgs at ppArgs and continue at Block4x4LoopEnd.
;//
;// NOTE(review): the register aliases (dResult*, dAcc*, dTRes*, dTAcc*, dSrc*,
;// qTemp*, qRes*) and the helper routines are declared outside this chunk.
;// The lane width of the VST1 stores and the element size used by VEXT come
;// from those declarations - presumably 4 bytes per stored row, since the
;// width loop advances pSrc/pDst by 4. Confirm against the declarations.
;// ---------------------------------------------------------------------------

        ;// End of the preceding case handler (its start is above this chunk):
        ;// store the four dResultH rows and go to the common loop tail.
        ADD         Temp, pDst, dstStep, LSL #1         ;// Temp = pDst + 2*dstStep
        VST1        dResultH0[0], [pDst], dstStep       ;// row 0, pDst += dstStep
        VST1        dResultH2[0], [Temp], dstStep       ;// row 2, Temp += dstStep
        VST1        dResultH1[0], [pDst]                ;// row 1
        VST1        dResultH3[0], [Temp]                ;// row 3

        M_ADR       pArgs, ppArgs
        B           Block4x4LoopEnd
Case_8
        ;// Case h
        ;// Vertical helper only; its dResultV outputs are stored directly.
        M_PRINTF "Case 8 \n"

        SUB         pSrc, pSrc, srcStep, LSL #1         ;// start two rows above
        BL          armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
        ADD         Temp, pDst, dstStep, LSL #1
        VST1        dResultV0[0], [pDst], dstStep
        VST1        dResultV2[0], [Temp], dstStep
        VST1        dResultV1[0], [pDst]
        VST1        dResultV3[0], [Temp]
        M_ADR       pArgs, ppArgs
        B           Block4x4LoopEnd
Case_9
        ;// Case i
        ;// Diagonal (VerHor) helper, then average its dTAcc output with the
        ;// helper's intermediate rows taken at element offset 2 and narrowed.
        M_PRINTF "Case 9 \n"
        SUB         pSrc, pSrc, srcStep, LSL #1         ;// two rows up
        SUB         pSrc, pSrc, #2                      ;// two bytes left
        BL          armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe
        ;// Realign each intermediate row: take elements starting at offset 2
        ;// of the dTempX0:dTempX1 pair.
        VEXT        dTempP0, dTempP0, dTempP1, #2
        VEXT        dTempQ0, dTempQ0, dTempQ1, #2
        VEXT        dTempR0, dTempR0, dTempR1, #2
        VEXT        dTempS0, dTempS0, dTempS1, #2

        ;// Saturating, rounding right shift by 5 with unsigned narrowing.
        VQRSHRUN    dTResult0, qTempP01, #5
        VQRSHRUN    dTResult1, qTempQ01, #5
        VQRSHRUN    dTResult2, qTempR01, #5
        VQRSHRUN    dTResult3, qTempS01, #5

        ;// Rounding average of the two predictions, row by row.
        VRHADD      dTAcc0, dTAcc0, dTResult0
        VRHADD      dTAcc2, dTAcc2, dTResult2
        VRHADD      dTAcc1, dTAcc1, dTResult1
        VRHADD      dTAcc3, dTAcc3, dTResult3
        ADD         Temp, pDst, dstStep, LSL #1
        VST1        dTRes0[0], [pDst], dstStep
        VST1        dTRes2[0], [Temp], dstStep
        VST1        dTRes1[0], [pDst]
        VST1        dTRes3[0], [Temp]
        M_ADR       pArgs, ppArgs
        B           Block4x4LoopEnd
Case_a
        ;// Case j
        ;// Diagonal (HorVer) helper only; its dTRes outputs are stored directly.
        M_PRINTF "Case a \n"

        SUB         pSrc, pSrc, srcStep, LSL #1         ;// two rows up
        SUB         pSrc, pSrc, #2                      ;// two bytes left
        BL          armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe
        ADD         Temp, pDst, dstStep, LSL #1
        VST1        dTRes0[0], [pDst], dstStep
        VST1        dTRes2[0], [Temp], dstStep
        VST1        dTRes1[0], [pDst]
        VST1        dTRes3[0], [Temp]
        M_ADR       pArgs, ppArgs
        B           Block4x4LoopEnd
Case_b
        ;// Case k
        ;// Same sequence as Case_9, except the intermediate rows are taken at
        ;// element offset 3 instead of 2.
        M_PRINTF "Case b \n"
        SUB         pSrc, pSrc, srcStep, LSL #1         ;// two rows up
        SUB         pSrc, pSrc, #2                      ;// two bytes left
        BL          armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe
        VEXT        dTempP0, dTempP0, dTempP1, #3
        VEXT        dTempQ0, dTempQ0, dTempQ1, #3
        VEXT        dTempR0, dTempR0, dTempR1, #3
        VEXT        dTempS0, dTempS0, dTempS1, #3

        ;// Saturating, rounding right shift by 5 with unsigned narrowing.
        VQRSHRUN    dTResult0, qTempP01, #5
        VQRSHRUN    dTResult1, qTempQ01, #5
        VQRSHRUN    dTResult2, qTempR01, #5
        VQRSHRUN    dTResult3, qTempS01, #5

        ;// Rounding average of the two predictions, row by row.
        VRHADD      dTAcc0, dTAcc0, dTResult0
        VRHADD      dTAcc2, dTAcc2, dTResult2
        VRHADD      dTAcc1, dTAcc1, dTResult1
        VRHADD      dTAcc3, dTAcc3, dTResult3
        ADD         Temp, pDst, dstStep, LSL #1
        VST1        dTRes0[0], [pDst], dstStep
        VST1        dTRes2[0], [Temp], dstStep
        VST1        dTRes1[0], [pDst]
        VST1        dTRes3[0], [Temp]
        M_ADR       pArgs, ppArgs
        B           Block4x4LoopEnd
Case_c
        ;// Case n
        ;// Vertical helper, then rounding average of each dAccV row with
        ;// dSrc1..dSrc4 (row i is paired with dSrc(i+1)).
        ;// NOTE(review): which source rows dSrc1..dSrc4 hold after the call is
        ;// defined by the helper, outside this chunk.
        M_PRINTF "Case c \n"

        SUB         pSrc, pSrc, srcStep, LSL #1         ;// start two rows above
        BL          armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
        VRHADD      dAccV0, dAccV0, dSrc1
        VRHADD      dAccV2, dAccV2, dSrc3
        VRHADD      dAccV1, dAccV1, dSrc2
        VRHADD      dAccV3, dAccV3, dSrc4
        ADD         Temp, pDst, dstStep, LSL #1
        VST1        dResultV0[0], [pDst], dstStep
        VST1        dResultV2[0], [Temp], dstStep
        VST1        dResultV1[0], [pDst]
        VST1        dResultV3[0], [Temp]
        M_ADR       pArgs, ppArgs
        B           Block4x4LoopEnd
Case_d
        ;// Case p
        ;// Vertical helper at the block position, horizontal helper one row
        ;// below it, then rounding average of the two results.
        M_PRINTF "Case d \n"

        MOV         pSrcBK, pSrc                        ;// keep the block origin
        SUB         pSrc, pSrc, srcStep, LSL #1         ;// two rows up
        BL          armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
        ADD         pSrc, pSrcBK, srcStep               ;// origin + one row
        SUB         pSrc, pSrc, #2                      ;// two bytes left
        BL          armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
        VRHADD      dAccH0, dAccH0, dAccV0
        VRHADD      dAccH2, dAccH2, dAccV2
        VRHADD      dAccH1, dAccH1, dAccV1
        VRHADD      dAccH3, dAccH3, dAccV3
        ADD         Temp, pDst, dstStep, LSL #1
        VST1        dResultH0[0], [pDst], dstStep
        VST1        dResultH2[0], [Temp], dstStep
        VST1        dResultH1[0], [pDst]
        VST1        dResultH3[0], [Temp]
        M_ADR       pArgs, ppArgs
        B           Block4x4LoopEnd
Case_e
        ;// Case q
        ;// Diagonal (HorVer) helper, then average its dTAcc output with
        ;// qRes3..qRes6 narrowed by a saturating, rounding shift of 5.
        M_PRINTF "Case e \n"

        SUB         pSrc, pSrc, srcStep, LSL #1         ;// two rows up
        SUB         pSrc, pSrc, #2                      ;// two bytes left
        BL          armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe
        VQRSHRUN    dTResult0, qRes3, #5
        VQRSHRUN    dTResult1, qRes4, #5
        VQRSHRUN    dTResult2, qRes5, #5
        VQRSHRUN    dTResult3, qRes6, #5

        ;// Rounding average of the two predictions, row by row.
        VRHADD      dTAcc0, dTAcc0, dTResult0
        VRHADD      dTAcc2, dTAcc2, dTResult2
        VRHADD      dTAcc1, dTAcc1, dTResult1
        VRHADD      dTAcc3, dTAcc3, dTResult3
        ADD         Temp, pDst, dstStep, LSL #1
        VST1        dTRes0[0], [pDst], dstStep
        VST1        dTRes2[0], [Temp], dstStep
        VST1        dTRes1[0], [pDst]
        VST1        dTRes3[0], [Temp]
        M_ADR       pArgs, ppArgs
        B           Block4x4LoopEnd
Case_f
        ;// Case r
        ;// Same sequence as Case_d, except the vertical helper is run one
        ;// byte to the right of the block origin.
        M_PRINTF "Case f \n"
        MOV         pSrcBK, pSrc                        ;// keep the block origin
        ADD         pSrc, pSrc, #1                      ;// one byte right
        SUB         pSrc, pSrc, srcStep, LSL #1         ;// two rows up
        BL          armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
        ADD         pSrc, pSrcBK, srcStep               ;// origin + one row
        SUB         pSrc, pSrc, #2                      ;// two bytes left
        BL          armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
        VRHADD      dAccH0, dAccH0, dAccV0
        VRHADD      dAccH2, dAccH2, dAccV2
        VRHADD      dAccH1, dAccH1, dAccV1
        VRHADD      dAccH3, dAccH3, dAccV3
        ADD         Temp, pDst, dstStep, LSL #1
        VST1        dResultH0[0], [pDst], dstStep
        VST1        dResultH2[0], [Temp], dstStep
        VST1        dResultH1[0], [pDst]
        VST1        dResultH3[0], [Temp]
        M_ADR       pArgs, ppArgs
        ;// Last case: falls through into Block4x4LoopEnd, no branch needed.


Block4x4LoopEnd

        ;// Width Loop
        ;// Every case handler leaves pArgs pointing at ppArgs before getting
        ;// here, which is why the M_ADR below is left commented out.
        ;//M_ADR       pArgs, ppArgs
        LDM         pArgs, {pSrc,srcStep,pDst,dstStep}  ;// Load arguments
        SUBS        iWidth, iWidth, #4                  ;// one 4-wide block done; sets flags
        ADD         pSrc, pSrc, #4                      ;// next block to the right
        ADD         pDst, pDst, #4                      ;// (ADD leaves the flags alone)
        BGT         Block4x4WidthLoop

        ;// Height Loop
        ;// The flags set by SUBS must survive until the BGT below; the ADD/SUB
        ;// instructions in between do not set flags.
        ;// NOTE(review): M_LDR and M_ADR are macros defined elsewhere and are
        ;// assumed not to alter the flags - confirm.
        SUBS        iHeight, iHeight, #4                ;// one row of 4x4 blocks done
        M_LDR       iWidth, ptrWidth                    ;// reload the width from ptrWidth
        M_ADR       pArgs, ppArgs
        ADD         pSrc, pSrc, srcStep, LSL #2         ;// down four source rows
        ADD         pDst, pDst, dstStep, LSL #2         ;// down four destination rows
        SUB         pSrc, pSrc, iWidth                  ;// rewind pSrc by iWidth bytes
        SUB         pDst, pDst, iWidth                  ;// rewind pDst by iWidth bytes
        BGT         Block4x4HeightLoop

EndOfInterpolation
        MOV         r0, #0                              ;// return value 0
        M_END

    ENDIF
        ;// End of CortexA8

    END