10c1bc742181ded4930842b46e9507372f0b1b963James Dong;// 278e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// Copyright (C) 2007-2008 ARM Limited 378e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// 478e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// Licensed under the Apache License, Version 2.0 (the "License"); 578e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// you may not use this file except in compliance with the License. 678e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// You may obtain a copy of the License at 778e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// 878e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// http://www.apache.org/licenses/LICENSE-2.0 978e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// 1078e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// Unless required by applicable law or agreed to in writing, software 1178e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// distributed under the License is distributed on an "AS IS" BASIS, 1278e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 1378e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// See the License for the specific language governing permissions and 1478e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// limitations under the License. 
1578e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// 1678e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// 170c1bc742181ded4930842b46e9507372f0b1b963James Dong;// 180c1bc742181ded4930842b46e9507372f0b1b963James Dong;// File Name: omxVCM4P10_PredictIntra_16x16_s.s 190c1bc742181ded4930842b46e9507372f0b1b963James Dong;// OpenMAX DL: v1.0.2 200c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Revision: 9641 210c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Date: Thursday, February 7, 2008 220c1bc742181ded4930842b46e9507372f0b1b963James Dong;// 230c1bc742181ded4930842b46e9507372f0b1b963James Dong;// 240c1bc742181ded4930842b46e9507372f0b1b963James Dong;// 250c1bc742181ded4930842b46e9507372f0b1b963James Dong;// 260c1bc742181ded4930842b46e9507372f0b1b963James Dong 270c1bc742181ded4930842b46e9507372f0b1b963James Dong INCLUDE omxtypes_s.h 280c1bc742181ded4930842b46e9507372f0b1b963James Dong INCLUDE armCOMM_s.h 290c1bc742181ded4930842b46e9507372f0b1b963James Dong 300c1bc742181ded4930842b46e9507372f0b1b963James Dong M_VARIANTS ARM1136JS 310c1bc742181ded4930842b46e9507372f0b1b963James Dong 320c1bc742181ded4930842b46e9507372f0b1b963James Dong;//------------------------------------------------------- 330c1bc742181ded4930842b46e9507372f0b1b963James Dong;// This table is for implementing switch case of C in asm by 340c1bc742181ded4930842b46e9507372f0b1b963James Dong;// the method of two levels of indexing. 
;//-------------------------------------------------------

;// Jump table: one word-sized code address per prediction mode, in the
;// order VERT, HOR, DC, PLANE.  omxVCM4P10_PredictIntra_16x16 indexes it
;// with predMode (scaled by 4) and loads the selected entry into pc.
        M_TABLE armVCM4P10_pIndexTable16x16
        DCD     OMX_VC_16X16_VERT
        DCD     OMX_VC_16X16_HOR
        DCD     OMX_VC_16X16_DC
        DCD     OMX_VC_16X16_PLANE

        IF ARM1136JS

;//--------------------------------------------
;// Numeric constants
;//--------------------------------------------
BLK_SIZE        EQU     0x10            ;// loop count loaded into y (16)
MUL_CONST0      EQU     0x01010101      ;// multiplier that copies one byte into all four byte lanes
;// NOTE(review): the four constants below are not referenced in the part
;// of the file visible here; confirm their use before changing them.
MUL_CONST1      EQU     0x00060004
MUL_CONST2      EQU     0x00070005
MUL_CONST3      EQU     0x00030001
MASK_CONST      EQU     0x00FF00FF

;//--------------------------------------------
;// Scratch register aliases, listed by physical register.
;// Several names share one register, so at most one alias
;// per register can hold a live value at any time.
;//--------------------------------------------
return          RN 0                    ;// status code placed in r0 before M_EXIT
innerCount      RN 0

outerCount      RN 1
pSrcLeft2       RN 1                    ;// second left-column pointer (pSrcLeft + leftStep)

pDst2           RN 2                    ;// second destination row pointer (pDst + dstStep)

sum             RN 6                    ;// DC accumulator

pTable          RN 9                    ;// address of armVCM4P10_pIndexTable16x16

temp1           RN 10
r0x01010101     RN 10                   ;// holds MUL_CONST0

cMul1           RN 11
dstStepx2       RN 11                   ;// 2 * dstStep
r0x00FF00FF     RN 11

y               RN 12                   ;// row loop counter
temp2           RN 12
cMul2           RN 12
count           RN 12                   ;// number of available neighbours in the DC case

leftStepx2      RN 14                   ;// 2 * leftStep

pc              RN 15

;// Generic temporaries, tValN = rN (r13 is skipped)
tVal0           RN 0
tVal1           RN 1
tVal2           RN 2
tVal3           RN 3
tVal4           RN 4
tVal5           RN 5
tVal6           RN 6
tVal7           RN 7
tVal8           RN 8
tVal9           RN 9
tVal10          RN 10
tVal11          RN 11
tVal12          RN 12
tVal14          RN 14

;// Plane-mode coefficients
b               RN 12
c               RN 14

;// NOTE(review): the pNpM / pNNNN aliases below are not referenced in the
;// part of the file visible here; presumably packed pixel values - confirm.
p2p0            RN 0
p3p1            RN 1
p6p4            RN 2
p7p5            RN 4
p10p8           RN 6
p11p9           RN 7
p14p12          RN 8
p15p13          RN 9

p3210           RN 10
p7654           RN 10
p111098         RN 10
p15141312       RN 10

;//--------------------------------------------
;// Input registers
;// r0-r3 carry the pointer arguments; r4-r7 are filled
;// at function entry from the stack arguments via M_LDR.
;//--------------------------------------------
pSrcLeft        RN 0                    ;// input pointer
pSrcAbove       RN 1                    ;// input pointer
pSrcAboveLeft   RN 2                    ;// input pointer
pDst            RN 3                    ;// output pointer
leftStep        RN 4                    ;// input variable
dstStep         RN 5                    ;// input variable
predMode        RN 6                    ;// input variable
availability    RN 7                    ;// input variable
1190c1bc742181ded4930842b46e9507372f0b1b963James Dong 1200c1bc742181ded4930842b46e9507372f0b1b963James Dong;//----------------------------------------------------------------------------------------------- 1210c1bc742181ded4930842b46e9507372f0b1b963James Dong;// omxVCM4P10_PredictIntra_16x16 starts 1220c1bc742181ded4930842b46e9507372f0b1b963James Dong;//----------------------------------------------------------------------------------------------- 1230c1bc742181ded4930842b46e9507372f0b1b963James Dong 1240c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// Write function header 1250c1bc742181ded4930842b46e9507372f0b1b963James Dong M_START omxVCM4P10_PredictIntra_16x16, r11 1260c1bc742181ded4930842b46e9507372f0b1b963James Dong 1270c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// Define stack arguments 1280c1bc742181ded4930842b46e9507372f0b1b963James Dong M_ARG LeftStep, 4 1290c1bc742181ded4930842b46e9507372f0b1b963James Dong M_ARG DstStep, 4 1300c1bc742181ded4930842b46e9507372f0b1b963James Dong M_ARG PredMode, 4 1310c1bc742181ded4930842b46e9507372f0b1b963James Dong M_ARG Availability, 4 1320c1bc742181ded4930842b46e9507372f0b1b963James Dong 1330c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// M_STALL ARM1136JS=4 1340c1bc742181ded4930842b46e9507372f0b1b963James Dong 1350c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR pTable,=armVCM4P10_pIndexTable16x16 ;// Load index table for switch case 1360c1bc742181ded4930842b46e9507372f0b1b963James Dong 1370c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// Load argument from the stack 1380c1bc742181ded4930842b46e9507372f0b1b963James Dong M_LDR predMode, PredMode ;// Arg predMode loaded from stack to reg 1390c1bc742181ded4930842b46e9507372f0b1b963James Dong M_LDR leftStep, LeftStep ;// Arg leftStep loaded from stack to reg 1400c1bc742181ded4930842b46e9507372f0b1b963James Dong M_LDR dstStep, DstStep ;// Arg dstStep loaded from stack to reg 1410c1bc742181ded4930842b46e9507372f0b1b963James Dong M_LDR 
availability, Availability ;// Arg availability loaded from stack to reg 1420c1bc742181ded4930842b46e9507372f0b1b963James Dong 1430c1bc742181ded4930842b46e9507372f0b1b963James Dong MOV y, #BLK_SIZE ;// Outer Loop Count 1440c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR pc, [pTable, predMode, LSL #2] ;// Branch to the case based on preMode 1450c1bc742181ded4930842b46e9507372f0b1b963James Dong 1460c1bc742181ded4930842b46e9507372f0b1b963James DongOMX_VC_16X16_VERT 1470c1bc742181ded4930842b46e9507372f0b1b963James Dong LDM pSrcAbove, {tVal6,tVal7,tVal8,tVal9};// tVal 6 to 9 = pSrcAbove[0 to 15] 1480c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD dstStepx2, dstStep, dstStep ;// double dstStep 1490c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD pDst2, pDst, dstStep ;// pDst2- pDst advanced by dstStep 1500c1bc742181ded4930842b46e9507372f0b1b963James Dong 1510c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// M_STALL ARM1136JS=2 ;// Stall outside the loop 1520c1bc742181ded4930842b46e9507372f0b1b963James Dong 1530c1bc742181ded4930842b46e9507372f0b1b963James DongLOOP_VERT 1540c1bc742181ded4930842b46e9507372f0b1b963James Dong STM pDst, {tVal6,tVal7,tVal8,tVal9} ;// pDst[0 to 15] = tVal 6 to 9 1550c1bc742181ded4930842b46e9507372f0b1b963James Dong SUBS y, y, #2 ;// y-- 1560c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD pDst, pDst, dstStepx2 ;// pDst advanced by dstStep 1570c1bc742181ded4930842b46e9507372f0b1b963James Dong STM pDst2, {tVal6,tVal7,tVal8,tVal9} ;// pDst2[16 to 31] = tVal 6 to 9 1580c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD pDst2, pDst2, dstStepx2 ;// pDst advanced by dstStep 1590c1bc742181ded4930842b46e9507372f0b1b963James Dong BNE LOOP_VERT ;// Loop for 8 times 1600c1bc742181ded4930842b46e9507372f0b1b963James Dong MOV return, #OMX_Sts_NoErr 1610c1bc742181ded4930842b46e9507372f0b1b963James Dong M_EXIT 1620c1bc742181ded4930842b46e9507372f0b1b963James Dong 1630c1bc742181ded4930842b46e9507372f0b1b963James Dong 
1640c1bc742181ded4930842b46e9507372f0b1b963James DongOMX_VC_16X16_HOR 1650c1bc742181ded4930842b46e9507372f0b1b963James Dong 1660c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// M_STALL ARM1136JS=6 1670c1bc742181ded4930842b46e9507372f0b1b963James Dong 1680c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR r0x01010101, =MUL_CONST0 ;// Const to repeat the byte in reg 4 times 1690c1bc742181ded4930842b46e9507372f0b1b963James Dong MOV y, #4 ;// Outer Loop Count 1700c1bc742181ded4930842b46e9507372f0b1b963James Dong M_LDRB tVal6, [pSrcLeft], +leftStep ;// tVal6 = pSrcLeft[0 to 3] 1710c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD pDst2, pDst, dstStep ;// pDst2- pDst advanced by dstStep 1720c1bc742181ded4930842b46e9507372f0b1b963James Dong M_LDRB tVal7, [pSrcLeft], +leftStep ;// tVal1 = pSrcLeft[4 to 7] 1730c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD dstStepx2, dstStep, dstStep ;// double dstStep 1740c1bc742181ded4930842b46e9507372f0b1b963James Dong SUB dstStepx2, dstStepx2, #12 ;// double dstStep minus 12 1750c1bc742181ded4930842b46e9507372f0b1b963James Dong 1760c1bc742181ded4930842b46e9507372f0b1b963James DongLOOP_HOR 1770c1bc742181ded4930842b46e9507372f0b1b963James Dong M_LDRB tVal8, [pSrcLeft], +leftStep ;// tVal8 = pSrcLeft[0 to 3] 1780c1bc742181ded4930842b46e9507372f0b1b963James Dong MUL tVal6, tVal6, r0x01010101 ;// replicate the val in all the bytes 1790c1bc742181ded4930842b46e9507372f0b1b963James Dong M_LDRB tVal9, [pSrcLeft], +leftStep ;// tVal9 = pSrcLeft[4 to 7] 1800c1bc742181ded4930842b46e9507372f0b1b963James Dong MUL tVal7, tVal7, r0x01010101 ;// replicate the val in all the bytes 1810c1bc742181ded4930842b46e9507372f0b1b963James Dong SUBS y, y, #1 ;// y-- 1820c1bc742181ded4930842b46e9507372f0b1b963James Dong STR tVal6, [pDst], #+4 ;// store {tVal6} at pDst[0 to 3] 1830c1bc742181ded4930842b46e9507372f0b1b963James Dong STR tVal7, [pDst2], #+4 ;// store {tVal7} at pDst2[0 to 3] 1840c1bc742181ded4930842b46e9507372f0b1b963James Dong STR 
tVal6, [pDst], #+4 ;// store {tVal6} at pDst[4 to 7] 1850c1bc742181ded4930842b46e9507372f0b1b963James Dong STR tVal7, [pDst2], #+4 ;// store {tVal7} at pDst2[4 to 7] 1860c1bc742181ded4930842b46e9507372f0b1b963James Dong MUL tVal8, tVal8, r0x01010101 ;// replicate the val in all the bytes 1870c1bc742181ded4930842b46e9507372f0b1b963James Dong STR tVal6, [pDst], #+4 ;// store {tVal6} at pDst[8 to 11] 1880c1bc742181ded4930842b46e9507372f0b1b963James Dong STR tVal7, [pDst2], #+4 ;// store {tVal7} at pDst2[8 to 11] 1890c1bc742181ded4930842b46e9507372f0b1b963James Dong MUL tVal9, tVal9, r0x01010101 ;// replicate the val in all the bytes 1900c1bc742181ded4930842b46e9507372f0b1b963James Dong M_STR tVal6, [pDst], dstStepx2 ;// store {tVal6} at pDst[12 to 15] 1910c1bc742181ded4930842b46e9507372f0b1b963James Dong M_STR tVal7, [pDst2], dstStepx2 ;// store {tVal7} at pDst2[12 to 15] 1920c1bc742181ded4930842b46e9507372f0b1b963James Dong STR tVal8, [pDst], #+4 ;// store {tVal6} at pDst[0 to 3] 1930c1bc742181ded4930842b46e9507372f0b1b963James Dong STR tVal9, [pDst2], #+4 ;// store {tVal7} at pDst2[0 to 3] 1940c1bc742181ded4930842b46e9507372f0b1b963James Dong STR tVal8, [pDst], #+4 ;// store {tVal6} at pDst[4 to 7] 1950c1bc742181ded4930842b46e9507372f0b1b963James Dong STR tVal9, [pDst2], #+4 ;// store {tVal7} at pDst2[4 to 7] 1960c1bc742181ded4930842b46e9507372f0b1b963James Dong STR tVal8, [pDst], #+4 ;// store {tVal6} at pDst[8 to 11] 1970c1bc742181ded4930842b46e9507372f0b1b963James Dong STR tVal9, [pDst2], #+4 ;// store {tVal7} at pDst2[8 to 11] 1980c1bc742181ded4930842b46e9507372f0b1b963James Dong M_STR tVal8, [pDst], dstStepx2 ;// store {tVal6} at pDst[12 to 15] 1990c1bc742181ded4930842b46e9507372f0b1b963James Dong M_LDRB tVal6, [pSrcLeft], +leftStep ;// tVal6 = pSrcLeft[0 to 3] 2000c1bc742181ded4930842b46e9507372f0b1b963James Dong M_STR tVal9, [pDst2], dstStepx2 ;// store {tVal7} at pDst2[12 to 15] 2010c1bc742181ded4930842b46e9507372f0b1b963James Dong M_LDRB tVal7, [pSrcLeft], 
+leftStep ;// tVal7 = pSrcLeft[4 to 7] 2020c1bc742181ded4930842b46e9507372f0b1b963James Dong BNE LOOP_HOR ;// Loop for 3 times 2030c1bc742181ded4930842b46e9507372f0b1b963James Dong MOV return, #OMX_Sts_NoErr 2040c1bc742181ded4930842b46e9507372f0b1b963James Dong M_EXIT 2050c1bc742181ded4930842b46e9507372f0b1b963James Dong 2060c1bc742181ded4930842b46e9507372f0b1b963James DongOMX_VC_16X16_DC 2070c1bc742181ded4930842b46e9507372f0b1b963James Dong 2080c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// M_STALL ARM1136JS=2 2090c1bc742181ded4930842b46e9507372f0b1b963James Dong 2100c1bc742181ded4930842b46e9507372f0b1b963James Dong MOV count, #0 ;// count = 0 2110c1bc742181ded4930842b46e9507372f0b1b963James Dong TST availability, #OMX_VC_UPPER ;// if(availability & #OMX_VC_UPPER) 2120c1bc742181ded4930842b46e9507372f0b1b963James Dong BEQ TST_LEFT ;// Jump to Left if not upper 2130c1bc742181ded4930842b46e9507372f0b1b963James Dong LDM pSrcAbove,{tVal8,tVal9,tVal10,tVal11};// tVal 8 to 11 = pSrcAbove[0 to 15] 2140c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD count, count, #1 ;// if upper inc count by 1 2150c1bc742181ded4930842b46e9507372f0b1b963James Dong 2160c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// M_STALL ARM1136JS=2 2170c1bc742181ded4930842b46e9507372f0b1b963James Dong 2180c1bc742181ded4930842b46e9507372f0b1b963James Dong UXTB16 tVal2, tVal8 ;// pSrcAbove[0, 2] 2190c1bc742181ded4930842b46e9507372f0b1b963James Dong UXTB16 tVal6, tVal9 ;// pSrcAbove[4, 6] 2200c1bc742181ded4930842b46e9507372f0b1b963James Dong UADD16 tVal2, tVal2, tVal6 ;// pSrcAbove[0, 2] + pSrcAbove[4, 6] 2210c1bc742181ded4930842b46e9507372f0b1b963James Dong UXTB16 tVal8, tVal8, ROR #8 ;// pSrcAbove[1, 3] 2220c1bc742181ded4930842b46e9507372f0b1b963James Dong UXTB16 tVal9, tVal9, ROR #8 ;// pSrcAbove[5, 7] 2230c1bc742181ded4930842b46e9507372f0b1b963James Dong UADD16 tVal8, tVal8, tVal9 ;// pSrcAbove[1, 3] + pSrcAbove[5, 7] 2240c1bc742181ded4930842b46e9507372f0b1b963James Dong UADD16 tVal2, 
tVal2, tVal8 ;// sum(pSrcAbove[0] to pSrcAbove[7]) 2250c1bc742181ded4930842b46e9507372f0b1b963James Dong 2260c1bc742181ded4930842b46e9507372f0b1b963James Dong UXTB16 tVal8, tVal10 ;// pSrcAbove[8, 10] 2270c1bc742181ded4930842b46e9507372f0b1b963James Dong UXTB16 tVal9, tVal11 ;// pSrcAbove[12, 14] 2280c1bc742181ded4930842b46e9507372f0b1b963James Dong UADD16 tVal8, tVal8, tVal9 ;// pSrcAbove[8, 10] + pSrcAbove[12, 14] 2290c1bc742181ded4930842b46e9507372f0b1b963James Dong UXTB16 tVal10, tVal10, ROR #8 ;// pSrcAbove[9, 11] 2300c1bc742181ded4930842b46e9507372f0b1b963James Dong UXTB16 tVal11, tVal11, ROR #8 ;// pSrcAbove[13, 15] 2310c1bc742181ded4930842b46e9507372f0b1b963James Dong UADD16 tVal10, tVal10, tVal11 ;// pSrcAbove[9, 11] + pSrcAbove[13, 15] 2320c1bc742181ded4930842b46e9507372f0b1b963James Dong UADD16 tVal8, tVal8, tVal10 ;// sum(pSrcAbove[8] to pSrcAbove[15]) 2330c1bc742181ded4930842b46e9507372f0b1b963James Dong 2340c1bc742181ded4930842b46e9507372f0b1b963James Dong UADD16 tVal2, tVal2, tVal8 ;// sum(pSrcAbove[0] to pSrcAbove[15]) 2350c1bc742181ded4930842b46e9507372f0b1b963James Dong 2360c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// M_STALL ARM1136JS=1 2370c1bc742181ded4930842b46e9507372f0b1b963James Dong 2380c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD tVal2, tVal2, tVal2, LSR #16 ;// sum(pSrcAbove[0] to pSrcAbove[15]) 2390c1bc742181ded4930842b46e9507372f0b1b963James Dong 2400c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// M_STALL ARM1136JS=1 2410c1bc742181ded4930842b46e9507372f0b1b963James Dong 2420c1bc742181ded4930842b46e9507372f0b1b963James Dong UXTH sum, tVal2 ;// Extract the lower half for result 2430c1bc742181ded4930842b46e9507372f0b1b963James Dong 2440c1bc742181ded4930842b46e9507372f0b1b963James DongTST_LEFT 2450c1bc742181ded4930842b46e9507372f0b1b963James Dong TST availability, #OMX_VC_LEFT 2460c1bc742181ded4930842b46e9507372f0b1b963James Dong BEQ TST_COUNT 2470c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD leftStepx2, 
leftStep,leftStep ;// leftStepx2 = 2 * leftStep 2480c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD pSrcLeft2, pSrcLeft, leftStep ;// pSrcLeft2 = pSrcLeft + leftStep 2490c1bc742181ded4930842b46e9507372f0b1b963James Dong 2500c1bc742181ded4930842b46e9507372f0b1b963James Dong M_LDRB tVal8, [pSrcLeft], +leftStepx2 ;// tVal8 = pSrcLeft[0] 2510c1bc742181ded4930842b46e9507372f0b1b963James Dong M_LDRB tVal9, [pSrcLeft2], +leftStepx2 ;// tVal9 = pSrcLeft[1] 2520c1bc742181ded4930842b46e9507372f0b1b963James Dong M_LDRB tVal10, [pSrcLeft], +leftStepx2 ;// tVal10= pSrcLeft[2] 2530c1bc742181ded4930842b46e9507372f0b1b963James Dong M_LDRB tVal11, [pSrcLeft2],+leftStepx2 ;// tVal11= pSrcLeft[3] 2540c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD tVal7, tVal8, tVal9 ;// tVal7 = tVal8 + tVal9 2550c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD count, count, #1 ;// Inc Counter if Left is available 2560c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD tVal6, tVal10, tVal11 ;// tVal6 = tVal10 + tVal11 2570c1bc742181ded4930842b46e9507372f0b1b963James Dong 2580c1bc742181ded4930842b46e9507372f0b1b963James Dong M_LDRB tVal8, [pSrcLeft], +leftStepx2 ;// tVal8 = pSrcLeft[0] 2590c1bc742181ded4930842b46e9507372f0b1b963James Dong M_LDRB tVal9, [pSrcLeft2], +leftStepx2 ;// tVal9 = pSrcLeft[1] 2600c1bc742181ded4930842b46e9507372f0b1b963James Dong M_LDRB tVal10, [pSrcLeft], +leftStepx2 ;// tVal10= pSrcLeft[2] 2610c1bc742181ded4930842b46e9507372f0b1b963James Dong M_LDRB tVal11, [pSrcLeft2],+leftStepx2 ;// tVal11= pSrcLeft[3] 2620c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD sum, tVal7, tVal6 ;// sum = tVal8 + tVal10 2630c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD tVal8, tVal8, tVal9 ;// tVal8 = tVal8 + tVal9 2640c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD tVal10, tVal10, tVal11 ;// tVal10= tVal10 + tVal11 2650c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD tVal7, tVal8, tVal10 ;// tVal7 = tVal8 + tVal10 
2660c1bc742181ded4930842b46e9507372f0b1b963James Dong 2670c1bc742181ded4930842b46e9507372f0b1b963James Dong 2680c1bc742181ded4930842b46e9507372f0b1b963James Dong M_LDRB tVal8, [pSrcLeft], +leftStepx2 ;// tVal8 = pSrcLeft[0] 2690c1bc742181ded4930842b46e9507372f0b1b963James Dong M_LDRB tVal9, [pSrcLeft2], +leftStepx2 ;// tVal9 = pSrcLeft[1] 2700c1bc742181ded4930842b46e9507372f0b1b963James Dong M_LDRB tVal10, [pSrcLeft], +leftStepx2 ;// tVal10= pSrcLeft[2] 2710c1bc742181ded4930842b46e9507372f0b1b963James Dong M_LDRB tVal11, [pSrcLeft2],+leftStepx2 ;// tVal11= pSrcLeft[3] 2720c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD sum, sum, tVal7 ;// sum = sum + tVal7 2730c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD tVal8, tVal8, tVal9 ;// tVal8 = tVal8 + tVal9 2740c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD tVal10, tVal10, tVal11 ;// tVal10= tVal10 + tVal11 2750c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD tVal7, tVal8, tVal10 ;// tVal7 = tVal8 + tVal10 2760c1bc742181ded4930842b46e9507372f0b1b963James Dong 2770c1bc742181ded4930842b46e9507372f0b1b963James Dong 2780c1bc742181ded4930842b46e9507372f0b1b963James Dong M_LDRB tVal8, [pSrcLeft], +leftStepx2 ;// tVal8 = pSrcLeft[0] 2790c1bc742181ded4930842b46e9507372f0b1b963James Dong M_LDRB tVal9, [pSrcLeft2], +leftStepx2 ;// tVal9 = pSrcLeft[1] 2800c1bc742181ded4930842b46e9507372f0b1b963James Dong M_LDRB tVal10, [pSrcLeft], +leftStepx2 ;// tVal10= pSrcLeft[2] 2810c1bc742181ded4930842b46e9507372f0b1b963James Dong M_LDRB tVal11, [pSrcLeft2],+leftStepx2 ;// tVal11= pSrcLeft[3] 2820c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD sum, sum, tVal7 ;// sum = sum + tVal7 2830c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD tVal8, tVal8, tVal9 ;// tVal8 = tVal8 + tVal9 2840c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD tVal10, tVal10, tVal11 ;// tVal10= tVal10 + tVal11 2850c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD tVal7, tVal8, tVal10 ;// tVal7 = tVal8 + tVal10 
2860c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD sum, sum, tVal7 ;// sum = sum + tVal7 2870c1bc742181ded4930842b46e9507372f0b1b963James Dong 2880c1bc742181ded4930842b46e9507372f0b1b963James DongTST_COUNT 2890c1bc742181ded4930842b46e9507372f0b1b963James Dong CMP count, #0 ;// if(count == 0) 2900c1bc742181ded4930842b46e9507372f0b1b963James Dong MOVEQ sum, #128 ;// sum = 128 if(count == 0) 2910c1bc742181ded4930842b46e9507372f0b1b963James Dong BEQ TST_COUNT0 ;// if(count == 0) 2920c1bc742181ded4930842b46e9507372f0b1b963James Dong CMP count, #1 ;// if(count == 1) 2930c1bc742181ded4930842b46e9507372f0b1b963James Dong ADDEQ sum, sum, #8 ;// sum += 8 if(count == 1) 2940c1bc742181ded4930842b46e9507372f0b1b963James Dong ADDNE sum, sum, tVal2 ;// sum = sumleft + sumupper 2950c1bc742181ded4930842b46e9507372f0b1b963James Dong ADDNE sum, sum, #16 ;// sum += 16 if(count == 2) 2960c1bc742181ded4930842b46e9507372f0b1b963James Dong 2970c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// M_STALL ARM1136JS=1 2980c1bc742181ded4930842b46e9507372f0b1b963James Dong 2990c1bc742181ded4930842b46e9507372f0b1b963James Dong UXTH sum, sum ;// sum only byte rest cleared 3000c1bc742181ded4930842b46e9507372f0b1b963James Dong 3010c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// M_STALL ARM1136JS=1 3020c1bc742181ded4930842b46e9507372f0b1b963James Dong 3030c1bc742181ded4930842b46e9507372f0b1b963James Dong LSREQ sum, sum, #4 ;// sum >> 4 if(count == 1) 3040c1bc742181ded4930842b46e9507372f0b1b963James Dong 3050c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// M_STALL ARM1136JS=1 3060c1bc742181ded4930842b46e9507372f0b1b963James Dong 3070c1bc742181ded4930842b46e9507372f0b1b963James Dong LSRNE sum, sum, #5 ;// sum >> 5 if(count == 2) 3080c1bc742181ded4930842b46e9507372f0b1b963James Dong 3090c1bc742181ded4930842b46e9507372f0b1b963James DongTST_COUNT0 3100c1bc742181ded4930842b46e9507372f0b1b963James Dong 3110c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// M_STALL ARM1136JS=1 
3120c1bc742181ded4930842b46e9507372f0b1b963James Dong 3130c1bc742181ded4930842b46e9507372f0b1b963James Dong ORR sum, sum, sum, LSL #8 ;// sum replicated in two halfword 3140c1bc742181ded4930842b46e9507372f0b1b963James Dong 3150c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// M_STALL ARM1136JS=1 3160c1bc742181ded4930842b46e9507372f0b1b963James Dong 3170c1bc742181ded4930842b46e9507372f0b1b963James Dong ORR tVal6, sum, sum, LSL #16 ;// sum replicated in all bytes 3180c1bc742181ded4930842b46e9507372f0b1b963James Dong CPY tVal7, tVal6 ;// tVal1 = tVal0 3190c1bc742181ded4930842b46e9507372f0b1b963James Dong CPY tVal8, tVal6 ;// tVal2 = tVal0 3200c1bc742181ded4930842b46e9507372f0b1b963James Dong CPY tVal9, tVal6 ;// tVal3 = tVal0 3210c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD dstStepx2, dstStep, dstStep ;// double dstStep 3220c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD pDst2, pDst, dstStep ;// pDst2- pDst advanced by dstStep 3230c1bc742181ded4930842b46e9507372f0b1b963James Dong MOV y, #BLK_SIZE ;// Outer Loop Count 3240c1bc742181ded4930842b46e9507372f0b1b963James Dong 3250c1bc742181ded4930842b46e9507372f0b1b963James DongLOOP_DC 3260c1bc742181ded4930842b46e9507372f0b1b963James Dong STM pDst, {tVal6,tVal7,tVal8,tVal9} ;// pDst[0 to 15] = tVal 6 to 9 3270c1bc742181ded4930842b46e9507372f0b1b963James Dong SUBS y, y, #2 ;// y-- 3280c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD pDst, pDst, dstStepx2 ;// pDst advanced by dstStep 3290c1bc742181ded4930842b46e9507372f0b1b963James Dong STM pDst2, {tVal6,tVal7,tVal8,tVal9} ;// pDst2[16 to 31] = tVal 6 to 9 3300c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD pDst2, pDst2, dstStepx2 ;// pDst advanced by dstStep 3310c1bc742181ded4930842b46e9507372f0b1b963James Dong BNE LOOP_DC ;// Loop for 8 times 3320c1bc742181ded4930842b46e9507372f0b1b963James Dong 3330c1bc742181ded4930842b46e9507372f0b1b963James Dong MOV return, #OMX_Sts_NoErr 3340c1bc742181ded4930842b46e9507372f0b1b963James Dong M_EXIT 
;//-------------------------------------------------------
;// OMX_VC_16X16_PLANE
;//
;// Plane-mode case of the 16x16 intra predictor.  From the neighbouring
;// pixels it derives
;//
;//     H = sum over k = 1..8 of k * (above[7+k] - above[7-k])   (above[-1] = aboveLeft)
;//     V = sum over k = 1..8 of k * (left[7+k]  - left[7-k])    (left[-1]  = aboveLeft)
;//     a = 16 * (pSrcAbove[15] + pSrcLeft[15*leftStep])
;//     b = (5*H + 32) >> 6
;//     c = (5*V + 32) >> 6
;//     d = a + 16 - 7*b - 7*c
;//
;// and writes, for each of the 16 rows y and 16 columns x,
;//
;//     pDst[y*dstStep + x] = clip_0_255((d + x*b + y*c) >> 5)
;//
;// Two pixels are kept per register as signed halfwords so that one
;// SADD16 advances two columns (or one row) at a time.
;//
;// Reads : pSrcLeft, pSrcAbove, pSrcAboveLeft, leftStep, dstStep, pDst
;// Writes: 16 rows of 16 bytes at pDst (four word stores per row);
;//         dstStep and pDst are modified; return = OMX_Sts_NoErr
;//
;// NOTE(review): the loop below refers to p2p0, p3p1, ..., p15p13, c,
;// temp1 and temp2.  These look like register aliases of the tVal*
;// registers loaded just before LOOP_PLANE (per the comments there);
;// the alias declarations are outside this section -- confirm.
;//-------------------------------------------------------

OMX_VC_16X16_PLANE

        ;// M_STALL ARM1136JS=3
        RSB     tVal14, leftStep, leftStep, LSL #4      ;// tVal14 = 15*leftStep

        ;// M_STALL ARM1136JS=2
        LDRB    tVal10, [pSrcLeft,  tVal14]             ;// tVal10 = pSrcLeft[15*leftStep]
        LDRB    tVal11, [pSrcAboveLeft]                 ;// tVal11 = pSrcAboveLeft[0]
        LDRB    tVal12, [pSrcAbove, #15]                ;// tVal12 = pSrcAbove[15]

        ADD     tVal2,  tVal12,  tVal10                 ;// tVal2  = pSrcAbove[15] + pSrcLeft[15*leftStep]
        SUB     tVal10, tVal10,  tVal11                 ;// tVal10 = V0 = pSrcLeft[15*leftStep] - pSrcAboveLeft[0]
        SUB     tVal11, tVal12,  tVal11                 ;// tVal11 = H0 = pSrcAbove[15] - pSrcAboveLeft[0]
        MOV     tVal2,  tVal2,   LSL #4                 ;// tVal2  = a = 16 * (pSrcAbove[15] + pSrcLeft[15*leftStep])

        ;// Accumulate H in tVal11, one symmetric pair of pSrcAbove per step
        MOV     tVal11, tVal11, LSL #3                  ;// H  = 8*(above[15] - aboveLeft)
        LDRB    tVal6,  [pSrcAbove, #0]
        LDRB    tVal7,  [pSrcAbove, #14]
        SUB     tVal8,  tVal7,  tVal6
        RSB     tVal8,  tVal8,  tVal8,  LSL #3          ;// 7*(above[14] - above[0])
        ADD     tVal11, tVal11, tVal8
        LDRB    tVal6,  [pSrcAbove, #1]
        LDRB    tVal7,  [pSrcAbove, #13]
        SUB     tVal8,  tVal7,  tVal6
        ADD     tVal8,  tVal8,  tVal8                   ;// 2*(above[13] - above[1])
        ADD     tVal8,  tVal8,  tVal8,  LSL #1          ;// 6*(above[13] - above[1])
        ADD     tVal11, tVal11, tVal8
        LDRB    tVal6,  [pSrcAbove, #2]
        LDRB    tVal7,  [pSrcAbove, #12]
        SUB     tVal8,  tVal7,  tVal6
        ADD     tVal8,  tVal8,  tVal8,  LSL #2          ;// 5*(above[12] - above[2])
        ADD     tVal11, tVal11, tVal8
        LDRB    tVal6,  [pSrcAbove, #3]
        LDRB    tVal7,  [pSrcAbove, #11]
        SUB     tVal8,  tVal7,  tVal6
        ADD     tVal11, tVal11, tVal8,  LSL #2          ;// H += 4*(above[11] - above[3])
        LDRB    tVal6,  [pSrcAbove, #4]
        LDRB    tVal7,  [pSrcAbove, #10]
        SUB     tVal8,  tVal7,  tVal6
        ADD     tVal8,  tVal8,  tVal8,  LSL #1          ;// 3*(above[10] - above[4])
        ADD     tVal11, tVal11, tVal8
        LDRB    tVal6,  [pSrcAbove, #5]
        LDRB    tVal7,  [pSrcAbove, #9]
        SUB     tVal8,  tVal7,  tVal6
        ADD     tVal11, tVal11, tVal8,  LSL #1          ;// H += 2*(above[9] - above[5])
        LDRB    tVal6,  [pSrcAbove, #6]
        LDRB    tVal7,  [pSrcAbove, #8]
        SUB     tVal8,  tVal7,  tVal6                   ;// 1*(above[8] - above[6])
        ADD     tVal7,  tVal11, tVal8                   ;// tVal7 = H

        ADD     tVal2,  tVal2,  #16                     ;// tVal2 = a + 16
        MOV     tVal1,  pSrcLeft                        ;// tVal1 = pSrcLeft (walks down from row 0)
        SUB     tVal9,  tVal14, leftStep                ;// tVal9 = 14*leftStep
        ADD     tVal9,  pSrcLeft, tVal9                 ;// tVal9 = pSrcLeft + 14*leftStep (walks up from row 14)

        ;// Accumulate V in tVal6, interleaved with the computation of b
        M_LDRB  tVal8,  [tVal9], -leftStep              ;// tVal8  = pSrcLeft[14*leftStep]
        M_LDRB  tVal11, [tVal1], +leftStep              ;// tVal11 = pSrcLeft[0]
        ADD     tVal7,  tVal7,  tVal7,  LSL #2          ;// tVal7  = 5 * H
        ADD     tVal7,  tVal7,  #32                     ;// tVal7  = 5 * H + 32
        SUB     tVal8,  tVal8,  tVal11                  ;// tVal8  = pSrcLeft[14*leftStep] - pSrcLeft[0]
        ASR     tVal12, tVal7,  #6                      ;// tVal12 = b = (5 * H + 32) >> 6

        RSB     tVal8,  tVal8,  tVal8,  LSL #3          ;// tVal8  = V1 = 7 * (pSrcLeft[14*leftStep] - pSrcLeft[0])
        ADD     tVal6,  tVal8,  tVal10, LSL #3          ;// tVal6  = V = 8*V0 + V1
        M_LDRB  tVal8,  [tVal9], -leftStep              ;// tVal8  = pSrcLeft[13*leftStep]
        M_LDRB  tVal10, [tVal1], +leftStep              ;// tVal10 = pSrcLeft[leftStep]
        RSB     tVal7,  tVal12, tVal12, LSL #3          ;// tVal7  = 7*b
        SUB     tVal2,  tVal2,  tVal7                   ;// tVal2  = a + 16 - 7*b
        SUB     tVal7,  tVal8,  tVal10                  ;// tVal7  = pSrcLeft[13*leftStep] - pSrcLeft[leftStep]
        M_LDRB  tVal8,  [tVal9], -leftStep              ;// tVal8  = pSrcLeft[12*leftStep]
        ADD     tVal7,  tVal7,  tVal7                   ;// tVal7  = 2 * (pSrcLeft[13*leftStep] - pSrcLeft[leftStep])
        M_LDRB  tVal10, [tVal1], +leftStep              ;// tVal10 = pSrcLeft[2*leftStep]
        ADD     tVal7,  tVal7,  tVal7,  LSL #1          ;// tVal7  = 6 * (pSrcLeft[13*leftStep] - pSrcLeft[leftStep])
        ADD     tVal6,  tVal6,  tVal7                   ;// tVal6  = V = V + V2
        SUB     tVal7,  tVal8,  tVal10                  ;// tVal7  = pSrcLeft[12*leftStep] - pSrcLeft[2*leftStep]
        M_LDRB  tVal8,  [tVal9], -leftStep              ;// tVal8  = pSrcLeft[11*leftStep]
        M_LDRB  tVal10, [tVal1], +leftStep              ;// tVal10 = pSrcLeft[3*leftStep]
        ADD     tVal7,  tVal7,  tVal7,  LSL #2          ;// tVal7  = 5 * (pSrcLeft[12*leftStep] - pSrcLeft[2*leftStep])
        ADD     tVal6,  tVal6,  tVal7                   ;// tVal6  = V = V + V3
        SUB     tVal7,  tVal8,  tVal10                  ;// tVal7  = pSrcLeft[11*leftStep] - pSrcLeft[3*leftStep]
        M_LDRB  tVal8,  [tVal9], -leftStep              ;// tVal8  = pSrcLeft[10*leftStep]
        M_LDRB  tVal10, [tVal1], +leftStep              ;// tVal10 = pSrcLeft[4*leftStep]
        ADD     tVal6,  tVal6,  tVal7,  LSL #2          ;// tVal6  = V = V + V4
        SUB     dstStep, dstStep, #16                   ;// dstStep = dstStep - 16 (16 bytes per row are covered by the post-incremented stores)
        SUB     tVal7,  tVal8,  tVal10                  ;// tVal7  = pSrcLeft[10*leftStep] - pSrcLeft[4*leftStep]
        M_LDRB  tVal8,  [tVal9], -leftStep              ;// tVal8  = pSrcLeft[9*leftStep]
        M_LDRB  tVal10, [tVal1], +leftStep              ;// tVal10 = pSrcLeft[5*leftStep]
        ADD     tVal7,  tVal7,  tVal7,  LSL #1          ;// tVal7  = 3 * (pSrcLeft[10*leftStep] - pSrcLeft[4*leftStep])
        ADD     tVal6,  tVal6,  tVal7                   ;// tVal6  = V = V + V5
        SUB     tVal7,  tVal8,  tVal10                  ;// tVal7  = pSrcLeft[9*leftStep] - pSrcLeft[5*leftStep]
        M_LDRB  tVal8,  [tVal9], -leftStep              ;// tVal8  = pSrcLeft[8*leftStep]
        M_LDRB  tVal10, [tVal1], +leftStep              ;// tVal10 = pSrcLeft[6*leftStep]
        ADD     tVal6,  tVal6,  tVal7,  LSL #1          ;// tVal6  = V = V + V6

        ;// M_STALL ARM1136JS=1
        SUB     tVal7,  tVal8,  tVal10                  ;// tVal7  = pSrcLeft[8*leftStep] - pSrcLeft[6*leftStep]
        ADD     tVal6,  tVal6,  tVal7                   ;// tVal6  = V = V + V7

        ;// M_STALL ARM1136JS=1
        ADD     tVal6,  tVal6,  tVal6,  LSL #2          ;// tVal6  = 5*V
        ADD     tVal6,  tVal6,  #32                     ;// tVal6  = 5*V + 32

        ;// M_STALL ARM1136JS=1
        ASR     tVal14, tVal6,  #6                      ;// tVal14 = c = (5*V + 32) >> 6

        ;// M_STALL ARM1136JS=1
        RSB     tVal6,  tVal14, tVal14, LSL #3          ;// tVal6  = 7*c
        UXTH    tVal14, tVal14                          ;// tVal14 = c with the upper halfword cleared
        ADD     tVal10, tVal12, tVal12                  ;// tVal10 = 2*b
        ORR     tVal14, tVal14, tVal14, LSL #16         ;// tVal14 = {c, c}
        SUB     tVal6,  tVal2,  tVal6                   ;// tVal6  = d = a - 7*b - 7*c + 16
        ADD     tVal1,  tVal6,  tVal10                  ;// tVal1  = p2 = d + 2*b
        ADD     tVal10, tVal10, tVal12                  ;// tVal10 = 3*b

        ;// Pack with PKHBT rather than ORR: d may be negative, and a plain
        ;// ORR would let its sign-extension bits (all ones in the upper
        ;// halfword) overwrite the p2 lane.  PKHBT takes only d[15:0].
        PKHBT   tVal0,  tVal6,  tVal1,  LSL #16         ;// tVal0  = p2p0 = pack {p2, p0}
        UXTH    tVal12, tVal12                          ;// tVal12 = b with the upper halfword cleared
        UXTH    tVal10, tVal10                          ;// tVal10 = 3*b with the upper halfword cleared
        ORR     tVal12, tVal12, tVal12, LSL #16         ;// tVal12 = {b, b}
        ORR     tVal10, tVal10, tVal10, LSL #16         ;// tVal10 = {3b, 3b}
        SADD16  tVal1,  tVal0,  tVal12                  ;// tVal1  = p3p1   = p2p0   + {b, b}
        SADD16  tVal2,  tVal1,  tVal10                  ;// tVal2  = p6p4   = p3p1   + {3b, 3b}
        SADD16  tVal4,  tVal2,  tVal12                  ;// tVal4  = p7p5   = p6p4   + {b, b}
        SADD16  tVal6,  tVal4,  tVal10                  ;// tVal6  = p10p8  = p7p5   + {3b, 3b}
        SADD16  tVal7,  tVal6,  tVal12                  ;// tVal7  = p11p9  = p10p8  + {b, b}
        SADD16  tVal8,  tVal7,  tVal10                  ;// tVal8  = p14p12 = p11p9  + {3b, 3b}
        SADD16  tVal9,  tVal8,  tVal12                  ;// tVal9  = p15p13 = p14p12 + {b, b}
        LDR     r0x00FF00FF, =MASK_CONST                ;// r0x00FF00FF = 0x00FF00FF

        ;// One iteration per output row.  For each group of four pixels:
        ;//   USAT16 #13 clamps both halfwords to 0..8191, ASR #5 + mask
        ;//   leaves the 8-bit pixel in the low byte of each halfword,
        ;//   and the odd pair is merged in shifted left by 8, giving the
        ;//   bytes p(4k), p(4k+1), p(4k+2), p(4k+3) from low to high.
        ;//   The accumulators are advanced by {c, c} for the next row.
LOOP_PLANE

        ;// pixels 0..3
        USAT16  temp2, #13, p3p1
        USAT16  temp1, #13, p2p0
        SADD16  p3p1,  p3p1,  c
        SADD16  p2p0,  p2p0,  c
        AND     temp2, r0x00FF00FF, temp2, ASR #5
        AND     temp1, r0x00FF00FF, temp1, ASR #5
        ORR     temp1, temp1, temp2, LSL #8
        STR     temp1, [pDst], #4

        ;// pixels 4..7
        USAT16  temp2, #13, p7p5
        USAT16  temp1, #13, p6p4
        SADD16  p7p5,  p7p5,  c
        SADD16  p6p4,  p6p4,  c
        AND     temp2, r0x00FF00FF, temp2, ASR #5
        AND     temp1, r0x00FF00FF, temp1, ASR #5
        ORR     temp1, temp1, temp2, LSL #8
        STR     temp1, [pDst], #4

        ;// pixels 8..11
        USAT16  temp2, #13, p11p9
        USAT16  temp1, #13, p10p8
        SADD16  p11p9, p11p9, c
        SADD16  p10p8, p10p8, c
        AND     temp2, r0x00FF00FF, temp2, ASR #5
        AND     temp1, r0x00FF00FF, temp1, ASR #5
        ORR     temp1, temp1, temp2, LSL #8
        STR     temp1, [pDst], #4

        ;// pixels 12..15
        USAT16  temp2, #13, p15p13
        USAT16  temp1, #13, p14p12
        SADD16  p15p13, p15p13, c
        SADD16  p14p12, p14p12, c
        AND     temp2, r0x00FF00FF, temp2, ASR #5
        AND     temp1, r0x00FF00FF, temp1, ASR #5
        ORR     temp1, temp1, temp2, LSL #8
        STR     temp1, [pDst], #4

        ;// The row counter lives in the top 4 bits of the mask register:
        ;// the 16th increment carries out and ends the loop.  Those bits
        ;// never reach the output because the saturated values are
        ;// non-negative and have no bits that high after ASR #5.
        ADDS    r0x00FF00FF, r0x00FF00FF, #1<<28        ;// Loop counter value in top 4 bits

        ADD     pDst, pDst, dstStep                     ;// advance to the next row (flags untouched)

        BCC     LOOP_PLANE                              ;// Loop for 16 times
        MOV     return, #OMX_Sts_NoErr
        M_END

        ENDIF ;// ARM1136JS


        END
;-----------------------------------------------------------------------------------------------
; omxVCM4P10_PredictIntra_16x16 ends
;-----------------------------------------------------------------------------------------------