;//
;//
;// File Name:  omxVCM4P10_PredictIntra_16x16_s.s
;// OpenMAX DL: v1.0.2
;// Revision:   9641
;// Date:       Thursday, February 7, 2008
;//
;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
;//
;//
;//

        INCLUDE omxtypes_s.h
        INCLUDE armCOMM_s.h

        M_VARIANTS ARM1136JS

;//-------------------------------------------------------
;// This table implements the switch-case of C in asm by
;// the method of two levels of indexing.
;//
;// Each word is the label of the code that handles one
;// prediction mode, in the order VERT, HOR, DC, PLANE.
;// The routine dispatches by loading pc from the word at
;// (table base + predMode * 4), so the entry order must
;// match the numeric values of predMode.
;//-------------------------------------------------------

        M_TABLE armVCM4P10_pIndexTable16x16
        DCD     OMX_VC_16X16_VERT, OMX_VC_16X16_HOR
        DCD     OMX_VC_16X16_DC,   OMX_VC_16X16_PLANE

        IF ARM1136JS

;//--------------------------------------------
;// Constants
;//--------------------------------------------
BLK_SIZE        EQU 0x10                ;// Block edge length (16); used as a row loop count
MUL_CONST0      EQU 0x01010101          ;// Multiplying a byte value by this repeats it in all 4 bytes
MUL_CONST1      EQU 0x00060004          ;// NOTE(review): not referenced in the visible part of the file
MUL_CONST2      EQU 0x00070005          ;// NOTE(review): not referenced in the visible part of the file
MUL_CONST3      EQU 0x00030001          ;// NOTE(review): not referenced in the visible part of the file
MASK_CONST      EQU 0x00FF00FF          ;// NOTE(review): not referenced in the visible part of the file

;//--------------------------------------------
;// Scratch variable
;//--------------------------------------------
y               RN 12                   ;// Row loop counter
pc              RN 15                   ;// Program counter; written by the mode dispatch

return          RN 0                    ;// Status code returned to the caller
innerCount      RN 0
;//--------------------------------------------
;// Scratch register names (continued)
;//
;// Many of these names map to the same physical register
;// (for example temp2, cMul2, count, tVal12 and b are all r12,
;// and dstStepx2, cMul1, r0x00FF00FF and tVal11 are all r11).
;// Names that share a register must not be live at the same time.
;//--------------------------------------------
outerCount      RN 1
pSrcLeft2       RN 1                    ;// Shares r1 with pSrcAbove
pDst2           RN 2                    ;// Shares r2 with pSrcAboveLeft
sum             RN 6                    ;// Shares r6 with predMode
pTable          RN 9                    ;// Base address of the mode jump table
temp1           RN 10
temp2           RN 12
cMul1           RN 11
cMul2           RN 12
count           RN 12
dstStepx2       RN 11
leftStepx2      RN 14
r0x01010101     RN 10                   ;// Holds the byte-replication constant
r0x00FF00FF     RN 11

;// Generic temporaries: tValN is simply register rN.
;// r13 is deliberately not given a tVal name.
tVal0           RN 0
tVal1           RN 1
tVal2           RN 2
tVal3           RN 3
tVal4           RN 4
tVal5           RN 5
tVal6           RN 6
tVal7           RN 7
tVal8           RN 8
tVal9           RN 9
tVal10          RN 10
tVal11          RN 11
tVal12          RN 12
tVal14          RN 14

b               RN 12
c               RN 14

;// NOTE(review): the names below suggest packed pixel values, but
;// their use lies outside the visible part of the file - confirm.
p2p0            RN 0
p3p1            RN 1
p6p4            RN 2
p7p5            RN 4
p10p8           RN 6
p11p9           RN 7
p14p12          RN 8
p15p13          RN 9

p3210           RN 10
p7654           RN 10
p111098         RN 10
p15141312       RN 10

;//--------------------------------------------
;// Declare input registers
;//
;// The first four names are r0-r3. The last four (r4-r7) are the
;// registers into which the routine loads its stack arguments.
;//--------------------------------------------
pSrcLeft        RN 0    ;// input pointer
pSrcAbove       RN 1    ;// input pointer
pSrcAboveLeft   RN 2    ;// input pointer
pDst            RN 3    ;// output pointer
leftStep        RN 4    ;// input variable
dstStep         RN 5    ;// input variable
predMode        RN 6    ;// input variable
availability    RN 7    ;// input variable
Dong 1060c1bc742181ded4930842b46e9507372f0b1b963James Dong;//----------------------------------------------------------------------------------------------- 1070c1bc742181ded4930842b46e9507372f0b1b963James Dong;// omxVCM4P10_PredictIntra_16x16 starts 1080c1bc742181ded4930842b46e9507372f0b1b963James Dong;//----------------------------------------------------------------------------------------------- 1090c1bc742181ded4930842b46e9507372f0b1b963James Dong 1100c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// Write function header 1110c1bc742181ded4930842b46e9507372f0b1b963James Dong M_START omxVCM4P10_PredictIntra_16x16, r11 1120c1bc742181ded4930842b46e9507372f0b1b963James Dong 1130c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// Define stack arguments 1140c1bc742181ded4930842b46e9507372f0b1b963James Dong M_ARG LeftStep, 4 1150c1bc742181ded4930842b46e9507372f0b1b963James Dong M_ARG DstStep, 4 1160c1bc742181ded4930842b46e9507372f0b1b963James Dong M_ARG PredMode, 4 1170c1bc742181ded4930842b46e9507372f0b1b963James Dong M_ARG Availability, 4 1180c1bc742181ded4930842b46e9507372f0b1b963James Dong 1190c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// M_STALL ARM1136JS=4 1200c1bc742181ded4930842b46e9507372f0b1b963James Dong 1210c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR pTable,=armVCM4P10_pIndexTable16x16 ;// Load index table for switch case 1220c1bc742181ded4930842b46e9507372f0b1b963James Dong 1230c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// Load argument from the stack 1240c1bc742181ded4930842b46e9507372f0b1b963James Dong M_LDR predMode, PredMode ;// Arg predMode loaded from stack to reg 1250c1bc742181ded4930842b46e9507372f0b1b963James Dong M_LDR leftStep, LeftStep ;// Arg leftStep loaded from stack to reg 1260c1bc742181ded4930842b46e9507372f0b1b963James Dong M_LDR dstStep, DstStep ;// Arg dstStep loaded from stack to reg 1270c1bc742181ded4930842b46e9507372f0b1b963James Dong M_LDR availability, Availability ;// Arg availability loaded from 
stack to reg 1280c1bc742181ded4930842b46e9507372f0b1b963James Dong 1290c1bc742181ded4930842b46e9507372f0b1b963James Dong MOV y, #BLK_SIZE ;// Outer Loop Count 1300c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR pc, [pTable, predMode, LSL #2] ;// Branch to the case based on preMode 1310c1bc742181ded4930842b46e9507372f0b1b963James Dong 1320c1bc742181ded4930842b46e9507372f0b1b963James DongOMX_VC_16X16_VERT 1330c1bc742181ded4930842b46e9507372f0b1b963James Dong LDM pSrcAbove, {tVal6,tVal7,tVal8,tVal9};// tVal 6 to 9 = pSrcAbove[0 to 15] 1340c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD dstStepx2, dstStep, dstStep ;// double dstStep 1350c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD pDst2, pDst, dstStep ;// pDst2- pDst advanced by dstStep 1360c1bc742181ded4930842b46e9507372f0b1b963James Dong 1370c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// M_STALL ARM1136JS=2 ;// Stall outside the loop 1380c1bc742181ded4930842b46e9507372f0b1b963James Dong 1390c1bc742181ded4930842b46e9507372f0b1b963James DongLOOP_VERT 1400c1bc742181ded4930842b46e9507372f0b1b963James Dong STM pDst, {tVal6,tVal7,tVal8,tVal9} ;// pDst[0 to 15] = tVal 6 to 9 1410c1bc742181ded4930842b46e9507372f0b1b963James Dong SUBS y, y, #2 ;// y-- 1420c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD pDst, pDst, dstStepx2 ;// pDst advanced by dstStep 1430c1bc742181ded4930842b46e9507372f0b1b963James Dong STM pDst2, {tVal6,tVal7,tVal8,tVal9} ;// pDst2[16 to 31] = tVal 6 to 9 1440c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD pDst2, pDst2, dstStepx2 ;// pDst advanced by dstStep 1450c1bc742181ded4930842b46e9507372f0b1b963James Dong BNE LOOP_VERT ;// Loop for 8 times 1460c1bc742181ded4930842b46e9507372f0b1b963James Dong MOV return, #OMX_Sts_NoErr 1470c1bc742181ded4930842b46e9507372f0b1b963James Dong M_EXIT 1480c1bc742181ded4930842b46e9507372f0b1b963James Dong 1490c1bc742181ded4930842b46e9507372f0b1b963James Dong 1500c1bc742181ded4930842b46e9507372f0b1b963James DongOMX_VC_16X16_HOR 
1510c1bc742181ded4930842b46e9507372f0b1b963James Dong 1520c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// M_STALL ARM1136JS=6 1530c1bc742181ded4930842b46e9507372f0b1b963James Dong 1540c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR r0x01010101, =MUL_CONST0 ;// Const to repeat the byte in reg 4 times 1550c1bc742181ded4930842b46e9507372f0b1b963James Dong MOV y, #4 ;// Outer Loop Count 1560c1bc742181ded4930842b46e9507372f0b1b963James Dong M_LDRB tVal6, [pSrcLeft], +leftStep ;// tVal6 = pSrcLeft[0 to 3] 1570c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD pDst2, pDst, dstStep ;// pDst2- pDst advanced by dstStep 1580c1bc742181ded4930842b46e9507372f0b1b963James Dong M_LDRB tVal7, [pSrcLeft], +leftStep ;// tVal1 = pSrcLeft[4 to 7] 1590c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD dstStepx2, dstStep, dstStep ;// double dstStep 1600c1bc742181ded4930842b46e9507372f0b1b963James Dong SUB dstStepx2, dstStepx2, #12 ;// double dstStep minus 12 1610c1bc742181ded4930842b46e9507372f0b1b963James Dong 1620c1bc742181ded4930842b46e9507372f0b1b963James DongLOOP_HOR 1630c1bc742181ded4930842b46e9507372f0b1b963James Dong M_LDRB tVal8, [pSrcLeft], +leftStep ;// tVal8 = pSrcLeft[0 to 3] 1640c1bc742181ded4930842b46e9507372f0b1b963James Dong MUL tVal6, tVal6, r0x01010101 ;// replicate the val in all the bytes 1650c1bc742181ded4930842b46e9507372f0b1b963James Dong M_LDRB tVal9, [pSrcLeft], +leftStep ;// tVal9 = pSrcLeft[4 to 7] 1660c1bc742181ded4930842b46e9507372f0b1b963James Dong MUL tVal7, tVal7, r0x01010101 ;// replicate the val in all the bytes 1670c1bc742181ded4930842b46e9507372f0b1b963James Dong SUBS y, y, #1 ;// y-- 1680c1bc742181ded4930842b46e9507372f0b1b963James Dong STR tVal6, [pDst], #+4 ;// store {tVal6} at pDst[0 to 3] 1690c1bc742181ded4930842b46e9507372f0b1b963James Dong STR tVal7, [pDst2], #+4 ;// store {tVal7} at pDst2[0 to 3] 1700c1bc742181ded4930842b46e9507372f0b1b963James Dong STR tVal6, [pDst], #+4 ;// store {tVal6} at pDst[4 to 7] 
1710c1bc742181ded4930842b46e9507372f0b1b963James Dong STR tVal7, [pDst2], #+4 ;// store {tVal7} at pDst2[4 to 7] 1720c1bc742181ded4930842b46e9507372f0b1b963James Dong MUL tVal8, tVal8, r0x01010101 ;// replicate the val in all the bytes 1730c1bc742181ded4930842b46e9507372f0b1b963James Dong STR tVal6, [pDst], #+4 ;// store {tVal6} at pDst[8 to 11] 1740c1bc742181ded4930842b46e9507372f0b1b963James Dong STR tVal7, [pDst2], #+4 ;// store {tVal7} at pDst2[8 to 11] 1750c1bc742181ded4930842b46e9507372f0b1b963James Dong MUL tVal9, tVal9, r0x01010101 ;// replicate the val in all the bytes 1760c1bc742181ded4930842b46e9507372f0b1b963James Dong M_STR tVal6, [pDst], dstStepx2 ;// store {tVal6} at pDst[12 to 15] 1770c1bc742181ded4930842b46e9507372f0b1b963James Dong M_STR tVal7, [pDst2], dstStepx2 ;// store {tVal7} at pDst2[12 to 15] 1780c1bc742181ded4930842b46e9507372f0b1b963James Dong STR tVal8, [pDst], #+4 ;// store {tVal6} at pDst[0 to 3] 1790c1bc742181ded4930842b46e9507372f0b1b963James Dong STR tVal9, [pDst2], #+4 ;// store {tVal7} at pDst2[0 to 3] 1800c1bc742181ded4930842b46e9507372f0b1b963James Dong STR tVal8, [pDst], #+4 ;// store {tVal6} at pDst[4 to 7] 1810c1bc742181ded4930842b46e9507372f0b1b963James Dong STR tVal9, [pDst2], #+4 ;// store {tVal7} at pDst2[4 to 7] 1820c1bc742181ded4930842b46e9507372f0b1b963James Dong STR tVal8, [pDst], #+4 ;// store {tVal6} at pDst[8 to 11] 1830c1bc742181ded4930842b46e9507372f0b1b963James Dong STR tVal9, [pDst2], #+4 ;// store {tVal7} at pDst2[8 to 11] 1840c1bc742181ded4930842b46e9507372f0b1b963James Dong M_STR tVal8, [pDst], dstStepx2 ;// store {tVal6} at pDst[12 to 15] 1850c1bc742181ded4930842b46e9507372f0b1b963James Dong M_LDRB tVal6, [pSrcLeft], +leftStep ;// tVal6 = pSrcLeft[0 to 3] 1860c1bc742181ded4930842b46e9507372f0b1b963James Dong M_STR tVal9, [pDst2], dstStepx2 ;// store {tVal7} at pDst2[12 to 15] 1870c1bc742181ded4930842b46e9507372f0b1b963James Dong M_LDRB tVal7, [pSrcLeft], +leftStep ;// tVal7 = pSrcLeft[4 to 7] 
1880c1bc742181ded4930842b46e9507372f0b1b963James Dong BNE LOOP_HOR ;// Loop for 3 times 1890c1bc742181ded4930842b46e9507372f0b1b963James Dong MOV return, #OMX_Sts_NoErr 1900c1bc742181ded4930842b46e9507372f0b1b963James Dong M_EXIT 1910c1bc742181ded4930842b46e9507372f0b1b963James Dong 1920c1bc742181ded4930842b46e9507372f0b1b963James DongOMX_VC_16X16_DC 1930c1bc742181ded4930842b46e9507372f0b1b963James Dong 1940c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// M_STALL ARM1136JS=2 1950c1bc742181ded4930842b46e9507372f0b1b963James Dong 1960c1bc742181ded4930842b46e9507372f0b1b963James Dong MOV count, #0 ;// count = 0 1970c1bc742181ded4930842b46e9507372f0b1b963James Dong TST availability, #OMX_VC_UPPER ;// if(availability & #OMX_VC_UPPER) 1980c1bc742181ded4930842b46e9507372f0b1b963James Dong BEQ TST_LEFT ;// Jump to Left if not upper 1990c1bc742181ded4930842b46e9507372f0b1b963James Dong LDM pSrcAbove,{tVal8,tVal9,tVal10,tVal11};// tVal 8 to 11 = pSrcAbove[0 to 15] 2000c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD count, count, #1 ;// if upper inc count by 1 2010c1bc742181ded4930842b46e9507372f0b1b963James Dong 2020c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// M_STALL ARM1136JS=2 2030c1bc742181ded4930842b46e9507372f0b1b963James Dong 2040c1bc742181ded4930842b46e9507372f0b1b963James Dong UXTB16 tVal2, tVal8 ;// pSrcAbove[0, 2] 2050c1bc742181ded4930842b46e9507372f0b1b963James Dong UXTB16 tVal6, tVal9 ;// pSrcAbove[4, 6] 2060c1bc742181ded4930842b46e9507372f0b1b963James Dong UADD16 tVal2, tVal2, tVal6 ;// pSrcAbove[0, 2] + pSrcAbove[4, 6] 2070c1bc742181ded4930842b46e9507372f0b1b963James Dong UXTB16 tVal8, tVal8, ROR #8 ;// pSrcAbove[1, 3] 2080c1bc742181ded4930842b46e9507372f0b1b963James Dong UXTB16 tVal9, tVal9, ROR #8 ;// pSrcAbove[5, 7] 2090c1bc742181ded4930842b46e9507372f0b1b963James Dong UADD16 tVal8, tVal8, tVal9 ;// pSrcAbove[1, 3] + pSrcAbove[5, 7] 2100c1bc742181ded4930842b46e9507372f0b1b963James Dong UADD16 tVal2, tVal2, tVal8 ;// sum(pSrcAbove[0] to 
pSrcAbove[7]) 2110c1bc742181ded4930842b46e9507372f0b1b963James Dong 2120c1bc742181ded4930842b46e9507372f0b1b963James Dong UXTB16 tVal8, tVal10 ;// pSrcAbove[8, 10] 2130c1bc742181ded4930842b46e9507372f0b1b963James Dong UXTB16 tVal9, tVal11 ;// pSrcAbove[12, 14] 2140c1bc742181ded4930842b46e9507372f0b1b963James Dong UADD16 tVal8, tVal8, tVal9 ;// pSrcAbove[8, 10] + pSrcAbove[12, 14] 2150c1bc742181ded4930842b46e9507372f0b1b963James Dong UXTB16 tVal10, tVal10, ROR #8 ;// pSrcAbove[9, 11] 2160c1bc742181ded4930842b46e9507372f0b1b963James Dong UXTB16 tVal11, tVal11, ROR #8 ;// pSrcAbove[13, 15] 2170c1bc742181ded4930842b46e9507372f0b1b963James Dong UADD16 tVal10, tVal10, tVal11 ;// pSrcAbove[9, 11] + pSrcAbove[13, 15] 2180c1bc742181ded4930842b46e9507372f0b1b963James Dong UADD16 tVal8, tVal8, tVal10 ;// sum(pSrcAbove[8] to pSrcAbove[15]) 2190c1bc742181ded4930842b46e9507372f0b1b963James Dong 2200c1bc742181ded4930842b46e9507372f0b1b963James Dong UADD16 tVal2, tVal2, tVal8 ;// sum(pSrcAbove[0] to pSrcAbove[15]) 2210c1bc742181ded4930842b46e9507372f0b1b963James Dong 2220c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// M_STALL ARM1136JS=1 2230c1bc742181ded4930842b46e9507372f0b1b963James Dong 2240c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD tVal2, tVal2, tVal2, LSR #16 ;// sum(pSrcAbove[0] to pSrcAbove[15]) 2250c1bc742181ded4930842b46e9507372f0b1b963James Dong 2260c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// M_STALL ARM1136JS=1 2270c1bc742181ded4930842b46e9507372f0b1b963James Dong 2280c1bc742181ded4930842b46e9507372f0b1b963James Dong UXTH sum, tVal2 ;// Extract the lower half for result 2290c1bc742181ded4930842b46e9507372f0b1b963James Dong 2300c1bc742181ded4930842b46e9507372f0b1b963James DongTST_LEFT 2310c1bc742181ded4930842b46e9507372f0b1b963James Dong TST availability, #OMX_VC_LEFT 2320c1bc742181ded4930842b46e9507372f0b1b963James Dong BEQ TST_COUNT 2330c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD leftStepx2, leftStep,leftStep ;// leftStepx2 = 2 * 
leftStep 2340c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD pSrcLeft2, pSrcLeft, leftStep ;// pSrcLeft2 = pSrcLeft + leftStep 2350c1bc742181ded4930842b46e9507372f0b1b963James Dong 2360c1bc742181ded4930842b46e9507372f0b1b963James Dong M_LDRB tVal8, [pSrcLeft], +leftStepx2 ;// tVal8 = pSrcLeft[0] 2370c1bc742181ded4930842b46e9507372f0b1b963James Dong M_LDRB tVal9, [pSrcLeft2], +leftStepx2 ;// tVal9 = pSrcLeft[1] 2380c1bc742181ded4930842b46e9507372f0b1b963James Dong M_LDRB tVal10, [pSrcLeft], +leftStepx2 ;// tVal10= pSrcLeft[2] 2390c1bc742181ded4930842b46e9507372f0b1b963James Dong M_LDRB tVal11, [pSrcLeft2],+leftStepx2 ;// tVal11= pSrcLeft[3] 2400c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD tVal7, tVal8, tVal9 ;// tVal7 = tVal8 + tVal9 2410c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD count, count, #1 ;// Inc Counter if Left is available 2420c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD tVal6, tVal10, tVal11 ;// tVal6 = tVal10 + tVal11 2430c1bc742181ded4930842b46e9507372f0b1b963James Dong 2440c1bc742181ded4930842b46e9507372f0b1b963James Dong M_LDRB tVal8, [pSrcLeft], +leftStepx2 ;// tVal8 = pSrcLeft[0] 2450c1bc742181ded4930842b46e9507372f0b1b963James Dong M_LDRB tVal9, [pSrcLeft2], +leftStepx2 ;// tVal9 = pSrcLeft[1] 2460c1bc742181ded4930842b46e9507372f0b1b963James Dong M_LDRB tVal10, [pSrcLeft], +leftStepx2 ;// tVal10= pSrcLeft[2] 2470c1bc742181ded4930842b46e9507372f0b1b963James Dong M_LDRB tVal11, [pSrcLeft2],+leftStepx2 ;// tVal11= pSrcLeft[3] 2480c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD sum, tVal7, tVal6 ;// sum = tVal8 + tVal10 2490c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD tVal8, tVal8, tVal9 ;// tVal8 = tVal8 + tVal9 2500c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD tVal10, tVal10, tVal11 ;// tVal10= tVal10 + tVal11 2510c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD tVal7, tVal8, tVal10 ;// tVal7 = tVal8 + tVal10 2520c1bc742181ded4930842b46e9507372f0b1b963James Dong 
2530c1bc742181ded4930842b46e9507372f0b1b963James Dong 2540c1bc742181ded4930842b46e9507372f0b1b963James Dong M_LDRB tVal8, [pSrcLeft], +leftStepx2 ;// tVal8 = pSrcLeft[0] 2550c1bc742181ded4930842b46e9507372f0b1b963James Dong M_LDRB tVal9, [pSrcLeft2], +leftStepx2 ;// tVal9 = pSrcLeft[1] 2560c1bc742181ded4930842b46e9507372f0b1b963James Dong M_LDRB tVal10, [pSrcLeft], +leftStepx2 ;// tVal10= pSrcLeft[2] 2570c1bc742181ded4930842b46e9507372f0b1b963James Dong M_LDRB tVal11, [pSrcLeft2],+leftStepx2 ;// tVal11= pSrcLeft[3] 2580c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD sum, sum, tVal7 ;// sum = sum + tVal7 2590c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD tVal8, tVal8, tVal9 ;// tVal8 = tVal8 + tVal9 2600c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD tVal10, tVal10, tVal11 ;// tVal10= tVal10 + tVal11 2610c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD tVal7, tVal8, tVal10 ;// tVal7 = tVal8 + tVal10 2620c1bc742181ded4930842b46e9507372f0b1b963James Dong 2630c1bc742181ded4930842b46e9507372f0b1b963James Dong 2640c1bc742181ded4930842b46e9507372f0b1b963James Dong M_LDRB tVal8, [pSrcLeft], +leftStepx2 ;// tVal8 = pSrcLeft[0] 2650c1bc742181ded4930842b46e9507372f0b1b963James Dong M_LDRB tVal9, [pSrcLeft2], +leftStepx2 ;// tVal9 = pSrcLeft[1] 2660c1bc742181ded4930842b46e9507372f0b1b963James Dong M_LDRB tVal10, [pSrcLeft], +leftStepx2 ;// tVal10= pSrcLeft[2] 2670c1bc742181ded4930842b46e9507372f0b1b963James Dong M_LDRB tVal11, [pSrcLeft2],+leftStepx2 ;// tVal11= pSrcLeft[3] 2680c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD sum, sum, tVal7 ;// sum = sum + tVal7 2690c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD tVal8, tVal8, tVal9 ;// tVal8 = tVal8 + tVal9 2700c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD tVal10, tVal10, tVal11 ;// tVal10= tVal10 + tVal11 2710c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD tVal7, tVal8, tVal10 ;// tVal7 = tVal8 + tVal10 2720c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD sum, sum, tVal7 
;// sum = sum + tVal7 2730c1bc742181ded4930842b46e9507372f0b1b963James Dong 2740c1bc742181ded4930842b46e9507372f0b1b963James DongTST_COUNT 2750c1bc742181ded4930842b46e9507372f0b1b963James Dong CMP count, #0 ;// if(count == 0) 2760c1bc742181ded4930842b46e9507372f0b1b963James Dong MOVEQ sum, #128 ;// sum = 128 if(count == 0) 2770c1bc742181ded4930842b46e9507372f0b1b963James Dong BEQ TST_COUNT0 ;// if(count == 0) 2780c1bc742181ded4930842b46e9507372f0b1b963James Dong CMP count, #1 ;// if(count == 1) 2790c1bc742181ded4930842b46e9507372f0b1b963James Dong ADDEQ sum, sum, #8 ;// sum += 8 if(count == 1) 2800c1bc742181ded4930842b46e9507372f0b1b963James Dong ADDNE sum, sum, tVal2 ;// sum = sumleft + sumupper 2810c1bc742181ded4930842b46e9507372f0b1b963James Dong ADDNE sum, sum, #16 ;// sum += 16 if(count == 2) 2820c1bc742181ded4930842b46e9507372f0b1b963James Dong 2830c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// M_STALL ARM1136JS=1 2840c1bc742181ded4930842b46e9507372f0b1b963James Dong 2850c1bc742181ded4930842b46e9507372f0b1b963James Dong UXTH sum, sum ;// sum only byte rest cleared 2860c1bc742181ded4930842b46e9507372f0b1b963James Dong 2870c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// M_STALL ARM1136JS=1 2880c1bc742181ded4930842b46e9507372f0b1b963James Dong 2890c1bc742181ded4930842b46e9507372f0b1b963James Dong LSREQ sum, sum, #4 ;// sum >> 4 if(count == 1) 2900c1bc742181ded4930842b46e9507372f0b1b963James Dong 2910c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// M_STALL ARM1136JS=1 2920c1bc742181ded4930842b46e9507372f0b1b963James Dong 2930c1bc742181ded4930842b46e9507372f0b1b963James Dong LSRNE sum, sum, #5 ;// sum >> 5 if(count == 2) 2940c1bc742181ded4930842b46e9507372f0b1b963James Dong 2950c1bc742181ded4930842b46e9507372f0b1b963James DongTST_COUNT0 2960c1bc742181ded4930842b46e9507372f0b1b963James Dong 2970c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// M_STALL ARM1136JS=1 2980c1bc742181ded4930842b46e9507372f0b1b963James Dong 
2990c1bc742181ded4930842b46e9507372f0b1b963James Dong ORR sum, sum, sum, LSL #8 ;// sum replicated in two halfword 3000c1bc742181ded4930842b46e9507372f0b1b963James Dong 3010c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// M_STALL ARM1136JS=1 3020c1bc742181ded4930842b46e9507372f0b1b963James Dong 3030c1bc742181ded4930842b46e9507372f0b1b963James Dong ORR tVal6, sum, sum, LSL #16 ;// sum replicated in all bytes 3040c1bc742181ded4930842b46e9507372f0b1b963James Dong CPY tVal7, tVal6 ;// tVal1 = tVal0 3050c1bc742181ded4930842b46e9507372f0b1b963James Dong CPY tVal8, tVal6 ;// tVal2 = tVal0 3060c1bc742181ded4930842b46e9507372f0b1b963James Dong CPY tVal9, tVal6 ;// tVal3 = tVal0 3070c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD dstStepx2, dstStep, dstStep ;// double dstStep 3080c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD pDst2, pDst, dstStep ;// pDst2- pDst advanced by dstStep 3090c1bc742181ded4930842b46e9507372f0b1b963James Dong MOV y, #BLK_SIZE ;// Outer Loop Count 3100c1bc742181ded4930842b46e9507372f0b1b963James Dong 3110c1bc742181ded4930842b46e9507372f0b1b963James DongLOOP_DC 3120c1bc742181ded4930842b46e9507372f0b1b963James Dong STM pDst, {tVal6,tVal7,tVal8,tVal9} ;// pDst[0 to 15] = tVal 6 to 9 3130c1bc742181ded4930842b46e9507372f0b1b963James Dong SUBS y, y, #2 ;// y-- 3140c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD pDst, pDst, dstStepx2 ;// pDst advanced by dstStep 3150c1bc742181ded4930842b46e9507372f0b1b963James Dong STM pDst2, {tVal6,tVal7,tVal8,tVal9} ;// pDst2[16 to 31] = tVal 6 to 9 3160c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD pDst2, pDst2, dstStepx2 ;// pDst advanced by dstStep 3170c1bc742181ded4930842b46e9507372f0b1b963James Dong BNE LOOP_DC ;// Loop for 8 times 3180c1bc742181ded4930842b46e9507372f0b1b963James Dong 3190c1bc742181ded4930842b46e9507372f0b1b963James Dong MOV return, #OMX_Sts_NoErr 3200c1bc742181ded4930842b46e9507372f0b1b963James Dong M_EXIT 3210c1bc742181ded4930842b46e9507372f0b1b963James Dong 
3220c1bc742181ded4930842b46e9507372f0b1b963James DongOMX_VC_16X16_PLANE 3230c1bc742181ded4930842b46e9507372f0b1b963James Dong 3240c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// M_STALL ARM1136JS=3 3250c1bc742181ded4930842b46e9507372f0b1b963James Dong RSB tVal14, leftStep, leftStep, LSL #4 ;// tVal14 = 15*leftStep 3260c1bc742181ded4930842b46e9507372f0b1b963James Dong 3270c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// M_STALL ARM1136JS=2 3280c1bc742181ded4930842b46e9507372f0b1b963James Dong LDRB tVal10, [pSrcLeft, tVal14] ;// tVal10 = pSrcLeft[15*leftStep] 3290c1bc742181ded4930842b46e9507372f0b1b963James Dong LDRB tVal11, [pSrcAboveLeft] ;// tVal11 = pSrcAboveLeft[0] 3300c1bc742181ded4930842b46e9507372f0b1b963James Dong LDRB tVal12, [pSrcAbove, #15] 3310c1bc742181ded4930842b46e9507372f0b1b963James Dong 3320c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD tVal2, tVal12, tVal10 ;// tVal2 = pSrcAbove[15] + pSrcLeft[15*leftStep] 3330c1bc742181ded4930842b46e9507372f0b1b963James Dong SUB tVal10, tVal10, tVal11 ;// tVal10 = V0 = pSrcLeft[15*leftStep] - pSrcAboveLeft[0] 3340c1bc742181ded4930842b46e9507372f0b1b963James Dong SUB tVal11, tVal12, tVal11 ;// tVal11 = H0 = pSrcAbove[15] - pSrcAboveLeft[0] 3350c1bc742181ded4930842b46e9507372f0b1b963James Dong MOV tVal2, tVal2, LSL #4 ;// tVal2 = a = 16 * (pSrcAbove[15] + pSrcLeft[15*leftStep]) 3360c1bc742181ded4930842b46e9507372f0b1b963James Dong 3370c1bc742181ded4930842b46e9507372f0b1b963James Dong MOV tVal11, tVal11, LSL #3 ;// 8*[15]-[-1] 3380c1bc742181ded4930842b46e9507372f0b1b963James Dong LDRB tVal6, [pSrcAbove, #0] 3390c1bc742181ded4930842b46e9507372f0b1b963James Dong LDRB tVal7, [pSrcAbove, #14] 3400c1bc742181ded4930842b46e9507372f0b1b963James Dong SUB tVal8, tVal7, tVal6 3410c1bc742181ded4930842b46e9507372f0b1b963James Dong RSB tVal8, tVal8, tVal8, LSL #3 ;// 7*[14]-[0] 3420c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD tVal11, tVal11, tVal8 3430c1bc742181ded4930842b46e9507372f0b1b963James Dong 
LDRB tVal6, [pSrcAbove, #1] 3440c1bc742181ded4930842b46e9507372f0b1b963James Dong LDRB tVal7, [pSrcAbove, #13] 3450c1bc742181ded4930842b46e9507372f0b1b963James Dong SUB tVal8, tVal7, tVal6 3460c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD tVal8, tVal8, tVal8 3470c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD tVal8, tVal8, tVal8, LSL #1 ;// 6*[13]-[1] 3480c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD tVal11, tVal11, tVal8 3490c1bc742181ded4930842b46e9507372f0b1b963James Dong LDRB tVal6, [pSrcAbove, #2] 3500c1bc742181ded4930842b46e9507372f0b1b963James Dong LDRB tVal7, [pSrcAbove, #12] 3510c1bc742181ded4930842b46e9507372f0b1b963James Dong SUB tVal8, tVal7, tVal6 3520c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD tVal8, tVal8, tVal8, LSL #2 ;// 5*[12]-[2] 3530c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD tVal11, tVal11, tVal8 3540c1bc742181ded4930842b46e9507372f0b1b963James Dong LDRB tVal6, [pSrcAbove, #3] 3550c1bc742181ded4930842b46e9507372f0b1b963James Dong LDRB tVal7, [pSrcAbove, #11] 3560c1bc742181ded4930842b46e9507372f0b1b963James Dong SUB tVal8, tVal7, tVal6 3570c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD tVal11, tVal11, tVal8, LSL #2 ;// + 4*[11]-[3] 3580c1bc742181ded4930842b46e9507372f0b1b963James Dong LDRB tVal6, [pSrcAbove, #4] 3590c1bc742181ded4930842b46e9507372f0b1b963James Dong LDRB tVal7, [pSrcAbove, #10] 3600c1bc742181ded4930842b46e9507372f0b1b963James Dong SUB tVal8, tVal7, tVal6 3610c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD tVal8, tVal8, tVal8, LSL #1 ;// 3*[10]-[4] 3620c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD tVal11, tVal11, tVal8 3630c1bc742181ded4930842b46e9507372f0b1b963James Dong LDRB tVal6, [pSrcAbove, #5] 3640c1bc742181ded4930842b46e9507372f0b1b963James Dong LDRB tVal7, [pSrcAbove, #9] 3650c1bc742181ded4930842b46e9507372f0b1b963James Dong SUB tVal8, tVal7, tVal6 3660c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD tVal11, tVal11, tVal8, LSL #1 ;// + 2*[9]-[5] 
3670c1bc742181ded4930842b46e9507372f0b1b963James Dong LDRB tVal6, [pSrcAbove, #6] 3680c1bc742181ded4930842b46e9507372f0b1b963James Dong LDRB tVal7, [pSrcAbove, #8] 3690c1bc742181ded4930842b46e9507372f0b1b963James Dong SUB tVal8, tVal7, tVal6 ;// 1*[8]-[6] 3700c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD tVal7, tVal11, tVal8 3710c1bc742181ded4930842b46e9507372f0b1b963James Dong 3720c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD tVal2, tVal2, #16 ;// tVal2 = a + 16 3730c1bc742181ded4930842b46e9507372f0b1b963James Dong MOV tVal1, pSrcLeft ;// tVal4 = pSrcLeft 3740c1bc742181ded4930842b46e9507372f0b1b963James Dong SUB tVal9, tVal14, leftStep ;// tVal9 = 14*leftStep 3750c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD tVal9, pSrcLeft, tVal9 ;// tVal9 = pSrcLeft + 14*leftStep 3760c1bc742181ded4930842b46e9507372f0b1b963James Dong 3770c1bc742181ded4930842b46e9507372f0b1b963James Dong M_LDRB tVal8, [tVal9], -leftStep ;// tVal8 = pSrcLeft[14*leftStep] 3780c1bc742181ded4930842b46e9507372f0b1b963James Dong M_LDRB tVal11, [tVal1], +leftStep ;// tVal11 = pSrcLeft[0] 3790c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD tVal7, tVal7, tVal7, LSL #2 ;// tVal7 = 5 * H 3800c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD tVal7, tVal7, #32 ;// tVal7 = 5 * H + 32 3810c1bc742181ded4930842b46e9507372f0b1b963James Dong SUB tVal8, tVal8, tVal11 ;// tVal8 = pSrcLeft[14*leftStep] - pSrcLeft[0] 3820c1bc742181ded4930842b46e9507372f0b1b963James Dong ASR tVal12, tVal7, #6 ;// tVal12 = b = (5 * H + 32) >> 6 3830c1bc742181ded4930842b46e9507372f0b1b963James Dong 3840c1bc742181ded4930842b46e9507372f0b1b963James Dong RSB tVal8, tVal8, tVal8, LSL #3 ;// tVal8 = V1 = 7* (pSrcLeft[14*leftStep]-pSrcLeft[0]) 3850c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD tVal6, tVal8, tVal10, LSL #3 ;// tVal6 = V = V0 +V1 3860c1bc742181ded4930842b46e9507372f0b1b963James Dong M_LDRB tVal8, [tVal9], -leftStep ;// tVal8 = pSrcLeft[13*leftStep] 
3870c1bc742181ded4930842b46e9507372f0b1b963James Dong M_LDRB tVal10, [tVal1], +leftStep ;// tVal10 = pSrcLeft[leftStep] 3880c1bc742181ded4930842b46e9507372f0b1b963James Dong RSB tVal7, tVal12, tVal12, LSL #3 ;// tVal7 = 7*b 3890c1bc742181ded4930842b46e9507372f0b1b963James Dong SUB tVal2, tVal2, tVal7 ;// tVal2 = a + 16 - 7*b 3900c1bc742181ded4930842b46e9507372f0b1b963James Dong SUB tVal7, tVal8, tVal10 ;// tVal7 = pSrcLeft[13*leftStep] - pSrcLeft[leftStep] 3910c1bc742181ded4930842b46e9507372f0b1b963James Dong M_LDRB tVal8, [tVal9], -leftStep ;// tVal8 = pSrcLeft[12*lS] 3920c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD tVal7, tVal7, tVal7 ;// tVal7 = 2 * (pSrcLeft[13*leftStep] - pSrcLeft[leftStep]) 3930c1bc742181ded4930842b46e9507372f0b1b963James Dong M_LDRB tVal10, [tVal1], +leftStep ;// tVal10 = pSrcLeft[2*leftStep] 3940c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD tVal7, tVal7, tVal7, LSL #1 ;// tVal7 = 6 * (pSrcLeft[13*leftStep] - pSrcLeft[leftStep]) 3950c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD tVal6, tVal6, tVal7 ;// tVal6 = V = V + V2 3960c1bc742181ded4930842b46e9507372f0b1b963James Dong SUB tVal7, tVal8, tVal10 ;// tVal7 = pSrcLeft[12*leftStep] - pSrcLeft[2*leftStep] 3970c1bc742181ded4930842b46e9507372f0b1b963James Dong M_LDRB tVal8, [tVal9], -leftStep ;// tVal8 = pSrcLeft[11*leftStep] 3980c1bc742181ded4930842b46e9507372f0b1b963James Dong M_LDRB tVal10, [tVal1], +leftStep ;// tVal10 = pSrcLeft[3*leftStep] 3990c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD tVal7, tVal7, tVal7, LSL #2 ;// tVal7 = 5 * (pSrcLeft[12*leftStep] - pSrcLeft[2*leftStep]) 4000c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD tVal6, tVal6, tVal7 ;// tVal6 = V = V + V3 4010c1bc742181ded4930842b46e9507372f0b1b963James Dong SUB tVal7, tVal8, tVal10 ;// tVal7 = pSrcLeft[11*leftStep] - pSrcLeft[3*leftStep] 4020c1bc742181ded4930842b46e9507372f0b1b963James Dong M_LDRB tVal8, [tVal9], -leftStep ;// tVal8 = pSrcLeft[10*leftStep] 
4030c1bc742181ded4930842b46e9507372f0b1b963James Dong M_LDRB tVal10, [tVal1], +leftStep ;// tVal10 = pSrcLeft[4*leftStep] 4040c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD tVal6, tVal6, tVal7, LSL #2 ;// tVal6 = V = V + V4 4050c1bc742181ded4930842b46e9507372f0b1b963James Dong SUB dstStep, dstStep, #16 ;// tVal5 = dstStep - 16 4060c1bc742181ded4930842b46e9507372f0b1b963James Dong SUB tVal7, tVal8, tVal10 ;// tVal7 = pSrcLeft[10*leftStep] - pSrcLeft[4*leftStep] 4070c1bc742181ded4930842b46e9507372f0b1b963James Dong M_LDRB tVal8, [tVal9], -leftStep ;// tVal8 = pSrcLeft[9*leftStep] 4080c1bc742181ded4930842b46e9507372f0b1b963James Dong M_LDRB tVal10, [tVal1], +leftStep ;// tVal10 = pSrcLeft[5*leftStep] 4090c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD tVal7, tVal7, tVal7, LSL #1 ;// tVal7 = 3 * (pSrcLeft[10*leftStep] - pSrcLeft[4*leftStep]) 4100c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD tVal6, tVal6, tVal7 ;// tVal6 = V = V + V5 4110c1bc742181ded4930842b46e9507372f0b1b963James Dong SUB tVal7, tVal8, tVal10 ;// tVal7 = pSrcLeft[9*leftStep] - pSrcLeft[5*leftStep] 4120c1bc742181ded4930842b46e9507372f0b1b963James Dong M_LDRB tVal8, [tVal9], -leftStep ;// tVal8 = pSrcLeft[8*leftStep] 4130c1bc742181ded4930842b46e9507372f0b1b963James Dong M_LDRB tVal10, [tVal1], +leftStep ;// tVal10 = pSrcLeft[6*leftStep] 4140c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD tVal6, tVal6, tVal7, LSL #1 ;// tVal6 = V = V + V6 4150c1bc742181ded4930842b46e9507372f0b1b963James Dong 4160c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// M_STALL ARM1136JS=1 4170c1bc742181ded4930842b46e9507372f0b1b963James Dong SUB tVal7, tVal8, tVal10 ;// tVal7 = pSrcLeft[8*leftStep] - pSrcLeft[6*leftStep] 4180c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD tVal6, tVal6, tVal7 ;// tVal6 = V = V + V7 4190c1bc742181ded4930842b46e9507372f0b1b963James Dong 4200c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// M_STALL ARM1136JS=1 4210c1bc742181ded4930842b46e9507372f0b1b963James 
Dong ADD tVal6, tVal6, tVal6, LSL #2 ;// tVal6 = 5*V 4220c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD tVal6, tVal6, #32 ;// tVal6 = 5*V + 32 4230c1bc742181ded4930842b46e9507372f0b1b963James Dong 4240c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// M_STALL ARM1136JS=1 4250c1bc742181ded4930842b46e9507372f0b1b963James Dong ASR tVal14, tVal6, #6 ;// tVal14 = c = (5*V + 32)>>6 4260c1bc742181ded4930842b46e9507372f0b1b963James Dong 4270c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// M_STALL ARM1136JS=1 4280c1bc742181ded4930842b46e9507372f0b1b963James Dong RSB tVal6, tVal14, tVal14, LSL #3 ;// tVal6 = 7*c 4290c1bc742181ded4930842b46e9507372f0b1b963James Dong UXTH tVal14, tVal14 ;// tVal14 = Cleared the upper half word 4300c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD tVal10, tVal12, tVal12 ;// tVal10 = 2*b 4310c1bc742181ded4930842b46e9507372f0b1b963James Dong ORR tVal14, tVal14, tVal14, LSL #16 ;// tVal14 = {c , c} 4320c1bc742181ded4930842b46e9507372f0b1b963James Dong SUB tVal6, tVal2, tVal6 ;// tVal6 = d = a - 7*b - 7*c + 16 4330c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD tVal1, tVal6, tVal10 ;// tVal1 = pp2 = d + 2*b 4340c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD tVal10, tVal10, tVal12 ;// tVal10 =3*b 4350c1bc742181ded4930842b46e9507372f0b1b963James Dong ORR tVal0, tVal6, tVal1, LSL #16 ;// tval0 = p2p0 = pack {p2, p0} 4360c1bc742181ded4930842b46e9507372f0b1b963James Dong UXTH tVal12, tVal12 ;// tVal12 = Cleared the upper half word 4370c1bc742181ded4930842b46e9507372f0b1b963James Dong UXTH tVal10, tVal10 ;// tVal12 = Cleared the upper half word 4380c1bc742181ded4930842b46e9507372f0b1b963James Dong ORR tVal12, tVal12, tVal12, LSL #16 ;// tVal12 = {b , b} 4390c1bc742181ded4930842b46e9507372f0b1b963James Dong ORR tVal10, tVal10, tVal10, LSL #16 ;// tVal10 = {3b , 3b} 4400c1bc742181ded4930842b46e9507372f0b1b963James Dong SADD16 tVal1, tVal0, tVal12 ;// tVal1 = p3p1 = p2p0 + {b,b} 
4410c1bc742181ded4930842b46e9507372f0b1b963James Dong SADD16 tVal2, tVal1, tVal10 ;// tVal2 = p6p4 = p3p1 + {3b,3b} 4420c1bc742181ded4930842b46e9507372f0b1b963James Dong SADD16 tVal4, tVal2, tVal12 ;// tVal4 = p7p5 = p6p4 + {b,b} 4430c1bc742181ded4930842b46e9507372f0b1b963James Dong SADD16 tVal6, tVal4, tVal10 ;// tVal6 = p10p8 = p7p5 + {3b,3b} 4440c1bc742181ded4930842b46e9507372f0b1b963James Dong SADD16 tVal7, tVal6, tVal12 ;// tVal7 = p11p9 = p10p8 + {b,b} 4450c1bc742181ded4930842b46e9507372f0b1b963James Dong SADD16 tVal8, tVal7, tVal10 ;// tVal8 = p14p12 = p11p9 + {3b,3b} 4460c1bc742181ded4930842b46e9507372f0b1b963James Dong SADD16 tVal9, tVal8, tVal12 ;// tVal9 = p15p13 = p14p12 + {b,b} 4470c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR r0x00FF00FF, =MASK_CONST ;// r0x00FF00FF = 0x00FF00FF 4480c1bc742181ded4930842b46e9507372f0b1b963James Dong 4490c1bc742181ded4930842b46e9507372f0b1b963James DongLOOP_PLANE 4500c1bc742181ded4930842b46e9507372f0b1b963James Dong 4510c1bc742181ded4930842b46e9507372f0b1b963James Dong USAT16 temp2, #13, p3p1 4520c1bc742181ded4930842b46e9507372f0b1b963James Dong USAT16 temp1, #13, p2p0 4530c1bc742181ded4930842b46e9507372f0b1b963James Dong SADD16 p3p1, p3p1, c 4540c1bc742181ded4930842b46e9507372f0b1b963James Dong SADD16 p2p0, p2p0, c 4550c1bc742181ded4930842b46e9507372f0b1b963James Dong AND temp2, r0x00FF00FF, temp2, ASR #5 4560c1bc742181ded4930842b46e9507372f0b1b963James Dong AND temp1, r0x00FF00FF, temp1, ASR #5 4570c1bc742181ded4930842b46e9507372f0b1b963James Dong ORR temp1, temp1, temp2, LSL #8 4580c1bc742181ded4930842b46e9507372f0b1b963James Dong STR temp1, [pDst], #4 4590c1bc742181ded4930842b46e9507372f0b1b963James Dong 4600c1bc742181ded4930842b46e9507372f0b1b963James Dong USAT16 temp2, #13, p7p5 4610c1bc742181ded4930842b46e9507372f0b1b963James Dong USAT16 temp1, #13, p6p4 4620c1bc742181ded4930842b46e9507372f0b1b963James Dong SADD16 p7p5, p7p5, c 4630c1bc742181ded4930842b46e9507372f0b1b963James Dong SADD16 p6p4, p6p4, c 
4640c1bc742181ded4930842b46e9507372f0b1b963James Dong AND temp2, r0x00FF00FF, temp2, ASR #5 4650c1bc742181ded4930842b46e9507372f0b1b963James Dong AND temp1, r0x00FF00FF, temp1, ASR #5 4660c1bc742181ded4930842b46e9507372f0b1b963James Dong ORR temp1, temp1, temp2, LSL #8 4670c1bc742181ded4930842b46e9507372f0b1b963James Dong STR temp1, [pDst], #4 4680c1bc742181ded4930842b46e9507372f0b1b963James Dong 4690c1bc742181ded4930842b46e9507372f0b1b963James Dong USAT16 temp2, #13, p11p9 4700c1bc742181ded4930842b46e9507372f0b1b963James Dong USAT16 temp1, #13, p10p8 4710c1bc742181ded4930842b46e9507372f0b1b963James Dong SADD16 p11p9, p11p9, c 4720c1bc742181ded4930842b46e9507372f0b1b963James Dong SADD16 p10p8, p10p8, c 4730c1bc742181ded4930842b46e9507372f0b1b963James Dong AND temp2, r0x00FF00FF, temp2, ASR #5 4740c1bc742181ded4930842b46e9507372f0b1b963James Dong AND temp1, r0x00FF00FF, temp1, ASR #5 4750c1bc742181ded4930842b46e9507372f0b1b963James Dong ORR temp1, temp1, temp2, LSL #8 4760c1bc742181ded4930842b46e9507372f0b1b963James Dong STR temp1, [pDst], #4 4770c1bc742181ded4930842b46e9507372f0b1b963James Dong 4780c1bc742181ded4930842b46e9507372f0b1b963James Dong USAT16 temp2, #13, p15p13 4790c1bc742181ded4930842b46e9507372f0b1b963James Dong USAT16 temp1, #13, p14p12 4800c1bc742181ded4930842b46e9507372f0b1b963James Dong SADD16 p15p13, p15p13, c 4810c1bc742181ded4930842b46e9507372f0b1b963James Dong SADD16 p14p12, p14p12, c 4820c1bc742181ded4930842b46e9507372f0b1b963James Dong AND temp2, r0x00FF00FF, temp2, ASR #5 4830c1bc742181ded4930842b46e9507372f0b1b963James Dong AND temp1, r0x00FF00FF, temp1, ASR #5 4840c1bc742181ded4930842b46e9507372f0b1b963James Dong ORR temp1, temp1, temp2, LSL #8 4850c1bc742181ded4930842b46e9507372f0b1b963James Dong STR temp1, [pDst], #4 4860c1bc742181ded4930842b46e9507372f0b1b963James Dong 4870c1bc742181ded4930842b46e9507372f0b1b963James Dong ADDS r0x00FF00FF, r0x00FF00FF, #1<<28 ;// Loop counter value in top 4 bits 
4880c1bc742181ded4930842b46e9507372f0b1b963James Dong 4890c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD pDst, pDst, dstStep 4900c1bc742181ded4930842b46e9507372f0b1b963James Dong 4910c1bc742181ded4930842b46e9507372f0b1b963James Dong BCC LOOP_PLANE ;// Loop for 16 times 4920c1bc742181ded4930842b46e9507372f0b1b963James Dong MOV return, #OMX_Sts_NoErr 4930c1bc742181ded4930842b46e9507372f0b1b963James Dong M_END 4940c1bc742181ded4930842b46e9507372f0b1b963James Dong 4950c1bc742181ded4930842b46e9507372f0b1b963James Dong ENDIF ;// ARM1136JS 4960c1bc742181ded4930842b46e9507372f0b1b963James Dong 4970c1bc742181ded4930842b46e9507372f0b1b963James Dong 4980c1bc742181ded4930842b46e9507372f0b1b963James Dong END 4990c1bc742181ded4930842b46e9507372f0b1b963James Dong;----------------------------------------------------------------------------------------------- 5000c1bc742181ded4930842b46e9507372f0b1b963James Dong; omxVCM4P10_PredictIntra_16x16 ends 5010c1bc742181ded4930842b46e9507372f0b1b963James Dong;----------------------------------------------------------------------------------------------- 502