;//
;//
;// File Name:  omxVCM4P10_PredictIntraChroma_8x8_s.s
;// OpenMAX DL: v1.0.2
;// Revision:   9641
;// Date:       Thursday, February 7, 2008
;//
;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
;//
;//
;//


        INCLUDE omxtypes_s.h
        INCLUDE armCOMM_s.h

        EXPORT  armVCM4P10_pIndexTable8x8

;// Processor variants this file is built for

        M_VARIANTS ARM1136JS

        AREA    table, DATA

;//-------------------------------------------------------
;// Dispatch table that implements a C-style switch in
;// assembly using two levels of indexing: the prediction
;// mode selects a word in this table, and that word is the
;// address of the matching case label, loaded straight
;// into pc.
;//-------------------------------------------------------

        M_TABLE armVCM4P10_pIndexTable8x8
        DCD     OMX_VC_CHROMA_DC,   OMX_VC_CHROMA_HOR
        DCD     OMX_VC_CHROMA_VERT, OMX_VC_CHROMA_PLANE

;// Halfword multiplier table. Its consumer is not in the part of the
;// file next to these declarations.
;// NOTE(review): presumably used by the plane-prediction case - confirm.

        M_TABLE armVCM4P10_MultiplierTableChroma8x8,1
        DCW     3, 2, 1,4
        DCW     -3,-2,-1,0
        DCW     1, 2, 3,4

        IF ARM1136JS

;//--------------------------------------------
;// Constants
;//--------------------------------------------

BLK_SIZE        EQU 0x8             ;// value loaded into y at function entry
MUL_CONST0      EQU 0x01010101      ;// multiplier: copies one byte value into all four bytes of a word
MASK_CONST      EQU 0x00FF00FF
MUL_CONST1      EQU 0x80808080      ;// word stored by the DC case when neither neighbour is available

;//--------------------------------------------
;// Scratch registers
;//
;// Several names below are bound to the same
;// physical register, so such names must never
;// be live at the same time.
;//--------------------------------------------

y               RN 12
pc              RN 15               ;// destination of the table-dispatch load
return          RN 0                ;// status code handed back to the caller
pSrcLeft2       RN 1
pDst2           RN 2
sum1            RN 6
sum2            RN 7
pTable          RN 9                ;// address of armVCM4P10_pIndexTable8x8
dstStepx2       RN 11
leftStepx2      RN 14
outerCount      RN 14
r0x01010101     RN 10               ;// holds MUL_CONST0
r0x00FF00FF     RN 11

;// Generic temporaries: tValN is simply rN

tVal0           RN 0
tVal1           RN 1
tVal2           RN 2
tVal3           RN 3
tVal4           RN 4
tVal5           RN 5
tVal6           RN 6
tVal7           RN 7
tVal8           RN 8
tVal9           RN 9
tVal10          RN 10
tVal11          RN 11
tVal12          RN 12
tVal14          RN 14

b               RN 14
c               RN 12

p2p0            RN 0
p3p1            RN 1
p6p4            RN 2
p7p5            RN 4

pp2pp0          RN 6
pp3pp1          RN 7
pp6pp4          RN 8
pp7pp5          RN 9

p3210           RN 10
p7654           RN 10

;//--------------------------------------------
;// Input arguments
;//
;// pSrcLeft..pDst are bound to r0-r3; the last
;// four are stack arguments that the function
;// loads into r4-r7 with M_LDR.
;//--------------------------------------------

pSrcLeft        RN 0                ;// input pointer
pSrcAbove       RN 1                ;// input pointer
pSrcAboveLeft   RN 2                ;// input pointer
pDst            RN 3                ;// output pointer
leftStep        RN 4                ;// input variable
dstStep         RN 5                ;// input variable
predMode        RN 6                ;// input variable
availability    RN 7                ;// input variable

;//-----------------------------------------------------------------------------------------------
1100c1bc742181ded4930842b46e9507372f0b1b963James Dong;// omxVCM4P10_PredictIntraChroma_8x8 starts 1110c1bc742181ded4930842b46e9507372f0b1b963James Dong;//----------------------------------------------------------------------------------------------- 1120c1bc742181ded4930842b46e9507372f0b1b963James Dong 1130c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// Write function header 1140c1bc742181ded4930842b46e9507372f0b1b963James Dong M_START omxVCM4P10_PredictIntraChroma_8x8, r11 1150c1bc742181ded4930842b46e9507372f0b1b963James Dong 1160c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// Define stack arguments 1170c1bc742181ded4930842b46e9507372f0b1b963James Dong M_ARG LeftStep, 4 1180c1bc742181ded4930842b46e9507372f0b1b963James Dong M_ARG DstStep, 4 1190c1bc742181ded4930842b46e9507372f0b1b963James Dong M_ARG PredMode, 4 1200c1bc742181ded4930842b46e9507372f0b1b963James Dong M_ARG Availability, 4 1210c1bc742181ded4930842b46e9507372f0b1b963James Dong 1220c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// M_STALL ARM1136JS=4 1230c1bc742181ded4930842b46e9507372f0b1b963James Dong 1240c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR pTable,=armVCM4P10_pIndexTable8x8 ;// Load index table for switch case 1250c1bc742181ded4930842b46e9507372f0b1b963James Dong 1260c1bc742181ded4930842b46e9507372f0b1b963James Dong 1270c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// Load argument from the stack 1280c1bc742181ded4930842b46e9507372f0b1b963James Dong M_LDR predMode, PredMode ;// Arg predMode loaded from stack to reg 1290c1bc742181ded4930842b46e9507372f0b1b963James Dong M_LDR leftStep, LeftStep ;// Arg leftStep loaded from stack to reg 1300c1bc742181ded4930842b46e9507372f0b1b963James Dong M_LDR dstStep, DstStep ;// Arg dstStep loaded from stack to reg 1310c1bc742181ded4930842b46e9507372f0b1b963James Dong M_LDR availability, Availability ;// Arg availability loaded from stack to reg 1320c1bc742181ded4930842b46e9507372f0b1b963James Dong 
1330c1bc742181ded4930842b46e9507372f0b1b963James Dong MOV y, #BLK_SIZE ;// Outer Loop Count 1340c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR pc, [pTable, predMode, LSL #2] ;// Branch to the case based on preMode 1350c1bc742181ded4930842b46e9507372f0b1b963James Dong 1360c1bc742181ded4930842b46e9507372f0b1b963James DongOMX_VC_CHROMA_DC 1370c1bc742181ded4930842b46e9507372f0b1b963James Dong AND availability, availability,#(OMX_VC_UPPER + OMX_VC_LEFT) 1380c1bc742181ded4930842b46e9507372f0b1b963James Dong CMP availability, #(OMX_VC_UPPER + OMX_VC_LEFT) ;// if(availability & (#OMX_VC_UPPER | #OMX_VC_LEFT)) 1390c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR r0x01010101, =MUL_CONST0 1400c1bc742181ded4930842b46e9507372f0b1b963James Dong BNE TST_UPPER ;// Jump to Upper if not both 1410c1bc742181ded4930842b46e9507372f0b1b963James Dong LDM pSrcAbove,{tVal8,tVal9} ;// tVal 8 to 9 = pSrcAbove[0 to 7] 1420c1bc742181ded4930842b46e9507372f0b1b963James Dong 1430c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD leftStepx2, leftStep,leftStep ;// leftStepx2 = 2 * leftStep 1440c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD pSrcLeft2, pSrcLeft, leftStep ;// pSrcLeft2 = pSrcLeft + leftStep 1450c1bc742181ded4930842b46e9507372f0b1b963James Dong 1460c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// M_STALL ARM1136JS=1 1470c1bc742181ded4930842b46e9507372f0b1b963James Dong 1480c1bc742181ded4930842b46e9507372f0b1b963James Dong UXTB16 tVal7, tVal8 ;// pSrcAbove[0, 2] 1490c1bc742181ded4930842b46e9507372f0b1b963James Dong UXTB16 tVal8, tVal8, ROR #8 ;// pSrcAbove[1, 3] 1500c1bc742181ded4930842b46e9507372f0b1b963James Dong UADD16 sum1, tVal7, tVal8 ;// pSrcAbove[0, 2] + pSrcAbove[1, 3] 1510c1bc742181ded4930842b46e9507372f0b1b963James Dong 1520c1bc742181ded4930842b46e9507372f0b1b963James Dong UXTB16 tVal7, tVal9 ;// pSrcAbove[4, 6] 1530c1bc742181ded4930842b46e9507372f0b1b963James Dong UXTB16 tVal9, tVal9, ROR #8 ;// pSrcAbove[5, 7] 
1540c1bc742181ded4930842b46e9507372f0b1b963James Dong UADD16 sum2, tVal7, tVal9 ;// pSrcAbove[0, 2] + pSrcAbove[4, 6] 1550c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD sum1, sum1, sum1, LSR #16 ;// sum(pSrcAbove[0] to pSrcAbove[3]) 1560c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD sum2, sum2, sum2, LSR #16 ;// sum(pSrcAbove[4] to pSrcAbove[7]) 1570c1bc742181ded4930842b46e9507372f0b1b963James Dong UXTH sum1, sum1 ;// upsum1 (Clear the top junk bits) 1580c1bc742181ded4930842b46e9507372f0b1b963James Dong UXTH sum2, sum2 ;// upsum2 (Clear the top junk bits) 1590c1bc742181ded4930842b46e9507372f0b1b963James Dong 1600c1bc742181ded4930842b46e9507372f0b1b963James Dong M_LDRB tVal8, [pSrcLeft], +leftStepx2 ;// tVal8 = pSrcLeft[0] 1610c1bc742181ded4930842b46e9507372f0b1b963James Dong M_LDRB tVal9, [pSrcLeft2], +leftStepx2 ;// tVal9 = pSrcLeft[1] 1620c1bc742181ded4930842b46e9507372f0b1b963James Dong M_LDRB tVal4, [pSrcLeft], +leftStepx2 ;// tVal4 = pSrcLeft[2] 1630c1bc742181ded4930842b46e9507372f0b1b963James Dong M_LDRB tVal12,[pSrcLeft2], +leftStepx2 ;// tVal12= pSrcLeft[3] 1640c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD tVal2, tVal8, tVal9 ;// tVal14 = tVal8 + tVal9 1650c1bc742181ded4930842b46e9507372f0b1b963James Dong 1660c1bc742181ded4930842b46e9507372f0b1b963James Dong M_LDRB tVal8, [pSrcLeft], +leftStepx2 ;// tVal8 = pSrcLeft[4] 1670c1bc742181ded4930842b46e9507372f0b1b963James Dong M_LDRB tVal9, [pSrcLeft2], +leftStepx2 ;// tVal9 = pSrcLeft[5] 1680c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD tVal14, tVal4, tVal12 ;// tVal14 = tVal4 + tVal12 1690c1bc742181ded4930842b46e9507372f0b1b963James Dong 1700c1bc742181ded4930842b46e9507372f0b1b963James Dong LDRB tVal4, [pSrcLeft] ;// tVal4 = pSrcLeft[6] 1710c1bc742181ded4930842b46e9507372f0b1b963James Dong LDRB tVal12,[pSrcLeft2] ;// tVal12= pSrcLeft[7] 1720c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD tVal8, tVal8, tVal9 ;// tVal8 = tVal8 + tVal9 
1730c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD tVal2, tVal2, tVal14 ;// leftsum1 = sum(pSrcLeft[0] to pSrcLeft[3]) 1740c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD tVal4, tVal4, tVal12 ;// tVal4 = tVal4 + tVal12 1750c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD tVal14, tVal8, tVal4 ;// leftsum2 = sum(pSrcLeft[4] to pSrcLeft[7]) 1760c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD tVal8, tVal14, #2 ;// tVal8 = leftsum2 + 2 1770c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD tVal9, sum2, #2 ;// tVal8 = upsum2 + 2 1780c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD sum1, sum1, tVal2 ;// sum1 = upsum1 + leftsum1 1790c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD sum2, sum2, tVal14 ;// sum2 = upsum2 + leftsum2 1800c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD sum1, sum1, #4 ;// (sum1 + 4) 1810c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD sum2, sum2, #4 ;// (sum2 + 4) 1820c1bc742181ded4930842b46e9507372f0b1b963James Dong MOV sum1, sum1, LSR #3 ;// (sum1 + 4)>>3 1830c1bc742181ded4930842b46e9507372f0b1b963James Dong MOV tVal9, tVal9, LSR #2 ;// (tVal9 + 2)>>2 1840c1bc742181ded4930842b46e9507372f0b1b963James Dong MOV tVal8, tVal8, LSR #2 ;// (tVal8 + 2)>>2 1850c1bc742181ded4930842b46e9507372f0b1b963James Dong MOV sum2, sum2, LSR #3 ;// (sum2 + 4)>>3 1860c1bc742181ded4930842b46e9507372f0b1b963James Dong 1870c1bc742181ded4930842b46e9507372f0b1b963James Dong MUL tVal0, sum1, r0x01010101 ;// replicate the val in all the bytes 1880c1bc742181ded4930842b46e9507372f0b1b963James Dong MUL tVal1, tVal9,r0x01010101 ;// replicate the val in all the bytes 1890c1bc742181ded4930842b46e9507372f0b1b963James Dong MUL tVal8, tVal8,r0x01010101 ;// replicate the val in all the bytes 1900c1bc742181ded4930842b46e9507372f0b1b963James Dong MUL tVal9, sum2, r0x01010101 ;// replicate the val in all the bytes 1910c1bc742181ded4930842b46e9507372f0b1b963James Dong 1920c1bc742181ded4930842b46e9507372f0b1b963James Dong M_STRD tVal0, tVal1, 
[pDst], dstStep ;// pDst[0 to 7] = tVal 0 to 1 1930c1bc742181ded4930842b46e9507372f0b1b963James Dong M_STRD tVal0, tVal1, [pDst], dstStep ;// pDst[8 to 15] = tVal 0 to 1 1940c1bc742181ded4930842b46e9507372f0b1b963James Dong M_STRD tVal0, tVal1, [pDst], dstStep ;// pDst[16 to 23] = tVal 0 to 1 1950c1bc742181ded4930842b46e9507372f0b1b963James Dong M_STRD tVal0, tVal1, [pDst], dstStep ;// pDst[24 to 31] = tVal 0 to 1 1960c1bc742181ded4930842b46e9507372f0b1b963James Dong 1970c1bc742181ded4930842b46e9507372f0b1b963James Dong M_STRD tVal8, tVal9, [pDst], dstStep ;// pDst[32 to 39] = tVal 8 to 9 1980c1bc742181ded4930842b46e9507372f0b1b963James Dong M_STRD tVal8, tVal9, [pDst], dstStep ;// pDst[40 to 47] = tVal 8 to 9 1990c1bc742181ded4930842b46e9507372f0b1b963James Dong M_STRD tVal8, tVal9, [pDst], dstStep ;// pDst[48 to 55] = tVal 8 to 9 2000c1bc742181ded4930842b46e9507372f0b1b963James Dong M_STRD tVal8, tVal9, [pDst], dstStep ;// pDst[56 to 63] = tVal 8 to 9 2010c1bc742181ded4930842b46e9507372f0b1b963James Dong MOV return, #OMX_Sts_NoErr 2020c1bc742181ded4930842b46e9507372f0b1b963James Dong M_EXIT 2030c1bc742181ded4930842b46e9507372f0b1b963James Dong 2040c1bc742181ded4930842b46e9507372f0b1b963James DongTST_UPPER 2050c1bc742181ded4930842b46e9507372f0b1b963James Dong 2060c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// M_STALL ARM1136JS=3 2070c1bc742181ded4930842b46e9507372f0b1b963James Dong 2080c1bc742181ded4930842b46e9507372f0b1b963James Dong CMP availability, #OMX_VC_UPPER ;// if(availability & #OMX_VC_UPPER) 2090c1bc742181ded4930842b46e9507372f0b1b963James Dong 2100c1bc742181ded4930842b46e9507372f0b1b963James Dong BNE TST_LEFT ;// Jump to Left if not upper 2110c1bc742181ded4930842b46e9507372f0b1b963James Dong LDM pSrcAbove,{tVal8,tVal9} ;// tVal 8 to 9 = pSrcAbove[0 to 7] 2120c1bc742181ded4930842b46e9507372f0b1b963James Dong 2130c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// M_STALL ARM1136JS=3 2140c1bc742181ded4930842b46e9507372f0b1b963James Dong 
2150c1bc742181ded4930842b46e9507372f0b1b963James Dong UXTB16 tVal7, tVal8 ;// pSrcAbove[0, 2] 2160c1bc742181ded4930842b46e9507372f0b1b963James Dong UXTB16 tVal8, tVal8, ROR #8 ;// pSrcAbove[1, 3] 2170c1bc742181ded4930842b46e9507372f0b1b963James Dong UADD16 sum1, tVal7, tVal8 ;// pSrcAbove[0, 2] + pSrcAbove[1, 3] 2180c1bc742181ded4930842b46e9507372f0b1b963James Dong 2190c1bc742181ded4930842b46e9507372f0b1b963James Dong UXTB16 tVal7, tVal9 ;// pSrcAbove[4, 6] 2200c1bc742181ded4930842b46e9507372f0b1b963James Dong UXTB16 tVal9, tVal9, ROR #8 ;// pSrcAbove[5, 7] 2210c1bc742181ded4930842b46e9507372f0b1b963James Dong UADD16 sum2, tVal7, tVal9 ;// pSrcAbove[0, 2] + pSrcAbove[4, 6] 2220c1bc742181ded4930842b46e9507372f0b1b963James Dong 2230c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD sum1, sum1, sum1, LSR #16 ;// sum(pSrcAbove[0] to pSrcAbove[3]) 2240c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD sum2, sum2, sum2, LSR #16 ;// sum(pSrcAbove[4] to pSrcAbove[7]) 2250c1bc742181ded4930842b46e9507372f0b1b963James Dong 2260c1bc742181ded4930842b46e9507372f0b1b963James Dong UXTH sum1, sum1 ;// upsum1 (Clear the top junk bits) 2270c1bc742181ded4930842b46e9507372f0b1b963James Dong UXTH sum2, sum2 ;// upsum2 (Clear the top junk bits) 2280c1bc742181ded4930842b46e9507372f0b1b963James Dong 2290c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD sum1, sum1, #2 ;// sum1 + 2 2300c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD sum2, sum2, #2 ;// sum2 + 2 2310c1bc742181ded4930842b46e9507372f0b1b963James Dong 2320c1bc742181ded4930842b46e9507372f0b1b963James Dong MOV sum1, sum1, LSR #2 ;// (sum1 + 2)>>2 2330c1bc742181ded4930842b46e9507372f0b1b963James Dong MOV sum2, sum2, LSR #2 ;// (sum2 + 2)>>2 2340c1bc742181ded4930842b46e9507372f0b1b963James Dong 2350c1bc742181ded4930842b46e9507372f0b1b963James Dong MUL sum1, sum1,r0x01010101 ;// replicate the val in all the bytes 2360c1bc742181ded4930842b46e9507372f0b1b963James Dong MUL sum2, sum2,r0x01010101 ;// replicate the val in 
all the bytes 2370c1bc742181ded4930842b46e9507372f0b1b963James Dong 2380c1bc742181ded4930842b46e9507372f0b1b963James Dong M_STRD tVal6, tVal7, [pDst], dstStep ;// pDst[0 to 7] = tVal 6 to 7 2390c1bc742181ded4930842b46e9507372f0b1b963James Dong M_STRD tVal6, tVal7, [pDst], dstStep ;// pDst[8 to 15] = tVal 6 to 7 2400c1bc742181ded4930842b46e9507372f0b1b963James Dong M_STRD tVal6, tVal7, [pDst], dstStep ;// pDst[16 to 23] = tVal 6 to 7 2410c1bc742181ded4930842b46e9507372f0b1b963James Dong M_STRD tVal6, tVal7, [pDst], dstStep ;// pDst[24 to 31] = tVal 6 to 7 2420c1bc742181ded4930842b46e9507372f0b1b963James Dong M_STRD tVal6, tVal7, [pDst], dstStep ;// pDst[32 to 39] = tVal 6 to 7 2430c1bc742181ded4930842b46e9507372f0b1b963James Dong M_STRD tVal6, tVal7, [pDst], dstStep ;// pDst[40 to 47] = tVal 6 to 7 2440c1bc742181ded4930842b46e9507372f0b1b963James Dong M_STRD tVal6, tVal7, [pDst], dstStep ;// pDst[48 to 55] = tVal 6 to 7 2450c1bc742181ded4930842b46e9507372f0b1b963James Dong M_STRD tVal6, tVal7, [pDst], dstStep ;// pDst[56 to 63] = tVal 6 to 7 2460c1bc742181ded4930842b46e9507372f0b1b963James Dong MOV return, #OMX_Sts_NoErr 2470c1bc742181ded4930842b46e9507372f0b1b963James Dong M_EXIT 2480c1bc742181ded4930842b46e9507372f0b1b963James Dong 2490c1bc742181ded4930842b46e9507372f0b1b963James DongTST_LEFT 2500c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// M_STALL ARM1136JS=3 2510c1bc742181ded4930842b46e9507372f0b1b963James Dong 2520c1bc742181ded4930842b46e9507372f0b1b963James Dong CMP availability, #OMX_VC_LEFT 2530c1bc742181ded4930842b46e9507372f0b1b963James Dong BNE TST_COUNT0 2540c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD leftStepx2, leftStep,leftStep ;// leftStepx2 = 2 * leftStep 2550c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD pSrcLeft2, pSrcLeft, leftStep ;// pSrcLeft2 = pSrcLeft + leftStep 2560c1bc742181ded4930842b46e9507372f0b1b963James Dong 2570c1bc742181ded4930842b46e9507372f0b1b963James Dong M_LDRB tVal8, [pSrcLeft], +leftStepx2 ;// tVal8 
= pSrcLeft[0] 2580c1bc742181ded4930842b46e9507372f0b1b963James Dong M_LDRB tVal9, [pSrcLeft2], +leftStepx2 ;// tVal9 = pSrcLeft[1] 2590c1bc742181ded4930842b46e9507372f0b1b963James Dong M_LDRB tVal4, [pSrcLeft], +leftStepx2 ;// tVal4 = pSrcLeft[2] 2600c1bc742181ded4930842b46e9507372f0b1b963James Dong M_LDRB tVal12,[pSrcLeft2], +leftStepx2 ;// tVal12= pSrcLeft[3] 2610c1bc742181ded4930842b46e9507372f0b1b963James Dong 2620c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD tVal6, tVal8, tVal9 ;// tVal6 = tVal8 + tVal9 2630c1bc742181ded4930842b46e9507372f0b1b963James Dong 2640c1bc742181ded4930842b46e9507372f0b1b963James Dong M_LDRB tVal8, [pSrcLeft], +leftStepx2 ;// tVal8 = pSrcLeft[4] 2650c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD tVal7, tVal4, tVal12 ;// tVal7 = tVal4 + tVal12 2660c1bc742181ded4930842b46e9507372f0b1b963James Dong M_LDRB tVal9, [pSrcLeft2], +leftStepx2 ;// tVal9 = pSrcLeft[5] 2670c1bc742181ded4930842b46e9507372f0b1b963James Dong M_LDRB tVal4, [pSrcLeft], +leftStepx2 ;// tVal4 = pSrcLeft[6] 2680c1bc742181ded4930842b46e9507372f0b1b963James Dong M_LDRB tVal12,[pSrcLeft2], +leftStepx2 ;// tVal12= pSrcLeft[7] 2690c1bc742181ded4930842b46e9507372f0b1b963James Dong 2700c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD tVal8, tVal8, tVal9 ;// tVal8 = tVal8 + tVal9 2710c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD sum1, tVal6, tVal7 ;// sum1 = sum(pSrcLeft[0] to pSrcLeft[3]) 2720c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD tVal4, tVal4, tVal12 ;// tVal4 = tVal4 + tVal12 2730c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD sum2, tVal8, tVal4 ;// sum2 = sum(pSrcLeft[4] to pSrcLeft[7]) 2740c1bc742181ded4930842b46e9507372f0b1b963James Dong 2750c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD sum1, sum1, #2 ;// sum1 + 2 2760c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD sum2, sum2, #2 ;// sum2 + 2 2770c1bc742181ded4930842b46e9507372f0b1b963James Dong 2780c1bc742181ded4930842b46e9507372f0b1b963James Dong MOV sum1, 
sum1, LSR #2 ;// (sum1 + 2)>>2 2790c1bc742181ded4930842b46e9507372f0b1b963James Dong MOV sum2, sum2, LSR #2 ;// (sum2 + 2)>>2 2800c1bc742181ded4930842b46e9507372f0b1b963James Dong 2810c1bc742181ded4930842b46e9507372f0b1b963James Dong MUL tVal6, sum1,r0x01010101 ;// replicate the val in all the bytes 2820c1bc742181ded4930842b46e9507372f0b1b963James Dong MUL tVal8, sum2,r0x01010101 ;// replicate the val in all the bytes 2830c1bc742181ded4930842b46e9507372f0b1b963James Dong 2840c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// M_STALL ARM1136JS=1 2850c1bc742181ded4930842b46e9507372f0b1b963James Dong MOV tVal7,tVal6 ;// tVal7 = sum1 2860c1bc742181ded4930842b46e9507372f0b1b963James Dong MOV tVal9,tVal8 ;// tVal9 = sum2 2870c1bc742181ded4930842b46e9507372f0b1b963James Dong 2880c1bc742181ded4930842b46e9507372f0b1b963James Dong M_STRD tVal6, tVal7, [pDst], dstStep ;// pDst[0 to 7] = tVal 6 to 7 2890c1bc742181ded4930842b46e9507372f0b1b963James Dong M_STRD tVal6, tVal7, [pDst], dstStep ;// pDst[8 to 15] = tVal 6 to 7 2900c1bc742181ded4930842b46e9507372f0b1b963James Dong M_STRD tVal6, tVal7, [pDst], dstStep ;// pDst[16 to 23] = tVal 6 to 7 2910c1bc742181ded4930842b46e9507372f0b1b963James Dong M_STRD tVal6, tVal7, [pDst], dstStep ;// pDst[24 to 31] = tVal 6 to 7 2920c1bc742181ded4930842b46e9507372f0b1b963James Dong 2930c1bc742181ded4930842b46e9507372f0b1b963James Dong M_STRD tVal8, tVal9, [pDst], dstStep ;// pDst[32 to 39] = tVal 8 to 9 2940c1bc742181ded4930842b46e9507372f0b1b963James Dong M_STRD tVal8, tVal9, [pDst], dstStep ;// pDst[40 to 47] = tVal 8 to 9 2950c1bc742181ded4930842b46e9507372f0b1b963James Dong M_STRD tVal8, tVal9, [pDst], dstStep ;// pDst[48 to 55] = tVal 8 to 9 2960c1bc742181ded4930842b46e9507372f0b1b963James Dong M_STRD tVal8, tVal9, [pDst], dstStep ;// pDst[56 to 63] = tVal 8 to 9 2970c1bc742181ded4930842b46e9507372f0b1b963James Dong 2980c1bc742181ded4930842b46e9507372f0b1b963James Dong MOV return, #OMX_Sts_NoErr 
2990c1bc742181ded4930842b46e9507372f0b1b963James Dong M_EXIT ;// Macro to exit midway-break frm case 3000c1bc742181ded4930842b46e9507372f0b1b963James Dong 3010c1bc742181ded4930842b46e9507372f0b1b963James DongTST_COUNT0 3020c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR sum1, =MUL_CONST1 ;// sum1 = 0x80808080 if(count == 0) 3030c1bc742181ded4930842b46e9507372f0b1b963James Dong 3040c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// M_STALL ARM1136JS=2 3050c1bc742181ded4930842b46e9507372f0b1b963James Dong 3060c1bc742181ded4930842b46e9507372f0b1b963James Dong MOV tVal7, sum1 ;// tVal7 = sum1 3070c1bc742181ded4930842b46e9507372f0b1b963James Dong 3080c1bc742181ded4930842b46e9507372f0b1b963James Dong M_STRD tVal6, tVal7, [pDst], dstStep ;// pDst[0 to 7] = tVal 6 to 7 3090c1bc742181ded4930842b46e9507372f0b1b963James Dong M_STRD tVal6, tVal7, [pDst], dstStep ;// pDst[8 to 15] = tVal 6 to 7 3100c1bc742181ded4930842b46e9507372f0b1b963James Dong M_STRD tVal6, tVal7, [pDst], dstStep ;// pDst[16 to 23] = tVal 6 to 7 3110c1bc742181ded4930842b46e9507372f0b1b963James Dong M_STRD tVal6, tVal7, [pDst], dstStep ;// pDst[24 to 31] = tVal 6 to 7 3120c1bc742181ded4930842b46e9507372f0b1b963James Dong M_STRD tVal6, tVal7, [pDst], dstStep ;// pDst[32 to 39] = tVal 6 to 7 3130c1bc742181ded4930842b46e9507372f0b1b963James Dong M_STRD tVal6, tVal7, [pDst], dstStep ;// pDst[40 to 47] = tVal 6 to 7 3140c1bc742181ded4930842b46e9507372f0b1b963James Dong M_STRD tVal6, tVal7, [pDst], dstStep ;// pDst[48 to 55] = tVal 6 to 7 3150c1bc742181ded4930842b46e9507372f0b1b963James Dong M_STRD tVal6, tVal7, [pDst], dstStep ;// pDst[56 to 63] = tVal 6 to 7 3160c1bc742181ded4930842b46e9507372f0b1b963James Dong 3170c1bc742181ded4930842b46e9507372f0b1b963James Dong MOV return, #OMX_Sts_NoErr 3180c1bc742181ded4930842b46e9507372f0b1b963James Dong M_EXIT ;// Macro to exit midway-break frm case 3190c1bc742181ded4930842b46e9507372f0b1b963James Dong 3200c1bc742181ded4930842b46e9507372f0b1b963James 
DongOMX_VC_CHROMA_HOR 3210c1bc742181ded4930842b46e9507372f0b1b963James Dong 3220c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// M_STALL ARM1136JS=2 3230c1bc742181ded4930842b46e9507372f0b1b963James Dong 3240c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD pSrcLeft2, pSrcLeft, leftStep ;// pSrcLeft2 = pSrcLeft + leftStep 3250c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD leftStepx2, leftStep, leftStep ;// leftStepx2 = leftStep * 2 3260c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD pDst2, pDst, dstStep ;// pDst2 = pDst + dstStep 3270c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD dstStepx2, dstStep, dstStep ;// double dstStep 3280c1bc742181ded4930842b46e9507372f0b1b963James Dong SUB dstStepx2, dstStepx2, #4 ;// double dstStep minus 4 3290c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR r0x01010101, =MUL_CONST0 ;// Const to repeat the byte in reg 4 times 3300c1bc742181ded4930842b46e9507372f0b1b963James Dong M_LDRB tVal6, [pSrcLeft], +leftStepx2 ;// tVal6 = pSrcLeft[0] 3310c1bc742181ded4930842b46e9507372f0b1b963James Dong M_LDRB tVal7, [pSrcLeft2],+leftStepx2 ;// tVal7 = pSrcLeft[1] 3320c1bc742181ded4930842b46e9507372f0b1b963James Dong M_LDRB tVal8, [pSrcLeft], +leftStepx2 ;// tVal8 = pSrcLeft[2] 3330c1bc742181ded4930842b46e9507372f0b1b963James Dong M_LDRB tVal9, [pSrcLeft2],+leftStepx2 ;// tVal9 = pSrcLeft[3] 3340c1bc742181ded4930842b46e9507372f0b1b963James Dong MUL tVal6, tVal6, r0x01010101 ;// replicate the val in all the bytes 3350c1bc742181ded4930842b46e9507372f0b1b963James Dong MUL tVal7, tVal7, r0x01010101 ;// replicate the val in all the bytes 3360c1bc742181ded4930842b46e9507372f0b1b963James Dong MUL tVal8, tVal8, r0x01010101 ;// replicate the val in all the bytes 3370c1bc742181ded4930842b46e9507372f0b1b963James Dong MUL tVal9, tVal9, r0x01010101 ;// replicate the val in all the bytes 3380c1bc742181ded4930842b46e9507372f0b1b963James Dong STR tVal6, [pDst], #+4 ;// store {tVal6} at pDst [0 to 3] 
3390c1bc742181ded4930842b46e9507372f0b1b963James Dong STR tVal7, [pDst2], #+4 ;// store {tVal7} at pDst2[0 to 3] 3400c1bc742181ded4930842b46e9507372f0b1b963James Dong M_STR tVal6, [pDst], dstStepx2 ;// store {tVal6} at pDst [4 to 7] 3410c1bc742181ded4930842b46e9507372f0b1b963James Dong M_STR tVal7, [pDst2], dstStepx2 ;// store {tVal7} at pDst2[4 to 7] 3420c1bc742181ded4930842b46e9507372f0b1b963James Dong STR tVal8, [pDst], #+4 ;// store {tVal6} at pDst [0 to 3] 3430c1bc742181ded4930842b46e9507372f0b1b963James Dong STR tVal9, [pDst2], #+4 ;// store {tVal7} at pDst2[0 to 3] 3440c1bc742181ded4930842b46e9507372f0b1b963James Dong M_STR tVal8, [pDst], dstStepx2 ;// store {tVal6} at pDst [4 to 7] 3450c1bc742181ded4930842b46e9507372f0b1b963James Dong M_STR tVal9, [pDst2], dstStepx2 ;// store {tVal7} at pDst2[4 to 7] 3460c1bc742181ded4930842b46e9507372f0b1b963James Dong M_LDRB tVal6, [pSrcLeft], +leftStepx2 ;// tVal6 = pSrcLeft[4] 3470c1bc742181ded4930842b46e9507372f0b1b963James Dong M_LDRB tVal7, [pSrcLeft2],+leftStepx2 ;// tVal7 = pSrcLeft[5] 3480c1bc742181ded4930842b46e9507372f0b1b963James Dong M_LDRB tVal8, [pSrcLeft], +leftStepx2 ;// tVal8 = pSrcLeft[6] 3490c1bc742181ded4930842b46e9507372f0b1b963James Dong M_LDRB tVal9, [pSrcLeft2],+leftStepx2 ;// tVal9 = pSrcLeft[7] 3500c1bc742181ded4930842b46e9507372f0b1b963James Dong MUL tVal6, tVal6, r0x01010101 ;// replicate the val in all the bytes 3510c1bc742181ded4930842b46e9507372f0b1b963James Dong MUL tVal7, tVal7, r0x01010101 ;// replicate the val in all the bytes 3520c1bc742181ded4930842b46e9507372f0b1b963James Dong MUL tVal8, tVal8, r0x01010101 ;// replicate the val in all the bytes 3530c1bc742181ded4930842b46e9507372f0b1b963James Dong MUL tVal9, tVal9, r0x01010101 ;// replicate the val in all the bytes 3540c1bc742181ded4930842b46e9507372f0b1b963James Dong STR tVal6, [pDst], #+4 ;// store {tVal6} at pDst [0 to 3] 3550c1bc742181ded4930842b46e9507372f0b1b963James Dong STR tVal7, [pDst2], #+4 ;// store {tVal7} at pDst2[0 to 
3] 3560c1bc742181ded4930842b46e9507372f0b1b963James Dong M_STR tVal6, [pDst], dstStepx2 ;// store {tVal6} at pDst [4 to 7] 3570c1bc742181ded4930842b46e9507372f0b1b963James Dong M_STR tVal7, [pDst2], dstStepx2 ;// store {tVal7} at pDst2[4 to 7] 3580c1bc742181ded4930842b46e9507372f0b1b963James Dong STR tVal8, [pDst], #+4 ;// store {tVal6} at pDst [0 to 3] 3590c1bc742181ded4930842b46e9507372f0b1b963James Dong STR tVal9, [pDst2], #+4 ;// store {tVal7} at pDst2[0 to 3] 3600c1bc742181ded4930842b46e9507372f0b1b963James Dong M_STR tVal8, [pDst], dstStepx2 ;// store {tVal6} at pDst [4 to 7] 3610c1bc742181ded4930842b46e9507372f0b1b963James Dong M_STR tVal9, [pDst2], dstStepx2 ;// store {tVal7} at pDst2[4 to 7] 3620c1bc742181ded4930842b46e9507372f0b1b963James Dong MOV return, #OMX_Sts_NoErr 3630c1bc742181ded4930842b46e9507372f0b1b963James Dong M_EXIT 3640c1bc742181ded4930842b46e9507372f0b1b963James Dong 3650c1bc742181ded4930842b46e9507372f0b1b963James DongOMX_VC_CHROMA_VERT 3660c1bc742181ded4930842b46e9507372f0b1b963James Dong 3670c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// M_STALL ARM1136JS=4 3680c1bc742181ded4930842b46e9507372f0b1b963James Dong 3690c1bc742181ded4930842b46e9507372f0b1b963James Dong LDMIA pSrcAbove, {tVal6,tVal7} ;// tVal 6 to 7 = pSrcAbove[0 to 7] 3700c1bc742181ded4930842b46e9507372f0b1b963James Dong MOV return, #OMX_Sts_NoErr 3710c1bc742181ded4930842b46e9507372f0b1b963James Dong 3720c1bc742181ded4930842b46e9507372f0b1b963James Dong M_STRD tVal6, tVal7, [pDst], dstStep ;// pDst[0 to 7] = tVal 6 to 7 3730c1bc742181ded4930842b46e9507372f0b1b963James Dong M_STRD tVal6, tVal7, [pDst], dstStep ;// pDst[8 to 15] = tVal 6 to 7 3740c1bc742181ded4930842b46e9507372f0b1b963James Dong M_STRD tVal6, tVal7, [pDst], dstStep ;// pDst[16 to 23] = tVal 6 to 7 3750c1bc742181ded4930842b46e9507372f0b1b963James Dong M_STRD tVal6, tVal7, [pDst], dstStep ;// pDst[24 to 31] = tVal 6 to 7 3760c1bc742181ded4930842b46e9507372f0b1b963James Dong M_STRD tVal6, tVal7, [pDst], 
dstStep ;// pDst[32 to 39] = tVal 6 to 7 3770c1bc742181ded4930842b46e9507372f0b1b963James Dong M_STRD tVal6, tVal7, [pDst], dstStep ;// pDst[40 to 47] = tVal 6 to 7 3780c1bc742181ded4930842b46e9507372f0b1b963James Dong M_STRD tVal6, tVal7, [pDst], dstStep ;// pDst[48 to 55] = tVal 6 to 7 3790c1bc742181ded4930842b46e9507372f0b1b963James Dong M_STRD tVal6, tVal7, [pDst], dstStep ;// pDst[56 to 63] = tVal 6 to 7 3800c1bc742181ded4930842b46e9507372f0b1b963James Dong 3810c1bc742181ded4930842b46e9507372f0b1b963James Dong M_EXIT ;// Macro to exit midway-break frm case 3820c1bc742181ded4930842b46e9507372f0b1b963James Dong 3830c1bc742181ded4930842b46e9507372f0b1b963James DongOMX_VC_CHROMA_PLANE 3840c1bc742181ded4930842b46e9507372f0b1b963James Dong 3850c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// M_STALL ARM1136JS=3 3860c1bc742181ded4930842b46e9507372f0b1b963James Dong 3870c1bc742181ded4930842b46e9507372f0b1b963James Dong RSB tVal14, leftStep, leftStep, LSL #3 ;// 7*leftStep 3880c1bc742181ded4930842b46e9507372f0b1b963James Dong LDRB tVal7, [pSrcAbove, #+7] ;// pSrcAbove[7] 3890c1bc742181ded4930842b46e9507372f0b1b963James Dong LDRB tVal6, [pSrcLeft, +tVal14] ;// pSrcLeft[7*leftStep] 3900c1bc742181ded4930842b46e9507372f0b1b963James Dong LDRB tVal8, [pSrcAboveLeft] ;// pSrcAboveLeft[0] 3910c1bc742181ded4930842b46e9507372f0b1b963James Dong LDRB tVal9, [pSrcAbove, #+6 ] ;// pSrcAbove[6] 3920c1bc742181ded4930842b46e9507372f0b1b963James Dong LDRB tVal10,[pSrcAbove] ;// pSrcAbove[0] 3930c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD tVal2, tVal7, tVal6 ;// pSrcAbove[7] + pSrcLeft[7*leftStep] 3940c1bc742181ded4930842b46e9507372f0b1b963James Dong SUB tVal6, tVal6, tVal8 ;// V0 = pSrcLeft[7*leftStep] - pSrcAboveLeft[0] 3950c1bc742181ded4930842b46e9507372f0b1b963James Dong SUB tVal7, tVal7, tVal8 ;// H0 = pSrcAbove[7] - pSrcAboveLeft[0] 3960c1bc742181ded4930842b46e9507372f0b1b963James Dong LSL tVal2, tVal2, #4 ;// a = 16 * (pSrcAbove[15] + pSrcLeft[15*lS]) 
3970c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD tVal2, tVal2, #16 ;// a + 16 3980c1bc742181ded4930842b46e9507372f0b1b963James Dong SUB tVal9, tVal9,tVal10 ;// pSrcAbove[6] - pSrcAbove[0] 3990c1bc742181ded4930842b46e9507372f0b1b963James Dong LDRB tVal8, [pSrcAbove,#+5] ;// pSrcAbove[5] 4000c1bc742181ded4930842b46e9507372f0b1b963James Dong LDRB tVal10,[pSrcAbove,#+1] ;// pSrcAbove[1] 4010c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD tVal9, tVal9, tVal9, LSL #1 ;// H1 = 3 * (pSrcAbove[6] - pSrcAbove[0]) 4020c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD tVal7, tVal9, tVal7, LSL #2 ;// H = H1 + H0 4030c1bc742181ded4930842b46e9507372f0b1b963James Dong SUB tVal8, tVal8, tVal10 ;// pSrcAbove[5] - pSrcAbove[1] 4040c1bc742181ded4930842b46e9507372f0b1b963James Dong LDRB tVal9, [pSrcAbove,#+4] ;// pSrcAbove[4] 4050c1bc742181ded4930842b46e9507372f0b1b963James Dong LDRB tVal10,[pSrcAbove,#+2] ;// pSrcAbove[2] 4060c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD tVal7, tVal7, tVal8, LSL #1 ;// H = H + H2 4070c1bc742181ded4930842b46e9507372f0b1b963James Dong SUB tVal11, tVal14,leftStep ;// 6*leftStep 4080c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD tVal11, pSrcLeft, tVal11 ;// pSrcLeft + 6*leftStep 4090c1bc742181ded4930842b46e9507372f0b1b963James Dong MOV tVal12, pSrcLeft ;// pSrcLeft 4100c1bc742181ded4930842b46e9507372f0b1b963James Dong SUB tVal9, tVal9, tVal10 ;// pSrcAbove[4] - pSrcAbove[2] 4110c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD tVal7, tVal7, tVal9 ;// H = H + H3 4120c1bc742181ded4930842b46e9507372f0b1b963James Dong M_LDRB tVal8, [tVal11],-leftStep ;// pSrcLeft[6*leftStep] 4130c1bc742181ded4930842b46e9507372f0b1b963James Dong M_LDRB tVal10,[tVal12],+leftStep ;// pSrcLeft[0] 4140c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD tVal7, tVal7, tVal7, LSL #4 ;// 17 * H 4150c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD tVal7, tVal7, #16 ;// 17 * H + 16 4160c1bc742181ded4930842b46e9507372f0b1b963James Dong SUB 
tVal8, tVal8, tVal10 ;// pSrcLeft[6*leftStep] - pSrcLeft[0] 4170c1bc742181ded4930842b46e9507372f0b1b963James Dong ASR b, tVal7, #5 ;// b = (17 * H + 16) >> 5 4180c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD tVal8, tVal8, tVal8, LSL #1 ;// V1 = 3 * (pSrcLeft[6*leftStep] - pSrcLeft[0]) 4190c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD tVal6, tVal8, tVal6, LSL #2 ;// V = V0 +V1 4200c1bc742181ded4930842b46e9507372f0b1b963James Dong M_LDRB tVal8, [tVal11],-leftStep ;// pSrcLeft[5*leftStep] 4210c1bc742181ded4930842b46e9507372f0b1b963James Dong M_LDRB tVal10,[tVal12],+leftStep ;// pSrcLeft[leftStep] 4220c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD tVal7, b, b, LSL #1 ;// 3*b 4230c1bc742181ded4930842b46e9507372f0b1b963James Dong SUB tVal2, tVal2, tVal7 ;// a + 16 - 3*b 4240c1bc742181ded4930842b46e9507372f0b1b963James Dong SUB tVal7, tVal8, tVal10 ;// pSrcLeft[5*leftStep] - pSrcLeft[leftStep] 4250c1bc742181ded4930842b46e9507372f0b1b963James Dong M_LDRB tVal8, [tVal11],-leftStep ;// pSrcLeft[4*leftStep] 4260c1bc742181ded4930842b46e9507372f0b1b963James Dong M_LDRB tVal10,[tVal12],+leftStep ;// pSrcLeft[2*leftStep] 4270c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD tVal6, tVal6, tVal7, LSL #1 ;// V = V + V2 4280c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR r0x00FF00FF, =MASK_CONST ;// r0x00FF00FF = 0x00FF00FF 4290c1bc742181ded4930842b46e9507372f0b1b963James Dong SUB tVal7, tVal8, tVal10 ;// pSrcLeft[4*leftStep] - pSrcLeft[2*leftStep] 4300c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD tVal6, tVal6, tVal7 ;// V = V + V7 4310c1bc742181ded4930842b46e9507372f0b1b963James Dong SUB dstStep, dstStep, #4 ;// dstStep - 4 4320c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD tVal6, tVal6, tVal6, LSL #4 ;// 17*V 4330c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD tVal6, tVal6, #16 ;// 17*V + 16 4340c1bc742181ded4930842b46e9507372f0b1b963James Dong 4350c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// M_STALL ARM1136JS=1 
4360c1bc742181ded4930842b46e9507372f0b1b963James Dong 4370c1bc742181ded4930842b46e9507372f0b1b963James Dong ASR c, tVal6, #5 ;// c = (17*V + 16)>>5 4380c1bc742181ded4930842b46e9507372f0b1b963James Dong 4390c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// M_STALL ARM1136JS=1 4400c1bc742181ded4930842b46e9507372f0b1b963James Dong 4410c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD tVal6, c, c, LSL #1 ;// 3*c 4420c1bc742181ded4930842b46e9507372f0b1b963James Dong UXTH c, c ;// only in half word 4430c1bc742181ded4930842b46e9507372f0b1b963James Dong SUB tVal6, tVal2, tVal6 ;// a - 3*b - 3*c + 16 4440c1bc742181ded4930842b46e9507372f0b1b963James Dong ORR c, c, c, LSL #16 ;// c c 4450c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD tVal7, b, b ;// 2b 4460c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD tVal2, tVal6, tVal7 ;// pp2 = d + 2*b 4470c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD tVal7, tVal7, b ;// 3b 4480c1bc742181ded4930842b46e9507372f0b1b963James Dong ORR p2p0, tVal6, tVal2, LSL #16 ;// p2p0 = pack {p2, p0} 4490c1bc742181ded4930842b46e9507372f0b1b963James Dong UXTH b, b 4500c1bc742181ded4930842b46e9507372f0b1b963James Dong UXTH tVal7, tVal7 4510c1bc742181ded4930842b46e9507372f0b1b963James Dong ORR b, b, b, LSL #16 ;// {b,b} 4520c1bc742181ded4930842b46e9507372f0b1b963James Dong ORR tVal7, tVal7, tVal7, LSL #16 ;// {3b,3b} 4530c1bc742181ded4930842b46e9507372f0b1b963James Dong SADD16 p3p1, p2p0, b ;// p3p1 = p2p0 + {b,b} 4540c1bc742181ded4930842b46e9507372f0b1b963James Dong SADD16 p6p4, p3p1, tVal7 ;// p6p4 = p3p1 + {3b,3b} 4550c1bc742181ded4930842b46e9507372f0b1b963James Dong SADD16 p7p5, p6p4, b ;// p7p5 = p6p4 + {b,b} 4560c1bc742181ded4930842b46e9507372f0b1b963James Dong MOV outerCount, #BLK_SIZE ;// Outer Loop Count 4570c1bc742181ded4930842b46e9507372f0b1b963James Dong 4580c1bc742181ded4930842b46e9507372f0b1b963James DongLOOP_PLANE 4590c1bc742181ded4930842b46e9507372f0b1b963James Dong 4600c1bc742181ded4930842b46e9507372f0b1b963James 
Dong USAT16 p7p5, #13, p7p5 ;// clip13(p7) clip13(p5) 4610c1bc742181ded4930842b46e9507372f0b1b963James Dong USAT16 p6p4, #13, p6p4 ;// clip13(p6) clip13(p4) 4620c1bc742181ded4930842b46e9507372f0b1b963James Dong USAT16 p3p1, #13, p3p1 ;// clip13(p3) clip13(p1) 4630c1bc742181ded4930842b46e9507372f0b1b963James Dong USAT16 p2p0, #13, p2p0 ;// clip13(p2) clip13(p0) 4640c1bc742181ded4930842b46e9507372f0b1b963James Dong 4650c1bc742181ded4930842b46e9507372f0b1b963James Dong AND pp7pp5, r0x00FF00FF, p7p5, ASR #5 ;// clip8(p7) clip8(p5) 4660c1bc742181ded4930842b46e9507372f0b1b963James Dong AND pp6pp4, r0x00FF00FF, p6p4, ASR #5 ;// clip8(p6) clip8(p4) 4670c1bc742181ded4930842b46e9507372f0b1b963James Dong AND pp3pp1, r0x00FF00FF, p3p1, ASR #5 ;// clip8(p3) clip8(p1) 4680c1bc742181ded4930842b46e9507372f0b1b963James Dong AND pp2pp0, r0x00FF00FF, p2p0, ASR #5 ;// clip8(p2) clip8(p0) 4690c1bc742181ded4930842b46e9507372f0b1b963James Dong 4700c1bc742181ded4930842b46e9507372f0b1b963James Dong SUBS outerCount, outerCount, #1 ;// outerCount-- 4710c1bc742181ded4930842b46e9507372f0b1b963James Dong 4720c1bc742181ded4930842b46e9507372f0b1b963James Dong ORR p3210, pp2pp0, pp3pp1, LSL #8 ;// pack {p3,p2, p1, p0} 4730c1bc742181ded4930842b46e9507372f0b1b963James Dong STR p3210, [pDst], #4 ;// store {pDst[0] to pDst[3]} 4740c1bc742181ded4930842b46e9507372f0b1b963James Dong 4750c1bc742181ded4930842b46e9507372f0b1b963James Dong ORR p7654, pp6pp4, pp7pp5, LSL #8 ;// pack {p7,p6, p5, p4} 4760c1bc742181ded4930842b46e9507372f0b1b963James Dong M_STR p7654, [pDst], dstStep ;// store {pDst[4] to pDst[7]} 4770c1bc742181ded4930842b46e9507372f0b1b963James Dong 4780c1bc742181ded4930842b46e9507372f0b1b963James Dong SADD16 p7p5, p7p5, c ;// {p7 + c}, {p5 + c} 4790c1bc742181ded4930842b46e9507372f0b1b963James Dong SADD16 p6p4, p6p4, c ;// {p6 + c}, {p4 + c} 4800c1bc742181ded4930842b46e9507372f0b1b963James Dong SADD16 p3p1, p3p1, c ;// {p3 + c}, {p1 + c} 4810c1bc742181ded4930842b46e9507372f0b1b963James Dong 
SADD16 p2p0, p2p0, c ;// {p2 + c}, {p0 + c} 4820c1bc742181ded4930842b46e9507372f0b1b963James Dong 4830c1bc742181ded4930842b46e9507372f0b1b963James Dong BNE LOOP_PLANE ;// Loop for 8 times 4840c1bc742181ded4930842b46e9507372f0b1b963James Dong MOV return, #OMX_Sts_NoErr 4850c1bc742181ded4930842b46e9507372f0b1b963James Dong M_END 4860c1bc742181ded4930842b46e9507372f0b1b963James Dong 4870c1bc742181ded4930842b46e9507372f0b1b963James Dong ENDIF ;// ARM1136JS 4880c1bc742181ded4930842b46e9507372f0b1b963James Dong 4890c1bc742181ded4930842b46e9507372f0b1b963James Dong 4900c1bc742181ded4930842b46e9507372f0b1b963James Dong 4910c1bc742181ded4930842b46e9507372f0b1b963James Dong END 4920c1bc742181ded4930842b46e9507372f0b1b963James Dong;//----------------------------------------------------------------------------------------------- 4930c1bc742181ded4930842b46e9507372f0b1b963James Dong;// omxVCM4P10_PredictIntraChroma_8x8 ends 4940c1bc742181ded4930842b46e9507372f0b1b963James Dong;//----------------------------------------------------------------------------------------------- 495