;//
;//
;// File Name:  armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe_s.s
;// OpenMAX DL: v1.0.2
;// Revision:   9641
;// Date:       Thursday, February 7, 2008
;//
;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
;//
;//
;//
;// Repository revision of this copy: 0c1bc742181ded4930842b46e9507372f0b1b963
;//

        INCLUDE omxtypes_s.h
        INCLUDE armCOMM_s.h

        EXPORT armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe

        M_VARIANTS ARM1136JS



    IF ARM1136JS


        ;// Stack slots used by this routine
        ;//   ppDstArgs    : caller's pDst/dstStep pair, saved while pDst/dstStep
        ;//                  are redirected to the intermediate buffer
        ;//   pTempResult1 : Acc0/Acc1 of the previous (even-Counter) first-pass iteration
        ;//   pTempResult2 : Acc2/Acc3 of the previous (even-Counter) first-pass iteration
        ;//   pDstStep     : spill of dstStep (r3 doubles as Temp1)
        ;//   pSrcStep     : spill of srcStep (r1 doubles as Temp2/ValI)
        ;//   pCounter     : spill of Counter in the second pass (r11 doubles as ValHF)
        ;//   ppSrc, ppDst : allocated but not referenced in this file
        M_ALLOC8 ppDstArgs, 8
        M_ALLOC8 pTempResult1, 8
        M_ALLOC8 pTempResult2, 8
        M_ALLOC4 ppSrc, 4
        M_ALLOC4 ppDst, 4
        M_ALLOC4 pDstStep, 4
        M_ALLOC4 pSrcStep, 4
        M_ALLOC4 pCounter, 4

        ;// Function header
        ;// Function:
        ;//     armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe
        ;//
        ;// Implements diagonal interpolation for a block of size 4x4. Input and output should
        ;// be aligned.
        ;//
        ;// The work is done in two passes:
        ;//   1. A 6-tap (1,-5,20,20,-5,1) filter along each source row, two rows per
        ;//      iteration, writing 16-bit results into the intermediate buffer at r8.
        ;//   2. The same 6-tap filter across the rows of the intermediate buffer,
        ;//      one output column per iteration, saturated and stored as bytes to pDst.
        ;//
        ;// Registers used as input for this function
        ;// r0,r1,r2,r3, r8 where r0,r2  input pointer and r1,r3 step size, r8 intermediate-buf pointer
        ;//
        ;// Registers preserved for top level function
        ;// r0,r1,r2,r3,r4,r5,r6,r14
        ;//
        ;// Registers modified by the function
        ;// r7,r8,r9,r10,r11,r12
        ;//
        ;// Output registers
        ;// None. Function will preserve r0-r3
        ;// NOTE(review): at the End label r0 points at the intermediate buffer and
        ;// r1 holds 16; confirm against M_END / the caller that this matches the
        ;// "preserve r0-r3" statement above.

        M_START armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe, r6

;// Declare input registers
pSrc            RN 0
srcStep         RN 1
pDst            RN 2
dstStep         RN 3

;// Declare inner loop registers
Acc0            RN 4
Acc1            RN 5
Acc2            RN 6
Acc3            RN 7

;// First-pass pixel registers. ValA-ValD overlay Acc0-Acc3 and ValI overlays
;// srcStep, so the instruction order in the first pass is significant.
ValA            RN 4
ValB            RN 5
ValC            RN 6
ValD            RN 7
ValE            RN 8
ValF            RN 9
ValG            RN 12
ValH            RN 14
ValI            RN 1

;// Scratch registers. Temp1 overlays dstStep and Temp2 overlays srcStep, which
;// is why both steps are spilled to pDstStep/pSrcStep and reloaded.
Temp1           RN 3
Temp2           RN 1
Temp3           RN 12
Temp4           RN 7
Temp5           RN 5
r0x0fe00fe0     RN 3                            ;// [0 (16*255 - 16) 0 (16*255 - 16)]
r0x00ff00ff     RN 10                           ;// [0 255 0 255] where 255 is offset
Counter         RN 11
pInterBuf       RN 8

;// Second-pass registers: each holds two 16-bit intermediate values
ValCA           RN 8
ValDB           RN 9
ValGE           RN 10
ValHF           RN 11
r0x00140001     RN 12                           ;// [20 1] coefficient pair
r0x0014fffb     RN 14                           ;// [20 -5] coefficient pair

r0x0001fc00     RN 11

Accx            RN 8
Accy            RN 9
Temp6           RN 14

        ;// Save the caller's destination and redirect pDst/dstStep to the
        ;// intermediate buffer (16 bytes per buffer row) for the first pass.
        M_STRD      pDst, dstStep, ppDstArgs

        MOV         pDst, pInterBuf
        MOV         dstStep, #16

        ;// Counter = 4; the first-pass loop runs while Counter >= 0 (BPL),
        ;// i.e. five iterations of two source rows each.
        MOV         Counter, #4
        M_STR       dstStep, pDstStep
        M_STR       srcStep, pSrcStep
        LDR         r0x00ff00ff, =0x00ff00ff                ;// [0 255 0 255] 255 is offset to avoid negative results

HeightLoop
NextTwoRowsLoop
        LDR         ValD, [pSrc, srcStep]                   ;// Load row 1 [d1 c1 b1 a1]
        LDR         ValA, [pSrc], #4                        ;// Load row 0 [d0 c0 b0 a0]
        LDR         ValH, [pSrc, srcStep]                   ;// Load [h1 g1 f1 e1]
        LDR         ValE, [pSrc], #4                        ;// Load [h0 g0 f0 e0]
        LDRB        Temp2, [pSrc, srcStep]                  ;// Load row 1 byte i1
        LDRB        Temp1, [pSrc], #-8                      ;// Load row 0 byte i0, rewind pSrc to row start

        PKHBT       ValB, ValA, ValD, LSL #16               ;// [b1 a1 b0 a0]
        PKHTB       ValD, ValD, ValA, ASR #16               ;// [d1 c1 d0 c0]
        UXTAB16     ValA, r0x00ff00ff, ValB                 ;// [00 a1 00 a0] + [0 255 0 255]
        UXTAB16     ValC, r0x00ff00ff, ValD                 ;// [00 c1 00 c0] + [0 255 0 255]
        PKHBT       ValI, Temp1, Temp2, LSL #16             ;// [00 i1 00 i0]
        PKHBT       ValF, ValE, ValH, LSL #16               ;// [f1 e1 f0 e0]
        PKHTB       ValH, ValH, ValE, ASR #16               ;// [h1 g1 h0 g0]
        UXTAB16     ValE, r0x00ff00ff, ValF                 ;// [00 e1 00 e0] + [0 255 0 255]

        ;// Calculate Acc0
        ;// Acc0 = a - 5*b + 20*c + 20*d - 5*e + f
        UXTAB16     Temp1, ValC, ValD, ROR #8
        UXTAB16     Temp3, ValE, ValB, ROR #8
        RSB         Temp1, Temp3, Temp1, LSL #2
        UXTAB16     Acc0, ValA, ValF, ROR #8
        ADD         Temp1, Temp1, Temp1, LSL #2
        ADD         Acc0, Acc0, Temp1

        ;// Calculate Acc1
        ;// Acc1 = b - 5*c + 20*d + 20*e - 5*f + g
        UXTAB16     Temp1, ValE, ValD, ROR #8
        UXTAB16     Temp3, ValC, ValF, ROR #8
        RSB         Temp1, Temp3, Temp1, LSL #2
        UXTAB16     ValG, r0x00ff00ff, ValH                 ;// [00 g1 00 g0] + [0 255 0 255]
        ADD         Temp1, Temp1, Temp1, LSL #2
        UXTAB16     Acc1, ValG, ValB, ROR #8
        ADD         Acc1, Acc1, Temp1

        UXTAB16     Acc2, ValC, ValH, ROR #8
        ADD         ValI, r0x00ff00ff, ValI                 ;// [00 i1 00 i0] + [0 255 0 255]

        ;// Calculate Acc2
        ;// Acc2 = c - 5*d + 20*e + 20*f - 5*g + h
        UXTAB16     Temp1, ValG, ValD, ROR #8
        UXTAB16     Acc3, ValI, ValD, ROR #8
        UXTAB16     Temp2, ValE, ValF, ROR #8

        RSB         Temp1, Temp1, Temp2, LSL #2
        UXTAB16     Temp2, ValG, ValF, ROR #8
        ADD         Temp1, Temp1, Temp1, LSL #2
        ADD         Acc2, Acc2, Temp1

        ;// Calculate Acc3
        ;// Acc3 = d - 5*e + 20*f + 20*g - 5*h + i
        UXTAB16     Temp1, ValE, ValH, ROR #8
        RSB         Temp1, Temp1, Temp2, LSL #2
        ADD         Temp1, Temp1, Temp1, LSL #2
        ADD         Acc3, Acc3, Temp1

        ;// Temp1/Temp2 overwrote r3/r1: restore the step values
        M_LDR       dstStep, pDstStep
        M_LDR       srcStep, pSrcStep

        ;// If Counter is even store Acc0-Acc3 in a temporary buffer
        ;// If Counter is odd store Acc0-Acc3 and previous Acc0-Acc3 in the intermediate buf
        ANDS        Temp3, Counter, #1
        BEQ         NoProcessing

        ;// Packing previous and current Acc0-Acc3 values
        M_LDRD      Accx, Accy, pTempResult1
        PKHBT       Temp6, Accx, Acc0, LSL #16              ;//[0 a2 0 a0] = [0 a3 0 a2] [0 a1 0 a0]
        PKHTB       Acc0, Acc0, Accx, ASR #16               ;//[0 a3 0 a1] = [0 a1 0 a0] [0 a3 0 a2]
        STR         Acc0, [pDst, dstStep]
        STR         Temp6, [pDst], #4
        PKHBT       Temp6, Accy, Acc1, LSL #16              ;//[0 b2 0 b0] = [0 b3 0 b2] [0 b1 0 b0]
        PKHTB       Acc1, Acc1, Accy, ASR #16               ;//[0 b3 0 b1] = [0 b1 0 b0] [0 b3 0 b2]
        M_LDRD      Accx, Accy, pTempResult2
        STR         Acc1, [pDst, dstStep]
        STR         Temp6, [pDst], #4

        PKHBT       Temp6, Accx, Acc2, LSL #16              ;//[0 c2 0 c0] = [0 c3 0 c2] [0 c1 0 c0]
        PKHTB       Acc2, Acc2, Accx, ASR #16               ;//[0 c3 0 c1] = [0 c1 0 c0] [0 c3 0 c2]
        STR         Acc2, [pDst, dstStep]
        STR         Temp6, [pDst], #4
        PKHBT       Temp6, Accy, Acc3, LSL #16              ;//[0 d2 0 d0] = [0 d3 0 d2] [0 d1 0 d0]
        PKHTB       Acc3, Acc3, Accy, ASR #16               ;//[0 d3 0 d1] = [0 d1 0 d0] [0 d3 0 d2]
        STR         Acc3, [pDst, dstStep]
        STR         Temp6, [pDst], #-12                     ;// rewind pDst to the start of this buffer row
        ADD         pDst, pDst, dstStep, LSL #1             ;// two buffer rows were written
        B           AfterStore

NoProcessing
        M_STRD      Acc0, Acc1, pTempResult1
        M_STRD      Acc2, Acc3, pTempResult2
AfterStore
        SUBS        Counter, Counter, #1                    ;// Loop till height is 10
        ADD         pSrc, pSrc, srcStep, LSL #1             ;// advance two source rows (flags untouched)
        BPL         HeightLoop

        ;// The last iteration (Counter == 0) took the NoProcessing path, so its
        ;// Acc0-Acc3 are still in registers: store them unpacked.
        STR         Acc0, [pDst], #4                        ;//[0 a1 0 a0]
        STR         Acc1, [pDst], #4
        STR         Acc2, [pDst], #4
        STR         Acc3, [pDst], #-12

        ;//
        ;// Second pass: 6-tap filter across the rows of the intermediate
        ;// buffer using multiply-accumulate, one output column per iteration
        ;//

        SUB         pSrc, pDst, dstStep, LSL #2             ;// pSrc = start of intermediate buffer
        MOV         srcStep, #16
        M_LDRD      pDst, dstStep, ppDstArgs                ;// restore caller's pDst/dstStep

        MOV         Counter, #4
        LDR         r0x0014fffb, =0x0014fffb
        LDR         r0x00140001, =0x00140001

HeightLoop1
        M_STR       Counter, pCounter                       ;// r11 is reused as ValHF / r0x0001fc00 below

        M_LDR       ValCA, [pSrc], srcStep                  ;// Load  [0 c 0 a]
        M_LDR       ValDB, [pSrc], srcStep                  ;// Load  [0 d 0 b]
        M_LDR       ValGE, [pSrc], srcStep                  ;// Load  [0 g 0 e]
        M_LDR       ValHF, [pSrc], srcStep                  ;// Load  [0 h 0 f]


        ;// Results accumulated by the multiply instructions below:
        ;// Acc0 = a - 5*b + 20*c + 20*d - 5*e + f
        ;// Acc1 = b - 5*c + 20*d + 20*e - 5*f + g
        ;// Acc2 = c - 5*d + 20*e + 20*f - 5*g + h
        ;// Acc3 = d - 5*e + 20*f + 20*g - 5*h + i

        SMUAD       Acc0, ValCA, r0x00140001                ;// Acc0  = [0 c 0 a] * [0 20 0 1]
        SMUAD       Acc1, ValDB, r0x00140001                ;// Acc1  = [0 d 0 b] * [0 20 0 1]
        SMUADX      Acc2, ValGE, r0x0014fffb                ;// Acc2  = [0 g 0 e] * [0 -5 0 20] (halves exchanged)
        SMUAD       Acc3, ValGE, r0x0014fffb                ;// Acc3  = [0 g 0 e] * [0 20 0 -5]

        SMLAD       Acc0, ValDB, r0x0014fffb, Acc0          ;// Acc0 += [0 d 0 b] * [0 20 0 -5]
        SMLADX      Acc1, ValGE, r0x00140001, Acc1          ;// Acc1 += [0 g 0 e] * [0 1 0 20] (halves exchanged)
        SMLADX      Acc2, ValHF, r0x00140001, Acc2          ;// Acc2 += [0 h 0 f] * [0 1 0 20] (halves exchanged)
        SMLADX      Acc3, ValHF, r0x0014fffb, Acc3          ;// Acc3 += [0 h 0 f] * [0 -5 0 20] (halves exchanged)

        SMLABB      Acc0, ValGE, r0x0014fffb, Acc0          ;// Acc0 += e * -5
        SMLATB      Acc1, ValCA, r0x0014fffb, Acc1          ;// Acc1 += c * -5
        SMLATB      Acc2, ValCA, r0x00140001, Acc2          ;// Acc2 += c * 1
        SMLATB      Acc3, ValDB, r0x00140001, Acc3          ;// Acc3 += d * 1

        LDRH        ValCA, [pSrc], #4                       ;// Load i; the #4 leaves pSrc one word on after the rewind below
        SMLABB      Acc0, ValHF, r0x00140001, Acc0          ;// Acc0 += f * 1
        SMLABB      Acc1, ValHF, r0x0014fffb, Acc1          ;// Acc1 += f * -5
        SMLATB      Acc2, ValDB, r0x0014fffb, Acc2          ;// Acc2 += d * -5
        SMLABB      Acc3, ValCA, r0x00140001, Acc3          ;// Acc3 += i * 1

        ;// ValHF is no longer needed, so r11 can carry the offset constant
        LDR         r0x0001fc00, =0x0001fc00                ;// (0xff * 16 * 32) - 512
        SUB         Acc0, Acc0, r0x0001fc00
        SUB         Acc1, Acc1, r0x0001fc00
        SUB         Acc2, Acc2, r0x0001fc00
        SUB         Acc3, Acc3, r0x0001fc00


        ;// Clamp to 18 unsigned bits so that the LSR #10 below yields 0..255
        USAT        Acc0, #18, Acc0
        USAT        Acc1, #18, Acc1
        USAT        Acc2, #18, Acc2
        USAT        Acc3, #18, Acc3

        ;// Store the four results down one destination column
        MOV         Acc0, Acc0, LSR #10
        M_STRB      Acc0, [pDst], dstStep
        MOV         Acc1, Acc1, LSR #10
        M_STRB      Acc1, [pDst], dstStep
        MOV         Acc2, Acc2, LSR #10
        M_STRB      Acc2, [pDst], dstStep
        MOV         Acc3, Acc3, LSR #10
        M_STRB      Acc3, [pDst], dstStep


        M_LDR       Counter, pCounter
        SUB         pDst, pDst, dstStep, LSL #2             ;// back to the first destination row
        SUB         pSrc, pSrc, srcStep, LSL #2             ;// back to the first buffer row (net +4 bytes)
        ADD         pDst, pDst, #1                          ;// next destination column
        SUBS        Counter, Counter, #1
        BGT         HeightLoop1
End
        ;// Undo the four per-column advances made by the loop above
        SUB         pDst, pDst, #4
        SUB         pSrc, pSrc, #16

        M_END

    ENDIF

        END