;// armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe_s.s revision 0c1bc742181ded4930842b46e9507372f0b1b963
;//
;//
;// File Name:  armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe_s.s
;// OpenMAX DL: v1.0.2
;// Revision:   9641
;// Date:       Thursday, February 7, 2008
;//
;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
;//
;//
;//

        INCLUDE omxtypes_s.h
        INCLUDE armCOMM_s.h

        M_VARIANTS ARM1136JS

        EXPORT armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe



    IF ARM1136JS

        ;// Function header

        ;// Function:
        ;//     armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
        ;//
        ;// Implements vertical interpolation for a block of size 4x4. Input and output should
        ;// be aligned (every row is accessed with a single 32-bit load/store).
        ;//
        ;// For every output row the six vertically adjacent source rows A..F are combined as
        ;//     out = clamp_0_255( ((A + F) - 5*(B + E) + 20*(C + D) + 16) >> 5 )
        ;// where, for the first output row, C is the row at pSrc and A is the row at
        ;// pSrc - 2*srcStep. Two output rows are produced per loop iteration and the loop
        ;// runs twice, so rows pSrc - 2*srcStep .. pSrc + 6*srcStep are read in total.
        ;//
        ;// All arithmetic is done on packed 16-bit lanes (even bytes in one register, odd
        ;// bytes in another). An offset of 255 is added to every sample so that the
        ;// intermediate lane values never go negative; the accumulated offset (16*255) is
        ;// removed again, less the rounding term 16, by a saturating subtract.
        ;//
        ;// Registers used as input for this function
        ;// r0,r1,r2,r3 where r0,r2  input pointer and r1,r3 corresponding step size
        ;//
        ;// Registers preserved for top level function
        ;// r0,r1,r2,r3,r4,r5,r6,r14
        ;// (r0 and r2 are advanced inside the loop and rewound before M_END;
        ;//  r4-r6 and r14 are presumably saved/restored by M_START/M_END - see armCOMM_s.h)
        ;//
        ;// Registers modified by the function
        ;// r7,r8,r9,r10,r11,r12 and the condition flags (SUBS)
        ;//
        ;// Output registers
        ;// None. Function will preserve r0-r3
        M_START armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe, r6

;// Declare input registers
pSrc            RN 0
srcStep         RN 1
pDst            RN 2
dstStep         RN 3

;// Declare inner loop registers
;// NOTE: many names below share one physical register. An alias is only valid
;// until the next alias of the same register is written; the instruction order
;// in the loop depends on this.
ValA            RN 5
ValA0           RN 4
ValA1           RN 5
ValAF0          RN 4
ValAF1          RN 5

ValB            RN 11

ValC            RN 5
ValC0           RN 4
ValC1           RN 5
ValCD0          RN 12
ValCD1          RN 14
ValCF0          RN 4
ValCF1          RN 5

ValD            RN 10

ValE            RN 7
ValE0           RN 6
ValE1           RN 7
ValEB0          RN 10
ValEB1          RN 11
ValED0          RN 6
ValED1          RN 7

ValF            RN 10

ValG            RN 14
ValG0           RN 12
ValG1           RN 14
ValGB0          RN 12
ValGB1          RN 14

Acc0            RN 4
Acc1            RN 5
Acc2            RN 6
Acc3            RN 7

Counter         RN 8
r0x00ff00ff     RN 9                                        ;// [0 255 0 255] where 255 is offset
r0x0fe00fe0     RN 10                                       ;// [0 (16*255 - 16) 0 (16*255 - 16)]


        LDR         r0x00ff00ff, =0x00ff00ff                ;// [0 255 0 255] 255 is offset to avoid negative results
        MOV         Counter, #2                             ;// Two iterations, two output rows each

TwoRowsLoop
        M_LDR       ValC, [pSrc], srcStep                   ;// Load  [c3 c2 c1 c0]
        M_LDR       ValD, [pSrc], srcStep                   ;// Load  [d3 d2 d1 d0]
        M_LDR       ValE, [pSrc], srcStep                   ;// Load  [e3 e2 e1 e0]
        SUB         pSrc, pSrc, srcStep, LSL #2             ;// pSrc now points at row B (one row above C)
        LDR         ValB, [pSrc]                            ;// Load  [b3 b2 b1 b0]
        UXTAB16     ValC0, r0x00ff00ff, ValC                ;// [0 c2 0 c0] + [0 255 0 255]
        UXTAB16     ValC1, r0x00ff00ff, ValC, ROR #8        ;// [0 c3 0 c1] + [0 255 0 255]

        UXTAB16     ValE0, r0x00ff00ff, ValE                ;// [0 e2 0 e0] + [0 255 0 255]
        UXTAB16     ValE1, r0x00ff00ff, ValE, ROR #8        ;// [0 e3 0 e1] + [0 255 0 255]
        UXTAB16     ValCD0, ValC0, ValD                     ;// [0 c2 0 c0] + [0 255 0 255] + [0 d2 0 d0]
        UXTAB16     ValCD1, ValC1, ValD, ROR #8             ;// [0 c3 0 c1] + [0 255 0 255] + [0 d3 0 d1]
        UXTAB16     ValEB0, ValE0, ValB                     ;// [0 e2 0 e0] + [0 255 0 255] + [0 b2 0 b0] (overwrites ValD, r10)
        RSB         ValCD0, ValEB0, ValCD0, LSL #2          ;// 4*(Off+C+D) - (Off+B+E)

        LDR         ValD, [pSrc, srcStep, LSL #1]           ;// Reload [d3 d2 d1 d0]; r10 was reused for ValEB0
        UXTAB16     ValEB1, ValE1, ValB, ROR #8             ;// [0 e3 0 e1] + [0 255 0 255] + [0 b3 0 b1] (overwrites ValB, r11)
        RSB         ValCD1, ValEB1, ValCD1, LSL #2          ;// 4*(Off+C+D) - (Off+B+E), odd bytes
        ;// One cycle stall
        UXTAB16     ValED0, ValE0, ValD                     ;// [0 e2 0 e0] + [0 255 0 255] + [0 d2 0 d0]
        UXTAB16     ValED1, ValE1, ValD, ROR #8             ;// [0 e3 0 e1] + [0 255 0 255] + [0 d3 0 d1]

        LDR         ValF, [pSrc, srcStep, LSL #2]           ;// Load  [f3 f2 f1 f0]
        M_LDR       ValB, [pSrc], srcStep                   ;// Reload [b3 b2 b1 b0]; pSrc advances back to row C
        ADD         ValCD0, ValCD0, ValCD0, LSL #2          ;// 5 * [4*(Off+C+D) - (Off+B+E)]
        ADD         ValCD1, ValCD1, ValCD1, LSL #2          ;// 5 * [4*(Off+C+D) - (Off+B+E)], odd bytes
        UXTAB16     ValCF1, ValC1, ValF, ROR #8             ;// [0 c3 0 c1] + [0 255 0 255] + [0 f3 0 f1]
        UXTAB16     ValCF0, ValC0, ValF                     ;// [0 c2 0 c0] + [0 255 0 255] + [0 f2 0 f0]
        RSB         ValED1, ValCF1, ValED1, LSL #2          ;// 4*(Off+E+D) - (Off+C+F), odd bytes

        SUB         ValA, pSrc, srcStep, LSL #1             ;// Address of row A (two rows above C)
        LDR         ValA, [ValA]                            ;// Load  [a3 a2 a1 a0]
        RSB         ValED0, ValCF0, ValED0, LSL #2          ;// 4*(Off+E+D) - (Off+C+F)
        ADD         ValED1, ValED1, ValED1, LSL #2          ;// 5 * [4*(Off+E+D) - (Off+C+F)], odd bytes
        ADD         ValED0, ValED0, ValED0, LSL #2          ;// 5 * [4*(Off+E+D) - (Off+C+F)]
        UXTAB16     ValA0, r0x00ff00ff, ValA                ;// [0 a2 0 a0] + [0 255 0 255]
        UXTAB16     ValA1, r0x00ff00ff, ValA, ROR #8        ;// [0 a3 0 a1] + [0 255 0 255]
        UXTAB16     ValAF0, ValA0, ValF                     ;// [0 a2 0 a0] + [0 255 0 255] + [0 f2 0 f0]
        UXTAB16     ValAF1, ValA1, ValF, ROR #8             ;// [0 a3 0 a1] + [0 255 0 255] + [0 f3 0 f1]

        ;// r10 doubles as ValD/ValEB0/ValF above, so this constant has to be
        ;// reloaded on every iteration once ValF is no longer needed.
        LDR         r0x0fe00fe0, =0x0fe00fe0                ;// [0 (16*255 - 16) 0 (16*255 - 16)] bias minus rounding term
        ADD         Acc1, ValCD1, ValAF1                    ;// Must precede the ValG load: ValCD1 and ValG share r14

        LDR         ValG, [pSrc, srcStep, LSL #2]           ;// Load  [g3 g2 g1 g0]
        ADD         Acc0, ValCD0, ValAF0                    ;// Acc0 = 16*Off + (A+F) + 20*(C+D) - 5*(B+E)
        UQSUB16     Acc1, Acc1, r0x0fe00fe0                 ;// Acc1 -= (16*Off - 16), lanes saturate at 0
        UQSUB16     Acc0, Acc0, r0x0fe00fe0
        UXTAB16     ValG0, r0x00ff00ff, ValG                ;// [0 g2 0 g0] + [0 255 0 255]
        UXTAB16     ValG1, r0x00ff00ff, ValG, ROR #8        ;// [0 g3 0 g1] + [0 255 0 255]
        UXTAB16     ValGB0, ValG0, ValB                     ;// [0 g2 0 g0] + [0 255 0 255] + [0 b2 0 b0]
        UXTAB16     ValGB1, ValG1, ValB, ROR #8             ;// [0 g3 0 g1] + [0 255 0 255] + [0 b3 0 b1]
        ADD         Acc2, ValED0, ValGB0                    ;// Acc2 = 16*Off + (B+G) + 20*(D+E) - 5*(C+F)
        ADD         Acc3, ValED1, ValGB1
        UQSUB16     Acc3, Acc3, r0x0fe00fe0                 ;// Acc3 -= (16*Off - 16), lanes saturate at 0
        UQSUB16     Acc2, Acc2, r0x0fe00fe0

        USAT16      Acc1, #13, Acc1                         ;// Saturate to 8+5 = 13 bits
        USAT16      Acc0, #13, Acc0
        USAT16      Acc3, #13, Acc3
        USAT16      Acc2, #13, Acc2
        AND         Acc1, r0x00ff00ff, Acc1, LSR #5         ;// [0 a3 0 a1]  (first output row, odd bytes)
        AND         Acc0, r0x00ff00ff, Acc0, LSR #5         ;// [0 a2 0 a0]  (first output row, even bytes)
        ORR         Acc0, Acc0, Acc1, LSL #8                ;// [a3 a2 a1 a0]
        AND         Acc3, r0x00ff00ff, Acc3, LSR #5         ;// [0 b3 0 b1]  (second output row, odd bytes)
        AND         Acc2, r0x00ff00ff, Acc2, LSR #5         ;// [0 b2 0 b0]  (second output row, even bytes)

        M_STR       Acc0, [pDst], dstStep                   ;// Store result & adjust pointer
        ORR         Acc2, Acc2, Acc3, LSL #8                ;// [b3 b2 b1 b0]
        M_STR       Acc2, [pDst], dstStep                   ;// Store result & adjust pointer
        ADD         pSrc, pSrc, srcStep, LSL #1             ;// Advance two source rows for the next pair

        SUBS        Counter, Counter, #1
        BGT         TwoRowsLoop
End
        ;// Both pointers moved forward by four rows in total; rewind them so
        ;// the caller sees r0 and r2 unchanged.
        SUB         pDst, pDst, dstStep, LSL #2
        SUB         pSrc, pSrc, srcStep, LSL #2

        M_END

    ENDIF

    END