;//
;//
;// File Name:  armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe_s.s
;// OpenMAX DL: v1.0.2
;// Revision:   12290
;// Date:       Wednesday, April 9, 2008
;//
;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
;//
;//
;//

        INCLUDE omxtypes_s.h
        INCLUDE armCOMM_s.h

        M_VARIANTS CortexA8

        EXPORT armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe

DEBUG_ON    SETL {FALSE}

    IF CortexA8

;//-----------------------------------------------------------------------
;// armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
;//
;// Applies a horizontal 6-tap sum to four consecutive source rows.
;// For each row, with a..f being the row at byte offsets 0..5:
;//
;//     Acc = (a + f) + dCoeff20 * (c + d) - dCoeff5 * (b + e)
;//     Out = unsigned-saturate((Acc + 16) >> 5)
;//
;// In:    pSrc    (r0)  address of the first row; 16 bytes are loaded
;//                      per row
;//        srcStep (r1)  post-increment applied to pSrc after each row
;//        dCoeff5 (d30), dCoeff20 (d31)
;//                      read but never written here, so the caller
;//                      must preload them (presumably 5 and 20 in
;//                      every S16 lane - TODO confirm against callers)
;//
;// Out:   dAcc0, dAcc2, dAcc4, dAcc6 (d22, d24, d26, d28)
;//                      one row each. Only the low four S16 lanes of
;//                      each accumulator receive the multiply terms,
;//                      so only bytes 0..3 of each result register are
;//                      fully filtered; bytes 4..7 are derived from
;//                      a+f alone.
;//        pSrc          advanced by 4 * srcStep
;//
;// Notes: pDst, dstStep and dResult0..6 are declared below but are not
;//        referenced by any instruction in this routine; nothing is
;//        stored to memory here.
;//
;// Clobb: d8-d10, d12-d29
;//        NOTE(review): d8-d15 are callee-saved under the AAPCS and
;//        M_START is only given r11 here - confirm that callers do not
;//        rely on d8-d10 / d12-d15 being preserved.
;//
;// The instruction order is hand-interleaved (loads and multiplies of
;// one row overlap the byte extraction of the next); do not reorder
;// without re-checking the register lifetimes.
;//-----------------------------------------------------------------------

        M_START armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe, r11

;// Declare input registers
pSrc            RN 0
srcStep         RN 1
pDst            RN 2
dstStep         RN 3

;// Declare Neon registers
dCoeff5         DN 30.S16
dCoeff20        DN 31.S16

;// Source rows A..D as Q registers (16 bytes each) ...
qSrcA01         QN 11.U8
qSrcB01         QN 12.U8
qSrcC01         QN 13.U8
qSrcD01         QN 14.U8

;// ... and the D-register halves of those same Q registers
dSrcA0          DN 22.U8
dSrcA1          DN 23.U8
dSrcB0          DN 24.U8
dSrcB1          DN 25.U8
dSrcC0          DN 26.U8
dSrcC1          DN 27.U8
dSrcD0          DN 28.U8
dSrcD1          DN 29.U8

;// Shifted copies of the current row (reused for every row)
dSrcb           DN 12.U8
dSrce           DN 13.U8
dSrcf           DN 10.U8

;// Per-row shifted copies at offset 2 (c) ...
dSrc0c          DN 14.U8
dSrc1c          DN 16.U8
dSrc2c          DN 18.U8
dSrc3c          DN 20.U8

;// ... and at offset 3 (d)
dSrc0d          DN 15.U8
dSrc1d          DN 17.U8
dSrc2d          DN 19.U8
dSrc3d          DN 21.U8

;// Widened partial sums: qTemp01 = c+d, qTemp23 = b+e.
;// dTemp0 / dTemp2 are the low halves of qTemp01 / qTemp23.
qTemp01         QN 4.S16
qTemp23         QN 6.S16
dTemp0          DN 8.S16
dTemp2          DN 12.S16

;// Accumulators; these alias the source row registers (Q11..Q14)
qRes01          QN 11.S16
qRes23          QN 12.S16
qRes45          QN 13.S16
qRes67          QN 14.S16

dRes0           DN 22.S16
dRes2           DN 24.S16
dRes4           DN 26.S16
dRes6           DN 28.S16

;// Narrowed 8-bit results (same D registers as dRes0..6)
dAcc0           DN 22.U8
dAcc2           DN 24.U8
dAcc4           DN 26.U8
dAcc6           DN 28.U8

;// 32-bit views of the result registers (not used in this routine)
dResult0        DN 22.U32
dResult2        DN 24.U32
dResult4        DN 26.U32
dResult6        DN 28.U32

        ;// ---- Row A: extract shifted copies and form the pair sums ----
        VLD1        qSrcA01, [pSrc], srcStep    ;// Load A register [a0 a1 a2 a3 ..]
        ;// One cycle stall
        VEXT        dSrcf, dSrcA0, dSrcA1, #5   ;// [f0 f1 f2 f3 ..]
        VEXT        dSrcb, dSrcA0, dSrcA1, #1   ;// [b0 b1 b2 b3 ..]
;        VLD1        qSrcB01, [pSrc], srcStep    ;// Load B register [a0 a1 a2 a3 ..]
        VEXT        dSrc0c, dSrcA0, dSrcA1, #2  ;// [c0 c1 c2 c3 ..]
        VEXT        dSrc0d, dSrcA0, dSrcA1, #3  ;// [d0 d1 d2 d3 ..]
        VEXT        dSrce, dSrcA0, dSrcA1, #4   ;// [e0 e1 e2 e3 ..]
        VADDL       qRes01, dSrcA0, dSrcf       ;// Acc=a+f (overwrites row A)
        VADDL       qTemp01, dSrc0c, dSrc0d     ;// c+d
        VADDL       qTemp23, dSrcb, dSrce       ;// b+e

        ;// ---- Row B load, overlapped with the row A multiplies ----
        VLD1        qSrcB01, [pSrc], srcStep    ;// Load B register [a0 a1 a2 a3 ..]
;        VLD1        qSrcC01, [pSrc], srcStep    ;// Load C register [a0 a1 a2 a3 ..]
        VMLA        dRes0, dTemp0, dCoeff20     ;// Acc += 20*(c+d)
;        VMLS        dRes0, dTemp2, dCoeff5      ;// Acc -= 5*(b+e)
        VMUL        dTemp0, dTemp2, dCoeff5     ;// dTemp0 = 5*(b+e); subtracted by the VSUB below (TeRi)

        VEXT        dSrcf, dSrcB0, dSrcB1, #5   ;// [f0 f1 f2 f3 ..]
        VEXT        dSrcb, dSrcB0, dSrcB1, #1   ;// [b0 b1 b2 b3 ..]
        VEXT        dSrc1c, dSrcB0, dSrcB1, #2  ;// [c0 c1 c2 c3 ..]
        VEXT        dSrc1d, dSrcB0, dSrcB1, #3  ;// [d0 d1 d2 d3 ..]
        VEXT        dSrce, dSrcB0, dSrcB1, #4   ;// [e0 e1 e2 e3 ..]
        VADDL       qRes23, dSrcB0, dSrcf       ;// Acc=a+f (overwrites row B)

        VSUB        dRes0, dRes0, dTemp0        ;// Row A: Acc -= 5*(b+e) (TeRi)

        VADDL       qTemp01, dSrc1c, dSrc1d     ;// c+d
        VADDL       qTemp23, dSrcb, dSrce       ;// b+e

        ;// ---- Row C load, overlapped with the row B multiplies ----
        VLD1        qSrcC01, [pSrc], srcStep    ;// Load C register [a0 a1 a2 a3 ..]
;        VLD1        qSrcD01, [pSrc], srcStep    ;// Load D register [a0 a1 a2 a3 ..]

        VMLA        dRes2, dTemp0, dCoeff20     ;// Acc += 20*(c+d)
;        VMLS        dRes2, dTemp2, dCoeff5      ;// Acc -= 5*(b+e)
        VMUL        dTemp0, dTemp2, dCoeff5     ;// dTemp0 = 5*(b+e); subtracted by the VSUB below (TeRi)

        VEXT        dSrcf, dSrcC0, dSrcC1, #5   ;// [f0 f1 f2 f3 ..]
        VEXT        dSrcb, dSrcC0, dSrcC1, #1   ;// [b0 b1 b2 b3 ..]
        VEXT        dSrc2c, dSrcC0, dSrcC1, #2  ;// [c0 c1 c2 c3 ..]
        VEXT        dSrc2d, dSrcC0, dSrcC1, #3  ;// [d0 d1 d2 d3 ..]
        VEXT        dSrce, dSrcC0, dSrcC1, #4   ;// [e0 e1 e2 e3 ..]
        VADDL       qRes45, dSrcC0, dSrcf       ;// Acc=a+f (overwrites row C)

        VSUB        dRes2, dRes2, dTemp0        ;// Row B: Acc -= 5*(b+e) (TeRi)

        VADDL       qTemp01, dSrc2c, dSrc2d     ;// c+d
        VADDL       qTemp23, dSrcb, dSrce       ;// b+e

        ;// ---- Row D load, overlapped with the row C multiplies ----
        VLD1        qSrcD01, [pSrc], srcStep    ;// Load D register [a0 a1 a2 a3 ..]

        VMLA        dRes4, dTemp0, dCoeff20     ;// Acc += 20*(c+d)
;        VMLS        dRes4, dTemp2, dCoeff5      ;// Acc -= 5*(b+e)
        VMUL        dTemp0, dTemp2, dCoeff5     ;// dTemp0 = 5*(b+e); subtracted by the VSUB below (TeRi)


        VEXT        dSrcf, dSrcD0, dSrcD1, #5   ;// [f0 f1 f2 f3 ..]
        VEXT        dSrcb, dSrcD0, dSrcD1, #1   ;// [b0 b1 b2 b3 ..]
        VEXT        dSrc3c, dSrcD0, dSrcD1, #2  ;// [c0 c1 c2 c3 ..]
        VEXT        dSrc3d, dSrcD0, dSrcD1, #3  ;// [d0 d1 d2 d3 ..]
        VEXT        dSrce, dSrcD0, dSrcD1, #4   ;// [e0 e1 e2 e3 ..]
        VADDL       qRes67, dSrcD0, dSrcf       ;// Acc=a+f (overwrites row D)

        VSUB        dRes4, dRes4, dTemp0        ;// Row C: Acc -= 5*(b+e) (TeRi)

        ;// ---- Row D multiplies (last row uses VMLS directly) ----
        VADDL       qTemp01, dSrc3c, dSrc3d     ;// c+d
        VADDL       qTemp23, dSrcb, dSrce       ;// b+e
        VMLA        dRes6, dTemp0, dCoeff20     ;// Acc += 20*(c+d)
        VMLS        dRes6, dTemp2, dCoeff5      ;// Acc -= 5*(b+e)

        ;// ---- Round, shift and narrow all four rows to unsigned 8-bit ----
        VQRSHRUN    dAcc0, qRes01, #5           ;// Acc = Sat ((Acc + 16) / 32)
        VQRSHRUN    dAcc2, qRes23, #5           ;// Acc = Sat ((Acc + 16) / 32)
        VQRSHRUN    dAcc4, qRes45, #5           ;// Acc = Sat ((Acc + 16) / 32)
        VQRSHRUN    dAcc6, qRes67, #5           ;// Acc = Sat ((Acc + 16) / 32)

        M_END

    ENDIF


    END