;//
;//
;// File Name:  armVCM4P10_InterpolateLuma_Align_unsafe_s.s
;// OpenMAX DL: v1.0.2
;// Revision:   12290
;// Date:       Wednesday, April 9, 2008
;//
;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
;//
;//
;//

        INCLUDE omxtypes_s.h
        INCLUDE armCOMM_s.h

        M_VARIANTS ARM1136JS

        EXPORT armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
        EXPORT armVCM4P10_InterpolateLuma_VerAlign4x_unsafe

DEBUG_ON    SETL {FALSE}

    IF ARM1136JS

;// Declare input registers
pSrc            RN 0        ;// r0: source pointer (any byte alignment)
srcStep         RN 1        ;// r1: byte offset added to pSrc after each row
pDst            RN 8        ;// r8: destination pointer, advanced by each store
iHeight         RN 9        ;// r9: number of rows to copy, counted down to zero

;// Declare inner loop registers
;// x and x0 deliberately share r7: x (pSrc & 3) is only read by M_SWITCH,
;// after which r7 is reloaded as the first data word of each row.
x               RN 7
x0              RN 7
x1              RN 10
x2              RN 11
Scratch         RN 12       ;// r12: holds the entry value of pDst in HorAlign9x

;// Function:
;//     armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
;//
;// Implements copy from an arbitrarily aligned source memory location (pSrc) to a 4 byte aligned
;// destination pointed by (pDst) for horizontal interpolation.
;// This function needs to copy 9 bytes in horizontal direction.
;//
;// Registers used as input for this function
;// r0,r1,r8,r9 where r8 contains the aligned memory pointer and r9 the number of rows to copy
;//
;// Registers preserved for top level function
;// r2,r3,r4,r5,r6
;//
;// Registers modified by the function
;// r7,r8,r9,r10,r11,r12
;//
;// Output registers
;// r0 - pointer to the new aligned location which will be used as pSrc
;// r1 - step size to this aligned location
;//
;// Each row is read as three whole words starting at (pSrc & ~3) and written as
;// three words at pDst, shifted right by (pSrc & 3) bytes.
;// NOTE(review): the LSR/LSL pairing discards the lowest-addressed bytes only if
;// words are loaded little-endian - confirm the build is little-endian only.
;// NOTE(review): iHeight is not checked on entry; the loop body always runs at
;// least once, so callers presumably pass iHeight >= 1.

        ;// Function header
        M_START armVCM4P10_InterpolateLuma_HorAlign9x_unsafe

        ;// Copy pDst to scratch so the start of the aligned copy can be returned in r0
        MOV     Scratch, pDst

StartAlignedStackCopy
        AND     x, pSrc, #3                     ;// x = byte offset of pSrc within its word (0..3)
        BIC     pSrc, pSrc, #3                  ;// Round pSrc down to a word boundary

        ;// Select the copy loop for this byte offset
        ;// (M_SWITCH/M_CASE come from armCOMM_s.h; presumably an indexed branch on x)
        M_SWITCH x
        M_CASE   Copy0toAligned
        M_CASE   Copy1toAligned
        M_CASE   Copy2toAligned
        M_CASE   Copy3toAligned
        M_ENDSWITCH

        ;// Offset 0: the source is already word aligned, copy the three words unchanged
Copy0toAligned
        LDM     pSrc, {x0, x1, x2}
        SUBS    iHeight, iHeight, #1            ;// Sets the flags tested by BGT below
        ADD     pSrc, pSrc, srcStep             ;// Advance to the next source row

        ;// One cycle stall

        STM     pDst!, {x0, x1, x2}             ;// Store aligned output row
        BGT     Copy0toAligned                  ;// Loop while rows remain
        B       CopyEnd

        ;// Offset 1: each output word = (word >> 8) | (next word << 24)
Copy1toAligned
        LDM     pSrc, {x0, x1, x2}
        SUBS    iHeight, iHeight, #1            ;// MOV/ORR/STM below leave these flags intact
        ADD     pSrc, pSrc, srcStep

        ;// One cycle stall

        MOV     x0, x0, LSR #8
        ORR     x0, x0, x1, LSL #24
        MOV     x1, x1, LSR #8
        ORR     x1, x1, x2, LSL #24
        MOV     x2, x2, LSR #8                  ;// No fourth word is loaded: top 8 bits are zero
        STM     pDst!, {x0, x1, x2}             ;// Store aligned output row
        BGT     Copy1toAligned
        B       CopyEnd

        ;// Offset 2: each output word = (word >> 16) | (next word << 16)
Copy2toAligned
        LDM     pSrc, {x0, x1, x2}
        SUBS    iHeight, iHeight, #1
        ADD     pSrc, pSrc, srcStep

        ;// One cycle stall

        MOV     x0, x0, LSR #16
        ORR     x0, x0, x1, LSL #16
        MOV     x1, x1, LSR #16
        ORR     x1, x1, x2, LSL #16
        MOV     x2, x2, LSR #16                 ;// Top 16 bits are zero
        STM     pDst!, {x0, x1, x2}             ;// Store aligned output row
        BGT     Copy2toAligned
        B       CopyEnd

        ;// Offset 3: each output word = (word >> 24) | (next word << 8)
Copy3toAligned
        LDM     pSrc, {x0, x1, x2}
        SUBS    iHeight, iHeight, #1
        ADD     pSrc, pSrc, srcStep

        ;// One cycle stall

        MOV     x0, x0, LSR #24
        ORR     x0, x0, x1, LSL #8
        MOV     x1, x1, LSR #24
        ORR     x1, x1, x2, LSL #8
        MOV     x2, x2, LSR #24                 ;// Top 24 bits are zero
        STM     pDst!, {x0, x1, x2}             ;// Store aligned output row
        BGT     Copy3toAligned
        ;// Last case falls through to CopyEnd

CopyEnd

        MOV     pSrc, Scratch                   ;// r0 = start of the aligned copy (pDst on entry)
        MOV     srcStep, #12                    ;// r1 = 12 bytes: the three words stored per row

        M_END


;// Function:
;//     armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
;//
;// Implements copy from an arbitrarily aligned source memory location (pSrc) to an aligned
;// destination pointed by (pDst) for vertical interpolation.
;// This function needs to copy 4 bytes in horizontal direction
;//
;// Registers used as input for this function
;// r0,r1,r8,r9 where r8 contains the aligned memory pointer and r9 the number of rows to copy
;//
;// Registers preserved for top level function
;// r2,r3,r4,r5,r6
;//
;// Registers modified by the function
;// r7,r8,r9,r10,r11,r12
;//
;// Output registers
;// r0 - pointer to the new aligned location which will be used as pSrc
;// r1 - step size to this aligned location
;//
;// Each row produces one word at pDst, built from the word at (pSrc & ~3) and,
;// for non-zero offsets, the word that follows it.
;// NOTE(review): the LSR/LSL pairing discards the lowest-addressed bytes only if
;// words are loaded little-endian - confirm the build is little-endian only.
;// NOTE(review): iHeight is not checked on entry; the loop body always runs at
;// least once, so callers presumably pass iHeight >= 1.

        ;// Function header
        M_START armVCM4P10_InterpolateLuma_VerAlign4x_unsafe

        ;// Copy rows from pSrc to the aligned buffer at pDst
StartVAlignedStackCopy
        AND     x, pSrc, #3                     ;// x = byte offset of pSrc within its word (0..3)
        BIC     pSrc, pSrc, #3                  ;// Round pSrc down to a word boundary


        ;// Select the copy loop for this byte offset
        ;// (M_SWITCH/M_CASE come from armCOMM_s.h; presumably an indexed branch on x)
        M_SWITCH x
        M_CASE   Copy0toVAligned
        M_CASE   Copy1toVAligned
        M_CASE   Copy2toVAligned
        M_CASE   Copy3toVAligned
        M_ENDSWITCH

        ;// Offset 0: the source is already word aligned, copy one word per row
Copy0toVAligned
        M_LDR   x0, [pSrc], srcStep             ;// Load the row, then step pSrc to the next row
        SUBS    iHeight, iHeight, #1            ;// Sets the flags tested by BGT below

        ;// One cycle stall

        STR     x0, [pDst], #4                  ;// Store aligned output row
        BGT     Copy0toVAligned                 ;// Loop while rows remain
        B       CopyVEnd

        ;// Offset 1: output word = (word >> 8) | (next word << 24)
Copy1toVAligned
        LDR     x1, [pSrc, #4]                  ;// Next word, read before pSrc is stepped
        M_LDR   x0, [pSrc], srcStep
        SUBS    iHeight, iHeight, #1            ;// MOV/ORR/STR below leave these flags intact

        ;// One cycle stall

        MOV     x1, x1, LSL #24
        ORR     x0, x1, x0, LSR #8
        STR     x0, [pDst], #4                  ;// Store aligned output row
        BGT     Copy1toVAligned
        B       CopyVEnd

        ;// Offset 2: output word = (word >> 16) | (next word << 16)
Copy2toVAligned
        LDR     x1, [pSrc, #4]
        M_LDR   x0, [pSrc], srcStep
        SUBS    iHeight, iHeight, #1

        ;// One cycle stall

        MOV     x1, x1, LSL #16
        ORR     x0, x1, x0, LSR #16
        STR     x0, [pDst], #4                  ;// Store aligned output row
        BGT     Copy2toVAligned
        B       CopyVEnd

        ;// Offset 3: output word = (word >> 24) | (next word << 8)
Copy3toVAligned
        LDR     x1, [pSrc, #4]
        M_LDR   x0, [pSrc], srcStep
        SUBS    iHeight, iHeight, #1

        ;// One cycle stall

        MOV     x1, x1, LSL #8
        ORR     x0, x1, x0, LSR #24
        STR     x0, [pDst], #4                  ;// Store aligned output row
        BGT     Copy3toVAligned
        ;// Last case falls through to CopyVEnd

CopyVEnd

        ;// r0 = 28 bytes (7 rows) before the end of the copied data.
        ;// NOTE(review): the rewind is a fixed constant, not derived from iHeight;
        ;// with 9 rows copied it lands on row 2 of the buffer - confirm callers
        ;// always use the row count this constant was chosen for.
        SUB     pSrc, pDst, #28
        MOV     srcStep, #4                     ;// r1 = 4 bytes: the one word stored per row

        M_END


    ENDIF

        END
