;//
;//
;// File Name:  armVCM4P10_Average_4x_Align_unsafe_s.s
;// OpenMAX DL: v1.0.2
;// Revision:   9641
;// Date:       Thursday, February 7, 2008
;//
;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
;//
;//
;//


;// Functions:
;//     armVCM4P10_Average_4x4_Align<ALIGNMENT>_unsafe   (ALIGNMENT = 0, 2, 3)
;//
;// Implements the rounded average of two 4x4 byte blocks, c = (a+b+1)>>1,
;// four bytes (one 32-bit word) per row, four rows per call.
;//
;// First operand (a) is read through r0.  For the Align2/Align3 variants
;// the code loads the words at [r0] and [r0,#4] and splices them with
;// shifts of 16/24 bits, i.e. the row starts ALIGNMENT bytes past r0.
;// NOTE(review): this presumes r0 is the word-aligned base address and
;// little-endian data access - confirm against the callers.
;//
;// Second operand (b) is read through r2 from an aligned location, and the
;// result is written back over it (pPred1 and pDstPred are the same register).
;//
;// Registers used as input for this function
;// r0 - pointer to the first operand
;// r1 - step (bytes per row) of the first operand
;// r2 - pointer to the second operand / destination
;// r3 - step (bytes per row) of the second operand / destination
;//
;// Registers preserved for top level function
;// r4,r5,r6,r8,r9,r14
;// (r4, r5 and r14 are used as scratch inside the functions; M_START is
;//  invoked with r6 - presumably that is what saves and restores them,
;//  verify against armCOMM_s.h)
;//
;// Registers modified by the function
;// r7,r10,r11,r12
;// r0 and r2 are also advanced by the post-indexed accesses (four rows each).
;//
;// Output registers
;// r2 - destination pointer, post-incremented by r3 after each of the
;//      four row stores
;// r3 - step size of the destination (not written by these functions)

        INCLUDE omxtypes_s.h
        INCLUDE armCOMM_s.h

        M_VARIANTS ARM1136JS

        EXPORT armVCM4P10_Average_4x4_Align0_unsafe
        EXPORT armVCM4P10_Average_4x4_Align2_unsafe
        EXPORT armVCM4P10_Average_4x4_Align3_unsafe

DEBUG_ON    SETL {FALSE}

;// Declare input registers
pPred0          RN 0
iPredStep0      RN 1
pPred1          RN 2
iPredStep1      RN 3
pDstPred        RN 2            ;// same register as pPred1: result overwrites operand b
iDstStep        RN 3            ;// same register as iPredStep1

;// Declare other intermediate registers
iPredA0         RN 10
iPredA1         RN 11
iPredB0         RN 12
iPredB1         RN 14
Temp1           RN 4
Temp2           RN 5
ResultA         RN 5            ;// shares r5 with Temp2 (Temp2 is dead before ResultA is written)
ResultB         RN 4            ;// shares r4 with Temp1 (Temp1 is dead before ResultB is written)
r0x80808080     RN 7

    IF ARM1136JS

        ;// This function calculates average of 4x4 block
        ;// pPred0 is at alignment offset 0 and pPred1 is alignment 4

        ;// Function header
        M_START armVCM4P10_Average_4x4_Align0_unsafe, r6

        ;// Code start
        LDR     r0x80808080, =0x80808080

        ;// 1st load: rows 0 and 1 of both operands
        M_LDR   iPredB0, [pPred1]
        M_LDR   iPredA0, [pPred0], iPredStep0
        M_LDR   iPredB1, [pPred1, iPredStep1]
        M_LDR   iPredA1, [pPred0], iPredStep0

        ;// (a+b+1)/2 = (a+256-(255-b))/2 = (a-(255-b))/2 + 128
        ;// MVN forms 255-b per byte, UHSUB8 does the halving subtract,
        ;// and the EOR with 0x80808080 adds the 128 to every byte.
        MVN     iPredB0, iPredB0
        MVN     iPredB1, iPredB1
        UHSUB8  ResultA, iPredA0, iPredB0
        UHSUB8  ResultB, iPredA1, iPredB1
        EOR     ResultA, ResultA, r0x80808080
        M_STR   ResultA, [pDstPred], iDstStep
        EOR     ResultB, ResultB, r0x80808080
        M_STR   ResultB, [pDstPred], iDstStep

        ;// 2nd load: rows 2 and 3 (pPred1 was advanced by the two stores above)
        M_LDR   iPredA0, [pPred0], iPredStep0
        M_LDR   iPredB0, [pPred1]
        M_LDR   iPredA1, [pPred0], iPredStep0
        M_LDR   iPredB1, [pPred1, iPredStep1]

        MVN     iPredB0, iPredB0
        UHSUB8  ResultA, iPredA0, iPredB0
        MVN     iPredB1, iPredB1
        UHSUB8  ResultB, iPredA1, iPredB1
        EOR     ResultA, ResultA, r0x80808080
        M_STR   ResultA, [pDstPred], iDstStep
        EOR     ResultB, ResultB, r0x80808080
        M_STR   ResultB, [pDstPred], iDstStep
End0
        M_END

        ;// This function calculates average of 4x4 block
        ;// pPred0 is at alignment offset 2 and pPred1 is alignment 4

        ;// Function header
        M_START armVCM4P10_Average_4x4_Align2_unsafe, r6

        ;// Code start
        LDR     r0x80808080, =0x80808080

        ;// 1st load: rows 0 and 1; Temp1/Temp2 hold the following word of each row
        LDR     Temp1, [pPred0, #4]
        M_LDR   iPredA0, [pPred0], iPredStep0
        M_LDR   iPredB0, [pPred1]
        M_LDR   iPredB1, [pPred1, iPredStep1]
        M_LDR   Temp2, [pPred0, #4]
        M_LDR   iPredA1, [pPred0], iPredStep0
        MVN     iPredB0, iPredB0
        MVN     iPredB1, iPredB1
        ;// Splice: upper halfword of the first word | lower halfword of the next word
        MOV     iPredA0, iPredA0, LSR #16
        ORR     iPredA0, iPredA0, Temp1, LSL #16
        MOV     iPredA1, iPredA1, LSR #16
        ORR     iPredA1, iPredA1, Temp2, LSL #16

        ;// (a+b+1)/2 = (a+256-(255-b))/2 = (a-(255-b))/2 + 128
        UHSUB8  ResultA, iPredA0, iPredB0
        UHSUB8  ResultB, iPredA1, iPredB1
        EOR     ResultA, ResultA, r0x80808080
        M_STR   ResultA, [pDstPred], iDstStep
        EOR     ResultB, ResultB, r0x80808080
        M_STR   ResultB, [pDstPred], iDstStep

        ;// 2nd load: rows 2 and 3
        LDR     Temp1, [pPred0, #4]
        M_LDR   iPredA0, [pPred0], iPredStep0
        LDR     iPredB0, [pPred1]
        LDR     iPredB1, [pPred1, iPredStep1]
        LDR     Temp2, [pPred0, #4]
        M_LDR   iPredA1, [pPred0], iPredStep0
        MVN     iPredB0, iPredB0
        MVN     iPredB1, iPredB1
        MOV     iPredA0, iPredA0, LSR #16
        ORR     iPredA0, iPredA0, Temp1, LSL #16
        MOV     iPredA1, iPredA1, LSR #16
        ORR     iPredA1, iPredA1, Temp2, LSL #16

        UHSUB8  ResultA, iPredA0, iPredB0
        UHSUB8  ResultB, iPredA1, iPredB1
        EOR     ResultA, ResultA, r0x80808080
        M_STR   ResultA, [pDstPred], iDstStep
        EOR     ResultB, ResultB, r0x80808080
        M_STR   ResultB, [pDstPred], iDstStep
End2
        M_END


        ;// This function calculates average of 4x4 block
        ;// pPred0 is at alignment offset 3 and pPred1 is alignment 4

        ;// Function header
        M_START armVCM4P10_Average_4x4_Align3_unsafe, r6

        ;// Code start
        LDR     r0x80808080, =0x80808080

        ;// 1st load: rows 0 and 1; Temp1/Temp2 hold the following word of each row
        LDR     Temp1, [pPred0, #4]
        M_LDR   iPredA0, [pPred0], iPredStep0
        LDR     iPredB0, [pPred1]
        LDR     iPredB1, [pPred1, iPredStep1]
        LDR     Temp2, [pPred0, #4]
        M_LDR   iPredA1, [pPred0], iPredStep0

        MVN     iPredB0, iPredB0
        MVN     iPredB1, iPredB1
        ;// Splice: top byte of the first word | low three bytes of the next word
        MOV     iPredA0, iPredA0, LSR #24
        ORR     iPredA0, iPredA0, Temp1, LSL #8
        MOV     iPredA1, iPredA1, LSR #24
        ORR     iPredA1, iPredA1, Temp2, LSL #8
        ;// (a+b+1)/2 = (a+256-(255-b))/2 = (a-(255-b))/2 + 128
        UHSUB8  ResultA, iPredA0, iPredB0
        UHSUB8  ResultB, iPredA1, iPredB1
        EOR     ResultA, ResultA, r0x80808080
        M_STR   ResultA, [pDstPred], iDstStep
        EOR     ResultB, ResultB, r0x80808080
        M_STR   ResultB, [pDstPred], iDstStep

        ;// 2nd load: rows 2 and 3
        LDR     Temp1, [pPred0, #4]
        M_LDR   iPredA0, [pPred0], iPredStep0
        LDR     iPredB0, [pPred1]
        LDR     iPredB1, [pPred1, iPredStep1]
        LDR     Temp2, [pPred0, #4]
        M_LDR   iPredA1, [pPred0], iPredStep0

        MVN     iPredB0, iPredB0
        MVN     iPredB1, iPredB1
        MOV     iPredA0, iPredA0, LSR #24
        ORR     iPredA0, iPredA0, Temp1, LSL #8
        MOV     iPredA1, iPredA1, LSR #24
        ORR     iPredA1, iPredA1, Temp2, LSL #8

        UHSUB8  ResultA, iPredA0, iPredB0
        UHSUB8  ResultB, iPredA1, iPredB1
        EOR     ResultA, ResultA, r0x80808080
        M_STR   ResultA, [pDstPred], iDstStep
        EOR     ResultB, ResultB, r0x80808080
        M_STR   ResultB, [pDstPred], iDstStep
End3
        M_END

    ENDIF

    END