;// armVCM4P10_Average_4x_Align_unsafe_s.s revision 0c1bc742181ded4930842b46e9507372f0b1b963
;//
;//
;// File Name:  armVCM4P10_Average_4x_Align_unsafe_s.s
;// OpenMAX DL: v1.0.2
;// Revision:   12290
;// Date:       Wednesday, April 9, 2008
;//
;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
;//
;//
;//


;// Functions:
;//     armVCM4P10_Average_4x4_Align<ALIGNMENT>_unsafe
;//
;// Implements the average of a 4x4 block with the equation c = (a+b+1)>>1.
;// The first operand is at offset ALIGNMENT from an aligned address.
;// The second operand is at an aligned location, which is also used as the
;// output: the destination pointed to by (pDst) for vertical interpolation.
;// These functions process 4 bytes in the horizontal direction per row.
;//
;// Registers used as input for this function
;// r0,r1,r2,r3 where r2 contains the aligned memory pointer and r3 its step size
;//
;// Registers preserved for the top-level function
;// r4,r5,r6,r8,r9,r14
;//
;// Registers modified by the function
;// r7,r10,r11,r12
;//
;// Output registers
;// r2 - pointer to the aligned location
;// r3 - step size to this aligned location

;// Shared definitions. M_VARIANTS, M_START, M_END, M_LDR and M_STR are not
;// defined in this file; presumably they come from these headers.
        INCLUDE omxtypes_s.h
        INCLUDE armCOMM_s.h

        M_VARIANTS ARM1136JS

        EXPORT armVCM4P10_Average_4x4_Align0_unsafe
        EXPORT armVCM4P10_Average_4x4_Align2_unsafe
        EXPORT armVCM4P10_Average_4x4_Align3_unsafe

;// Debug switch, set to false here. NOTE(review): the variable is not declared
;// in this file; presumably it is declared in one of the included headers.
DEBUG_ON    SETL {FALSE}

;// Declare input registers
;// pDstPred/iDstStep are second names for pPred1/iPredStep1 (r2/r3): the
;// second operand is read from, and the result written back to, the same
;// pointer with the same step.
pPred0          RN 0
iPredStep0      RN 1
pPred1          RN 2
iPredStep1      RN 3
pDstPred        RN 2
iDstStep        RN 3

;// Declare other intermediate registers
;// Temp1/ResultB share r4 and Temp2/ResultA share r5, so each temporary must
;// be consumed before the corresponding result is written.
;// iPredB1 is r14 (the link register); the file header lists r14 as preserved,
;// presumably via M_START/M_END -- confirm in armCOMM_s.h.
iPredA0         RN 10
iPredA1         RN 11
iPredB0         RN 12
iPredB1         RN 14
Temp1           RN 4
Temp2           RN 5
ResultA         RN 5
ResultB         RN 4
r0x80808080     RN 7

;// Everything up to the matching ENDIF is assembled only when ARM1136JS is set.
    IF ARM1136JS

;// ---------------------------------------------------------------------------
;// armVCM4P10_Average_4x4_Align0_unsafe
;//
;// Calculates the rounded average of a 4x4 block, c = (a+b+1)>>1 per byte.
;// pPred0 is at alignment offset 0 and pPred1 is 4-byte aligned.
;//
;// In:   pPred0     (r0)  first operand, one word read per row
;//       iPredStep0 (r1)  added to pPred0 after each row is read
;//       pPred1     (r2)  second operand; same register as pDstPred (output)
;//       iPredStep1 (r3)  row step of pPred1; same register as iDstStep
;// Out:  four result words written through pDstPred. pPred0 has been
;//       post-incremented four times by iPredStep0 and pDstPred four times
;//       by iDstStep.
;// Uses: r4, r5, r7, r10, r11, r12, r14 as working registers.
;//
;// NOTE(review): M_START, M_END, M_LDR and M_STR are macros defined outside
;// this file; the comments below assume M_LDR/M_STR behave like LDR/STR with
;// the addressing mode shown -- confirm in armCOMM_s.h.
;// ---------------------------------------------------------------------------

        ;// Function header
        ;// NOTE(review): the r6 argument presumably makes M_START save r4-r6
        ;// (the file header lists r4, r5, r6 and r14 as preserved) -- confirm.
        M_START armVCM4P10_Average_4x4_Align0_unsafe, r6

        ;// Code start
        ;// 0x80 in every byte lane; XORed into the results to add 128 per byte
        LDR         r0x80808080, =0x80808080

        ;// 1st load: rows 0 and 1
        ;// pPred1 is read at [pPred1] and [pPred1 + iPredStep1] without
        ;// writeback; it is advanced by the two stores below because pDstPred
        ;// is the same register.
        M_LDR       iPredB0, [pPred1]
        M_LDR       iPredA0, [pPred0], iPredStep0
        M_LDR       iPredB1, [pPred1, iPredStep1]
        M_LDR       iPredA1, [pPred0], iPredStep0

        ;// (a+b+1)/2 = (a+256-(255-b))/2 = (a-(255-b))/2 + 128
        ;// MVN forms (255-b) per byte, UHSUB8 the halved per-byte difference,
        ;// and the EOR with 0x80808080 supplies the "+ 128" term.
        MVN         iPredB0, iPredB0
        MVN         iPredB1, iPredB1
        UHSUB8      ResultA, iPredA0, iPredB0
        UHSUB8      ResultB, iPredA1, iPredB1
        EOR         ResultA, ResultA, r0x80808080
        M_STR       ResultA, [pDstPred], iDstStep
        EOR         ResultB, ResultB, r0x80808080
        M_STR       ResultB, [pDstPred], iDstStep

        ;// 2nd load: rows 2 and 3, same computation as above
        M_LDR       iPredA0, [pPred0], iPredStep0
        M_LDR       iPredB0, [pPred1]
        M_LDR       iPredA1, [pPred0], iPredStep0
        M_LDR       iPredB1, [pPred1, iPredStep1]

        MVN         iPredB0, iPredB0
        UHSUB8      ResultA, iPredA0, iPredB0
        MVN         iPredB1, iPredB1
        UHSUB8      ResultB, iPredA1, iPredB1
        EOR         ResultA, ResultA, r0x80808080
        M_STR       ResultA, [pDstPred], iDstStep
        EOR         ResultB, ResultB, r0x80808080
        M_STR       ResultB, [pDstPred], iDstStep
End0
        M_END

;// ---------------------------------------------------------------------------
;// armVCM4P10_Average_4x4_Align2_unsafe
;//
;// Calculates the rounded average of a 4x4 block, c = (a+b+1)>>1 per byte.
;// pPred0 is at alignment offset 2 and pPred1 is 4-byte aligned.
;//
;// In:   pPred0     (r0)  first operand; two words are read per row, at
;//                        [pPred0] and [pPred0 + 4]
;//       iPredStep0 (r1)  added to pPred0 after each row is read
;//       pPred1     (r2)  second operand; same register as pDstPred (output)
;//       iPredStep1 (r3)  row step of pPred1; same register as iDstStep
;// Out:  four result words written through pDstPred. pPred0 has been
;//       post-incremented four times by iPredStep0 and pDstPred four times
;//       by iDstStep.
;// Uses: r4, r5, r7, r10, r11, r12, r14 as working registers.
;//
;// NOTE(review): M_START, M_END, M_LDR and M_STR are macros defined outside
;// this file; the comments below assume M_LDR/M_STR behave like LDR/STR with
;// the addressing mode shown -- confirm in armCOMM_s.h.
;// NOTE(review): the shift/merge below presumably expects pPred0 to hold the
;// word-aligned base address and little-endian byte order -- confirm against
;// the caller.
;// ---------------------------------------------------------------------------

        ;// Function header
        ;// NOTE(review): the r6 argument presumably makes M_START save r4-r6
        ;// (the file header lists r4, r5, r6 and r14 as preserved) -- confirm.
        M_START armVCM4P10_Average_4x4_Align2_unsafe, r6

        ;// Code start
        ;// 0x80 in every byte lane; XORed into the results to add 128 per byte
        LDR         r0x80808080, =0x80808080

        ;// 1st load: rows 0 and 1
        ;// For each pPred0 row the word at +4 is read first, then the word at
        ;// +0 with post-increment to the next row.
        LDR         Temp1, [pPred0, #4]
        M_LDR       iPredA0, [pPred0], iPredStep0
        M_LDR       iPredB0, [pPred1]
        M_LDR       iPredB1, [pPred1, iPredStep1]
        M_LDR       Temp2, [pPred0, #4]
        M_LDR       iPredA1, [pPred0], iPredStep0
        MVN         iPredB0, iPredB0
        MVN         iPredB1, iPredB1
        ;// Merge the two words of each row: (word0 >> 16) | (word1 << 16)
        MOV         iPredA0, iPredA0, LSR #16
        ORR         iPredA0, iPredA0, Temp1, LSL #16
        MOV         iPredA1, iPredA1, LSR #16
        ORR         iPredA1, iPredA1, Temp2, LSL #16

        ;// (a+b+1)/2 = (a+256-(255-b))/2 = (a-(255-b))/2 + 128
        ;// Temp1/Temp2 are dead from here on, so their registers (r4/r5) are
        ;// reused for ResultB/ResultA.
        UHSUB8      ResultA, iPredA0, iPredB0
        UHSUB8      ResultB, iPredA1, iPredB1
        EOR         ResultA, ResultA, r0x80808080
        M_STR       ResultA, [pDstPred], iDstStep
        EOR         ResultB, ResultB, r0x80808080
        M_STR       ResultB, [pDstPred], iDstStep

        ;// 2nd load: rows 2 and 3, same computation as above
        LDR         Temp1, [pPred0, #4]
        M_LDR       iPredA0, [pPred0], iPredStep0
        LDR         iPredB0, [pPred1]
        LDR         iPredB1, [pPred1, iPredStep1]
        LDR         Temp2, [pPred0, #4]
        M_LDR       iPredA1, [pPred0], iPredStep0
        MVN         iPredB0, iPredB0
        MVN         iPredB1, iPredB1
        MOV         iPredA0, iPredA0, LSR #16
        ORR         iPredA0, iPredA0, Temp1, LSL #16
        MOV         iPredA1, iPredA1, LSR #16
        ORR         iPredA1, iPredA1, Temp2, LSL #16

        UHSUB8      ResultA, iPredA0, iPredB0
        UHSUB8      ResultB, iPredA1, iPredB1
        EOR         ResultA, ResultA, r0x80808080
        M_STR       ResultA, [pDstPred], iDstStep
        EOR         ResultB, ResultB, r0x80808080
        M_STR       ResultB, [pDstPred], iDstStep
End2
        M_END


;// ---------------------------------------------------------------------------
;// armVCM4P10_Average_4x4_Align3_unsafe
;//
;// Calculates the rounded average of a 4x4 block, c = (a+b+1)>>1 per byte.
;// pPred0 is at alignment offset 3 and pPred1 is 4-byte aligned.
;//
;// In:   pPred0     (r0)  first operand; two words are read per row, at
;//                        [pPred0] and [pPred0 + 4]
;//       iPredStep0 (r1)  added to pPred0 after each row is read
;//       pPred1     (r2)  second operand; same register as pDstPred (output)
;//       iPredStep1 (r3)  row step of pPred1; same register as iDstStep
;// Out:  four result words written through pDstPred. pPred0 has been
;//       post-incremented four times by iPredStep0 and pDstPred four times
;//       by iDstStep.
;// Uses: r4, r5, r7, r10, r11, r12, r14 as working registers.
;//
;// NOTE(review): M_START, M_END, M_LDR and M_STR are macros defined outside
;// this file; the comments below assume M_LDR/M_STR behave like LDR/STR with
;// the addressing mode shown -- confirm in armCOMM_s.h.
;// NOTE(review): the shift/merge below presumably expects pPred0 to hold the
;// word-aligned base address and little-endian byte order -- confirm against
;// the caller.
;// ---------------------------------------------------------------------------

        ;// Function header
        ;// NOTE(review): the r6 argument presumably makes M_START save r4-r6
        ;// (the file header lists r4, r5, r6 and r14 as preserved) -- confirm.
        M_START armVCM4P10_Average_4x4_Align3_unsafe, r6

        ;// Code start
        ;// 0x80 in every byte lane; XORed into the results to add 128 per byte
        LDR         r0x80808080, =0x80808080

        ;// 1st load: rows 0 and 1
        ;// For each pPred0 row the word at +4 is read first, then the word at
        ;// +0 with post-increment to the next row.
        LDR         Temp1, [pPred0, #4]
        M_LDR       iPredA0, [pPred0], iPredStep0
        LDR         iPredB0, [pPred1]
        LDR         iPredB1, [pPred1, iPredStep1]
        LDR         Temp2, [pPred0, #4]
        M_LDR       iPredA1, [pPred0], iPredStep0

        ;// (a+b+1)/2 = (a+256-(255-b))/2 = (a-(255-b))/2 + 128
        MVN         iPredB0, iPredB0
        MVN         iPredB1, iPredB1
        ;// Merge the two words of each row: (word0 >> 24) | (word1 << 8)
        MOV         iPredA0, iPredA0, LSR #24
        ORR         iPredA0, iPredA0, Temp1, LSL #8
        MOV         iPredA1, iPredA1, LSR #24
        ORR         iPredA1, iPredA1, Temp2, LSL #8
        ;// Temp1/Temp2 are dead from here on, so their registers (r4/r5) are
        ;// reused for ResultB/ResultA.
        UHSUB8      ResultA, iPredA0, iPredB0
        UHSUB8      ResultB, iPredA1, iPredB1
        EOR         ResultA, ResultA, r0x80808080
        M_STR       ResultA, [pDstPred], iDstStep
        EOR         ResultB, ResultB, r0x80808080
        M_STR       ResultB, [pDstPred], iDstStep

        ;// 2nd load: rows 2 and 3, same computation as above
        LDR         Temp1, [pPred0, #4]
        M_LDR       iPredA0, [pPred0], iPredStep0
        LDR         iPredB0, [pPred1]
        LDR         iPredB1, [pPred1, iPredStep1]
        LDR         Temp2, [pPred0, #4]
        M_LDR       iPredA1, [pPred0], iPredStep0

        MVN         iPredB0, iPredB0
        MVN         iPredB1, iPredB1
        MOV         iPredA0, iPredA0, LSR #24
        ORR         iPredA0, iPredA0, Temp1, LSL #8
        MOV         iPredA1, iPredA1, LSR #24
        ORR         iPredA1, iPredA1, Temp2, LSL #8

        UHSUB8      ResultA, iPredA0, iPredB0
        UHSUB8      ResultB, iPredA1, iPredB1
        EOR         ResultA, ResultA, r0x80808080
        M_STR       ResultA, [pDstPred], iDstStep
        EOR         ResultB, ResultB, r0x80808080
        M_STR       ResultB, [pDstPred], iDstStep
End3
        M_END

;// Closes the ARM1136JS conditional-assembly section, then ends the source file.
    ENDIF

    END
