;//
;//
;// File Name:  armVCM4P10_InterpolateLuma_Align_unsafe_s.s
;// OpenMAX DL: v1.0.2
;// Revision:   12290
;// Date:       Wednesday, April 9, 2008
;//
;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
;//
;//
;//
;// Shared assembler definitions. The macros used in this file (M_VARIANTS,
;// M_START, M_END, M_SWITCH, M_CASE, M_ENDSWITCH, M_LDR) are not defined here;
;// presumably they are provided by armCOMM_s.h - confirm against that header.
        INCLUDE omxtypes_s.h
        INCLUDE armCOMM_s.h

        M_VARIANTS ARM1136JS

        EXPORT armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
        EXPORT armVCM4P10_InterpolateLuma_VerAlign4x_unsafe

DEBUG_ON    SETL {FALSE}

;// Everything up to the ENDIF at the end of the file is assembled only when
;// ARM1136JS is true.
    IF ARM1136JS

;// Declare input registers
pSrc            RN 0    ;// r0: source pointer (any byte alignment)
srcStep         RN 1    ;// r1: added to pSrc once per copied row
pDst            RN 8    ;// r8: destination pointer, advanced as rows are stored
iHeight         RN 9    ;// r9: row counter, decremented once per row

;// Declare inner loop registers
x               RN 7    ;// r7: pSrc & 3, the byte offset used to select a copy loop
x0              RN 7    ;// r7 again: first data word (reuses x after the switch)
x1              RN 10   ;// r10: second data word
x2              RN 11   ;// r11: third data word (HorAlign9x only)
Scratch         RN 12   ;// r12: holds the initial pDst in HorAlign9x

;// Function:
;//     armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
;//
;// Copies rows from an arbitrarily aligned source location (pSrc) to a 4-byte
;// aligned destination (pDst) for horizontal interpolation.
;// Horizontal interpolation needs 9 bytes per row. Each row is read as three
;// whole words from the word-aligned address at or below pSrc, shifted right
;// by the byte offset of pSrc, and written as three words (12 bytes). The
;// stored row therefore starts with the byte pSrc pointed at; with a byte
;// offset of k the top k bytes of the third word are zero, so at least 9 valid
;// bytes are always present. The shift directions used correspond to
;// little-endian byte order.
;//
;// The row loop tests the counter after the first row has been copied, so one
;// row is always copied even if r9 is 0 on entry.
;//
;// Registers used as input for this function
;// r0,r1,r8,r9 where r8 contains the aligned destination pointer and r9 the
;// number of rows to copy
;//
;// Registers preserved for top level function
;// r2,r3,r4,r5,r6
;//
;// Registers modified by the function
;// r7,r8,r9,r10,r11,r12
;//
;// Output registers
;// r0 - pointer to the new aligned location which will be used as pSrc
;//      (the value r8 had on entry)
;// r1 - step size to this aligned location (12 bytes per row)

        ;// Function header
        M_START armVCM4P10_InterpolateLuma_HorAlign9x_unsafe

        ;// Copy pDst to scratch so the start of the output can be returned
        MOV     Scratch, pDst

StartAlignedStackCopy
        AND     x, pSrc, #3                             ;// x = byte offset within the word (0..3)
        BIC     pSrc, pSrc, #3                          ;// Round pSrc down to a word boundary

        ;// Select the copy loop that matches the byte offset
        M_SWITCH x
        M_CASE   Copy0toAligned
        M_CASE   Copy1toAligned
        M_CASE   Copy2toAligned
        M_CASE   Copy3toAligned
        M_ENDSWITCH

        ;// In every loop below SUBS sets the flags tested by BGT; none of the
        ;// instructions in between (ADD/MOV/ORR without S, STM) change them.

        ;// Offset 0: source already word aligned, copy the three words unchanged
Copy0toAligned
        LDM     pSrc, {x0, x1, x2}
        SUBS    iHeight, iHeight, #1
        ADD     pSrc, pSrc, srcStep

        ;// One cycle stall

        STM     pDst!, {x0, x1, x2}                     ;// Store aligned output row
        BGT     Copy0toAligned
        B       CopyEnd

        ;// Offset 1: drop the low byte of each word and pull in the low byte
        ;// of the following word
Copy1toAligned
        LDM     pSrc, {x0, x1, x2}
        SUBS    iHeight, iHeight, #1
        ADD     pSrc, pSrc, srcStep

        ;// One cycle stall

        MOV     x0, x0, LSR #8
        ORR     x0, x0, x1, LSL #24
        MOV     x1, x1, LSR #8
        ORR     x1, x1, x2, LSL #24
        MOV     x2, x2, LSR #8
        STM     pDst!, {x0, x1, x2}                     ;// Store aligned output row
        BGT     Copy1toAligned
        B       CopyEnd

        ;// Offset 2: drop the low halfword of each word and pull in the low
        ;// halfword of the following word
Copy2toAligned
        LDM     pSrc, {x0, x1, x2}
        SUBS    iHeight, iHeight, #1
        ADD     pSrc, pSrc, srcStep

        ;// One cycle stall

        MOV     x0, x0, LSR #16
        ORR     x0, x0, x1, LSL #16
        MOV     x1, x1, LSR #16
        ORR     x1, x1, x2, LSL #16
        MOV     x2, x2, LSR #16
        STM     pDst!, {x0, x1, x2}                     ;// Store aligned output row
        BGT     Copy2toAligned
        B       CopyEnd

        ;// Offset 3: keep only the top byte of each word and pull in the low
        ;// three bytes of the following word
Copy3toAligned
        LDM     pSrc, {x0, x1, x2}
        SUBS    iHeight, iHeight, #1
        ADD     pSrc, pSrc, srcStep

        ;// One cycle stall

        MOV     x0, x0, LSR #24
        ORR     x0, x0, x1, LSL #8
        MOV     x1, x1, LSR #24
        ORR     x1, x1, x2, LSL #8
        MOV     x2, x2, LSR #24
        STM     pDst!, {x0, x1, x2}                     ;// Store aligned output row
        BGT     Copy3toAligned
        ;// Falls through to CopyEnd when the last row has been stored

CopyEnd

        ;// Return the aligned copy as the new source: r0 = start of the
        ;// output buffer, r1 = 12 bytes per stored row
        MOV     pSrc, Scratch
        MOV     srcStep, #12

        M_END


;// Function:
;//     armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
;//
;// Copies rows from an arbitrarily aligned source location (pSrc) to an
;// aligned destination (pDst) for vertical interpolation.
;// Vertical interpolation needs 4 bytes per row. Each row is built from the
;// word at the word-aligned address at or below pSrc and, when pSrc is not
;// word aligned, the word after it; the two are shifted and merged so the
;// stored word starts with the byte pSrc pointed at. The shift directions
;// used correspond to little-endian byte order. One word (4 bytes) is stored
;// per row.
;//
;// The row loop tests the counter after the first row has been copied, so one
;// row is always copied even if r9 is 0 on entry.
;//
;// Registers used as input for this function
;// r0,r1,r8,r9 where r8 contains the aligned destination pointer and r9 the
;// number of rows to copy
;//
;// Registers preserved for top level function
;// r2,r3,r4,r5,r6
;//
;// Registers modified by the function
;// r7,r8,r9,r10,r11,r12
;//
;// Output registers
;// r0 - pointer to the new aligned location which will be used as pSrc
;//      (final pDst minus 28 bytes, see the note at CopyVEnd)
;// r1 - step size to this aligned location (4 bytes per row)

        ;// Function header
        M_START armVCM4P10_InterpolateLuma_VerAlign4x_unsafe

        ;// Split pSrc into a word-aligned base and a byte offset
StartVAlignedStackCopy
        AND     x, pSrc, #3                                 ;// x = byte offset within the word (0..3)
        BIC     pSrc, pSrc, #3                              ;// Round pSrc down to a word boundary


        ;// Select the copy loop that matches the byte offset
        M_SWITCH x
        M_CASE   Copy0toVAligned
        M_CASE   Copy1toVAligned
        M_CASE   Copy2toVAligned
        M_CASE   Copy3toVAligned
        M_ENDSWITCH

        ;// In every loop below SUBS sets the flags tested by BGT; the MOV/ORR
        ;// (without S) and STR in between do not change them.
        ;// M_LDR is a macro defined outside this file; from its operands it
        ;// appears to load from [pSrc] and then advance pSrc by srcStep, which
        ;// is why the [pSrc, #4] load is issued before it - confirm against
        ;// the macro definition.

        ;// Offset 0: source already word aligned, copy the word unchanged
Copy0toVAligned
        M_LDR   x0, [pSrc], srcStep
        SUBS    iHeight, iHeight, #1

        ;// One cycle stall

        STR     x0, [pDst], #4                              ;// Store aligned output row
        BGT     Copy0toVAligned
        B       CopyVEnd

        ;// Offset 1: upper three bytes of the first word + low byte of the next
Copy1toVAligned
        LDR     x1, [pSrc, #4]
        M_LDR   x0, [pSrc], srcStep
        SUBS    iHeight, iHeight, #1

        ;// One cycle stall

        MOV     x1, x1, LSL #24
        ORR     x0, x1, x0, LSR #8
        STR     x0, [pDst], #4                              ;// Store aligned output row
        BGT     Copy1toVAligned
        B       CopyVEnd

        ;// Offset 2: upper halfword of the first word + low halfword of the next
Copy2toVAligned
        LDR     x1, [pSrc, #4]
        M_LDR   x0, [pSrc], srcStep
        SUBS    iHeight, iHeight, #1

        ;// One cycle stall

        MOV     x1, x1, LSL #16
        ORR     x0, x1, x0, LSR #16
        STR     x0, [pDst], #4                              ;// Store aligned output row
        BGT     Copy2toVAligned
        B       CopyVEnd

        ;// Offset 3: top byte of the first word + low three bytes of the next
Copy3toVAligned
        LDR     x1, [pSrc, #4]
        M_LDR   x0, [pSrc], srcStep
        SUBS    iHeight, iHeight, #1

        ;// One cycle stall

        MOV     x1, x1, LSL #8
        ORR     x0, x1, x0, LSR #24
        STR     x0, [pDst], #4                              ;// Store aligned output row
        BGT     Copy3toVAligned
        ;// Falls through to CopyVEnd when the last row has been stored

CopyVEnd

        ;// Return the aligned copy as the new source: r1 = 4 bytes per stored
        ;// row, r0 = 28 bytes (7 stored rows) before the end of the output.
        ;// NOTE(review): the 28 is fixed and does not depend on the row count
        ;// passed in r9, so r0 is the start of the output only for 7 rows; for
        ;// any other count it points part-way into the buffer. Presumably the
        ;// callers rely on a specific row count - confirm against them.
        SUB     pSrc, pDst, #28
        MOV     srcStep, #4

        M_END


;// Close the ARM1136JS conditional-assembly section opened near the top of the file
    ENDIF

    END