armVCM4P10_InterpolateLuma_Align_unsafe_s.s revision 0c1bc742181ded4930842b46e9507372f0b1b963
1;//
2;//
3;// File Name:  armVCM4P10_InterpolateLuma_Align_unsafe_s.s
4;// OpenMAX DL: v1.0.2
5;// Revision:   12290
6;// Date:       Wednesday, April 9, 2008
7;//
8;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
9;//
10;//
11;//
12
13        INCLUDE omxtypes_s.h
14        INCLUDE armCOMM_s.h
15
16        M_VARIANTS ARM1136JS
17
18        EXPORT armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
19        EXPORT armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
20
21DEBUG_ON    SETL {FALSE}
22
23    IF ARM1136JS
24
25;// Declare input registers (shared by both alignment routines in this file)
26pSrc            RN 0                                    ;// Source pointer on entry; rewritten with the aligned-copy pointer before return
27srcStep         RN 1                                    ;// Source row stride in bytes on entry; rewritten with the aligned-copy stride before return
28pDst            RN 8                                    ;// Destination pointer; advanced by the stores in the copy loops
29iHeight         RN 9                                    ;// Number of rows to copy; decremented once per row
30
31;// Declare inner loop registers
32x               RN 7                                    ;// Byte offset of pSrc within its word (pSrc AND 3), used only for dispatch
33x0              RN 7                                    ;// First loaded word; shares r7 with x, which is not read after the dispatch
34x1              RN 10                                   ;// Second loaded word
35x2              RN 11                                   ;// Third loaded word (used by the HorAlign9x routine only)
36Scratch         RN 12                                   ;// Holds the entry value of pDst in the HorAlign9x routine
37
38;// Function:
39;//     armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
40;//
41;// Copies rows from an arbitrarily aligned source (pSrc) to a 4-byte aligned destination (pDst) for horizontal interpolation.
42;// Per row, 12 bytes are loaded from pSrc rounded down to a word boundary, shifted right by the byte offset of pSrc, and stored.
43;// This covers the 9 bytes needed in the horizontal direction. The row count is tested only after a row has been copied.
44;//
45;// Registers used as input for this function
46;// r0 (pSrc), r1 (srcStep), r8 (pDst, the aligned destination pointer) and r9 (iHeight, the number of rows to copy)
47;//
48;// Registers preserved for top level function
49;// r2,r3,r4,r5,r6
50;//
51;// Registers modified by the function
52;// r7,r8,r9,r10,r11,r12 (r0 and r1 are also rewritten; they carry the outputs listed below)
53;//
54;// Output registers
55;// r0 - pointer to the start of the aligned copy (the value of pDst on entry), to be used as pSrc
56;// r1 - step size for the aligned copy (12 bytes per row)
57
58        ;// Function header
59        M_START armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
60
61        ;// Save the entry value of pDst; it is handed back in pSrc at CopyEnd
62        MOV     Scratch, pDst
63
64StartAlignedStackCopy
65        AND     x, pSrc, #3                             ;// x = byte offset of pSrc within its word (0..3)
66        BIC     pSrc, pSrc, #3                          ;// Round pSrc down to a word boundary
67        ;// Select the copy loop matching x (M_SWITCH/M_CASE/M_ENDSWITCH are macros defined outside this file)
68        M_SWITCH x
69        M_CASE   Copy0toAligned
70        M_CASE   Copy1toAligned
71        M_CASE   Copy2toAligned
72        M_CASE   Copy3toAligned
73        M_ENDSWITCH
74        ;// Offset 0: the three loaded words are stored unchanged
75Copy0toAligned
76        LDM     pSrc, {x0, x1, x2}                      ;// Load three words of the current row (x0 reuses r7, overwriting x)
77        SUBS    iHeight, iHeight, #1                    ;// Count this row; the flags set here are consumed by BGT below
78        ADD     pSrc, pSrc, srcStep                     ;// Advance to the next source row (no S suffix, flags untouched)
79
80        ;// One cycle stall
81
82        STM     pDst!, {x0, x1, x2}                     ;// Store aligned output row; pDst advances by 12 bytes
83        BGT     Copy0toAligned                          ;// Repeat while iHeight is still greater than zero
84        B       CopyEnd
85        ;// Offset 1: shift the 96-bit row right by 8 bits. NOTE(review): this byte order implies little-endian data - confirm
86Copy1toAligned
87        LDM     pSrc, {x0, x1, x2}                      ;// Load three words of the current row
88        SUBS    iHeight, iHeight, #1                    ;// Count this row; the flags set here are consumed by BGT below
89        ADD     pSrc, pSrc, srcStep                     ;// Advance to the next source row
90
91        ;// One cycle stall
92
93        MOV     x0, x0, LSR #8                          ;// x0 = (x0 >> 8) | (x1 << 24), completed by the ORR below
94        ORR     x0, x0, x1, LSL #24
95        MOV     x1, x1, LSR #8                          ;// x1 = (x1 >> 8) | (x2 << 24), completed by the ORR below
96        ORR     x1, x1, x2, LSL #24
97        MOV     x2, x2, LSR #8                          ;// x2 = x2 >> 8; the vacated top 8 bits are zero
98        STM     pDst!, {x0, x1, x2}                     ;// Store aligned output row; pDst advances by 12 bytes
99        BGT     Copy1toAligned                          ;// Repeat while iHeight is still greater than zero
100        B       CopyEnd
101        ;// Offset 2: shift the 96-bit row right by 16 bits
102Copy2toAligned
103        LDM     pSrc, {x0, x1, x2}                      ;// Load three words of the current row
104        SUBS    iHeight, iHeight, #1                    ;// Count this row; the flags set here are consumed by BGT below
105        ADD     pSrc, pSrc, srcStep                     ;// Advance to the next source row
106
107        ;// One cycle stall
108
109        MOV     x0, x0, LSR #16                         ;// x0 = (x0 >> 16) | (x1 << 16), completed by the ORR below
110        ORR     x0, x0, x1, LSL #16
111        MOV     x1, x1, LSR #16                         ;// x1 = (x1 >> 16) | (x2 << 16), completed by the ORR below
112        ORR     x1, x1, x2, LSL #16
113        MOV     x2, x2, LSR #16                         ;// x2 = x2 >> 16; the vacated top 16 bits are zero
114        STM     pDst!, {x0, x1, x2}                     ;// Store aligned output row; pDst advances by 12 bytes
115        BGT     Copy2toAligned                          ;// Repeat while iHeight is still greater than zero
116        B       CopyEnd
117        ;// Offset 3: shift the 96-bit row right by 24 bits, leaving 9 bytes of source data in the output row
118Copy3toAligned
119        LDM     pSrc, {x0, x1, x2}                      ;// Load three words of the current row
120        SUBS    iHeight, iHeight, #1                    ;// Count this row; the flags set here are consumed by BGT below
121        ADD     pSrc, pSrc, srcStep                     ;// Advance to the next source row
122
123        ;// One cycle stall
124
125        MOV     x0, x0, LSR #24                         ;// x0 = (x0 >> 24) | (x1 << 8), completed by the ORR below
126        ORR     x0, x0, x1, LSL #8
127        MOV     x1, x1, LSR #24                         ;// x1 = (x1 >> 24) | (x2 << 8), completed by the ORR below
128        ORR     x1, x1, x2, LSL #8
129        MOV     x2, x2, LSR #24                         ;// x2 = x2 >> 24; the vacated top 24 bits are zero
130        STM     pDst!, {x0, x1, x2}                     ;// Store aligned output row; pDst advances by 12 bytes
131        BGT     Copy3toAligned                          ;// Repeat while iHeight is still greater than zero
132        ;// The offset-3 loop falls through to CopyEnd; the other loops branch here
133CopyEnd
134
135        MOV     pSrc, Scratch                           ;// Return the start of the aligned copy (pDst on entry) as the new pSrc
136        MOV     srcStep, #12                            ;// Rows of the aligned copy are 12 bytes apart
137
138        M_END
139
140
141;// Function:
142;//     armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
143;//
144;// Copies rows from an arbitrarily aligned source (pSrc) to an aligned destination (pDst) for vertical interpolation.
145;// Per row, one 4-byte word is assembled from the word(s) around pSrc, shifted by the byte offset of pSrc, and stored.
146;// This function needs to copy 4 bytes in horizontal direction. The row count is tested only after a row has been copied.
147;//
148;// Registers used as input for this function
149;// r0 (pSrc), r1 (srcStep), r8 (pDst, the aligned destination pointer) and r9 (iHeight, the number of rows to copy)
150;//
151;// Registers preserved for top level function
152;// r2,r3,r4,r5,r6
153;//
154;// Registers modified by the function
155;// r7,r8,r9,r10,r11,r12 (r0 and r1 are also rewritten; they carry the outputs listed below)
156;//
157;// Output registers
158;// r0 - final pDst minus 28 bytes, to be used as pSrc (see the review note at CopyVEnd)
159;// r1 - step size for the aligned copy (4 bytes per row)
160
161        ;// Function header
162        M_START armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
163
164        ;// Copy rows from pSrc to the aligned buffer at pDst
165StartVAlignedStackCopy
166        AND     x, pSrc, #3                                 ;// x = byte offset of pSrc within its word (0..3)
167        BIC     pSrc, pSrc, #3                              ;// Round pSrc down to a word boundary
168        ;// Select the copy loop matching x (M_SWITCH/M_CASE/M_ENDSWITCH are macros defined outside this file)
169
170        M_SWITCH x
171        M_CASE   Copy0toVAligned
172        M_CASE   Copy1toVAligned
173        M_CASE   Copy2toVAligned
174        M_CASE   Copy3toVAligned
175        M_ENDSWITCH
176        ;// Offset 0: store the loaded word unchanged. M_LDR is a macro defined outside this file; its post-indexed operand form suggests pSrc advances by srcStep after the load - confirm
177Copy0toVAligned
178        M_LDR   x0, [pSrc], srcStep
179        SUBS    iHeight, iHeight, #1                        ;// Count this row; the flags set here are consumed by BGT below
180
181        ;// One cycle stall
182
183        STR     x0, [pDst], #4                              ;// Store aligned output row; pDst advances by 4 bytes
184        BGT     Copy0toVAligned                             ;// Repeat while iHeight is still greater than zero
185        B       CopyVEnd
186        ;// Offset 1: output = (word0 >> 8) | (word1 << 24). NOTE(review): this byte order implies little-endian data - confirm
187Copy1toVAligned
188        LDR     x1, [pSrc, #4]                              ;// Load the following word before pSrc is stepped to the next row
189        M_LDR   x0, [pSrc], srcStep
190        SUBS    iHeight, iHeight, #1                        ;// Count this row; the flags set here are consumed by BGT below
191
192        ;// One cycle stall
193
194        MOV     x1, x1, LSL #24                             ;// Keep the low 8 bits of the following word, moved to the top
195        ORR     x0, x1, x0, LSR #8                          ;// Merge with the upper 24 bits of the first word
196        STR     x0, [pDst], #4                              ;// Store aligned output row; pDst advances by 4 bytes
197        BGT     Copy1toVAligned                             ;// Repeat while iHeight is still greater than zero
198        B       CopyVEnd
199        ;// Offset 2: output = (word0 >> 16) | (word1 << 16)
200Copy2toVAligned
201        LDR     x1, [pSrc, #4]                              ;// Load the following word before pSrc is stepped to the next row
202        M_LDR   x0, [pSrc], srcStep
203        SUBS    iHeight, iHeight, #1                        ;// Count this row; the flags set here are consumed by BGT below
204
205        ;// One cycle stall
206
207        MOV     x1, x1, LSL #16                             ;// Keep the low 16 bits of the following word, moved to the top
208        ORR     x0, x1, x0, LSR #16                         ;// Merge with the upper 16 bits of the first word
209        STR     x0, [pDst], #4                              ;// Store aligned output row; pDst advances by 4 bytes
210        BGT     Copy2toVAligned                             ;// Repeat while iHeight is still greater than zero
211        B       CopyVEnd
212        ;// Offset 3: output = (word0 >> 24) | (word1 << 8)
213Copy3toVAligned
214        LDR     x1, [pSrc, #4]                              ;// Load the following word before pSrc is stepped to the next row
215        M_LDR   x0, [pSrc], srcStep
216        SUBS    iHeight, iHeight, #1                        ;// Count this row; the flags set here are consumed by BGT below
217
218        ;// One cycle stall
219
220        MOV     x1, x1, LSL #8                              ;// Keep the low 24 bits of the following word, moved to the top
221        ORR     x0, x1, x0, LSR #24                         ;// Merge with the upper 8 bits of the first word
222        STR     x0, [pDst], #4                              ;// Store aligned output row; pDst advances by 4 bytes
223        BGT     Copy3toVAligned                             ;// Repeat while iHeight is still greater than zero
224        ;// The offset-3 loop falls through to CopyVEnd; the other loops branch here
225CopyVEnd
226        ;// pDst now points just past the last stored row. NOTE(review): the fixed 28-byte rewind presumably assumes a particular row count (9 rows = 36 bytes would give entry pDst + 8) - confirm against the caller
227        SUB     pSrc, pDst, #28
228        MOV     srcStep, #4                                 ;// Rows of the aligned copy are 4 bytes apart
229
230        M_END
231
232
233    ENDIF
234
235    END
236
237