1/*
2 * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
3 *
4 */
5
6    .eabi_attribute 24, 1
7    .eabi_attribute 25, 1
8
9    .arm
10    .fpu neon
11    .text
12
13    .global omxVCM4P10_InterpolateLuma
14    .func   omxVCM4P10_InterpolateLuma
15omxVCM4P10_InterpolateLuma:
16    PUSH     {r4-r12,lr}
17    VPUSH    {d8-d15}
18    SUB      sp,sp,#0x10
19    LDR      r6,[sp,#0x78]
20    LDR      r7,[sp,#0x7c]
21    LDR      r5,[sp,#0x80]
22    LDR      r4,[sp,#0x84]
23    ADD      r6,r6,r7,LSL #2
24    ADD      r11,sp,#0
25    VMOV.I16 d31,#0x14
26    VMOV.I16 d30,#0x5
27L0x2c:
28    STM      r11,{r0-r3}
29    ADD      pc,pc,r6,LSL #2
30    B        L0x3f0
31    B        L0x78
32    B        L0xa8
33    B        L0xdc
34    B        L0x100
35    B        L0x134
36    B        L0x168
37    B        L0x1a8
38    B        L0x1f0
39    B        L0x234
40    B        L0x258
41    B        L0x2b0
42    B        L0x2d8
43    B        L0x330
44    B        L0x364
45    B        L0x3a8
46    B        L0x3f0
47L0x78:
48    ADD      r12,r0,r1,LSL #1
49    VLD1.8   {d9},[r0],r1
50    VLD1.8   {d11},[r12],r1
51    VLD1.8   {d10},[r0]
52    VLD1.8   {d12},[r12]
53    ADD      r12,r2,r3,LSL #1
54    VST1.32  {d9[0]},[r2],r3
55    VST1.32  {d11[0]},[r12],r3
56    VST1.32  {d10[0]},[r2]
57    VST1.32  {d12[0]},[r12]
58    ADD      r11,sp,#0
59    B        L0x434
60L0xa8:
61    SUB      r0,r0,#2
62    BL       armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
63    VRHADD.U8 d22,d22,d14
64    VRHADD.U8 d26,d26,d18
65    VRHADD.U8 d24,d24,d16
66    VRHADD.U8 d28,d28,d20
67    ADD      r12,r2,r3,LSL #1
68    VST1.32  {d22[0]},[r2],r3
69    VST1.32  {d26[0]},[r12],r3
70    VST1.32  {d24[0]},[r2]
71    VST1.32  {d28[0]},[r12]
72    ADD      r11,sp,#0
73    B        L0x434
74L0xdc:
75    SUB      r0,r0,#2
76    BL       armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
77    ADD      r12,r2,r3,LSL #1
78    VST1.32  {d22[0]},[r2],r3
79    VST1.32  {d26[0]},[r12],r3
80    VST1.32  {d24[0]},[r2]
81    VST1.32  {d28[0]},[r12]
82    ADD      r11,sp,#0
83    B        L0x434
84L0x100:
85    SUB      r0,r0,#2
86    BL       armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
87    VRHADD.U8 d22,d22,d15
88    VRHADD.U8 d26,d26,d19
89    VRHADD.U8 d24,d24,d17
90    VRHADD.U8 d28,d28,d21
91    ADD      r12,r2,r3,LSL #1
92    VST1.32  {d22[0]},[r2],r3
93    VST1.32  {d26[0]},[r12],r3
94    VST1.32  {d24[0]},[r2]
95    VST1.32  {d28[0]},[r12]
96    ADD      r11,sp,#0
97    B        L0x434
98L0x134:
99    SUB      r0,r0,r1,LSL #1
100    BL       armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
101    VRHADD.U8 d0,d0,d9
102    VRHADD.U8 d4,d4,d11
103    VRHADD.U8 d2,d2,d10
104    VRHADD.U8 d6,d6,d12
105    ADD      r12,r2,r3,LSL #1
106    VST1.32  {d0[0]},[r2],r3
107    VST1.32  {d4[0]},[r12],r3
108    VST1.32  {d2[0]},[r2]
109    VST1.32  {d6[0]},[r12]
110    ADD      r11,sp,#0
111    B        L0x434
112L0x168:
113    MOV      r8,r0
114    SUB      r0,r0,r1,LSL #1
115    BL       armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
116    SUB      r0,r8,#2
117    BL       armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
118    VRHADD.U8 d22,d22,d0
119    VRHADD.U8 d26,d26,d4
120    VRHADD.U8 d24,d24,d2
121    VRHADD.U8 d28,d28,d6
122    ADD      r12,r2,r3,LSL #1
123    VST1.32  {d22[0]},[r2],r3
124    VST1.32  {d26[0]},[r12],r3
125    VST1.32  {d24[0]},[r2]
126    VST1.32  {d28[0]},[r12]
127    ADD      r11,sp,#0
128    B        L0x434
129L0x1a8:
130    SUB      r0,r0,r1,LSL #1
131    SUB      r0,r0,#2
132    BL       armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe
133    VQRSHRUN.S16 d14,q7,#5
134    VQRSHRUN.S16 d16,q8,#5
135    VQRSHRUN.S16 d18,q9,#5
136    VQRSHRUN.S16 d20,q10,#5
137    VRHADD.U8 d0,d0,d14
138    VRHADD.U8 d4,d4,d18
139    VRHADD.U8 d2,d2,d16
140    VRHADD.U8 d6,d6,d20
141    ADD      r12,r2,r3,LSL #1
142    VST1.32  {d0[0]},[r2],r3
143    VST1.32  {d4[0]},[r12],r3
144    VST1.32  {d2[0]},[r2]
145    VST1.32  {d6[0]},[r12]
146    ADD      r11,sp,#0
147    B        L0x434
148L0x1f0:
149    MOV      r8,r0
150    ADD      r0,r0,#1
151    SUB      r0,r0,r1,LSL #1
152    BL       armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
153    SUB      r0,r8,#2
154    BL       armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
155    VRHADD.U8 d22,d22,d0
156    VRHADD.U8 d26,d26,d4
157    VRHADD.U8 d24,d24,d2
158    VRHADD.U8 d28,d28,d6
159    ADD      r12,r2,r3,LSL #1
160    VST1.32  {d22[0]},[r2],r3
161    VST1.32  {d26[0]},[r12],r3
162    VST1.32  {d24[0]},[r2]
163    VST1.32  {d28[0]},[r12]
164    ADD      r11,sp,#0
165    B        L0x434
166L0x234:
167    SUB      r0,r0,r1,LSL #1
168    BL       armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
169    ADD      r12,r2,r3,LSL #1
170    VST1.32  {d0[0]},[r2],r3
171    VST1.32  {d4[0]},[r12],r3
172    VST1.32  {d2[0]},[r2]
173    VST1.32  {d6[0]},[r12]
174    ADD      r11,sp,#0
175    B        L0x434
176L0x258:
177    SUB      r0,r0,r1,LSL #1
178    SUB      r0,r0,#2
179    BL       armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe
180    VEXT.8   d18,d18,d19,#4
181    VEXT.8   d20,d20,d21,#4
182    VEXT.8   d22,d22,d23,#4
183    VEXT.8   d24,d24,d25,#4
184    VQRSHRUN.S16 d14,q9,#5
185    VQRSHRUN.S16 d16,q10,#5
186    VQRSHRUN.S16 d18,q11,#5
187    VQRSHRUN.S16 d20,q12,#5
188    VRHADD.U8 d0,d0,d14
189    VRHADD.U8 d4,d4,d18
190    VRHADD.U8 d2,d2,d16
191    VRHADD.U8 d6,d6,d20
192    ADD      r12,r2,r3,LSL #1
193    VST1.32  {d0[0]},[r2],r3
194    VST1.32  {d4[0]},[r12],r3
195    VST1.32  {d2[0]},[r2]
196    VST1.32  {d6[0]},[r12]
197    ADD      r11,sp,#0
198    B        L0x434
199L0x2b0:
200    SUB      r0,r0,r1,LSL #1
201    SUB      r0,r0,#2
202    BL       armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe
203    ADD      r12,r2,r3,LSL #1
204    VST1.32  {d0[0]},[r2],r3
205    VST1.32  {d4[0]},[r12],r3
206    VST1.32  {d2[0]},[r2]
207    VST1.32  {d6[0]},[r12]
208    ADD      r11,sp,#0
209    B        L0x434
210L0x2d8:
211    SUB      r0,r0,r1,LSL #1
212    SUB      r0,r0,#2
213    BL       armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe
214    VEXT.8   d18,d18,d19,#6
215    VEXT.8   d20,d20,d21,#6
216    VEXT.8   d22,d22,d23,#6
217    VEXT.8   d24,d24,d25,#6
218    VQRSHRUN.S16 d14,q9,#5
219    VQRSHRUN.S16 d16,q10,#5
220    VQRSHRUN.S16 d18,q11,#5
221    VQRSHRUN.S16 d20,q12,#5
222    VRHADD.U8 d0,d0,d14
223    VRHADD.U8 d4,d4,d18
224    VRHADD.U8 d2,d2,d16
225    VRHADD.U8 d6,d6,d20
226    ADD      r12,r2,r3,LSL #1
227    VST1.32  {d0[0]},[r2],r3
228    VST1.32  {d4[0]},[r12],r3
229    VST1.32  {d2[0]},[r2]
230    VST1.32  {d6[0]},[r12]
231    ADD      r11,sp,#0
232    B        L0x434
233L0x330:
234    SUB      r0,r0,r1,LSL #1
235    BL       armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
236    VRHADD.U8 d0,d0,d10
237    VRHADD.U8 d4,d4,d12
238    VRHADD.U8 d2,d2,d11
239    VRHADD.U8 d6,d6,d13
240    ADD      r12,r2,r3,LSL #1
241    VST1.32  {d0[0]},[r2],r3
242    VST1.32  {d4[0]},[r12],r3
243    VST1.32  {d2[0]},[r2]
244    VST1.32  {d6[0]},[r12]
245    ADD      r11,sp,#0
246    B        L0x434
247L0x364:
248    MOV      r8,r0
249    SUB      r0,r0,r1,LSL #1
250    BL       armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
251    ADD      r0,r8,r1
252    SUB      r0,r0,#2
253    BL       armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
254    VRHADD.U8 d22,d22,d0
255    VRHADD.U8 d26,d26,d4
256    VRHADD.U8 d24,d24,d2
257    VRHADD.U8 d28,d28,d6
258    ADD      r12,r2,r3,LSL #1
259    VST1.32  {d22[0]},[r2],r3
260    VST1.32  {d26[0]},[r12],r3
261    VST1.32  {d24[0]},[r2]
262    VST1.32  {d28[0]},[r12]
263    ADD      r11,sp,#0
264    B        L0x434
265L0x3a8:
266    SUB      r0,r0,r1,LSL #1
267    SUB      r0,r0,#2
268    BL       armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe
269    VQRSHRUN.S16 d14,q8,#5
270    VQRSHRUN.S16 d16,q9,#5
271    VQRSHRUN.S16 d18,q10,#5
272    VQRSHRUN.S16 d20,q11,#5
273    VRHADD.U8 d0,d0,d14
274    VRHADD.U8 d4,d4,d18
275    VRHADD.U8 d2,d2,d16
276    VRHADD.U8 d6,d6,d20
277    ADD      r12,r2,r3,LSL #1
278    VST1.32  {d0[0]},[r2],r3
279    VST1.32  {d4[0]},[r12],r3
280    VST1.32  {d2[0]},[r2]
281    VST1.32  {d6[0]},[r12]
282    ADD      r11,sp,#0
283    B        L0x434
284L0x3f0:
285    MOV      r8,r0
286    ADD      r0,r0,#1
287    SUB      r0,r0,r1,LSL #1
288    BL       armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
289    ADD      r0,r8,r1
290    SUB      r0,r0,#2
291    BL       armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
292    VRHADD.U8 d22,d22,d0
293    VRHADD.U8 d26,d26,d4
294    VRHADD.U8 d24,d24,d2
295    VRHADD.U8 d28,d28,d6
296    ADD      r12,r2,r3,LSL #1
297    VST1.32  {d22[0]},[r2],r3
298    VST1.32  {d26[0]},[r12],r3
299    VST1.32  {d24[0]},[r2]
300    VST1.32  {d28[0]},[r12]
301    ADD      r11,sp,#0
302L0x434:
303    LDM      r11,{r0-r3}
304    SUBS     r5,r5,#4
305    ADD      r0,r0,#4
306    ADD      r2,r2,#4
307    BGT      L0x2c
308    SUBS     r4,r4,#4
309    LDR      r5,[sp,#0x80]
310    ADD      r11,sp,#0
311    ADD      r0,r0,r1,LSL #2
312    ADD      r2,r2,r3,LSL #2
313    SUB      r0,r0,r5
314    SUB      r2,r2,r5
315    BGT      L0x2c
316    MOV      r0,#0
317    ADD      sp,sp,#0x10
318    VPOP     {d8-d15}
319    POP      {r4-r12,pc}
320    .endfunc
321
322    .end
323
324