/*
 * omxVCM4P10_InterpolateLuma_s.S revision 7ea582e1dbdd9a88b2105fbe29ed0ec92cbf70c6
 */
/*
 * Copyright (C) 2007-2008 ARM Limited
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */
/*
 *
 */

/*
 * Build attributes recorded in the object file (numbers per the ARM ABI
 * addenda): tag 24 = 1 declares that this code needs an 8-byte aligned
 * stack, tag 25 = 1 declares that it keeps the stack 8-byte aligned.
 */
    .eabi_attribute 24, 1
    .eabi_attribute 25, 1

    .arm                /* assemble as ARM (A32) instructions, not Thumb */
    .fpu neon           /* allow the NEON instructions used below        */
    .text

    .global omxVCM4P10_InterpolateLuma
/*
 * omxVCM4P10_InterpolateLuma
 *
 * Processes a rectangular region in 4x4 blocks.  Every block is produced by
 * one of 16 code paths selected by
 *
 *     index = (5th argument) + 4 * (6th argument)
 *
 * The index is NOT range-checked: it is scaled and added straight to pc, so
 * callers must guarantee 0 <= index <= 15.
 *
 * Arguments (r0-r3 as the code uses them; 5th-8th are the stack arguments):
 *   r0        source pointer
 *   r1        source stride in bytes (added to r0 to step one row down)
 *   r2        destination pointer
 *   r3        destination stride in bytes
 *   5th, 6th  dispatch selectors (r6, r7) -- presumably the quarter-sample
 *             offsets dx and dy of the OpenMAX DL prototype; confirm against
 *             the C header
 *   7th       horizontal extent (r5), consumed 4 per block
 *   8th       vertical extent (r4), consumed 4 per row of blocks
 *
 * Returns r0 = 0.
 *
 * Register roles inside the loop:
 *   r8        copy of the block source pointer kept across helper calls
 *   r11       address of the 16-byte stack scratch area holding r0-r3
 *   r12       second row pointer, two rows below r0 or r2
 *   d30, d31  16-bit lanes of 5 and 20; set once and never read in this
 *             file -- presumably filter constants for the helpers; confirm
 *
 * The armVCM4P10_InterpolateLuma_*_unsafe helpers are defined elsewhere.
 * Everything said about them below is inferred from how their results are
 * consumed here: this code relies on r1-r6 and r8 surviving each call and
 * picks the results up from d0/d2/d4/d6, d22/d24/d26/d28 or q7-q12.
 *
 * Output pattern shared by all 16 paths: rows 0 and 1 are written through
 * r2, rows 2 and 3 through r12 = r2 + 2 * r3; each VST1.32 of lane 0 writes
 * 4 bytes.
 */
    .type    omxVCM4P10_InterpolateLuma, %function
omxVCM4P10_InterpolateLuma:
    PUSH     {r4-r12,lr}
    VPUSH    {d8-d15}
    SUB      sp,sp,#0x10            /* 16-byte scratch for r0-r3 */
    /* 40 (core) + 64 (d8-d15) + 16 (scratch) = 0x78 bytes pushed so far,
     * so the first stack argument now sits at [sp,#0x78]. */
    LDR      r6,[sp,#0x78]          /* 5th argument */
    LDR      r7,[sp,#0x7c]          /* 6th argument */
    LDR      r5,[sp,#0x80]          /* 7th argument: horizontal counter */
    LDR      r4,[sp,#0x84]          /* 8th argument: vertical counter */
    ADD      r6,r6,r7,LSL #2        /* dispatch index = 5th + 4 * 6th */
    ADD      r11,sp,#0
    VMOV.I16 d31,#0x14
    VMOV.I16 d30,#0x5

/* ---- per-block entry: save the block pointers, then dispatch ---- */
.Lblock_loop:
    STM      r11,{r0-r3}
    /* In ARM state pc reads as (address of this ADD) + 8, so index 0 lands
     * on the SECOND word after the ADD.  The first branch is only padding
     * and is never reached through this dispatch.  Do not insert or remove
     * anything between the ADD and the end of the table. */
    ADD      pc,pc,r6,LSL #2
    B        .Lcase15               /* padding slot */
    B        .Lcase0
    B        .Lcase1
    B        .Lcase2
    B        .Lcase3
    B        .Lcase4
    B        .Lcase5
    B        .Lcase6
    B        .Lcase7
    B        .Lcase8
    B        .Lcase9
    B        .Lcase10
    B        .Lcase11
    B        .Lcase12
    B        .Lcase13
    B        .Lcase14
    B        .Lcase15

/* index 0: plain copy of four rows, 4 bytes stored per row */
.Lcase0:
    ADD      r12,r0,r1,LSL #1       /* r12 -> source row 2 */
    VLD1.8   {d9},[r0],r1           /* row 0 */
    VLD1.8   {d11},[r12],r1         /* row 2 */
    VLD1.8   {d10},[r0]             /* row 1 */
    VLD1.8   {d12},[r12]            /* row 3 */
    ADD      r12,r2,r3,LSL #1       /* r12 -> destination row 2 */
    VST1.32  {d9[0]},[r2],r3
    VST1.32  {d11[0]},[r12],r3
    VST1.32  {d10[0]},[r2]
    VST1.32  {d12[0]},[r12]
    ADD      r11,sp,#0
    B        .Lnext_block

/* index 1: horizontal helper (source moved 2 bytes left), result rounded-
 * averaged with d14/d16/d18/d20 */
.Lcase1:
    SUB      r0,r0,#2
    BL       armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
    VRHADD.U8 d22,d22,d14
    VRHADD.U8 d26,d26,d18
    VRHADD.U8 d24,d24,d16
    VRHADD.U8 d28,d28,d20
    ADD      r12,r2,r3,LSL #1
    VST1.32  {d22[0]},[r2],r3
    VST1.32  {d26[0]},[r12],r3
    VST1.32  {d24[0]},[r2]
    VST1.32  {d28[0]},[r12]
    ADD      r11,sp,#0
    B        .Lnext_block

/* index 2: horizontal helper result stored unmodified */
.Lcase2:
    SUB      r0,r0,#2
    BL       armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
    ADD      r12,r2,r3,LSL #1
    VST1.32  {d22[0]},[r2],r3
    VST1.32  {d26[0]},[r12],r3
    VST1.32  {d24[0]},[r2]
    VST1.32  {d28[0]},[r12]
    ADD      r11,sp,#0
    B        .Lnext_block

/* index 3: as index 1, but averaged with d15/d17/d19/d21 */
.Lcase3:
    SUB      r0,r0,#2
    BL       armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
    VRHADD.U8 d22,d22,d15
    VRHADD.U8 d26,d26,d19
    VRHADD.U8 d24,d24,d17
    VRHADD.U8 d28,d28,d21
    ADD      r12,r2,r3,LSL #1
    VST1.32  {d22[0]},[r2],r3
    VST1.32  {d26[0]},[r12],r3
    VST1.32  {d24[0]},[r2]
    VST1.32  {d28[0]},[r12]
    ADD      r11,sp,#0
    B        .Lnext_block

/* index 4: vertical helper (source moved 2 rows up), result rounded-
 * averaged with d9/d10/d11/d12 */
.Lcase4:
    SUB      r0,r0,r1,LSL #1
    BL       armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
    VRHADD.U8 d0,d0,d9
    VRHADD.U8 d4,d4,d11
    VRHADD.U8 d2,d2,d10
    VRHADD.U8 d6,d6,d12
    ADD      r12,r2,r3,LSL #1
    VST1.32  {d0[0]},[r2],r3
    VST1.32  {d4[0]},[r12],r3
    VST1.32  {d2[0]},[r2]
    VST1.32  {d6[0]},[r12]
    ADD      r11,sp,#0
    B        .Lnext_block

/* index 5: vertical helper, then horizontal helper on the same block
 * (source pointer recovered from r8); the two results are averaged */
.Lcase5:
    MOV      r8,r0
    SUB      r0,r0,r1,LSL #1
    BL       armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
    SUB      r0,r8,#2
    BL       armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
    VRHADD.U8 d22,d22,d0
    VRHADD.U8 d26,d26,d4
    VRHADD.U8 d24,d24,d2
    VRHADD.U8 d28,d28,d6
    ADD      r12,r2,r3,LSL #1
    VST1.32  {d22[0]},[r2],r3
    VST1.32  {d26[0]},[r12],r3
    VST1.32  {d24[0]},[r2]
    VST1.32  {d28[0]},[r12]
    ADD      r11,sp,#0
    B        .Lnext_block

/* index 6: diagonal (HorVer) helper; q7-q10 are narrowed to bytes with a
 * saturating rounding shift right by 5 and averaged with d0/d2/d4/d6 */
.Lcase6:
    SUB      r0,r0,r1,LSL #1
    SUB      r0,r0,#2
    BL       armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe
    VQRSHRUN.S16 d14,q7,#5
    VQRSHRUN.S16 d16,q8,#5
    VQRSHRUN.S16 d18,q9,#5
    VQRSHRUN.S16 d20,q10,#5
    VRHADD.U8 d0,d0,d14
    VRHADD.U8 d4,d4,d18
    VRHADD.U8 d2,d2,d16
    VRHADD.U8 d6,d6,d20
    ADD      r12,r2,r3,LSL #1
    VST1.32  {d0[0]},[r2],r3
    VST1.32  {d4[0]},[r12],r3
    VST1.32  {d2[0]},[r2]
    VST1.32  {d6[0]},[r12]
    ADD      r11,sp,#0
    B        .Lnext_block

/* index 7: as index 5, with the vertical helper fed one byte to the right */
.Lcase7:
    MOV      r8,r0
    ADD      r0,r0,#1
    SUB      r0,r0,r1,LSL #1
    BL       armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
    SUB      r0,r8,#2
    BL       armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
    VRHADD.U8 d22,d22,d0
    VRHADD.U8 d26,d26,d4
    VRHADD.U8 d24,d24,d2
    VRHADD.U8 d28,d28,d6
    ADD      r12,r2,r3,LSL #1
    VST1.32  {d22[0]},[r2],r3
    VST1.32  {d26[0]},[r12],r3
    VST1.32  {d24[0]},[r2]
    VST1.32  {d28[0]},[r12]
    ADD      r11,sp,#0
    B        .Lnext_block

/* index 8: vertical helper result stored unmodified */
.Lcase8:
    SUB      r0,r0,r1,LSL #1
    BL       armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
    ADD      r12,r2,r3,LSL #1
    VST1.32  {d0[0]},[r2],r3
    VST1.32  {d4[0]},[r12],r3
    VST1.32  {d2[0]},[r2]
    VST1.32  {d6[0]},[r12]
    ADD      r11,sp,#0
    B        .Lnext_block

/* index 9: diagonal (VerHor) helper; each of q9-q12 is first shifted down
 * by 4 bytes (two 16-bit lanes) with VEXT, then narrowed with a saturating
 * rounding shift right by 5 and averaged with d0/d2/d4/d6 */
.Lcase9:
    SUB      r0,r0,r1,LSL #1
    SUB      r0,r0,#2
    BL       armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe
    VEXT.8   d18,d18,d19,#4
    VEXT.8   d20,d20,d21,#4
    VEXT.8   d22,d22,d23,#4
    VEXT.8   d24,d24,d25,#4
    VQRSHRUN.S16 d14,q9,#5
    VQRSHRUN.S16 d16,q10,#5
    VQRSHRUN.S16 d18,q11,#5
    VQRSHRUN.S16 d20,q12,#5
    VRHADD.U8 d0,d0,d14
    VRHADD.U8 d4,d4,d18
    VRHADD.U8 d2,d2,d16
    VRHADD.U8 d6,d6,d20
    ADD      r12,r2,r3,LSL #1
    VST1.32  {d0[0]},[r2],r3
    VST1.32  {d4[0]},[r12],r3
    VST1.32  {d2[0]},[r2]
    VST1.32  {d6[0]},[r12]
    ADD      r11,sp,#0
    B        .Lnext_block

/* index 10: diagonal (HorVer) helper result stored unmodified */
.Lcase10:
    SUB      r0,r0,r1,LSL #1
    SUB      r0,r0,#2
    BL       armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe
    ADD      r12,r2,r3,LSL #1
    VST1.32  {d0[0]},[r2],r3
    VST1.32  {d4[0]},[r12],r3
    VST1.32  {d2[0]},[r2]
    VST1.32  {d6[0]},[r12]
    ADD      r11,sp,#0
    B        .Lnext_block

/* index 11: as index 9, but the VEXT shift is 6 bytes (three 16-bit lanes) */
.Lcase11:
    SUB      r0,r0,r1,LSL #1
    SUB      r0,r0,#2
    BL       armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe
    VEXT.8   d18,d18,d19,#6
    VEXT.8   d20,d20,d21,#6
    VEXT.8   d22,d22,d23,#6
    VEXT.8   d24,d24,d25,#6
    VQRSHRUN.S16 d14,q9,#5
    VQRSHRUN.S16 d16,q10,#5
    VQRSHRUN.S16 d18,q11,#5
    VQRSHRUN.S16 d20,q12,#5
    VRHADD.U8 d0,d0,d14
    VRHADD.U8 d4,d4,d18
    VRHADD.U8 d2,d2,d16
    VRHADD.U8 d6,d6,d20
    ADD      r12,r2,r3,LSL #1
    VST1.32  {d0[0]},[r2],r3
    VST1.32  {d4[0]},[r12],r3
    VST1.32  {d2[0]},[r2]
    VST1.32  {d6[0]},[r12]
    ADD      r11,sp,#0
    B        .Lnext_block

/* index 12: as index 4, but averaged with d10/d11/d12/d13 (one register
 * further along than index 4) */
.Lcase12:
    SUB      r0,r0,r1,LSL #1
    BL       armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
    VRHADD.U8 d0,d0,d10
    VRHADD.U8 d4,d4,d12
    VRHADD.U8 d2,d2,d11
    VRHADD.U8 d6,d6,d13
    ADD      r12,r2,r3,LSL #1
    VST1.32  {d0[0]},[r2],r3
    VST1.32  {d4[0]},[r12],r3
    VST1.32  {d2[0]},[r2]
    VST1.32  {d6[0]},[r12]
    ADD      r11,sp,#0
    B        .Lnext_block

/* index 13: as index 5, with the horizontal helper fed one row further down */
.Lcase13:
    MOV      r8,r0
    SUB      r0,r0,r1,LSL #1
    BL       armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
    ADD      r0,r8,r1
    SUB      r0,r0,#2
    BL       armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
    VRHADD.U8 d22,d22,d0
    VRHADD.U8 d26,d26,d4
    VRHADD.U8 d24,d24,d2
    VRHADD.U8 d28,d28,d6
    ADD      r12,r2,r3,LSL #1
    VST1.32  {d22[0]},[r2],r3
    VST1.32  {d26[0]},[r12],r3
    VST1.32  {d24[0]},[r2]
    VST1.32  {d28[0]},[r12]
    ADD      r11,sp,#0
    B        .Lnext_block

/* index 14: as index 6, but narrowing q8-q11 (one register further along) */
.Lcase14:
    SUB      r0,r0,r1,LSL #1
    SUB      r0,r0,#2
    BL       armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe
    VQRSHRUN.S16 d14,q8,#5
    VQRSHRUN.S16 d16,q9,#5
    VQRSHRUN.S16 d18,q10,#5
    VQRSHRUN.S16 d20,q11,#5
    VRHADD.U8 d0,d0,d14
    VRHADD.U8 d4,d4,d18
    VRHADD.U8 d2,d2,d16
    VRHADD.U8 d6,d6,d20
    ADD      r12,r2,r3,LSL #1
    VST1.32  {d0[0]},[r2],r3
    VST1.32  {d4[0]},[r12],r3
    VST1.32  {d2[0]},[r2]
    VST1.32  {d6[0]},[r12]
    ADD      r11,sp,#0
    B        .Lnext_block

/* index 15: vertical helper fed one byte to the right, horizontal helper
 * fed one row further down; results averaged.  Falls through to the loop
 * tail. */
.Lcase15:
    MOV      r8,r0
    ADD      r0,r0,#1
    SUB      r0,r0,r1,LSL #1
    BL       armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
    ADD      r0,r8,r1
    SUB      r0,r0,#2
    BL       armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
    VRHADD.U8 d22,d22,d0
    VRHADD.U8 d26,d26,d4
    VRHADD.U8 d24,d24,d2
    VRHADD.U8 d28,d28,d6
    ADD      r12,r2,r3,LSL #1
    VST1.32  {d22[0]},[r2],r3
    VST1.32  {d26[0]},[r12],r3
    VST1.32  {d24[0]},[r2]
    VST1.32  {d28[0]},[r12]
    ADD      r11,sp,#0

/* ---- loop tail: advance to the next 4x4 block ---- */
.Lnext_block:
    LDM      r11,{r0-r3}            /* pointers/strides as saved at block entry */
    SUBS     r5,r5,#4               /* 4 columns done */
    ADD      r0,r0,#4
    ADD      r2,r2,#4
    BGT      .Lblock_loop           /* more blocks in this row of blocks */
    SUBS     r4,r4,#4               /* 4 rows done; flags are consumed by the
                                     * BGT below, nothing in between sets them */
    LDR      r5,[sp,#0x80]          /* reload the horizontal extent */
    ADD      r11,sp,#0
    ADD      r0,r0,r1,LSL #2        /* down 4 source rows ... */
    ADD      r2,r2,r3,LSL #2        /* ... and 4 destination rows */
    SUB      r0,r0,r5               /* back to the left edge */
    SUB      r2,r2,r5
    BGT      .Lblock_loop
    MOV      r0,#0                  /* return value */
    ADD      sp,sp,#0x10
    VPOP     {d8-d15}
    POP      {r4-r12,pc}
    .size    omxVCM4P10_InterpolateLuma, .-omxVCM4P10_InterpolateLuma

    .end
336
337