/* omxVCM4P10_InterpolateLuma_s.S revision 78e52bfac041d71ce53b5b13c2abf78af742b09d */
/*
 * Copyright (C) 2007-2008 ARM Limited
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */
/*
 *
 */

/*
 * File-level assembler setup (GNU as, ARM EABI).
 *
 * Build attributes 24 and 25 are Tag_ABI_align_needed and
 * Tag_ABI_align_preserved in the ARM EABI build-attribute numbering: the
 * object declares that it needs, and keeps, 8-byte stack alignment.
 */
    .eabi_attribute 24, 1
    .eabi_attribute 25, 1

    .arm                /* A32 encoding; the code below relies on ARM-state pc reads */
    .fpu neon           /* enable the NEON (Advanced SIMD) mnemonics used below */
    .text

/*
 * omxVCM4P10_InterpolateLuma
 *
 * Walks a rectangular region in 4x4 blocks.  For every block it dispatches
 * through a 16-entry branch table to one of sixteen interpolation variants.
 * A variant calls zero, one or two external helpers, optionally averages
 * their results with VRHADD.U8, and stores four 4-byte rows to the
 * destination.
 *
 * Expected C prototype (OpenMAX DL API; the declaration is not part of this
 * file, so the parameter names are an assumption to confirm):
 *   OMXResult omxVCM4P10_InterpolateLuma(const OMX_U8 *pSrc, OMX_S32 srcStep,
 *                                        OMX_U8 *pDst, OMX_S32 dstStep,
 *                                        OMX_S32 dx, OMX_S32 dy, OMXSize roi);
 *
 * ABI:   AAPCS, ARM state, NEON
 * In:    r0 = source pointer            r1 = source stride in bytes
 *        r2 = destination pointer       r3 = destination stride in bytes
 *        stack arg 0 = dx, stack arg 1 = dy  (dispatch index = dx + 4*dy)
 *        stack arg 2 = width, stack arg 3 = height of the region
 * Out:   r0 = 0
 * Saved: r4-r12, lr, d8-d15 are pushed on entry and restored on exit.
 * Stack: 40 (core) + 64 (VFP) + 16 (scratch) = 120 bytes, a multiple of 8.
 *
 * Register roles inside the loop:
 *   r4  = rows still to process          r5  = columns still to process
 *   r6  = dispatch index                 r8  = block source pointer kept
 *   r11 = address of the r0-r3 scratch         across the first helper call
 *   r12 = pointer used for rows 2 and 3
 *   d30 = 5 and d31 = 20 in every 16-bit lane; not read in this file,
 *         presumably filter constants expected by the helpers - confirm.
 *
 * NOTE(review): the dispatch index is not range checked, and both loop
 * counters step by 4 with a signed "greater than zero" test, so the caller
 * presumably guarantees 0 <= dx,dy <= 3 and width/height multiples of 4.
 *
 * NOTE(review): the helpers are defined elsewhere.  Which registers they
 * return results in is inferred only from what this file reads after each
 * call; verify against the helper sources.
 */

/* Offsets from sp once the prologue has completed (120 bytes pushed). */
    .equ    SCRATCH_SIZE, 0x10      /* save area for r0-r3 at [sp, #0]      */
    .equ    ARG_DX,       0x78      /* first stack argument                 */
    .equ    ARG_DY,       0x7c      /* second stack argument                */
    .equ    ARG_WIDTH,    0x80      /* third stack argument                 */
    .equ    ARG_HEIGHT,   0x84      /* fourth stack argument                */

/*
 * STORE_4x4 row0, row1, row2, row3
 * Stores lane 0 (4 bytes) of each D register to four consecutive
 * destination rows, then re-points r11 at the scratch area so the loop tail
 * can reload r0-r3.  Rows 0/1 go through r2, rows 2/3 through r12; the
 * stores are interleaved 0, 2, 1, 3.
 * Clobbers: r2, r11, r12.
 */
    .macro  STORE_4x4 row0, row1, row2, row3
    ADD      r12,r2,r3,LSL #1
    VST1.32  {\row0[0]},[r2],r3
    VST1.32  {\row2[0]},[r12],r3
    VST1.32  {\row1[0]},[r2]
    VST1.32  {\row3[0]},[r12]
    ADD      r11,sp,#0
    .endm

    .global omxVCM4P10_InterpolateLuma
    /* Mark the symbol as a function so the linker can apply ARM/Thumb
       interworking to calls that target it. */
    .type   omxVCM4P10_InterpolateLuma, %function
    .func   omxVCM4P10_InterpolateLuma
omxVCM4P10_InterpolateLuma:
    PUSH     {r4-r12,lr}
    VPUSH    {d8-d15}
    SUB      sp,sp,#SCRATCH_SIZE
    LDR      r6,[sp,#ARG_DX]
    LDR      r7,[sp,#ARG_DY]
    LDR      r5,[sp,#ARG_WIDTH]
    LDR      r4,[sp,#ARG_HEIGHT]
    ADD      r6,r6,r7,LSL #2        /* dispatch index = dx + 4*dy */
    ADD      r11,sp,#0
    VMOV.I16 d31,#0x14
    VMOV.I16 d30,#0x5

/* Top of the per-block loop: remember this block's pointers and strides. */
.Lblock_loop:
    STM      r11,{r0-r3}
    /* In ARM state pc reads as this instruction + 8, so index 0 lands on the
       second branch below.  The first branch only fills the skipped slot
       and is never executed. */
    ADD      pc,pc,r6,LSL #2
    B        .Lcase_dx3_dy3         /* pad slot */
    B        .Lcase_dx0_dy0         /* index  0 */
    B        .Lcase_dx1_dy0         /* index  1 */
    B        .Lcase_dx2_dy0         /* index  2 */
    B        .Lcase_dx3_dy0         /* index  3 */
    B        .Lcase_dx0_dy1         /* index  4 */
    B        .Lcase_dx1_dy1         /* index  5 */
    B        .Lcase_dx2_dy1         /* index  6 */
    B        .Lcase_dx3_dy1         /* index  7 */
    B        .Lcase_dx0_dy2         /* index  8 */
    B        .Lcase_dx1_dy2         /* index  9 */
    B        .Lcase_dx2_dy2         /* index 10 */
    B        .Lcase_dx3_dy2         /* index 11 */
    B        .Lcase_dx0_dy3         /* index 12 */
    B        .Lcase_dx1_dy3         /* index 13 */
    B        .Lcase_dx2_dy3         /* index 14 */
    B        .Lcase_dx3_dy3         /* index 15 */

/* Index 0: plain copy of four source rows. */
.Lcase_dx0_dy0:
    ADD      r12,r0,r1,LSL #1
    VLD1.8   {d9},[r0],r1
    VLD1.8   {d11},[r12],r1
    VLD1.8   {d10},[r0]
    VLD1.8   {d12},[r12]
    STORE_4x4 d9, d10, d11, d12
    B        .Lblock_done

/* Index 1: horizontal helper result averaged with d14/d16/d18/d20. */
.Lcase_dx1_dy0:
    SUB      r0,r0,#2               /* helper is given a pointer 2 bytes to the left */
    BL       armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
    VRHADD.U8 d22,d22,d14
    VRHADD.U8 d26,d26,d18
    VRHADD.U8 d24,d24,d16
    VRHADD.U8 d28,d28,d20
    STORE_4x4 d22, d24, d26, d28
    B        .Lblock_done

/* Index 2: horizontal helper result stored unchanged. */
.Lcase_dx2_dy0:
    SUB      r0,r0,#2
    BL       armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
    STORE_4x4 d22, d24, d26, d28
    B        .Lblock_done

/* Index 3: horizontal helper result averaged with d15/d17/d19/d21. */
.Lcase_dx3_dy0:
    SUB      r0,r0,#2
    BL       armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
    VRHADD.U8 d22,d22,d15
    VRHADD.U8 d26,d26,d19
    VRHADD.U8 d24,d24,d17
    VRHADD.U8 d28,d28,d21
    STORE_4x4 d22, d24, d26, d28
    B        .Lblock_done

/* Index 4: vertical helper result averaged with d9/d10/d11/d12. */
.Lcase_dx0_dy1:
    SUB      r0,r0,r1,LSL #1        /* helper is given a pointer 2 rows up */
    BL       armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
    VRHADD.U8 d0,d0,d9
    VRHADD.U8 d4,d4,d11
    VRHADD.U8 d2,d2,d10
    VRHADD.U8 d6,d6,d12
    STORE_4x4 d0, d2, d4, d6
    B        .Lblock_done

/* Index 5: average of the vertical and horizontal helper results. */
.Lcase_dx1_dy1:
    MOV      r8,r0                  /* keep the block pointer for the second helper */
    SUB      r0,r0,r1,LSL #1
    BL       armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
    SUB      r0,r8,#2
    BL       armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
    VRHADD.U8 d22,d22,d0
    VRHADD.U8 d26,d26,d4
    VRHADD.U8 d24,d24,d2
    VRHADD.U8 d28,d28,d6
    STORE_4x4 d22, d24, d26, d28
    B        .Lblock_done

/* Index 6: diagonal (hor-ver) helper result averaged with the narrowed
   16-bit values left in q7..q10. */
.Lcase_dx2_dy1:
    SUB      r0,r0,r1,LSL #1
    SUB      r0,r0,#2
    BL       armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe
    /* Rounding shift right by 5 with unsigned saturation, 16 -> 8 bit.
       Each destination is the low half of its own source Q register. */
    VQRSHRUN.S16 d14,q7,#5
    VQRSHRUN.S16 d16,q8,#5
    VQRSHRUN.S16 d18,q9,#5
    VQRSHRUN.S16 d20,q10,#5
    VRHADD.U8 d0,d0,d14
    VRHADD.U8 d4,d4,d18
    VRHADD.U8 d2,d2,d16
    VRHADD.U8 d6,d6,d20
    STORE_4x4 d0, d2, d4, d6
    B        .Lblock_done

/* Index 7: as index 5, but the vertical helper starts one byte to the right. */
.Lcase_dx3_dy1:
    MOV      r8,r0
    ADD      r0,r0,#1
    SUB      r0,r0,r1,LSL #1
    BL       armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
    SUB      r0,r8,#2
    BL       armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
    VRHADD.U8 d22,d22,d0
    VRHADD.U8 d26,d26,d4
    VRHADD.U8 d24,d24,d2
    VRHADD.U8 d28,d28,d6
    STORE_4x4 d22, d24, d26, d28
    B        .Lblock_done

/* Index 8: vertical helper result stored unchanged. */
.Lcase_dx0_dy2:
    SUB      r0,r0,r1,LSL #1
    BL       armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
    STORE_4x4 d0, d2, d4, d6
    B        .Lblock_done

/* Index 9: diagonal (ver-hor) helper result averaged with 16-bit values
   taken from q9..q12 starting at 16-bit element 2 (byte offset 4). */
.Lcase_dx1_dy2:
    SUB      r0,r0,r1,LSL #1
    SUB      r0,r0,#2
    BL       armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe
    VEXT.8   d18,d18,d19,#4
    VEXT.8   d20,d20,d21,#4
    VEXT.8   d22,d22,d23,#4
    VEXT.8   d24,d24,d25,#4
    VQRSHRUN.S16 d14,q9,#5
    VQRSHRUN.S16 d16,q10,#5
    VQRSHRUN.S16 d18,q11,#5
    VQRSHRUN.S16 d20,q12,#5
    VRHADD.U8 d0,d0,d14
    VRHADD.U8 d4,d4,d18
    VRHADD.U8 d2,d2,d16
    VRHADD.U8 d6,d6,d20
    STORE_4x4 d0, d2, d4, d6
    B        .Lblock_done

/* Index 10: diagonal (hor-ver) helper result stored unchanged. */
.Lcase_dx2_dy2:
    SUB      r0,r0,r1,LSL #1
    SUB      r0,r0,#2
    BL       armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe
    STORE_4x4 d0, d2, d4, d6
    B        .Lblock_done

/* Index 11: as index 9, but starting at 16-bit element 3 (byte offset 6). */
.Lcase_dx3_dy2:
    SUB      r0,r0,r1,LSL #1
    SUB      r0,r0,#2
    BL       armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe
    VEXT.8   d18,d18,d19,#6
    VEXT.8   d20,d20,d21,#6
    VEXT.8   d22,d22,d23,#6
    VEXT.8   d24,d24,d25,#6
    VQRSHRUN.S16 d14,q9,#5
    VQRSHRUN.S16 d16,q10,#5
    VQRSHRUN.S16 d18,q11,#5
    VQRSHRUN.S16 d20,q12,#5
    VRHADD.U8 d0,d0,d14
    VRHADD.U8 d4,d4,d18
    VRHADD.U8 d2,d2,d16
    VRHADD.U8 d6,d6,d20
    STORE_4x4 d0, d2, d4, d6
    B        .Lblock_done

/* Index 12: vertical helper result averaged with d10/d11/d12/d13, i.e. the
   registers one position after those used for index 4. */
.Lcase_dx0_dy3:
    SUB      r0,r0,r1,LSL #1
    BL       armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
    VRHADD.U8 d0,d0,d10
    VRHADD.U8 d4,d4,d12
    VRHADD.U8 d2,d2,d11
    VRHADD.U8 d6,d6,d13
    STORE_4x4 d0, d2, d4, d6
    B        .Lblock_done

/* Index 13: as index 5, but the horizontal helper starts one row down. */
.Lcase_dx1_dy3:
    MOV      r8,r0
    SUB      r0,r0,r1,LSL #1
    BL       armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
    ADD      r0,r8,r1
    SUB      r0,r0,#2
    BL       armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
    VRHADD.U8 d22,d22,d0
    VRHADD.U8 d26,d26,d4
    VRHADD.U8 d24,d24,d2
    VRHADD.U8 d28,d28,d6
    STORE_4x4 d22, d24, d26, d28
    B        .Lblock_done

/* Index 14: as index 6, but narrowing q8..q11 (one Q register later). */
.Lcase_dx2_dy3:
    SUB      r0,r0,r1,LSL #1
    SUB      r0,r0,#2
    BL       armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe
    VQRSHRUN.S16 d14,q8,#5
    VQRSHRUN.S16 d16,q9,#5
    VQRSHRUN.S16 d18,q10,#5
    VQRSHRUN.S16 d20,q11,#5
    VRHADD.U8 d0,d0,d14
    VRHADD.U8 d4,d4,d18
    VRHADD.U8 d2,d2,d16
    VRHADD.U8 d6,d6,d20
    STORE_4x4 d0, d2, d4, d6
    B        .Lblock_done

/* Index 15: vertical helper one byte to the right, horizontal helper one
   row down, results averaged.  Falls through into the loop tail. */
.Lcase_dx3_dy3:
    MOV      r8,r0
    ADD      r0,r0,#1
    SUB      r0,r0,r1,LSL #1
    BL       armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
    ADD      r0,r8,r1
    SUB      r0,r0,#2
    BL       armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
    VRHADD.U8 d22,d22,d0
    VRHADD.U8 d26,d26,d4
    VRHADD.U8 d24,d24,d2
    VRHADD.U8 d28,d28,d6
    STORE_4x4 d22, d24, d26, d28

/* Loop tail: advance to the next 4x4 block, or to the next row of blocks. */
.Lblock_done:
    LDM      r11,{r0-r3}            /* values saved at .Lblock_loop */
    SUBS     r5,r5,#4               /* four columns done */
    ADD      r0,r0,#4
    ADD      r2,r2,#4
    BGT      .Lblock_loop
    /* End of a block row.  The flags set by this SUBS are consumed by the
       final BGT; none of the instructions in between update them. */
    SUBS     r4,r4,#4
    LDR      r5,[sp,#ARG_WIDTH]     /* restart the column counter */
    ADD      r11,sp,#0
    ADD      r0,r0,r1,LSL #2        /* down four source rows ...            */
    ADD      r2,r2,r3,LSL #2        /* ... and four destination rows        */
    SUB      r0,r0,r5               /* back to the left edge of the region  */
    SUB      r2,r2,r5
    BGT      .Lblock_loop
    MOV      r0,#0                  /* return value */
    ADD      sp,sp,#SCRATCH_SIZE
    VPOP     {d8-d15}
    POP      {r4-r12,pc}
    .size   omxVCM4P10_InterpolateLuma, .-omxVCM4P10_InterpolateLuma
    .endfunc

    .end