1;
2;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3;
4;  Use of this source code is governed by a BSD-style license
5;  that can be found in the LICENSE file in the root of the source
6;  tree. An additional intellectual property rights grant can be found
7;  in the file PATENTS.  All contributing project authors may
8;  be found in the AUTHORS file in the root of the source tree.
9;
10
11
12    EXPORT  |vp8_yv12_extend_frame_borders_neon|
13    ARM
14    REQUIRE8
15    PRESERVE8
16
17    INCLUDE vpx_asm_offsets.asm
18
19    AREA ||.text||, CODE, READONLY, ALIGN=2
;-----------------------------------------------------------------------
;void vp8_yv12_extend_frame_borders_neon (YV12_BUFFER_CONFIG *ybf);
;
;Extends the Y, U and V planes of *ybf into their borders: the first and
;last pixel of every row are replicated into the left and right borders,
;then the first and last rows (side borders included) are copied into
;every row of the top and bottom borders.
;
;In:      r0 = ybf
;Saved:   r4 - r10, lr, d8 - d15 (pushed on entry, popped on exit)
;Clobber: r1 - r3, r12, q0 - q3, q8 - q15, flags
;
;Register roles (both border paths):
;   r1 / r2   srcptr1 / srcptr2
;   r5 / r6   destptr1 / destptr2
;   r3        border            (halved for U/V)
;   r4        plane height      (halved for U/V)
;   lr        plane stride      (halved for U/V)
;   r8        border * stride   (quartered for U/V)
;   r7, r9, r12  loop counters / scratch
;   r10       number of chroma planes still to process
;
;Note: this is a VP8 function. A border of 16 takes the b16 path; any
; other value is handled as border = 32. Internal y_width and y_height
; are always multiples of 16.
;-----------------------------------------------------------------------

|vp8_yv12_extend_frame_borders_neon| PROC
    push            {r4 - r10, lr}
    vpush           {d8 - d15}

    ;No need to load y_width, since: y_width = y_stride - 2*border
    ldr             r3, [r0, #yv12_buffer_config_border]
    ldr             r1, [r0, #yv12_buffer_config_y_buffer]       ;srcptr1
    ldr             r4, [r0, #yv12_buffer_config_y_height]
    ldr             lr, [r0, #yv12_buffer_config_y_stride]

    cmp             r3, #16
    beq             b16_extend_frame_borders

;=======================
b32_extend_frame_borders
;border = 32
;=======================
;Border copy for Y plane
;copy the left and right most columns out
    sub             r5, r1, r3              ;destptr1: start of left border
    add             r6, r1, lr
    sub             r6, r6, r3, lsl #1      ;destptr2: start of right border
    sub             r2, r6, #1              ;srcptr2: last pixel of the row

    ;Do four rows at one time
    mov             r12, r4, lsr #2

copy_left_right_y
    ;Load one byte per row and replicate it across 16 lanes
    vld1.8          {d0[], d1[]}, [r1], lr
    vld1.8          {d4[], d5[]}, [r2], lr
    vld1.8          {d8[], d9[]}, [r1], lr
    vld1.8          {d12[], d13[]}, [r2], lr
    vld1.8          {d16[], d17[]},  [r1], lr
    vld1.8          {d20[], d21[]}, [r2], lr
    vld1.8          {d24[], d25[]}, [r1], lr
    vld1.8          {d28[], d29[]}, [r2], lr

    ;Duplicate each q register so that 32 bytes can be stored per row
    vmov            q1, q0
    vmov            q3, q2
    vmov            q5, q4
    vmov            q7, q6
    vmov            q9, q8
    vmov            q11, q10
    vmov            q13, q12
    vmov            q15, q14

    subs            r12, r12, #1

    vst1.8          {q0, q1}, [r5], lr
    vst1.8          {q2, q3}, [r6], lr
    vst1.8          {q4, q5}, [r5], lr
    vst1.8          {q6, q7}, [r6], lr
    vst1.8          {q8, q9}, [r5], lr
    vst1.8          {q10, q11}, [r6], lr
    vst1.8          {q12, q13}, [r5], lr
    vst1.8          {q14, q15}, [r6], lr

    bne             copy_left_right_y

;Now copy the top and bottom source lines into each line of the respective borders
;r1 has stepped through every row and now points one row past the last one.
    ldr             r7, [r0, #yv12_buffer_config_y_buffer]       ;srcptr1
    mul             r8, r3, lr              ;border * stride

    sub             r6, r1, r3              ;destptr2: first bottom border row
    sub             r2, r6, lr              ;srcptr2: last row, incl. left border
    sub             r1, r7, r3              ;srcptr1: first row, incl. left border
    sub             r5, r1, r8              ;destptr1: first top border row

    ;Number of full 128-byte column chunks in a row. The chunk loop is
    ;bottom-tested, so a stride below 128 must skip it: otherwise it would
    ;copy past the row end and the counter would wrap around from zero.
    ;The 16-byte "extra" loop then handles the whole row.
    movs            r12, lr, lsr #7
    beq             check_extra_top_bottom_y

copy_top_bottom_y
    vld1.8          {q0, q1}, [r1]!
    vld1.8          {q8, q9}, [r2]!
    vld1.8          {q2, q3}, [r1]!
    vld1.8          {q10, q11}, [r2]!
    vld1.8          {q4, q5}, [r1]!
    vld1.8          {q12, q13}, [r2]!
    vld1.8          {q6, q7}, [r1]!
    vld1.8          {q14, q15}, [r2]!

    mov             r7, r3                  ;one pass per border row

top_bottom_32
    subs            r7, r7, #1

    vst1.8          {q0, q1}, [r5]!
    vst1.8          {q8, q9}, [r6]!
    vst1.8          {q2, q3}, [r5]!
    vst1.8          {q10, q11}, [r6]!
    vst1.8          {q4, q5}, [r5]!
    vst1.8          {q12, q13}, [r6]!
    vst1.8          {q6, q7}, [r5]!
    vst1.8          {q14, q15}, [r6]!

    ;Step down one row, back to the start of this 128-byte chunk
    add             r5, r5, lr
    sub             r5, r5, #128
    add             r6, r6, lr
    sub             r6, r6, #128

    bne             top_bottom_32

    ;Rewind both destinations to the first border row of the next chunk
    sub             r5, r1, r8
    add             r6, r2, lr

    subs            r12, r12, #1
    bne             copy_top_bottom_y

check_extra_top_bottom_y
    mov             r7, lr, lsr #4              ;check to see if extra copy is needed
    ands            r7, r7, #0x7                ;16-byte chunks left over
    bne             extra_top_bottom_y
end_of_border_copy_y

;Border copy for U, V planes
    ldr             r1, [r0, #yv12_buffer_config_u_buffer]       ;srcptr1
    mov             lr, lr, lsr #1              ;uv_stride
    mov             r3, r3, lsr #1              ;border
    mov             r4, r4, lsr #1              ;uv_height
    mov             r8, r8, lsr #2              ;uv border * uv_stride

    mov             r10, #2                     ;two chroma planes: U then V

;copy the left and right most columns out
border_copy_uv
    sub             r5, r1, r3              ;destptr1
    add             r6, r1, lr
    sub             r6, r6, r3, lsl #1      ;destptr2
    sub             r2, r6, #1              ;srcptr2

    mov             r7, r1                  ;keep the plane start for later

    ;Do eight rows at one time
    mov             r12, r4, lsr #3

copy_left_right_uv
    vld1.8          {d0[], d1[]}, [r1], lr
    vld1.8          {d2[], d3[]}, [r2], lr
    vld1.8          {d4[], d5[]}, [r1], lr
    vld1.8          {d6[], d7[]}, [r2], lr
    vld1.8          {d8[], d9[]},  [r1], lr
    vld1.8          {d10[], d11[]}, [r2], lr
    vld1.8          {d12[], d13[]}, [r1], lr
    vld1.8          {d14[], d15[]}, [r2], lr
    vld1.8          {d16[], d17[]}, [r1], lr
    vld1.8          {d18[], d19[]}, [r2], lr
    vld1.8          {d20[], d21[]}, [r1], lr
    vld1.8          {d22[], d23[]}, [r2], lr
    vld1.8          {d24[], d25[]},  [r1], lr
    vld1.8          {d26[], d27[]}, [r2], lr
    vld1.8          {d28[], d29[]}, [r1], lr
    vld1.8          {d30[], d31[]}, [r2], lr

    subs            r12, r12, #1

    vst1.8          {q0}, [r5], lr
    vst1.8          {q1}, [r6], lr
    vst1.8          {q2}, [r5], lr
    vst1.8          {q3}, [r6], lr
    vst1.8          {q4}, [r5], lr
    vst1.8          {q5}, [r6], lr
    vst1.8          {q6}, [r5], lr
    vst1.8          {q7}, [r6], lr
    vst1.8          {q8}, [r5], lr
    vst1.8          {q9}, [r6], lr
    vst1.8          {q10}, [r5], lr
    vst1.8          {q11}, [r6], lr
    vst1.8          {q12}, [r5], lr
    vst1.8          {q13}, [r6], lr
    vst1.8          {q14}, [r5], lr
    vst1.8          {q15}, [r6], lr

    bne             copy_left_right_uv

;Now copy the top and bottom source lines into each line of the respective borders
    sub             r6, r1, r3              ;destptr2
    sub             r2, r6, lr              ;srcptr2
    sub             r1, r7, r3              ;srcptr1
    sub             r5, r1, r8              ;destptr1

    ;Number of full 64-byte column chunks in a row; skip the bottom-tested
    ;chunk loop when there are none (uv_stride below 64).
    movs            r12, lr, lsr #6
    beq             check_extra_top_bottom_uv

copy_top_bottom_uv
    vld1.8          {q0, q1}, [r1]!
    vld1.8          {q8, q9}, [r2]!
    vld1.8          {q2, q3}, [r1]!
    vld1.8          {q10, q11}, [r2]!

    mov             r7, r3                  ;one pass per border row

top_bottom_16
    subs            r7, r7, #1

    vst1.8          {q0, q1}, [r5]!
    vst1.8          {q8, q9}, [r6]!
    vst1.8          {q2, q3}, [r5]!
    vst1.8          {q10, q11}, [r6]!

    ;Step down one row, back to the start of this 64-byte chunk
    add             r5, r5, lr
    sub             r5, r5, #64
    add             r6, r6, lr
    sub             r6, r6, #64

    bne             top_bottom_16

    sub             r5, r1, r8
    add             r6, r2, lr

    subs            r12, r12, #1
    bne             copy_top_bottom_uv

check_extra_top_bottom_uv
    mov             r7, lr, lsr #3              ;check to see if extra copy is needed
    ands            r7, r7, #0x7                ;8-byte chunks left over
    bne             extra_top_bottom_uv

end_of_border_copy_uv
    subs            r10, r10, #1
    ldrne           r1, [r0, #yv12_buffer_config_v_buffer]       ;srcptr1
    bne             border_copy_uv

    vpop            {d8 - d15}
    pop             {r4 - r10, pc}

;;;;;;;;;;;;;;;;;;;;;;
;extra copy part for Y
;Handles the columns left over after the 128-byte chunks, 16 bytes wide
;at a time; r7 = number of such columns still to do.
extra_top_bottom_y
    vld1.8          {q0}, [r1]!
    vld1.8          {q2}, [r2]!

    mov             r9, r3, lsr #3          ;border rows / 8 (8 rows per pass)

extra_top_bottom_32
    subs            r9, r9, #1

    vst1.8          {q0}, [r5], lr
    vst1.8          {q2}, [r6], lr
    vst1.8          {q0}, [r5], lr
    vst1.8          {q2}, [r6], lr
    vst1.8          {q0}, [r5], lr
    vst1.8          {q2}, [r6], lr
    vst1.8          {q0}, [r5], lr
    vst1.8          {q2}, [r6], lr
    vst1.8          {q0}, [r5], lr
    vst1.8          {q2}, [r6], lr
    vst1.8          {q0}, [r5], lr
    vst1.8          {q2}, [r6], lr
    vst1.8          {q0}, [r5], lr
    vst1.8          {q2}, [r6], lr
    vst1.8          {q0}, [r5], lr
    vst1.8          {q2}, [r6], lr
    bne             extra_top_bottom_32

    sub             r5, r1, r8
    add             r6, r2, lr
    subs            r7, r7, #1
    bne             extra_top_bottom_y

    b               end_of_border_copy_y

;extra copy part for UV
;Same as above for the chroma planes, 8 bytes wide at a time.
extra_top_bottom_uv
    vld1.8          {d0}, [r1]!
    vld1.8          {d8}, [r2]!

    mov             r9, r3, lsr #3          ;border rows / 8 (8 rows per pass)

extra_top_bottom_16
    subs            r9, r9, #1

    vst1.8          {d0}, [r5], lr
    vst1.8          {d8}, [r6], lr
    vst1.8          {d0}, [r5], lr
    vst1.8          {d8}, [r6], lr
    vst1.8          {d0}, [r5], lr
    vst1.8          {d8}, [r6], lr
    vst1.8          {d0}, [r5], lr
    vst1.8          {d8}, [r6], lr
    vst1.8          {d0}, [r5], lr
    vst1.8          {d8}, [r6], lr
    vst1.8          {d0}, [r5], lr
    vst1.8          {d8}, [r6], lr
    vst1.8          {d0}, [r5], lr
    vst1.8          {d8}, [r6], lr
    vst1.8          {d0}, [r5], lr
    vst1.8          {d8}, [r6], lr
    bne             extra_top_bottom_16

    sub             r5, r1, r8
    add             r6, r2, lr
    subs            r7, r7, #1
    bne             extra_top_bottom_uv

    b               end_of_border_copy_uv


;=======================
b16_extend_frame_borders
;border = 16
;=======================
;Border copy for Y plane
;copy the left and right most columns out
    sub             r5, r1, r3              ;destptr1: start of left border
    add             r6, r1, lr
    sub             r6, r6, r3, lsl #1      ;destptr2: start of right border
    sub             r2, r6, #1              ;srcptr2: last pixel of the row

    ;Do four rows at one time
    mov             r12, r4, lsr #2

copy_left_right_y_b16
    ;Load one byte per row and replicate it across 16 lanes
    vld1.8          {d0[], d1[]}, [r1], lr
    vld1.8          {d4[], d5[]}, [r2], lr
    vld1.8          {d8[], d9[]}, [r1], lr
    vld1.8          {d12[], d13[]}, [r2], lr
    vld1.8          {d16[], d17[]},  [r1], lr
    vld1.8          {d20[], d21[]}, [r2], lr
    vld1.8          {d24[], d25[]}, [r1], lr
    vld1.8          {d28[], d29[]}, [r2], lr

    subs            r12, r12, #1

    vst1.8          {q0}, [r5], lr
    vst1.8          {q2}, [r6], lr
    vst1.8          {q4}, [r5], lr
    vst1.8          {q6}, [r6], lr
    vst1.8          {q8}, [r5], lr
    vst1.8          {q10}, [r6], lr
    vst1.8          {q12}, [r5], lr
    vst1.8          {q14}, [r6], lr

    bne             copy_left_right_y_b16

;Now copy the top and bottom source lines into each line of the respective borders
;r1 has stepped through every row and now points one row past the last one.
    ldr             r7, [r0, #yv12_buffer_config_y_buffer]       ;srcptr1
    mul             r8, r3, lr              ;border * stride

    sub             r6, r1, r3              ;destptr2: first bottom border row
    sub             r2, r6, lr              ;srcptr2: last row, incl. left border
    sub             r1, r7, r3              ;srcptr1: first row, incl. left border
    sub             r5, r1, r8              ;destptr1: first top border row

    ;Number of full 128-byte column chunks in a row. The chunk loop is
    ;bottom-tested, so a stride below 128 must skip it: otherwise it would
    ;copy past the row end and the counter would wrap around from zero.
    ;The 16-byte "extra" loop then handles the whole row.
    movs            r12, lr, lsr #7
    beq             check_extra_top_bottom_y_b16

copy_top_bottom_y_b16
    vld1.8          {q0, q1}, [r1]!
    vld1.8          {q8, q9}, [r2]!
    vld1.8          {q2, q3}, [r1]!
    vld1.8          {q10, q11}, [r2]!
    vld1.8          {q4, q5}, [r1]!
    vld1.8          {q12, q13}, [r2]!
    vld1.8          {q6, q7}, [r1]!
    vld1.8          {q14, q15}, [r2]!

    mov             r7, r3                  ;one pass per border row

top_bottom_16_b16
    subs            r7, r7, #1

    vst1.8          {q0, q1}, [r5]!
    vst1.8          {q8, q9}, [r6]!
    vst1.8          {q2, q3}, [r5]!
    vst1.8          {q10, q11}, [r6]!
    vst1.8          {q4, q5}, [r5]!
    vst1.8          {q12, q13}, [r6]!
    vst1.8          {q6, q7}, [r5]!
    vst1.8          {q14, q15}, [r6]!

    ;Step down one row, back to the start of this 128-byte chunk
    add             r5, r5, lr
    sub             r5, r5, #128
    add             r6, r6, lr
    sub             r6, r6, #128

    bne             top_bottom_16_b16

    ;Rewind both destinations to the first border row of the next chunk
    sub             r5, r1, r8
    add             r6, r2, lr

    subs            r12, r12, #1
    bne             copy_top_bottom_y_b16

check_extra_top_bottom_y_b16
    mov             r7, lr, lsr #4              ;check to see if extra copy is needed
    ands            r7, r7, #0x7                ;16-byte chunks left over
    bne             extra_top_bottom_y_b16
end_of_border_copy_y_b16

;Border copy for U, V planes
    ldr             r1, [r0, #yv12_buffer_config_u_buffer]       ;srcptr1
    mov             lr, lr, lsr #1              ;uv_stride
    mov             r3, r3, lsr #1              ;border
    mov             r4, r4, lsr #1              ;uv_height
    mov             r8, r8, lsr #2              ;uv border * uv_stride

    mov             r10, #2                     ;two chroma planes: U then V

;copy the left and right most columns out
border_copy_uv_b16
    sub             r5, r1, r3              ;destptr1
    add             r6, r1, lr
    sub             r6, r6, r3, lsl #1      ;destptr2
    sub             r2, r6, #1              ;srcptr2

    mov             r7, r1                  ;keep the plane start for later

    ;Do eight rows at one time
    mov             r12, r4, lsr #3

copy_left_right_uv_b16
    vld1.8          {d0[]}, [r1], lr
    vld1.8          {d2[]}, [r2], lr
    vld1.8          {d4[]}, [r1], lr
    vld1.8          {d6[]}, [r2], lr
    vld1.8          {d8[]},  [r1], lr
    vld1.8          {d10[]}, [r2], lr
    vld1.8          {d12[]}, [r1], lr
    vld1.8          {d14[]}, [r2], lr
    vld1.8          {d16[]}, [r1], lr
    vld1.8          {d18[]}, [r2], lr
    vld1.8          {d20[]}, [r1], lr
    vld1.8          {d22[]}, [r2], lr
    vld1.8          {d24[]},  [r1], lr
    vld1.8          {d26[]}, [r2], lr
    vld1.8          {d28[]}, [r1], lr
    vld1.8          {d30[]}, [r2], lr

    subs            r12, r12, #1

    vst1.8          {d0}, [r5], lr
    vst1.8          {d2}, [r6], lr
    vst1.8          {d4}, [r5], lr
    vst1.8          {d6}, [r6], lr
    vst1.8          {d8}, [r5], lr
    vst1.8          {d10}, [r6], lr
    vst1.8          {d12}, [r5], lr
    vst1.8          {d14}, [r6], lr
    vst1.8          {d16}, [r5], lr
    vst1.8          {d18}, [r6], lr
    vst1.8          {d20}, [r5], lr
    vst1.8          {d22}, [r6], lr
    vst1.8          {d24}, [r5], lr
    vst1.8          {d26}, [r6], lr
    vst1.8          {d28}, [r5], lr
    vst1.8          {d30}, [r6], lr

    bne             copy_left_right_uv_b16

;Now copy the top and bottom source lines into each line of the respective borders
    sub             r6, r1, r3              ;destptr2
    sub             r2, r6, lr              ;srcptr2
    sub             r1, r7, r3              ;srcptr1
    sub             r5, r1, r8              ;destptr1

    ;Number of full 64-byte column chunks in a row; skip the bottom-tested
    ;chunk loop when there are none (uv_stride below 64).
    movs            r12, lr, lsr #6
    beq             check_extra_top_bottom_uv_b16

copy_top_bottom_uv_b16
    vld1.8          {q0, q1}, [r1]!
    vld1.8          {q8, q9}, [r2]!
    vld1.8          {q2, q3}, [r1]!
    vld1.8          {q10, q11}, [r2]!

    mov             r7, r3                  ;one pass per border row

top_bottom_8_b16
    subs            r7, r7, #1

    vst1.8          {q0, q1}, [r5]!
    vst1.8          {q8, q9}, [r6]!
    vst1.8          {q2, q3}, [r5]!
    vst1.8          {q10, q11}, [r6]!

    ;Step down one row, back to the start of this 64-byte chunk
    add             r5, r5, lr
    sub             r5, r5, #64
    add             r6, r6, lr
    sub             r6, r6, #64

    bne             top_bottom_8_b16

    sub             r5, r1, r8
    add             r6, r2, lr

    subs            r12, r12, #1
    bne             copy_top_bottom_uv_b16

check_extra_top_bottom_uv_b16
    mov             r7, lr, lsr #3              ;check to see if extra copy is needed
    ands            r7, r7, #0x7                ;8-byte chunks left over
    bne             extra_top_bottom_uv_b16

end_of_border_copy_uv_b16
    subs            r10, r10, #1
    ldrne           r1, [r0, #yv12_buffer_config_v_buffer]       ;srcptr1
    bne             border_copy_uv_b16

    vpop            {d8-d15}
    pop             {r4 - r10, pc}

;;;;;;;;;;;;;;;;;;;;;;
;extra copy part for Y
;Handles the columns left over after the 128-byte chunks, 16 bytes wide
;at a time; r7 = number of such columns still to do.
extra_top_bottom_y_b16
    vld1.8          {q0}, [r1]!
    vld1.8          {q2}, [r2]!

    mov             r9, r3, lsr #3          ;border rows / 8 (8 rows per pass)

extra_top_bottom_16_b16
    subs            r9, r9, #1

    vst1.8          {q0}, [r5], lr
    vst1.8          {q2}, [r6], lr
    vst1.8          {q0}, [r5], lr
    vst1.8          {q2}, [r6], lr
    vst1.8          {q0}, [r5], lr
    vst1.8          {q2}, [r6], lr
    vst1.8          {q0}, [r5], lr
    vst1.8          {q2}, [r6], lr
    vst1.8          {q0}, [r5], lr
    vst1.8          {q2}, [r6], lr
    vst1.8          {q0}, [r5], lr
    vst1.8          {q2}, [r6], lr
    vst1.8          {q0}, [r5], lr
    vst1.8          {q2}, [r6], lr
    vst1.8          {q0}, [r5], lr
    vst1.8          {q2}, [r6], lr
    bne             extra_top_bottom_16_b16

    sub             r5, r1, r8
    add             r6, r2, lr
    subs            r7, r7, #1
    bne             extra_top_bottom_y_b16

    b               end_of_border_copy_y_b16

;extra copy part for UV
;Same as above for the chroma planes, 8 bytes wide at a time.
extra_top_bottom_uv_b16
    vld1.8          {d0}, [r1]!
    vld1.8          {d8}, [r2]!

    mov             r9, r3, lsr #3          ;border rows / 8 (8 rows per pass)

extra_top_bottom_8_b16
    subs            r9, r9, #1

    vst1.8          {d0}, [r5], lr
    vst1.8          {d8}, [r6], lr
    vst1.8          {d0}, [r5], lr
    vst1.8          {d8}, [r6], lr
    vst1.8          {d0}, [r5], lr
    vst1.8          {d8}, [r6], lr
    vst1.8          {d0}, [r5], lr
    vst1.8          {d8}, [r6], lr
    vst1.8          {d0}, [r5], lr
    vst1.8          {d8}, [r6], lr
    vst1.8          {d0}, [r5], lr
    vst1.8          {d8}, [r6], lr
    vst1.8          {d0}, [r5], lr
    vst1.8          {d8}, [r6], lr
    vst1.8          {d0}, [r5], lr
    vst1.8          {d8}, [r6], lr
    bne             extra_top_bottom_8_b16

    sub             r5, r1, r8
    add             r6, r2, lr
    subs            r7, r7, #1
    bne             extra_top_bottom_uv_b16

    b               end_of_border_copy_uv_b16

    ENDP
588    END
589