;
;  Copyright (c) 2013 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;

; Assembler setup (armasm syntax: labels start in the first column, directives
; and instructions are indented).
;   EXPORT              - makes |vp9_convolve_avg_neon| visible to the linker.
;   ARM                 - the code below is assembled as ARM (A32) instructions.
;   REQUIRE8/PRESERVE8  - the code requires, and keeps, 8-byte stack alignment.
;   AREA                - read-only code section, aligned to 2^2 = 4 bytes.
    EXPORT  |vp9_convolve_avg_neon|
    ARM
    REQUIRE8
    PRESERVE8

    AREA ||.text||, CODE, READONLY, ALIGN=2

;------------------------------------------------------------------------------
; vp9_convolve_avg_neon
;
; Averages a block of source bytes into the destination, with rounding:
;       dst[x] = (dst[x] + src[x] + 1) >> 1             (vrhadd.u8)
;
; ABI:   AAPCS, ARM (A32) state, NEON.
; In:    r0         = source pointer (read only, no alignment qualifier)
;        r1         = source stride in bytes
;        r2         = destination pointer (read and written)
;        r3         = destination stride in bytes
;        [sp, #16]  = block width  w (offset at entry; loaded into r4)
;        [sp, #20]  = block height h (offset at entry; loaded into r5)
;        The four stack words at [sp, #0] .. [sp, #12] are never read.
; Out:   nothing
; Clobb: q0-q3, q8-q11, flags. r0 and r2 are advanced; r4-r6 and lr are
;        saved on entry and restored on return.
;
; NOTE(review): the register/stack layout is consistent with a C prototype of
; the form (src, src_stride, dst, dst_stride, filter_x, x_step_q4, filter_y,
; y_step_q4, w, h) with the four filter arguments ignored - confirm against
; the declaring header.
;
; Width dispatch:
;        w > 32       -> avg64  (64 bytes per row, one row per iteration)
;        w == 32      -> avg32  (32 bytes per row, two rows per iteration)
;        8 < w < 32   -> avg16  (16 bytes per row, two rows per iteration)
;        w == 8       -> avg8   ( 8 bytes per row, two rows per iteration)
;        w < 8        -> avg4   ( 4 bytes per row, two rows per iteration)
;
; Row count: every loop runs its body before testing the counter, so at least
; one iteration is always executed. The two-row paths subtract 2 per
; iteration; an odd h therefore processes h + 1 rows. Callers are expected to
; pass an even, positive h for w <= 32.
;
; Alignment: all destination accesses carry alignment qualifiers (@128 for the
; 64/32/16-wide paths, @64 for the 8-wide path, @32 for the 4-wide path), so
; each destination row address must be aligned to 16, 8 or 4 bytes
; respectively. Source accesses have no alignment requirement.
;------------------------------------------------------------------------------
|vp9_convolve_avg_neon| PROC
    push                {r4-r6, lr}             ; 16 bytes: stack args move up by 16
    ldrd                r4, r5, [sp, #32]       ; r4 = w, r5 = h
    mov                 r6, r2                  ; r6 = dst read ptr, r2 = dst write ptr

    ; Select the row-width specific loop (see the dispatch table above).
    cmp                 r4, #32
    bgt                 avg64
    beq                 avg32
    cmp                 r4, #8
    bgt                 avg16
    beq                 avg8
    b                   avg4

    ; ---- 64 bytes per row, one row per iteration ----------------------------
    ; Each row is moved as two 32-byte halves. The first access post-increments
    ; by 32, so the second one adds (stride - 32) to land on the next row.
    ; w is no longer needed, so r4 is reused for the destination adjustment.
avg64
    sub                 lr, r1, #32             ; lr = src stride - 32
    sub                 r4, r3, #32             ; r4 = dst stride - 32
avg64_h
    pld                 [r0, r1, lsl #1]        ; prefetch src two rows ahead
    vld1.8              {q0-q1}, [r0]!
    vld1.8              {q2-q3}, [r0], lr
    pld                 [r2, r3]                ; prefetch next dst row
    vld1.8              {q8-q9},   [r6@128]!
    vld1.8              {q10-q11}, [r6@128], r4
    vrhadd.u8           q0, q0, q8
    vrhadd.u8           q1, q1, q9
    vrhadd.u8           q2, q2, q10
    vrhadd.u8           q3, q3, q11
    vst1.8              {q0-q1}, [r2@128]!
    vst1.8              {q2-q3}, [r2@128], r4
    subs                r5, r5, #1              ; one row done
    bgt                 avg64_h
    pop                 {r4-r6, pc}

    ; ---- 32 bytes per row, two rows per iteration ---------------------------
    ; After the loads r0/r6 already point at the next row pair, so the plds
    ; below prefetch the rows used by the following iteration.
avg32
    vld1.8              {q0-q1}, [r0], r1       ; src row 0
    vld1.8              {q2-q3}, [r0], r1       ; src row 1
    vld1.8              {q8-q9},   [r6@128], r3 ; dst row 0
    vld1.8              {q10-q11}, [r6@128], r3 ; dst row 1
    pld                 [r0]
    vrhadd.u8           q0, q0, q8
    pld                 [r0, r1]
    vrhadd.u8           q1, q1, q9
    pld                 [r6]
    vrhadd.u8           q2, q2, q10
    pld                 [r6, r3]
    vrhadd.u8           q3, q3, q11
    vst1.8              {q0-q1}, [r2@128], r3
    vst1.8              {q2-q3}, [r2@128], r3
    subs                r5, r5, #2              ; two rows done
    bgt                 avg32
    pop                 {r4-r6, pc}

    ; ---- 16 bytes per row, two rows per iteration ---------------------------
avg16
    vld1.8              {q0}, [r0], r1          ; src row 0
    vld1.8              {q1}, [r0], r1          ; src row 1
    vld1.8              {q2}, [r6@128], r3      ; dst row 0
    vld1.8              {q3}, [r6@128], r3      ; dst row 1
    pld                 [r0]
    pld                 [r0, r1]
    vrhadd.u8           q0, q0, q2
    pld                 [r6]
    pld                 [r6, r3]
    vrhadd.u8           q1, q1, q3
    vst1.8              {q0}, [r2@128], r3
    vst1.8              {q1}, [r2@128], r3
    subs                r5, r5, #2              ; two rows done
    bgt                 avg16
    pop                 {r4-r6, pc}

    ; ---- 8 bytes per row, two rows per iteration ----------------------------
    ; The two src rows are packed into q0 (d0:d1) and the two dst rows into
    ; q1 (d2:d3), so a single q-register vrhadd averages both rows.
avg8
    vld1.8              {d0}, [r0], r1          ; src row 0
    vld1.8              {d1}, [r0], r1          ; src row 1
    vld1.8              {d2}, [r6@64], r3       ; dst row 0
    vld1.8              {d3}, [r6@64], r3       ; dst row 1
    pld                 [r0]
    pld                 [r0, r1]
    vrhadd.u8           q0, q0, q1
    pld                 [r6]
    pld                 [r6, r3]
    vst1.8              {d0}, [r2@64], r3
    vst1.8              {d1}, [r2@64], r3
    subs                r5, r5, #2              ; two rows done
    bgt                 avg8
    pop                 {r4-r6, pc}

    ; ---- 4 bytes per row, two rows per iteration ----------------------------
    ; Each row is one 32-bit lane: both src rows share d0, both dst rows d2.
avg4
    vld1.32             {d0[0]}, [r0], r1       ; src row 0
    vld1.32             {d0[1]}, [r0], r1       ; src row 1
    vld1.32             {d2[0]}, [r6@32], r3    ; dst row 0
    vld1.32             {d2[1]}, [r6@32], r3    ; dst row 1
    vrhadd.u8           d0, d0, d2
    vst1.32             {d0[0]}, [r2@32], r3
    vst1.32             {d0[1]}, [r2@32], r3
    subs                r5, r5, #2              ; two rows done
    bgt                 avg4
    pop                 {r4-r6, pc}
    ENDP

; End of assembly source.
    END