sad8_neon.asm revision 90d3ed91ae9228e1c8bab561b6138d4cb8c1e4fd
1;
2;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
3;
4;  Use of this source code is governed by a BSD-style license and patent
5;  grant that can be found in the LICENSE file in the root of the source
6;  tree. All contributing project authors may be found in the AUTHORS
7;  file in the root of the source tree.
8;
9
10
; Syntax: ARM armasm.  Target: ARM (A32) instruction set with NEON.

    ; Entry points made visible to the linker.
    EXPORT  |vp8_sad8x8_neon|
    EXPORT  |vp8_sad8x16_neon|
    EXPORT  |vp8_sad4x4_neon|

    ARM                     ; assemble as ARM (not Thumb) instructions
    REQUIRE8                ; this code requires 8-byte stack alignment
    PRESERVE8               ; this code preserves 8-byte stack alignment

    ; Read-only code section, aligned to 2^2 = 4 bytes.
    AREA ||.text||, CODE, READONLY, ALIGN=2
;-----------------------------------------------------------------------
; unsigned int vp8_sad8x8_neon(        ; same prototype as vp8_sad8x8_c
;     unsigned char *src_ptr,
;     int            src_stride,
;     unsigned char *ref_ptr,
;     int            ref_stride)
;
; Sum of absolute differences between two 8x8 blocks of bytes.
;
; In:    r0 = src_ptr      r1 = src_stride (bytes between rows)
;        r2 = ref_ptr      r3 = ref_stride (bytes between rows)
; Out:   r0 = sum over 8 rows x 8 columns of |src - ref|
; Clobb: r0, r2 (post-incremented by the loads),
;        d0-d4, d6, d16, d18, d20, d22, q12 (d24-d25)
;
; Only caller-saved NEON registers are used.  d8-d15 are callee-saved
; under the AAPCS and must not be overwritten without being saved, so
; the reference rows are kept in d16/d18/d20/d22.
;
; q12 holds eight 16-bit column accumulators.  Each lane receives 8
; differences of at most 255, i.e. at most 2040, so it cannot overflow.
; Loads are issued one row ahead of the accumulate that consumes them.
;-----------------------------------------------------------------------
|vp8_sad8x8_neon| PROC
    vld1.8          {d0}, [r0], r1          ; src row 0
    vld1.8          {d16}, [r2], r3         ; ref row 0

    vld1.8          {d2}, [r0], r1          ; src row 1
    vld1.8          {d18}, [r2], r3         ; ref row 1

    vabdl.u8        q12, d0, d16            ; acc  = |row 0 diff| (widened to u16)

    vld1.8          {d4}, [r0], r1          ; src row 2
    vld1.8          {d20}, [r2], r3         ; ref row 2

    vabal.u8        q12, d2, d18            ; acc += |row 1 diff|

    vld1.8          {d6}, [r0], r1          ; src row 3
    vld1.8          {d22}, [r2], r3         ; ref row 3

    vabal.u8        q12, d4, d20            ; acc += |row 2 diff|

    vld1.8          {d0}, [r0], r1          ; src row 4
    vld1.8          {d16}, [r2], r3         ; ref row 4

    vabal.u8        q12, d6, d22            ; acc += |row 3 diff|

    vld1.8          {d2}, [r0], r1          ; src row 5
    vld1.8          {d18}, [r2], r3         ; ref row 5

    vabal.u8        q12, d0, d16            ; acc += |row 4 diff|

    vld1.8          {d4}, [r0], r1          ; src row 6
    vld1.8          {d20}, [r2], r3         ; ref row 6

    vabal.u8        q12, d2, d18            ; acc += |row 5 diff|

    vld1.8          {d6}, [r0], r1          ; src row 7
    vld1.8          {d22}, [r2], r3         ; ref row 7

    vabal.u8        q12, d4, d20            ; acc += |row 6 diff|
    vabal.u8        q12, d6, d22            ; acc += |row 7 diff|

    ; Horizontal reduction of the eight u16 column sums.
    vpaddl.u16      q1, q12                 ; 8 x u16 -> 4 x u32
    vpaddl.u32      q0, q1                  ; 4 x u32 -> 2 x u64
    vadd.u32        d0, d0, d1              ; low word of d0 = total (fits in 32 bits)

    vmov.32         r0, d0[0]               ; return value

    bx              lr

    ENDP
75
;-----------------------------------------------------------------------
; unsigned int vp8_sad8x16_neon(       ; same prototype as vp8_sad8x16_c
;     unsigned char *src_ptr,
;     int            src_stride,
;     unsigned char *ref_ptr,
;     int            ref_stride)
;
; Sum of absolute differences between two blocks of bytes that are
; 8 columns wide and 16 rows high.
;
; In:    r0 = src_ptr      r1 = src_stride (bytes between rows)
;        r2 = ref_ptr      r3 = ref_stride (bytes between rows)
; Out:   r0 = sum over 16 rows x 8 columns of |src - ref|
; Clobb: r0, r2 (post-incremented by the loads),
;        d0-d4, d6, d16, d18, d20, d22, q12 (d24-d25)
;
; Only caller-saved NEON registers are used.  d8-d15 are callee-saved
; under the AAPCS and must not be overwritten without being saved, so
; the reference rows are kept in d16/d18/d20/d22.
;
; q12 holds eight 16-bit column accumulators.  Each lane receives 16
; differences of at most 255, i.e. at most 4080, so it cannot overflow.
; Loads are issued one row ahead of the accumulate that consumes them.
;-----------------------------------------------------------------------
|vp8_sad8x16_neon| PROC
    vld1.8          {d0}, [r0], r1          ; src row 0
    vld1.8          {d16}, [r2], r3         ; ref row 0

    vld1.8          {d2}, [r0], r1          ; src row 1
    vld1.8          {d18}, [r2], r3         ; ref row 1

    vabdl.u8        q12, d0, d16            ; acc  = |row 0 diff| (widened to u16)

    vld1.8          {d4}, [r0], r1          ; src row 2
    vld1.8          {d20}, [r2], r3         ; ref row 2

    vabal.u8        q12, d2, d18            ; acc += |row 1 diff|

    vld1.8          {d6}, [r0], r1          ; src row 3
    vld1.8          {d22}, [r2], r3         ; ref row 3

    vabal.u8        q12, d4, d20            ; acc += |row 2 diff|

    vld1.8          {d0}, [r0], r1          ; src row 4
    vld1.8          {d16}, [r2], r3         ; ref row 4

    vabal.u8        q12, d6, d22            ; acc += |row 3 diff|

    vld1.8          {d2}, [r0], r1          ; src row 5
    vld1.8          {d18}, [r2], r3         ; ref row 5

    vabal.u8        q12, d0, d16            ; acc += |row 4 diff|

    vld1.8          {d4}, [r0], r1          ; src row 6
    vld1.8          {d20}, [r2], r3         ; ref row 6

    vabal.u8        q12, d2, d18            ; acc += |row 5 diff|

    vld1.8          {d6}, [r0], r1          ; src row 7
    vld1.8          {d22}, [r2], r3         ; ref row 7

    vabal.u8        q12, d4, d20            ; acc += |row 6 diff|

    vld1.8          {d0}, [r0], r1          ; src row 8
    vld1.8          {d16}, [r2], r3         ; ref row 8

    vabal.u8        q12, d6, d22            ; acc += |row 7 diff|

    vld1.8          {d2}, [r0], r1          ; src row 9
    vld1.8          {d18}, [r2], r3         ; ref row 9

    vabal.u8        q12, d0, d16            ; acc += |row 8 diff|

    vld1.8          {d4}, [r0], r1          ; src row 10
    vld1.8          {d20}, [r2], r3         ; ref row 10

    vabal.u8        q12, d2, d18            ; acc += |row 9 diff|

    vld1.8          {d6}, [r0], r1          ; src row 11
    vld1.8          {d22}, [r2], r3         ; ref row 11

    vabal.u8        q12, d4, d20            ; acc += |row 10 diff|

    vld1.8          {d0}, [r0], r1          ; src row 12
    vld1.8          {d16}, [r2], r3         ; ref row 12

    vabal.u8        q12, d6, d22            ; acc += |row 11 diff|

    vld1.8          {d2}, [r0], r1          ; src row 13
    vld1.8          {d18}, [r2], r3         ; ref row 13

    vabal.u8        q12, d0, d16            ; acc += |row 12 diff|

    vld1.8          {d4}, [r0], r1          ; src row 14
    vld1.8          {d20}, [r2], r3         ; ref row 14

    vabal.u8        q12, d2, d18            ; acc += |row 13 diff|

    vld1.8          {d6}, [r0], r1          ; src row 15
    vld1.8          {d22}, [r2], r3         ; ref row 15

    vabal.u8        q12, d4, d20            ; acc += |row 14 diff|
    vabal.u8        q12, d6, d22            ; acc += |row 15 diff|

    ; Horizontal reduction of the eight u16 column sums.
    vpaddl.u16      q1, q12                 ; 8 x u16 -> 4 x u32
    vpaddl.u32      q0, q1                  ; 4 x u32 -> 2 x u64
    vadd.u32        d0, d0, d1              ; low word of d0 = total (fits in 32 bits)

    vmov.32         r0, d0[0]               ; return value

    bx              lr

    ENDP
172
;-----------------------------------------------------------------------
; unsigned int vp8_sad4x4_neon(        ; same prototype as vp8_sad4x4_c
;     unsigned char *src_ptr,
;     int            src_stride,
;     unsigned char *ref_ptr,
;     int            ref_stride)
;
; Sum of absolute differences between two 4x4 blocks of bytes.
;
; In:    r0 = src_ptr      r1 = src_stride (bytes between rows)
;        r2 = ref_ptr      r3 = ref_stride (bytes between rows)
; Out:   r0 = sum over 4 rows x 4 columns of |src - ref|
; Clobb: r0, r2 (post-incremented by the loads),
;        d0-d2, d4, d6, d16, d18, d20, d22, q12 (d24-d25)
;
; Only caller-saved NEON registers are used.  d8-d15 are callee-saved
; under the AAPCS and must not be overwritten without being saved, so
; the reference rows are kept in d16/d18/d20/d22.
;
; NOTE: every row is loaded as a full 8-byte D register, so 4 bytes
; beyond each 4-pixel row are read from both buffers.  Those bytes land
; in the upper lanes of q12 (d25), which the reduction never reads, so
; they do not affect the result, but they must be readable memory.
;-----------------------------------------------------------------------
|vp8_sad4x4_neon| PROC
    vld1.8          {d0}, [r0], r1          ; src row 0 (8 bytes, low 4 used)
    vld1.8          {d16}, [r2], r3         ; ref row 0

    vld1.8          {d2}, [r0], r1          ; src row 1
    vld1.8          {d18}, [r2], r3         ; ref row 1

    vabdl.u8        q12, d0, d16            ; acc  = |row 0 diff| (widened to u16)

    vld1.8          {d4}, [r0], r1          ; src row 2
    vld1.8          {d20}, [r2], r3         ; ref row 2

    vabal.u8        q12, d2, d18            ; acc += |row 1 diff|

    vld1.8          {d6}, [r0], r1          ; src row 3
    vld1.8          {d22}, [r2], r3         ; ref row 3

    vabal.u8        q12, d4, d20            ; acc += |row 2 diff|
    vabal.u8        q12, d6, d22            ; acc += |row 3 diff|

    ; Reduce only d24, the low half of q12, i.e. columns 0-3.
    vpaddl.u16      d1, d24                 ; 4 x u16 -> 2 x u32
    vpaddl.u32      d0, d1                  ; 2 x u32 -> 1 x u64
    vmov.32         r0, d0[0]               ; return value (low 32 bits)

    bx              lr

    ENDP
207
; End of source file.
    END