;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;
10233d2500723e5594f3e7c70896ffeeef32b9c950ywan
11233d2500723e5594f3e7c70896ffeeef32b9c950ywan
12233d2500723e5594f3e7c70896ffeeef32b9c950ywan    EXPORT  |vp8_sixtap_predict8x4_neon|
13233d2500723e5594f3e7c70896ffeeef32b9c950ywan    ARM
14233d2500723e5594f3e7c70896ffeeef32b9c950ywan    REQUIRE8
15233d2500723e5594f3e7c70896ffeeef32b9c950ywan    PRESERVE8
16233d2500723e5594f3e7c70896ffeeef32b9c950ywan
17233d2500723e5594f3e7c70896ffeeef32b9c950ywan    AREA ||.text||, CODE, READONLY, ALIGN=2
18233d2500723e5594f3e7c70896ffeeef32b9c950ywan
;-----------------------------------------------------------------------------
; filter8_coeff
;
; Six-tap filter coefficients, one row per offset value (rows 0..7).
; Each row is eight 32-bit words = 32 bytes, which is why the code indexes
; the table with "offset, lsl #5".  Only words 0..5 are read as taps; the
; last two words of every row are zero padding.
;
; Taps are stored signed.  The filter code takes their absolute values
; (vabs.s32) and restores the sign by using multiply-subtract (vmlsl) for
; taps 1 and 4 and multiply-add (vmull/vmlal) for taps 0, 2, 3 and 5.
;-----------------------------------------------------------------------------
filter8_coeff
    ;        tap0  tap1  tap2  tap3  tap4  tap5   pad   pad
    DCD        0,    0,  128,    0,    0,    0,    0,    0      ;row 0
    DCD        0,   -6,  123,   12,   -1,    0,    0,    0      ;row 1
    DCD        2,  -11,  108,   36,   -8,    1,    0,    0      ;row 2
    DCD        0,   -9,   93,   50,   -6,    0,    0,    0      ;row 3
    DCD        3,  -16,   77,   77,  -16,    3,    0,    0      ;row 4
    DCD        0,   -6,   50,   93,   -9,    0,    0,    0      ;row 5
    DCD        1,   -8,   36,  108,  -11,    2,    0,    0      ;row 6
    DCD        0,   -1,   12,  123,   -6,    0,    0,    0      ;row 7
28233d2500723e5594f3e7c70896ffeeef32b9c950ywan
;-----------------------------------------------------------------------------
; vp8_sixtap_predict8x4_neon
;
; Six-tap sub-pixel prediction that writes a block 8 pixels wide and 4 rows
; high to dst_ptr.
;
; Arguments:
; r0            unsigned char  *src_ptr,
; r1            int  src_pixels_per_line,
; r2            int  xoffset,           (row of filter8_coeff for first pass)
; r3            int  yoffset,           (row of filter8_coeff for second pass)
; stack(r4)     unsigned char *dst_ptr, (first stack argument, loaded into r4)
; stack(r5)     int  dst_pitch          (second stack argument, loaded into r5)
;
; Dispatch:
;   xoffset == 0              -> secondpass_filter8x4_only
;   xoffset != 0, yoffset == 0 -> firstpass_filter8x4_only
;   otherwise                 -> both passes, implemented directly below
;
; Two-pass path:
;   First pass filters 9 source rows horizontally, starting two rows above
;   and two columns left of src_ptr.  The first 4 filtered rows are spilled
;   to a 32-byte buffer on the stack, the remaining 5 stay in d26-d30.
;   Second pass filters those 9 rows vertically into 4 output rows.
;
; q4-q7 (d8-d15) are callee-saved under the AAPCS and are used as scratch
; here, so this path saves them after the early-out branches and restores
; them before returning.  The early-out branches are taken before that save.
;-----------------------------------------------------------------------------

|vp8_sixtap_predict8x4_neon| PROC
    push            {r4-r5, lr}

    adr             r12, filter8_coeff      ;r12 = base of the filter table
    ldr             r4, [sp, #12]           ;r4 = dst_ptr   (12 bytes were just pushed)
    ldr             r5, [sp, #16]           ;r5 = dst_pitch

    cmp             r2, #0                  ;skip first_pass filter if xoffset=0
    beq             secondpass_filter8x4_only

    add             r2, r12, r2, lsl #5     ;calculate filter location (32 bytes per row)

    cmp             r3, #0                  ;skip second_pass filter if yoffset=0

    vld1.s32        {q14, q15}, [r2]        ;load first_pass filter (does not touch flags)

    beq             firstpass_filter8x4_only

    vpush           {d8-d15}                ;save callee-saved q4-q7, clobbered below
    sub             sp, sp, #32             ;reserve space on stack for temporary storage
    vabs.s32        q12, q14                ;work with tap magnitudes; sign is applied
    vabs.s32        q13, q15                ;by choosing vmlal/vmlsl per tap

    sub             r0, r0, #2              ;move srcptr back to (line-2) and (column-2)
    mov             lr, sp                  ;lr = write pointer into the temporary buffer
    sub             r0, r0, r1, lsl #1

    vdup.8          d0, d24[0]              ;first_pass filter (d0-d5)
    vdup.8          d1, d24[4]
    vdup.8          d2, d25[0]

;First pass: output_height lines x output_width columns (9x8)
    vld1.u8         {q3}, [r0], r1          ;load src data
    vdup.8          d3, d25[4]
    vld1.u8         {q4}, [r0], r1
    vdup.8          d4, d26[0]
    vld1.u8         {q5}, [r0], r1
    vdup.8          d5, d26[4]
    vld1.u8         {q6}, [r0], r1

    pld             [r0]
    pld             [r0, r1]
    pld             [r0, r1, lsl #1]

    vmull.u8        q7, d6, d0              ;(src_ptr[-2] * vp8_filter[0])
    vmull.u8        q8, d8, d0
    vmull.u8        q9, d10, d0
    vmull.u8        q10, d12, d0

    vext.8          d28, d6, d7, #1         ;construct src_ptr[-1]
    vext.8          d29, d8, d9, #1
    vext.8          d30, d10, d11, #1
    vext.8          d31, d12, d13, #1

    vmlsl.u8        q7, d28, d1             ;-(src_ptr[-1] * vp8_filter[1])
    vmlsl.u8        q8, d29, d1
    vmlsl.u8        q9, d30, d1
    vmlsl.u8        q10, d31, d1

    vext.8          d28, d6, d7, #4         ;construct src_ptr[2]
    vext.8          d29, d8, d9, #4
    vext.8          d30, d10, d11, #4
    vext.8          d31, d12, d13, #4

    vmlsl.u8        q7, d28, d4             ;-(src_ptr[2] * vp8_filter[4])
    vmlsl.u8        q8, d29, d4
    vmlsl.u8        q9, d30, d4
    vmlsl.u8        q10, d31, d4

    vext.8          d28, d6, d7, #2         ;construct src_ptr[0]
    vext.8          d29, d8, d9, #2
    vext.8          d30, d10, d11, #2
    vext.8          d31, d12, d13, #2

    vmlal.u8        q7, d28, d2             ;(src_ptr[0] * vp8_filter[2])
    vmlal.u8        q8, d29, d2
    vmlal.u8        q9, d30, d2
    vmlal.u8        q10, d31, d2

    vext.8          d28, d6, d7, #5         ;construct src_ptr[3]
    vext.8          d29, d8, d9, #5
    vext.8          d30, d10, d11, #5
    vext.8          d31, d12, d13, #5

    vmlal.u8        q7, d28, d5             ;(src_ptr[3] * vp8_filter[5])
    vmlal.u8        q8, d29, d5
    vmlal.u8        q9, d30, d5
    vmlal.u8        q10, d31, d5

    vext.8          d28, d6, d7, #3         ;construct src_ptr[1]
    vext.8          d29, d8, d9, #3
    vext.8          d30, d10, d11, #3
    vext.8          d31, d12, d13, #3

    vmull.u8        q3, d28, d3             ;(src_ptr[1] * vp8_filter[3])
    vmull.u8        q4, d29, d3             ;kept in separate registers and merged
    vmull.u8        q5, d30, d3             ;with a saturating add below
    vmull.u8        q6, d31, d3

    vqadd.s16       q7, q3                  ;sum of all (src_data*filter_parameters)
    vqadd.s16       q8, q4
    vqadd.s16       q9, q5
    vqadd.s16       q10, q6

    vld1.u8         {q3}, [r0], r1          ;load src data (first of the remaining 5 rows)

    vqrshrun.s16    d22, q7, #7             ;shift/round/saturate to u8
    vqrshrun.s16    d23, q8, #7
    vqrshrun.s16    d24, q9, #7
    vqrshrun.s16    d25, q10, #7

    vld1.u8         {q4}, [r0], r1
    vst1.u8         {d22}, [lr]!            ;store result (filtered rows 0-3 to the stack)
    vld1.u8         {q5}, [r0], r1
    vst1.u8         {d23}, [lr]!
    vld1.u8         {q6}, [r0], r1
    vst1.u8         {d24}, [lr]!
    vld1.u8         {q7}, [r0], r1
    vst1.u8         {d25}, [lr]!

    ;first_pass filtering on the rest 5-line data
    vmull.u8        q8, d6, d0              ;(src_ptr[-2] * vp8_filter[0])
    vmull.u8        q9, d8, d0
    vmull.u8        q10, d10, d0
    vmull.u8        q11, d12, d0
    vmull.u8        q12, d14, d0

    vext.8          d27, d6, d7, #1         ;construct src_ptr[-1]
    vext.8          d28, d8, d9, #1
    vext.8          d29, d10, d11, #1
    vext.8          d30, d12, d13, #1
    vext.8          d31, d14, d15, #1

    vmlsl.u8        q8, d27, d1             ;-(src_ptr[-1] * vp8_filter[1])
    vmlsl.u8        q9, d28, d1
    vmlsl.u8        q10, d29, d1
    vmlsl.u8        q11, d30, d1
    vmlsl.u8        q12, d31, d1

    vext.8          d27, d6, d7, #4         ;construct src_ptr[2]
    vext.8          d28, d8, d9, #4
    vext.8          d29, d10, d11, #4
    vext.8          d30, d12, d13, #4
    vext.8          d31, d14, d15, #4

    vmlsl.u8        q8, d27, d4             ;-(src_ptr[2] * vp8_filter[4])
    vmlsl.u8        q9, d28, d4
    vmlsl.u8        q10, d29, d4
    vmlsl.u8        q11, d30, d4
    vmlsl.u8        q12, d31, d4

    vext.8          d27, d6, d7, #2         ;construct src_ptr[0]
    vext.8          d28, d8, d9, #2
    vext.8          d29, d10, d11, #2
    vext.8          d30, d12, d13, #2
    vext.8          d31, d14, d15, #2

    vmlal.u8        q8, d27, d2             ;(src_ptr[0] * vp8_filter[2])
    vmlal.u8        q9, d28, d2
    vmlal.u8        q10, d29, d2
    vmlal.u8        q11, d30, d2
    vmlal.u8        q12, d31, d2

    vext.8          d27, d6, d7, #5         ;construct src_ptr[3]
    vext.8          d28, d8, d9, #5
    vext.8          d29, d10, d11, #5
    vext.8          d30, d12, d13, #5
    vext.8          d31, d14, d15, #5

    vmlal.u8        q8, d27, d5             ;(src_ptr[3] * vp8_filter[5])
    vmlal.u8        q9, d28, d5
    vmlal.u8        q10, d29, d5
    vmlal.u8        q11, d30, d5
    vmlal.u8        q12, d31, d5

    vext.8          d27, d6, d7, #3         ;construct src_ptr[1]
    vext.8          d28, d8, d9, #3
    vext.8          d29, d10, d11, #3
    vext.8          d30, d12, d13, #3
    vext.8          d31, d14, d15, #3

    vmull.u8        q3, d27, d3             ;(src_ptr[1] * vp8_filter[3])
    vmull.u8        q4, d28, d3
    vmull.u8        q5, d29, d3
    vmull.u8        q6, d30, d3
    vmull.u8        q7, d31, d3

    vqadd.s16       q8, q3                  ;sum of all (src_data*filter_parameters)
    vqadd.s16       q9, q4
    vqadd.s16       q10, q5
    vqadd.s16       q11, q6
    vqadd.s16       q12, q7

    vqrshrun.s16    d26, q8, #7             ;shift/round/saturate to u8
    vqrshrun.s16    d27, q9, #7             ;(filtered rows 4-8 stay in d26-d30)
    vqrshrun.s16    d28, q10, #7
    vqrshrun.s16    d29, q11, #7
    vqrshrun.s16    d30, q12, #7

;Second pass: 8x4
;secondpass_filter
    add             r3, r12, r3, lsl #5     ;r3 = filter row for yoffset (32 bytes per row)
    sub             lr, lr, #32             ;rewind lr to the start of the temporary buffer

    vld1.s32        {q5, q6}, [r3]          ;load second_pass filter
    vld1.u8         {q11}, [lr]!            ;load intermediate data from stack (rows 0-1)

    vabs.s32        q7, q5
    vabs.s32        q8, q6

    vld1.u8         {q12}, [lr]!            ;load intermediate data from stack (rows 2-3)

    vdup.8          d0, d14[0]              ;second_pass filter parameters (d0-d5)
    vdup.8          d1, d14[4]
    vdup.8          d2, d15[0]
    vdup.8          d3, d15[4]
    vdup.8          d4, d16[0]
    vdup.8          d5, d16[4]

    vmull.u8        q3, d22, d0             ;(src_ptr[-2] * vp8_filter[0])
    vmull.u8        q4, d23, d0
    vmull.u8        q5, d24, d0
    vmull.u8        q6, d25, d0

    vmlsl.u8        q3, d23, d1             ;-(src_ptr[-1] * vp8_filter[1])
    vmlsl.u8        q4, d24, d1
    vmlsl.u8        q5, d25, d1
    vmlsl.u8        q6, d26, d1

    vmlsl.u8        q3, d26, d4             ;-(src_ptr[2] * vp8_filter[4])
    vmlsl.u8        q4, d27, d4
    vmlsl.u8        q5, d28, d4
    vmlsl.u8        q6, d29, d4

    vmlal.u8        q3, d24, d2             ;(src_ptr[0] * vp8_filter[2])
    vmlal.u8        q4, d25, d2
    vmlal.u8        q5, d26, d2
    vmlal.u8        q6, d27, d2

    vmlal.u8        q3, d27, d5             ;(src_ptr[3] * vp8_filter[5])
    vmlal.u8        q4, d28, d5
    vmlal.u8        q5, d29, d5
    vmlal.u8        q6, d30, d5

    vmull.u8        q7, d25, d3             ;(src_ptr[1] * vp8_filter[3])
    vmull.u8        q8, d26, d3
    vmull.u8        q9, d27, d3
    vmull.u8        q10, d28, d3

    vqadd.s16       q7, q3                  ;sum of all (src_data*filter_parameters)
    vqadd.s16       q8, q4
    vqadd.s16       q9, q5
    vqadd.s16       q10, q6

    vqrshrun.s16    d6, q7, #7              ;shift/round/saturate to u8
    vqrshrun.s16    d7, q8, #7
    vqrshrun.s16    d8, q9, #7
    vqrshrun.s16    d9, q10, #7

    vst1.u8         {d6}, [r4], r5          ;store result (4 rows of 8 bytes, dst_pitch apart)
    vst1.u8         {d7}, [r4], r5
    vst1.u8         {d8}, [r4], r5
    vst1.u8         {d9}, [r4], r5

    add             sp, sp, #32             ;release the temporary buffer
    vpop            {d8-d15}                ;restore callee-saved q4-q7
    pop             {r4-r5,pc}
301233d2500723e5594f3e7c70896ffeeef32b9c950ywan
302233d2500723e5594f3e7c70896ffeeef32b9c950ywan;--------------------
firstpass_filter8x4_only
;Horizontal-only path, reached from the function entry when yoffset == 0.
;On entry: r0 = src_ptr, r1 = src_pixels_per_line, r4 = dst_ptr,
;          r5 = dst_pitch, q14/q15 = filter row selected by xoffset,
;          and {r4-r5, lr} are on the stack from the function prologue.
;Filters 4 source rows and writes 4 rows of 8 bytes straight to dst_ptr.
    vpush           {d8-d15}                ;save callee-saved q4-q7, clobbered below

    vabs.s32        q12, q14                ;work with tap magnitudes; sign is applied
    vabs.s32        q13, q15                ;by choosing vmlal/vmlsl per tap

    sub             r0, r0, #2              ;move srcptr back to (column-2); rows are not shifted here
    vld1.u8         {q3}, [r0], r1          ;load src data

    vdup.8          d0, d24[0]              ;first_pass filter (d0-d5)
    vld1.u8         {q4}, [r0], r1
    vdup.8          d1, d24[4]
    vld1.u8         {q5}, [r0], r1
    vdup.8          d2, d25[0]
    vld1.u8         {q6}, [r0], r1
    vdup.8          d3, d25[4]
    vdup.8          d4, d26[0]
    vdup.8          d5, d26[4]

;First pass: output_height lines x output_width columns (4x8)
    pld             [r0]
    pld             [r0, r1]
    pld             [r0, r1, lsl #1]

    vmull.u8        q7, d6, d0              ;(src_ptr[-2] * vp8_filter[0])
    vmull.u8        q8, d8, d0
    vmull.u8        q9, d10, d0
    vmull.u8        q10, d12, d0

    vext.8          d28, d6, d7, #1         ;construct src_ptr[-1]
    vext.8          d29, d8, d9, #1
    vext.8          d30, d10, d11, #1
    vext.8          d31, d12, d13, #1

    vmlsl.u8        q7, d28, d1             ;-(src_ptr[-1] * vp8_filter[1])
    vmlsl.u8        q8, d29, d1
    vmlsl.u8        q9, d30, d1
    vmlsl.u8        q10, d31, d1

    vext.8          d28, d6, d7, #4         ;construct src_ptr[2]
    vext.8          d29, d8, d9, #4
    vext.8          d30, d10, d11, #4
    vext.8          d31, d12, d13, #4

    vmlsl.u8        q7, d28, d4             ;-(src_ptr[2] * vp8_filter[4])
    vmlsl.u8        q8, d29, d4
    vmlsl.u8        q9, d30, d4
    vmlsl.u8        q10, d31, d4

    vext.8          d28, d6, d7, #2         ;construct src_ptr[0]
    vext.8          d29, d8, d9, #2
    vext.8          d30, d10, d11, #2
    vext.8          d31, d12, d13, #2

    vmlal.u8        q7, d28, d2             ;(src_ptr[0] * vp8_filter[2])
    vmlal.u8        q8, d29, d2
    vmlal.u8        q9, d30, d2
    vmlal.u8        q10, d31, d2

    vext.8          d28, d6, d7, #5         ;construct src_ptr[3]
    vext.8          d29, d8, d9, #5
    vext.8          d30, d10, d11, #5
    vext.8          d31, d12, d13, #5

    vmlal.u8        q7, d28, d5             ;(src_ptr[3] * vp8_filter[5])
    vmlal.u8        q8, d29, d5
    vmlal.u8        q9, d30, d5
    vmlal.u8        q10, d31, d5

    vext.8          d28, d6, d7, #3         ;construct src_ptr[1]
    vext.8          d29, d8, d9, #3
    vext.8          d30, d10, d11, #3
    vext.8          d31, d12, d13, #3

    vmull.u8        q3, d28, d3             ;(src_ptr[1] * vp8_filter[3])
    vmull.u8        q4, d29, d3             ;kept in separate registers and merged
    vmull.u8        q5, d30, d3             ;with a saturating add below
    vmull.u8        q6, d31, d3

    vqadd.s16       q7, q3                  ;sum of all (src_data*filter_parameters)
    vqadd.s16       q8, q4
    vqadd.s16       q9, q5
    vqadd.s16       q10, q6

    vqrshrun.s16    d22, q7, #7             ;shift/round/saturate to u8
    vqrshrun.s16    d23, q8, #7
    vqrshrun.s16    d24, q9, #7
    vqrshrun.s16    d25, q10, #7

    vst1.u8         {d22}, [r4], r5         ;store result (4 rows of 8 bytes, dst_pitch apart)
    vst1.u8         {d23}, [r4], r5
    vst1.u8         {d24}, [r4], r5
    vst1.u8         {d25}, [r4], r5

    vpop            {d8-d15}                ;restore callee-saved q4-q7
    pop             {r4-r5,pc}
396233d2500723e5594f3e7c70896ffeeef32b9c950ywan
397233d2500723e5594f3e7c70896ffeeef32b9c950ywan;---------------------
398233d2500723e5594f3e7c70896ffeeef32b9c950ywansecondpass_filter8x4_only
399233d2500723e5594f3e7c70896ffeeef32b9c950ywan;Second pass: 8x4
;Runs the 6-tap filter down the columns only: nine 8-byte rows are read
;through r0 (post-incremented by r1 per row), four 8-byte result rows are
;written through r4 (post-incremented by r5 per row), then the routine returns.
;
;Registers as used in this section:
;  r0/r1   read pointer and its per-row step
;  r3      on entry a filter row index; becomes r12 + r3*32
;  r12     base that the index is added to -- presumably the address of
;          filter8_coeff (8 words = 32 bytes per row); it is set outside
;          this section, TODO confirm
;  r4/r5   write pointer and its per-row step
;  d0-d5   |filter[0]| .. |filter[5]|, each replicated to all 8 byte lanes
;  d22-d30 the nine input rows
;
;The src_ptr[k] indices in the comments below count rows relative to the
;value r0 had on entry to this section (r0 is moved back two rows first).
;
;NOTE(review): this section writes q4-q7 (d8-d15). AAPCS treats d8-d15 as
;callee-saved, while the epilogue here only restores r4, r5 and pc. Confirm
;that the caller contract or the function entry covers those registers.
400233d2500723e5594f3e7c70896ffeeef32b9c950ywan    add             r3, r12, r3, lsl #5     ;r3 = r12 + r3*32 (one 8-word table row per index)
401233d2500723e5594f3e7c70896ffeeef32b9c950ywan    sub             r0, r0, r1, lsl #1      ;r0 -= 2*r1: start two rows above the entry position
402233d2500723e5594f3e7c70896ffeeef32b9c950ywan    vld1.s32        {q5, q6}, [r3]          ;load second_pass filter
;Only magnitudes are kept: taps 1 and 4 are applied with vmlsl
;(multiply-subtract) below, the other taps with vmull/vmlal.
403233d2500723e5594f3e7c70896ffeeef32b9c950ywan    vabs.s32        q7, q5
404233d2500723e5594f3e7c70896ffeeef32b9c950ywan    vabs.s32        q8, q6
405233d2500723e5594f3e7c70896ffeeef32b9c950ywan
;Row loads are interleaved with the tap broadcasts. Each vdup.8 takes the
;low byte of one 32-bit tap magnitude (byte 0 or byte 4 of d14/d15/d16).
406233d2500723e5594f3e7c70896ffeeef32b9c950ywan    vld1.u8         {d22}, [r0], r1
407233d2500723e5594f3e7c70896ffeeef32b9c950ywan    vld1.u8         {d23}, [r0], r1
408233d2500723e5594f3e7c70896ffeeef32b9c950ywan    vld1.u8         {d24}, [r0], r1
409233d2500723e5594f3e7c70896ffeeef32b9c950ywan    vdup.8          d0, d14[0]              ;second_pass filter parameters (d0-d5)
410233d2500723e5594f3e7c70896ffeeef32b9c950ywan    vld1.u8         {d25}, [r0], r1
411233d2500723e5594f3e7c70896ffeeef32b9c950ywan    vdup.8          d1, d14[4]              ;|filter[1]|
412233d2500723e5594f3e7c70896ffeeef32b9c950ywan    vld1.u8         {d26}, [r0], r1
413233d2500723e5594f3e7c70896ffeeef32b9c950ywan    vdup.8          d2, d15[0]              ;|filter[2]|
414233d2500723e5594f3e7c70896ffeeef32b9c950ywan    vld1.u8         {d27}, [r0], r1
415233d2500723e5594f3e7c70896ffeeef32b9c950ywan    vdup.8          d3, d15[4]              ;|filter[3]|
416233d2500723e5594f3e7c70896ffeeef32b9c950ywan    vld1.u8         {d28}, [r0], r1
417233d2500723e5594f3e7c70896ffeeef32b9c950ywan    vdup.8          d4, d16[0]              ;|filter[4]|
418233d2500723e5594f3e7c70896ffeeef32b9c950ywan    vld1.u8         {d29}, [r0], r1
419233d2500723e5594f3e7c70896ffeeef32b9c950ywan    vdup.8          d5, d16[4]              ;|filter[5]|
420233d2500723e5594f3e7c70896ffeeef32b9c950ywan    vld1.u8         {d30}, [r0], r1
421233d2500723e5594f3e7c70896ffeeef32b9c950ywan
;q3..q6 accumulate output rows 0..3. Output row i combines input rows
;d(22+i) .. d(27+i). q5/q6 are reused here; the filter words they held
;were already consumed by the vabs instructions above.
422233d2500723e5594f3e7c70896ffeeef32b9c950ywan    vmull.u8        q3, d22, d0             ;(src_ptr[-2] * vp8_filter[0])
423233d2500723e5594f3e7c70896ffeeef32b9c950ywan    vmull.u8        q4, d23, d0
424233d2500723e5594f3e7c70896ffeeef32b9c950ywan    vmull.u8        q5, d24, d0
425233d2500723e5594f3e7c70896ffeeef32b9c950ywan    vmull.u8        q6, d25, d0
426233d2500723e5594f3e7c70896ffeeef32b9c950ywan
427233d2500723e5594f3e7c70896ffeeef32b9c950ywan    vmlsl.u8        q3, d23, d1             ;-(src_ptr[-1] * vp8_filter[1])
428233d2500723e5594f3e7c70896ffeeef32b9c950ywan    vmlsl.u8        q4, d24, d1
429233d2500723e5594f3e7c70896ffeeef32b9c950ywan    vmlsl.u8        q5, d25, d1
430233d2500723e5594f3e7c70896ffeeef32b9c950ywan    vmlsl.u8        q6, d26, d1
431233d2500723e5594f3e7c70896ffeeef32b9c950ywan
432233d2500723e5594f3e7c70896ffeeef32b9c950ywan    vmlsl.u8        q3, d26, d4             ;-(src_ptr[2] * vp8_filter[4])
433233d2500723e5594f3e7c70896ffeeef32b9c950ywan    vmlsl.u8        q4, d27, d4
434233d2500723e5594f3e7c70896ffeeef32b9c950ywan    vmlsl.u8        q5, d28, d4
435233d2500723e5594f3e7c70896ffeeef32b9c950ywan    vmlsl.u8        q6, d29, d4
436233d2500723e5594f3e7c70896ffeeef32b9c950ywan
437233d2500723e5594f3e7c70896ffeeef32b9c950ywan    vmlal.u8        q3, d24, d2             ;(src_ptr[0] * vp8_filter[2])
438233d2500723e5594f3e7c70896ffeeef32b9c950ywan    vmlal.u8        q4, d25, d2
439233d2500723e5594f3e7c70896ffeeef32b9c950ywan    vmlal.u8        q5, d26, d2
440233d2500723e5594f3e7c70896ffeeef32b9c950ywan    vmlal.u8        q6, d27, d2
441233d2500723e5594f3e7c70896ffeeef32b9c950ywan
442233d2500723e5594f3e7c70896ffeeef32b9c950ywan    vmlal.u8        q3, d27, d5             ;(src_ptr[3] * vp8_filter[5])
443233d2500723e5594f3e7c70896ffeeef32b9c950ywan    vmlal.u8        q4, d28, d5
444233d2500723e5594f3e7c70896ffeeef32b9c950ywan    vmlal.u8        q5, d29, d5
445233d2500723e5594f3e7c70896ffeeef32b9c950ywan    vmlal.u8        q6, d30, d5
446233d2500723e5594f3e7c70896ffeeef32b9c950ywan
;The tap-3 products go into separate registers (q7..q10) and are merged
;with a saturating signed add below -- presumably so the last addition
;saturates instead of wrapping in 16 bits. q7/q8 are overwritten here;
;the tap magnitudes they held were already broadcast into d0-d5.
447233d2500723e5594f3e7c70896ffeeef32b9c950ywan    vmull.u8        q7, d25, d3             ;(src_ptr[1] * vp8_filter[3])
448233d2500723e5594f3e7c70896ffeeef32b9c950ywan    vmull.u8        q8, d26, d3
449233d2500723e5594f3e7c70896ffeeef32b9c950ywan    vmull.u8        q9, d27, d3
450233d2500723e5594f3e7c70896ffeeef32b9c950ywan    vmull.u8        q10, d28, d3
451233d2500723e5594f3e7c70896ffeeef32b9c950ywan
452233d2500723e5594f3e7c70896ffeeef32b9c950ywan    vqadd.s16       q7, q3                  ;sum of all (src_data*filter_parameters)
453233d2500723e5594f3e7c70896ffeeef32b9c950ywan    vqadd.s16       q8, q4
454233d2500723e5594f3e7c70896ffeeef32b9c950ywan    vqadd.s16       q9, q5
455233d2500723e5594f3e7c70896ffeeef32b9c950ywan    vqadd.s16       q10, q6
456233d2500723e5594f3e7c70896ffeeef32b9c950ywan
;Rounding right shift by 7 with unsigned saturation narrows each 16-bit sum
;to a byte. The table rows shown at the top of the file each sum to 128.
457233d2500723e5594f3e7c70896ffeeef32b9c950ywan    vqrshrun.s16    d6, q7, #7              ;shift/round/saturate to u8
458233d2500723e5594f3e7c70896ffeeef32b9c950ywan    vqrshrun.s16    d7, q8, #7
459233d2500723e5594f3e7c70896ffeeef32b9c950ywan    vqrshrun.s16    d8, q9, #7
460233d2500723e5594f3e7c70896ffeeef32b9c950ywan    vqrshrun.s16    d9, q10, #7
461233d2500723e5594f3e7c70896ffeeef32b9c950ywan
462233d2500723e5594f3e7c70896ffeeef32b9c950ywan    vst1.u8         {d6}, [r4], r5          ;store result
463233d2500723e5594f3e7c70896ffeeef32b9c950ywan    vst1.u8         {d7}, [r4], r5
464233d2500723e5594f3e7c70896ffeeef32b9c950ywan    vst1.u8         {d8}, [r4], r5
465233d2500723e5594f3e7c70896ffeeef32b9c950ywan    vst1.u8         {d9}, [r4], r5
466233d2500723e5594f3e7c70896ffeeef32b9c950ywan
;Restores r4/r5 and returns by loading pc from the stack; the matching push
;is outside this section (presumably push {r4-r5, lr} at function entry).
467233d2500723e5594f3e7c70896ffeeef32b9c950ywan    pop             {r4-r5,pc}
468233d2500723e5594f3e7c70896ffeeef32b9c950ywan
469233d2500723e5594f3e7c70896ffeeef32b9c950ywan    ENDP
470233d2500723e5594f3e7c70896ffeeef32b9c950ywan
471233d2500723e5594f3e7c70896ffeeef32b9c950ywan;-----------------
472233d2500723e5594f3e7c70896ffeeef32b9c950ywan
473233d2500723e5594f3e7c70896ffeeef32b9c950ywan    END
474