190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;
2f71323e297a928af368937089d3ed71239786f86Andreas Huber;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;
4f71323e297a928af368937089d3ed71239786f86Andreas Huber;  Use of this source code is governed by a BSD-style license
5f71323e297a928af368937089d3ed71239786f86Andreas Huber;  that can be found in the LICENSE file in the root of the source
6f71323e297a928af368937089d3ed71239786f86Andreas Huber;  tree. An additional intellectual property rights grant can be found
7f71323e297a928af368937089d3ed71239786f86Andreas Huber;  in the file PATENTS.  All contributing project authors may
8f71323e297a928af368937089d3ed71239786f86Andreas Huber;  be found in the AUTHORS file in the root of the source tree.
990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;
1090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
1190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
1290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    EXPORT  |vp8_sixtap_predict8x4_neon|
1390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    ARM
1490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    REQUIRE8
1590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    PRESERVE8
1690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
1790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    AREA ||.text||, CODE, READONLY, ALIGN=2
18d35fe0269d77984b383b6bdc051f26b72da15277Ard Biesheuvel
; Six-tap sub-pixel filter coefficient table.
; One row per xoffset/yoffset value; each row is eight 32-bit words (32 bytes),
; which is why the code locates a row with "offset, lsl #5".
; In the code visible in this file only the first six words of a row are used
; as taps; the last two words are zero.
; The taps in each row sum to 128, matching the "#7" rounding shift applied to
; the filtered sums.
; The signs stored here are not used directly: the code takes vabs.s32 of the
; row and applies the signs itself (vmlsl for taps 1 and 4, vmull/vmlal for
; taps 0, 2, 3 and 5).
19d35fe0269d77984b383b6bdc051f26b72da15277Ard Biesheuvelfilter8_coeff
20d35fe0269d77984b383b6bdc051f26b72da15277Ard Biesheuvel    DCD     0,  0,  128,    0,   0,  0,   0,  0    ; offset 0
21d35fe0269d77984b383b6bdc051f26b72da15277Ard Biesheuvel    DCD     0, -6,  123,   12,  -1,  0,   0,  0    ; offset 1
22d35fe0269d77984b383b6bdc051f26b72da15277Ard Biesheuvel    DCD     2, -11, 108,   36,  -8,  1,   0,  0    ; offset 2
23d35fe0269d77984b383b6bdc051f26b72da15277Ard Biesheuvel    DCD     0, -9,   93,   50,  -6,  0,   0,  0    ; offset 3
24d35fe0269d77984b383b6bdc051f26b72da15277Ard Biesheuvel    DCD     3, -16,  77,   77, -16,  3,   0,  0    ; offset 4
25d35fe0269d77984b383b6bdc051f26b72da15277Ard Biesheuvel    DCD     0, -6,   50,   93,  -9,  0,   0,  0    ; offset 5
26d35fe0269d77984b383b6bdc051f26b72da15277Ard Biesheuvel    DCD     1, -8,   36,  108, -11,  2,   0,  0    ; offset 6
27d35fe0269d77984b383b6bdc051f26b72da15277Ard Biesheuvel    DCD     0, -1,   12,  123,  -6,   0,  0,  0    ; offset 7
28d35fe0269d77984b383b6bdc051f26b72da15277Ard Biesheuvel
29d35fe0269d77984b383b6bdc051f26b72da15277Ard Biesheuvel;-----------------
3090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber; r0    unsigned char  *src_ptr,
3190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber; r1    int  src_pixels_per_line,
3290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber; r2    int  xoffset,
3390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber; r3    int  yoffset,
3490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber; stack(r4) unsigned char *dst_ptr,
3590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber; stack(r5) int  dst_pitch
3690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
3790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber|vp8_sixtap_predict8x4_neon| PROC
3890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    push            {r4-r5, lr}
3990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
40d35fe0269d77984b383b6bdc051f26b72da15277Ard Biesheuvel    adr             r12, filter8_coeff
4190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    ldr             r4, [sp, #12]           ;load parameters from stack
4290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    ldr             r5, [sp, #16]           ;load parameters from stack
4390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
4490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    cmp             r2, #0                  ;skip first_pass filter if xoffset=0
4590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    beq             secondpass_filter8x4_only
4690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
4790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    add             r2, r12, r2, lsl #5     ;calculate filter location
4890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
4990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    cmp             r3, #0                  ;skip second_pass filter if yoffset=0
5090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
5190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vld1.s32        {q14, q15}, [r2]        ;load first_pass filter
5290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
5390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    beq             firstpass_filter8x4_only
5490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
5590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    sub             sp, sp, #32             ;reserve space on stack for temporary storage
5690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vabs.s32        q12, q14
5790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vabs.s32        q13, q15
5890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
5990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    sub             r0, r0, #2              ;move srcptr back to (line-2) and (column-2)
6090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    mov             lr, sp
6190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    sub             r0, r0, r1, lsl #1
6290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
6390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vdup.8          d0, d24[0]              ;first_pass filter (d0-d5)
6490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vdup.8          d1, d24[4]
6590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vdup.8          d2, d25[0]
6690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
6790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;First pass: output_height lines x output_width columns (9x8)
6890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vld1.u8         {q3}, [r0], r1          ;load src data
6990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vdup.8          d3, d25[4]
7090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vld1.u8         {q4}, [r0], r1
7190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vdup.8          d4, d26[0]
7290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vld1.u8         {q5}, [r0], r1
7390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vdup.8          d5, d26[4]
7490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vld1.u8         {q6}, [r0], r1
7590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
7690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    pld             [r0]
7790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    pld             [r0, r1]
7890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    pld             [r0, r1, lsl #1]
7990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
8090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q7, d6, d0              ;(src_ptr[-2] * vp8_filter[0])
8190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q8, d8, d0
8290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q9, d10, d0
8390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q10, d12, d0
8490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
8590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d28, d6, d7, #1         ;construct src_ptr[-1]
8690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d29, d8, d9, #1
8790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d30, d10, d11, #1
8890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d31, d12, d13, #1
8990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
9090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlsl.u8        q7, d28, d1             ;-(src_ptr[-1] * vp8_filter[1])
9190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlsl.u8        q8, d29, d1
9290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlsl.u8        q9, d30, d1
9390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlsl.u8        q10, d31, d1
9490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
9590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d28, d6, d7, #4         ;construct src_ptr[2]
9690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d29, d8, d9, #4
9790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d30, d10, d11, #4
9890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d31, d12, d13, #4
9990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
10090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlsl.u8        q7, d28, d4             ;-(src_ptr[2] * vp8_filter[4])
10190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlsl.u8        q8, d29, d4
10290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlsl.u8        q9, d30, d4
10390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlsl.u8        q10, d31, d4
10490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
10590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d28, d6, d7, #2         ;construct src_ptr[0]
10690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d29, d8, d9, #2
10790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d30, d10, d11, #2
10890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d31, d12, d13, #2
10990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
11090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q7, d28, d2             ;(src_ptr[0] * vp8_filter[2])
11190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q8, d29, d2
11290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q9, d30, d2
11390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q10, d31, d2
11490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
11590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d28, d6, d7, #5         ;construct src_ptr[3]
11690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d29, d8, d9, #5
11790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d30, d10, d11, #5
11890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d31, d12, d13, #5
11990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
12090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q7, d28, d5             ;(src_ptr[3] * vp8_filter[5])
12190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q8, d29, d5
12290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q9, d30, d5
12390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q10, d31, d5
12490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
12590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d28, d6, d7, #3         ;construct src_ptr[1]
12690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d29, d8, d9, #3
12790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d30, d10, d11, #3
12890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d31, d12, d13, #3
12990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
13090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q3, d28, d3             ;(src_ptr[1] * vp8_filter[3])
13190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q4, d29, d3
13290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q5, d30, d3
13390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q6, d31, d3
13490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
13590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqadd.s16       q7, q3                  ;sum of all (src_data*filter_parameters)
13690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqadd.s16       q8, q4
13790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqadd.s16       q9, q5
13890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqadd.s16       q10, q6
13990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
14090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vld1.u8         {q3}, [r0], r1          ;load src data
14190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
14290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqrshrun.s16    d22, q7, #7             ;shift/round/saturate to u8
14390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqrshrun.s16    d23, q8, #7
14490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqrshrun.s16    d24, q9, #7
14590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqrshrun.s16    d25, q10, #7
14690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
14790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vld1.u8         {q4}, [r0], r1
14890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vst1.u8         {d22}, [lr]!            ;store result
14990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vld1.u8         {q5}, [r0], r1
15090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vst1.u8         {d23}, [lr]!
15190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vld1.u8         {q6}, [r0], r1
15290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vst1.u8         {d24}, [lr]!
15390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vld1.u8         {q7}, [r0], r1
15490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vst1.u8         {d25}, [lr]!
15590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
15690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    ;first_pass filtering on the rest 5-line data
15790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q8, d6, d0              ;(src_ptr[-2] * vp8_filter[0])
15890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q9, d8, d0
15990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q10, d10, d0
16090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q11, d12, d0
16190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q12, d14, d0
16290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
16390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d27, d6, d7, #1         ;construct src_ptr[-1]
16490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d28, d8, d9, #1
16590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d29, d10, d11, #1
16690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d30, d12, d13, #1
16790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d31, d14, d15, #1
16890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
16990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlsl.u8        q8, d27, d1             ;-(src_ptr[-1] * vp8_filter[1])
17090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlsl.u8        q9, d28, d1
17190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlsl.u8        q10, d29, d1
17290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlsl.u8        q11, d30, d1
17390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlsl.u8        q12, d31, d1
17490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
17590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d27, d6, d7, #4         ;construct src_ptr[2]
17690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d28, d8, d9, #4
17790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d29, d10, d11, #4
17890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d30, d12, d13, #4
17990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d31, d14, d15, #4
18090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
18190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlsl.u8        q8, d27, d4             ;-(src_ptr[2] * vp8_filter[4])
18290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlsl.u8        q9, d28, d4
18390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlsl.u8        q10, d29, d4
18490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlsl.u8        q11, d30, d4
18590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlsl.u8        q12, d31, d4
18690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
18790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d27, d6, d7, #2         ;construct src_ptr[0]
18890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d28, d8, d9, #2
18990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d29, d10, d11, #2
19090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d30, d12, d13, #2
19190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d31, d14, d15, #2
19290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
19390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q8, d27, d2             ;(src_ptr[0] * vp8_filter[2])
19490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q9, d28, d2
19590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q10, d29, d2
19690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q11, d30, d2
19790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q12, d31, d2
19890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
19990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d27, d6, d7, #5         ;construct src_ptr[3]
20090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d28, d8, d9, #5
20190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d29, d10, d11, #5
20290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d30, d12, d13, #5
20390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d31, d14, d15, #5
20490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
20590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q8, d27, d5             ;(src_ptr[3] * vp8_filter[5])
20690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q9, d28, d5
20790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q10, d29, d5
20890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q11, d30, d5
20990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q12, d31, d5
21090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
21190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d27, d6, d7, #3         ;construct src_ptr[1]
21290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d28, d8, d9, #3
21390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d29, d10, d11, #3
21490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d30, d12, d13, #3
21590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d31, d14, d15, #3
21690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
21790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q3, d27, d3             ;(src_ptr[1] * vp8_filter[3])
21890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q4, d28, d3
21990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q5, d29, d3
22090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q6, d30, d3
22190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q7, d31, d3
22290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
22390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqadd.s16       q8, q3                  ;sum of all (src_data*filter_parameters)
22490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqadd.s16       q9, q4
22590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqadd.s16       q10, q5
22690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqadd.s16       q11, q6
22790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqadd.s16       q12, q7
22890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
22990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqrshrun.s16    d26, q8, #7             ;shift/round/saturate to u8
23090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqrshrun.s16    d27, q9, #7
23190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqrshrun.s16    d28, q10, #7
23290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqrshrun.s16    d29, q11, #7                ;load intermediate data from stack
23390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqrshrun.s16    d30, q12, #7
23490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
23590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;Second pass: 8x4
23690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;secondpass_filter
23790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    add             r3, r12, r3, lsl #5
23890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    sub             lr, lr, #32
23990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
24090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vld1.s32        {q5, q6}, [r3]          ;load second_pass filter
24190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vld1.u8         {q11}, [lr]!
24290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
24390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vabs.s32        q7, q5
24490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vabs.s32        q8, q6
24590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
24690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vld1.u8         {q12}, [lr]!
24790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
24890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vdup.8          d0, d14[0]              ;second_pass filter parameters (d0-d5)
24990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vdup.8          d1, d14[4]
25090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vdup.8          d2, d15[0]
25190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vdup.8          d3, d15[4]
25290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vdup.8          d4, d16[0]
25390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vdup.8          d5, d16[4]
25490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
25590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q3, d22, d0             ;(src_ptr[-2] * vp8_filter[0])
25690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q4, d23, d0
25790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q5, d24, d0
25890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q6, d25, d0
25990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
26090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlsl.u8        q3, d23, d1             ;-(src_ptr[-1] * vp8_filter[1])
26190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlsl.u8        q4, d24, d1
26290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlsl.u8        q5, d25, d1
26390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlsl.u8        q6, d26, d1
26490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
26590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlsl.u8        q3, d26, d4             ;-(src_ptr[2] * vp8_filter[4])
26690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlsl.u8        q4, d27, d4
26790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlsl.u8        q5, d28, d4
26890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlsl.u8        q6, d29, d4
26990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
27090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q3, d24, d2             ;(src_ptr[0] * vp8_filter[2])
27190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q4, d25, d2
27290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q5, d26, d2
27390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q6, d27, d2
27490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
27590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q3, d27, d5             ;(src_ptr[3] * vp8_filter[5])
27690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q4, d28, d5
27790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q5, d29, d5
27890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q6, d30, d5
27990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
28090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q7, d25, d3             ;(src_ptr[1] * vp8_filter[3])
28190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q8, d26, d3
28290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q9, d27, d3
28390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q10, d28, d3
28490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
28590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqadd.s16       q7, q3                  ;sum of all (src_data*filter_parameters)
28690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqadd.s16       q8, q4
28790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqadd.s16       q9, q5
28890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqadd.s16       q10, q6
28990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
29090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqrshrun.s16    d6, q7, #7              ;shift/round/saturate to u8
29190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqrshrun.s16    d7, q8, #7
29290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqrshrun.s16    d8, q9, #7
29390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqrshrun.s16    d9, q10, #7
29490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
29590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vst1.u8         {d6}, [r4], r5          ;store result
29690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vst1.u8         {d7}, [r4], r5
29790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vst1.u8         {d8}, [r4], r5
29890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vst1.u8         {d9}, [r4], r5
29990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
30090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    add             sp, sp, #32
30190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    pop             {r4-r5,pc}
30290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
30390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;--------------------
30490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huberfirstpass_filter8x4_only
30590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vabs.s32        q12, q14
30690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vabs.s32        q13, q15
30790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
30890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    sub             r0, r0, #2              ;move srcptr back to (line-2) and (column-2)
30990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vld1.u8         {q3}, [r0], r1          ;load src data
31090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
31190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vdup.8          d0, d24[0]              ;first_pass filter (d0-d5)
31290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vld1.u8         {q4}, [r0], r1
31390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vdup.8          d1, d24[4]
31490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vld1.u8         {q5}, [r0], r1
31590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vdup.8          d2, d25[0]
31690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vld1.u8         {q6}, [r0], r1
31790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vdup.8          d3, d25[4]
31890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vdup.8          d4, d26[0]
31990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vdup.8          d5, d26[4]
32090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
32190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;First pass: output_height lines x output_width columns (4x8)
32290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    pld             [r0]
32390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    pld             [r0, r1]
32490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    pld             [r0, r1, lsl #1]
32590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
32690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q7, d6, d0              ;(src_ptr[-2] * vp8_filter[0])
32790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q8, d8, d0
32890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q9, d10, d0
32990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q10, d12, d0
33090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
33190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d28, d6, d7, #1         ;construct src_ptr[-1]
33290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d29, d8, d9, #1
33390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d30, d10, d11, #1
33490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d31, d12, d13, #1
33590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
33690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlsl.u8        q7, d28, d1             ;-(src_ptr[-1] * vp8_filter[1])
33790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlsl.u8        q8, d29, d1
33890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlsl.u8        q9, d30, d1
33990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlsl.u8        q10, d31, d1
34090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
34190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d28, d6, d7, #4         ;construct src_ptr[2]
34290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d29, d8, d9, #4
34390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d30, d10, d11, #4
34490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d31, d12, d13, #4
34590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
34690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlsl.u8        q7, d28, d4             ;-(src_ptr[2] * vp8_filter[4])
34790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlsl.u8        q8, d29, d4
34890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlsl.u8        q9, d30, d4
34990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlsl.u8        q10, d31, d4
35090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
35190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d28, d6, d7, #2         ;construct src_ptr[0]
35290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d29, d8, d9, #2
35390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d30, d10, d11, #2
35490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d31, d12, d13, #2
35590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
35690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q7, d28, d2             ;(src_ptr[0] * vp8_filter[2])
35790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q8, d29, d2
35890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q9, d30, d2
35990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q10, d31, d2
36090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
36190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d28, d6, d7, #5         ;construct src_ptr[3]
36290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d29, d8, d9, #5
36390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d30, d10, d11, #5
36490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d31, d12, d13, #5
36590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
36690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q7, d28, d5             ;(src_ptr[3] * vp8_filter[5])
36790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q8, d29, d5
36890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q9, d30, d5
36990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q10, d31, d5
37090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
37190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d28, d6, d7, #3         ;construct src_ptr[1]
37290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d29, d8, d9, #3
37390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d30, d10, d11, #3
37490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d31, d12, d13, #3
37590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
37690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q3, d28, d3             ;(src_ptr[1] * vp8_filter[3])
37790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q4, d29, d3
37890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q5, d30, d3
37990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q6, d31, d3
38090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
38190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqadd.s16       q7, q3                  ;sum of all (src_data*filter_parameters)
38290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqadd.s16       q8, q4
38390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqadd.s16       q9, q5
38490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqadd.s16       q10, q6
38590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
38690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqrshrun.s16    d22, q7, #7             ;shift/round/saturate to u8
38790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqrshrun.s16    d23, q8, #7
38890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqrshrun.s16    d24, q9, #7
38990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqrshrun.s16    d25, q10, #7
39090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
39190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vst1.u8         {d22}, [r4], r5         ;store result
39290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vst1.u8         {d23}, [r4], r5
39390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vst1.u8         {d24}, [r4], r5
39490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vst1.u8         {d25}, [r4], r5
39590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
39690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    pop             {r4-r5,pc}
39790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
39890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;---------------------
39990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Hubersecondpass_filter8x4_only
40090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;Second pass only: 8x4 block, vertical 6-tap filter applied straight to the source rows
;Loads 9 source rows of 8 pixels (starting 2 rows above r0), filters them
;vertically into 4 output rows and stores those rows through r4.
;Register roles as used below:
;  r0  = source pointer, r1 = source row stride (post-increment of every row load)
;  r3  = filter row index on entry (scaled by 32 bytes = 8 words per table row);
;        presumably the vertical filter offset - confirm against the prologue
;  r12 = filter table base; presumably filter8_coeff, set before this label - confirm
;  r4  = destination pointer, r5 = destination row stride; both are set up before
;        this label and the caller's values are restored by the final pop
;NOTE(review): q4-q7 (d8-d15) are overwritten here and no vpop precedes the return;
;AAPCS treats d8-d15 as callee-saved - confirm they are saved/restored elsewhere.
40190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    add             r3, r12, r3, lsl #5     ;r3 = r12 + r3*32: address of the selected 8-word filter row
40290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    sub             r0, r0, r1, lsl #1      ;back up two rows so the first load is src_ptr[-2]
40390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vld1.s32        {q5, q6}, [r3]          ;load second_pass filter (8 x 32-bit words into q5, q6)
40490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vabs.s32        q7, q5                  ;keep |tap| only: the multiplies below are unsigned and the
40590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vabs.s32        q8, q6                  ;sign is applied by using vmlal (add) or vmlsl (subtract)
40690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
40790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vld1.u8         {d22}, [r0], r1         ;nine rows src_ptr[-2]..src_ptr[6], 8 pixels each, into d22-d30
40890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vld1.u8         {d23}, [r0], r1
40990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vld1.u8         {d24}, [r0], r1
41090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vdup.8          d0, d14[0]              ;second_pass filter parameters (d0-d5): low byte of each 32-bit |tap| copied to all 8 lanes
41190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vld1.u8         {d25}, [r0], r1
41290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vdup.8          d1, d14[4]              ;d14/d15 = q7 hold |tap 0..3|
41390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vld1.u8         {d26}, [r0], r1
41490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vdup.8          d2, d15[0]
41590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vld1.u8         {d27}, [r0], r1
41690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vdup.8          d3, d15[4]
41790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vld1.u8         {d28}, [r0], r1
41890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vdup.8          d4, d16[0]              ;d16 = low half of q8 holds |tap 4|, |tap 5|
41990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vld1.u8         {d29}, [r0], r1
42090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vdup.8          d5, d16[4]
42190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vld1.u8         {d30}, [r0], r1
42290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
42390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q3, d22, d0             ;(src_ptr[-2] * vp8_filter[0])
42490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q4, d23, d0             ;q3-q6 accumulate output rows 0-3; each row uses
42590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q5, d24, d0             ;the same tap on the next source row down
42690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q6, d25, d0
42790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
42890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlsl.u8        q3, d23, d1             ;-(src_ptr[-1] * vp8_filter[1])
42990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlsl.u8        q4, d24, d1
43090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlsl.u8        q5, d25, d1
43190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlsl.u8        q6, d26, d1
43290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
43390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlsl.u8        q3, d26, d4             ;-(src_ptr[2] * vp8_filter[4])
43490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlsl.u8        q4, d27, d4
43590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlsl.u8        q5, d28, d4
43690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlsl.u8        q6, d29, d4
43790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
43890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q3, d24, d2             ;(src_ptr[0] * vp8_filter[2])
43990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q4, d25, d2
44090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q5, d26, d2
44190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q6, d27, d2
44290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
44390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q3, d27, d5             ;(src_ptr[3] * vp8_filter[5])
44490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q4, d28, d5
44590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q5, d29, d5
44690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q6, d30, d5
44790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
44890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q7, d25, d3             ;(src_ptr[1] * vp8_filter[3])
44990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q8, d26, d3             ;tap 3 products go to separate registers q7-q10 (the |tap|
45090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q9, d27, d3             ;copies there are dead once d0-d5 are filled); presumably so
45190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q10, d28, d3            ;the final merge can saturate instead of wrapping - confirm
45290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
45390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqadd.s16       q7, q3                  ;sum of all (src_data*filter_parameters)
45490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqadd.s16       q8, q4                  ;signed saturating add of the two partial sums per row
45590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqadd.s16       q9, q5
45690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqadd.s16       q10, q6
45790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
45890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqrshrun.s16    d6, q7, #7              ;shift/round/saturate to u8
45990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqrshrun.s16    d7, q8, #7              ;>>7 divides by 128, the tap sum of the filter8_coeff
46090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqrshrun.s16    d8, q9, #7              ;rows shown at the top of the file
46190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqrshrun.s16    d9, q10, #7
46290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
46390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vst1.u8         {d6}, [r4], r5          ;store result
46490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vst1.u8         {d7}, [r4], r5          ;four rows of 8 bytes; r4 advances by r5 after each store
46590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vst1.u8         {d8}, [r4], r5
46690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vst1.u8         {d9}, [r4], r5
46790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
46890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    pop             {r4-r5,pc}              ;restore r4-r5 and return by popping pc; presumably pairs with a push in the prologue (not shown here)
46990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
47090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    ENDP
47190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
47290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    END
473