190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;
2f71323e297a928af368937089d3ed71239786f86Andreas Huber;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;
4f71323e297a928af368937089d3ed71239786f86Andreas Huber;  Use of this source code is governed by a BSD-style license
5f71323e297a928af368937089d3ed71239786f86Andreas Huber;  that can be found in the LICENSE file in the root of the source
6f71323e297a928af368937089d3ed71239786f86Andreas Huber;  tree. An additional intellectual property rights grant can be found
7f71323e297a928af368937089d3ed71239786f86Andreas Huber;  in the file PATENTS.  All contributing project authors may
8f71323e297a928af368937089d3ed71239786f86Andreas Huber;  be found in the AUTHORS file in the root of the source tree.
990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;
1090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
1190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
1290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    EXPORT  |vp8_sixtap_predict8x8_neon|    ; make the routine's symbol visible to the linker
1390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    ARM                                     ; assemble as ARM (A32) instructions, not Thumb
1490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    REQUIRE8                                ; declare that this code requires 8-byte stack alignment
1590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    PRESERVE8                               ; declare that this code preserves 8-byte stack alignment
1690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
1790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    AREA ||.text||, CODE, READONLY, ALIGN=2 ; read-only code section, aligned to 2^2 = 4 bytes
1890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber; r0    unsigned char  *src_ptr,
1990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber; r1    int  src_pixels_per_line,
2090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber; r2    int  xoffset,
2190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber; r3    int  yoffset,
2290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber; stack -> r4    unsigned char *dst_ptr,   (1st stack argument, loaded into r4)
2390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber; stack -> r5    int  dst_pitch            (2nd stack argument, loaded into r5)
2490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
2590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber|vp8_sixtap_predict8x8_neon| PROC
2690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    push            {r4-r5, lr}
2790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
28d35fe0269d77984b383b6bdc051f26b72da15277Ard Biesheuvel    adrl            r12, filter8_coeff
2990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
3090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    ldr             r4, [sp, #12]           ;load parameters from stack
3190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    ldr             r5, [sp, #16]           ;load parameters from stack
3290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
3390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    cmp             r2, #0                  ;skip first_pass filter if xoffset=0
3490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    beq             secondpass_filter8x8_only
3590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
3690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    add             r2, r12, r2, lsl #5     ;calculate filter location
3790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
3890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    cmp             r3, #0                  ;skip second_pass filter if yoffset=0
3990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
4090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vld1.s32        {q14, q15}, [r2]        ;load first_pass filter
4190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
4290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    beq             firstpass_filter8x8_only
4390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
4490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    sub             sp, sp, #64             ;reserve space on stack for temporary storage
4590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    mov             lr, sp
4690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
4790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vabs.s32        q12, q14
4890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vabs.s32        q13, q15
4990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
5090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    mov             r2, #2                  ;loop counter
5190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    sub             r0, r0, #2              ;move srcptr back to (line-2) and (column-2)
5290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    sub             r0, r0, r1, lsl #1
5390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
5490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vdup.8          d0, d24[0]              ;first_pass filter (d0-d5)
5590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vdup.8          d1, d24[4]
5690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vdup.8          d2, d25[0]
5790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
5890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;First pass: output_height lines x output_width columns (13x8)
5990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vld1.u8         {q3}, [r0], r1          ;load src data
6090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vdup.8          d3, d25[4]
6190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vld1.u8         {q4}, [r0], r1
6290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vdup.8          d4, d26[0]
6390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vld1.u8         {q5}, [r0], r1
6490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vdup.8          d5, d26[4]
6590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vld1.u8         {q6}, [r0], r1
6690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
6790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huberfilt_blk2d_fp8x8_loop_neon
6890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    pld             [r0]
6990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    pld             [r0, r1]
7090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    pld             [r0, r1, lsl #1]
7190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
7290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q7, d6, d0              ;(src_ptr[-2] * vp8_filter[0])
7390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q8, d8, d0
7490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q9, d10, d0
7590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q10, d12, d0
7690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
7790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d28, d6, d7, #1         ;construct src_ptr[-1]
7890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d29, d8, d9, #1
7990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d30, d10, d11, #1
8090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d31, d12, d13, #1
8190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
8290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlsl.u8        q7, d28, d1             ;-(src_ptr[-1] * vp8_filter[1])
8390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlsl.u8        q8, d29, d1
8490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlsl.u8        q9, d30, d1
8590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlsl.u8        q10, d31, d1
8690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
8790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d28, d6, d7, #4         ;construct src_ptr[2]
8890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d29, d8, d9, #4
8990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d30, d10, d11, #4
9090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d31, d12, d13, #4
9190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
9290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlsl.u8        q7, d28, d4             ;-(src_ptr[2] * vp8_filter[4])
9390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlsl.u8        q8, d29, d4
9490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlsl.u8        q9, d30, d4
9590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlsl.u8        q10, d31, d4
9690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
9790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d28, d6, d7, #2         ;construct src_ptr[0]
9890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d29, d8, d9, #2
9990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d30, d10, d11, #2
10090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d31, d12, d13, #2
10190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
10290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q7, d28, d2             ;(src_ptr[0] * vp8_filter[2])
10390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q8, d29, d2
10490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q9, d30, d2
10590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q10, d31, d2
10690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
10790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d28, d6, d7, #5         ;construct src_ptr[3]
10890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d29, d8, d9, #5
10990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d30, d10, d11, #5
11090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d31, d12, d13, #5
11190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
11290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q7, d28, d5             ;(src_ptr[3] * vp8_filter[5])
11390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q8, d29, d5
11490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q9, d30, d5
11590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q10, d31, d5
11690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
11790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d28, d6, d7, #3         ;construct src_ptr[1]
11890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d29, d8, d9, #3
11990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d30, d10, d11, #3
12090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d31, d12, d13, #3
12190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
12290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q3, d28, d3             ;(src_ptr[1] * vp8_filter[3])
12390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q4, d29, d3
12490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q5, d30, d3
12590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q6, d31, d3
12690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
12790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    subs            r2, r2, #1
12890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
12990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqadd.s16       q7, q3                  ;sum of all (src_data*filter_parameters)
13090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqadd.s16       q8, q4
13190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqadd.s16       q9, q5
13290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqadd.s16       q10, q6
13390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
13490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vld1.u8         {q3}, [r0], r1          ;load src data
13590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
13690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqrshrun.s16    d22, q7, #7             ;shift/round/saturate to u8
13790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqrshrun.s16    d23, q8, #7
13890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqrshrun.s16    d24, q9, #7
13990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqrshrun.s16    d25, q10, #7
14090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
14190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vst1.u8         {d22}, [lr]!            ;store result
14290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vld1.u8         {q4}, [r0], r1
14390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vst1.u8         {d23}, [lr]!
14490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vld1.u8         {q5}, [r0], r1
14590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vst1.u8         {d24}, [lr]!
14690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vld1.u8         {q6}, [r0], r1
14790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vst1.u8         {d25}, [lr]!
14890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
14990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    bne             filt_blk2d_fp8x8_loop_neon
15090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
15190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    ;first_pass filtering on the remaining 5 lines of data
15290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    ;vld1.u8            {q3}, [r0], r1          ;load src data
15390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    ;vld1.u8            {q4}, [r0], r1
15490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    ;vld1.u8            {q5}, [r0], r1
15590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    ;vld1.u8            {q6}, [r0], r1
15690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vld1.u8         {q7}, [r0], r1
15790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
15890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q8, d6, d0              ;(src_ptr[-2] * vp8_filter[0])
15990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q9, d8, d0
16090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q10, d10, d0
16190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q11, d12, d0
16290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q12, d14, d0
16390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
16490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d27, d6, d7, #1         ;construct src_ptr[-1]
16590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d28, d8, d9, #1
16690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d29, d10, d11, #1
16790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d30, d12, d13, #1
16890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d31, d14, d15, #1
16990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
17090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlsl.u8        q8, d27, d1             ;-(src_ptr[-1] * vp8_filter[1])
17190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlsl.u8        q9, d28, d1
17290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlsl.u8        q10, d29, d1
17390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlsl.u8        q11, d30, d1
17490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlsl.u8        q12, d31, d1
17590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
17690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d27, d6, d7, #4         ;construct src_ptr[2]
17790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d28, d8, d9, #4
17890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d29, d10, d11, #4
17990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d30, d12, d13, #4
18090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d31, d14, d15, #4
18190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
18290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlsl.u8        q8, d27, d4             ;-(src_ptr[2] * vp8_filter[4])
18390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlsl.u8        q9, d28, d4
18490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlsl.u8        q10, d29, d4
18590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlsl.u8        q11, d30, d4
18690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlsl.u8        q12, d31, d4
18790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
18890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d27, d6, d7, #2         ;construct src_ptr[0]
18990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d28, d8, d9, #2
19090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d29, d10, d11, #2
19190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d30, d12, d13, #2
19290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d31, d14, d15, #2
19390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
19490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q8, d27, d2             ;(src_ptr[0] * vp8_filter[2])
19590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q9, d28, d2
19690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q10, d29, d2
19790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q11, d30, d2
19890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q12, d31, d2
19990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
20090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d27, d6, d7, #5         ;construct src_ptr[3]
20190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d28, d8, d9, #5
20290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d29, d10, d11, #5
20390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d30, d12, d13, #5
20490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d31, d14, d15, #5
20590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
20690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q8, d27, d5             ;(src_ptr[3] * vp8_filter[5])
20790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q9, d28, d5
20890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q10, d29, d5
20990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q11, d30, d5
21090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q12, d31, d5
21190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
21290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d27, d6, d7, #3         ;construct src_ptr[1]
21390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d28, d8, d9, #3
21490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d29, d10, d11, #3
21590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d30, d12, d13, #3
21690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d31, d14, d15, #3
21790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
21890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q3, d27, d3             ;(src_ptr[1] * vp8_filter[3])
21990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q4, d28, d3
22090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q5, d29, d3
22190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q6, d30, d3
22290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q7, d31, d3
22390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
22490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqadd.s16       q8, q3                  ;sum of all (src_data*filter_parameters)
22590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqadd.s16       q9, q4
22690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqadd.s16       q10, q5
22790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqadd.s16       q11, q6
22890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqadd.s16       q12, q7
22990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
23090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    add             r3, r12, r3, lsl #5
23190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
23290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqrshrun.s16    d26, q8, #7             ;shift/round/saturate to u8
23390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    sub             lr, lr, #64
23490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqrshrun.s16    d27, q9, #7
23590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vld1.u8         {q9}, [lr]!             ;load intermediate data from stack
23690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqrshrun.s16    d28, q10, #7
23790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vld1.u8         {q10}, [lr]!
23890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
23990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vld1.s32        {q5, q6}, [r3]          ;load second_pass filter
24090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
24190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqrshrun.s16    d29, q11, #7
24290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vld1.u8         {q11}, [lr]!
24390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
24490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vabs.s32        q7, q5
24590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vabs.s32        q8, q6
24690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
24790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqrshrun.s16    d30, q12, #7
24890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vld1.u8         {q12}, [lr]!
24990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
25090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;Second pass: 8x8
25190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    mov             r3, #2                  ;loop counter
25290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
25390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vdup.8          d0, d14[0]              ;second_pass filter parameters (d0-d5)
25490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vdup.8          d1, d14[4]
25590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vdup.8          d2, d15[0]
25690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vdup.8          d3, d15[4]
25790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vdup.8          d4, d16[0]
25890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vdup.8          d5, d16[4]
25990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
26090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huberfilt_blk2d_sp8x8_loop_neon
26190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q3, d18, d0             ;(src_ptr[-2] * vp8_filter[0])
26290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q4, d19, d0
26390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q5, d20, d0
26490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q6, d21, d0
26590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
26690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlsl.u8        q3, d19, d1             ;-(src_ptr[-1] * vp8_filter[1])
26790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlsl.u8        q4, d20, d1
26890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlsl.u8        q5, d21, d1
26990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlsl.u8        q6, d22, d1
27090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
27190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlsl.u8        q3, d22, d4             ;-(src_ptr[2] * vp8_filter[4])
27290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlsl.u8        q4, d23, d4
27390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlsl.u8        q5, d24, d4
27490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlsl.u8        q6, d25, d4
27590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
27690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q3, d20, d2             ;(src_ptr[0] * vp8_filter[2])
27790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q4, d21, d2
27890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q5, d22, d2
27990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q6, d23, d2
28090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
28190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q3, d23, d5             ;(src_ptr[3] * vp8_filter[5])
28290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q4, d24, d5
28390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q5, d25, d5
28490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q6, d26, d5
28590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
28690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q7, d21, d3             ;(src_ptr[1] * vp8_filter[3])
28790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q8, d22, d3
28890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q9, d23, d3
28990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q10, d24, d3
29090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
29190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    subs            r3, r3, #1
29290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
29390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqadd.s16       q7, q3                  ;sum of all (src_data*filter_parameters)
29490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqadd.s16       q8, q4
29590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqadd.s16       q9, q5
29690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqadd.s16       q10, q6
29790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
29890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqrshrun.s16    d6, q7, #7              ;shift/round/saturate to u8
29990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqrshrun.s16    d7, q8, #7
30090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqrshrun.s16    d8, q9, #7
30190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqrshrun.s16    d9, q10, #7
30290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
30390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmov            q9, q11
30490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vst1.u8         {d6}, [r4], r5          ;store result
30590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmov            q10, q12
30690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vst1.u8         {d7}, [r4], r5
30790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmov            q11, q13
30890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vst1.u8         {d8}, [r4], r5
30990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmov            q12, q14
31090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vst1.u8         {d9}, [r4], r5
31190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmov            d26, d30
31290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
31390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    bne filt_blk2d_sp8x8_loop_neon
31490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
31590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    add             sp, sp, #64
31690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    pop             {r4-r5,pc}
31790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
31890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;---------------------
31990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huberfirstpass_filter8x8_only
32090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    ;add                r2, r12, r2, lsl #5     ;calculate filter location
32190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    ;vld1.s32       {q14, q15}, [r2]        ;load first_pass filter
32290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vabs.s32        q12, q14
32390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vabs.s32        q13, q15
32490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
32590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    mov             r2, #2                  ;loop counter
32690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    sub             r0, r0, #2              ;move srcptr back to (line-2) and (column-2)
32790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
32890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vdup.8          d0, d24[0]              ;first_pass filter (d0-d5)
32990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vdup.8          d1, d24[4]
33090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vdup.8          d2, d25[0]
33190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vdup.8          d3, d25[4]
33290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vdup.8          d4, d26[0]
33390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vdup.8          d5, d26[4]
33490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
33590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;First pass: output_height lines x output_width columns (8x8)
33690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huberfilt_blk2d_fpo8x8_loop_neon
33790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vld1.u8         {q3}, [r0], r1          ;load src data
33890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vld1.u8         {q4}, [r0], r1
33990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vld1.u8         {q5}, [r0], r1
34090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vld1.u8         {q6}, [r0], r1
34190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
34290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    pld             [r0]
34390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    pld             [r0, r1]
34490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    pld             [r0, r1, lsl #1]
34590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
34690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q7, d6, d0              ;(src_ptr[-2] * vp8_filter[0])
34790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q8, d8, d0
34890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q9, d10, d0
34990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q10, d12, d0
35090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
35190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d28, d6, d7, #1         ;construct src_ptr[-1]
35290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d29, d8, d9, #1
35390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d30, d10, d11, #1
35490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d31, d12, d13, #1
35590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
35690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlsl.u8        q7, d28, d1             ;-(src_ptr[-1] * vp8_filter[1])
35790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlsl.u8        q8, d29, d1
35890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlsl.u8        q9, d30, d1
35990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlsl.u8        q10, d31, d1
36090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
36190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d28, d6, d7, #4         ;construct src_ptr[2]
36290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d29, d8, d9, #4
36390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d30, d10, d11, #4
36490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d31, d12, d13, #4
36590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
36690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlsl.u8        q7, d28, d4             ;-(src_ptr[2] * vp8_filter[4])
36790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlsl.u8        q8, d29, d4
36890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlsl.u8        q9, d30, d4
36990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlsl.u8        q10, d31, d4
37090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
37190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d28, d6, d7, #2         ;construct src_ptr[0]
37290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d29, d8, d9, #2
37390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d30, d10, d11, #2
37490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d31, d12, d13, #2
37590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
37690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q7, d28, d2             ;(src_ptr[0] * vp8_filter[2])
37790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q8, d29, d2
37890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q9, d30, d2
37990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q10, d31, d2
38090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
38190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d28, d6, d7, #5         ;construct src_ptr[3]
38290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d29, d8, d9, #5
38390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d30, d10, d11, #5
38490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d31, d12, d13, #5
38590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
38690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q7, d28, d5             ;(src_ptr[3] * vp8_filter[5])
38790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q8, d29, d5
38890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q9, d30, d5
38990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q10, d31, d5
39090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
39190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d28, d6, d7, #3         ;construct src_ptr[1]
39290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d29, d8, d9, #3
39390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d30, d10, d11, #3
39490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d31, d12, d13, #3
39590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
39690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q3, d28, d3             ;(src_ptr[1] * vp8_filter[3])
39790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q4, d29, d3
39890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q5, d30, d3
39990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q6, d31, d3
40090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber ;
40190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqadd.s16       q7, q3                  ;sum of all (src_data*filter_parameters)
40290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqadd.s16       q8, q4
40390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqadd.s16       q9, q5
40490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqadd.s16       q10, q6
40590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
40690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    subs            r2, r2, #1
40790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
40890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqrshrun.s16    d22, q7, #7             ;shift/round/saturate to u8
40990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqrshrun.s16    d23, q8, #7
41090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqrshrun.s16    d24, q9, #7
41190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqrshrun.s16    d25, q10, #7
41290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
41390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vst1.u8         {d22}, [r4], r5         ;store result
41490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vst1.u8         {d23}, [r4], r5
41590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vst1.u8         {d24}, [r4], r5
41690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vst1.u8         {d25}, [r4], r5
41790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
41890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    bne             filt_blk2d_fpo8x8_loop_neon
41990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
42090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    pop             {r4-r5,pc}
42190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
42290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;---------------------
;Second-pass-only path: prepare the vertical 6-tap filter.
;  r0 = src_ptr, r1 = src_pixels_per_line (see the register notes at the top
;       of the file). r0 is moved up two rows so the first row loaded is the
;       one that tap 0 applies to.
;  r3 = yoffset on entry; once the filter row has been loaded it is reused
;       as the loop counter.
;  r12 is set before this section -- presumably the address of filter8_coeff
;       (TODO confirm). yoffset << 5 steps in units of 32 bytes, which is the
;       size of one eight-word DCD row of that table.
;On exit from this preamble:
;  d0-d5   = |tap0|..|tap5|, each replicated across all eight byte lanes
;  d18-d30 = 13 consecutive 8-byte source rows (8 output rows need 8 + 5
;            rows of 6-tap support)
;NOTE(review): q5-q7 (d10-d15) are written here. AAPCS lists d8-d15 as
;callee-saved, and the `pop {r4-r5,pc}` exit of this path does not restore
;them -- confirm that the prologue or the callers account for this.
42390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Hubersecondpass_filter8x8_only
42490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    sub             r0, r0, r1, lsl #1      ;r0 -= 2 * r1 (back up two rows)
42590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    add             r3, r12, r3, lsl #5     ;r3 = r12 + yoffset * 32 (filter row address)
42690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
42790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vld1.u8         {d18}, [r0], r1         ;load src data: 8 bytes per row, r0 += r1 after each load
42890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vld1.s32        {q5, q6}, [r3]          ;load second_pass filter (eight 32-bit words)
42990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vld1.u8         {d19}, [r0], r1
43090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vabs.s32        q7, q5                  ;magnitudes of words 0-3; the sign is applied by vmlsl/vmlal in the loop
43190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vld1.u8         {d20}, [r0], r1
43290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vabs.s32        q8, q6                  ;magnitudes of words 4-7
43390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vld1.u8         {d21}, [r0], r1
43490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    mov             r3, #2                  ;loop counter: 2 passes of 4 output rows
43590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vld1.u8         {d22}, [r0], r1
43690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vdup.8          d0, d14[0]              ;second_pass filter parameters (d0-d5); d0 = low byte of word 0
43790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vld1.u8         {d23}, [r0], r1
43890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vdup.8          d1, d14[4]              ;d1 = low byte of word 1
43990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vld1.u8         {d24}, [r0], r1
44090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vdup.8          d2, d15[0]              ;d2 = low byte of word 2
44190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vld1.u8         {d25}, [r0], r1
44290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vdup.8          d3, d15[4]              ;d3 = low byte of word 3
44390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vld1.u8         {d26}, [r0], r1
44490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vdup.8          d4, d16[0]              ;d4 = low byte of word 4
44590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vld1.u8         {d27}, [r0], r1
44690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vdup.8          d5, d16[4]              ;d5 = low byte of word 5
44790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vld1.u8         {d28}, [r0], r1
44890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vld1.u8         {d29}, [r0], r1
44990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vld1.u8         {d30}, [r0], r1         ;13th and last source row
45090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
45190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;Second pass: 8x8
;Vertical 6-tap loop: every pass writes four 8-pixel output rows.
;  d0-d5   = filter tap magnitudes, one tap per register (set up above)
;  d18-d26 = the nine source rows read by this pass; d27-d30 hold rows that
;            are only needed by the following pass
;  r3      = passes remaining
;  r4, r5  = store address and per-row step; per the register notes at the
;            top of the file these are dst_ptr and dst_pitch (loaded before
;            this section)
;Output row i (i = 0..3) reads source rows d(18+i)..d(23+i) for taps 0..5.
;Taps 1 and 4 are subtracted (vmlsl); taps 0, 2, 3 and 5 are added.
;The tap-3 product is kept in its own registers (q7-q10) and merged with a
;saturating add -- presumably so the 16-bit partial sums cannot wrap
;(TODO confirm).
;NOTE(review): q4-q7 (d8-d15) are overwritten in this loop and the exit
;below restores only r4/r5. AAPCS lists d8-d15 as callee-saved -- confirm
;that the prologue or the callers account for this.
45290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huberfilt_blk2d_spo8x8_loop_neon
45390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q3, d18, d0             ;(src_ptr[-2] * vp8_filter[0])
45490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q4, d19, d0
45590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q5, d20, d0
45690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q6, d21, d0
45790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
45890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlsl.u8        q3, d19, d1             ;-(src_ptr[-1] * vp8_filter[1])
45990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlsl.u8        q4, d20, d1
46090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlsl.u8        q5, d21, d1
46190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlsl.u8        q6, d22, d1
46290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
46390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlsl.u8        q3, d22, d4             ;-(src_ptr[2] * vp8_filter[4])
46490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlsl.u8        q4, d23, d4
46590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlsl.u8        q5, d24, d4
46690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlsl.u8        q6, d25, d4
46790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
46890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q3, d20, d2             ;(src_ptr[0] * vp8_filter[2])
46990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q4, d21, d2
47090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q5, d22, d2
47190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q6, d23, d2
47290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
47390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q3, d23, d5             ;(src_ptr[3] * vp8_filter[5])
47490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q4, d24, d5
47590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q5, d25, d5
47690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q6, d26, d5
47790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
;q9/q10 alias d18-d21: the tap-3 products below overwrite those source rows,
;which is safe because every read of d18-d21 in this pass has already been
;issued above. The window is rebuilt by the vmov sequence further down.
47890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q7, d21, d3             ;(src_ptr[1] * vp8_filter[3])
47990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q8, d22, d3
48090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q9, d23, d3
48190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q10, d24, d3
48290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
48390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    subs            r3, r3, #1              ;count this pass; the flags are consumed by the bne below
48490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
48590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqadd.s16       q7, q3                  ;sum of all (src_data*filter_parameters), saturating
48690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqadd.s16       q8, q4
48790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqadd.s16       q9, q5
48890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqadd.s16       q10, q6
48990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
49090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqrshrun.s16    d6, q7, #7              ;shift/round/saturate to u8
49190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqrshrun.s16    d7, q8, #7
49290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqrshrun.s16    d8, q9, #7
49390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqrshrun.s16    d9, q10, #7
49490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
;Slide the source-row window down by four rows for the next pass
;(d18-d26 <- d22-d30), interleaved with the four row stores.
49590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmov            q9, q11                 ;d18,d19 <- d22,d23
49690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vst1.u8         {d6}, [r4], r5          ;store result: 8 bytes, then r4 += r5
49790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmov            q10, q12                ;d20,d21 <- d24,d25
49890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vst1.u8         {d7}, [r4], r5
49990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmov            q11, q13                ;d22,d23 <- d26,d27
50090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vst1.u8         {d8}, [r4], r5
50190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmov            q12, q14                ;d24,d25 <- d28,d29
50290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vst1.u8         {d9}, [r4], r5
50390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmov            d26, d30                ;d26 <- d30
50490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
50590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    bne filt_blk2d_spo8x8_loop_neon         ;repeat until r3 reaches zero
50690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
50790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    pop             {r4-r5,pc}              ;restore r4/r5 and return by popping into pc
50890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
50990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    ENDP
51090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
51190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;-----------------
51279f15823c34ae1e423108295e416213200bb280fAndreas Huber
;Six-tap filter coefficient table.
;  Layout: 8 rows, each of eight 32-bit words (DCD) = 32 bytes per row.
;  Columns 0-5 are taps 0..5; columns 6-7 are zero padding.
;  The six taps of every row sum to 128, matching the #7 rounding shift
;  used by the filter code. Row 0 (a single 128 at tap 2) therefore
;  reproduces the source pixel unchanged.
;  Only columns 1 and 4 contain negative values; the second-pass code in
;  this file takes the absolute value of the words it loads and subtracts
;  those two taps with vmlsl.
;NOTE(review): presumably the row index is the xoffset/yoffset argument
;(0-7) -- the second-pass code addresses a row as r12 + (yoffset << 5);
;confirm that r12 holds this table's address.
51390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huberfilter8_coeff
51490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    DCD     0,  0,  128,    0,   0,  0,   0,  0
51590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    DCD     0, -6,  123,   12,  -1,  0,   0,  0
51690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    DCD     2, -11, 108,   36,  -8,  1,   0,  0
51790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    DCD     0, -9,   93,   50,  -6,  0,   0,  0
51890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    DCD     3, -16,  77,   77, -16,  3,   0,  0
51990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    DCD     0, -6,   50,   93,  -9,  0,   0,  0
52090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    DCD     1, -8,   36,  108, -11,  2,   0,  0
52190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    DCD     0, -1,   12,  123,  -6,   0,  0,  0
52290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
52390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    END
524