;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


    EXPORT  |vp8_bilinear_predict16x16_neon|
    ARM
    REQUIRE8
    PRESERVE8

    AREA ||.text||, CODE, READONLY, ALIGN=2

;-----------------------------------------------------------------------------
; vp8_bilinear_predict16x16_neon
;
; 16x16 two-tap (bilinear) prediction.  Every tap pair is applied as
;     out = sat_u8((a * filter[0] + b * filter[1] + 64) >> 7)
; where a/b are horizontally adjacent pixels (first pass) or vertically
; adjacent rows (second pass).
;
; Arguments:
;   r0            unsigned char *src_ptr
;   r1            int  src_pixels_per_line   (source stride in bytes)
;   r2            int  xoffset               (index into bifilter16_coeff)
;   r3            int  yoffset               (index into bifilter16_coeff)
;   [sp, #0]      unsigned char *dst_ptr     (stack argument, kept in r4)
;   [sp, #4]      int  dst_pitch             (stack argument, kept in r5)
;
; xoffset / yoffset are not range-checked; bifilter16_coeff holds 8 entries,
; so values 0..7 are expected (NOTE(review): confirm against callers).
;
; Paths:
;   xoffset == 0               -> vertical filter only, read straight from src
;   xoffset != 0, yoffset == 0 -> horizontal filter only, written straight to dst
;   otherwise                  -> horizontal pass of 17 rows into a 272-byte
;                                 (17 x 16) stack buffer, then vertical pass
;
; The horizontal paths load 24 bytes per source row (three d registers) even
; though only 17 of them contribute to the result.
;
; Register preservation:
;   r4, r5, lr are saved/restored with push/pop.
;   d8-d15 (q4-q7) are callee-saved under the AAPCS and are used as scratch
;   on every path, so they are saved with vpush and restored with vpop.
;   r0, r2, r3, r12, q0-q3, q8-q15 and the flags are clobbered.
;-----------------------------------------------------------------------------

|vp8_bilinear_predict16x16_neon| PROC
    push            {r4-r5, lr}             ;12 bytes
    vpush           {d8-d15}                ;64 bytes: preserve callee-saved NEON regs

    adr             r12, bifilter16_coeff   ;PC-relative: table must stay within adr range
    ldr             r4, [sp, #76]           ;dst_ptr   (stack arg 0; 12 + 64 bytes pushed)
    ldr             r5, [sp, #80]           ;dst_pitch (stack arg 1)

    cmp             r2, #0                  ;skip first_pass filter if xoffset=0
    beq             secondpass_bfilter16x16_only

    add             r2, r12, r2, lsl #3     ;filter address = table + xoffset * 8

    cmp             r3, #0                  ;skip second_pass filter if yoffset=0

    vld1.s32        {d31}, [r2]             ;load first_pass filter (does not alter flags)

    beq             firstpass_bfilter16x16_only

    sub             sp, sp, #272            ;reserve 17 rows x 16 bytes of temporary storage
    vld1.u8         {d2, d3, d4}, [r0], r1      ;load src data
    mov             lr, sp                  ;lr = write pointer into temporary storage
    vld1.u8         {d5, d6, d7}, [r0], r1

    mov             r2, #3                  ;loop counter: 3 iterations x 4 rows
    vld1.u8         {d8, d9, d10}, [r0], r1

    vdup.8          d0, d31[0]              ;first_pass filter (d0 d1)
    vld1.u8         {d11, d12, d13}, [r0], r1

    vdup.8          d1, d31[4]

;First Pass: output_height lines x output_width columns (17x16)
filt_blk2d_fp16x16_loop_neon
    pld             [r0]
    pld             [r0, r1]
    pld             [r0, r1, lsl #1]

    vmull.u8        q7, d2, d0              ;(src_ptr[0] * vp8_filter[0])
    vmull.u8        q8, d3, d0
    vmull.u8        q9, d5, d0
    vmull.u8        q10, d6, d0
    vmull.u8        q11, d8, d0
    vmull.u8        q12, d9, d0
    vmull.u8        q13, d11, d0
    vmull.u8        q14, d12, d0

    vext.8          d2, d2, d3, #1          ;construct src_ptr[1] (low 8 pixels)
    vext.8          d5, d5, d6, #1
    vext.8          d8, d8, d9, #1
    vext.8          d11, d11, d12, #1

    vmlal.u8        q7, d2, d1              ;(src_ptr[1] * vp8_filter[1])
    vmlal.u8        q9, d5, d1
    vmlal.u8        q11, d8, d1
    vmlal.u8        q13, d11, d1

    vext.8          d3, d3, d4, #1          ;construct src_ptr[1] (high 8 pixels)
    vext.8          d6, d6, d7, #1
    vext.8          d9, d9, d10, #1
    vext.8          d12, d12, d13, #1

    vmlal.u8        q8, d3, d1              ;(src_ptr[1] * vp8_filter[1])
    vmlal.u8        q10, d6, d1
    vmlal.u8        q12, d9, d1
    vmlal.u8        q14, d12, d1

    subs            r2, r2, #1              ;flags are consumed by the bne below

    vqrshrn.u16    d14, q7, #7              ;shift/round/saturate to u8
    vqrshrn.u16    d15, q8, #7
    vqrshrn.u16    d16, q9, #7
    vqrshrn.u16    d17, q10, #7
    vqrshrn.u16    d18, q11, #7
    vqrshrn.u16    d19, q12, #7
    vqrshrn.u16    d20, q13, #7

    vld1.u8         {d2, d3, d4}, [r0], r1      ;load src data for the next 4 rows
    vqrshrn.u16    d21, q14, #7
    vld1.u8         {d5, d6, d7}, [r0], r1

    vst1.u8         {d14, d15, d16, d17}, [lr]!     ;store result
    vld1.u8         {d8, d9, d10}, [r0], r1
    vst1.u8         {d18, d19, d20, d21}, [lr]!
    vld1.u8         {d11, d12, d13}, [r0], r1

    bne             filt_blk2d_fp16x16_loop_neon

;First-pass filtering for the remaining 5 rows (4 already loaded + 1 more)
    vld1.u8         {d14, d15, d16}, [r0], r1

    vmull.u8        q9, d2, d0              ;(src_ptr[0] * vp8_filter[0])
    vmull.u8        q10, d3, d0
    vmull.u8        q11, d5, d0
    vmull.u8        q12, d6, d0
    vmull.u8        q13, d8, d0
    vmull.u8        q14, d9, d0

    vext.8          d2, d2, d3, #1          ;construct src_ptr[1]
    vext.8          d5, d5, d6, #1
    vext.8          d8, d8, d9, #1

    vmlal.u8        q9, d2, d1              ;(src_ptr[1] * vp8_filter[1])
    vmlal.u8        q11, d5, d1
    vmlal.u8        q13, d8, d1

    vext.8          d3, d3, d4, #1
    vext.8          d6, d6, d7, #1
    vext.8          d9, d9, d10, #1

    vmlal.u8        q10, d3, d1             ;(src_ptr[1] * vp8_filter[1])
    vmlal.u8        q12, d6, d1
    vmlal.u8        q14, d9, d1

    vmull.u8        q1, d11, d0
    vmull.u8        q2, d12, d0
    vmull.u8        q3, d14, d0
    vmull.u8        q4, d15, d0

    vext.8          d11, d11, d12, #1       ;construct src_ptr[1]
    vext.8          d14, d14, d15, #1

    vmlal.u8        q1, d11, d1             ;(src_ptr[1] * vp8_filter[1])
    vmlal.u8        q3, d14, d1

    vext.8          d12, d12, d13, #1
    vext.8          d15, d15, d16, #1

    vmlal.u8        q2, d12, d1             ;(src_ptr[1] * vp8_filter[1])
    vmlal.u8        q4, d15, d1

    vqrshrn.u16    d10, q9, #7              ;shift/round/saturate to u8
    vqrshrn.u16    d11, q10, #7
    vqrshrn.u16    d12, q11, #7
    vqrshrn.u16    d13, q12, #7
    vqrshrn.u16    d14, q13, #7
    vqrshrn.u16    d15, q14, #7
    vqrshrn.u16    d16, q1, #7
    vqrshrn.u16    d17, q2, #7
    vqrshrn.u16    d18, q3, #7
    vqrshrn.u16    d19, q4, #7

    vst1.u8         {d10, d11, d12, d13}, [lr]!         ;store result
    vst1.u8         {d14, d15, d16, d17}, [lr]!
    vst1.u8         {d18, d19}, [lr]!

;Second pass: 16x16
;secondpass_filter
    add             r3, r12, r3, lsl #3     ;filter address = table + yoffset * 8
    sub             lr, lr, #272            ;rewind to the start of temporary storage

    vld1.u32        {d31}, [r3]             ;load second_pass filter

    vld1.u8         {d22, d23}, [lr]!       ;load src data (row 0)

    vdup.8          d0, d31[0]              ;second_pass filter parameters (d0 d1)
    vdup.8          d1, d31[4]
    mov             r12, #4                 ;loop counter: 4 iterations x 4 rows

filt_blk2d_sp16x16_loop_neon
    vld1.u8         {d24, d25}, [lr]!
    vmull.u8        q1, d22, d0             ;(src_ptr[0] * vp8_filter[0])
    vld1.u8         {d26, d27}, [lr]!
    vmull.u8        q2, d23, d0
    vld1.u8         {d28, d29}, [lr]!
    vmull.u8        q3, d24, d0
    vld1.u8         {d30, d31}, [lr]!

    vmull.u8        q4, d25, d0
    vmull.u8        q5, d26, d0
    vmull.u8        q6, d27, d0
    vmull.u8        q7, d28, d0
    vmull.u8        q8, d29, d0

    vmlal.u8        q1, d24, d1             ;(src_ptr[pixel_step] * vp8_filter[1])
    vmlal.u8        q2, d25, d1
    vmlal.u8        q3, d26, d1
    vmlal.u8        q4, d27, d1
    vmlal.u8        q5, d28, d1
    vmlal.u8        q6, d29, d1
    vmlal.u8        q7, d30, d1
    vmlal.u8        q8, d31, d1

    subs            r12, r12, #1            ;flags are consumed by the bne below

    vqrshrn.u16    d2, q1, #7               ;shift/round/saturate to u8
    vqrshrn.u16    d3, q2, #7
    vqrshrn.u16    d4, q3, #7
    vqrshrn.u16    d5, q4, #7
    vqrshrn.u16    d6, q5, #7
    vqrshrn.u16    d7, q6, #7
    vqrshrn.u16    d8, q7, #7
    vqrshrn.u16    d9, q8, #7

    vst1.u8         {d2, d3}, [r4], r5      ;store result
    vst1.u8         {d4, d5}, [r4], r5
    vst1.u8         {d6, d7}, [r4], r5
    vmov            q11, q15                ;last loaded row becomes the next iteration's first row
    vst1.u8         {d8, d9}, [r4], r5

    bne             filt_blk2d_sp16x16_loop_neon

    add             sp, sp, #272            ;release temporary storage

    vpop            {d8-d15}                ;restore callee-saved NEON regs
    pop             {r4-r5,pc}

;--------------------
firstpass_bfilter16x16_only
    mov             r2, #4                      ;loop counter: 4 iterations x 4 rows
    vdup.8          d0, d31[0]                  ;first_pass filter (d0 d1)
    vdup.8          d1, d31[4]

;First Pass: output_height lines x output_width columns (16x16)
filt_blk2d_fpo16x16_loop_neon
    vld1.u8         {d2, d3, d4}, [r0], r1      ;load src data
    vld1.u8         {d5, d6, d7}, [r0], r1
    vld1.u8         {d8, d9, d10}, [r0], r1
    vld1.u8         {d11, d12, d13}, [r0], r1

    pld             [r0]
    pld             [r0, r1]
    pld             [r0, r1, lsl #1]

    vmull.u8        q7, d2, d0              ;(src_ptr[0] * vp8_filter[0])
    vmull.u8        q8, d3, d0
    vmull.u8        q9, d5, d0
    vmull.u8        q10, d6, d0
    vmull.u8        q11, d8, d0
    vmull.u8        q12, d9, d0
    vmull.u8        q13, d11, d0
    vmull.u8        q14, d12, d0

    vext.8          d2, d2, d3, #1          ;construct src_ptr[1] (low 8 pixels)
    vext.8          d5, d5, d6, #1
    vext.8          d8, d8, d9, #1
    vext.8          d11, d11, d12, #1

    vmlal.u8        q7, d2, d1              ;(src_ptr[1] * vp8_filter[1])
    vmlal.u8        q9, d5, d1
    vmlal.u8        q11, d8, d1
    vmlal.u8        q13, d11, d1

    vext.8          d3, d3, d4, #1          ;construct src_ptr[1] (high 8 pixels)
    vext.8          d6, d6, d7, #1
    vext.8          d9, d9, d10, #1
    vext.8          d12, d12, d13, #1

    vmlal.u8        q8, d3, d1              ;(src_ptr[1] * vp8_filter[1])
    vmlal.u8        q10, d6, d1
    vmlal.u8        q12, d9, d1
    vmlal.u8        q14, d12, d1

    subs            r2, r2, #1              ;flags are consumed by the bne below

    vqrshrn.u16    d14, q7, #7              ;shift/round/saturate to u8
    vqrshrn.u16    d15, q8, #7
    vqrshrn.u16    d16, q9, #7
    vqrshrn.u16    d17, q10, #7
    vqrshrn.u16    d18, q11, #7
    vqrshrn.u16    d19, q12, #7
    vqrshrn.u16    d20, q13, #7
    vst1.u8         {d14, d15}, [r4], r5        ;store result
    vqrshrn.u16    d21, q14, #7

    vst1.u8         {d16, d17}, [r4], r5
    vst1.u8         {d18, d19}, [r4], r5
    vst1.u8         {d20, d21}, [r4], r5

    bne             filt_blk2d_fpo16x16_loop_neon

    vpop            {d8-d15}                ;restore callee-saved NEON regs
    pop             {r4-r5,pc}

;---------------------
secondpass_bfilter16x16_only
;Second pass: 16x16
;secondpass_filter
    add             r3, r12, r3, lsl #3         ;filter address = table + yoffset * 8
    mov             r12, #4                     ;loop counter: 4 iterations x 4 rows
    vld1.u32        {d31}, [r3]                 ;load second_pass filter
    vld1.u8         {d22, d23}, [r0], r1        ;load src data (row 0)

    vdup.8          d0, d31[0]                  ;second_pass filter parameters (d0 d1)
    vdup.8          d1, d31[4]

filt_blk2d_spo16x16_loop_neon
    vld1.u8         {d24, d25}, [r0], r1
    vmull.u8        q1, d22, d0             ;(src_ptr[0] * vp8_filter[0])
    vld1.u8         {d26, d27}, [r0], r1
    vmull.u8        q2, d23, d0
    vld1.u8         {d28, d29}, [r0], r1
    vmull.u8        q3, d24, d0
    vld1.u8         {d30, d31}, [r0], r1

    vmull.u8        q4, d25, d0
    vmull.u8        q5, d26, d0
    vmull.u8        q6, d27, d0
    vmull.u8        q7, d28, d0
    vmull.u8        q8, d29, d0

    vmlal.u8        q1, d24, d1             ;(src_ptr[pixel_step] * vp8_filter[1])
    vmlal.u8        q2, d25, d1
    vmlal.u8        q3, d26, d1
    vmlal.u8        q4, d27, d1
    vmlal.u8        q5, d28, d1
    vmlal.u8        q6, d29, d1
    vmlal.u8        q7, d30, d1
    vmlal.u8        q8, d31, d1

    vqrshrn.u16    d2, q1, #7               ;shift/round/saturate to u8
    vqrshrn.u16    d3, q2, #7
    vqrshrn.u16    d4, q3, #7
    vqrshrn.u16    d5, q4, #7
    vqrshrn.u16    d6, q5, #7
    vqrshrn.u16    d7, q6, #7
    vqrshrn.u16    d8, q7, #7
    vqrshrn.u16    d9, q8, #7

    vst1.u8         {d2, d3}, [r4], r5      ;store result
    subs            r12, r12, #1            ;flags are consumed by the bne below
    vst1.u8         {d4, d5}, [r4], r5
    vmov            q11, q15                ;last loaded row becomes the next iteration's first row
    vst1.u8         {d6, d7}, [r4], r5
    vst1.u8         {d8, d9}, [r4], r5

    bne             filt_blk2d_spo16x16_loop_neon

    vpop            {d8-d15}                ;restore callee-saved NEON regs
    pop             {r4-r5,pc}

    ENDP

;-----------------
; Bilinear filter taps, one {filter[0], filter[1]} pair of 32-bit words per
; offset (8 bytes per entry, indexed with "offset, lsl #3").  Only the low
; byte of each word is used (vdup.8 from byte lanes 0 and 4).  Each pair sums
; to 128, matching the rounding shift by 7 in the filter code.
; The table is addressed with adr, so it must stay in this AREA, close to the
; function.

bifilter16_coeff
    DCD     128, 0              ;offset 0
    DCD     112, 16             ;offset 1
    DCD     96, 32              ;offset 2
    DCD     80, 48              ;offset 3
    DCD     64, 64              ;offset 4
    DCD     48, 80              ;offset 5
    DCD     32, 96              ;offset 6
    DCD     16, 112             ;offset 7

    END
358