190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;
2f71323e297a928af368937089d3ed71239786f86Andreas Huber;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;
4f71323e297a928af368937089d3ed71239786f86Andreas Huber;  Use of this source code is governed by a BSD-style license
5f71323e297a928af368937089d3ed71239786f86Andreas Huber;  that can be found in the LICENSE file in the root of the source
6f71323e297a928af368937089d3ed71239786f86Andreas Huber;  tree. An additional intellectual property rights grant can be found
7f71323e297a928af368937089d3ed71239786f86Andreas Huber;  in the file PATENTS.  All contributing project authors may
8f71323e297a928af368937089d3ed71239786f86Andreas Huber;  be found in the AUTHORS file in the root of the source tree.
990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;
1090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
1190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
1279f15823c34ae1e423108295e416213200bb280fAndreas Huber    EXPORT  |vp8_sub_pixel_variance16x16_neon_func|
1390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    ARM
1490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    REQUIRE8
1590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    PRESERVE8
1690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
1790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    AREA ||.text||, CODE, READONLY, ALIGN=2
1890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber; r0    unsigned char  *src_ptr,
1990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber; r1    int  src_pixels_per_line,
2090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber; r2    int  xoffset,
2190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber; r3    int  yoffset,
2290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber; stack(r4) unsigned char *dst_ptr,
2390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber; stack(r5) int dst_pixels_per_line,
2490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber; stack(r6) unsigned int *sse
2590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;note: most of the code is copied from bilinear_predict16x16_neon and vp8_variance16x16_neon.
2690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
;-----------------------------------------------------------------------
; vp8_sub_pixel_variance16x16_neon_func: entry, argument fetch and
; selection of the filtering path.
;
; In:    r0 = src_ptr, r1 = src_pixels_per_line, r2 = xoffset, r3 = yoffset
;        dst_ptr, dst_pixels_per_line and sse are the first three words on
;        the stack at entry.
; Regs:  r4 = dst_ptr, r5 = dst_pixels_per_line, r6 = sse,
;        r12 = address of BilinearTaps_coeff, lr = temp-buffer write pointer
; Paths: xoffset == 0            -> secondpass_bfilter16x16_only
;        yoffset == 0            -> firstpass_bfilter16x16_only
;        both offsets non-zero   -> fall through to the two-pass code
; NOTE(review): d8-d13 are written below (and d14/d15 later on), while this
;        prologue only saves r4-r6/lr. AAPCS treats d8-d15 as callee-saved;
;        confirm that they are preserved somewhere outside this view.
;-----------------------------------------------------------------------
2779f15823c34ae1e423108295e416213200bb280fAndreas Huber|vp8_sub_pixel_variance16x16_neon_func| PROC
2890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    push            {r4-r6, lr}             ;16 bytes pushed, so the stack args start at [sp, #16]
2990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
30d35fe0269d77984b383b6bdc051f26b72da15277Ard Biesheuvel    adr             r12, BilinearTaps_coeff ;r12 = base address of the filter table
3190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    ldr             r4, [sp, #16]           ;load *dst_ptr from stack
3290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    ldr             r5, [sp, #20]           ;load dst_pixels_per_line from stack
3390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    ldr             r6, [sp, #24]           ;load *sse from stack
3490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
3590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    cmp             r2, #0                  ;skip first_pass filter if xoffset=0
3690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    beq             secondpass_bfilter16x16_only
3790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
3890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    add             r2, r12, r2, lsl #3     ;r2 = table base + xoffset * 8 (8 bytes per filter entry)
3990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
4090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    cmp             r3, #0                  ;skip second_pass filter if yoffset=0 (tested by the beq below)
4190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
4290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vld1.s32        {d31}, [r2]             ;load first_pass filter entry; the flags from the cmp stay intact
4390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
4490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    beq             firstpass_bfilter16x16_only
4590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
4690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    sub             sp, sp, #272            ;reserve 17 rows x 16 bytes for the first_pass output
4790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vld1.u8         {d2, d3, d4}, [r0], r1      ;preload source row 0 (24 bytes), step r0 by the stride
4890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    mov             lr, sp                  ;lr = write pointer into the temporary buffer
4990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vld1.u8         {d5, d6, d7}, [r0], r1      ;preload source row 1
5090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
5190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    mov             r2, #3                  ;loop counter: 3 iterations of 4 rows
5290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vld1.u8         {d8, d9, d10}, [r0], r1     ;preload source row 2
5390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
5490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vdup.8          d0, d31[0]              ;first_pass filter (d0 d1): d0 = byte 0 of the entry in every lane
5590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vld1.u8         {d11, d12, d13}, [r0], r1   ;preload source row 3
5690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
5790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vdup.8          d1, d31[4]              ;d1 = byte 4 of the entry in every lane
5890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
5990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;First Pass: output_height lines x output_width columns (17x16)
;Each iteration filters the four source rows held in d2-d13 (24 bytes per
;row), stores 4 x 16 filtered bytes at lr and reloads d2-d13 with the next
;four rows. Per pixel: out[x] = (src[x]*Filter[0] + src[x+1]*Filter[1] + 64) >> 7,
;saturated to u8. d0/d1 hold Filter[0]/Filter[1] in every lane.
;After 3 iterations (12 rows) the straight-line code below handles 5 more rows.
6090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Hubervp8e_filt_blk2d_fp16x16_loop_neon
6190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    pld             [r0]                    ;prefetch upcoming source rows
6290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    pld             [r0, r1]
6390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    pld             [r0, r1, lsl #1]
6490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
6590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q7, d2, d0              ;1st row, pixels 0-7:  src_ptr[0] * Filter[0]
6690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q8, d3, d0              ;1st row, pixels 8-15
6790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q9, d5, d0              ;2nd row, pixels 0-7
6890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q10, d6, d0             ;2nd row, pixels 8-15
6990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q11, d8, d0             ;3rd row, pixels 0-7
7090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q12, d9, d0             ;3rd row, pixels 8-15
7190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q13, d11, d0            ;4th row, pixels 0-7
7290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q14, d12, d0            ;4th row, pixels 8-15
7390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
7490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d2, d2, d3, #1          ;construct src_ptr[1] for pixels 0-7 (bytes 1-8 of the row)
7590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d5, d5, d6, #1
7690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d8, d8, d9, #1
7790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d11, d11, d12, #1
7890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
7990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q7, d2, d1              ;+= src_ptr[1] * Filter[1], pixels 0-7
8090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q9, d5, d1
8190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q11, d8, d1
8290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q13, d11, d1
8390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
8490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d3, d3, d4, #1          ;construct src_ptr[1] for pixels 8-15 (bytes 9-16 of the row)
8590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d6, d6, d7, #1
8690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d9, d9, d10, #1
8790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d12, d12, d13, #1
8890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
8990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q8, d3, d1              ;+= src_ptr[1] * Filter[1], pixels 8-15
9090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q10, d6, d1
9190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q12, d9, d1
9290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q14, d12, d1
9390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
9490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    subs            r2, r2, #1              ;sets the flags tested by the bne at the end of the loop
9590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
9690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqrshrn.u16    d14, q7, #7              ;round, shift right by 7, saturate and narrow to u8
9790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqrshrn.u16    d15, q8, #7
9890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqrshrn.u16    d16, q9, #7
9990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqrshrn.u16    d17, q10, #7
10090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqrshrn.u16    d18, q11, #7
10190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqrshrn.u16    d19, q12, #7
10290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqrshrn.u16    d20, q13, #7
10390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
10490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vld1.u8         {d2, d3, d4}, [r0], r1      ;load the next four source rows (interleaved with the stores)
10590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqrshrn.u16    d21, q14, #7
10690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vld1.u8         {d5, d6, d7}, [r0], r1
10790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
10890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vst1.u8         {d14, d15, d16, d17}, [lr]!     ;store filtered 1st and 2nd rows (32 bytes)
10990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vld1.u8         {d8, d9, d10}, [r0], r1
11090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vst1.u8         {d18, d19, d20, d21}, [lr]!     ;store filtered 3rd and 4th rows (32 bytes)
11190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vld1.u8         {d11, d12, d13}, [r0], r1
11290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
11390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    bne             vp8e_filt_blk2d_fp16x16_loop_neon
11490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
11590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;First-pass filtering for the remaining 5 rows: four are already in d2-d13, the fifth is loaded here
11690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vld1.u8         {d14, d15, d16}, [r0], r1
11790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
11890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q9, d2, d0              ;(src_ptr[0] * Filter[0]) for the first three of the five rows
11990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q10, d3, d0
12090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q11, d5, d0
12190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q12, d6, d0
12290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q13, d8, d0
12390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q14, d9, d0
12490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
12590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d2, d2, d3, #1          ;construct src_ptr[1] for pixels 0-7
12690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d5, d5, d6, #1
12790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d8, d8, d9, #1
12890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
12990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q9, d2, d1              ;+= src_ptr[1] * Filter[1], pixels 0-7
13090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q11, d5, d1
13190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q13, d8, d1
13290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
13390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d3, d3, d4, #1          ;construct src_ptr[1] for pixels 8-15
13490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d6, d6, d7, #1
13590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d9, d9, d10, #1
13690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
13790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q10, d3, d1             ;+= src_ptr[1] * Filter[1], pixels 8-15
13890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q12, d6, d1
13990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q14, d9, d1
14090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
14190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q1, d11, d0             ;last two rows; q1-q4 overlay d2-d9, whose contents were consumed above
14290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q2, d12, d0
14390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q3, d14, d0
14490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q4, d15, d0
14590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
14690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d11, d11, d12, #1       ;construct src_ptr[1] for pixels 0-7
14790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d14, d14, d15, #1
14890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
14990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q1, d11, d1             ;+= src_ptr[1] * Filter[1], pixels 0-7
15090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q3, d14, d1
15190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
15290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d12, d12, d13, #1       ;construct src_ptr[1] for pixels 8-15
15390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d15, d15, d16, #1
15490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
15590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q2, d12, d1             ;+= src_ptr[1] * Filter[1], pixels 8-15
15690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q4, d15, d1
15790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
15890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqrshrn.u16    d10, q9, #7              ;round, shift right by 7, saturate and narrow to u8
15990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqrshrn.u16    d11, q10, #7
16090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqrshrn.u16    d12, q11, #7
16190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqrshrn.u16    d13, q12, #7
16290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqrshrn.u16    d14, q13, #7
16390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqrshrn.u16    d15, q14, #7
16490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqrshrn.u16    d16, q1, #7
16590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqrshrn.u16    d17, q2, #7
16690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqrshrn.u16    d18, q3, #7
16790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqrshrn.u16    d19, q4, #7
16890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
16990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vst1.u8         {d10, d11, d12, d13}, [lr]!         ;store the five filtered rows: 32 + 32 + 16 bytes
17090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vst1.u8         {d14, d15, d16, d17}, [lr]!
17190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vst1.u8         {d18, d19}, [lr]!                   ;lr is now 272 bytes past the start of the buffer
17290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
17390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;Second pass: 16x16
17490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;secondpass_filter
;Vertical filter over the 17 first_pass rows (16 bytes each) read through lr:
;out row k = (row k * Filter[0] + row k+1 * Filter[1] + 64) >> 7, saturated to u8.
;The 16 output rows are written through r3 into a further 256 bytes of stack.
17590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    add             r3, r12, r3, lsl #3     ;r3 = table base + yoffset * 8 (second_pass filter entry)
17690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    sub             lr, lr, #272            ;rewind lr to the start of the first_pass output
17790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
17890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vld1.u32        {d31}, [r3]             ;load second_pass filter
17990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
18090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    sub             sp, sp, #256            ;reserve 16 rows x 16 bytes for the output (272 + 256 = 528 in total)
18190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    mov             r3, sp                  ;r3 = write pointer for the second_pass output
18290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
18390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vld1.u8         {d22, d23}, [lr]!       ;q11 = first_pass row 0
18490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
18590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vdup.8          d0, d31[0]              ;second_pass filter parameters (d0 d1)
18690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vdup.8          d1, d31[4]
18790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    mov             r12, #4                 ;loop counter: 4 iterations of 4 rows (table base in r12 is no longer needed)
18890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
18990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Hubervp8e_filt_blk2d_sp16x16_loop_neon
19090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vld1.u8         {d24, d25}, [lr]!       ;q12-q15 = the four rows following q11
19190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q1, d22, d0             ;(src_ptr[0] * Filter[0])
19290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vld1.u8         {d26, d27}, [lr]!
19390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q2, d23, d0
19490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vld1.u8         {d28, d29}, [lr]!
19590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q3, d24, d0
19690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vld1.u8         {d30, d31}, [lr]!       ;overwrites d31; the taps were already copied to d0/d1
19790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
19890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q4, d25, d0
19990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q5, d26, d0
20090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q6, d27, d0
20190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q7, d28, d0
20290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q8, d29, d0
20390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
20490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q1, d24, d1             ;(src_ptr[pixel_step] * Filter[1]); pixel_step is one 16-byte row here
20590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q2, d25, d1
20690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q3, d26, d1
20790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q4, d27, d1
20890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q5, d28, d1
20990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q6, d29, d1
21090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q7, d30, d1
21190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q8, d31, d1
21290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
21390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    subs            r12, r12, #1            ;sets the flags tested by the bne at the end of the loop
21490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
21590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqrshrn.u16    d2, q1, #7               ;round, shift right by 7, saturate and narrow to u8
21690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqrshrn.u16    d3, q2, #7
21790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqrshrn.u16    d4, q3, #7
21890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqrshrn.u16    d5, q4, #7
21990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqrshrn.u16    d6, q5, #7
22090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqrshrn.u16    d7, q6, #7
22190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqrshrn.u16    d8, q7, #7
22290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqrshrn.u16    d9, q8, #7
22390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
22490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vst1.u8         {d2, d3}, [r3]!         ;store four output rows, 16 bytes each
22590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vst1.u8         {d4, d5}, [r3]!
22690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vst1.u8         {d6, d7}, [r3]!
22790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmov            q11, q15                ;last row loaded becomes the first row of the next iteration
22890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vst1.u8         {d8, d9}, [r3]!
22990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
23090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    bne             vp8e_filt_blk2d_sp16x16_loop_neon
23190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
23290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    b               sub_pixel_variance16x16_neon    ;r3 is 256 bytes past the start of the filtered block here
23390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
23490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;--------------------
;Horizontal-only path, taken when yoffset (r3) is 0. d31 already holds the
;first_pass filter entry loaded before the branch to this label.
;Filters 16 rows of 16 pixels straight from the source (r0, stride r1) into
;the stack buffer through r3, 4 rows per iteration, then joins the common
;variance code.
;NOTE(review): 528 bytes are reserved but only the first 256 are written on
;this path; presumably the size matches the two-pass path (272 + 256) so that
;a shared exit can release the same amount (exit code is not visible here).
23590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huberfirstpass_bfilter16x16_only
23690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    mov             r2, #4                      ;loop counter: 4 iterations of 4 rows
23790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    sub             sp, sp, #528            ;reserve space on stack for temporary storage
23890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vdup.8          d0, d31[0]                  ;first_pass filter (d0 d1)
23990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vdup.8          d1, d31[4]
24090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    mov             r3, sp                      ;r3 = write pointer for the filtered block
24190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
24290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;First Pass: output_height lines x output_width columns (16x16)
24390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Hubervp8e_filt_blk2d_fpo16x16_loop_neon
24490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vld1.u8         {d2, d3, d4}, [r0], r1      ;load four source rows, 24 bytes each
24590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vld1.u8         {d5, d6, d7}, [r0], r1
24690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vld1.u8         {d8, d9, d10}, [r0], r1
24790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vld1.u8         {d11, d12, d13}, [r0], r1
24890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
24990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    pld             [r0]                        ;prefetch upcoming source rows
25090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    pld             [r0, r1]
25190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    pld             [r0, r1, lsl #1]
25290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
25390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q7, d2, d0              ;(src_ptr[0] * Filter[0]); two q registers per row (pixels 0-7, 8-15)
25490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q8, d3, d0
25590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q9, d5, d0
25690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q10, d6, d0
25790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q11, d8, d0
25890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q12, d9, d0
25990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q13, d11, d0
26090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q14, d12, d0
26190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
26290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d2, d2, d3, #1          ;construct src_ptr[1] for pixels 0-7 (bytes 1-8 of the row)
26390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d5, d5, d6, #1
26490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d8, d8, d9, #1
26590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d11, d11, d12, #1
26690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
26790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q7, d2, d1              ;+= src_ptr[1] * Filter[1], pixels 0-7
26890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q9, d5, d1
26990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q11, d8, d1
27090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q13, d11, d1
27190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
27290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d3, d3, d4, #1          ;construct src_ptr[1] for pixels 8-15 (bytes 9-16 of the row)
27390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d6, d6, d7, #1
27490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d9, d9, d10, #1
27590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vext.8          d12, d12, d13, #1
27690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
27790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q8, d3, d1              ;+= src_ptr[1] * Filter[1], pixels 8-15
27890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q10, d6, d1
27990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q12, d9, d1
28090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q14, d12, d1
28190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
28290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    subs            r2, r2, #1              ;sets the flags tested by the bne at the end of the loop
28390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
28490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqrshrn.u16    d14, q7, #7              ;round, shift right by 7, saturate and narrow to u8
28590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqrshrn.u16    d15, q8, #7
28690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqrshrn.u16    d16, q9, #7
28790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqrshrn.u16    d17, q10, #7
28890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqrshrn.u16    d18, q11, #7
28990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqrshrn.u16    d19, q12, #7
29090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqrshrn.u16    d20, q13, #7
29190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vst1.u8         {d14, d15}, [r3]!       ;store four filtered rows, 16 bytes each
29290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqrshrn.u16    d21, q14, #7
29390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
29490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vst1.u8         {d16, d17}, [r3]!
29590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vst1.u8         {d18, d19}, [r3]!
29690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vst1.u8         {d20, d21}, [r3]!
29790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
29890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    bne             vp8e_filt_blk2d_fpo16x16_loop_neon
29990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
30090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    b               sub_pixel_variance16x16_neon    ;r3 is 256 bytes past the start of the filtered block here
30190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
30290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;---------------------
;Vertical-only path, taken when xoffset (r2) is 0. Reads 17 source rows of
;16 bytes directly from r0 (stride r1) and writes 16 filtered rows through r3:
;out row k = (row k * Filter[0] + row k+1 * Filter[1] + 64) >> 7, saturated to u8.
;NOTE(review): 528 bytes are reserved but only the first 256 are written on
;this path; presumably the size matches the two-pass path (272 + 256) so that
;a shared exit can release the same amount (exit code is not visible here).
30390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Hubersecondpass_bfilter16x16_only
30490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;Second pass: 16x16
30590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;secondpass_filter
30690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    sub             sp, sp, #528            ;reserve space on stack for temporary storage
30790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    add             r3, r12, r3, lsl #3     ;r3 = table base + yoffset * 8 (second_pass filter entry)
30890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    mov             r12, #4                     ;loop counter: 4 iterations of 4 rows (table base already consumed)
30990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vld1.u32        {d31}, [r3]                 ;load second_pass filter
31090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vld1.u8         {d22, d23}, [r0], r1        ;q11 = source row 0
31190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    mov             r3, sp                      ;r3 = write pointer for the filtered block
31290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
31390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vdup.8          d0, d31[0]                  ;second_pass filter parameters (d0 d1)
31490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vdup.8          d1, d31[4]
31590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
31690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Hubervp8e_filt_blk2d_spo16x16_loop_neon
31790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vld1.u8         {d24, d25}, [r0], r1    ;q12-q15 = the four rows following q11
31890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q1, d22, d0             ;(src_ptr[0] * Filter[0])
31990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vld1.u8         {d26, d27}, [r0], r1
32090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q2, d23, d0
32190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vld1.u8         {d28, d29}, [r0], r1
32290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q3, d24, d0
32390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vld1.u8         {d30, d31}, [r0], r1    ;overwrites d31; the taps were already copied to d0/d1
32490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
32590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q4, d25, d0
32690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q5, d26, d0
32790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q6, d27, d0
32890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q7, d28, d0
32990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmull.u8        q8, d29, d0
33090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
33190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q1, d24, d1             ;(src_ptr[pixel_step] * Filter[1]); pixel_step is one source row here
33290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q2, d25, d1
33390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q3, d26, d1
33490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q4, d27, d1
33590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q5, d28, d1
33690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q6, d29, d1
33790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q7, d30, d1
33890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmlal.u8        q8, d31, d1
33990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
34090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqrshrn.u16    d2, q1, #7               ;round, shift right by 7, saturate and narrow to u8
34190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqrshrn.u16    d3, q2, #7
34290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqrshrn.u16    d4, q3, #7
34390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqrshrn.u16    d5, q4, #7
34490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqrshrn.u16    d6, q5, #7
34590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqrshrn.u16    d7, q6, #7
34690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqrshrn.u16    d8, q7, #7
34790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vqrshrn.u16    d9, q8, #7
34890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
34990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vst1.u8         {d2, d3}, [r3]!         ;store four output rows, 16 bytes each
35090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    subs            r12, r12, #1            ;sets the flags tested by the bne at the end of the loop
35190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vst1.u8         {d4, d5}, [r3]!
35290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmov            q11, q15                ;last row loaded becomes the first row of the next iteration
35390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vst1.u8         {d6, d7}, [r3]!
35490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vst1.u8         {d8, d9}, [r3]!
35590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
35690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    bne             vp8e_filt_blk2d_spo16x16_loop_neon
35790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
35890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    b               sub_pixel_variance16x16_neon    ;r3 is 256 bytes past the start of the filtered block here
35990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
;----------------------------
;variance16x16
;
; Accumulates, over a 16x16 block, the sum and the sum of squares of
; (filtered pixel - dst pixel), stores the sum of squares through r6 and
; returns  sse - ((sum * sum) >> 8)  in r0.
;
; Register use on entry (r4-r6 as listed in the argument comments at the
; top of the file):
;   r3  - expected to point just past the 256-byte filtered block; the
;         filter loop directly above leaves it there through its
;         post-incrementing stores. It is rewound by 256 below.
;   r4  - dst_ptr
;   r5  - dst_pixels_per_line
;   r6  - sse (output pointer)
;
; The 528 bytes released from sp before returning are presumably the
; scratch area reserved by the function prologue (outside this section -
; confirm against the prologue).
;
; NOTE(review): q5 (d10/d11) is written below and this epilogue has no
; vpop; d8-d15 are callee-saved under the AAPCS - confirm whether the
; prologue saves them.
sub_pixel_variance16x16_neon
    vmov.i8         q8, #0                      ;q8 - sum (4 x s32 partials)
    vmov.i8         q9, #0                      ;q9, q10 - sse (4 x 32-bit partials each)
    vmov.i8         q10, #0

    sub             r3, r3, #256                ;back to start of filtered block
    mov             r12, #8                     ;8 iterations x 2 rows = 16 rows

sub_pixel_variance16x16_neon_loop
    vld1.8          {q0}, [r3]!                 ;filtered row n     (16 bytes, contiguous)
    vld1.8          {q2}, [r4], r5              ;dst row n          (advance by dst stride)
    vld1.8          {q1}, [r3]!                 ;filtered row n+1
    vld1.8          {q3}, [r4], r5              ;dst row n+1

    vsubl.u8        q11, d0, d4                 ;diff, widened to 16 bits
    vsubl.u8        q12, d1, d5
    vsubl.u8        q13, d2, d6
    vsubl.u8        q14, d3, d7

    vpadal.s16      q8, q11                     ;sum  += pairwise(diff)
    vmlal.s16       q9, d22, d22                ;sse  += diff * diff
    vmlal.s16       q10, d23, d23

    subs            r12, r12, #1                ;flags consumed by bne below; the
                                                ;NEON ops in between do not touch them

    vpadal.s16      q8, q12
    vmlal.s16       q9, d24, d24
    vmlal.s16       q10, d25, d25
    vpadal.s16      q8, q13
    vmlal.s16       q9, d26, d26
    vmlal.s16       q10, d27, d27
    vpadal.s16      q8, q14
    vmlal.s16       q9, d28, d28
    vmlal.s16       q10, d29, d29

    bne             sub_pixel_variance16x16_neon_loop

    vadd.u32        q10, q9, q10                ;merge the two sse accumulators
    vpaddl.s32      q0, q8                      ;sum partials -> 2 x s64

    vpaddl.u32      q1, q10                     ;sse partials -> 2 x u64
    vadd.s64        d0, d0, d1                  ;d0 = sum
    vadd.u64        d1, d2, d3                  ;d1 = sse

    ; sum is signed, but sum * sum is never negative. With 256 differences
    ; of at most 255 each, |sum| <= 65280, so sum * sum can exceed 2^31 while
    ; still fitting in 32 bits. The shift must therefore be logical
    ; (unsigned); an arithmetic shift would smear the top bit and corrupt
    ; the result for |sum| >= 46341.
    vmull.s32       q5, d0, d0                  ;d10 = sum * sum
    vst1.32         {d1[0]}, [r6]               ;store sse
    vshr.u32        d10, d10, #8                ;(sum * sum) >> 8, i.e. / 256 pixels
    vsub.u32        d0, d1, d10                 ;sse - ((sum * sum) >> 8)

    add             sp, sp, #528                ;release stack scratch area
    vmov.32         r0, d0[0]                   ;return

    pop             {r4-r6,pc}
41590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
41690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    ENDP
41790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
41890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;-----------------
41979f15823c34ae1e423108295e416213200bb280fAndreas Huber
; Bilinear filter tap table: eight pairs of 32-bit words, one pair per line.
; The two taps of every pair add up to 128.
; Presumably indexed by xoffset / yoffset - the lookup is outside this
; section, confirm against the filter setup code.
bilinear_taps_coeff
    DCD     128,   0
    DCD     112,  16
    DCD      96,  32
    DCD      80,  48
    DCD      64,  64
    DCD      48,  80
    DCD      32,  96
    DCD      16, 112
42290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
42390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    END
424