;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


1290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    EXPORT  |vp8_sub_pixel_variance8x8_neon|
1390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    ARM
1490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    REQUIRE8
1590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    PRESERVE8
1690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
1790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    AREA ||.text||, CODE, READONLY, ALIGN=2
;-----------------------------------------------------------------------
; vp8_sub_pixel_variance8x8_neon
;
; Builds an 8x8 bilinear-filtered block from src_ptr (horizontal pass
; selected by xoffset, vertical pass selected by yoffset; a pass is
; skipped when its offset is 0), then compares it against the 8x8 block
; at dst_ptr.
;
; Arguments:
;   r0         unsigned char *src_ptr
;   r1         int            src_pixels_per_line
;   r2         int            xoffset
;   r3         int            yoffset
;   stack(r4)  unsigned char *dst_ptr
;   stack(r5)  int            dst_pixels_per_line
;   stack(lr)  unsigned int  *sse
;
; Result:
;   *sse = sum of squared differences over the 64 pixels
;   r0   = sse - ((sum * sum) >> 6), where sum is the sum of differences
;
; Notes:
;   - xoffset/yoffset index bilinear_taps_coeff in 8-byte steps and are
;     not range-checked; the table holds 8 entries, so 0..7 is expected.
;   - 9 source rows are read. When xoffset != 0 each row is read as
;     16 bytes (only the first 9 are used); otherwise 8 bytes per row.
;   - r4, r5 and d8-d15 are saved and restored. r0-r3, r12, q0-q3,
;     q8-q15 and the flags are clobbered.
;   - Most of the code is copied from bilinear_predict8x8_neon and
;     vp8_variance8x8_neon.
;-----------------------------------------------------------------------

|vp8_sub_pixel_variance8x8_neon| PROC
    push            {r4-r5, lr}
    vpush           {d8-d15}                ;q4-q7 are callee-saved and are used below

    adr             r12, bilinear_taps_coeff ;r12 = base of the filter table
    ;stack args now sit above 12 bytes (r4, r5, lr) + 64 bytes (d8-d15)
    ldr             r4, [sp, #76]           ;load *dst_ptr from stack
    ldr             r5, [sp, #80]           ;load dst_pixels_per_line from stack
    ldr             lr, [sp, #84]           ;load *sse from stack

    cmp             r2, #0                  ;skip first_pass filter if xoffset=0
    beq             skip_firstpass_filter

;First pass: output_height lines x output_width columns (9x8)
    add             r2, r12, r2, lsl #3     ;filter entry = table + xoffset * 8

    vld1.u8         {q1}, [r0], r1          ;load src rows 0-3 (16 bytes each)
    vld1.u32        {d31}, [r2]             ;load first_pass filter (two 32-bit taps)
    vld1.u8         {q2}, [r0], r1
    vdup.8          d0, d31[0]              ;d0 = Filter[0] in every byte lane
    vld1.u8         {q3}, [r0], r1
    vdup.8          d1, d31[4]              ;d1 = Filter[1] in every byte lane
    vld1.u8         {q4}, [r0], r1

    vmull.u8        q6, d2, d0              ;(src_ptr[0] * Filter[0])
    vmull.u8        q7, d4, d0
    vmull.u8        q8, d6, d0
    vmull.u8        q9, d8, d0

    vext.8          d3, d2, d3, #1          ;construct src_ptr[1] (pixels 1..8 of the row)
    vext.8          d5, d4, d5, #1
    vext.8          d7, d6, d7, #1
    vext.8          d9, d8, d9, #1

    vmlal.u8        q6, d3, d1              ;(src_ptr[1] * Filter[1])
    vmlal.u8        q7, d5, d1
    vmlal.u8        q8, d7, d1
    vmlal.u8        q9, d9, d1

    vld1.u8         {q1}, [r0], r1          ;load src rows 4-7
    vqrshrn.u16    d22, q6, #7              ;shift/round/saturate to u8 -> rows 0-3 in d22-d25
    vld1.u8         {q2}, [r0], r1
    vqrshrn.u16    d23, q7, #7
    vld1.u8         {q3}, [r0], r1
    vqrshrn.u16    d24, q8, #7
    vld1.u8         {q4}, [r0], r1
    vqrshrn.u16    d25, q9, #7

    ;first_pass filtering on the remaining 5 rows
    vld1.u8         {q5}, [r0], r1          ;load src row 8

    vmull.u8        q6, d2, d0              ;(src_ptr[0] * Filter[0])
    vmull.u8        q7, d4, d0
    vmull.u8        q8, d6, d0
    vmull.u8        q9, d8, d0
    vmull.u8        q10, d10, d0

    vext.8          d3, d2, d3, #1          ;construct src_ptr[1]
    vext.8          d5, d4, d5, #1
    vext.8          d7, d6, d7, #1
    vext.8          d9, d8, d9, #1
    vext.8          d11, d10, d11, #1

    vmlal.u8        q6, d3, d1              ;(src_ptr[1] * Filter[1])
    vmlal.u8        q7, d5, d1
    vmlal.u8        q8, d7, d1
    vmlal.u8        q9, d9, d1
    vmlal.u8        q10, d11, d1

    vqrshrn.u16    d26, q6, #7              ;shift/round/saturate to u8 -> rows 4-8 in d26-d30
    vqrshrn.u16    d27, q7, #7
    vqrshrn.u16    d28, q8, #7
    vqrshrn.u16    d29, q9, #7
    vqrshrn.u16    d30, q10, #7

;Second pass: 8x8
;On entry d22-d30 hold the 9 rows; on exit d22-d29 hold the 8 output rows.
secondpass_filter
    cmp             r3, #0                  ;skip second_pass filter if yoffset=0
    ;skip_secondpass_filter
    beq             sub_pixel_variance8x8_neon

    add             r3, r12, r3, lsl #3     ;filter entry = table + yoffset * 8

    vld1.u32        {d31}, [r3]             ;load second_pass filter

    vdup.8          d0, d31[0]              ;second_pass filter parameters (d0 d1)
    vdup.8          d1, d31[4]

    vmull.u8        q1, d22, d0             ;(src_ptr[0] * Filter[0])
    vmull.u8        q2, d23, d0
    vmull.u8        q3, d24, d0
    vmull.u8        q4, d25, d0
    vmull.u8        q5, d26, d0
    vmull.u8        q6, d27, d0
    vmull.u8        q7, d28, d0
    vmull.u8        q8, d29, d0

    vmlal.u8        q1, d23, d1             ;(src_ptr[pixel_step] * Filter[1]), i.e. the row below
    vmlal.u8        q2, d24, d1
    vmlal.u8        q3, d25, d1
    vmlal.u8        q4, d26, d1
    vmlal.u8        q5, d27, d1
    vmlal.u8        q6, d28, d1
    vmlal.u8        q7, d29, d1
    vmlal.u8        q8, d30, d1

    vqrshrn.u16    d22, q1, #7              ;shift/round/saturate to u8
    vqrshrn.u16    d23, q2, #7
    vqrshrn.u16    d24, q3, #7
    vqrshrn.u16    d25, q4, #7
    vqrshrn.u16    d26, q5, #7
    vqrshrn.u16    d27, q6, #7
    vqrshrn.u16    d28, q7, #7
    vqrshrn.u16    d29, q8, #7

    b               sub_pixel_variance8x8_neon

;--------------------
;xoffset == 0: no horizontal filtering, load the 9 source rows directly.
skip_firstpass_filter
    vld1.u8         {d22}, [r0], r1         ;load src data
    vld1.u8         {d23}, [r0], r1
    vld1.u8         {d24}, [r0], r1
    vld1.u8         {d25}, [r0], r1
    vld1.u8         {d26}, [r0], r1
    vld1.u8         {d27}, [r0], r1
    vld1.u8         {d28}, [r0], r1
    vld1.u8         {d29}, [r0], r1
    vld1.u8         {d30}, [r0], r1

    b               secondpass_filter

;----------------------
;vp8_variance8x8_neon
;Compares the 8 filtered rows in d22-d29 against the block at r4 (stride r5).
sub_pixel_variance8x8_neon
    vmov.i8         q8, #0                      ;q8 - sum
    vmov.i8         q9, #0                      ;q9, q10 - sse
    vmov.i8         q10, #0

    mov             r12, #2                     ;2 iterations, 4 rows each

sub_pixel_variance8x8_neon_loop
    vld1.8          {d0}, [r4], r5              ;load dst data
    subs            r12, r12, #1                ;flags consumed by the bne at the loop end
    vld1.8          {d1}, [r4], r5
    vld1.8          {d2}, [r4], r5
    vsubl.u8        q4, d22, d0                 ;calculate diff (widened to 16 bits)
    vld1.8          {d3}, [r4], r5

    vsubl.u8        q5, d23, d1
    vsubl.u8        q6, d24, d2

    vpadal.s16      q8, q4                      ;sum
    vmlal.s16       q9, d8, d8                  ;sse
    vmlal.s16       q10, d9, d9

    vsubl.u8        q7, d25, d3

    vpadal.s16      q8, q5
    vmlal.s16       q9, d10, d10
    vmlal.s16       q10, d11, d11

    vmov            q11, q13                    ;d22/d23 <- d26/d27 for the next iteration

    vpadal.s16      q8, q6
    vmlal.s16       q9, d12, d12
    vmlal.s16       q10, d13, d13

    vmov            q12, q14                    ;d24/d25 <- d28/d29 for the next iteration

    vpadal.s16      q8, q7
    vmlal.s16       q9, d14, d14
    vmlal.s16       q10, d15, d15

    bne             sub_pixel_variance8x8_neon_loop

    vadd.u32        q10, q9, q10                ;accumulate sse
    vpaddl.s32      q0, q8                      ;accumulate sum

    vpaddl.u32      q1, q10
    vadd.s64        d0, d0, d1                  ;d0 = total sum
    vadd.u64        d1, d2, d3                  ;d1 = total sse

    vmull.s32       q5, d0, d0                  ;sum * sum
    vst1.32         {d1[0]}, [lr]               ;store sse
    vshr.s32        d10, d10, #6                ;(sum * sum) >> 6, i.e. divided by the 64 pixels
    vsub.s32        d0, d1, d10                 ;sse - ((sum * sum) >> 6)

    vmov.32         r0, d0[0]                   ;return
    vpop            {d8-d15}
    pop             {r4-r5, pc}

    ENDP
21690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
;-----------------
;Bilinear filter table: 8 entries, each a (tap0, tap1) pair of 32-bit words,
;so one entry is 8 bytes. Every pair sums to 128, matching the >>7 rounding
;shift applied after filtering. Entry 0 (128, 0) is the identity filter.

bilinear_taps_coeff
    DCD     128,   0
    DCD     112,  16
    DCD      96,  32
    DCD      80,  48
    DCD      64,  64
    DCD      48,  80
    DCD      32,  96
    DCD      16, 112

    END
223