;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


1290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    EXPORT  |vp8_bilinear_predict8x8_neon|
1390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    ARM
1490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    REQUIRE8
1590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    PRESERVE8
1690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
1790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    AREA ||.text||, CODE, READONLY, ALIGN=2
;-----------------------------------------------------------------------
; vp8_bilinear_predict8x8_neon
;
; Two-pass, 2-tap bilinear filter that writes an 8x8 block of bytes.
; The first pass filters horizontally and produces 9 rows of 8 pixels;
; the second pass filters vertically between adjacent rows and produces
; the final 8 rows.  A pass is skipped when its offset is 0.
;
; Arguments:
;   r0        unsigned char *src_ptr
;   r1        int            src_pixels_per_line  (source row stride)
;   r2        int            xoffset   (first-pass tap index,  0 = skip)
;   r3        int            yoffset   (second-pass tap index, 0 = skip)
;   [sp]      unsigned char *dst_ptr   (stack argument, loaded into r4)
;   [sp, #4]  int            dst_pitch (stack argument, loaded into lr)
;
; Each offset is multiplied by 8 to index bifilter8_coeff (two 32-bit
; taps per entry).
;
; Register roles:
;   r12       address of bifilter8_coeff
;   d0, d1    the two taps of the current pass, replicated to every byte
;   d22-d30   rows 0..8 of first-pass output (input of the second pass)
;
; Preserved: r4, and q4-q7 (d8-d15), which are callee-saved and are used
;            here as scratch, so they are saved/restored on the stack.
; Clobbers:  r0-r3, r12, q0-q3, q8-q15, flags.
;
; Note: when the first pass runs, every source row is loaded as 16 bytes
; although only the first 9 are used.
;-----------------------------------------------------------------------

|vp8_bilinear_predict8x8_neon| PROC
    push            {r4, lr}

    adr             r12, bifilter8_coeff
    ldr             r4, [sp, #8]            ;dst_ptr   (above the 8 bytes just pushed)
    ldr             lr, [sp, #12]           ;dst_pitch

    vpush           {d8-d15}                ;q4-q7 are callee-saved; 64 bytes keeps
                                            ;the stack 8-byte aligned

    cmp             r2, #0                  ;skip first_pass filter if xoffset=0
    beq             skip_firstpass_filter

;First pass: output_height lines x output_width columns (9x8)
    add             r2, r12, r2, lsl #3     ;filter address = table + xoffset * 8

    vld1.u8         {q1}, [r0], r1          ;load src rows 0..3 (16 bytes each)
    vld1.u32        {d31}, [r2]             ;load first_pass filter (two 32-bit taps)
    vld1.u8         {q2}, [r0], r1
    vdup.8          d0, d31[0]              ;d0 = tap 0 in every byte lane
    vld1.u8         {q3}, [r0], r1
    vdup.8          d1, d31[4]              ;d1 = tap 1 in every byte lane
    vld1.u8         {q4}, [r0], r1

    vmull.u8        q6, d2, d0              ;(src_ptr[0] * vp8_filter[0])
    vmull.u8        q7, d4, d0
    vmull.u8        q8, d6, d0
    vmull.u8        q9, d8, d0

    vext.8          d3, d2, d3, #1          ;construct src_ptr[1] (pixels 1..8)
    vext.8          d5, d4, d5, #1
    vext.8          d7, d6, d7, #1
    vext.8          d9, d8, d9, #1

    vmlal.u8        q6, d3, d1              ;(src_ptr[1] * vp8_filter[1])
    vmlal.u8        q7, d5, d1
    vmlal.u8        q8, d7, d1
    vmlal.u8        q9, d9, d1

    vld1.u8         {q1}, [r0], r1          ;load src rows 4..7
    vqrshrn.u16     d22, q6, #7             ;shift/round/saturate to u8
    vld1.u8         {q2}, [r0], r1
    vqrshrn.u16     d23, q7, #7
    vld1.u8         {q3}, [r0], r1
    vqrshrn.u16     d24, q8, #7
    vld1.u8         {q4}, [r0], r1
    vqrshrn.u16     d25, q9, #7

    ;first_pass filtering on the remaining 5 rows
    vld1.u8         {q5}, [r0], r1          ;load src row 8

    vmull.u8        q6, d2, d0              ;(src_ptr[0] * vp8_filter[0])
    vmull.u8        q7, d4, d0
    vmull.u8        q8, d6, d0
    vmull.u8        q9, d8, d0
    vmull.u8        q10, d10, d0

    vext.8          d3, d2, d3, #1          ;construct src_ptr[1] (pixels 1..8)
    vext.8          d5, d4, d5, #1
    vext.8          d7, d6, d7, #1
    vext.8          d9, d8, d9, #1
    vext.8          d11, d10, d11, #1

    vmlal.u8        q6, d3, d1              ;(src_ptr[1] * vp8_filter[1])
    vmlal.u8        q7, d5, d1
    vmlal.u8        q8, d7, d1
    vmlal.u8        q9, d9, d1
    vmlal.u8        q10, d11, d1

    vqrshrn.u16     d26, q6, #7             ;shift/round/saturate to u8
    vqrshrn.u16     d27, q7, #7
    vqrshrn.u16     d28, q8, #7
    vqrshrn.u16     d29, q9, #7
    vqrshrn.u16     d30, q10, #7

;Second pass: 8x8
;On entry d22-d30 hold the 9 input rows.
secondpass_filter
    cmp             r3, #0                  ;skip second_pass filter if yoffset=0
    beq             skip_secondpass_filter

    add             r3, r12, r3, lsl #3     ;filter address = table + yoffset * 8
    add             r0, r4, lr              ;r0 = address of dst row 1

    vld1.u32        {d31}, [r3]             ;load second_pass filter
    add             r1, r0, lr              ;r1 = address of dst row 2

    vdup.8          d0, d31[0]              ;second_pass filter parameters (d0 d1)
    vdup.8          d1, d31[4]

    vmull.u8        q1, d22, d0             ;(src_ptr[0] * vp8_filter[0])
    vmull.u8        q2, d23, d0
    vmull.u8        q3, d24, d0
    vmull.u8        q4, d25, d0
    vmull.u8        q5, d26, d0
    vmull.u8        q6, d27, d0
    vmull.u8        q7, d28, d0
    vmull.u8        q8, d29, d0

    vmlal.u8        q1, d23, d1             ;(src_ptr[pixel_step] * vp8_filter[1])
    vmlal.u8        q2, d24, d1
    vmlal.u8        q3, d25, d1
    vmlal.u8        q4, d26, d1
    vmlal.u8        q5, d27, d1
    vmlal.u8        q6, d28, d1
    vmlal.u8        q7, d29, d1
    vmlal.u8        q8, d30, d1

    vqrshrn.u16     d2, q1, #7              ;shift/round/saturate to u8
    vqrshrn.u16     d3, q2, #7
    vqrshrn.u16     d4, q3, #7
    vqrshrn.u16     d5, q4, #7
    vqrshrn.u16     d6, q5, #7
    vqrshrn.u16     d7, q6, #7
    vqrshrn.u16     d8, q7, #7
    vqrshrn.u16     d9, q8, #7

    vst1.u8         {d2}, [r4]              ;store row 0
    vst1.u8         {d3}, [r0]              ;store row 1
    vst1.u8         {d4}, [r1], lr          ;store rows 2..7, stepping by dst_pitch
    vst1.u8         {d5}, [r1], lr
    vst1.u8         {d6}, [r1], lr
    vst1.u8         {d7}, [r1], lr
    vst1.u8         {d8}, [r1], lr
    vst1.u8         {d9}, [r1], lr

    vpop            {d8-d15}                ;restore callee-saved q4-q7
    pop             {r4, pc}

;--------------------
;xoffset == 0: no horizontal filtering, take the 9 source rows unchanged.
skip_firstpass_filter
    vld1.u8         {d22}, [r0], r1         ;load src data (8 bytes per row)
    vld1.u8         {d23}, [r0], r1
    vld1.u8         {d24}, [r0], r1
    vld1.u8         {d25}, [r0], r1
    vld1.u8         {d26}, [r0], r1
    vld1.u8         {d27}, [r0], r1
    vld1.u8         {d28}, [r0], r1
    vld1.u8         {d29}, [r0], r1
    vld1.u8         {d30}, [r0], r1

    b               secondpass_filter

;---------------------
;yoffset == 0: no vertical filtering, rows 0..7 (d22-d29) are the result.
skip_secondpass_filter
    vst1.u8         {d22}, [r4], lr         ;store result
    vst1.u8         {d23}, [r4], lr
    vst1.u8         {d24}, [r4], lr
    vst1.u8         {d25}, [r4], lr
    vst1.u8         {d26}, [r4], lr
    vst1.u8         {d27}, [r4], lr
    vst1.u8         {d28}, [r4], lr
    vst1.u8         {d29}, [r4], lr

    vpop            {d8-d15}                ;restore callee-saved q4-q7
    pop             {r4, pc}

    ENDP

;-----------------
; Bilinear filter taps: one {tap0, tap1} pair of 32-bit words per offset
; 0..7, so entry N lives at bifilter8_coeff + N * 8.  Every pair sums to
; 128, matching the rounding right-shift by 7 applied after filtering.

bifilter8_coeff
    DCD     128,   0
    DCD     112,  16
    DCD      96,  32
    DCD      80,  48
    DCD      64,  64
    DCD      48,  80
    DCD      32,  96
    DCD      16, 112

    END
184