;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;
; Assembler set-up (armasm syntax).

    EXPORT  |vp8_loop_filter_simple_vertical_edge_neon|    ; make the routine visible to the linker
    ARM                                                     ; assemble as ARM (A32) instructions
    REQUIRE8                                                ; this code requires 8-byte stack alignment
    PRESERVE8                                               ; this code preserves 8-byte stack alignment

    AREA ||.text||, CODE, READONLY, ALIGN=2
;-----------------------------------------------------------------------
; vp8_loop_filter_simple_vertical_edge_neon
;
; Runs the simple loop filter across one vertical edge for 16 consecutive
; rows. For every row the four bytes at s-2 .. s+1 (p1 p0 | q0 q1) are
; read; only p0 and q0 (s-1 and s) are written back.
;
; Note: flimit, limit, and thresh should be positive numbers. All 16
; elements in flimit are equal, so only one byte is loaded and broadcast
; to every lane. The same applies to limit. thresh is not read at all.
;
; Arguments:
;   r0      unsigned char *s            first pixel to the right of the edge, row 0
;   r1      int p                       pitch (byte step from one row to the next)
;   r2      const signed char *flimit
;   r3      const signed char *limit
;   stack   const signed char *thresh   -- never loaded by this routine
;   stack   int count                   -- unused
;
; Register roles after the transpose:
;   q3 = p1, q5 = p0, q4 = q0, q6 = q1 (16 rows each)
;   q0 = 0x80, q11 = 0x03, q12 = 0x04 (from vlfy_coeff)
;   q15 = filter mask
;
; Clobbers: r0, r2, r3, r12, q0-q3, q10-q15.
; d8-d15 (q4-q7) are callee-saved under the AAPCS and are used as
; working registers here, so they are pushed on entry and popped on exit.
; No stack argument is read, so the push does not disturb any offsets,
; and 8 x 8 bytes keeps the stack 8-byte aligned (PRESERVE8).
;-----------------------------------------------------------------------

|vp8_loop_filter_simple_vertical_edge_neon| PROC
    vpush       {d8-d15}                    ; save callee-saved NEON registers

    sub         r0, r0, #2                  ; back up 2 columns: each row read is p1 p0 q0 q1

    ; Rows 0-7: de-interleave the 4 bytes of each row into lane n of d6..d9
    ; (d6 = p1, d7 = p0, d8 = q0, d9 = q1). Constant loads are interleaved
    ; between the row loads.
    vld4.8      {d6[0], d7[0], d8[0], d9[0]}, [r0], r1
    vld1.s8     {d2[], d3[]}, [r2]          ; flimit, broadcast to all lanes of q1
    vld1.s8     {d26[], d27[]}, [r3]        ; limit, broadcast to all lanes of q13
    vld4.8      {d6[1], d7[1], d8[1], d9[1]}, [r0], r1
    adr         r12, vlfy_coeff             ; r12 -> constant table that follows this routine
    vld4.8      {d6[2], d7[2], d8[2], d9[2]}, [r0], r1
    vld4.8      {d6[3], d7[3], d8[3], d9[3]}, [r0], r1
    vld4.8      {d6[4], d7[4], d8[4], d9[4]}, [r0], r1
    vld4.8      {d6[5], d7[5], d8[5], d9[5]}, [r0], r1
    vld4.8      {d6[6], d7[6], d8[6], d9[6]}, [r0], r1
    vld4.8      {d6[7], d7[7], d8[7], d9[7]}, [r0], r1

    ; Rows 8-15 go to d10..d13 (d10 = p1, d11 = p0, d12 = q0, d13 = q1).
    vld4.8      {d10[0], d11[0], d12[0], d13[0]}, [r0], r1
    vld1.u8     {q0}, [r12]!                ; q0  = 0x80 in every byte
    vld4.8      {d10[1], d11[1], d12[1], d13[1]}, [r0], r1
    vld1.u8     {q11}, [r12]!               ; q11 = 0x03 in every byte
    vld4.8      {d10[2], d11[2], d12[2], d13[2]}, [r0], r1
    vld1.u8     {q12}, [r12]!               ; q12 = 0x04 in every byte
    vld4.8      {d10[3], d11[3], d12[3], d13[3]}, [r0], r1
    vld4.8      {d10[4], d11[4], d12[4], d13[4]}, [r0], r1
    vld4.8      {d10[5], d11[5], d12[5], d13[5]}, [r0], r1
    vld4.8      {d10[6], d11[6], d12[6], d13[6]}, [r0], r1
    vld4.8      {d10[7], d11[7], d12[7], d13[7]}, [r0], r1

    ; Pair the low/high halves so each q register holds one column for all
    ; 16 rows: p1 -> q3, p0 -> q5, q0 -> q4, q1 -> q6.
    vswp        d7, d10
    vswp        d12, d9

    ; Filter mask
    sub         r0, r0, r1, lsl #4          ; rewind r0 by 16 rows, back to row 0
    vabd.u8     q15, q5, q4                 ; abs(p0 - q0)
    vabd.u8     q14, q3, q6                 ; abs(p1 - q1)
    vqadd.u8    q15, q15, q15               ; abs(p0 - q0) * 2 (saturating)
    vshr.u8     q14, q14, #1                ; abs(p1 - q1) / 2
    vqadd.u8    q15, q15, q14               ; abs(p0 - q0) * 2 + abs(p1 - q1) / 2

    ; Flip the top bit so the pixels can be handled as signed bytes.
    veor        q4, q4, q0                  ; qs0 = q0 ^ 0x80
    veor        q5, q5, q0                  ; ps0 = p0 ^ 0x80
    veor        q3, q3, q0                  ; ps1 = p1 ^ 0x80
    veor        q6, q6, q0                  ; qs1 = q1 ^ 0x80

    vadd.u8     q1, q1, q1                  ; flimit * 2
    vadd.u8     q1, q1, q13                 ; flimit * 2 + limit
    vcge.u8     q15, q1, q15                ; mask = 0xFF where flimit*2 + limit >= abs(p0-q0)*2 + abs(p1-q1)/2

    ; Filter value. (qs0 - ps0) is widened to 16 bits so that
    ; 3 * (qs0 - ps0) + clamp(ps1 - qs1) is formed without intermediate
    ; saturation and clamped to a signed byte only once, on narrowing.
    vsubl.s8    q2, d8, d10                 ; (qs0 - ps0), rows 0-7, 16-bit
    vsubl.s8    q13, d9, d11                ; (qs0 - ps0), rows 8-15, 16-bit

    vqsub.s8    q1, q3, q6                  ; vp8_filter = vp8_signed_char_clamp(ps1 - qs1)

    vadd.s16    q10, q2, q2                 ; 2 * (qs0 - ps0)
    vadd.s16    q14, q13, q13
    vadd.s16    q2, q2, q10                 ; 3 * (qs0 - ps0)
    vadd.s16    q13, q13, q14

    vaddw.s8    q2, q2, d2                  ; vp8_filter + 3 * (qs0 - ps0)
    vaddw.s8    q13, q13, d3

    vqmovn.s16  d2, q2                      ; vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * (qs0 - ps0))
    vqmovn.s16  d3, q13

    add         r0, r0, #1                  ; r0 -> p0 column of row 0 (s - 1)
    add         r2, r0, r1                  ; r2 -> row 1

    vand        q1, q1, q15                 ; vp8_filter &= mask

    vqadd.s8    q2, q1, q11                 ; Filter2 = vp8_signed_char_clamp(vp8_filter + 3)
    vqadd.s8    q1, q1, q12                 ; Filter1 = vp8_signed_char_clamp(vp8_filter + 4)
    vshr.s8     q2, q2, #3                  ; Filter2 >>= 3
    vshr.s8     q1, q1, #3                  ; Filter1 >>= 3

    ; Output values
    vqsub.s8    q10, q4, q1                 ; u = vp8_signed_char_clamp(qs0 - Filter1)
    vqadd.s8    q11, q5, q2                 ; u = vp8_signed_char_clamp(ps0 + Filter2)

    veor        q7, q10, q0                 ; oq0 = u ^ 0x80 (back to unsigned)
    veor        q6, q11, q0                 ; op0 = u ^ 0x80 (back to unsigned)

    add         r3, r2, r1                  ; r3  -> row 2
    vswp        d13, d14                    ; d12/d13 = op0/oq0 rows 0-7, d14/d15 = op0/oq0 rows 8-15
    add         r12, r3, r1                 ; r12 -> row 3

    ; Store the (op0, oq0) byte pair of each row. The row pointers rotate
    ; through r0, r2, r3 and r12, each advanced by one pitch at a time.
    vst2.8      {d12[0], d13[0]}, [r0]      ; row 0
    vst2.8      {d12[1], d13[1]}, [r2]      ; row 1
    vst2.8      {d12[2], d13[2]}, [r3]      ; row 2
    vst2.8      {d12[3], d13[3]}, [r12], r1 ; row 3
    add         r0, r12, r1
    vst2.8      {d12[4], d13[4]}, [r12]     ; row 4
    vst2.8      {d12[5], d13[5]}, [r0], r1  ; row 5
    add         r2, r0, r1
    vst2.8      {d12[6], d13[6]}, [r0]      ; row 6
    vst2.8      {d12[7], d13[7]}, [r2], r1  ; row 7
    add         r3, r2, r1
    vst2.8      {d14[0], d15[0]}, [r2]      ; row 8
    vst2.8      {d14[1], d15[1]}, [r3], r1  ; row 9
    add         r12, r3, r1
    vst2.8      {d14[2], d15[2]}, [r3]      ; row 10
    vst2.8      {d14[3], d15[3]}, [r12], r1 ; row 11
    add         r0, r12, r1
    vst2.8      {d14[4], d15[4]}, [r12]     ; row 12
    vst2.8      {d14[5], d15[5]}, [r0], r1  ; row 13
    add         r2, r0, r1
    vst2.8      {d14[6], d15[6]}, [r0]      ; row 14
    vst2.8      {d14[7], d15[7]}, [r2]      ; row 15

    vpop        {d8-d15}                    ; restore callee-saved NEON registers
    bx          lr
    ENDP        ; |vp8_loop_filter_simple_vertical_edge_neon|
;-----------------
; Byte constants for the filter: three rows of 16 bytes, one per vector
; register. The routine locates this table with ADR and reads the rows in
; order with post-incremented loads, so both the row order and the
; table's position in this code area must be kept.
vlfy_coeff
    DCD     0x80808080, 0x80808080, 0x80808080, 0x80808080    ; 0x80: sign-bit flip (unsigned <-> signed)
    DCD     0x03030303, 0x03030303, 0x03030303, 0x03030303    ; 0x03: added to the filter value for Filter2
    DCD     0x04040404, 0x04040404, 0x04040404, 0x04040404    ; 0x04: added to the filter value for Filter1

    END