;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;
; Assembler setup (armasm syntax): export the entry point, select the ARM
; instruction set, and declare the stack-alignment contract of this file.

    EXPORT  |vp8_loop_filter_simple_horizontal_edge_neon|
    ARM                                     ; assemble as ARM (A32) instructions
    REQUIRE8                                ; this code requires an 8-byte aligned stack
    PRESERVE8                               ; this code keeps the stack 8-byte aligned

    AREA ||.text||, CODE, READONLY, ALIGN=2 ; read-only code area, 2^2 = 4-byte aligned
;-----------------------------------------------------------------------
; vp8_loop_filter_simple_horizontal_edge_neon
;
; Simple loop filter across one horizontal edge, 16 pixels wide.
; Four rows are read: p1 and p0 (the two rows above s) and q0 and q1
; (the row at s and the one below it).  Only p0 and q0 are written back.
;
; Arguments:
;   r0          unsigned char *s              row q0 (first row below the edge)
;   r1          int p                         pitch: byte distance between rows
;   r2          const signed char *flimit
;   r3          const signed char *limit
;   stack       const signed char *thresh     never read by this routine
;   stack       int count                     never read by this routine
;
; Note: flimit, limit, and thresh should be positive numbers.  All 16
; elements in flimit are equal, so only its first byte is loaded and
; replicated across the vector.  limit is handled the same way.
;
; Clobbers: r0, r3, r12, q0-q3, q8-q15.
; q4-q7 (d8-d15) are also used as scratch; they are callee-saved under the
; AAPCS, so they are pushed on entry and popped before returning.  The
; push is 64 bytes, which keeps the stack 8-byte aligned.
;-----------------------------------------------------------------------
|vp8_loop_filter_simple_horizontal_edge_neon| PROC
    vpush       {d8-d15}                    ; save callee-saved NEON regs (q4-q7 are used below)

    sub         r0, r0, r1, lsl #1          ; r0 = s - 2 * pitch, i.e. row p1 (two rows up)

    adr         r12, lfhy_coeff             ; r12 -> table of 0x80 / 0x03 / 0x04 byte vectors
    vld1.u8     {q5}, [r0], r1              ; p1
    vld1.s8     {d2[], d3[]}, [r2]          ; flimit[0] replicated into all 16 lanes of q1
    vld1.s8     {d26[], d27[]}, [r3]        ; limit[0] replicated into all 16 lanes of q13
    vld1.u8     {q6}, [r0], r1              ; p0
    vld1.u8     {q0}, [r12]!                ; q0 (register) = 0x80 in every lane
    vld1.u8     {q7}, [r0], r1              ; q0 (pixel row)
    vld1.u8     {q10}, [r12]!               ; 0x03 in every lane
    vld1.u8     {q8}, [r0]                  ; q1 (pixel row); r0 is left pointing at row q1

    ; --- filter mask (vp8_filter_mask) -------------------------------
    vabd.u8     q15, q6, q7                 ; abs(p0 - q0)
    vabd.u8     q14, q5, q8                 ; abs(p1 - q1)
    vqadd.u8    q15, q15, q15               ; abs(p0 - q0) * 2, saturating
    vshr.u8     q14, q14, #1                ; abs(p1 - q1) / 2
    vqadd.u8    q15, q15, q14               ; abs(p0 - q0) * 2 + abs(p1 - q1) / 2, saturating

    ; --- filter (vp8_filter) ------------------------------------------
    ; XOR with 0x80 maps the unsigned pixels onto signed bytes.
    veor        q7, q7, q0                  ; qs0
    veor        q6, q6, q0                  ; ps0
    veor        q5, q5, q0                  ; ps1
    veor        q8, q8, q0                  ; qs1

    vadd.u8     q1, q1, q1                  ; flimit * 2
    vadd.u8     q1, q1, q13                 ; flimit * 2 + limit
    vcge.u8     q15, q1, q15                ; mask = 0xFF where flimit*2 + limit >= abs(p0-q0)*2 + abs(p1-q1)/2, else 0

    ; (qs0 - ps0) and its multiple of 3 are formed in 16-bit lanes so that
    ; only the final narrowing step saturates.
    vsubl.s8    q2, d14, d12                ; qs0 - ps0, low 8 lanes widened to s16
    vsubl.s8    q3, d15, d13                ; qs0 - ps0, high 8 lanes widened to s16

    vqsub.s8    q4, q5, q8                  ; vp8_filter = vp8_signed_char_clamp(ps1 - qs1)

    vadd.s16    q11, q2, q2                 ; 2 * (qs0 - ps0), low half
    vadd.s16    q12, q3, q3                 ; 2 * (qs0 - ps0), high half

    vld1.u8     {q9}, [r12]!                ; 0x04 in every lane

    vadd.s16    q2, q2, q11                 ; 3 * (qs0 - ps0), low half
    vadd.s16    q3, q3, q12                 ; 3 * (qs0 - ps0), high half

    vaddw.s8    q2, q2, d8                  ; vp8_filter + 3 * (qs0 - ps0), low half
    vaddw.s8    q3, q3, d9                  ; vp8_filter + 3 * (qs0 - ps0), high half

    vqmovn.s16  d8, q2                      ; vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * (qs0 - ps0))
    vqmovn.s16  d9, q3                      ; (saturating narrow back to s8, high half)

    vand        q4, q4, q15                 ; vp8_filter &= mask

    vqadd.s8    q2, q4, q10                 ; Filter2 = vp8_signed_char_clamp(vp8_filter + 3)
    vqadd.s8    q4, q4, q9                  ; Filter1 = vp8_signed_char_clamp(vp8_filter + 4)
    vshr.s8     q2, q2, #3                  ; Filter2 >>= 3
    vshr.s8     q4, q4, #3                  ; Filter1 >>= 3

    sub         r0, r0, r1, lsl #1          ; r0: row q1 -> row p0 (back up two rows)

    ; --- outputs -------------------------------------------------------
    vqadd.s8    q11, q6, q2                 ; u = vp8_signed_char_clamp(ps0 + Filter2)
    vqsub.s8    q10, q7, q4                 ; u = vp8_signed_char_clamp(qs0 - Filter1)

    add         r3, r0, r1                  ; r3 = row q0 (r3 no longer needed as limit pointer)

    veor        q6, q11, q0                 ; *op0 = u ^ 0x80 (back to unsigned)
    veor        q7, q10, q0                 ; *oq0 = u ^ 0x80 (back to unsigned)

    vst1.u8     {q6}, [r0]                  ; store op0
    vst1.u8     {q7}, [r3]                  ; store oq0

    vpop        {d8-d15}                    ; restore callee-saved NEON regs
    bx          lr
    ENDP        ; |vp8_loop_filter_simple_horizontal_edge_neon|
;-----------------------------------------------------------------------
; Byte-vector constants for vp8_loop_filter_simple_horizontal_edge_neon.
; Each row is 16 bytes (one q register).  The routine takes the table
; address with adr and reads the rows in order through a post-incremented
; pointer, so the rows must stay in this order and in the same code area.
;-----------------------------------------------------------------------

lfhy_coeff
    DCD     0x80808080, 0x80808080, 0x80808080, 0x80808080  ; 0x80: XOR bias between unsigned and signed bytes
    DCD     0x03030303, 0x03030303, 0x03030303, 0x03030303  ; 0x03: added to vp8_filter before Filter2 >>= 3
    DCD     0x04040404, 0x04040404, 0x04040404, 0x04040404  ; 0x04: added to vp8_filter before Filter1 >>= 3

    END