;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


; Public entry points defined in this file.
    EXPORT |vp8_loop_filter_simple_horizontal_edge_armv6|
    EXPORT |vp8_loop_filter_simple_vertical_edge_armv6|

    AREA    |.text|, CODE, READONLY  ; name this block of code

; TRANSPOSE_MATRIX: transpose a 4x4 byte matrix held in four 32-bit registers.
;   $a0..$a3  input rows; all four are overwritten (used as scratch).
;   $b0..$b3  output; $bN receives byte lane N of $a0..$a3, with the byte
;             taken from $a0 in the least significant position.
; Outputs are written while the inputs are still being read, so the input
; and output register sets should not overlap.
    MACRO
    TRANSPOSE_MATRIX $a0, $a1, $a2, $a3, $b0, $b1, $b2, $b3
    ; input: $a0, $a1, $a2, $a3; output: $b0, $b1, $b2, $b3
    ; a0: 03 02 01 00
    ; a1: 13 12 11 10
    ; a2: 23 22 21 20
    ; a3: 33 32 31 30
    ;     b3 b2 b1 b0

    ; even byte lanes (0 and 2) of each row, interleaved pairwise
    uxtb16      $b1, $a1                    ; xx 12 xx 10
    uxtb16      $b0, $a0                    ; xx 02 xx 00
    uxtb16      $b3, $a3                    ; xx 32 xx 30
    uxtb16      $b2, $a2                    ; xx 22 xx 20
    orr         $b1, $b0, $b1, lsl #8       ; 12 02 10 00
    orr         $b3, $b2, $b3, lsl #8       ; 32 22 30 20

    ; odd byte lanes (1 and 3) of each row, interleaved pairwise (in place)
    uxtb16      $a1, $a1, ror #8            ; xx 13 xx 11
    uxtb16      $a3, $a3, ror #8            ; xx 33 xx 31
    uxtb16      $a0, $a0, ror #8            ; xx 03 xx 01
    uxtb16      $a2, $a2, ror #8            ; xx 23 xx 21
    orr         $a0, $a0, $a1, lsl #8       ; 13 03 11 01
    orr         $a2, $a2, $a3, lsl #8       ; 33 23 31 21

    ; combine halfwords; $b2/$b0 must be formed before $b3/$b1 are overwritten
    pkhtb       $b2, $b3, $b1, asr #16      ; 32 22 12 02   -- byte lane 2
    pkhbt       $b0, $b1, $b3, lsl #16      ; 30 20 10 00   -- byte lane 0

    pkhtb       $b3, $a2, $a0, asr #16      ; 33 23 13 03   -- byte lane 3
    pkhbt       $b1, $a0, $a2, lsl #16      ; 31 21 11 01   -- byte lane 1
    MEND



; Register aliases for the first two arguments of both filter routines.
src         RN  r0
pstep       RN  r1

; Arguments on entry to both routines:
;r0     unsigned char *src_ptr,
;r1     int src_pixel_step,
;r2     const char *blimit

;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
|vp8_loop_filter_simple_horizontal_edge_armv6| PROC
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
; Simple loop filter across a horizontal edge.
; Processes 16 pixels along the edge: 4 passes, each handling 4 adjacent
; pixels packed one per byte in a 32-bit word per row.
;
; In:   r0 (src)   = pointer into the q0 row; the rows used are
;                    p1 = src - 2*pstep, p0 = src - pstep,
;                    q0 = src,           q1 = src + pstep
;       r1 (pstep) = byte offset between rows
;       r2         = pointer to the blimit byte
; Out:  the p0 and q0 rows are rewritten where the filter mask is non-zero.
; Regs: r2  = 0x80808080 (sign bias)   r12 = blimit replicated in every byte
;       r9  = remaining passes         lr  = 0
;       r10 = per-byte filter mask
;       r4-r11 and the return address are saved/restored on the stack.
; NOTE(review): rows are accessed with word loads/stores at src; presumably
; the caller guarantees suitable alignment -- confirm.
    stmdb       sp!, {r4 - r11, lr}

    ldrb        r12, [r2]                   ; blimit
    ldr         r3, [src, -pstep, lsl #1]   ; p1
    ldr         r4, [src, -pstep]           ; p0
    ldr         r5, [src]                   ; q0
    ldr         r6, [src, pstep]            ; q1
    orr         r12, r12, r12, lsl #8       ; blimit replicated to 2 bytes
    ldr         r2, c0x80808080
    orr         r12, r12, r12, lsl #16      ; blimit replicated to 4 bytes
    mov         r9, #4                      ; 4 passes x 4 pixels each = 16 pixels
    mov         lr, #0                      ; need 0 in a couple places

|simple_hnext8|
    ; vp8_simple_filter_mask()

    uqsub8      r7, r3, r6                  ; p1 - q1
    uqsub8      r8, r6, r3                  ; q1 - p1
    uqsub8      r10, r4, r5                 ; p0 - q0
    uqsub8      r11, r5, r4                 ; q0 - p0
    orr         r8, r8, r7                  ; abs(p1 - q1)
    orr         r10, r10, r11               ; abs(p0 - q0)
    uqadd8      r10, r10, r10               ; abs(p0 - q0) * 2
    uhadd8      r8, r8, lr                  ; abs(p1 - q1) >> 1
    uqadd8      r10, r10, r8                ; abs(p0 - q0)*2 + abs(p1 - q1)/2
    mvn         r8, #0
    usub8       r10, r12, r10               ; blimit - sum; usub8 sets the GE flags per byte
    sel         r10, r8, lr                 ; filter mask: 0xFF where sum <= blimit, else 0
    cmp         r10, #0
    beq         simple_hskip_filter         ; skip filtering if all masks are 0x00

    ;vp8_simple_filter()

    eor         r3, r3, r2                  ; p1 offset to convert to a signed value
    eor         r6, r6, r2                  ; q1 offset to convert to a signed value
    eor         r4, r4, r2                  ; p0 offset to convert to a signed value
    eor         r5, r5, r2                  ; q0 offset to convert to a signed value

    qsub8       r3, r3, r6                  ; vp8_filter = p1 - q1
    qsub8       r6, r5, r4                  ; q0 - p0
    qadd8       r3, r3, r6                  ; += q0 - p0
    ldr         r7, c0x04040404
    qadd8       r3, r3, r6                  ; += q0 - p0
    ldr         r8, c0x03030303
    qadd8       r3, r3, r6                  ; vp8_filter = p1-q1 + 3*(q0-p0))
    ;STALL
    and         r3, r3, r10                 ; vp8_filter &= mask

    qadd8       r7 , r3 , r7                ; Filter1 = vp8_filter + 4
    qadd8       r8 , r3 , r8                ; Filter2 = vp8_filter + 3

    ; signed halving add with 0 == per-byte arithmetic shift right by 1
    shadd8      r7 , r7 , lr
    shadd8      r8 , r8 , lr
    shadd8      r7 , r7 , lr
    shadd8      r8 , r8 , lr
    shadd8      r7 , r7 , lr                ; Filter1 >>= 3
    shadd8      r8 , r8 , lr                ; Filter2 >>= 3

    qsub8       r5 ,r5, r7                  ; u = q0 - Filter1
    qadd8       r4, r4, r8                  ; u = p0 + Filter2
    eor         r5, r5, r2                  ; *oq0 = u^0x80
    str         r5, [src]                   ; store oq0 result
    eor         r4, r4, r2                  ; *op0 = u^0x80
    str         r4, [src, -pstep]           ; store op0 result

|simple_hskip_filter|
    subs        r9, r9, #1
    addne       src, src, #4                ; advance to the next 4 pixels along the edge

    ; reload the four rows for the next pass (skipped after the final pass)
    ldrne       r3, [src, -pstep, lsl #1]   ; p1
    ldrne       r4, [src, -pstep]           ; p0
    ldrne       r5, [src]                   ; q0
    ldrne       r6, [src, pstep]            ; q1

    bne         simple_hnext8

    ldmia       sp!, {r4 - r11, pc}
    ENDP        ; |vp8_loop_filter_simple_horizontal_edge_armv6|


;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
|vp8_loop_filter_simple_vertical_edge_armv6| PROC
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
; Simple loop filter across a vertical edge.
; Processes 16 rows: 4 passes of 4 rows each. For every row the four bytes
; p1 p0 | q0 q1 straddling the edge are read with two halfword loads
; (src-2 and src), the 4x4 block is transposed so that each register holds
; one pixel position for four rows, filtered, and p0/q0 are written back
; one byte at a time.
;
; In:   r0 (src)   = pointer to q0 of the first row (p0 is at src - 1)
;       r1 (pstep) = byte offset between rows
;       r2         = pointer to the blimit byte
; Out:  p0 (src-1) and q0 (src) of each row are rewritten in every 4-row
;       group whose filter mask is non-zero.
; Regs: r2  = 0x80808080 (sign bias)   r12 = blimit replicated in every byte
;       r11 = remaining passes         lr  = per-byte filter mask
;       r8  = 0 inside the loop (re-set each pass; it carries row data on entry)
;       r4-r11 and the return address are saved/restored on the stack.
; NOTE(review): rows are read with halfword loads at src-2 and src;
; presumably the caller guarantees suitable alignment -- confirm.
    stmdb       sp!, {r4 - r11, lr}

    ldrb        r12, [r2]                   ; r12: blimit
    ldr         r2, c0x80808080
    orr         r12, r12, r12, lsl #8       ; blimit replicated to 2 bytes

    ; load source data to r7, r8, r9, r10 (one row of p1 p0 q0 q1 per register)
    ldrh        r3, [src, #-2]              ; row 0: p1, p0
    pld         [src, #23]                  ; preload for next block
    ldrh        r4, [src], pstep            ; row 0: q0, q1; src -> next row
    orr         r12, r12, r12, lsl #16      ; blimit replicated to 4 bytes

    ldrh        r5, [src, #-2]
    pld         [src, #23]
    ldrh        r6, [src], pstep

    pkhbt       r7, r3, r4, lsl #16         ; row 0 packed: q1 q0 p0 p1

    ldrh        r3, [src, #-2]
    pld         [src, #23]
    ldrh        r4, [src], pstep

    pkhbt       r8, r5, r6, lsl #16         ; row 1 packed

    ldrh        r5, [src, #-2]
    pld         [src, #23]
    ldrh        r6, [src], pstep
    mov         r11, #4                     ; 4 passes x 4 rows each = 16 rows

|simple_vnext8|
    ; vp8_simple_filter_mask() function
    pkhbt       r9, r3, r4, lsl #16         ; row 2 packed
    pkhbt       r10, r5, r6, lsl #16        ; row 3 packed

    ;transpose r7, r8, r9, r10 to r3, r4, r5, r6
    ; afterwards r3 = p1, r4 = p0, r5 = q0, r6 = q1 (one byte per row)
    TRANSPOSE_MATRIX r7, r8, r9, r10, r3, r4, r5, r6

    uqsub8      r7, r3, r6                  ; p1 - q1
    uqsub8      r8, r6, r3                  ; q1 - p1
    uqsub8      r9, r4, r5                  ; p0 - q0
    uqsub8      r10, r5, r4                 ; q0 - p0
    orr         r7, r7, r8                  ; abs(p1 - q1)
    orr         r9, r9, r10                 ; abs(p0 - q0)
    mov         r8, #0
    uqadd8      r9, r9, r9                  ; abs(p0 - q0) * 2
    uhadd8      r7, r7, r8                  ; abs(p1 - q1) / 2
    uqadd8      r7, r7, r9                  ; abs(p0 - q0)*2 + abs(p1 - q1)/2
    mvn         r10, #0                     ; r10 == -1

    usub8       r7, r12, r7                 ; blimit - sum; usub8 sets the GE flags per byte
    sel         lr, r10, r8                 ; filter mask: 0xFF where sum <= blimit, else 0

    cmp         lr, #0
    beq         simple_vskip_filter         ; skip filtering

    ;vp8_simple_filter() function
    eor         r3, r3, r2                  ; p1 offset to convert to a signed value
    eor         r6, r6, r2                  ; q1 offset to convert to a signed value
    eor         r4, r4, r2                  ; p0 offset to convert to a signed value
    eor         r5, r5, r2                  ; q0 offset to convert to a signed value

    qsub8       r3, r3, r6                  ; vp8_filter = p1 - q1
    qsub8       r6, r5, r4                  ; q0 - p0

    qadd8       r3, r3, r6                  ; vp8_filter += q0 - p0
    ldr         r9, c0x03030303             ; r9 = 3

    qadd8       r3, r3, r6                  ; vp8_filter += q0 - p0
    ldr         r7, c0x04040404

    qadd8       r3, r3, r6                  ; vp8_filter = p1-q1 + 3*(q0-p0))
    ;STALL
    and         r3, r3, lr                  ; vp8_filter &= mask

    qadd8       r9 , r3 , r9                ; Filter2 = vp8_filter + 3
    qadd8       r3 , r3 , r7                ; Filter1 = vp8_filter + 4

    ; signed halving add with 0 (r8) == per-byte arithmetic shift right by 1
    shadd8      r9 , r9 , r8
    shadd8      r3 , r3 , r8
    shadd8      r9 , r9 , r8
    shadd8      r3 , r3 , r8
    shadd8      r9 , r9 , r8                ; Filter2 >>= 3
    shadd8      r3 , r3 , r8                ; Filter1 >>= 3

    ;calculate output
    sub         src, src, pstep, lsl #2     ; rewind to the first of the 4 rows just loaded

    qadd8       r4, r4, r9                  ; u = p0 + Filter2
    qsub8       r5, r5, r3                  ; u = q0 - Filter1
    eor         r4, r4, r2                  ; *op0 = u^0x80
    eor         r5, r5, r2                  ; *oq0 = u^0x80

    ; write back one row per pair of stores: p0 to src-1, q0 to src,
    ; then shift the next row's byte down into the low byte
    strb        r4, [src, #-1]              ; store the result
    mov         r4, r4, lsr #8
    strb        r5, [src], pstep
    mov         r5, r5, lsr #8

    strb        r4, [src, #-1]
    mov         r4, r4, lsr #8
    strb        r5, [src], pstep
    mov         r5, r5, lsr #8

    strb        r4, [src, #-1]
    mov         r4, r4, lsr #8
    strb        r5, [src], pstep
    mov         r5, r5, lsr #8

    strb        r4, [src, #-1]
    strb        r5, [src], pstep            ; src now points at the next 4-row group

|simple_vskip_filter|
    subs        r11, r11, #1

    ; load source data to r7, r8, r9, r10
    ; The loads are skipped after the final pass; the pld and pkhbt below
    ; still execute then, but their results are not used.
    ldrneh      r3, [src, #-2]
    pld         [src, #23]                  ; preload for next block
    ldrneh      r4, [src], pstep

    ldrneh      r5, [src, #-2]
    pld         [src, #23]
    ldrneh      r6, [src], pstep

    pkhbt       r7, r3, r4, lsl #16

    ldrneh      r3, [src, #-2]
    pld         [src, #23]
    ldrneh      r4, [src], pstep

    pkhbt       r8, r5, r6, lsl #16

    ldrneh      r5, [src, #-2]
    pld         [src, #23]
    ldrneh      r6, [src], pstep

    bne         simple_vnext8

    ldmia       sp!, {r4 - r11, pc}
    ENDP        ; |vp8_loop_filter_simple_vertical_edge_armv6|

; Constant Pool
; Byte-replicated constants loaded PC-relative by both filter routines.
c0x80808080 DCD     0x80808080              ; XOR bias: unsigned pixel <-> signed value
c0x03030303 DCD     0x03030303              ; +3 per byte, added to form Filter2
c0x04040404 DCD     0x04040404              ; +4 per byte, added to form Filter1

    END