;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


; Symbols made visible to the linker.
    EXPORT  |vp8_filter_block2d_first_pass_armv6|
    EXPORT  |vp8_filter_block2d_first_pass_16x16_armv6|
    EXPORT  |vp8_filter_block2d_first_pass_8x8_armv6|
    EXPORT  |vp8_filter_block2d_second_pass_armv6|
    EXPORT  |vp8_filter4_block2d_second_pass_armv6|
    EXPORT  |vp8_filter_block2d_first_pass_only_armv6|
    EXPORT  |vp8_filter_block2d_second_pass_only_armv6|

    AREA    |.text|, CODE, READONLY  ; name this block of code
;-------------------------------------
; r0    unsigned char *src_ptr
; r1    short         *output_ptr
; r2    unsigned int src_pixels_per_line
; r3    unsigned int output_width
; stack unsigned int output_height
; stack const short *vp8_filter
;-------------------------------------
; Filter the input and put the result in the output array.  A 6-tap FIR filter
; is run along each source row, reading 1-byte samples and writing 2-byte
; results.  Two adjacent outputs are produced per inner-loop pass, so the
; inner loop runs output_width / 2 times per row.
;
; Each result is rounded (+0x40), shifted right by 7 and saturated to 0..255
; before it is stored.  Stores are transposed: successive outputs of one
; source row land (2 * output_width + 16) bytes apart, and each new source
; row starts 2 bytes after the previous one in the output.
;
; Register use inside the loops:
;   r0          source pointer (post-incremented by 2 per inner pass)
;   r1          output pointer (post-incremented by r12 per store)
;   r2          src_pixels_per_line - output_width (row-to-row source step)
;   r3          output_width * 2
;   r4-r6       the three words loaded from vp8_filter; smuad/smlad treat
;               each as a pair of 16-bit coefficients
;   r7          loop counter: rows remaining in bits 31:16, inner passes
;               remaining in the low byte
;   r8-r11, lr  source samples and accumulators
;   r12         output store stride in bytes (r3 + 16)
;   [sp]        output address for the start of the current source row
|vp8_filter_block2d_first_pass_armv6| PROC
    stmdb   sp!, {r4 - r11, lr}             ; 9 registers pushed = 36 bytes

    ldr     r11, [sp, #40]                  ; vp8_filter address (2nd stack arg)
    ldr     r7, [sp, #36]                   ; output height (1st stack arg)

    sub     r2, r2, r3                      ; inside loop increments input array,
                                            ; so the height loop only needs to add
                                            ; r2 - width to the input pointer

    mov     r3, r3, lsl #1                  ; multiply width by 2 because using shorts
    add     r12, r3, #16                    ; square off the output
    sub     sp, sp, #4                      ; one word of scratch for the output base

    ldr     r4, [r11]                       ; load up packed filter coefficients
    ldr     r5, [r11, #4]
    ldr     r6, [r11, #8]

    str     r1, [sp]                        ; push destination to stack
    mov     r7, r7, lsl #16                 ; height is top part of counter

; six tap filter
|height_loop_1st_6|
    ldrb    r8, [r0, #-2]                   ; load source data
    ldrb    r9, [r0, #-1]
    ldrb    r10, [r0], #2
    orr     r7, r7, r3, lsr #2              ; construct loop counter
                                            ; (low bits = width / 2 inner passes)

|width_loop_1st_6|
    ldrb    r11, [r0, #-1]

    pkhbt   lr, r8, r9, lsl #16             ; r9 | r8
    pkhbt   r8, r9, r10, lsl #16            ; r10 | r9

    ldrb    r9, [r0]

    smuad   lr, lr, r4                      ; apply the filter
    pkhbt   r10, r10, r11, lsl #16          ; r11 | r10
    smuad   r8, r8, r4
    pkhbt   r11, r11, r9, lsl #16           ; r9 | r11

    smlad   lr, r10, r5, lr
    ldrb    r10, [r0, #1]
    smlad   r8, r11, r5, r8
    ldrb    r11, [r0, #2]

    sub     r7, r7, #1                      ; one inner pass done

    pkhbt   r9, r9, r10, lsl #16            ; r10 | r9
    pkhbt   r10, r10, r11, lsl #16          ; r11 | r10

    smlad   lr, r9, r6, lr                  ; lr  = sum for the first output
    smlad   r11, r10, r6, r8                ; r11 = sum for the second output

    ands    r10, r7, #0xff                  ; test loop counter; nothing below
                                            ; sets flags, so Z survives to the bne

    add     lr, lr, #0x40                   ; round_shift_and_clamp
    ldrneb  r8, [r0, #-2]                   ; load data for next loop
    usat    lr, #8, lr, asr #7
    add     r11, r11, #0x40
    ldrneb  r9, [r0, #-1]
    usat    r11, #8, r11, asr #7

    strh    lr, [r1], r12                   ; result is transposed and stored, which
                                            ; will make second pass filtering easier.
    ldrneb  r10, [r0], #2
    strh    r11, [r1], r12

    bne     width_loop_1st_6

    ldr     r1, [sp]                        ; load and update dst address
    subs    r7, r7, #0x10000                ; one row done; Z set after the last row
    add     r0, r0, r2                      ; move to next input line

    add     r1, r1, #2                      ; move over to next column
    str     r1, [sp]

    bne     height_loop_1st_6

    add     sp, sp, #4                      ; drop the scratch word
    ldmia   sp!, {r4 - r11, pc}

    ENDP

; --------------------------
; 16x16 version
; -----------------------------
; r0    unsigned char *src_ptr
; r1    short         *output_ptr
; r2    unsigned int src_pixels_per_line
; r3    unsigned int output_width
; stack unsigned int output_height
; stack const short *vp8_filter
;
; 6-tap FIR filter run along each source row, reading 1-byte samples and
; writing 2-byte results that are rounded (+0x40), shifted right by 7 and
; saturated to 0..255.  Two outputs are produced per inner-loop pass and the
; stores are transposed with a stride of (2 * output_width + 16) bytes.
; In addition, a pld hint is issued before the first row and after every
; row; the per-row offset of 34 assumes a block width of 16.
;
; Register use inside the loops:
;   r0          source pointer (post-incremented by 2 per inner pass)
;   r1          output pointer (post-incremented by r12 per store)
;   r2          src_pixels_per_line - output_width (row-to-row source step)
;   r3          output_width * 2
;   r4-r6       the three words loaded from vp8_filter; smuad/smlad treat
;               each as a pair of 16-bit coefficients
;   r7          loop counter: rows remaining in bits 31:16, inner passes
;               remaining in the low byte
;   r8-r11, lr  source samples and accumulators
;   r12         output store stride in bytes (r3 + 16)
;   [sp]        output address for the start of the current source row
|vp8_filter_block2d_first_pass_16x16_armv6| PROC
    stmdb   sp!, {r4 - r11, lr}             ; 9 registers pushed = 36 bytes

    ldr     r11, [sp, #40]                  ; vp8_filter address (2nd stack arg)
    ldr     r7, [sp, #36]                   ; output height (1st stack arg)

    add     r4, r2, #18                     ; preload next row
    pld     [r0, r4]                        ; hint address = src + pitch + 18

    sub     r2, r2, r3                      ; inside loop increments input array,
                                            ; so the height loop only needs to add
                                            ; r2 - width to the input pointer

    mov     r3, r3, lsl #1                  ; multiply width by 2 because using shorts
    add     r12, r3, #16                    ; square off the output
    sub     sp, sp, #4                      ; one word of scratch for the output base

    ldr     r4, [r11]                       ; load up packed filter coefficients
    ldr     r5, [r11, #4]
    ldr     r6, [r11, #8]

    str     r1, [sp]                        ; push destination to stack
    mov     r7, r7, lsl #16                 ; height is top part of counter

; six tap filter
|height_loop_1st_16_6|
    ldrb    r8, [r0, #-2]                   ; load source data
    ldrb    r9, [r0, #-1]
    ldrb    r10, [r0], #2
    orr     r7, r7, r3, lsr #2              ; construct loop counter
                                            ; (low bits = width / 2 inner passes)

|width_loop_1st_16_6|
    ldrb    r11, [r0, #-1]

    pkhbt   lr, r8, r9, lsl #16             ; r9 | r8
    pkhbt   r8, r9, r10, lsl #16            ; r10 | r9

    ldrb    r9, [r0]

    smuad   lr, lr, r4                      ; apply the filter
    pkhbt   r10, r10, r11, lsl #16          ; r11 | r10
    smuad   r8, r8, r4
    pkhbt   r11, r11, r9, lsl #16           ; r9 | r11

    smlad   lr, r10, r5, lr
    ldrb    r10, [r0, #1]
    smlad   r8, r11, r5, r8
    ldrb    r11, [r0, #2]

    sub     r7, r7, #1                      ; one inner pass done

    pkhbt   r9, r9, r10, lsl #16            ; r10 | r9
    pkhbt   r10, r10, r11, lsl #16          ; r11 | r10

    smlad   lr, r9, r6, lr                  ; lr  = sum for the first output
    smlad   r11, r10, r6, r8                ; r11 = sum for the second output

    ands    r10, r7, #0xff                  ; test loop counter; nothing below
                                            ; sets flags, so Z survives to the bne

    add     lr, lr, #0x40                   ; round_shift_and_clamp
    ldrneb  r8, [r0, #-2]                   ; load data for next loop
    usat    lr, #8, lr, asr #7
    add     r11, r11, #0x40
    ldrneb  r9, [r0, #-1]
    usat    r11, #8, r11, asr #7

    strh    lr, [r1], r12                   ; result is transposed and stored, which
                                            ; will make second pass filtering easier.
    ldrneb  r10, [r0], #2
    strh    r11, [r1], r12

    bne     width_loop_1st_16_6

    ldr     r1, [sp]                        ; load and update dst address
    subs    r7, r7, #0x10000                ; one row done; Z set after the last row
    add     r0, r0, r2                      ; move to next input line

    add     r11, r2, #34                    ; adding back block width(=16)
    pld     [r0, r11]                       ; preload next row (does not alter flags)

    add     r1, r1, #2                      ; move over to next column
    str     r1, [sp]

    bne     height_loop_1st_16_6

    add     sp, sp, #4                      ; drop the scratch word
    ldmia   sp!, {r4 - r11, pc}

    ENDP

; --------------------------
; 8x8 version
; -----------------------------
; r0    unsigned char *src_ptr
; r1    short         *output_ptr
; r2    unsigned int src_pixels_per_line
; r3    unsigned int output_width
; stack unsigned int output_height
; stack const short *vp8_filter
;
; 6-tap FIR filter run along each source row, reading 1-byte samples and
; writing 2-byte results that are rounded (+0x40), shifted right by 7 and
; saturated to 0..255.  Two outputs are produced per inner-loop pass and the
; stores are transposed with a stride of (2 * output_width + 16) bytes.
; In addition, a pld hint is issued before the first row and after every
; row; the per-row offset of 18 assumes a block width of 8.
;
; Register use inside the loops:
;   r0          source pointer (post-incremented by 2 per inner pass)
;   r1          output pointer (post-incremented by r12 per store)
;   r2          src_pixels_per_line - output_width (row-to-row source step)
;   r3          output_width * 2
;   r4-r6       the three words loaded from vp8_filter; smuad/smlad treat
;               each as a pair of 16-bit coefficients
;   r7          loop counter: rows remaining in bits 31:16, inner passes
;               remaining in the low byte
;   r8-r11, lr  source samples and accumulators
;   r12         output store stride in bytes (r3 + 16)
;   [sp]        output address for the start of the current source row
|vp8_filter_block2d_first_pass_8x8_armv6| PROC
    stmdb   sp!, {r4 - r11, lr}             ; 9 registers pushed = 36 bytes

    ldr     r11, [sp, #40]                  ; vp8_filter address (2nd stack arg)
    ldr     r7, [sp, #36]                   ; output height (1st stack arg)

    add     r4, r2, #10                     ; preload next row
    pld     [r0, r4]                        ; hint address = src + pitch + 10

    sub     r2, r2, r3                      ; inside loop increments input array,
                                            ; so the height loop only needs to add
                                            ; r2 - width to the input pointer

    mov     r3, r3, lsl #1                  ; multiply width by 2 because using shorts
    add     r12, r3, #16                    ; square off the output
    sub     sp, sp, #4                      ; one word of scratch for the output base

    ldr     r4, [r11]                       ; load up packed filter coefficients
    ldr     r5, [r11, #4]
    ldr     r6, [r11, #8]

    str     r1, [sp]                        ; push destination to stack
    mov     r7, r7, lsl #16                 ; height is top part of counter

; six tap filter
|height_loop_1st_8_6|
    ldrb    r8, [r0, #-2]                   ; load source data
    ldrb    r9, [r0, #-1]
    ldrb    r10, [r0], #2
    orr     r7, r7, r3, lsr #2              ; construct loop counter
                                            ; (low bits = width / 2 inner passes)

|width_loop_1st_8_6|
    ldrb    r11, [r0, #-1]

    pkhbt   lr, r8, r9, lsl #16             ; r9 | r8
    pkhbt   r8, r9, r10, lsl #16            ; r10 | r9

    ldrb    r9, [r0]

    smuad   lr, lr, r4                      ; apply the filter
    pkhbt   r10, r10, r11, lsl #16          ; r11 | r10
    smuad   r8, r8, r4
    pkhbt   r11, r11, r9, lsl #16           ; r9 | r11

    smlad   lr, r10, r5, lr
    ldrb    r10, [r0, #1]
    smlad   r8, r11, r5, r8
    ldrb    r11, [r0, #2]

    sub     r7, r7, #1                      ; one inner pass done

    pkhbt   r9, r9, r10, lsl #16            ; r10 | r9
    pkhbt   r10, r10, r11, lsl #16          ; r11 | r10

    smlad   lr, r9, r6, lr                  ; lr  = sum for the first output
    smlad   r11, r10, r6, r8                ; r11 = sum for the second output

    ands    r10, r7, #0xff                  ; test loop counter; nothing below
                                            ; sets flags, so Z survives to the bne

    add     lr, lr, #0x40                   ; round_shift_and_clamp
    ldrneb  r8, [r0, #-2]                   ; load data for next loop
    usat    lr, #8, lr, asr #7
    add     r11, r11, #0x40
    ldrneb  r9, [r0, #-1]
    usat    r11, #8, r11, asr #7

    strh    lr, [r1], r12                   ; result is transposed and stored, which
                                            ; will make second pass filtering easier.
    ldrneb  r10, [r0], #2
    strh    r11, [r1], r12

    bne     width_loop_1st_8_6

    ldr     r1, [sp]                        ; load and update dst address
    subs    r7, r7, #0x10000                ; one row done; Z set after the last row
    add     r0, r0, r2                      ; move to next input line

    add     r11, r2, #18                    ; adding back block width(=8)
    pld     [r0, r11]                       ; preload next row (does not alter flags)

    add     r1, r1, #2                      ; move over to next column
    str     r1, [sp]

    bne     height_loop_1st_8_6

    add     sp, sp, #4                      ; drop the scratch word
    ldmia   sp!, {r4 - r11, pc}

    ENDP

301474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;---------------------------------
302474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org; r0    short         *src_ptr,
303474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org; r1    unsigned char *output_ptr,
304474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org; r2    unsigned int output_pitch,
305474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org; r3    unsigned int cnt,
306474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org; stack const short *vp8_filter
307474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;---------------------------------
308474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org|vp8_filter_block2d_second_pass_armv6| PROC
309474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    stmdb   sp!, {r4 - r11, lr}
310474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
311474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ldr     r11, [sp, #36]                  ; vp8_filter address
312474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    sub     sp, sp, #4
313474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    mov     r7, r3, lsl #16                 ; height is top part of counter
314474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    str     r1, [sp]                        ; push destination to stack
315474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
316474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ldr     r4, [r11]                       ; load up packed filter coefficients
317474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ldr     r5, [r11, #4]
318474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ldr     r6, [r11, #8]
319474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
320474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    pkhbt   r12, r5, r4                     ; pack the filter differently
321474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    pkhbt   r11, r6, r5
322474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
323474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    sub     r0, r0, #4                      ; offset input buffer
324474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
325474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org|height_loop_2nd|
326474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ldr     r8, [r0]                        ; load the data
327474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ldr     r9, [r0, #4]
328474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    orr     r7, r7, r3, lsr #1              ; loop counter
329474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
330474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org|width_loop_2nd|
331474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    smuad   lr, r4, r8                      ; apply filter
332474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    sub     r7, r7, #1
333474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    smulbt  r8, r4, r8
334474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
335474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ldr     r10, [r0, #8]
336474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
337474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    smlad   lr, r5, r9, lr
338474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    smladx  r8, r12, r9, r8
339474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
340474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ldrh    r9, [r0, #12]
341474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
342474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    smlad   lr, r6, r10, lr
343474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    smladx  r8, r11, r10, r8
344474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
345474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    add     r0, r0, #4
346474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    smlatb  r10, r6, r9, r8
347474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
348474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    add     lr, lr, #0x40                   ; round_shift_and_clamp
349474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ands    r8, r7, #0xff
350474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    usat    lr, #8, lr, asr #7
351474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    add     r10, r10, #0x40
352474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    strb    lr, [r1], r2                    ; the result is transposed back and stored
353474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    usat    r10, #8, r10, asr #7
354474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
355474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ldrne   r8, [r0]                        ; load data for next loop
356474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ldrne   r9, [r0, #4]
357474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    strb    r10, [r1], r2
358474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
359474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    bne     width_loop_2nd
360474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
361474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ldr     r1, [sp]                        ; update dst for next loop
362474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    subs    r7, r7, #0x10000
363474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    add     r0, r0, #16                     ; updata src for next loop
364474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    add     r1, r1, #1
365474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    str     r1, [sp]
366474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
367474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    bne     height_loop_2nd
368474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
369474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    add     sp, sp, #4
370474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ldmia   sp!, {r4 - r11, pc}
371474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
372474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ENDP
373474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
374474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;---------------------------------
375474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org; r0    short         *src_ptr,
376474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org; r1    unsigned char *output_ptr,
377474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org; r2    unsigned int output_pitch,
378474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org; r3    unsigned int cnt,
379474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org; stack const short *vp8_filter
380474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;---------------------------------
; Four-tap second pass over 16-bit input samples.
;
; Notation used below: "a:b" is a 32-bit register holding a in its top
; halfword and b in its bottom halfword; t0..t5 are the six halfwords read
; from vp8_filter (numbering assumes little-endian word loads); s[k] is the
; input halfword k positions from r0 as it stands on entry to an inner
; iteration.
;
; Every output byte is
;     usat8((s[-1]*t1 + s[0]*t2 + s[1]*t3 + s[2]*t4 + 0x40) >> 7)
; t0 and t5 are loaded but never enter the arithmetic.
;
; Each inner iteration consumes one input word (two halfwords) and stores two
; bytes, output_pitch bytes apart. The inner loop runs cnt/2 times and the
; outer loop cnt times. After each inner loop the input pointer skips a
; further 16 bytes and the destination restarts one byte after the previous
; start, so every outer iteration fills one byte-wide column of the output.
;
; r4-r11 and lr are saved and restored; r12 is used as scratch.
; NOTE(review): the inner count is tested with a 0xff mask, so cnt/2 is
; presumably expected to stay below 256 (and cnt to be even) - confirm
; against the callers.
381474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org|vp8_filter4_block2d_second_pass_armv6| PROC
382474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    stmdb   sp!, {r4 - r11, lr}             ; 9 registers = 36 bytes pushed
383474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
384474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ldr     r11, [sp, #36]                  ; vp8_filter address (first stack argument, just above the saved registers)
385474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    mov     r7, r3, lsl #16                 ; outer count (cnt) lives in the top half of r7
386474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
387474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ldr     r4, [r11]                       ; load up packed filter coefficients: r4 = t1:t0
388474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    add     lr, r1, r3                      ; lr = output_ptr + cnt, base for computing each column's dst start
389474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ldr     r5, [r11, #4]                   ; r5 = t3:t2
390474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ldr     r6, [r11, #8]                   ; r6 = t5:t4
391474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
392474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    pkhbt   r12, r5, r4                     ; pack the filter differently: r12 = t1:t2
393474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    pkhbt   r11, r6, r5                     ; r11 = t3:t4
394474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    mov     r4, #0x40                       ; rounding factor (for smlad{x}); r4's filter word is no longer needed
395474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
396474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org|height_loop_2nd_4|
39710a9a0d835561a7f2300c561c514efcf374554d6fgalligan@chromium.org    ldrd    r8, r9, [r0, #-4]               ; load the data: r8 = s[-1]:s[-2], r9 = s[1]:s[0]
398474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    orr     r7, r7, r3, lsr #1              ; loop counter: low part of r7 = cnt/2 inner iterations
399474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
400474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org|width_loop_2nd_4|
401474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ldr     r10, [r0, #4]!                  ; r0 += 4 (writeback), r10 = s[3]:s[2]
402474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    smladx  r6, r9, r12, r4                 ; apply filter: r6 = s[0]*t1 + s[1]*t2 + 0x40
403474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    pkhbt   r8, r9, r8                      ; r8 = s[-1]:s[0]
404474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    smlad   r5, r8, r12, r4                 ; r5 = s[-1]*t1 + s[0]*t2 + 0x40
405474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    pkhbt   r8, r10, r9                     ; r8 = s[1]:s[2]
406474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    smladx  r6, r10, r11, r6                ; r6 += s[2]*t3 + s[3]*t4  (second output of the pair)
407474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    sub     r7, r7, #1                      ; one inner iteration consumed
408474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    smlad   r5, r8, r11, r5                 ; r5 += s[1]*t3 + s[2]*t4  (first output of the pair)
409474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
410474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    mov     r8, r9                          ; shift the data for the next loop
411474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    mov     r9, r10
412474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
413474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    usat    r6, #8, r6, asr #7              ; shift and clamp
414474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    usat    r5, #8, r5, asr #7
415474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
416474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    strb    r5, [r1], r2                    ; the result is transposed back and stored (dst steps by output_pitch)
417474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    tst     r7, #0xff                       ; Z set once the inner count reaches zero; the strb below leaves flags alone
418474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    strb    r6, [r1], r2
419474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
420474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    bne     width_loop_2nd_4
421474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
422474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    subs    r7, r7, #0x10000                ; one outer iteration done; these flags drive the bne below
423474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    add     r0, r0, #16                     ; update src for next loop
424474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    sub     r1, lr, r7, lsr #16             ; update dst for next loop: (output_ptr + cnt) - outer iterations left
425474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
426474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    bne     height_loop_2nd_4
427474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
428474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ldmia   sp!, {r4 - r11, pc}             ; restore saved registers and return
429474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
430474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ENDP
431474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
432474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;------------------------------------
433474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org; r0    unsigned char *src_ptr
434474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org; r1    unsigned char *output_ptr,
435474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org; r2    unsigned int src_pixels_per_line
436474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org; r3    unsigned int cnt,
437474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org; stack unsigned int output_pitch,
438474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org; stack const short *vp8_filter
439474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;------------------------------------
; Horizontal six-tap filter applied directly to 8-bit pixels, writing 8-bit
; results (no intermediate buffer).
;
; With t0..t5 the six halfwords read from vp8_filter (numbering assumes
; little-endian word loads), each output pixel is
;     out[x] = usat8((src[x-2]*t0 + src[x-1]*t1 + src[x]*t2
;                     + src[x+1]*t3 + src[x+2]*t4 + src[x+3]*t5 + 0x40) >> 7)
;
; cnt rows are processed (r7); each row runs cnt/2 inner iterations (r12),
; two pixels per iteration. In the inner-loop comments p is the index of the
; first pixel of the pair; r0 points at src[p+2] throughout the loop body.
;
; Stack locals: [sp]     = output_pitch - cnt
;               [sp, #4] = src_pixels_per_line - cnt
; r4-r11 and lr are saved and restored; r12 is used as scratch.
; NOTE(review): cnt is halved for the inner count, so it is presumably always
; even - confirm against the callers.
440474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org|vp8_filter_block2d_first_pass_only_armv6| PROC
441474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    stmdb   sp!, {r4 - r11, lr}             ; 9 registers = 36 bytes pushed
442474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
443474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    add     r7, r2, r3                      ; preload next row
444474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    add     r7, r7, #2                      ; r7 = src_pixels_per_line + cnt + 2
445474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    pld     [r0, r7]
446474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
447474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ldr     r4, [sp, #36]                   ; output pitch
448474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ldr     r11, [sp, #40]                  ; HFilter address
449474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    sub     sp, sp, #8                      ; two words of local storage
450474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
451474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    mov     r7, r3                          ; r7 = row counter (cnt rows)
452474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    sub     r2, r2, r3                      ; inside loop increments input array,
453474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org                                            ; so the height loop only needs to add
454474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org                                            ; r2 - width to the input pointer
455474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
456474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    sub     r4, r4, r3                      ; same adjustment for the destination: output_pitch - cnt
457474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    str     r4, [sp]                        ; save modified output pitch
458474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    str     r2, [sp, #4]                    ; save modified source pitch
459474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
460474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    mov     r2, #0x40                       ; r2 is now the rounding constant fed to smlad
461474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
462474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ldr     r4, [r11]                       ; load up packed filter coefficients: r4 = t1:t0
463474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ldr     r5, [r11, #4]                   ; r5 = t3:t2
464474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ldr     r6, [r11, #8]                   ; r6 = t5:t4
465474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
466474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org; six tap filter
467474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org|height_loop_1st_only_6|
468474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ldrb    r8, [r0, #-2]                   ; load data: r8 = src[-2]
469474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ldrb    r9, [r0, #-1]                   ; r9 = src[-1]
470474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ldrb    r10, [r0], #2                   ; r10 = src[0], then r0 += 2
471474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
472474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    mov     r12, r3, lsr #1                 ; loop counter: cnt/2 iterations, two pixels each
473474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
474474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org|width_loop_1st_only_6|
475474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ldrb    r11, [r0, #-1]                  ; r11 = src[p+1]
476474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
477474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    pkhbt   lr, r8, r9, lsl #16             ; r9 | r8    (src[p-1]:src[p-2])
478474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    pkhbt   r8, r9, r10, lsl #16            ; r10 | r9   (src[p]:src[p-1])
479474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
480474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ldrb    r9, [r0]                        ; r9 = src[p+2]
481474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
482474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;;  smuad   lr, lr, r4
483474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    smlad   lr, lr, r4, r2                  ; first pixel:  taps 0,1 plus rounding (replaces smuad + later add)
484474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    pkhbt   r10, r10, r11, lsl #16          ; r11 | r10  (src[p+1]:src[p])
485474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;;  smuad   r8, r8, r4
486474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    smlad   r8, r8, r4, r2                  ; second pixel: taps 0,1 plus rounding
487474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    pkhbt   r11, r11, r9, lsl #16           ; r9 | r11   (src[p+2]:src[p+1])
488474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
489474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    smlad   lr, r10, r5, lr                 ; first pixel:  += taps 2,3
490474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ldrb    r10, [r0, #1]                   ; r10 = src[p+3]
491474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    smlad   r8, r11, r5, r8                 ; second pixel: += taps 2,3
492474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ldrb    r11, [r0, #2]                   ; r11 = src[p+4]
493474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
494474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    subs    r12, r12, #1                    ; flags set here drive the ldrneb/bne below
495474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
496474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    pkhbt   r9, r9, r10, lsl #16            ; r10 | r9   (src[p+3]:src[p+2])
497474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    pkhbt   r10, r10, r11, lsl #16          ; r11 | r10  (src[p+4]:src[p+3])
498474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
499474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    smlad   lr, r9, r6, lr                  ; first pixel:  += taps 4,5
500474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    smlad   r10, r10, r6, r8                ; second pixel: += taps 4,5 (result in r10)
501474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
502474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;;  add     lr, lr, #0x40                   ; round_shift_and_clamp
503474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ldrneb  r8, [r0, #-2]                   ; load data for next loop (skipped after the last pair)
504474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    usat    lr, #8, lr, asr #7              ; >> 7 and clamp to 0..255 (rounding already included)
505474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;;  add     r10, r10, #0x40
506474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    strb    lr, [r1], #1                    ; store the result
507474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    usat    r10, #8, r10, asr #7
508474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
509474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ldrneb  r9, [r0, #-1]
510474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    strb    r10, [r1], #1
511474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ldrneb  r10, [r0], #2                   ; also advances r0 by 2 for the next pair
512474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
513474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    bne     width_loop_1st_only_6
514474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
515474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ldr     lr, [sp]                        ; load back output pitch (minus width)
516474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ldr     r12, [sp, #4]                   ; load back source pitch (minus width)
517474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    subs    r7, r7, #1                      ; one row done; flags drive the bne below
518474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    add     r0, r0, r12                     ; update src for next loop
519474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
520474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    add     r11, r12, r3                    ; preload next row (r11 = src_pixels_per_line)
521474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    add     r11, r11, #2
522474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    pld     [r0, r11]                       ; r0 already points at the row about to be filtered, so this hints the row after it
523474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
524474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    add     r1, r1, lr                      ; update dst for next loop
525474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
526474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    bne     height_loop_1st_only_6
527474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
528474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    add     sp, sp, #8                      ; release the two local words
529474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ldmia   sp!, {r4 - r11, pc}             ; restore saved registers and return
530474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ENDP  ; |vp8_filter_block2d_first_pass_only_armv6|
531474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
532474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
533474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;------------------------------------
534474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org; r0    unsigned char *src_ptr,
535474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org; r1    unsigned char *output_ptr,
536474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org; r2    unsigned int src_pixels_per_line
537474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org; r3    unsigned int cnt,
538474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org; stack unsigned int output_pitch,
539474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org; stack const short *vp8_filter
540474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;------------------------------------
; Vertical six-tap filter applied directly to 8-bit pixels, writing 8-bit
; results (no intermediate buffer).
;
; With t0..t5 the six halfwords read from vp8_filter (numbering assumes
; little-endian word loads) and P = src_pixels_per_line, each output is
;     out = usat8((src[-2P]*t0 + src[-P]*t1 + src[0]*t2
;                  + src[P]*t3 + src[2P]*t4 + src[3P]*t5 + 0x40) >> 7)
;
; The outer loop walks cnt columns (count in the top half of r7); the inner
; loop walks down one column producing two rows per iteration (low part of r7
; starts at cnt and drops by 2). In the inner-loop comments y is the row of
; the first output of the pair.
;
; Stack locals: [sp]     = source pointer for the current column, already
;                          moved up by two rows
;               [sp, #4] = destination pointer for the current column
; r4-r11 and lr are saved and restored; r12 holds output_pitch.
; NOTE(review): the inner count is tested with a 0xff mask and decremented by
; 2, so cnt is presumably even and below 256 - confirm against the callers.
541474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org|vp8_filter_block2d_second_pass_only_armv6| PROC
542474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    stmdb   sp!, {r4 - r11, lr}             ; 9 registers = 36 bytes pushed
543474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
544474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ldr     r11, [sp, #40]                  ; VFilter address
545474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ldr     r12, [sp, #36]                  ; output pitch
546474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
547474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    mov     r7, r3, lsl #16                 ; outer (column) count is the top part of the counter
548474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    sub     r0, r0, r2, lsl #1              ; need 6 elements for filtering, 2 before, 3 after: start two rows up
549474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
550474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    sub     sp, sp, #8                      ; two words of local storage
551474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
552474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ldr     r4, [r11]                       ; load up packed filter coefficients: r4 = t1:t0
553474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ldr     r5, [r11, #4]                   ; r5 = t3:t2
554474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ldr     r6, [r11, #8]                   ; r6 = t5:t4
555474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
556474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    str     r0, [sp]                        ; save r0 to stack
557474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    str     r1, [sp, #4]                    ; save dst to stack
558474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
559474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org; six tap filter
560474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org|width_loop_2nd_only_6|
561474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ldrb    r8, [r0], r2                    ; load data: r8 = row -2, r0 steps down one row
562474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    orr     r7, r7, r3                      ; loop counter: low part = cnt rows still to produce
563474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ldrb    r9, [r0], r2                    ; r9 = row -1
564474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ldrb    r10, [r0], r2                   ; r10 = row 0; r0 now points at row 1
565474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
566474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org|height_loop_2nd_only_6|
567474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ; filter the current column in this inner loop, then move to the next column.
568474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ldrb    r11, [r0], r2                   ; r11 = row y+1
569474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
570474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    pkhbt   lr, r8, r9, lsl #16             ; r9 | r8    (rows y-1 : y-2)
571474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    pkhbt   r8, r9, r10, lsl #16            ; r10 | r9   (rows y : y-1)
572474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
573474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ldrb    r9, [r0], r2                    ; r9 = row y+2
574474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
575474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    smuad   lr, lr, r4                      ; first output:  taps 0,1
576474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    pkhbt   r10, r10, r11, lsl #16          ; r11 | r10  (rows y+1 : y)
577474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    smuad   r8, r8, r4                      ; second output: taps 0,1
578474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    pkhbt   r11, r11, r9, lsl #16           ; r9 | r11   (rows y+2 : y+1)
579474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
580474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    smlad   lr, r10, r5, lr                 ; first output:  += taps 2,3
581474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ldrb    r10, [r0], r2                   ; r10 = row y+3
582474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    smlad   r8, r11, r5, r8                 ; second output: += taps 2,3
583474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ldrb    r11, [r0]                       ; r11 = row y+4 (no writeback)
584474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
585474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    sub     r7, r7, #2                      ; two rows produced per iteration
586474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    sub     r0, r0, r2, lsl #2              ; rewind four rows: r0 = row y, the first row the next pair needs
587474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
588474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    pkhbt   r9, r9, r10, lsl #16            ; r10 | r9   (rows y+3 : y+2)
589474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    pkhbt   r10, r10, r11, lsl #16          ; r11 | r10  (rows y+4 : y+3)
590474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
591474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    smlad   lr, r9, r6, lr                  ; first output:  += taps 4,5
592474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    smlad   r10, r10, r6, r8                ; second output: += taps 4,5 (result in r10)
593474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
594474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ands    r9, r7, #0xff                   ; Z set when the column is finished; r9 is free here and reloaded before reuse
595474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
596474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    add     lr, lr, #0x40                   ; round_shift_and_clamp
597474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ldrneb  r8, [r0], r2                    ; load data for next loop (skipped after the last pair)
598474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    usat    lr, #8, lr, asr #7
599474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    add     r10, r10, #0x40
600474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    strb    lr, [r1], r12                   ; store the result for the column
601474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    usat    r10, #8, r10, asr #7
602474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
603474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ldrneb  r9, [r0], r2
604474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    strb    r10, [r1], r12
605474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ldrneb  r10, [r0], r2
606474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
607474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    bne     height_loop_2nd_only_6
608474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
609474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ldr     r0, [sp]                        ; reload this column's src start
610474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ldr     r1, [sp, #4]                    ; reload this column's dst start
611474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    subs    r7, r7, #0x10000                ; one column done; flags drive the bne below
612474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    add     r0, r0, #1                      ; move to filter next column
613474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    str     r0, [sp]
614474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    add     r1, r1, #1
615474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    str     r1, [sp, #4]
616474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
617474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    bne     width_loop_2nd_only_6
618474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
619474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    add     sp, sp, #8                      ; release the two local words
620474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
621474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ldmia   sp!, {r4 - r11, pc}             ; restore saved registers and return
622474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ENDP  ; |vp8_filter_block2d_second_pass_only_armv6|
623474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
624474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    END
625