;
;  Copyright (c) 2011 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;
10
11
12    EXPORT  |vp8_subtract_mby_armv6|
13    EXPORT  |vp8_subtract_mbuv_armv6|
14    EXPORT  |vp8_subtract_b_armv6|
15
16    INCLUDE vp8_asm_enc_offsets.asm
17
18    ARM
19    REQUIRE8
20    PRESERVE8
21
22    AREA ||.text||, CODE, READONLY, ALIGN=2
23
24; r0    BLOCK *be
25; r1    BLOCKD *bd
26; r2    int pitch
27|vp8_subtract_b_armv6| PROC
28
29    stmfd   sp!, {r4-r9}
30
31    ldr     r4, [r0, #vp8_block_base_src]
32    ldr     r5, [r0, #vp8_block_src]
33    ldr     r6, [r0, #vp8_block_src_diff]
34
35    ldr     r3, [r4]
36    ldr     r7, [r0, #vp8_block_src_stride]
37    add     r3, r3, r5          ; src = *base_src + src
38    ldr     r8, [r1, #vp8_blockd_predictor]
39
40    mov     r9, #4              ; loop count
41
42loop_block
43
44    ldr     r0, [r3], r7        ; src
45    ldr     r1, [r8], r2        ; pred
46
47    uxtb16  r4, r0              ; [s2 | s0]
48    uxtb16  r5, r1              ; [p2 | p0]
49    uxtb16  r0, r0, ror #8      ; [s3 | s1]
50    uxtb16  r1, r1, ror #8      ; [p3 | p1]
51
52    usub16  r4, r4, r5          ; [d2 | d0]
53    usub16  r5, r0, r1          ; [d3 | d1]
54
55    subs    r9, r9, #1          ; decrement loop counter
56
57    pkhbt   r0, r4, r5, lsl #16 ; [d1 | d0]
58    pkhtb   r1, r5, r4, asr #16 ; [d3 | d2]
59
60    str     r0, [r6, #0]        ; diff
61    str     r1, [r6, #4]        ; diff
62
63    add     r6, r6, r2, lsl #1  ; update diff pointer
64    bne     loop_block
65
66    ldmfd   sp!, {r4-r9}
67    mov     pc, lr
68
69    ENDP
70
71
;-----------------------------------------------------------------------
; vp8_subtract_mbuv_armv6
;
; Computes the 16-bit residuals (src - pred) for two 8x8 planes (U, V).
; Output is written contiguously starting 512 bytes past diff: 8 rows of
; 8 differences for U, immediately followed by 8 rows of 8 for V.
;
; In:   r0       = short *diff
;       r1       = unsigned char *usrc
;       r2       = unsigned char *vsrc
;       r3       = int src_stride     (byte step between source rows)
;       [sp, #0] = unsigned char *upred
;       [sp, #4] = unsigned char *vpred
;       [sp, #8] = int pred_stride    (byte step between predictor rows)
;       Stack offsets are as seen on entry; after the 32-byte register
;       save below they are read at #32, #36 and #40.
; Clobbers: r0-r2, r12, flags (incl. GE); r4-r11 are saved and restored
;
; Each row is processed as two 4-pixel groups (A) and (B); the loads for
; (B) are issued between the arithmetic for (A).
; Pixel labels in the comments ([s2 | s0] etc.) assume little-endian
; word loads.
; NOTE(review): rows are fetched with word loads (ldr); presumably the
; callers guarantee this is safe for the src/pred addresses - confirm.
;-----------------------------------------------------------------------
|vp8_subtract_mbuv_armv6| PROC

    stmfd   sp!, {r4-r11}

    add     r0, r0, #512        ; set *diff point to Cb
    mov     r4, #8              ; loop count
    ldr     r5, [sp, #32]       ; upred
    ldr     r12, [sp, #40]      ; pred_stride

    ; Subtract U block
loop_u
    ldr     r6, [r1]            ; usrc      (A)
    ldr     r7, [r5]            ; upred     (A)

    uxtb16  r8, r6              ; [s2 | s0] (A)
    uxtb16  r9, r7              ; [p2 | p0] (A)
    uxtb16  r10, r6, ror #8     ; [s3 | s1] (A)
    uxtb16  r11, r7, ror #8     ; [p3 | p1] (A)

    usub16  r6, r8, r9          ; [d2 | d0] (A)
    usub16  r7, r10, r11        ; [d3 | d1] (A)

    ldr     r10, [r1, #4]       ; usrc      (B)
    ldr     r11, [r5, #4]       ; upred     (B)

    pkhbt   r8, r6, r7, lsl #16 ; [d1 | d0] (A)
    pkhtb   r9, r7, r6, asr #16 ; [d3 | d2] (A)

    str     r8, [r0], #4        ; diff      (A)
    uxtb16  r8, r10             ; [s2 | s0] (B)
    str     r9, [r0], #4        ; diff      (A)

    uxtb16  r9, r11             ; [p2 | p0] (B)
    uxtb16  r10, r10, ror #8    ; [s3 | s1] (B)
    uxtb16  r11, r11, ror #8    ; [p3 | p1] (B)

    usub16  r6, r8, r9          ; [d2 | d0] (B)
    usub16  r7, r10, r11        ; [d3 | d1] (B)

    add     r1, r1, r3          ; update usrc pointer
    add     r5, r5, r12         ; update upred pointer

    pkhbt   r8, r6, r7, lsl #16 ; [d1 | d0] (B)
    pkhtb   r9, r7, r6, asr #16 ; [d3 | d2] (B)

    str     r8, [r0], #4        ; diff      (B)
    subs    r4, r4, #1          ; update loop counter
    str     r9, [r0], #4        ; diff      (B)

    bne     loop_u              ; uses flags from subs (str keeps them)

    ldr     r5, [sp, #36]       ; vpred
    mov     r4, #8              ; loop count

    ; Subtract V block (r0 carries on directly after the U output)
loop_v
    ldr     r6, [r2]            ; vsrc      (A)
    ldr     r7, [r5]            ; vpred     (A)

    uxtb16  r8, r6              ; [s2 | s0] (A)
    uxtb16  r9, r7              ; [p2 | p0] (A)
    uxtb16  r10, r6, ror #8     ; [s3 | s1] (A)
    uxtb16  r11, r7, ror #8     ; [p3 | p1] (A)

    usub16  r6, r8, r9          ; [d2 | d0] (A)
    usub16  r7, r10, r11        ; [d3 | d1] (A)

    ldr     r10, [r2, #4]       ; vsrc      (B)
    ldr     r11, [r5, #4]       ; vpred     (B)

    pkhbt   r8, r6, r7, lsl #16 ; [d1 | d0] (A)
    pkhtb   r9, r7, r6, asr #16 ; [d3 | d2] (A)

    str     r8, [r0], #4        ; diff      (A)
    uxtb16  r8, r10             ; [s2 | s0] (B)
    str     r9, [r0], #4        ; diff      (A)

    uxtb16  r9, r11             ; [p2 | p0] (B)
    uxtb16  r10, r10, ror #8    ; [s3 | s1] (B)
    uxtb16  r11, r11, ror #8    ; [p3 | p1] (B)

    usub16  r6, r8, r9          ; [d2 | d0] (B)
    usub16  r7, r10, r11        ; [d3 | d1] (B)

    add     r2, r2, r3          ; update vsrc pointer
    add     r5, r5, r12         ; update vpred pointer

    pkhbt   r8, r6, r7, lsl #16 ; [d1 | d0] (B)
    pkhtb   r9, r7, r6, asr #16 ; [d3 | d2] (B)

    str     r8, [r0], #4        ; diff      (B)
    subs    r4, r4, #1          ; update loop counter
    str     r9, [r0], #4        ; diff      (B)

    bne     loop_v              ; uses flags from subs (str keeps them)

    ldmfd   sp!, {r4-r11}
    bx      lr

    ENDP


;-----------------------------------------------------------------------
; vp8_subtract_mby_armv6
;
; Computes the 16-bit residuals (src - pred) for a 16x16 block. Output
; is written contiguously from diff: 16 rows of 16 differences each.
;
; In:   r0       = short *diff
;       r1       = unsigned char *src
;       r2       = int src_stride     (byte step between source rows)
;       r3       = unsigned char *pred
;       [sp, #0] = int pred_stride    (byte step between predictor rows)
;       The stack offset is as seen on entry; after the 32-byte register
;       save below it is read at #32.
; Clobbers: r0, r1, r3, r12, flags (incl. GE); r4-r11 saved and restored
;
; Each row is processed as four 4-pixel groups (A)-(D); the loads for
; the next group are issued between the arithmetic for the current one.
; Pixel labels in the comments ([s2 | s0] etc.) assume little-endian
; word loads.
; NOTE(review): rows are fetched with word loads (ldr); presumably the
; callers guarantee this is safe for the src/pred addresses - confirm.
;-----------------------------------------------------------------------
|vp8_subtract_mby_armv6| PROC

    stmfd   sp!, {r4-r11}
    ldr     r12, [sp, #32]      ; pred_stride
    mov     r4, #16             ; loop count (one iteration per row)
loop
    ldr     r6, [r1]            ; src       (A)
    ldr     r7, [r3]            ; pred      (A)

    uxtb16  r8, r6              ; [s2 | s0] (A)
    uxtb16  r9, r7              ; [p2 | p0] (A)
    uxtb16  r10, r6, ror #8     ; [s3 | s1] (A)
    uxtb16  r11, r7, ror #8     ; [p3 | p1] (A)

    usub16  r6, r8, r9          ; [d2 | d0] (A)
    usub16  r7, r10, r11        ; [d3 | d1] (A)

    ldr     r10, [r1, #4]       ; src       (B)
    ldr     r11, [r3, #4]       ; pred      (B)

    pkhbt   r8, r6, r7, lsl #16 ; [d1 | d0] (A)
    pkhtb   r9, r7, r6, asr #16 ; [d3 | d2] (A)

    str     r8, [r0], #4        ; diff      (A)
    uxtb16  r8, r10             ; [s2 | s0] (B)
    str     r9, [r0], #4        ; diff      (A)

    uxtb16  r9, r11             ; [p2 | p0] (B)
    uxtb16  r10, r10, ror #8    ; [s3 | s1] (B)
    uxtb16  r11, r11, ror #8    ; [p3 | p1] (B)

    usub16  r6, r8, r9          ; [d2 | d0] (B)
    usub16  r7, r10, r11        ; [d3 | d1] (B)

    ldr     r10, [r1, #8]       ; src       (C)
    ldr     r11, [r3, #8]       ; pred      (C)

    pkhbt   r8, r6, r7, lsl #16 ; [d1 | d0] (B)
    pkhtb   r9, r7, r6, asr #16 ; [d3 | d2] (B)

    str     r8, [r0], #4        ; diff      (B)
    uxtb16  r8, r10             ; [s2 | s0] (C)
    str     r9, [r0], #4        ; diff      (B)

    uxtb16  r9, r11             ; [p2 | p0] (C)
    uxtb16  r10, r10, ror #8    ; [s3 | s1] (C)
    uxtb16  r11, r11, ror #8    ; [p3 | p1] (C)

    usub16  r6, r8, r9          ; [d2 | d0] (C)
    usub16  r7, r10, r11        ; [d3 | d1] (C)

    ldr     r10, [r1, #12]      ; src       (D)
    ldr     r11, [r3, #12]      ; pred      (D)

    pkhbt   r8, r6, r7, lsl #16 ; [d1 | d0] (C)
    pkhtb   r9, r7, r6, asr #16 ; [d3 | d2] (C)

    str     r8, [r0], #4        ; diff      (C)
    uxtb16  r8, r10             ; [s2 | s0] (D)
    str     r9, [r0], #4        ; diff      (C)

    uxtb16  r9, r11             ; [p2 | p0] (D)
    uxtb16  r10, r10, ror #8    ; [s3 | s1] (D)
    uxtb16  r11, r11, ror #8    ; [p3 | p1] (D)

    usub16  r6, r8, r9          ; [d2 | d0] (D)
    usub16  r7, r10, r11        ; [d3 | d1] (D)

    add     r1, r1, r2          ; update src pointer
    add     r3, r3, r12         ; update pred pointer

    pkhbt   r8, r6, r7, lsl #16 ; [d1 | d0] (D)
    pkhtb   r9, r7, r6, asr #16 ; [d3 | d2] (D)

    str     r8, [r0], #4        ; diff      (D)
    subs    r4, r4, #1          ; update loop counter
    str     r9, [r0], #4        ; diff      (D)

    bne     loop                ; uses flags from subs (str keeps them)

    ldmfd   sp!, {r4-r11}
    bx      lr

    ENDP

; End of assembly source.
    END