;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


; Entry points made visible to the linker.
    EXPORT  |vp8_recon_b_armv6|
    EXPORT  |vp8_recon2b_armv6|
    EXPORT  |vp8_recon4b_armv6|

; Everything that follows is assembled into the read-only code area |.text|.
    AREA    |.text|, CODE, READONLY

; Symbolic names for the four argument registers used by every routine below.
prd     RN  r0                      ; pointer to the predictor bytes
dif     RN  r1                      ; pointer to the 16-bit difference values
dst     RN  r2                      ; pointer to the reconstructed output bytes
stride  RN  r3                      ; byte step between output rows

; void vp8_recon_b_armv6(unsigned char *pred_ptr, short *diff_ptr,
;                        unsigned char *dst_ptr, int stride)
; R0 unsigned char *pred_ptr
; R1 short         *diff_ptr
; R2 unsigned char *dst_ptr
; R3 int            stride

; Description:
; Walk the block, adding each predictor byte to its 16-bit difference value.
; Clamp every sum to 0..255 and store the result to dst.

; Restrictions:
; All buffers are expected to be 4-byte aligned, both coming in and going out.
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
;
;
;
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
; vp8_recon_b_armv6
;
; Reconstructs four rows of four pixels, fully unrolled.  For each row one
; word of predictor bytes and four 16-bit difference values are loaded, added
; pixel by pixel, saturated to 0..255 and written to dst as a single word.
;
; Pointer steps:
;   prd  += 16 bytes after every row load
;   dif  += 32 bytes between rows (not after the last row)
;   dst  += stride after every row store
;
; Scratch registers (r4-r9 and lr are pushed on entry and popped on exit):
;   r4 = d1 : d0      difference values 1 and 0
;   r5 = d3 : d2      difference values 3 and 2
;   r9 = p3 p2 p1 p0  predictor bytes
;   r6 = even pixels (2 : 0), later the packed result
;   r7 = odd pixels  (3 : 1)
; Lane labels are written high-to-low, assuming little-endian loads.
|vp8_recon_b_armv6| PROC
    stmfd   sp!, {r4 - r9, lr}

    ; ---- row 0 ----
    ldr     r4, [dif, #0]           ; r4 = d1 : d0
    ldr     r5, [dif, #4]           ; r5 = d3 : d2
    ldr     r9, [prd], #16          ; r9 = p3 p2 p1 p0, prd -> next row
    add     dif, dif, #32           ; dif -> next row

    pkhbt   r6, r4, r5, lsl #16     ; r6 = d2 : d0
    pkhtb   r7, r5, r4, asr #16     ; r7 = d3 : d1
    uxtab16 r6, r6, r9              ; r6 = d2 + p2 : d0 + p0
    uxtab16 r7, r7, r9, ror #8      ; r7 = d3 + p3 : d1 + p1
    usat16  r6, #8, r6              ; clamp each halfword to 0..255
    usat16  r7, #8, r7
    orr     r6, r6, r7, lsl #8      ; interleave odd and even -> 3 | 2 | 1 | 0

    str     r6, [dst], stride       ; write the row, dst -> next row

    ; ---- row 1 ----
    ldr     r4, [dif, #0]           ; r4 = d1 : d0
    ldr     r5, [dif, #4]           ; r5 = d3 : d2
    ldr     r9, [prd], #16          ; r9 = p3 p2 p1 p0, prd -> next row
    add     dif, dif, #32           ; dif -> next row

    pkhbt   r6, r4, r5, lsl #16     ; r6 = d2 : d0
    pkhtb   r7, r5, r4, asr #16     ; r7 = d3 : d1
    uxtab16 r6, r6, r9              ; r6 = d2 + p2 : d0 + p0
    uxtab16 r7, r7, r9, ror #8      ; r7 = d3 + p3 : d1 + p1
    usat16  r6, #8, r6
    usat16  r7, #8, r7
    orr     r6, r6, r7, lsl #8      ; 3 | 2 | 1 | 0

    str     r6, [dst], stride

    ; ---- row 2 ----
    ldr     r4, [dif, #0]           ; r4 = d1 : d0
    ldr     r5, [dif, #4]           ; r5 = d3 : d2
    ldr     r9, [prd], #16          ; r9 = p3 p2 p1 p0, prd -> next row
    add     dif, dif, #32           ; dif -> next row

    pkhbt   r6, r4, r5, lsl #16     ; r6 = d2 : d0
    pkhtb   r7, r5, r4, asr #16     ; r7 = d3 : d1
    uxtab16 r6, r6, r9              ; r6 = d2 + p2 : d0 + p0
    uxtab16 r7, r7, r9, ror #8      ; r7 = d3 + p3 : d1 + p1
    usat16  r6, #8, r6
    usat16  r7, #8, r7
    orr     r6, r6, r7, lsl #8      ; 3 | 2 | 1 | 0

    str     r6, [dst], stride

    ; ---- row 3 (last row: dif is not advanced any further) ----
    ldr     r4, [dif, #0]           ; r4 = d1 : d0
    ldr     r5, [dif, #4]           ; r5 = d3 : d2
    ldr     r9, [prd], #16          ; r9 = p3 p2 p1 p0

    pkhbt   r6, r4, r5, lsl #16     ; r6 = d2 : d0
    pkhtb   r7, r5, r4, asr #16     ; r7 = d3 : d1
    uxtab16 r6, r6, r9              ; r6 = d2 + p2 : d0 + p0
    uxtab16 r7, r7, r9, ror #8      ; r7 = d3 + p3 : d1 + p1
    usat16  r6, #8, r6
    usat16  r7, #8, r7
    orr     r6, r6, r7, lsl #8      ; 3 | 2 | 1 | 0

    str     r6, [dst], stride

    ldmfd   sp!, {r4 - r9, pc}      ; restore scratch registers and return

    ENDP    ; |vp8_recon_b_armv6|

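;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
;
;
;
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
; void vp8_recon4b_armv6(unsigned char *pred_ptr, short *diff_ptr,
;                        unsigned char *dst_ptr, int stride)
; Same operation as vp8_recon_b_armv6, applied to 4 rows of 16 pixels.
; R0 unsigned char *pred_ptr
; R1 short         *diff_ptr
; R2 unsigned char *dst_ptr
; R3 int            stride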
; vp8_recon4b_armv6
;
; Reconstructs four rows of sixteen pixels.  Each loop pass handles one row as
; four groups of four pixels; every group adds one word of predictor bytes to
; four 16-bit difference values, saturates to 0..255 and stores one word.
;
; Per-row pointer steps:
;   prd  += 16 bytes (4 bytes per group, post-incremented by the loads)
;   dif  += 32 bytes
;   dst  += stride
;
; Scratch registers (r4-r9 and lr are pushed on entry and popped on exit):
;   lr = rows still to process
;   r4 = d1 : d0,  r5 = d3 : d2    difference values of the current group
;   r9 = predictor bytes of the current group
;   r6 = even pixels, later the packed result;  r7 = odd pixels
|vp8_recon4b_armv6| PROC
    stmfd   sp!, {r4 - r9, lr}

    mov     lr, #4                  ; four rows

recon4b_next_row
    ; ---- pixels 0..3 ----
    ldr     r4, [dif, #0]           ; r4 = d1 : d0
    ldr     r5, [dif, #4]           ; r5 = d3 : d2
    ldr     r9, [prd], #4           ; r9 = p3 p2 p1 p0

    pkhbt   r6, r4, r5, lsl #16     ; r6 = d2 : d0
    pkhtb   r7, r5, r4, asr #16     ; r7 = d3 : d1
    uxtab16 r6, r6, r9              ; add predictor bytes 2 and 0
    uxtab16 r7, r7, r9, ror #8      ; add predictor bytes 3 and 1
    usat16  r6, #8, r6              ; clamp each halfword to 0..255
    usat16  r7, #8, r7
    orr     r6, r6, r7, lsl #8      ; interleave odd and even pixels

    str     r6, [dst]

    ; ---- pixels 4..7 ----
    ldr     r4, [dif, #8]
    ldr     r5, [dif, #12]
    ldr     r9, [prd], #4

    pkhbt   r6, r4, r5, lsl #16
    pkhtb   r7, r5, r4, asr #16
    uxtab16 r6, r6, r9
    uxtab16 r7, r7, r9, ror #8
    usat16  r6, #8, r6
    usat16  r7, #8, r7
    orr     r6, r6, r7, lsl #8

    str     r6, [dst, #4]

    ; ---- pixels 8..11 ----
    ldr     r4, [dif, #16]
    ldr     r5, [dif, #20]
    ldr     r9, [prd], #4

    pkhbt   r6, r4, r5, lsl #16
    pkhtb   r7, r5, r4, asr #16
    uxtab16 r6, r6, r9
    uxtab16 r7, r7, r9, ror #8
    usat16  r6, #8, r6
    usat16  r7, #8, r7
    orr     r6, r6, r7, lsl #8

    str     r6, [dst, #8]

    ; ---- pixels 12..15 ----
    ldr     r4, [dif, #24]
    ldr     r5, [dif, #28]
    ldr     r9, [prd], #4

    pkhbt   r6, r4, r5, lsl #16
    pkhtb   r7, r5, r4, asr #16
    uxtab16 r6, r6, r9
    uxtab16 r7, r7, r9, ror #8
    usat16  r6, #8, r6
    usat16  r7, #8, r7
    orr     r6, r6, r7, lsl #8

    str     r6, [dst, #12]

    ; ---- step to the next row ----
    add     dif, dif, #32
    add     dst, dst, stride

    subs    lr, lr, #1
    bne     recon4b_next_row

    ldmfd   sp!, {r4 - r9, pc}      ; restore scratch registers and return

    ENDP    ; |vp8_recon4b_armv6|

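;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
;
;
;
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
; void vp8_recon2b_armv6(unsigned char *pred_ptr, short *diff_ptr,
;                        unsigned char *dst_ptr, int stride)
; Same operation as vp8_recon_b_armv6, applied to 4 rows of 8 pixels.
; R0 unsigned char *pred_ptr
; R1 short         *diff_ptr
; R2 unsigned char *dst_ptr
; R3 int            stride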
; vp8_recon2b_armv6
;
; Reconstructs four rows of eight pixels.  Each loop pass handles one row as
; two groups of four pixels; every group adds one word of predictor bytes to
; four 16-bit difference values, saturates to 0..255 and stores one word.
;
; Per-row pointer steps:
;   prd  += 8 bytes (4 bytes per group, post-incremented by the loads)
;   dif  += 16 bytes
;   dst  += stride
;
; Scratch registers (r4-r9 and lr are pushed on entry and popped on exit):
;   lr = rows still to process
;   r4 = d1 : d0,  r5 = d3 : d2    difference values of the current group
;   r9 = predictor bytes of the current group
;   r6 = even pixels, later the packed result;  r7 = odd pixels
|vp8_recon2b_armv6| PROC
    stmfd   sp!, {r4 - r9, lr}

    mov     lr, #4                  ; four rows

recon2b_next_row
    ; ---- pixels 0..3 ----
    ldr     r4, [dif, #0]           ; r4 = d1 : d0
    ldr     r5, [dif, #4]           ; r5 = d3 : d2
    ldr     r9, [prd], #4           ; r9 = p3 p2 p1 p0

    pkhbt   r6, r4, r5, lsl #16     ; r6 = d2 : d0
    pkhtb   r7, r5, r4, asr #16     ; r7 = d3 : d1
    uxtab16 r6, r6, r9              ; add predictor bytes 2 and 0
    uxtab16 r7, r7, r9, ror #8      ; add predictor bytes 3 and 1
    usat16  r6, #8, r6              ; clamp each halfword to 0..255
    usat16  r7, #8, r7
    orr     r6, r6, r7, lsl #8      ; interleave odd and even pixels

    str     r6, [dst]

    ; ---- pixels 4..7 ----
    ldr     r4, [dif, #8]
    ldr     r5, [dif, #12]
    ldr     r9, [prd], #4

    pkhbt   r6, r4, r5, lsl #16
    pkhtb   r7, r5, r4, asr #16
    uxtab16 r6, r6, r9
    uxtab16 r7, r7, r9, ror #8
    usat16  r6, #8, r6
    usat16  r7, #8, r7
    orr     r6, r6, r7, lsl #8

    str     r6, [dst, #4]

    ; ---- step to the next row ----
    add     dif, dif, #16
    add     dst, dst, stride

    subs    lr, lr, #1
    bne     recon2b_next_row

    ldmfd   sp!, {r4 - r9, pc}      ; restore scratch registers and return

    ENDP    ; |vp8_recon2b_armv6|

    END
