1;
2;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3;
4;  Use of this source code is governed by a BSD-style license and patent
5;  grant that can be found in the LICENSE file in the root of the source
6;  tree. All contributing project authors may be found in the AUTHORS
7;  file in the root of the source tree.
8;
9
10
11    EXPORT |vp8_dequant_dc_idct_add_v6|     ; make the routine's symbol visible to the linker
12
13    AREA |.text|, CODE, READONLY            ; what follows is assembled into a read-only code area named .text
14
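15;void vp8_dequant_dc_idct_add_v6(short *input, short *dq, unsigned char *pred,
16; unsigned char *dest, int pitch, int stride, int Dc)
17; r0 = input
18; r1 = dq
19; r2 = pred
20; r3 = dest
21; sp + 36 = pitch   (offset after the 9-register push; sp + 40 once the extra dest word is on the stack)
22; sp + 40 = stride  (offset after the 9-register push; sp + 44 once the extra dest word is on the stack)
23; sp + 44 = Dc      (offset after the 9-register push; sp + 48 once the extra dest word is on the stack)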
24
25
26|vp8_dequant_dc_idct_add_v6| PROC    ; dequantize 16 coefficients (first one replaced by Dc), 4x4 inverse transform, add pred, write 4x4 bytes to dest, zero input
27    stmdb   sp!, {r4-r11, lr}         ; save 9 registers (36 bytes); stack arguments now start at sp + 36
28; Step 1 - dequantize in place: every 32-bit word holds two 16-bit values, multiplied halfword by halfword with dq.
29    ldr     r6, [sp, #44]             ; r6 = Dc
30
31    ldr     r4, [r0]                ;input word 0 (only its top halfword is multiplied; Dc replaces the other one)
32    ldr     r5, [r1], #4            ;dq word 0, r1 += 4
33; r3 is overwritten with a constant further down, so dest is parked in a new stack slot.
34    sub     sp, sp, #4                ; one extra word: stack arguments shift up by 4 from here on
35    str     r3, [sp]                  ; [sp] = dest pointer (reloaded and updated in the second IDCT pass)
36
37    smultt  r7, r4, r5                ; r7 = top(input word 0) * top(dq word 0)
38
39    ldr     r4, [r0, #4]            ;input word 1 (fetched before the stores below advance r0)
40    ldr     r5, [r1], #4            ;dq word 1
41
42    strh    r6, [r0], #2              ; first 16-bit slot of input = Dc, r0 += 2
43    strh    r7, [r0], #2              ; second slot = dequantized value, r0 += 2
44
45    smulbb  r6, r4, r5                ; bottom * bottom of word 1
46    smultt  r7, r4, r5                ; top * top of word 1
47
48    ldr     r4, [r0, #4]            ;input word 2 (r0 has already advanced by 4 bytes)
49    ldr     r5, [r1], #4            ;dq word 2
50
51    strh    r6, [r0], #2              ; store the two products of word 1
52    strh    r7, [r0], #2
53
54    mov     r12, #3                   ; 3 iterations x 2 words = remaining words 2..7
55; Software-pipelined loop: multiply the pair already in r4/r5, load the next pair, then store the products.
56vp8_dequant_dc_add_loop
57    smulbb  r6, r4, r5                ; bottom * bottom
58    smultt  r7, r4, r5                ; top * top
59
60    ldr     r4, [r0, #4]            ;input: next word
61    ldr     r5, [r1], #4            ;dq: next word
62
63    strh    r6, [r0], #2
64    strh    r7, [r0], #2
65
66    smulbb  r6, r4, r5
67    smultt  r7, r4, r5
68
69    subs    r12, r12, #1              ; sets the flags used by ldrne/bne (strh does not change them)
70
71    ldrne   r4, [r0, #4]              ; preload only if another iteration follows,
72    ldrne   r5, [r1], #4              ; so nothing is read beyond word 7 of input or dq
73
74    strh    r6, [r0], #2
75    strh    r7, [r0], #2
76
77    bne     vp8_dequant_dc_add_loop
78; 16 halfword stores advanced r0 by 32 bytes.
79    sub     r0, r0, #32               ; r0 = start of input again
80    mov     r1, r0                    ; r1 = write pointer for the first pass (results go back into input)
81; Step 2 - first IDCT pass. i0..i3 = words at byte offsets 0/8/16/24 from r0; C = r3, S = r4; every word is two independent 16-bit lanes.
82; short_idct4x4llm_v6_dual
83    ldr     r3, cospi8sqrt2minus1     ; C = 0x4E7B
84    ldr     r4, sinpi8sqrt2           ; S = 0x8A8C
85    ldr     r6, [r0, #8]              ; r6 = i1 for the first iteration
86    mov     r5, #2                    ; 2 iterations, one word (two lanes) of each of i0..i3 per iteration
87vp8_dequant_dc_idct_loop1_v6
88    ldr     r12, [r0, #24]            ; r12 = i3
89    ldr     r14, [r0, #16]            ; r14 = i2
90    smulwt  r9, r3, r6                ; (C * i1.top) >> 16
91    smulwb  r7, r3, r6                ; (C * i1.bottom) >> 16
92    smulwt  r10, r4, r6               ; (S * i1.top) >> 16
93    smulwb  r8, r4, r6                ; (S * i1.bottom) >> 16
94    pkhbt   r7, r7, r9, lsl #16       ; r7 = both lanes of (C * i1) >> 16
95    smulwt  r11, r3, r12              ; (C * i3.top) >> 16
96    pkhbt   r8, r8, r10, lsl #16      ; r8 = both lanes of (S * i1) >> 16
97    uadd16  r6, r6, r7                ; r6 = i1 + ((C * i1) >> 16)
98    smulwt  r7, r4, r12               ; (S * i3.top) >> 16
99    smulwb  r9, r3, r12               ; (C * i3.bottom) >> 16
100    smulwb  r10, r4, r12             ; (S * i3.bottom) >> 16
101    subs    r5, r5, #1               ; loop counter; the instructions up to ldrne/bne leave N/Z/C/V untouched
102    pkhbt   r9, r9, r11, lsl #16     ; r9 = both lanes of (C * i3) >> 16
103    ldr     r11, [r0], #4            ; r11 = i0, r0 += 4
104    pkhbt   r10, r10, r7, lsl #16    ; r10 = both lanes of (S * i3) >> 16
105    uadd16  r7, r12, r9              ; i3 + ((C * i3) >> 16)
106    usub16  r7, r8, r7               ; c1 = ((S * i1) >> 16) - (i3 + ((C * i3) >> 16))
107    uadd16  r6, r6, r10              ; d1 = (i1 + ((C * i1) >> 16)) + ((S * i3) >> 16)
108    uadd16  r10, r11, r14            ; a1 = i0 + i2
109    usub16  r8, r11, r14             ; b1 = i0 - i2
110    uadd16  r9, r10, r6              ; a1 + d1
111    usub16  r10, r10, r6             ; a1 - d1
112    uadd16  r6, r8, r7               ; b1 + c1
113    usub16  r7, r8, r7               ; b1 - c1
114    str     r6, [r1, #8]             ; offset 8  <- b1 + c1
115    ldrne   r6, [r0, #8]             ; preload i1 of the next iteration (skipped on the last one)
116    str     r7, [r1, #16]            ; offset 16 <- b1 - c1
117    str     r10, [r1, #24]           ; offset 24 <- a1 - d1
118    str     r9, [r1], #4             ; offset 0  <- a1 + d1, r1 += 4
119    bne     vp8_dequant_dc_idct_loop1_v6
120; Step 3 - second IDCT pass + reconstruction. Each iteration reads 16 bytes as words {A1:A0} {A3:A2} {B1:B0} {B3:B2} (top:bottom lane) and emits two 4-byte output rows.
121    mov     r5, #2                   ; 2 iterations, 16 input bytes each
122    sub     r0, r1, #8               ; r1 advanced by 8 in the first pass; r0 = start of the block
123vp8_dequant_dc_idct_loop2_v6
124    ldr     r6, [r0], #4             ; r6 = {A1:A0}
125    ldr     r7, [r0], #4             ; r7 = {A3:A2}
126    ldr     r8, [r0], #4             ; r8 = {B1:B0}
127    ldr     r9, [r0], #4             ; r9 = {B3:B2}
128    smulwt  r1, r3, r6               ; (C * A1) >> 16
129    smulwt  r12, r4, r6              ; (S * A1) >> 16
130    smulwt  lr, r3, r8               ; (C * B1) >> 16
131    smulwt  r10, r4, r8              ; (S * B1) >> 16
132    pkhbt   r11, r8, r6, lsl #16     ; r11 = {A0:B0}
133    pkhbt   r1, lr, r1, lsl #16      ; r1  = {(C*A1)>>16 : (C*B1)>>16}
134    pkhbt   r12, r10, r12, lsl #16   ; r12 = {(S*A1)>>16 : (S*B1)>>16}
135    pkhtb   r6, r6, r8, asr #16      ; r6  = {A1:B1}
136    uadd16  r6, r1, r6               ; r6  = x1 + ((C * x1) >> 16) for rows A and B
137    pkhbt   lr, r9, r7, lsl #16      ; lr  = {A2:B2}
138    uadd16  r10, r11, lr             ; a1 = x0 + x2
139    usub16  lr, r11, lr              ; b1 = x0 - x2 (kept in lr/r14 until the butterflies below)
140    pkhtb   r8, r7, r9, asr #16      ; r8  = {A3:B3}
141    subs    r5, r5, #1               ; loop counter; flags are consumed by the bne at the end of the loop
142    smulwt  r1, r3, r8               ; (C * A3) >> 16
143    smulwb  r7, r3, r8               ; (C * B3) >> 16
144    smulwt  r11, r4, r8              ; (S * A3) >> 16
145    smulwb  r9, r4, r8               ; (S * B3) >> 16
146    pkhbt   r1, r7, r1, lsl #16      ; r1  = both rows of (C * x3) >> 16
147    uadd16  r8, r1, r8               ; x3 + ((C * x3) >> 16)
148    pkhbt   r11, r9, r11, lsl #16    ; r11 = both rows of (S * x3) >> 16
149    usub16  r1, r12, r8              ; c1 = ((S * x1) >> 16) - (x3 + ((C * x3) >> 16))
150    uadd16  r8, r11, r6              ; d1 = ((S * x3) >> 16) + (x1 + ((C * x1) >> 16))
151    ldr     r9, c0x00040004          ; rounding bias: 4 in each lane, added before the >> 3
152    ldr     r12, [sp, #40]           ; r12 = pitch (sp + 36 after the push, + 4 for the dest slot)
153    uadd16  r6, r10, r8              ; out0 = a1 + d1
154    usub16  r7, r10, r8              ; out3 = a1 - d1
155    uadd16  r7, r7, r9               ; out3 += 4
156    uadd16  r6, r6, r9               ; out0 += 4
157    uadd16  r10, r14, r1             ; out1 = b1 + c1
158    usub16  r1, r14, r1              ; out2 = b1 - c1
159    uadd16  r10, r10, r9             ; out1 += 4
160    uadd16  r1, r1, r9               ; out2 += 4
161    ldr     r11, [r2], r12           ; r11 = 4 pred bytes for row A, pred += pitch
162    mov     r8, r7, asr #3           ; top lane = A.out3 >> 3 (bottom lane is dropped by the pkhtb below)
163    pkhtb   r9, r8, r10, asr #19     ; r9 = {A.out3 >> 3 : A.out1 >> 3} (asr #19 = take top lane, then >> 3)
164    mov     r8, r1, asr #3           ; top lane = A.out2 >> 3
165    pkhtb   r8, r8, r6, asr #19      ; r8 = {A.out2 >> 3 : A.out0 >> 3}
166    uxtb16  lr, r11, ror #8          ; lr = {pred byte 3 : pred byte 1}, zero-extended
167    qadd16  r9, r9, lr               ; residual + prediction for bytes 3 and 1
168    uxtb16  lr, r11                  ; lr = {pred byte 2 : pred byte 0}, zero-extended
169    qadd16  r8, r8, lr               ; residual + prediction for bytes 2 and 0
170    usat16  r9, #8, r9               ; clamp each lane to 0..255
171    usat16  r8, #8, r8
172    orr     r9, r8, r9, lsl #8       ; interleave back into one word: bytes 3..0 = out3, out2, out1, out0 of row A
173    ldr     r11, [r2], r12           ; r11 = 4 pred bytes for row B, pred += pitch
174    ldr     lr, [sp]                 ; lr = current dest pointer
175    ldr     r12, [sp, #44]           ; r12 = stride (sp + 40 after the push, + 4 for the dest slot)
176    mov     r7, r7, lsl #16          ; move the row-B (bottom) lanes into the top lanes
177    mov     r1, r1, lsl #16
178    mov     r10, r10, lsl #16
179    mov     r6, r6, lsl #16
180    mov     r7, r7, asr #3           ; top lane = B.out3 >> 3
181    pkhtb   r7, r7, r10, asr #19     ; r7 = {B.out3 >> 3 : B.out1 >> 3}
182    mov     r1, r1, asr #3           ; top lane = B.out2 >> 3
183    pkhtb   r1, r1, r6, asr #19      ; r1 = {B.out2 >> 3 : B.out0 >> 3}
184    uxtb16  r8, r11, ror #8          ; {pred byte 3 : pred byte 1}
185    qadd16  r7, r7, r8
186    uxtb16  r8, r11                  ; {pred byte 2 : pred byte 0}
187    qadd16  r1, r1, r8
188    usat16  r7, #8, r7               ; clamp each lane to 0..255
189    usat16  r1, #8, r1
190    orr     r1, r1, r7, lsl #8       ; bytes 3..0 = out3, out2, out1, out0 of row B
191    str     r9, [lr], r12            ; write row A (4 bytes) to dest, dest += stride
192    str     r1, [lr], r12            ; write row B (4 bytes) to dest, dest += stride
193    str     lr, [sp]                 ; keep the advanced dest pointer for the next iteration
194    bne     vp8_dequant_dc_idct_loop2_v6
195
196; vpx_memset: zero the 32-byte input block
197    sub     r0, r0, #32              ; the second pass advanced r0 by 32 bytes; back to the start of input
198    add     sp, sp, #4               ; release the dest stack slot
199
200    mov     r12, #0
201    str     r12, [r0]
202    str     r12, [r0, #4]
203    str     r12, [r0, #8]
204    str     r12, [r0, #12]
205    str     r12, [r0, #16]
206    str     r12, [r0, #20]
207    str     r12, [r0, #24]
208    str     r12, [r0, #28]
209
210    ldmia   sp!, {r4 - r11, pc}      ; restore r4-r11 and return by loading the saved lr into pc
211    ENDP    ; |vp8_dequant_dc_idct_add_v6|
212
213; Constant Pool: literal words fetched by the "ldr rX, <label>" instructions in the routine above
214cospi8sqrt2minus1 DCD 0x00004E7B    ; 20091 ~= (sqrt(2) * cos(pi/8) - 1) * 65536; used with smulw* (product >> 16)
215sinpi8sqrt2       DCD 0x00008A8C    ; 35468 ~= sqrt(2) * sin(pi/8) * 65536; used with smulw* (product >> 16)
216c0x00040004       DCD 0x00040004    ; 4 in each 16-bit lane: rounding bias added before the >> 3
217
218    END
219