dequant_idct_v6.asm revision f71323e297a928af368937089d3ed71239786f86
1;
2;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3;
4;  Use of this source code is governed by a BSD-style license and patent
5;  grant that can be found in the LICENSE file in the root of the source
6;  tree. All contributing project authors may be found in the AUTHORS
7;  file in the root of the source tree.
8;
9
10    EXPORT |vp8_dequant_idct_add_v6|  ; make the routine's symbol visible to the linker
11
12    AREA |.text|, CODE, READONLY  ; what follows is assembled into a read-only code area named .text
13;void vp8_dequant_idct_add_v6(short *input, short *dq, unsigned char *pred,
14; unsigned char *dest, int pitch, int stride)
15; r0 = input   16 packed 16-bit coefficients (32 bytes); rewritten in place and zeroed on exit
16; r1 = dq      16 packed 16-bit factors, multiplied element-wise into input
17; r2 = pred    4 rows of 4 bytes, consecutive rows 'pitch' bytes apart
18; r3 = dest    4 rows of 4 bytes, consecutive rows 'stride' bytes apart
19; sp + 36 = pitch  ; +4 = 40   (read in the code as [sp, #40])
20; sp + 40 = stride  ; +4 = 44  (read in the code as [sp, #44])
;
; The two stack offsets are relative to sp after the prologue: the 9-register
; push moves sp down 36 bytes and the one-word local adds another 4.
;
; Stages, in order:
;   1. input[i] = low 16 bits of (input[i] * dq[i]) for i = 0..15.
;   2. First transform pass over the block viewed as 4 rows of 8 bytes,
;      two columns per iteration (one 16-bit lane per column), in place.
;   3. Second transform pass along each row, two rows per iteration, followed
;      by (x + 4) >> 3, addition of the pred bytes, clamping to 0..255 and a
;      4-byte store per row to dest.
;   4. The 32 bytes at input are set to zero.
;
; Comments below use these names for the butterfly terms, where cos and sin
; stand for the constants cospi8sqrt2minus1 and sinpi8sqrt2 and "k*x" means
; (k * x) >> 16 as produced by smulwb/smulwt:
;   a1 = x0 + x2                 b1 = x0 - x2
;   c1 = sin*x1 - (x3 + cos*x3)  d1 = (x1 + cos*x1) + sin*x3
;   out0 = a1 + d1   out1 = b1 + c1   out2 = b1 - c1   out3 = a1 - d1
;
; r4-r11 and lr are saved and restored; r0-r3 and r12 are modified and not
; restored (r0 ends up pointing at input again).
; NOTE(review): input, dq, pred and dest are all accessed with word ldr/str;
; presumably callers guarantee 4-byte alignment - confirm.
21
22
23|vp8_dequant_idct_add_v6| PROC
24    stmdb   sp!, {r4-r11, lr}       ; push 9 registers (36 bytes)
25
26    ldr     r4, [r0]                ; r4 = input[1]:input[0] (two packed halfwords)
27    ldr     r5, [r1], #4            ; r5 = dq[1]:dq[0], r1 += 4
28
29    sub     sp, sp, #4              ; reserve one word of local storage
30    str     r3, [sp]                ; [sp] = running dest row pointer (r3 is reused below)
31
32    mov     r12, #4                 ; 4 iterations, 4 coefficients each
33
; ---- Stage 1: element-wise multiply of input by dq, result written over input ----
34vp8_dequant_add_loop
35    smulbb  r6, r4, r5              ; low halfword of input word * low halfword of dq word
36    smultt  r7, r4, r5              ; high halfword * high halfword
37
38    ldr     r4, [r0, #4]            ; next input word, fetched before the stores advance r0
39    ldr     r5, [r1], #4            ; next dq word
40
41    strh    r6, [r0], #2            ; store low 16 bits of each product back into input
42    strh    r7, [r0], #2
43
44    smulbb  r6, r4, r5              ; second pair of this iteration
45    smultt  r7, r4, r5
46
47    subs    r12, r12, #1            ; Z set on the last iteration; flags used by ldrne/bne below
48
49    ldrne   r4, [r0, #4]            ; look-ahead loads are skipped on the last iteration,
50    ldrne   r5, [r1], #4            ; so nothing beyond the 16th element of input/dq is read
51
52    strh    r6, [r0], #2
53    strh    r7, [r0], #2
54
55    bne     vp8_dequant_add_loop
56
57    sub     r0, r0, #32             ; the stores advanced r0 by 32; rewind to the start of the block
58    mov     r1, r0                  ; r1 = write pointer for the in-place first pass
59
60; short_idct4x4llm_v6_dual
; ---- Stage 2: first pass. x0..x3 are the words at byte offsets 0, 8, 16, 24
; from r0 (rows 0..3 of one column pair); each word carries two columns. ----
61    ldr     r3, cospi8sqrt2minus1   ; r3 = cos constant (0x4E7B)
62    ldr     r4, sinpi8sqrt2         ; r4 = sin constant (0x8A8C)
63    ldr     r6, [r0, #8]            ; x1 for the first column pair
64    mov     r5, #2                  ; 2 iterations, 2 columns each
65vp8_dequant_idct_loop1_v6
66    ldr     r12, [r0, #24]          ; x3
67    ldr     r14, [r0, #16]          ; x2
68    smulwt  r9, r3, r6              ; cos*x1, high lane
69    smulwb  r7, r3, r6              ; cos*x1, low lane
70    smulwt  r10, r4, r6             ; sin*x1, high lane
71    smulwb  r8, r4, r6              ; sin*x1, low lane
72    pkhbt   r7, r7, r9, lsl #16     ; r7 = packed cos*x1
73    smulwt  r11, r3, r12            ; cos*x3, high lane
74    pkhbt   r8, r8, r10, lsl #16    ; r8 = packed sin*x1
75    uadd16  r6, r6, r7              ; r6 = x1 + cos*x1
76    smulwt  r7, r4, r12             ; sin*x3, high lane
77    smulwb  r9, r3, r12             ; cos*x3, low lane
78    smulwb  r10, r4, r12            ; sin*x3, low lane
79    subs    r5, r5, #1              ; loop counter; flags consumed by ldrne/bne further down
80    pkhbt   r9, r9, r11, lsl #16    ; r9 = packed cos*x3
81    ldr     r11, [r0], #4           ; x0; r0 now points at the next column pair
82    pkhbt   r10, r10, r7, lsl #16   ; r10 = packed sin*x3
83    uadd16  r7, r12, r9             ; r7 = x3 + cos*x3
84    usub16  r7, r8, r7              ; r7 = c1
85    uadd16  r6, r6, r10             ; r6 = d1
86    uadd16  r10, r11, r14           ; r10 = a1
87    usub16  r8, r11, r14            ; r8 = b1
88    uadd16  r9, r10, r6             ; r9 = out0 = a1 + d1
89    usub16  r10, r10, r6            ; r10 = out3 = a1 - d1
90    uadd16  r6, r8, r7              ; r6 = out1 = b1 + c1
91    usub16  r7, r8, r7              ; r7 = out2 = b1 - c1
92    str     r6, [r1, #8]            ; out1 -> row 1
93    ldrne   r6, [r0, #8]            ; preload x1 of the next column pair (skipped on last pass)
94    str     r7, [r1, #16]           ; out2 -> row 2
95    str     r10, [r1, #24]          ; out3 -> row 3
96    str     r9, [r1], #4            ; out0 -> row 0; advance write pointer
97    bne     vp8_dequant_idct_loop1_v6
98
; ---- Stage 3: second pass along rows, then round, add pred, clamp, store.
; Each iteration loads two consecutive rows: row A (lower address) is carried
; in the high 16-bit lane, row B in the low lane. ----
99    mov     r5, #2                  ; 2 iterations, 2 rows each
100    sub     r0, r1, #8              ; r1 advanced 8 bytes in the first pass; r0 = start of block
101vp8_dequant_idct_loop2_v6
102    ldr     r6, [r0], #4            ; row A: x1:x0
103    ldr     r7, [r0], #4            ; row A: x3:x2
104    ldr     r8, [r0], #4            ; row B: x1:x0
105    ldr     r9, [r0], #4            ; row B: x3:x2
106    smulwt  r1, r3, r6              ; cos*x1 (row A)
107    smulwt  r12, r4, r6             ; sin*x1 (row A)
108    smulwt  lr, r3, r8              ; cos*x1 (row B)
109    smulwt  r10, r4, r8             ; sin*x1 (row B)
110    pkhbt   r11, r8, r6, lsl #16    ; r11 = x0 (A:B)
111    pkhbt   r1, lr, r1, lsl #16     ; r1 = cos*x1 (A:B)
112    pkhbt   r12, r10, r12, lsl #16  ; r12 = sin*x1 (A:B)
113    pkhtb   r6, r6, r8, asr #16     ; r6 = x1 (A:B)
114    uadd16  r6, r1, r6              ; r6 = x1 + cos*x1
115    pkhbt   lr, r9, r7, lsl #16     ; lr = x2 (A:B)
116    uadd16  r10, r11, lr            ; r10 = a1
117    usub16  lr, r11, lr             ; lr = b1
118    pkhtb   r8, r7, r9, asr #16     ; r8 = x3 (A:B)
119    subs    r5, r5, #1              ; loop counter; flags consumed by the bne at the end of the loop
120    smulwt  r1, r3, r8              ; cos*x3 (row A)
121    smulwb  r7, r3, r8              ; cos*x3 (row B)
122    smulwt  r11, r4, r8             ; sin*x3 (row A)
123    smulwb  r9, r4, r8              ; sin*x3 (row B)
124    pkhbt   r1, r7, r1, lsl #16     ; r1 = cos*x3 (A:B)
125    uadd16  r8, r1, r8              ; r8 = x3 + cos*x3
126    pkhbt   r11, r9, r11, lsl #16   ; r11 = sin*x3 (A:B)
127    usub16  r1, r12, r8             ; r1 = c1
128    uadd16  r8, r11, r6             ; r8 = d1
129    ldr     r9, c0x00040004         ; r9 = 4 in each lane (rounding term for the >> 3)
130    ldr     r12, [sp, #40]          ; r12 = pitch
131    uadd16  r6, r10, r8             ; r6 = out0 = a1 + d1
132    usub16  r7, r10, r8             ; r7 = out3 = a1 - d1
133    uadd16  r7, r7, r9              ; out3 + 4
134    uadd16  r6, r6, r9              ; out0 + 4
135    uadd16  r10, r14, r1            ; r10 = out1 = b1 + c1 (r14 is lr, holding b1)
136    usub16  r1, r14, r1             ; r1 = out2 = b1 - c1
137    uadd16  r10, r10, r9            ; out1 + 4
138    uadd16  r1, r1, r9              ; out2 + 4
139    ldr     r11, [r2], r12          ; 4 pred bytes for row A; pred += pitch
140    mov     r8, r7, asr #3          ; high lane = row A (out3 + 4) >> 3; low lane is replaced next
141    pkhtb   r9, r8, r10, asr #19    ; r9 = row A out3:out1, each >> 3
142    mov     r8, r1, asr #3          ; high lane = row A (out2 + 4) >> 3
143    pkhtb   r8, r8, r6, asr #19     ; r8 = row A out2:out0, each >> 3
144    uxtb16  lr, r11, ror #8         ; pred bytes 3 and 1, zero-extended to halfwords
145    qadd16  r9, r9, lr              ; signed saturating add of prediction to out3:out1
146    uxtb16  lr, r11                 ; pred bytes 2 and 0
147    qadd16  r8, r8, lr              ; add prediction to out2:out0
148    usat16  r9, #8, r9              ; clamp each lane to 0..255
149    usat16  r8, #8, r8
150    orr     r9, r8, r9, lsl #8      ; r9 = row A result bytes 3,2,1,0 in one word
151    ldr     r11, [r2], r12          ; 4 pred bytes for row B; pred += pitch
152    ldr     lr, [sp]                ; lr = current dest row pointer
153    ldr     r12, [sp, #44]          ; r12 = stride
154    mov     r7, r7, lsl #16         ; move the row B (low) lanes into the high halfword
155    mov     r1, r1, lsl #16         ;   so the same asr #3 / asr #19 packing can be reused
156    mov     r10, r10, lsl #16
157    mov     r6, r6, lsl #16
158    mov     r7, r7, asr #3          ; high lane = row B (out3 + 4) >> 3
159    pkhtb   r7, r7, r10, asr #19    ; r7 = row B out3:out1, each >> 3
160    mov     r1, r1, asr #3          ; high lane = row B (out2 + 4) >> 3
161    pkhtb   r1, r1, r6, asr #19     ; r1 = row B out2:out0, each >> 3
162    uxtb16  r8, r11, ror #8         ; pred bytes 3 and 1
163    qadd16  r7, r7, r8
164    uxtb16  r8, r11                 ; pred bytes 2 and 0
165    qadd16  r1, r1, r8
166    usat16  r7, #8, r7              ; clamp each lane to 0..255
167    usat16  r1, #8, r1
168    orr     r1, r1, r7, lsl #8      ; r1 = row B result bytes 3,2,1,0
169    str     r9, [lr], r12           ; write row A to dest; dest += stride
170    str     r1, [lr], r12           ; write row B to dest; dest += stride
171    str     lr, [sp]                ; save dest pointer for the next iteration
172    bne     vp8_dequant_idct_loop2_v6
173
174; vpx_memset: inline zeroing of the 32-byte coefficient block at input
175    sub     r0, r0, #32             ; the second pass advanced r0 by 32; rewind to the start
176    add     sp, sp, #4              ; release the dest-pointer local
177
178    mov     r12, #0
179    str     r12, [r0]
180    str     r12, [r0, #4]
181    str     r12, [r0, #8]
182    str     r12, [r0, #12]
183    str     r12, [r0, #16]
184    str     r12, [r0, #20]
185    str     r12, [r0, #24]
186    str     r12, [r0, #28]
187
188    ldmia   sp!, {r4 - r11, pc}     ; restore r4-r11 and return (saved lr is popped into pc)
189    ENDP    ; |vp8_dequant_idct_add_v6|
190
191; Constant Pool
; 32-bit literals loaded by label (ldr rX, <label>) from vp8_dequant_idct_add_v6.
; The first two are used as the 32-bit operand of smulwb/smulwt, which
; returns (operand * 16-bit value) >> 16, so they act as 16.16 fixed-point factors.
192cospi8sqrt2minus1 DCD 0x00004E7B  ; 20091 ~= (sqrt(2) * cos(pi/8) - 1) * 65536
193sinpi8sqrt2       DCD 0x00008A8C  ; 35468 ~= sqrt(2) * sin(pi/8) * 65536
194c0x00040004       DCD 0x00040004  ; 4 in each 16-bit lane; added before the >> 3 for rounding
195
196    END  ; end of the assembly source
197