;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


12    EXPORT  |vp8_copy_mem16x16_v6|
13    ; ARM
14    ; REQUIRE8
15    ; PRESERVE8
16
17    AREA    Block, CODE, READONLY ; name this block of code
18;void copy_mem16x16_v6( unsigned char *src, int src_stride, unsigned char *dst, int dst_stride)
19;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
20|vp8_copy_mem16x16_v6| PROC
21    stmdb       sp!, {r4 - r7}
22    ;push   {r4-r7}
23
24    ;preload
25    pld     [r0, #31]                ; preload for next 16x16 block
26
27    ands    r4, r0, #15
28    beq     copy_mem16x16_fast
29
30    ands    r4, r0, #7
31    beq     copy_mem16x16_8
32
33    ands    r4, r0, #3
34    beq     copy_mem16x16_4
35
36    ;copy one byte each time
37    ldrb    r4, [r0]
38    ldrb    r5, [r0, #1]
39    ldrb    r6, [r0, #2]
40    ldrb    r7, [r0, #3]
41
42    mov     r12, #16
43
44copy_mem16x16_1_loop
45    strb    r4, [r2]
46    strb    r5, [r2, #1]
47    strb    r6, [r2, #2]
48    strb    r7, [r2, #3]
49
50    ldrb    r4, [r0, #4]
51    ldrb    r5, [r0, #5]
52    ldrb    r6, [r0, #6]
53    ldrb    r7, [r0, #7]
54
55    subs    r12, r12, #1
56
57    strb    r4, [r2, #4]
58    strb    r5, [r2, #5]
59    strb    r6, [r2, #6]
60    strb    r7, [r2, #7]
61
62    ldrb    r4, [r0, #8]
63    ldrb    r5, [r0, #9]
64    ldrb    r6, [r0, #10]
65    ldrb    r7, [r0, #11]
66
67    strb    r4, [r2, #8]
68    strb    r5, [r2, #9]
69    strb    r6, [r2, #10]
70    strb    r7, [r2, #11]
71
72    ldrb    r4, [r0, #12]
73    ldrb    r5, [r0, #13]
74    ldrb    r6, [r0, #14]
75    ldrb    r7, [r0, #15]
76
77    add     r0, r0, r1
78
79    strb    r4, [r2, #12]
80    strb    r5, [r2, #13]
81    strb    r6, [r2, #14]
82    strb    r7, [r2, #15]
83
84    add     r2, r2, r3
85
86    ldrneb  r4, [r0]
87    ldrneb  r5, [r0, #1]
88    ldrneb  r6, [r0, #2]
89    ldrneb  r7, [r0, #3]
90
91    pld     [r0, #31]               ; preload for next 16x16 block
92
93    bne     copy_mem16x16_1_loop
94
95    ldmia       sp!, {r4 - r7}
96    ;pop        {r4-r7}
97    mov     pc, lr
98
99;copy 4 bytes each time
100copy_mem16x16_4
101    ldr     r4, [r0]
102    ldr     r5, [r0, #4]
103    ldr     r6, [r0, #8]
104    ldr     r7, [r0, #12]
105
106    mov     r12, #16
107
108copy_mem16x16_4_loop
109    subs    r12, r12, #1
110    add     r0, r0, r1
111
112    str     r4, [r2]
113    str     r5, [r2, #4]
114    str     r6, [r2, #8]
115    str     r7, [r2, #12]
116
117    add     r2, r2, r3
118
119    ldrne   r4, [r0]
120    ldrne   r5, [r0, #4]
121    ldrne   r6, [r0, #8]
122    ldrne   r7, [r0, #12]
123
124    pld     [r0, #31]               ; preload for next 16x16 block
125
126    bne     copy_mem16x16_4_loop
127
128    ldmia       sp!, {r4 - r7}
129    ;pop        {r4-r7}
130    mov     pc, lr
131
132;copy 8 bytes each time
133copy_mem16x16_8
134    sub     r1, r1, #16
135    sub     r3, r3, #16
136
137    mov     r12, #16
138
139copy_mem16x16_8_loop
140    ldmia   r0!, {r4-r5}
141    ;ldm        r0, {r4-r5}
142    ldmia   r0!, {r6-r7}
143
144    add     r0, r0, r1
145
146    stmia   r2!, {r4-r5}
147    subs    r12, r12, #1
148    ;stm        r2, {r4-r5}
149    stmia   r2!, {r6-r7}
150
151    add     r2, r2, r3
152
153    pld     [r0, #31]               ; preload for next 16x16 block
154    bne     copy_mem16x16_8_loop
155
156    ldmia       sp!, {r4 - r7}
157    ;pop        {r4-r7}
158    mov     pc, lr
159
160;copy 16 bytes each time
161copy_mem16x16_fast
162    ;sub        r1, r1, #16
163    ;sub        r3, r3, #16
164
165    mov     r12, #16
166
167copy_mem16x16_fast_loop
168    ldmia   r0, {r4-r7}
169    ;ldm        r0, {r4-r7}
170    add     r0, r0, r1
171
172    subs    r12, r12, #1
173    stmia   r2, {r4-r7}
174    ;stm        r2, {r4-r7}
175    add     r2, r2, r3
176
177    pld     [r0, #31]               ; preload for next 16x16 block
178    bne     copy_mem16x16_fast_loop
179
180    ldmia       sp!, {r4 - r7}
181    ;pop        {r4-r7}
182    mov     pc, lr
183
184    ENDP  ; |vp8_copy_mem16x16_v6|
185
186    END
187