/*
 * Copyright (C) 2013 The Android Open Source Project
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * Copyright (c) 2013 ARM Ltd
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the company may not be used to endorse or promote
 *    products derived from this software without specific prior written
 *    permission.
 *
 * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <machine/asm.h>

    // Unified ARM/Thumb assembler syntax; the instructions in this file
    // (cbz, tbb, ldrd/strd with post-indexing) rely on Thumb-2 encodings.
    .syntax unified

    // Assemble as Thumb code and mark the next symbol that is defined as a
    // Thumb function entry point.
    // NOTE(review): the ENTRY()/END() macros used in this file presumably
    // come from machine/asm.h - confirm against that header.
    .thumb
    .thumb_func

// m_push: function prologue.
// Saves r0 (the destination pointer, reloaded by m_pop as the return value)
// together with r4, r5 and lr, all of which the copy code overwrites.
    .macro m_push
    push    {r0, r4, r5, lr}
    .endm // m_push

// m_pop: function epilogue and return.
// Restores r0, r4 and r5 from the frame written by m_push and loads the
// saved lr value straight into pc, so control does not fall through.
    .macro m_pop
    pop     {r0, r4, r5, pc}
    .endm // m_pop

// m_copy_byte reg, cmd, label
// Copies one byte from [r1] to [r0] through \reg, post-incrementing both
// pointers by one, then issues "\cmd \reg, \label".
//   reg   - scratch register that receives the byte
//   cmd   - compare-and-branch mnemonic applied to \reg (this file passes
//           cbz or cbnz)
//   label - branch target handed to \cmd
    .macro m_copy_byte reg, cmd, label
    ldrb    \reg, [r1], #1
    strb    \reg, [r0], #1
    \cmd    \reg, \label
    .endm // m_copy_byte

// strcpy: copy the NUL-terminated string at r1 to r0, terminator included.
//
// In:      r0 = destination pointer, r1 = source pointer
// Out:     r0 = the destination pointer as passed in (pushed by m_push and
//          reloaded by m_pop)
// Saved:   r4, r5 (pushed by m_push, restored by m_pop)
// Scratch: r2, r3, ip, lr and the condition flags
//
// Outline:
//   1. Copy the first 8 bytes one at a time.
//   2. Copy single bytes until the destination is 8-byte aligned.
//   3. If the source is now 8-byte aligned as well, copy 8 bytes per
//      iteration with ldrd/strd. Otherwise dispatch on (src & 7) to one of
//      seven loops that still store 8 bytes per iteration but probe the
//      source with byte loads wherever a word load could reach into the
//      next 8-byte block, and therefore possibly the next page, before the
//      string is known to extend that far.
//
// Zero-byte detection: for a word x,
//     (x - 0x01010101) & ~x & 0x80808080
// is non-zero when x contains a zero byte, and the lowest flag bit that is
// set belongs to the lowest zero byte. The tail code treats the least
// significant byte of a loaded word as the first byte in memory.
ENTRY(strcpy)
    // For short copies, hard-code checking the first 8 bytes since the
    // word-at-a-time code below does not win until after about 8 bytes.
    m_push
    m_copy_byte reg=r2, cmd=cbz, label=strcpy_finish
    m_copy_byte reg=r3, cmd=cbz, label=strcpy_finish
    m_copy_byte reg=r4, cmd=cbz, label=strcpy_finish
    m_copy_byte reg=r5, cmd=cbz, label=strcpy_finish
    m_copy_byte reg=r2, cmd=cbz, label=strcpy_finish
    m_copy_byte reg=r3, cmd=cbz, label=strcpy_finish
    m_copy_byte reg=r4, cmd=cbz, label=strcpy_finish
    m_copy_byte reg=r5, cmd=cbnz, label=strcpy_continue

strcpy_finish:
    m_pop

strcpy_continue:
    pld     [r1, #0]
    ands    r3, r0, #7
    beq     strcpy_check_src_align

    // Align to a double word (64 bits).
    // r3 = number of bytes (1..7) needed to reach the next 8-byte boundary.
    rsb     r3, r3, #8
    // Z is clear when bit 0 of r3 is set; C receives bit 1 of r3. The byte
    // copies below leave the flags alone, so C is still valid at the bcc.
    lsls    ip, r3, #31
    beq     strcpy_align_to_32

    ldrb    r2, [r1], #1
    strb    r2, [r0], #1
    cbz     r2, strcpy_complete

strcpy_align_to_32:
    bcc     strcpy_align_to_64

    ldrb    r2, [r1], #1
    strb    r2, [r0], #1
    cbz     r2, strcpy_complete
    ldrb    r2, [r1], #1
    strb    r2, [r0], #1
    cbz     r2, strcpy_complete

strcpy_align_to_64:
    tst     r3, #4
    beq     strcpy_check_src_align
    // Copy the remaining four alignment bytes one at a time. Only the
    // destination alignment is known here, so a word load from the source
    // could run past the terminator into a following, possibly unmapped,
    // page.
    ldrb    r2, [r1], #1
    strb    r2, [r0], #1
    cbz     r2, strcpy_complete
    ldrb    r2, [r1], #1
    strb    r2, [r0], #1
    cbz     r2, strcpy_complete
    ldrb    r2, [r1], #1
    strb    r2, [r0], #1
    cbz     r2, strcpy_complete
    ldrb    r2, [r1], #1
    strb    r2, [r0], #1
    cbz     r2, strcpy_complete

strcpy_check_src_align:
    // At this point dst is aligned to a double word, check if src
    // is also aligned to a double word.
    ands    r3, r1, #7
    bne     strcpy_unaligned_copy

    .p2align 2
strcpy_mainloop:
    // Both pointers are 8-byte aligned: move 8 bytes per iteration.
    ldrd    r2, r3, [r1], #8

    pld     [r1, #64]

    sub     ip, r2, #0x01010101
    bic     ip, ip, r2
    ands    ip, ip, #0x80808080
    bne     strcpy_zero_in_first_register

    sub     ip, r3, #0x01010101
    bic     ip, ip, r3
    ands    ip, ip, #0x80808080
    bne     strcpy_zero_in_second_register

    strd    r2, r3, [r0], #8
    b       strcpy_mainloop

strcpy_complete:
    m_pop

strcpy_zero_in_first_register:
    // ip holds the zero-byte flags for r2 (0x80 in each flagged byte).
    // After the shift by 17: the result is non-zero only for the byte 0
    // flag, and C receives the byte 1 flag.
    lsls    lr, ip, #17
    bne     strcpy_copy1byte
    bcs     strcpy_copy2bytes
    // Bytes 0 and 1 are non-zero; a non-zero result now means byte 2 is
    // the terminator, otherwise it is byte 3.
    lsls    ip, ip, #1
    bne     strcpy_copy3bytes

strcpy_copy4bytes:
    // Copy 4 bytes to the destination.
    str     r2, [r0]
    m_pop

strcpy_copy1byte:
    strb    r2, [r0]
    m_pop

strcpy_copy2bytes:
    strh    r2, [r0]
    m_pop

strcpy_copy3bytes:
    strh    r2, [r0], #2
    lsr     r2, #16
    strb    r2, [r0]
    m_pop

strcpy_zero_in_second_register:
    // Same decoding as strcpy_zero_in_first_register, applied to the flags
    // for r3; r2 is known to hold four non-zero bytes.
    lsls    lr, ip, #17
    bne     strcpy_copy5bytes
    bcs     strcpy_copy6bytes
    lsls    ip, ip, #1
    bne     strcpy_copy7bytes

    // Copy 8 bytes to the destination.
    strd    r2, r3, [r0]
    m_pop

strcpy_copy5bytes:
    str     r2, [r0], #4
    strb    r3, [r0]
    m_pop

strcpy_copy6bytes:
    str     r2, [r0], #4
    strh    r3, [r0]
    m_pop

strcpy_copy7bytes:
    str     r2, [r0], #4
    strh    r3, [r0], #2
    lsr     r3, #16
    strb    r3, [r0]
    m_pop

strcpy_unaligned_copy:
    // Dst is aligned to a double word, while src is at an unknown alignment.
    // There are 7 different versions of the unaligned copy code
    // to prevent overreading the src. The mainloop of every single version
    // will store 64 bits per loop. The difference is how much of src can
    // be read without potentially crossing a page boundary.
    //
    // r3 = src & 7 (1..7). In Thumb state pc reads as the address of the
    // tbb instruction plus 4, which is the table that follows it. Entry 0
    // is never selected because the aligned case was handled above.
    tbb     [pc, r3]
strcpy_unaligned_branchtable:
    .byte 0
    .byte ((strcpy_unalign7 - strcpy_unaligned_branchtable)/2)
    .byte ((strcpy_unalign6 - strcpy_unaligned_branchtable)/2)
    .byte ((strcpy_unalign5 - strcpy_unaligned_branchtable)/2)
    .byte ((strcpy_unalign4 - strcpy_unaligned_branchtable)/2)
    .byte ((strcpy_unalign3 - strcpy_unaligned_branchtable)/2)
    .byte ((strcpy_unalign2 - strcpy_unaligned_branchtable)/2)
    .byte ((strcpy_unalign1 - strcpy_unaligned_branchtable)/2)

    .p2align 2
    // Can read 7 bytes before possibly crossing a page.
strcpy_unalign7:
    ldr     r2, [r1], #4

    sub     ip, r2, #0x01010101
    bic     ip, ip, r2
    ands    ip, ip, #0x80808080
    bne     strcpy_zero_in_first_register

    // Probe the last three bytes of this 8-byte block individually before
    // issuing the word load that extends one byte into the next block.
    ldrb    r3, [r1]
    cbz     r3, strcpy_unalign7_copy5bytes
    ldrb    r4, [r1, #1]
    cbz     r4, strcpy_unalign7_copy6bytes
    ldrb    r5, [r1, #2]
    cbz     r5, strcpy_unalign7_copy7bytes

    ldr     r3, [r1], #4
    pld     [r1, #64]

    // Only the top byte of r3 is still unchecked. If it is the terminator
    // the strd below has already written it.
    lsrs    ip, r3, #24
    strd    r2, r3, [r0], #8
    beq     strcpy_unalign_return
    b       strcpy_unalign7

strcpy_unalign7_copy5bytes:
    str     r2, [r0], #4
    strb    r3, [r0]
strcpy_unalign_return:
    m_pop

strcpy_unalign7_copy6bytes:
    str     r2, [r0], #4
    strb    r3, [r0], #1
    strb    r4, [r0], #1
    m_pop

strcpy_unalign7_copy7bytes:
    str     r2, [r0], #4
    strb    r3, [r0], #1
    strb    r4, [r0], #1
    strb    r5, [r0], #1
    m_pop

    .p2align 2
    // Can read 6 bytes before possibly crossing a page.
strcpy_unalign6:
    ldr     r2, [r1], #4

    sub     ip, r2, #0x01010101
    bic     ip, ip, r2
    ands    ip, ip, #0x80808080
    bne     strcpy_zero_in_first_register

    ldrb    r4, [r1]
    cbz     r4, strcpy_unalign_copy5bytes
    ldrb    r5, [r1, #1]
    cbz     r5, strcpy_unalign_copy6bytes

    ldr     r3, [r1], #4
    pld     [r1, #64]

    // Bytes 0 and 1 of r3 were probed above; check byte 2, then byte 3.
    tst     r3, #0xff0000
    beq     strcpy_copy7bytes
    lsrs    ip, r3, #24
    strd    r2, r3, [r0], #8
    beq     strcpy_unalign_return
    b       strcpy_unalign6

    .p2align 2
    // Can read 5 bytes before possibly crossing a page.
strcpy_unalign5:
    ldr     r2, [r1], #4

    sub     ip, r2, #0x01010101
    bic     ip, ip, r2
    ands    ip, ip, #0x80808080
    bne     strcpy_zero_in_first_register

    ldrb    r4, [r1]
    cbz     r4, strcpy_unalign_copy5bytes

    ldr     r3, [r1], #4

    pld     [r1, #64]

    sub     ip, r3, #0x01010101
    bic     ip, ip, r3
    ands    ip, ip, #0x80808080
    bne     strcpy_zero_in_second_register

    strd    r2, r3, [r0], #8
    b       strcpy_unalign5

strcpy_unalign_copy5bytes:
    str     r2, [r0], #4
    strb    r4, [r0]
    m_pop

strcpy_unalign_copy6bytes:
    str     r2, [r0], #4
    strb    r4, [r0], #1
    strb    r5, [r0]
    m_pop

    .p2align 2
    // Can read 4 bytes before possibly crossing a page.
strcpy_unalign4:
    ldr     r2, [r1], #4

    sub     ip, r2, #0x01010101
    bic     ip, ip, r2
    ands    ip, ip, #0x80808080
    bne     strcpy_zero_in_first_register

    ldr     r3, [r1], #4
    pld     [r1, #64]

    sub     ip, r3, #0x01010101
    bic     ip, ip, r3
    ands    ip, ip, #0x80808080
    bne     strcpy_zero_in_second_register

    strd    r2, r3, [r0], #8
    b       strcpy_unalign4

    .p2align 2
    // Can read 3 bytes before possibly crossing a page.
strcpy_unalign3:
    ldrb    r2, [r1]
    cbz     r2, strcpy_unalign3_copy1byte
    ldrb    r3, [r1, #1]
    cbz     r3, strcpy_unalign3_copy2bytes
    ldrb    r4, [r1, #2]
    cbz     r4, strcpy_unalign3_copy3bytes

    ldr     r2, [r1], #4
    ldr     r3, [r1], #4

    pld     [r1, #64]

    // Bytes 0..2 of r2 were probed above; only its top byte can be zero.
    lsrs    lr, r2, #24
    beq     strcpy_copy4bytes

    sub     ip, r3, #0x01010101
    bic     ip, ip, r3
    ands    ip, ip, #0x80808080
    bne     strcpy_zero_in_second_register

    strd    r2, r3, [r0], #8
    b       strcpy_unalign3

strcpy_unalign3_copy1byte:
    strb    r2, [r0]
    m_pop

strcpy_unalign3_copy2bytes:
    strb    r2, [r0], #1
    strb    r3, [r0]
    m_pop

strcpy_unalign3_copy3bytes:
    strb    r2, [r0], #1
    strb    r3, [r0], #1
    strb    r4, [r0]
    m_pop

    .p2align 2
    // Can read 2 bytes before possibly crossing a page.
strcpy_unalign2:
    ldrb    r2, [r1]
    cbz     r2, strcpy_unalign_copy1byte
    ldrb    r4, [r1, #1]
    cbz     r4, strcpy_unalign_copy2bytes

    ldr     r2, [r1], #4
    ldr     r3, [r1], #4
    pld     [r1, #64]

    // Bytes 0 and 1 of r2 were probed above; check byte 2, then byte 3.
    tst     r2, #0xff0000
    beq     strcpy_copy3bytes
    lsrs    ip, r2, #24
    beq     strcpy_copy4bytes

    sub     ip, r3, #0x01010101
    bic     ip, ip, r3
    ands    ip, ip, #0x80808080
    bne     strcpy_zero_in_second_register

    strd    r2, r3, [r0], #8
    b       strcpy_unalign2

    .p2align 2
    // Can read 1 byte before possibly crossing a page.
strcpy_unalign1:
    ldrb    r2, [r1]
    cbz     r2, strcpy_unalign_copy1byte

    ldr     r2, [r1], #4
    ldr     r3, [r1], #4

    pld     [r1, #64]

    sub     ip, r2, #0x01010101
    bic     ip, ip, r2
    ands    ip, ip, #0x80808080
    bne     strcpy_zero_in_first_register

    sub     ip, r3, #0x01010101
    bic     ip, ip, r3
    ands    ip, ip, #0x80808080
    bne     strcpy_zero_in_second_register

    strd    r2, r3, [r0], #8
    b       strcpy_unalign1

strcpy_unalign_copy1byte:
    strb    r2, [r0]
    m_pop

strcpy_unalign_copy2bytes:
    strb    r2, [r0], #1
    strb    r4, [r0]
    m_pop
END(strcpy)
