// strcat.S revision 851e68a2402fa414544e66650e09dfdaac813e51
/*
 * Copyright (C) 2013 The Android Open Source Project
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * Copyright (c) 2013 ARM Ltd
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the company may not be used to endorse or promote
 *    products derived from this software without specific prior written
 *    permission.
 *
 * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

// Provides the ENTRY()/END() macros that bracket the function below.
#include <private/bionic_asm.h>

    // Unified ARM/Thumb assembler syntax; required for the IT blocks and the
    // conditional mnemonics (subne, popeq, ...) used in this file.
    .syntax unified

    // Assemble everything that follows as Thumb code, and mark the next
    // symbol defined (strcat) as a Thumb function entry point.
    .thumb
    .thumb_func

63    .macro m_push
64    push    {r0, r4, r5, lr}
65    .endm // m_push
66
67    .macro m_ret inst
68    \inst   {r0, r4, r5, pc}
69    .endm // m_ret
70
71    .macro m_scan_byte
72    ldrb    r3, [r0]
73    cbz     r3, strcat_r0_scan_done
74    add     r0, #1
75    .endm // m_scan_byte
76
77    .macro m_copy_byte reg, cmd, label
78    ldrb    \reg, [r1], #1
79    strb    \reg, [r0], #1
80    \cmd    \reg, \label
81    .endm // m_copy_byte
82
/*
 * char* strcat(char* dst, const char* src)
 *
 * Appends the NUL-terminated string at src to the end of the NUL-terminated
 * string at dst and returns the original dst.
 *
 * In:      r0 = dst, r1 = src
 * Out:     r0 = dst (the entry value, reloaded from the stack by m_ret)
 * Scratch: r2, r3, ip and the flags. r4, r5 and lr are used as well, but
 *          m_push saves them and m_ret restores them.
 * Stack:   16 bytes (m_push); nothing on the empty-src fast path.
 *
 * Layout:
 *   strcat_*  find the terminating NUL of dst, leaving r0 pointing at it.
 *   strcpy_*  copy src (r1) to r0, up to and including src's NUL.
 *
 * Both phases test a whole word w for a zero byte with
 *     ip = (w - 0x01010101) & ~w & 0x80808080
 * ip is non-zero iff w contains a zero byte, and the lowest flag bit that is
 * set (0x80 = byte 0, 0x8000 = byte 1, 0x800000 = byte 2, 0x80000000 =
 * byte 3) identifies the first one. Flag bits above the first zero byte can
 * be set spuriously by the borrow, so the handlers always test byte 0 first
 * and work upwards. Byte 0 is treated as the lowest-addressed byte, i.e. the
 * code assumes little-endian data.
 *
 * Word loads from src are only issued where they cannot run past the NUL
 * into a different page: either the address is suitably aligned, or the
 * bytes up to the next double word boundary have already been checked.
 *
 * The cbz/cbnz branches and the tbb table have limited range, so the order
 * and size of the sections below matter.
 */
ENTRY(strcat)
    // Quick check to see if src is empty.
    ldrb        r2, [r1]
    pld         [r1, #0]
    cbnz        r2, strcat_continue
    // Nothing to append: r0 is already the result and nothing was pushed.
    bx          lr

strcat_continue:
    // To speed up really small dst strings, unroll checking the first 4 bytes.
    // m_push records the original dst so that every later exit can return it.
    m_push
    m_scan_byte
    m_scan_byte
    m_scan_byte
    m_scan_byte

    // r3 = r0 & 7. Only enter the word loop once r0 is double word aligned.
    ands    r3, r0, #7
    bne     strcat_align_src

    .p2align 2
strcat_mainloop:
    // Invariant: r0 is 8-byte aligned. Scan dst two words per iteration.
    ldmia   r0!, {r2, r3}

    pld     [r0, #64]

    sub     ip, r2, #0x01010101
    bic     ip, ip, r2
    ands    ip, ip, #0x80808080
    bne     strcat_zero_in_first_register

    sub     ip, r3, #0x01010101
    bic     ip, ip, r3
    ands    ip, ip, #0x80808080
    bne     strcat_zero_in_second_register
    b       strcat_mainloop

strcat_zero_in_first_register:
    // The NUL is in r2, the first of the two words just loaded. Step r0 back
    // one word so the code below can handle both words the same way.
    sub     r0, r0, #4

strcat_zero_in_second_register:
    // Here r0 points just past the word that contains the NUL and ip holds
    // that word's flag bits. Move r0 back onto the NUL itself.
    // Check for zero in byte 0.
    tst     ip, #0x80
    it      ne
    subne   r0, r0, #4
    bne     strcat_r0_scan_done
    // Check for zero in byte 1.
    tst     ip, #0x8000
    it      ne
    subne   r0, r0, #3
    bne     strcat_r0_scan_done
    // Check for zero in byte 2.
    tst     ip, #0x800000
    it      ne
    subne   r0, r0, #2
    // Otherwise the zero is in byte 3.
    it      eq
    subeq   r0, r0, #1

strcat_r0_scan_done:
    // r0 points at dst's NUL; the first byte copied overwrites it.
    // Unroll the first 8 bytes that will be copied.
    m_copy_byte reg=r2, cmd=cbz, label=strcpy_finish
    m_copy_byte reg=r3, cmd=cbz, label=strcpy_finish
    m_copy_byte reg=r4, cmd=cbz, label=strcpy_finish
    m_copy_byte reg=r5, cmd=cbz, label=strcpy_finish
    m_copy_byte reg=r2, cmd=cbz, label=strcpy_finish
    m_copy_byte reg=r3, cmd=cbz, label=strcpy_finish
    m_copy_byte reg=r4, cmd=cbz, label=strcpy_finish
    // Eighth byte: keep going only if it was not the NUL.
    m_copy_byte reg=r5, cmd=cbnz, label=strcpy_continue

strcpy_finish:
    m_ret   inst=pop

strcpy_continue:
    // More than 8 bytes to copy: switch to word copies once dst is aligned.
    pld     [r1, #0]
    ands    r3, r0, #7
    bne     strcpy_align_dst

strcpy_check_src_align:
    // At this point dst is aligned to a double word, check if src
    // is also aligned to a double word.
    ands    r3, r1, #7
    bne     strcpy_unaligned_copy

    .p2align 2
strcpy_mainloop:
    // Invariant: r0 and r1 are both 8-byte aligned. The two words are only
    // stored once both are known to be free of NUL bytes.
    ldmia   r1!, {r2, r3}

    pld     [r1, #64]

    sub     ip, r2, #0x01010101
    bic     ip, ip, r2
    ands    ip, ip, #0x80808080
    bne     strcpy_zero_in_first_register

    sub     ip, r3, #0x01010101
    bic     ip, ip, r3
    ands    ip, ip, #0x80808080
    bne     strcpy_zero_in_second_register

    stmia   r0!, {r2, r3}
    b       strcpy_mainloop

strcpy_zero_in_first_register:
    // r2 contains the NUL and has not been stored yet; r0 is where it goes;
    // ip holds r2's flag bits. Store up to and including the NUL and return.
    //
    // lsls #17 shifts flag bit 15 (byte 1) out into C and leaves flag bit 7
    // (byte 0) as the only bit that can survive in the result, so
    //   NE -> NUL in byte 0,  CS -> NUL in byte 1.
    // lr is just a throwaway destination: the return address is on the stack.
    lsls    lr, ip, #17
    itt     ne
    strbne  r2, [r0]
    m_ret   inst=popne
    itt     cs
    strhcs  r2, [r0]
    m_ret   inst=popcs
    // Bytes 0 and 1 are non-zero. After lsls #1 only flag bit 23 (byte 2)
    // can remain in the result: EQ -> NUL in byte 3, so store the whole word.
    lsls    ip, ip, #1
    itt     eq
    streq   r2, [r0]
    m_ret   inst=popeq
    // NUL in byte 2: store the low halfword, then the NUL byte.
    strh    r2, [r0], #2
    lsr     r3, r2, #16
    strb    r3, [r0]
    m_ret   inst=pop

strcpy_zero_in_second_register:
    // r2 is free of NUL bytes, r3 contains the NUL, neither has been stored;
    // ip holds r3's flag bits. Same decoding as above, with r2 stored first.
    lsls    lr, ip, #17
    ittt    ne
    stmiane r0!, {r2}
    strbne  r3, [r0]
    m_ret   inst=popne
    ittt    cs
    strcs   r2, [r0], #4
    strhcs  r3, [r0]
    m_ret   inst=popcs
    lsls    ip, ip, #1
    itt     eq
    stmiaeq r0, {r2, r3}
    m_ret   inst=popeq
    stmia   r0!, {r2}
    strh    r3, [r0], #2
    lsr     r4, r3, #16
    strb    r4, [r0]
    m_ret   inst=pop

strcpy_align_dst:
    // Align to a double word (64 bits).
    // r3 = number of bytes (1..7) needed to reach the next boundary.
    // lsls #31 puts bit 0 of r3 in the result (NE -> copy 1 byte) and bit 1
    // in C (CS -> copy 2 bytes). The byte copy below does not touch the
    // flags, so C is still valid at strcpy_align_to_32.
    rsb     r3, r3, #8
    lsls    ip, r3, #31
    beq     strcpy_align_to_32

    ldrb    r2, [r1], #1
    strb    r2, [r0], #1
    cbz     r2, strcpy_complete

strcpy_align_to_32:
    bcc     strcpy_align_to_64

    ldrb    r4, [r1], #1
    strb    r4, [r0], #1
    cmp     r4, #0
    it      eq
    m_ret   inst=popeq
    ldrb    r5, [r1], #1
    strb    r5, [r0], #1
    cmp     r5, #0
    it      eq
    m_ret   inst=popeq

strcpy_align_to_64:
    tst     r3, #4
    beq     strcpy_check_src_align
    // Four more bytes bring dst to a double word boundary. Copy them one at
    // a time: the alignment of src is not known here, so a 4-byte ldr could
    // read beyond src's NUL and into the next, possibly unmapped, page.
    m_copy_byte reg=r2, cmd=cbz, label=strcpy_complete
    m_copy_byte reg=r2, cmd=cbz, label=strcpy_complete
    m_copy_byte reg=r2, cmd=cbz, label=strcpy_complete
    m_copy_byte reg=r2, cmd=cbz, label=strcpy_complete
    b       strcpy_check_src_align

strcpy_complete:
    m_ret   inst=pop

strcpy_unaligned_copy:
    // Dst is aligned to a double word, while src is at an unknown alignment.
    // There are 7 different versions of the unaligned copy code
    // to prevent overreading the src. The mainloop of every single version
    // will store 64 bits per loop. The difference is how much of src can
    // be read without potentially crossing a page boundary.
    //
    // r3 = src & 7 (1..7) indexes the table; each entry is the halfword
    // offset of its target from the table. Entry 0 is never used because an
    // aligned src takes strcpy_mainloop instead. Every variant consumes 8
    // bytes of src per iteration, so src & 7 stays the same and each variant
    // loops back to itself.
    tbb     [pc, r3]
strcpy_unaligned_branchtable:
    .byte 0
    .byte ((strcpy_unalign7 - strcpy_unaligned_branchtable)/2)
    .byte ((strcpy_unalign6 - strcpy_unaligned_branchtable)/2)
    .byte ((strcpy_unalign5 - strcpy_unaligned_branchtable)/2)
    .byte ((strcpy_unalign4 - strcpy_unaligned_branchtable)/2)
    .byte ((strcpy_unalign3 - strcpy_unaligned_branchtable)/2)
    .byte ((strcpy_unalign2 - strcpy_unaligned_branchtable)/2)
    .byte ((strcpy_unalign1 - strcpy_unaligned_branchtable)/2)

    .p2align 2
    // Can read 7 bytes before possibly crossing a page.
strcpy_unalign7:
    // src & 7 == 1. The first word lies entirely before the boundary.
    ldr     r2, [r1], #4

    sub     ip, r2, #0x01010101
    bic     ip, ip, r2
    ands    ip, ip, #0x80808080
    bne     strcpy_zero_in_first_register

    // Check the three bytes before the boundary one by one; only if all are
    // non-zero is it safe to load the word that straddles it.
    ldrb    r3, [r1]
    cbz     r3, strcpy_unalign7_copy5bytes
    ldrb    r4, [r1, #1]
    cbz     r4, strcpy_unalign7_copy6bytes
    ldrb    r5, [r1, #2]
    cbz     r5, strcpy_unalign7_copy7bytes

    ldr     r3, [r1], #4
    pld     [r1, #64]

    // Only the top byte of r3 is still unchecked. Z is set if it is the NUL,
    // in which case the stmia below has already stored the terminator.
    lsrs    ip, r3, #24
    stmia   r0!, {r2, r3}
    beq     strcpy_unalign_return
    b       strcpy_unalign7

strcpy_unalign7_copy5bytes:
    // r2 plus the NUL held in r3.
    stmia   r0!, {r2}
    strb    r3, [r0]
strcpy_unalign_return:
    m_ret   inst=pop

strcpy_unalign7_copy6bytes:
    // r2, one byte, then the NUL held in r4.
    stmia   r0!, {r2}
    strb    r3, [r0], #1
    strb    r4, [r0], #1
    m_ret   inst=pop

strcpy_unalign7_copy7bytes:
    // r2, two bytes, then the NUL held in r5.
    stmia   r0!, {r2}
    strb    r3, [r0], #1
    strb    r4, [r0], #1
    strb    r5, [r0], #1
    m_ret   inst=pop

    .p2align 2
    // Can read 6 bytes before possibly crossing a page.
strcpy_unalign6:
    // src & 7 == 2.
    ldr     r2, [r1], #4

    sub     ip, r2, #0x01010101
    bic     ip, ip, r2
    ands    ip, ip, #0x80808080
    bne     strcpy_zero_in_first_register

    // Check the two bytes before the boundary before loading across it.
    ldrb    r4, [r1]
    cbz     r4, strcpy_unalign_copy5bytes
    ldrb    r5, [r1, #1]
    cbz     r5, strcpy_unalign_copy6bytes

    ldr     r3, [r1], #4
    pld     [r1, #64]

    // Bytes 0 and 1 of r3 were checked above; test byte 2, then byte 3.
    tst     r3, #0xff0000
    beq     strcpy_unalign6_copy7bytes
    lsrs    ip, r3, #24
    stmia   r0!, {r2, r3}
    beq     strcpy_unalign_return
    b       strcpy_unalign6

strcpy_unalign6_copy7bytes:
    // r2, the low halfword of r3, then the NUL in byte 2 of r3.
    stmia   r0!, {r2}
    strh    r3, [r0], #2
    lsr     r3, #16
    strb    r3, [r0]
    m_ret   inst=pop

    .p2align 2
    // Can read 5 bytes before possibly crossing a page.
strcpy_unalign5:
    // src & 7 == 3.
    ldr     r2, [r1], #4

    sub     ip, r2, #0x01010101
    bic     ip, ip, r2
    ands    ip, ip, #0x80808080
    bne     strcpy_zero_in_first_register

    // Check the one byte before the boundary before loading across it.
    ldrb    r4, [r1]
    cbz     r4, strcpy_unalign_copy5bytes

    ldr     r3, [r1], #4

    pld     [r1, #64]

    sub     ip, r3, #0x01010101
    bic     ip, ip, r3
    ands    ip, ip, #0x80808080
    bne     strcpy_zero_in_second_register

    stmia   r0!, {r2, r3}
    b       strcpy_unalign5

strcpy_unalign_copy5bytes:
    // r2 plus the NUL held in r4.
    stmia   r0!, {r2}
    strb    r4, [r0]
    m_ret   inst=pop

strcpy_unalign_copy6bytes:
    // r2, one byte, then the NUL held in r5.
    stmia   r0!, {r2}
    strb    r4, [r0], #1
    strb    r5, [r0]
    m_ret   inst=pop

    .p2align 2
    // Can read 4 bytes before possibly crossing a page.
strcpy_unalign4:
    // src & 7 == 4: src is word aligned, so each word is loaded and checked
    // on its own before the next one is touched.
    ldmia   r1!, {r2}

    sub     ip, r2, #0x01010101
    bic     ip, ip, r2
    ands    ip, ip, #0x80808080
    bne     strcpy_zero_in_first_register

    ldmia   r1!, {r3}
    pld     [r1, #64]

    sub     ip, r3, #0x01010101
    bic     ip, ip, r3
    ands    ip, ip, #0x80808080
    bne     strcpy_zero_in_second_register

    stmia   r0!, {r2, r3}
    b       strcpy_unalign4

    .p2align 2
    // Can read 3 bytes before possibly crossing a page.
strcpy_unalign3:
    // src & 7 == 5. Check the three bytes before the boundary first.
    ldrb    r2, [r1]
    cbz     r2, strcpy_unalign3_copy1byte
    ldrb    r3, [r1, #1]
    cbz     r3, strcpy_unalign3_copy2bytes
    ldrb    r4, [r1, #2]
    cbz     r4, strcpy_unalign3_copy3bytes

    ldr     r2, [r1], #4
    ldr     r3, [r1], #4

    pld     [r1, #64]

    // Only the top byte of r2 is still unchecked (lr is a scratch result).
    lsrs    lr, r2, #24
    beq     strcpy_unalign_copy4bytes

    sub     ip, r3, #0x01010101
    bic     ip, ip, r3
    ands    ip, ip, #0x80808080
    bne     strcpy_zero_in_second_register

    stmia   r0!, {r2, r3}
    b       strcpy_unalign3

strcpy_unalign3_copy1byte:
    strb    r2, [r0]
    m_ret   inst=pop

strcpy_unalign3_copy2bytes:
    strb    r2, [r0], #1
    strb    r3, [r0]
    m_ret   inst=pop

strcpy_unalign3_copy3bytes:
    strb    r2, [r0], #1
    strb    r3, [r0], #1
    strb    r4, [r0]
    m_ret   inst=pop

    .p2align 2
    // Can read 2 bytes before possibly crossing a page.
strcpy_unalign2:
    // src & 7 == 6. Check the two bytes before the boundary first.
    ldrb    r2, [r1]
    cbz     r2, strcpy_unalign_copy1byte
    ldrb    r3, [r1, #1]
    cbz     r3, strcpy_unalign_copy2bytes

    ldr     r2, [r1], #4
    ldr     r3, [r1], #4
    pld     [r1, #64]

    // Bytes 0 and 1 of r2 were checked above; test byte 2, then byte 3.
    tst     r2, #0xff0000
    beq     strcpy_unalign_copy3bytes
    lsrs    ip, r2, #24
    beq     strcpy_unalign_copy4bytes

    sub     ip, r3, #0x01010101
    bic     ip, ip, r3
    ands    ip, ip, #0x80808080
    bne     strcpy_zero_in_second_register

    stmia   r0!, {r2, r3}
    b       strcpy_unalign2

    .p2align 2
    // Can read 1 byte before possibly crossing a page.
strcpy_unalign1:
    // src & 7 == 7. Check the single byte before the boundary first.
    ldrb    r2, [r1]
    cbz     r2, strcpy_unalign_copy1byte

    ldr     r2, [r1], #4
    ldr     r3, [r1], #4

    pld     [r1, #64]

    sub     ip, r2, #0x01010101
    bic     ip, ip, r2
    ands    ip, ip, #0x80808080
    bne     strcpy_zero_in_first_register

    sub     ip, r3, #0x01010101
    bic     ip, ip, r3
    ands    ip, ip, #0x80808080
    bne     strcpy_zero_in_second_register

    stmia   r0!, {r2, r3}
    b       strcpy_unalign1

    // Tails shared by strcpy_unalign1/2/3. In each case the last byte
    // written is the NUL.
strcpy_unalign_copy1byte:
    strb    r2, [r0]
    m_ret   inst=pop

strcpy_unalign_copy2bytes:
    strb    r2, [r0], #1
    strb    r3, [r0]
    m_ret   inst=pop

strcpy_unalign_copy3bytes:
    strh    r2, [r0], #2
    lsr     r2, #16
    strb    r2, [r0]
    m_ret   inst=pop

strcpy_unalign_copy4bytes:
    stmia   r0, {r2}
    m_ret   inst=pop

strcat_align_src:
    // Align to a double word (64 bits).
    // Despite the label name, the pointer being aligned is r0: the dst
    // string that is being scanned for its NUL.
    // r3 = number of bytes (1..7) to the next boundary; lsls #31 decodes
    // bits 0 and 1 of it into NE and CS as in strcpy_align_dst.
    // These loads post-increment r0 before the byte is tested, so on a NUL
    // r0 is one past it and strcat_r0_update steps it back.
    rsb     r3, r3, #8
    lsls    ip, r3, #31
    beq     strcat_align_to_32
    ldrb    r2, [r0], #1
    cbz     r2, strcat_r0_update

strcat_align_to_32:
    bcc     strcat_align_to_64
    ldrb    r2, [r0], #1
    cbz     r2, strcat_r0_update
    ldrb    r2, [r0], #1
    cbz     r2, strcat_r0_update

strcat_align_to_64:
    tst     r3, #4
    beq     strcat_mainloop
    // r0 is word aligned here, so this load stays inside one aligned word.
    ldr     r3, [r0], #4

    sub     ip, r3, #0x01010101
    bic     ip, ip, r3
    ands    ip, ip, #0x80808080
    // Same state strcat_zero_in_second_register expects: flag bits in ip and
    // r0 just past the word that contains the NUL.
    bne     strcat_zero_in_second_register
    b       strcat_mainloop

strcat_r0_update:
    sub     r0, r0, #1
    b strcat_r0_scan_done
END(strcat)
