1/*
2 * Copyright (C) 2013 The Android Open Source Project
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *  * Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 *  * Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in
12 *    the documentation and/or other materials provided with the
13 *    distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
18 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
19 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
21 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
22 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
23 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
25 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28/*
29 * Copyright (c) 2013 ARM Ltd
30 * All rights reserved.
31 *
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
34 * are met:
35 * 1. Redistributions of source code must retain the above copyright
36 *    notice, this list of conditions and the following disclaimer.
37 * 2. Redistributions in binary form must reproduce the above copyright
38 *    notice, this list of conditions and the following disclaimer in the
39 *    documentation and/or other materials provided with the distribution.
40 * 3. The name of the company may not be used to endorse or promote
41 *    products derived from this software without specific prior written
42 *    permission.
43 *
44 * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
45 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
46 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
47 * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
48 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
49 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
50 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
51 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
52 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
53 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
54 */
55
56#include <private/bionic_asm.h>
57
58    .syntax unified
59
60    .thumb
61    .thumb_func
62
63    .macro m_push
    // Prologue helper: saves r0, r4, r5 and lr on the stack. r0 is saved so
    // that m_pop can restore the value it had when this macro ran; r4, r5
    // and lr are saved because the code below uses them as scratch.
    // The register list must stay in step with the one in m_pop.
64    push    {r0, r4, r5, lr}
65    .endm // m_push
66
67    .macro m_pop
    // Epilogue helper, the counterpart of m_push: restores r0, r4 and r5
    // and loads the saved lr straight into pc, so expanding this macro
    // returns from the function. Code placed after it in the same path is
    // never reached.
68    pop     {r0, r4, r5, pc}
69    .endm // m_pop
70
71    .macro m_scan_byte
    // Examines the byte at r0. If it is zero, branches to
    // .L_strcat_r0_scan_done with r0 still pointing at that byte;
    // otherwise advances r0 by one. Clobbers r3.
72    ldrb    r3, [r0]
73    cbz     r3, .L_strcat_r0_scan_done
74    add     r0, #1
75    .endm // m_scan_byte
76
77    .macro m_copy_byte reg, cmd, label
    // Copies one byte from [r1] to [r0] through \reg, post-incrementing
    // both pointers, then issues "\cmd \reg, \label". The call sites pass
    // cbz or cbnz as \cmd, so the branch depends on whether the byte that
    // was just stored (terminator included) is zero.
78    ldrb    \reg, [r1], #1
79    strb    \reg, [r0], #1
80    \cmd    \reg, \label
81    .endm // m_copy_byte
82
83ENTRY(strcat)
    // strcat: appends the string at r1 (src) to the end of the string at
    // r0 (dst). Every path that runs m_push leaves through m_pop, which
    // restores r0 to its entry value; the early exit below never changes r0.
    //
    // Two phases: (1) advance r0 to dst's terminating zero byte, (2) copy
    // src, terminator included, to that position. Each phase handles a few
    // bytes individually, then aligns the dst cursor to 8 bytes and works
    // on two words per iteration.
    //
    // Scratch: r2, r3 and ip throughout; r4, r5 and lr only after m_push.
    //
84    // Quick check to see if src is empty.
85    ldrb    r2, [r1]
86    pld     [r1, #0]
87    cbnz    r2, .L_strcat_continue
    // src is empty: dst is already the result. Nothing has been pushed,
    // so a plain return is enough.
88    bx      lr
89
90.L_strcat_continue:
91    // To speed up really small dst strings, unroll checking the first 4 bytes.
92    m_push
    // Each m_scan_byte leaves for .L_strcat_r0_scan_done as soon as r0
    // points at a zero byte.
93    m_scan_byte
94    m_scan_byte
95    m_scan_byte
96    m_scan_byte
97
    // r3 = dst cursor modulo 8; zero means it is already double-word aligned.
98    ands    r3, r0, #7
99    beq     .L_strcat_mainloop
100
101    // Align to a double word (64 bits).
    // r3 = number of bytes (1..7) up to the next 8-byte boundary. Shifting
    // it left by 31 exposes bit 0 of that count through the Z flag (Z set
    // when the bit is clear) and bit 1 through the carry flag, so a single
    // instruction drives both the 1-byte and the 2-byte step.
102    rsb     r3, r3, #8
103    lsls    ip, r3, #31
104    beq     .L_strcat_align_to_32
105
    // Count is odd: examine one byte. ldrb, cbz and the non-flag-setting
    // add leave the carry from the lsls intact for the bcc below.
106    ldrb    r5, [r0]
107    cbz     r5, .L_strcat_r0_scan_done
108    add     r0, r0, #1
109
110.L_strcat_align_to_32:
    // Carry clear means bit 1 of the count is clear: no 2-byte step needed.
111    bcc     .L_strcat_align_to_64
112
113    ldrb    r2, [r0]
114    cbz     r2, .L_strcat_r0_scan_done
115    add     r0, r0, #1
116    ldrb    r4, [r0]
117    cbz     r4, .L_strcat_r0_scan_done
118    add     r0, r0, #1
119
120.L_strcat_align_to_64:
    // Bit 2 of the count set: one whole word is left before the boundary.
121    tst     r3, #4
122    beq     .L_strcat_mainloop
123    ldr     r3, [r0], #4
124
    // Zero-byte test: (w - 0x01010101) & ~w & 0x80808080 is non-zero
    // exactly when w contains a zero byte.
125    sub     ip, r3, #0x01010101
126    bic     ip, ip, r3
127    ands    ip, ip, #0x80808080
    // r0 is already 4 bytes past the word just tested, the same situation
    // as after the second word of a pair in .L_strcat_mainloop, so that
    // handler's fix-up of r0 is reused.
128    bne     .L_strcat_zero_in_second_register
129    b       .L_strcat_mainloop
130
131.L_strcat_r0_scan_done:
    // Reached with r0 pointing at the zero byte that terminates dst and
    // r1 pointing at the start of src.
132    // For short copies, hard-code checking the first 8 bytes since this
133    // new code doesn't win until after about 8 bytes.
    // Each m_copy_byte copies one byte, terminator included. The first
    // seven leave for .L_strcpy_finish when that byte was zero; the eighth
    // jumps to .L_strcpy_continue when it was non-zero and otherwise falls
    // through into .L_strcpy_finish.
134    m_copy_byte reg=r2, cmd=cbz, label=.L_strcpy_finish
135    m_copy_byte reg=r3, cmd=cbz, label=.L_strcpy_finish
136    m_copy_byte reg=r4, cmd=cbz, label=.L_strcpy_finish
137    m_copy_byte reg=r5, cmd=cbz, label=.L_strcpy_finish
138    m_copy_byte reg=r2, cmd=cbz, label=.L_strcpy_finish
139    m_copy_byte reg=r3, cmd=cbz, label=.L_strcpy_finish
140    m_copy_byte reg=r4, cmd=cbz, label=.L_strcpy_finish
141    m_copy_byte reg=r5, cmd=cbnz, label=.L_strcpy_continue
142
143.L_strcpy_finish:
    // The terminator has been stored: restore registers and return.
144    m_pop
145
146.L_strcpy_continue:
    // Eight bytes of src have been copied without meeting the terminator.
    // Bring the dst cursor r0 up to an 8-byte boundary, copying one byte
    // at a time and leaving through .L_strcpy_complete as soon as a zero
    // byte has been stored.
147    ands    r3, r0, #7
148    beq     .L_strcpy_check_src_align
149
150    // Align to a double word (64 bits).
    // r3 = bytes (1..7) up to the next boundary. lsls #31 exposes bit 0 of
    // that count through Z (set when the bit is clear) and bit 1 through
    // the carry flag.
151    rsb     r3, r3, #8
152    lsls    ip, r3, #31
153    beq     .L_strcpy_align_to_32
154
    // Count is odd: copy one byte. ldrb, strb and cbz do not modify the
    // flags, so the carry is still valid at the bcc below.
155    ldrb    r2, [r1], #1
156    strb    r2, [r0], #1
157    cbz     r2, .L_strcpy_complete
158
159.L_strcpy_align_to_32:
    // Carry clear means bit 1 of the count is clear: skip the 2-byte step.
160    bcc     .L_strcpy_align_to_64
161
162    ldrb    r2, [r1], #1
163    strb    r2, [r0], #1
164    cbz     r2, .L_strcpy_complete
165    ldrb    r2, [r1], #1
166    strb    r2, [r0], #1
167    cbz     r2, .L_strcpy_complete
168
169.L_strcpy_align_to_64:
    // Bit 2 of the count set: four more bytes are needed to reach the
    // boundary.
170    tst     r3, #4
171    beq     .L_strcpy_check_src_align
172    // Read one byte at a time since we don't know the src alignment
173    // and we don't want to read into a different page.
174    ldrb    r2, [r1], #1
175    strb    r2, [r0], #1
176    cbz     r2, .L_strcpy_complete
177    ldrb    r2, [r1], #1
178    strb    r2, [r0], #1
179    cbz     r2, .L_strcpy_complete
180    ldrb    r2, [r1], #1
181    strb    r2, [r0], #1
182    cbz     r2, .L_strcpy_complete
183    ldrb    r2, [r1], #1
184    strb    r2, [r0], #1
185    cbz     r2, .L_strcpy_complete
186
187.L_strcpy_check_src_align:
188    // At this point dst is aligned to a double word, check if src
189    // is also aligned to a double word.
    // r3 = src cursor modulo 8. When it is non-zero it is also the index
    // that .L_strcpy_unaligned_copy feeds to its branch table, so r3 must
    // not be changed between here and the tbb.
190    ands    r3, r1, #7
191    bne     .L_strcpy_unaligned_copy
192
193    .p2align 2
194.L_strcpy_mainloop:
    // Both cursors are 8-byte aligned: handle two words (r2 = first,
    // r3 = second) per iteration.
195    ldrd    r2, r3, [r1], #8
196
197    pld     [r1, #64]
198
    // Zero-byte test: (w - 0x01010101) & ~w & 0x80808080 is non-zero
    // exactly when w contains a zero byte. The result is left in ip for
    // the tail handlers, which decode which byte it was.
199    sub     ip, r2, #0x01010101
200    bic     ip, ip, r2
201    ands    ip, ip, #0x80808080
202    bne     .L_strcpy_zero_in_first_register
203
204    sub     ip, r3, #0x01010101
205    bic     ip, ip, r3
206    ands    ip, ip, #0x80808080
207    bne     .L_strcpy_zero_in_second_register
208
    // No terminator in either word: store the pair and keep going. When a
    // terminator is found the store is left to the tail handlers, which
    // write only the bytes up to and including it.
209    strd    r2, r3, [r0], #8
210    b       .L_strcpy_mainloop
211
212.L_strcpy_complete:
    // Exit used by the byte-wise copies in .L_strcpy_continue once the
    // terminator has been stored.
213    m_pop
214
215.L_strcpy_zero_in_first_register:
    // Entered with r2 = a word containing the terminator, r0 = where that
    // word would be stored, and ip = the result of the zero-byte test on
    // r2. ip can only have bits 7, 15, 23 and 31 set; the lowest one set
    // marks the first zero byte (bit 7 = byte 0, the least significant
    // byte, which is the first in memory on a little-endian target --
    // NOTE(review): little-endian is assumed, confirm for the build).
    //
    // lsls #17: the result is non-zero only if bit 7 was set (byte 0 is
    // the terminator), and the carry receives bit 15 (byte 1). lr is just
    // a scratch destination; the return address was saved by m_push.
216    lsls    lr, ip, #17
217    bne     .L_strcpy_copy1byte
218    bcs     .L_strcpy_copy2bytes
    // Bits 7 and 15 are clear here, so after dropping bit 31 the result is
    // non-zero only if bit 23 (byte 2) was set.
219    lsls    ip, ip, #1
220    bne     .L_strcpy_copy3bytes
221
222.L_strcpy_copy4bytes:
    // Byte 3 is the terminator, so the whole word is stored. Also entered
    // directly from .L_strcpy_unalign3 and .L_strcpy_unalign2.
223    // Copy 4 bytes to the destination.
224    str     r2, [r0]
225    m_pop
226
227.L_strcpy_copy1byte:
    // Byte 0 of r2 is the terminator; store just that byte.
228    strb    r2, [r0]
229    m_pop
230
231.L_strcpy_copy2bytes:
    // Low halfword of r2 = one string byte followed by the terminator.
232    strh    r2, [r0]
233    m_pop
234
235.L_strcpy_copy3bytes:
    // Two string bytes as a halfword, then byte 2 (the terminator) moved
    // down into the low byte of r2. Also entered from .L_strcpy_unalign2.
236    strh    r2, [r0], #2
237    lsr     r2, #16
238    strb    r2, [r0]
239    m_pop
240
241.L_strcpy_zero_in_second_register:
    // Entered with r2 = a word with no zero byte, r3 = the following word,
    // which contains the terminator, r0 = where the pair would be stored,
    // and ip = the result of the zero-byte test on r3 (only bits 7, 15, 23
    // and 31 can be set; the lowest one set marks the first zero byte).
    //
    // lsls #17: the result is non-zero only if bit 7 was set (byte 0 of r3
    // is the terminator), and the carry receives bit 15 (byte 1). lr is
    // just a scratch destination; the return address was saved by m_push.
242    lsls    lr, ip, #17
243    bne     .L_strcpy_copy5bytes
244    bcs     .L_strcpy_copy6bytes
    // Bits 7 and 15 are clear here, so after dropping bit 31 the result is
    // non-zero only if bit 23 (byte 2 of r3) was set.
245    lsls    ip, ip, #1
246    bne     .L_strcpy_copy7bytes
247
    // Byte 3 of r3 is the terminator: both words are stored whole.
248    // Copy 8 bytes to the destination.
249    strd    r2, r3, [r0]
250    m_pop
251
252.L_strcpy_copy5bytes:
    // All of r2, then the terminator from the low byte of r3.
253    str     r2, [r0], #4
254    strb    r3, [r0]
255    m_pop
256
257.L_strcpy_copy6bytes:
    // All of r2, then one string byte plus the terminator as a halfword.
258    str     r2, [r0], #4
259    strh    r3, [r0]
260    m_pop
261
262.L_strcpy_copy7bytes:
    // All of r2, two string bytes as a halfword, then byte 2 of r3 (the
    // terminator) moved down into the low byte. Also entered from
    // .L_strcpy_unalign6.
263    str     r2, [r0], #4
264    strh    r3, [r0], #2
265    lsr     r3, #16
266    strb    r3, [r0]
267    m_pop
268
269.L_strcpy_unaligned_copy:
270    // Dst is aligned to a double word, while src is at an unknown alignment.
271    // There are 7 different versions of the unaligned copy code
272    // to prevent overreading the src. The mainloop of every single version
273    // will store 64 bits per loop. The difference is how much of src can
274    // be read without potentially crossing a page boundary.
    // r3 = src & 7 (1..7), left there by the ands in
    // .L_strcpy_check_src_align. tbb reads the byte at pc + r3 and branches
    // forward by twice its value; pc reads as the address of the table
    // because the table directly follows the tbb instruction.
275    tbb     [pc, r3]
276.L_strcpy_unaligned_branchtable:
    // Entry i serves src & 7 == i and selects the variant named after
    // 8 - i, the number of bytes left in src's current double word.
    // Entries are halved offsets from the table start. Entry 0 is a
    // placeholder: an aligned src never reaches this table.
277    .byte 0
278    .byte ((.L_strcpy_unalign7 - .L_strcpy_unaligned_branchtable)/2)
279    .byte ((.L_strcpy_unalign6 - .L_strcpy_unaligned_branchtable)/2)
280    .byte ((.L_strcpy_unalign5 - .L_strcpy_unaligned_branchtable)/2)
281    .byte ((.L_strcpy_unalign4 - .L_strcpy_unaligned_branchtable)/2)
282    .byte ((.L_strcpy_unalign3 - .L_strcpy_unaligned_branchtable)/2)
283    .byte ((.L_strcpy_unalign2 - .L_strcpy_unaligned_branchtable)/2)
284    .byte ((.L_strcpy_unalign1 - .L_strcpy_unaligned_branchtable)/2)
285
286    .p2align 2
287    // Can read 7 bytes before possibly crossing a page.
288.L_strcpy_unalign7:
    // Loop for src & 7 == 1 (r1 advances by 8 per iteration, so this stays
    // true). The first word (bytes 0-3 of the iteration) lies inside the
    // current double word of src. The second word (bytes 4-7) straddles
    // the next 8-byte boundary, so it is loaded only after bytes 4-6 have
    // each been shown to be non-zero.
289    ldr     r2, [r1], #4
290
    // Zero-byte test: non-zero ip means r2 contains the terminator.
291    sub     ip, r2, #0x01010101
292    bic     ip, ip, r2
293    ands    ip, ip, #0x80808080
294    bne     .L_strcpy_zero_in_first_register
295
    // r1 now points at byte 4; probe bytes 4, 5 and 6 individually.
296    ldrb    r3, [r1]
297    cbz     r3, .L_strcpy_unalign7_copy5bytes
298    ldrb    r4, [r1, #1]
299    cbz     r4, .L_strcpy_unalign7_copy6bytes
300    ldrb    r5, [r1, #2]
301    cbz     r5, .L_strcpy_unalign7_copy7bytes
302
303    ldr     r3, [r1], #4
304    pld     [r1, #64]
305
    // Only byte 7 (the top byte of r3) is still unchecked. strd does not
    // touch the flags set by lsrs, so all 8 bytes are stored first and the
    // loop then ends if byte 7 was the terminator.
306    lsrs    ip, r3, #24
307    strd    r2, r3, [r0], #8
308    beq     .L_strcpy_unalign_return
309    b       .L_strcpy_unalign7
310
311.L_strcpy_unalign7_copy5bytes:
    // r2 = four string bytes, r3 = the zero read from byte 4.
312    str     r2, [r0], #4
313    strb    r3, [r0]
314.L_strcpy_unalign_return:
    // Also the exit for .L_strcpy_unalign6 when byte 7 was the terminator.
315    m_pop
316
317.L_strcpy_unalign7_copy6bytes:
    // r3 = byte 4, r4 = the zero read from byte 5.
318    str     r2, [r0], #4
319    strb    r3, [r0], #1
320    strb    r4, [r0], #1
321    m_pop
322
323.L_strcpy_unalign7_copy7bytes:
    // r3 = byte 4, r4 = byte 5, r5 = the zero read from byte 6.
324    str     r2, [r0], #4
325    strb    r3, [r0], #1
326    strb    r4, [r0], #1
327    strb    r5, [r0], #1
328    m_pop
329
330    .p2align 2
331    // Can read 6 bytes before possibly crossing a page.
332.L_strcpy_unalign6:
    // Loop for src & 7 == 2. The first word (bytes 0-3) lies inside the
    // current double word of src. The second word (bytes 4-7) straddles
    // the next 8-byte boundary, so bytes 4 and 5 are probed individually
    // before it is loaded.
333    ldr     r2, [r1], #4
334
    // Zero-byte test: non-zero ip means r2 contains the terminator.
335    sub     ip, r2, #0x01010101
336    bic     ip, ip, r2
337    ands    ip, ip, #0x80808080
338    bne     .L_strcpy_zero_in_first_register
339
    // r1 now points at byte 4. The tails reached from here expect the
    // probed bytes in r4 (byte 4) and r5 (byte 5).
340    ldrb    r4, [r1]
341    cbz     r4, .L_strcpy_unalign_copy5bytes
342    ldrb    r5, [r1, #1]
343    cbz     r5, .L_strcpy_unalign_copy6bytes
344
345    ldr     r3, [r1], #4
346    pld     [r1, #64]
347
    // Bytes 4 and 5 are known to be non-zero, so only bytes 6 and 7 of the
    // pair (bits 16-23 and 24-31 of r3) still need checking.
348    tst     r3, #0xff0000
349    beq     .L_strcpy_copy7bytes
    // strd leaves the flags from lsrs alone: store all 8 bytes, then stop
    // if byte 7 was the terminator.
350    lsrs    ip, r3, #24
351    strd    r2, r3, [r0], #8
352    beq     .L_strcpy_unalign_return
353    b       .L_strcpy_unalign6
354
355    .p2align 2
356    // Can read 5 bytes before possibly crossing a page.
357.L_strcpy_unalign5:
    // Loop for src & 7 == 3. The first word (bytes 0-3) lies inside the
    // current double word of src. The second word (bytes 4-7) straddles
    // the next 8-byte boundary, so byte 4 is probed on its own before the
    // word is loaded.
358    ldr     r2, [r1], #4
359
    // Zero-byte test: non-zero ip means r2 contains the terminator.
360    sub     ip, r2, #0x01010101
361    bic     ip, ip, r2
362    ands    ip, ip, #0x80808080
363    bne     .L_strcpy_zero_in_first_register
364
    // r1 now points at byte 4.
365    ldrb    r4, [r1]
366    cbz     r4, .L_strcpy_unalign_copy5bytes
367
368    ldr     r3, [r1], #4
369
370    pld     [r1, #64]
371
    // Full zero-byte test on the second word; the shared handler decodes
    // which byte it was from ip.
372    sub     ip, r3, #0x01010101
373    bic     ip, ip, r3
374    ands    ip, ip, #0x80808080
375    bne     .L_strcpy_zero_in_second_register
376
377    strd    r2, r3, [r0], #8
378    b       .L_strcpy_unalign5
379
380.L_strcpy_unalign_copy5bytes:
    // Tail shared by .L_strcpy_unalign6 and .L_strcpy_unalign5:
    // r2 = four string bytes, r4 = the zero read from byte 4.
381    str     r2, [r0], #4
382    strb    r4, [r0]
383    m_pop
384
385.L_strcpy_unalign_copy6bytes:
    // Tail used by .L_strcpy_unalign6: r4 = byte 4, r5 = the zero read
    // from byte 5.
386    str     r2, [r0], #4
387    strb    r4, [r0], #1
388    strb    r5, [r0]
389    m_pop
390
391    .p2align 2
392    // Can read 4 bytes before possibly crossing a page.
393.L_strcpy_unalign4:
    // Loop for src & 7 == 4: src is word aligned, so each of the two words
    // lies entirely inside one double word of src. They are loaded
    // separately, and the second only after the first has been found free
    // of zero bytes.
394    ldr     r2, [r1], #4
395
    // Zero-byte test: non-zero ip means r2 contains the terminator.
396    sub     ip, r2, #0x01010101
397    bic     ip, ip, r2
398    ands    ip, ip, #0x80808080
399    bne     .L_strcpy_zero_in_first_register
400
401    ldr     r3, [r1], #4
402    pld     [r1, #64]
403
    // Same test on the second word.
404    sub     ip, r3, #0x01010101
405    bic     ip, ip, r3
406    ands    ip, ip, #0x80808080
407    bne     .L_strcpy_zero_in_second_register
408
409    strd    r2, r3, [r0], #8
410    b       .L_strcpy_unalign4
411
412    .p2align 2
413    // Can read 3 bytes before possibly crossing a page.
414.L_strcpy_unalign3:
    // Loop for src & 7 == 5. Only bytes 0-2 of the iteration are left in
    // the current double word of src, so they are probed individually
    // before any word load. Bytes 3-7 all lie in the next double word.
415    ldrb    r2, [r1]
416    cbz     r2, .L_strcpy_unalign3_copy1byte
417    ldrb    r3, [r1, #1]
418    cbz     r3, .L_strcpy_unalign3_copy2bytes
419    ldrb    r4, [r1, #2]
420    cbz     r4, .L_strcpy_unalign3_copy3bytes
421
    // Bytes 0-2 are non-zero, so the string reaches into the next double
    // word and both words can be loaded. These loads overwrite the probe
    // values in r2 and r3.
422    ldr     r2, [r1], #4
423    ldr     r3, [r1], #4
424
425    pld     [r1, #64]
426
    // Only byte 3 (the top byte of r2) is unchecked in the first word.
    // lr is just a scratch destination; m_push saved the return address.
427    lsrs    lr, r2, #24
428    beq     .L_strcpy_copy4bytes
429
    // Full zero-byte test on the second word.
430    sub     ip, r3, #0x01010101
431    bic     ip, ip, r3
432    ands    ip, ip, #0x80808080
433    bne     .L_strcpy_zero_in_second_register
434
435    strd    r2, r3, [r0], #8
436    b       .L_strcpy_unalign3
437
438.L_strcpy_unalign3_copy1byte:
    // r2 = the zero read from byte 0.
439    strb    r2, [r0]
440    m_pop
441
442.L_strcpy_unalign3_copy2bytes:
    // r2 = byte 0, r3 = the zero read from byte 1.
443    strb    r2, [r0], #1
444    strb    r3, [r0]
445    m_pop
446
447.L_strcpy_unalign3_copy3bytes:
    // r2 = byte 0, r3 = byte 1, r4 = the zero read from byte 2.
448    strb    r2, [r0], #1
449    strb    r3, [r0], #1
450    strb    r4, [r0]
451    m_pop
452
453    .p2align 2
454    // Can read 2 bytes before possibly crossing a page.
455.L_strcpy_unalign2:
    // Loop for src & 7 == 6. Only bytes 0 and 1 of the iteration are left
    // in the current double word of src, so they are probed individually
    // before any word load. The copy1byte/copy2bytes tails expect the
    // probe values in r2 (byte 0) and r4 (byte 1).
456    ldrb    r2, [r1]
457    cbz     r2, .L_strcpy_unalign_copy1byte
458    ldrb    r4, [r1, #1]
459    cbz     r4, .L_strcpy_unalign_copy2bytes
460
    // Bytes 0 and 1 are non-zero, so the string reaches into the next
    // double word and both words can be loaded.
461    ldr     r2, [r1], #4
462    ldr     r3, [r1], #4
463    pld     [r1, #64]
464
    // In the first word only bytes 2 and 3 (bits 16-23 and 24-31 of r2)
    // are still unchecked.
465    tst     r2, #0xff0000
466    beq     .L_strcpy_copy3bytes
467    lsrs    ip, r2, #24
468    beq     .L_strcpy_copy4bytes
469
    // Full zero-byte test on the second word.
470    sub     ip, r3, #0x01010101
471    bic     ip, ip, r3
472    ands    ip, ip, #0x80808080
473    bne     .L_strcpy_zero_in_second_register
474
475    strd    r2, r3, [r0], #8
476    b       .L_strcpy_unalign2
477
478    .p2align 2
479    // Can read 1 byte before possibly crossing a page.
480.L_strcpy_unalign1:
    // Loop for src & 7 == 7. Only byte 0 of the iteration is left in the
    // current double word of src, so it is probed on its own before any
    // word load.
481    ldrb    r2, [r1]
482    cbz     r2, .L_strcpy_unalign_copy1byte
483
    // Byte 0 is non-zero, so the string reaches into the next double word
    // and both words can be loaded.
484    ldr     r2, [r1], #4
485    ldr     r3, [r1], #4
486
487    pld     [r1, #64]
488
    // Zero-byte test on each word: non-zero ip means the word contains the
    // terminator, and the shared handlers decode which byte from ip.
489    sub     ip, r2, #0x01010101
490    bic     ip, ip, r2
491    ands    ip, ip, #0x80808080
492    bne     .L_strcpy_zero_in_first_register
493
494    sub     ip, r3, #0x01010101
495    bic     ip, ip, r3
496    ands    ip, ip, #0x80808080
497    bne     .L_strcpy_zero_in_second_register
498
499    strd    r2, r3, [r0], #8
500    b       .L_strcpy_unalign1
501
502.L_strcpy_unalign_copy1byte:
    // Tail shared by .L_strcpy_unalign2 and .L_strcpy_unalign1:
    // r2 = the zero read from byte 0.
503    strb    r2, [r0]
504    m_pop
505
506.L_strcpy_unalign_copy2bytes:
    // Tail used by .L_strcpy_unalign2: r2 = byte 0, r4 = the zero read
    // from byte 1.
507    strb    r2, [r0], #1
508    strb    r4, [r0]
509    m_pop
510
511    .p2align 2
512.L_strcat_mainloop:
    // Scans dst for its terminator two words at a time. r0 is 8-byte
    // aligned on entry and is post-incremented by the load, so when a zero
    // byte is found r0 already points 8 bytes past the start of the pair;
    // the two handlers below move it back onto the zero byte.
513    ldrd    r2, r3, [r0], #8
514
515    pld     [r0, #64]
516
    // Zero-byte test: (w - 0x01010101) & ~w & 0x80808080 is non-zero
    // exactly when w contains a zero byte. The result stays in ip for the
    // handlers to decode.
517    sub     ip, r2, #0x01010101
518    bic     ip, ip, r2
519    ands    ip, ip, #0x80808080
520    bne     .L_strcat_zero_in_first_register
521
522    sub     ip, r3, #0x01010101
523    bic     ip, ip, r3
524    ands    ip, ip, #0x80808080
525    bne     .L_strcat_zero_in_second_register
526    b       .L_strcat_mainloop
527
528.L_strcat_zero_in_first_register:
    // dst's terminator is in the first word of the pair just loaded.
    // r0 points 8 bytes past the start of that word, and ip holds the
    // zero-byte test result (only bits 7, 15, 23 and 31 can be set; the
    // lowest one set marks the first zero byte). Each exit moves r0 back
    // onto the zero byte and continues with the copy phase.
529    // Prefetch the src now, it's going to be used soon.
530    pld     [r1, #0]
    // lsls #17: the result is non-zero only if bit 7 was set (byte 0), and
    // the carry receives bit 15 (byte 1). lr is just a scratch
    // destination; the return address was saved by m_push.
531    lsls    lr, ip, #17
532    bne     .L_strcat_sub8
533    bcs     .L_strcat_sub7
    // Bits 7 and 15 are clear here, so after dropping bit 31 the result is
    // non-zero only if bit 23 (byte 2) was set.
534    lsls    ip, ip, #1
535    bne     .L_strcat_sub6
536
    // Byte 3 of the first word is the terminator.
537    sub     r0, r0, #5
538    b       .L_strcat_r0_scan_done
539
540.L_strcat_sub8:
    // Byte 0 of the first word is the terminator.
541    sub     r0, r0, #8
542    b       .L_strcat_r0_scan_done
543
544.L_strcat_sub7:
    // Byte 1 of the first word is the terminator.
545    sub     r0, r0, #7
546    b       .L_strcat_r0_scan_done
547
548.L_strcat_sub6:
    // Byte 2 of the first word is the terminator.
549    sub     r0, r0, #6
550    b       .L_strcat_r0_scan_done
551
552.L_strcat_zero_in_second_register:
    // dst's terminator is in a word that ends exactly at r0: either the
    // second word of a pair from .L_strcat_mainloop or the single word
    // loaded at .L_strcat_align_to_64. ip holds the zero-byte test result
    // for that word (only bits 7, 15, 23 and 31 can be set; the lowest one
    // set marks the first zero byte). Each exit moves r0 back onto the
    // zero byte and continues with the copy phase.
553    // Prefetch the src now, it's going to be used soon.
554    pld     [r1, #0]
    // lsls #17: the result is non-zero only if bit 7 was set (byte 0), and
    // the carry receives bit 15 (byte 1). lr is just a scratch
    // destination; the return address was saved by m_push.
555    lsls    lr, ip, #17
556    bne     .L_strcat_sub4
557    bcs     .L_strcat_sub3
    // Bits 7 and 15 are clear here, so after dropping bit 31 the result is
    // non-zero only if bit 23 (byte 2) was set.
558    lsls    ip, ip, #1
559    bne     .L_strcat_sub2
560
    // Byte 3 of the word is the terminator.
561    sub     r0, r0, #1
562    b       .L_strcat_r0_scan_done
563
564.L_strcat_sub4:
    // Byte 0 of the word is the terminator.
565    sub     r0, r0, #4
566    b       .L_strcat_r0_scan_done
567
568.L_strcat_sub3:
    // Byte 1 of the word is the terminator.
569    sub     r0, r0, #3
570    b       .L_strcat_r0_scan_done
571
572.L_strcat_sub2:
    // Byte 2 of the word is the terminator.
573    sub     r0, r0, #2
574    b       .L_strcat_r0_scan_done
575END(strcat)
576