/*
 * Copyright (C) 2014 The Android Open Source Project
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
   Copyright (c) 2014, Linaro Limited
   All rights reserved.

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are met:
       * Redistributions of source code must retain the above copyright
         notice, this list of conditions and the following disclaimer.
       * Redistributions in binary form must reproduce the above copyright
         notice, this list of conditions and the following disclaimer in the
         documentation and/or other materials provided with the distribution.
       * Neither the name of the Linaro nor the
         names of its contributors may be used to endorse or promote products
         derived from this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

/* Assumptions:
 *
 * ARMv8-a, AArch64
 */

/*
 * Build-time selection: the code below assembles into stpcpy when STPCPY
 * is defined and into strcpy when STRCPY is defined.  Exactly one of the
 * two must be set; with both set the register aliases below would
 * conflict (dst would be defined twice and dstin not at all), so that
 * configuration is rejected up front with a clear message.
 */
#if !defined(STPCPY) && !defined(STRCPY)
#error "Either STPCPY or STRCPY must be defined."
#endif
#if defined(STPCPY) && defined(STRCPY)
#error "Only one of STPCPY and STRCPY may be defined."
#endif

#include <private/bionic_asm.h>

/* Arguments and results.  */
/*
 * STPCPY build: the write cursor lives in x0 itself, so whatever it holds
 * at return time is the function result.
 * STRCPY build: x0 (dstin) is never modified and is returned as passed in;
 * the write cursor is a separate register (see below).
 */
#if defined(STPCPY)
#define dst         x0
#elif defined(STRCPY)
#define dstin       x0
#endif
/* Read cursor; advanced by every load.  */
#define src         x1

/* Locals and temporaries.  */
/* STRCPY build only: write cursor, initialised from dstin.  */
#if defined(STRCPY)
#define dst         x2
#endif
/* data1/data2: the 8-byte words most recently loaded from src; the _w
   names are the 32-bit views of the same registers.  */
#define data1       x3
#define data1_w     w3
#define data2       x4
#define data2_w     w4
/* has_nul1/has_nul2: NUL-detection masks for data1/data2; non-zero when
   the corresponding word contains a zero byte.  */
#define has_nul1    x5
#define has_nul1_w  w5
#define has_nul2    x6
/* Scratch for the NUL-detection arithmetic and the tail length.  */
#define tmp1        x7
#define tmp2        x8
#define tmp3        x9
#define tmp4        x10
/* Holds REP8_01 for the whole routine.  */
#define zeroones    x11
#define zeroones_w  w11
/* Bit offset of the first NUL byte inside a word.  */
#define pos         x12

/* One byte value replicated into all eight bytes of a 64-bit word.
   REP8_80 is not referenced by the code visible in this file.  */
#define REP8_01 0x0101010101010101
#define REP8_7f 0x7f7f7f7f7f7f7f7f
#define REP8_80 0x8080808080808080

/*
 * strcpy / stpcpy (which one is selected by STRCPY / STPCPY).
 *
 * Copies the NUL-terminated byte string at src to the buffer passed in
 * x0, terminator included.
 *
 * In:   x0 = destination buffer, x1 = source string.
 * Out:  STRCPY build: x0 is left exactly as passed in.
 *       STPCPY build: x0 = address of the NUL byte written to the
 *       destination.
 * Uses: x1 is advanced; x3-x12 are scratch (plus x2 in the STRCPY
 *       build); the condition flags are modified.  The stack is not
 *       touched.
 *
 * Strategy: if src is not 16-byte aligned, a short prologue copies 1, 2,
 * 4 and/or 8 bytes (each with a naturally aligned load) until it is.
 * The main loop then loads 16 aligned bytes per iteration, tests both
 * 8-byte halves for a zero byte and stores them with a single stp when
 * none is found.  Once a zero byte is seen, the tail code stores only the
 * bytes up to and including it.  Loads may therefore read source bytes
 * beyond the terminator, but only inside the aligned 4-, 8- or 16-byte
 * block that contains it.  Stores to the destination make no alignment
 * assumption.
 *
 * NOTE(review): the rev/clz position arithmetic and the low-half-first
 * tail stores are only correct for little-endian data; confirm that no
 * big-endian configuration assembles this file.
 */
#if defined(STPCPY)
ENTRY(stpcpy)
#elif defined(STRCPY)
ENTRY(strcpy)
#endif
    mov     zeroones, #REP8_01
#if defined(STRCPY)
    // Keep x0 intact as the return value; work on a copy.
    mov     dst, dstin
#endif
    // Only the flags are needed here: is src 16-byte aligned?
    tst     src, #15
    b.ne    .Lmisaligned
    // NUL detection works on the principle that (X - 1) & (~X) & 0x80
    // (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
    // can be done in parallel across the entire word.
    // Bytes above the first zero byte can be flagged spuriously (the
    // subtraction borrows into them), so only the lowest flagged byte
    // is ever relied upon.
    // The inner loop deals with two Dwords at a time.  This has a
    // slightly higher start-up cost, but we should win quite quickly,
    // especially on cores with a high number of issue slots per
    // cycle, as we get much better parallelism out of the operations.
.Lloop:
    // src is 16-byte aligned whenever control reaches this label.
    ldp     data1, data2, [src], #16
    sub     tmp1, data1, zeroones
    orr     tmp2, data1, #REP8_7f
    bic     has_nul1, tmp1, tmp2
    cbnz    has_nul1, .Lnul_in_data1
    sub     tmp3, data2, zeroones
    orr     tmp4, data2, #REP8_7f
    bic     has_nul2, tmp3, tmp4
    cbnz    has_nul2, .Lnul_in_data2
    // No NUL in either register, copy it in a single instruction.
    stp     data1, data2, [dst], #16
    b       .Lloop

    // Tail: data1 holds the terminator and nothing from it has been
    // stored yet.  Also entered from the misaligned prologue.
.Lnul_in_data1:
    // Byte-reverse the mask so the flag of the lowest-addressed byte
    // becomes the most significant one; clz then gives 8 * (index of
    // the first NUL byte).
    rev     has_nul1, has_nul1
    clz     pos, has_nul1
    // tmp1 = number of bits to store, terminator included: 8, 16 .. 64.
    add     tmp1, pos, #0x8

    // Store that many bits as a binary decomposition: 64, else any
    // combination of 32, 16 and 8, shifting the consumed bytes out.
    tbz     tmp1, #6, 1f
#if defined(STPCPY)
    // All eight bytes are stored and the NUL is the last of them, so
    // advancing by 7 leaves dst on the NUL.
    str     data1, [dst], #7
#elif defined(STRCPY)
    str     data1, [dst]
#endif
    ret
1:
    tbz     tmp1, #5, 1f
    str     data1_w, [dst], #4
    lsr     data1, data1, #32
1:
    tbz     tmp1, #4, 1f
    strh    data1_w, [dst], #2
    lsr     data1, data1, #16
1:
    tbz     tmp1, #3, 1f
    // This byte is the terminator; no post-increment, so dst already
    // points at it.
    strb    data1_w, [dst]
#if defined(STPCPY)
    ret
#endif
1:
#if defined(STPCPY)
    // Back up one so that dst points to the '\0' string terminator.
    sub     dst, dst, #1
#endif
    ret

    // Tail: data1 is NUL-free and data2 holds the terminator.
.Lnul_in_data2:
    // The first eight bytes can be stored whole.
    str     data1, [dst], #8
    // Same position arithmetic and binary-decomposed store as above,
    // applied to data2.
    rev     has_nul2, has_nul2
    clz     pos, has_nul2
    add     tmp1, pos, #0x8

    tbz     tmp1, #6, 1f
#if defined(STPCPY)
    str     data2, [dst], #7
#elif defined(STRCPY)
    str     data2, [dst]
#endif
    ret
1:
    tbz     tmp1, #5, 1f
    str     data2_w, [dst], #4
    lsr     data2, data2, #32
1:
    tbz     tmp1, #4, 1f
    strh    data2_w, [dst], #2
    lsr     data2, data2, #16
1:
    tbz     tmp1, #3, 1f
    strb    data2_w, [dst]
#if defined(STPCPY)
    ret
#endif
1:
#if defined(STPCPY)
    // Back up one so that dst points to the '\0' string terminator.
    sub     dst, dst, #1
#endif
    ret

    // Prologue for a source that is not 16-byte aligned.  Bits 0..3 of
    // the current src are tested in turn; a set bit means that many
    // bytes (1, 2, 4, 8) are copied, which clears the bit and may carry
    // into the next one.  After the last step src is 16-byte aligned.
.Lmisaligned:
    tbz     src, #0, 1f
    // src is odd: copy one byte.
    ldrb    data1_w, [src], #1
    strb    data1_w, [dst], #1
    cbnz    data1_w, 1f
#if defined(STPCPY)
    // Back up one so that dst points to the '\0' string terminator.
    sub     dst, dst, #1
#endif
    ret
1:
    tbz     src, #1, 1f
    // Copy two bytes one at a time, stopping at a terminator.
    ldrb    data1_w, [src], #1
    strb    data1_w, [dst], #1
    cbz     data1_w, .Ldone
    ldrb    data2_w, [src], #1
    strb    data2_w, [dst], #1
    cbnz    data2_w, 1f
.Ldone:
#if defined(STPCPY)
    // Back up one so that dst points to the '\0' string terminator.
    sub     dst, dst, #1
#endif
    ret
1:
    tbz     src, #2, 1f
    // Copy four bytes from a 4-byte aligned address.
    ldr     data1_w, [src], #4
    // Check for a zero.
    // The 32-bit writes clear the upper halves of data1 and has_nul1,
    // so the 64-bit rev/clz in .Lnul_in_data1 still yields the right
    // position (at most 32 bits get stored).
    sub     has_nul1_w, data1_w, zeroones_w
    bic     has_nul1_w, has_nul1_w, data1_w
    ands    has_nul1_w, has_nul1_w, #0x80808080
    b.ne    .Lnul_in_data1
    str     data1_w, [dst], #4
1:
    tbz     src, #3, .Lloop
    // Copy eight bytes from an 8-byte aligned address.
    ldr     data1, [src], #8
    // Check for a zero.
    sub     tmp1, data1, zeroones
    orr     tmp2, data1, #REP8_7f
    bics    has_nul1, tmp1, tmp2
    b.ne    .Lnul_in_data1
    str     data1, [dst], #8
    b       .Lloop
#if defined(STPCPY)
END(stpcpy)
#elif defined(STRCPY)
END(strcpy)
#endif
