/* string_copy.S revision 753eb7f07e7736ba3bd73b2653cbfb8863da2278 */
/*
 * Copyright (C) 2014 The Android Open Source Project
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/* Copyright (c) 2014, Linaro Limited
   All rights reserved.

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are met:
       * Redistributions of source code must retain the above copyright
         notice, this list of conditions and the following disclaimer.
       * Redistributions in binary form must reproduce the above copyright
         notice, this list of conditions and the following disclaimer in the
         documentation and/or other materials provided with the distribution.
       * Neither the name of the Linaro nor the
         names of its contributors may be used to endorse or promote products
         derived from this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

/* Assumptions:
 *
 * ARMv8-a, AArch64
 */

/*
 * Build-time selection.  The including file must define exactly one of
 * STPCPY or STRCPY; the choice controls the entry-point name and which
 * register carries the advancing destination pointer.
 */
#if !defined(STPCPY) && !defined(STRCPY)
#error "Either STPCPY or STRCPY must be defined."
#endif
/*
 * Defining both is not supported: `dst` would be defined twice (x0 and
 * then x2) while `dstin` would never be defined.
 */
#if defined(STPCPY) && defined(STRCPY)
#error "Only one of STPCPY and STRCPY may be defined."
#endif

#include <private/bionic_asm.h>

/*
 * Arguments and results.
 *
 * STPCPY: x0 is used directly as the advancing destination pointer, so
 *         the value left in x0 at return is the final position.
 * STRCPY: x0 (dstin) is never modified and is returned unchanged; the
 *         stores go through a separate working copy (dst, below).
 */
#if defined(STPCPY)
#define dst         x0
#elif defined(STRCPY)
#define dstin       x0
#endif
#define src         x1

/*
 * Locals and temporaries.
 *
 * data1/data2      the two 8-byte halves fetched per main-loop iteration
 *                  (data1 also holds the single bytes/words of the
 *                  alignment prologue); the _w names are the 32-bit views
 * has_nul1/2       NUL syndrome computed from data1/data2
 * tmp1..tmp4       scratch for the syndrome computation; tmp1 also holds
 *                  the number of bits left to store in the NUL tails
 * zeroones         holds REP8_01 for the whole function
 * pos              bit offset of the first NUL, produced by clz
 */
#if defined(STRCPY)
#define dst         x2
#endif
#define data1       x3
#define data1_w     w3
#define data2       x4
#define data2_w     w4
#define has_nul1    x5
#define has_nul1_w  w5
#define has_nul2    x6
#define tmp1        x7
#define tmp2        x8
#define tmp3        x9
#define tmp4        x10
#define zeroones    x11
#define zeroones_w  w11
#define pos         x12

/*
 * Byte-replicated constants used by the parallel NUL test.
 * REP8_80 is not referenced by the code visible in this file.
 */
#define REP8_01 0x0101010101010101
#define REP8_7f 0x7f7f7f7f7f7f7f7f
#define REP8_80 0x8080808080808080

/*
 * strcpy / stpcpy for AArch64.  The entry point is named stpcpy when
 * STPCPY is defined and strcpy when STRCPY is defined.
 *
 * In:    x0 = destination buffer
 *        x1 = source bytes, terminated by a NUL byte
 * Out:   STRCPY: x0 is returned unmodified; all stores go through the
 *                working copy `dst`.
 *        STPCPY: x0 (`dst`) points at the NUL terminator written to the
 *                destination.
 * Clobb: x1, x3-x12 and the condition flags; additionally x2 for STRCPY.
 * Stack: not used, and no calls are made.
 *
 * Method: the source pointer is first advanced to a 16-byte boundary
 * (.Lmisaligned), then .Lloop moves 16 bytes per iteration, testing both
 * 8-byte halves for a NUL before storing them.  When a NUL is found the
 * .Lnul_in_data* tails store exactly the bytes up to and including it.
 *
 * Every multi-byte load is naturally aligned, so although a load may
 * fetch bytes that lie after the terminator, it never leaves the aligned
 * unit containing the terminator.  Nothing is stored past the NUL.
 *
 * NOTE(review): the tails locate the first NUL with rev + clz and store
 * the low-order bytes of the register first, which matches little-endian
 * data only; no big-endian variant is present here.
 */
#if defined(STPCPY)
ENTRY(stpcpy)
#elif defined(STRCPY)
ENTRY(strcpy)
#endif
    mov     zeroones, #REP8_01
#if defined(STRCPY)
    // Leave x0 alone so it can be returned as-is; store through a copy.
    mov     dst, dstin
#endif
    // Only the flags matter here: use the byte/word prologue unless src
    // is already 16-byte aligned.
    tst     src, #15
    b.ne    .Lmisaligned
    // NUL detection works on the principle that (X - 1) & (~X) & 0x80
    // (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
    // can be done in parallel across the entire word.  A borrow out of a
    // zero byte can also mark bytes above it, so only the lowest marked
    // byte is trusted (see the tails below).
    // The inner loop deals with two Dwords at a time.  This has a
    // slightly higher start-up cost, but we should win quite quickly,
    // especially on cores with a high number of issue slots per
    // cycle, as we get much better parallelism out of the operations.
.Lloop:
    // Fetch 16 bytes and advance src; src is 16-byte aligned here.
    ldp     data1, data2, [src], #16
    sub     tmp1, data1, zeroones
    orr     tmp2, data1, #REP8_7f
    bic     has_nul1, tmp1, tmp2
    cbnz    has_nul1, .Lnul_in_data1
    sub     tmp3, data2, zeroones
    orr     tmp4, data2, #REP8_7f
    bic     has_nul2, tmp3, tmp4
    cbnz    has_nul2, .Lnul_in_data2
    // No NUL in either register, copy it in a single instruction.
    stp     data1, data2, [dst], #16
    b       .Lloop

    // The first NUL is in data1.  Reached from .Lloop and from both
    // word-sized steps of .Lmisaligned.  On entry the lowest set bit of
    // has_nul1 is bit 7 of the byte that holds the first NUL, and dst is
    // where the first byte of data1 has to be stored.
.Lnul_in_data1:
    // Byte-reverse so the lowest-order byte becomes the most significant
    // one; clz then yields 8 * (index of the first NUL byte).
    rev     has_nul1, has_nul1
    clz     pos, has_nul1
    // tmp1 = number of bits to store, terminator included (8..64).
    add     tmp1, pos, #0x8

    // Bit 6 set means tmp1 == 64: the NUL is the last byte of data1, so
    // the whole register is stored.
    tbz     tmp1, #6, 1f
#if defined(STPCPY)
    // Post-index by 7 so that dst lands on the NUL in byte 7.
    str     data1, [dst], #7
#elif defined(STRCPY)
    str     data1, [dst]
#endif
    ret
1:
    // Fewer than 8 bytes remain: store 4-, 2- and 1-byte pieces as
    // selected by bits 5, 4 and 3 of tmp1, shifting the bytes already
    // stored out of data1 after each piece.
    tbz     tmp1, #5, 1f
    str     data1_w, [dst], #4
    lsr     data1, data1, #32
1:
    tbz     tmp1, #4, 1f
    strh    data1_w, [dst], #2
    lsr     data1, data1, #16
1:
    tbz     tmp1, #3, 1f
    // This last single byte is the NUL itself.  dst is not advanced, so
    // for stpcpy it already points at the terminator.
    strb    data1_w, [dst]
#if defined(STPCPY)
    ret
#endif
1:
#if defined(STPCPY)
    // Back up one so that dst points to the '\0' string terminator.
    sub     dst, dst, #1
#endif
    ret

    // data1 is NUL-free and the first NUL is in data2.  Reached from
    // .Lloop only.  Same scheme as .Lnul_in_data1, applied to data2 after
    // the eight bytes of data1 have been stored.
.Lnul_in_data2:
    str     data1, [dst], #8
    rev     has_nul2, has_nul2
    clz     pos, has_nul2
    // tmp1 = number of bits of data2 to store, terminator included.
    add     tmp1, pos, #0x8

    // tmp1 == 64: the NUL is the last byte of data2.
    tbz     tmp1, #6, 1f
#if defined(STPCPY)
    // Post-index by 7 so that dst lands on the NUL in byte 7.
    str     data2, [dst], #7
#elif defined(STRCPY)
    str     data2, [dst]
#endif
    ret
1:
    tbz     tmp1, #5, 1f
    str     data2_w, [dst], #4
    lsr     data2, data2, #32
1:
    tbz     tmp1, #4, 1f
    strh    data2_w, [dst], #2
    lsr     data2, data2, #16
1:
    tbz     tmp1, #3, 1f
    // The final byte is the NUL; dst stays on it for stpcpy.
    strb    data2_w, [dst]
#if defined(STPCPY)
    ret
#endif
1:
#if defined(STPCPY)
    // Back up one so that dst points to the '\0' string terminator.
    sub     dst, dst, #1
#endif
    ret

    // src is not 16-byte aligned.  Copy 1, 2, 4 and then 8 bytes as
    // required by bits 0..3 of src.  Each bit is tested after the
    // previous step has advanced src, so the 4- and 8-byte loads are
    // naturally aligned and src is 16-byte aligned on reaching .Lloop.
.Lmisaligned:
    // Bit 0: one byte.
    tbz     src, #0, 1f
    ldrb    data1_w, [src], #1
    strb    data1_w, [dst], #1
    cbnz    data1_w, 1f
#if defined(STPCPY)
    // Back up one so that dst points to the '\0' string terminator.
    sub     dst, dst, #1
#endif
    ret
1:
    // Bit 1: two bytes, copied and tested one at a time.
    tbz     src, #1, 1f
    ldrb    data1_w, [src], #1
    strb    data1_w, [dst], #1
    cbz     data1_w, .Ldone
    ldrb    data2_w, [src], #1
    strb    data2_w, [dst], #1
    cbnz    data2_w, 1f
.Ldone:
#if defined(STPCPY)
    // Back up one so that dst points to the '\0' string terminator.
    sub     dst, dst, #1
#endif
    ret
1:
    // Bit 2: one 32-bit word.
    tbz     src, #2, 1f
    ldr     data1_w, [src], #4
    // Check for a zero.  The 32-bit writes clear the upper halves of
    // data1 and has_nul1, so the shared 64-bit tail sees a marker only
    // in bytes 0..3.
    sub     has_nul1_w, data1_w, zeroones_w
    bic     has_nul1_w, has_nul1_w, data1_w
    ands    has_nul1_w, has_nul1_w, #0x80808080
    b.ne    .Lnul_in_data1
    str     data1_w, [dst], #4
1:
    // Bit 3: one 64-bit word; afterwards src is 16-byte aligned.
    tbz     src, #3, .Lloop
    ldr     data1, [src], #8
    // Check for a zero.
    sub     tmp1, data1, zeroones
    orr     tmp2, data1, #REP8_7f
    bics    has_nul1, tmp1, tmp2
    b.ne    .Lnul_in_data1
    str     data1, [dst], #8
    b       .Lloop
#if defined(STPCPY)
END(stpcpy)
#elif defined(STRCPY)
END(strcpy)
#endif
