/* strcpy.S revision 77561bfe0b83b32d5c5bfc0c97bacae9f4204b34 */
/*
 * Copyright (C) 2014 The Android Open Source Project
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/* Copyright (c) 2014, Linaro Limited
   All rights reserved.

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are met:
       * Redistributions of source code must retain the above copyright
         notice, this list of conditions and the following disclaimer.
       * Redistributions in binary form must reproduce the above copyright
         notice, this list of conditions and the following disclaimer in the
         documentation and/or other materials provided with the distribution.
       * Neither the name of the Linaro nor the
         names of its contributors may be used to endorse or promote products
         derived from this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

/* Assumptions:
 *
 * ARMv8-a, AArch64
 * Little-endian data layout: the code that locates the terminating NUL
 * byte-reverses the detection mask before counting leading zeros, and
 * the tail stores write the low-order bytes of a register first.
 */

#include <private/bionic_asm.h>

/* Arguments and results.
 *   dstin - destination pointer as received in x0.  strcpy only reads
 *           it, so the same register still holds it on return.
 *   src   - source pointer received in x1; post-incremented as the
 *           string is consumed.
 */
#define dstin       x0
#define src         x1

/* Locals and temporaries.
 *   dst           - working destination pointer (copy of dstin).
 *   data1, data2  - source chunks most recently loaded; the _w names
 *                   are the 32-bit views of the same registers.
 *   has_nul1/2    - NUL-detection mask for data1/data2; non-zero iff
 *                   the corresponding chunk contains a zero byte.
 *   tmp1 .. tmp4  - scratch.
 *   zeroones      - REP8_01 held in a register for the subtract step.
 *   pos           - bit offset of the first NUL inside a chunk.
 */
#define dst         x2
#define data1       x3
#define data1_w     w3
#define data2       x4
#define data2_w     w4
#define has_nul1    x5
#define has_nul1_w  w5
#define has_nul2    x6
#define tmp1        x7
#define tmp2        x8
#define tmp3        x9
#define tmp4        x10
#define zeroones    x11
#define zeroones_w  w11
#define pos         x12

/* Byte-replicated constants for the word-at-a-time NUL test
 * (X - REP8_01) & ~(X | REP8_7f).  REP8_80 is defined for completeness;
 * strcpy in this file does not reference it.
 */
#define REP8_01 0x0101010101010101
#define REP8_7f 0x7f7f7f7f7f7f7f7f
#define REP8_80 0x8080808080808080

/*
 * char *strcpy(char *dst, const char *src)
 *
 * In:       x0 (dstin) = destination buffer
 *           x1 (src)   = NUL-terminated source string
 * Out:      x0 = destination.  dstin is never written; every store goes
 *           through the working copy in dst (x2).
 * Clobbers: x1-x12 and the condition flags.  No stack is used.
 *
 * Outline:
 *   1. If src is not 16-byte aligned, .Lmisaligned copies 1, 2, 4 and/or
 *      8 bytes (each load naturally aligned) until it is, returning
 *      early if the NUL turns up on the way.
 *   2. .Lloop then copies 16 bytes per iteration, testing both 8-byte
 *      halves for a zero byte before storing anything.
 *   3. When a half contains the NUL, .Lnul_in_data1 / .Lnul_in_data2
 *      store exactly the bytes up to and including the NUL and return.
 *
 * Every multi-byte load is aligned to its own size, so it never
 * straddles a 16-byte boundary and therefore never reads from a page
 * that the string itself does not reach.  Stores to dst may be
 * unaligned; nothing is ever written past the terminating NUL.
 */
ENTRY(strcpy)
    mov     zeroones, #REP8_01
    mov     dst, dstin
    // Only the flags matter here: is src already 16-byte aligned?
    tst     src, #15
    b.ne    .Lmisaligned
    // NUL detection works on the principle that (X - 1) & (~X) & 0x80
    // (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
    // can be done in parallel across the entire word.
    // Bytes above the first zero byte may be flagged spuriously because
    // of borrow propagation, but only the lowest-addressed flag is ever
    // used, and that one is exact.
    // The inner loop deals with two Dwords at a time.  This has a
    // slightly higher start-up cost, but we should win quite quickly,
    // especially on cores with a high number of issue slots per
    // cycle, as we get much better parallelism out of the operations.
.Lloop:
    ldp     data1, data2, [src], #16
    sub     tmp1, data1, zeroones
    orr     tmp2, data1, #REP8_7f
    bic     has_nul1, tmp1, tmp2
    cbnz    has_nul1, .Lnul_in_data1
    sub     tmp3, data2, zeroones
    orr     tmp4, data2, #REP8_7f
    bic     has_nul2, tmp3, tmp4
    cbnz    has_nul2, .Lnul_in_data2
    // No NUL in either register, copy it in a single instruction.
    stp     data1, data2, [dst], #16
    b       .Lloop

    // data1 contains the end of the string.  After the byte reverse,
    // the flag of the lowest-addressed NUL is the most significant set
    // bit, so clz yields 8 * (index of that byte).  Adding 8 gives
    // tmp1 = number of bits to store, NUL included (8, 16, ... 64).
.Lnul_in_data1:
    rev     has_nul1, has_nul1
    clz     pos, has_nul1
    add     tmp1, pos, #0x8

    // Bit 6 set: all eight bytes are needed, store them at once.
    tbz     tmp1, #6, 1f
    str     data1, [dst]
    ret
1:
    // Otherwise bits 5, 4 and 3 of tmp1 select a 4-, 2- and 1-byte
    // store.  After each store the consumed bytes are shifted out so
    // the next piece sits in the low bits of data1.
    tbz     tmp1, #5, 1f
    str     data1_w, [dst], #4
    lsr     data1, data1, #32
1:
    tbz     tmp1, #4, 1f
    strh    data1_w, [dst], #2
    lsr     data1, data1, #16
1:
    tbz     tmp1, #3, 1f
    strb    data1_w, [dst]
1:
    ret

    // The NUL is in the second Dword: data1 is NUL-free, so store it
    // whole, then finish data2 exactly as above.
.Lnul_in_data2:
    str     data1, [dst], #8
    rev     has_nul2, has_nul2
    clz     pos, has_nul2
    add     tmp1, pos, #0x8

    tbz     tmp1, #6, 1f
    str     data2, [dst]
    ret
1:
    tbz     tmp1, #5, 1f
    str     data2_w, [dst], #4
    lsr     data2, data2, #32
1:
    tbz     tmp1, #4, 1f
    strh    data2_w, [dst], #2
    lsr     data2, data2, #16
1:
    tbz     tmp1, #3, 1f
    strb    data2_w, [dst]
1:
    ret

    // src is not 16-byte aligned.  Walk up the alignment ladder: each
    // step tests one low bit of src and, if it is set, copies that many
    // bytes, which clears the bit for the next step.
.Lmisaligned:
    // Odd address: copy one byte.
    tbz     src, #0, 1f
    ldrb    data1_w, [src], #1
    strb    data1_w, [dst], #1
    cbnz    data1_w, 1f
    ret
1:
    // 2-byte step, done as two byte copies so the NUL check is direct.
    tbz     src, #1, 1f
    ldrb    data1_w, [src], #1
    strb    data1_w, [dst], #1
    cbz     data1_w, .Ldone
    ldrb    data2_w, [src], #1
    strb    data2_w, [dst], #1
    cbnz    data2_w, 1f
.Ldone:
    ret
1:
    // 4-byte step.  The 32-bit load and the 32-bit ands both zero the
    // upper halves of data1 and has_nul1, so the 64-bit tail code at
    // .Lnul_in_data1 can be shared (tmp1 is at most 32 in that case).
    tbz     src, #2, 1f
    ldr     data1_w, [src], #4
    // Check for a zero.
    sub     has_nul1_w, data1_w, zeroones_w
    bic     has_nul1_w, has_nul1_w, data1_w
    ands    has_nul1_w, has_nul1_w, #0x80808080
    b.ne    .Lnul_in_data1
    str     data1_w, [dst], #4
1:
    // 8-byte step; if bit 3 is clear src is already 16-byte aligned.
    tbz     src, #3, .Lloop
    ldr     data1, [src], #8
    // Check for a zero.
    sub     tmp1, data1, zeroones
    orr     tmp2, data1, #REP8_7f
    bics    has_nul1, tmp1, tmp2
    b.ne    .Lnul_in_data1
    str     data1, [dst], #8
    b       .Lloop
END(strcpy)
