/* Copyright (c) 2013-2015, Linaro Limited
   All rights reserved.

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are met:
       * Redistributions of source code must retain the above copyright
	 notice, this list of conditions and the following disclaimer.
       * Redistributions in binary form must reproduce the above copyright
	 notice, this list of conditions and the following disclaimer in the
	 documentation and/or other materials provided with the distribution.
       * Neither the name of the Linaro nor the
	 names of its contributors may be used to endorse or promote products
	 derived from this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */

/* Assumptions:
 *
 * ARMv8-a, AArch64, unaligned accesses, min page size 4k.
 */

#include <private/bionic_asm.h>

/* Arguments and results.  The incoming pointer and the returned length
   share x0.  */
#define srcin		x0
#define len		x0

/* Locals and temporaries.  has_nul1/tmp1 share x4 and has_nul2/tmp2
   share x5, so writing a syndrome overwrites the temporary of the same
   number.  */
#define src		x1
#define data1		x2
#define data2		x3
#define has_nul1	x4
#define has_nul2	x5
#define tmp1		x4
#define tmp2		x5
#define tmp3		x6
#define tmp4		x7
#define zeroones	x8

/* Build an assembler-local label (.L prefix) from a plain name.  */
#define L(l) .L ## l

	/* NUL detection works on the principle that (X - 1) & (~X) & 0x80
	   (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
	   can be done in parallel across the entire word. A faster check
	   (X - 1) & 0x80 is zero for non-NUL ASCII characters, but gives
	   false hits for characters 129..255.	*/

/* The byte values 0x01, 0x7f and 0x80 replicated into all eight bytes
   of a 64-bit word.  */
#define REP8_01 0x0101010101010101
#define REP8_7f 0x7f7f7f7f7f7f7f7f
#define REP8_80 0x8080808080808080

/* To test the page crossing code path more thoroughly, compile with
   -DTEST_PAGE_CROSS - this will force all calls through the slower
   entry path.  This option is not intended for production use.

   With MIN_PAGE_SIZE == 15 the entry test becomes (srcin & 14) > -1,
   which is always true as a signed comparison.  */
#ifdef TEST_PAGE_CROSS
# define MIN_PAGE_SIZE 15
#else
# define MIN_PAGE_SIZE 4096
#endif

	/* Since strings are short on average, we check the first 16 bytes
	   of the string for a NUL character.  In order to do an unaligned ldp
	   safely we have to do a page cross check first.  If there is a NUL
	   byte we calculate the length from the 2 8-byte words using
	   conditional select to reduce branch mispredictions (it is unlikely
	   strlen will be repeatedly called on strings with the same length).

	   If the string is longer than 16 bytes, we align src so we don't
	   need further page cross checks, and process 32 bytes per iteration
	   using the fast NUL check.  If we encounter non-ASCII characters,
	   fallback to a second loop using the full NUL check.

	   If the page cross check fails, we read 16 bytes from an aligned
	   address, remove any characters before the string, and continue
	   in the main loop using aligned loads.  Since strings crossing a
	   page in the first 16 bytes are rare (probability of
	   16/MIN_PAGE_SIZE ~= 0.4%), this case does not need to be optimized.

	   AArch64 systems have a minimum page size of 4k.  We don't bother
	   checking for larger page sizes - the cost of setting up the correct
	   page size is just not worth the extra gain from a small reduction in
	   the cases taking the slow path.  Note that we only care about
	   whether the first fetch, which may be misaligned, crosses a page
	   boundary.  */

/* size_t strlen (const char *s)

   In:	    x0 (srcin) = address of a NUL-terminated string.
   Out:	    x0 (len)   = number of bytes preceding the first NUL.
   Scratch: x1-x8 and the condition flags.  The body itself never
	    touches the stack.  */
ENTRY(strlen)
	/* A 16-byte load starting at srcin stays inside one page when the
	   offset within the page is at most MIN_PAGE_SIZE - 16.  The
	   compare is signed on purpose: under TEST_PAGE_CROSS the bound is
	   negative, so the branch is then always taken.  */
	and	tmp1, srcin, MIN_PAGE_SIZE - 1
	mov	zeroones, REP8_01
	cmp	tmp1, MIN_PAGE_SIZE - 16
	b.gt	L(page_cross)
	ldp	data1, data2, [srcin]
#ifdef __AARCH64EB__
	/* For big-endian, carry propagation (if the final byte in the
	   string is 0x01) means we cannot use has_nul1/2 directly.
	   Since we expect strings to be small and early-exit,
	   byte-swap the data now so has_null1/2 will be correct.  */
	rev	data1, data1
	rev	data2, data2
#endif
	/* Accurate syndrome for each word: (X - 1) & ~(X | 0x7f).  */
	sub	tmp1, data1, zeroones
	orr	tmp2, data1, REP8_7f
	sub	tmp3, data2, zeroones
	orr	tmp4, data2, REP8_7f
	bics	has_nul1, tmp1, tmp2
	bic	has_nul2, tmp3, tmp4
	/* If has_nul1 == 0 (eq) the flags come from comparing has_nul2
	   with 0, which sets C and sets Z iff has_nul2 == 0.  Otherwise
	   the flags are loaded with the immediate 0, clearing C and Z.
	   So Z means "no NUL in either word" and C means "no NUL in the
	   first word".  */
	ccmp	has_nul2, 0, 0, eq
	beq	L(main_loop_entry)

	/* Enter with C = has_nul1 == 0.  */
	csel	has_nul1, has_nul1, has_nul2, cc
	mov	len, 8
	/* After the byte swap the syndrome bit of the lowest-addressed NUL
	   is the most significant set bit, so clz / 8 is its byte index
	   within the selected word.  */
	rev	has_nul1, has_nul1
	clz	tmp1, has_nul1
	/* Word offset: 0 if the NUL was in the first word, 8 otherwise.  */
	csel	len, xzr, len, cc
	add	len, len, tmp1, lsr 3
	ret

	/* The inner loop processes 32 bytes per iteration and uses the fast
	   NUL check.  If we encounter non-ASCII characters, use a second
	   loop with the accurate NUL check.  */
	.p2align 4
L(main_loop_entry):
	/* Bias src so that the first pre-indexed load below reads the
	   16-byte aligned block at (srcin & ~15) + 16.  */
	bic	src, srcin, 15
	sub	src, src, 16
L(main_loop):
	ldp	data1, data2, [src, 32]!
L(page_cross_entry):
	/* Fast check on the 16 bytes at src: any 0x80 bit set in X - 1
	   means a possible NUL (or a non-ASCII byte).  */
	sub	tmp1, data1, zeroones
	sub	tmp3, data2, zeroones
	orr	tmp2, tmp1, tmp3
	tst	tmp2, zeroones, lsl 7
	bne	1f
	/* Same fast check on the 16 bytes at src + 16 (no writeback).  */
	ldp	data1, data2, [src, 16]
	sub	tmp1, data1, zeroones
	sub	tmp3, data2, zeroones
	orr	tmp2, tmp1, tmp3
	tst	tmp2, zeroones, lsl 7
	beq	L(main_loop)
	/* Make src point at the block that triggered the hit.  */
	add	src, src, 16
1:
	/* The fast check failed, so do the slower, accurate NUL check.
	   tmp1 and tmp3 still hold X - 1 for data1 and data2.  */
	orr	tmp2, data1, REP8_7f
	orr	tmp4, data2, REP8_7f
	bics	has_nul1, tmp1, tmp2
	bic	has_nul2, tmp3, tmp4
	ccmp	has_nul2, 0, 0, eq
	beq	L(nonascii_loop)

	/* Enter with C = has_nul1 == 0.  */
L(tail):
#ifdef __AARCH64EB__
	/* For big-endian, carry propagation (if the final byte in the
	   string is 0x01) means we cannot use has_nul1/2 directly.  The
	   easiest way to get the correct byte is to byte-swap the data
	   and calculate the syndrome a second time.  None of the
	   instructions below write the flags, so C stays valid.  */
	csel	data1, data1, data2, cc
	rev	data1, data1
	sub	tmp1, data1, zeroones
	orr	tmp2, data1, REP8_7f
	bic	has_nul1, tmp1, tmp2
#else
	csel	has_nul1, has_nul1, has_nul2, cc
#endif
	/* len = (src - srcin) + (0 or 8 for the word) + byte index.
	   src - srcin is negative when arriving from the page-cross path
	   with the NUL in the first block; the sum is still correct.  */
	sub	len, src, srcin
	rev	has_nul1, has_nul1
	add	tmp2, len, 8
	clz	tmp1, has_nul1
	csel	len, len, tmp2, cc
	add	len, len, tmp1, lsr 3
	ret

	/* Accurate-check loop: 16 bytes per step, unrolled twice.  Both
	   exits reach L(tail) with src pointing at the block holding the
	   NUL and C = has_nul1 == 0.  */
L(nonascii_loop):
	ldp	data1, data2, [src, 16]!
	sub	tmp1, data1, zeroones
	orr	tmp2, data1, REP8_7f
	sub	tmp3, data2, zeroones
	orr	tmp4, data2, REP8_7f
	bics	has_nul1, tmp1, tmp2
	bic	has_nul2, tmp3, tmp4
	ccmp	has_nul2, 0, 0, eq
	bne	L(tail)
	ldp	data1, data2, [src, 16]!
	sub	tmp1, data1, zeroones
	orr	tmp2, data1, REP8_7f
	sub	tmp3, data2, zeroones
	orr	tmp4, data2, REP8_7f
	bics	has_nul1, tmp1, tmp2
	bic	has_nul2, tmp3, tmp4
	ccmp	has_nul2, 0, 0, eq
	beq	L(nonascii_loop)
	b	L(tail)

	/* Load 16 bytes from [srcin & ~15] and force the bytes that precede
	   srcin to 0x7f, so we ignore any NUL bytes before the string.
	   Then continue in the aligned loop.  */
L(page_cross):
	bic	src, srcin, 15
	ldp	data1, data2, [src]
	/* tmp1 = bit offset of srcin within its 8-byte word (the register
	   shift below only uses the low six bits).  */
	lsl	tmp1, srcin, 3
	mov	tmp4, -1
#ifdef __AARCH64EB__
	/* Big-endian.	Early bytes are at MSB.	 */
	lsr	tmp1, tmp4, tmp1	/* Shift (tmp1 & 63).  */
#else
	/* Little-endian.  Early bytes are at LSB.  */
	lsl	tmp1, tmp4, tmp1	/* Shift (tmp1 & 63).  */
#endif
	/* ~tmp1 is now 0x7f in every byte before srcin and 0 elsewhere;
	   OR it into both words.  */
	orr	tmp1, tmp1, REP8_80
	orn	data1, data1, tmp1
	orn	tmp2, data2, tmp1
	/* srcin bit 3 clear: the string starts in the first word, so keep
	   the masked data1 and the untouched data2.  Bit 3 set: the whole
	   first word precedes the string, so replace it with all ones and
	   use the masked second word.  */
	tst	srcin, 8
	csel	data1, data1, tmp4, eq
	csel	data2, data2, tmp2, eq
	b	L(page_cross_entry)

END(strlen)
228