/* Copyright (c) 2014, Linaro Limited
   All rights reserved.

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are met:
       * Redistributions of source code must retain the above copyright
         notice, this list of conditions and the following disclaimer.
       * Redistributions in binary form must reproduce the above copyright
         notice, this list of conditions and the following disclaimer in the
         documentation and/or other materials provided with the distribution.
       * Neither the name of the Linaro nor the
         names of its contributors may be used to endorse or promote products
         derived from this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

/* Assumptions:
 *
 * ARMv8-a, AArch64
 */

#include <private/bionic_asm.h>

/* Byte-replicated 64-bit patterns: the same byte value repeated in each of
   the eight bytes of a dword.  REP8_01 and REP8_7f feed the word-at-a-time
   NUL test (X - REP8_01) & ~(X | REP8_7f), which is non-zero iff some byte
   of X is zero.  REP8_80 is the matching per-byte top-bit pattern; it is
   defined for completeness and is not referenced by the strncmp code in
   this file.  */
#define REP8_01 0x0101010101010101
#define REP8_7f 0x7f7f7f7f7f7f7f7f
#define REP8_80 0x8080808080808080

/* Parameters and result.
     src1   - pointer to the first string; advanced as the string is read.
     src2   - pointer to the second string; advanced as the string is read.
     limit  - maximum number of bytes to compare.
     result - return value.  It aliases src1 (both are x0), so it may only
              be written once src1 is no longer needed.  */
#define src1		x0
#define src2		x1
#define limit		x2
#define result		x0

/* Internal variables.
     data1/data2     - current dword (or byte, via the w views) from
                       src1/src2.
     data1w/data2w   - 32-bit views of data1/data2, used by the byte loop.
     has_nul         - non-zero when the current dword of src1 holds a NUL.
     diff            - data1 ^ data2; non-zero when the dwords differ.
     syndrome        - diff | has_nul; its first set bit locates the byte
                       that decides the result.
     tmp1/tmp2/tmp3  - short-lived scratch values.
     zeroones        - holds REP8_01 for the NUL test.
     pos             - bit position obtained from clz of the syndrome.
     limit_wd        - number of dwords still allowed by limit, minus one.
     mask            - bit mask covering the bytes beyond limit.
     endloop         - non-zero when the dword loop must stop (difference
                       found or limit exhausted).  */
#define data1		x3
#define data1w		w3
#define data2		x4
#define data2w		w4
#define has_nul		x5
#define diff		x6
#define syndrome	x7
#define tmp1		x8
#define tmp2		x9
#define tmp3		x10
#define zeroones	x11
#define pos		x12
#define limit_wd	x13
#define mask		x14
#define endloop		x15

/* int strncmp(const char *s1, const char *s2, size_t n)

   In:       src1 (x0) = s1, src2 (x1) = s2, limit (x2) = n.
   Out:      result (x0) = 0 if the strings match over the compared range
             (up to n bytes, or up to and including the first NUL),
             otherwise a negative or positive value according to the first
             differing pair of bytes, compared as unsigned values.
   Clobbers: x0-x15 (see the register aliases above) and the condition
             flags.  Leaf routine: makes no calls and does not touch sp.

   Strategy:
     - The pointers differ in their low three bits: compare one byte at a
       time (.Lmisaligned8).
     - Both pointers are 8-byte aligned: compare one dword per iteration
       (.Lloop_aligned).
     - Both pointers share the same non-zero misalignment: round both down
       to an 8-byte boundary, neutralise the bytes that precede the start
       of the strings, and join the dword loop (.Lmutual_align).
   Dword loads are always 8-byte aligned, so they never straddle an
   8-byte boundary.  */

	.text
	.p2align 6
	/* 7 nops + the 9 set-up instructions that follow ENTRY = 16 * 4 bytes,
	   which places .Lloop_aligned on a 64-byte boundary -- provided that
	   ENTRY (from bionic_asm.h, not visible here) emits no padding or
	   instructions of its own.  */
	.rep 7
	nop	/* Pad so that the loop below fits a cache line.  */
	.endr
ENTRY(strncmp)
	cbz	limit, .Lret0		/* n == 0: equal by definition.  */
	eor	tmp1, src1, src2
	mov	zeroones, #REP8_01
	tst	tmp1, #7		/* Do the low 3 address bits agree?  */
	b.ne	.Lmisaligned8
	ands	tmp1, src1, #7		/* tmp1 = byte offset within the dword.  */
	b.ne	.Lmutual_align
	/* Calculate the number of full and partial words -1.  */
	sub	limit_wd, limit, #1	/* limit != 0, so no underflow.  */
	lsr	limit_wd, limit_wd, #3	/* Convert to Dwords.  */

	/* NUL detection works on the principle that (X - 1) & (~X) & 0x80
	   (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
	   can be done in parallel across the entire word.  */
	/* Start of performance-critical section  -- one 64B cache line.  */
.Lloop_aligned:
	ldr	data1, [src1], #8
	ldr	data2, [src2], #8
.Lstart_realigned:
	subs	limit_wd, limit_wd, #1	/* Goes negative on the last Dword.  */
	sub	tmp1, data1, zeroones
	orr	tmp2, data1, #REP8_7f
	eor	diff, data1, data2	/* Non-zero if differences found.  */
	/* endloop = diff while Dwords remain (pl), else all-ones.  */
	csinv	endloop, diff, xzr, pl	/* Last Dword or differences.  */
	bics	has_nul, tmp1, tmp2	/* Non-zero if NUL terminator.  */
	/* If has_nul == 0, test endloop against 0; otherwise force NZCV to
	   0 (i.e. "ne").  The loop therefore continues only when there is no
	   NUL, no difference, and the limit has not been reached.  */
	ccmp	endloop, #0, #0, eq
	b.eq	.Lloop_aligned
	/* End of performance-critical section  -- one 64B cache line.  */

	/* If limit_wd is still non-negative the limit was not reached, so
	   the loop must have found the end of the string or a difference.  */
	tbz	limit_wd, #63, .Lnot_limit

	/* Limit % 8 == 0 => all bytes significant.  */
	ands	limit, limit, #7
	b.eq	.Lnot_limit

	/* Build a mask covering the bytes of this Dword that lie beyond the
	   limit.  */
	lsl	limit, limit, #3	/* Bytes -> bits.  */
	mov	mask, #~0
#ifdef __AARCH64EB__
	lsr	mask, mask, limit
#else
	lsl	mask, mask, limit
#endif
	/* Zero the out-of-range bytes in both Dwords so they compare equal.  */
	bic	data1, data1, mask
	bic	data2, data2, mask

	/* Make sure that the NUL byte is marked in the syndrome.  */
	orr	has_nul, has_nul, mask

.Lnot_limit:
	orr	syndrome, diff, has_nul

#ifndef	__AARCH64EB__
	/* Little-endian: byte-reverse so that the earliest byte in memory is
	   the most significant one.  */
	rev	syndrome, syndrome
	rev	data1, data1
	/* The MS-non-zero bit of the syndrome marks either the first bit
	   that is different, or the top bit of the first zero byte.
	   Shifting left now will bring the critical information into the
	   top bits.  */
	clz	pos, syndrome
	rev	data2, data2
	lsl	data1, data1, pos
	lsl	data2, data2, pos
	/* But we need to zero-extend (char is unsigned) the value and then
	   perform a signed 32-bit subtraction.  */
	lsr	data1, data1, #56
	sub	result, data1, data2, lsr #56
	ret
#else
	/* For big-endian we cannot use the trick with the syndrome value
	   as carry-propagation can corrupt the upper bits if the trailing
	   bytes in the string contain 0x01.  */
	/* However, if there is no NUL byte in the dword, we can generate
	   the result directly.  We can't just subtract the bytes as the
	   MSB might be significant.  */
	cbnz	has_nul, 1f
	cmp	data1, data2
	cset	result, ne		/* 0 if equal, else 1 ...  */
	cneg	result, result, lo	/* ... negated when data1 < data2.  */
	ret
1:
	/* Re-compute the NUL-byte detection, using a byte-reversed value.  */
	rev	tmp3, data1
	sub	tmp1, tmp3, zeroones
	orr	tmp2, tmp3, #REP8_7f
	bic	has_nul, tmp1, tmp2
	rev	has_nul, has_nul
	orr	syndrome, diff, has_nul
	clz	pos, syndrome
	/* The MS-non-zero bit of the syndrome marks either the first bit
	   that is different, or the top bit of the first zero byte.
	   Shifting left now will bring the critical information into the
	   top bits.  */
	lsl	data1, data1, pos
	lsl	data2, data2, pos
	/* But we need to zero-extend (char is unsigned) the value and then
	   perform a signed 32-bit subtraction.  */
	lsr	data1, data1, #56
	sub	result, data1, data2, lsr #56
	ret
#endif

.Lmutual_align:
	/* Sources are mutually aligned, but are not currently at an
	   alignment boundary.  Round down the addresses and then mask off
	   the bytes that precede the start point.
	   We also need to adjust the limit calculations, but without
	   overflowing if the limit is near ULONG_MAX.
	   On entry tmp1 = src1 & 7 (non-zero).  */
	bic	src1, src1, #7
	bic	src2, src2, #7
	ldr	data1, [src1], #8
	neg	tmp3, tmp1, lsl #3	/* 64 - bits(bytes beyond align). */
	ldr	data2, [src2], #8
	mov	tmp2, #~0
	sub	limit_wd, limit, #1	/* limit != 0, so no underflow.  */
	/* Register-controlled shifts use the shift count modulo 64, so the
	   negative value in tmp3 acts as 64 - 8 * tmp1.  The result is a mask
	   of all-ones over the tmp1 bytes that precede the string start.  */
#ifdef __AARCH64EB__
	/* Big-endian.  Early bytes are at MSB.  */
	lsl	tmp2, tmp2, tmp3	/* Shift by (tmp3 & 63).  */
#else
	/* Little-endian.  Early bytes are at LSB.  */
	lsr	tmp2, tmp2, tmp3	/* Shift by (tmp3 & 63).  */
#endif
	/* limit_wd = ((limit - 1) + tmp1) / 8, computed from the quotient and
	   remainder of (limit - 1) so that the addition cannot overflow.  */
	and	tmp3, limit_wd, #7
	lsr	limit_wd, limit_wd, #3
	/* Adjust the limit. Only low 3 bits used, so overflow irrelevant.  */
	add	limit, limit, tmp1
	add	tmp3, tmp3, tmp1
	/* Force the leading bytes to 0xff in both Dwords: they then compare
	   equal and cannot be mistaken for a NUL.  */
	orr	data1, data1, tmp2
	orr	data2, data2, tmp2
	add	limit_wd, limit_wd, tmp3, lsr #3
	b	.Lstart_realigned

.Lret0:
	mov	result, #0
	ret

	.p2align 6
.Lmisaligned8:
	/* Byte-at-a-time comparison for pointers whose alignments differ.
	   limit is biased by -1 so that the subs below borrows (clears C) on
	   the last byte the limit allows.  */
	sub	limit, limit, #1
1:
	/* Perhaps we can do better than this.  */
	ldrb	data1w, [src1], #1
	ldrb	data2w, [src2], #1
	subs	limit, limit, #1
	/* While bytes remain (C set): C stays set only if data1w != NUL.
	   Otherwise NZCV = 0b0000.  */
	ccmp	data1w, #1, #0, cs	/* NZCV = 0b0000.  */
	/* While C is still set: Z is set iff the two bytes are equal.
	   Otherwise NZCV = 0b0000, i.e. "ne", which ends the loop.  */
	ccmp	data1w, data2w, #0, cs	/* NZCV = 0b0000.  */
	b.eq	1b
	/* ldrb zero-extends, so this is the difference of two unsigned
	   byte values.  */
	sub	result, data1, data2
	ret
END(strncmp)