/* Copyright (c) 2012, Linaro Limited
   All rights reserved.

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are met:
       * Redistributions of source code must retain the above copyright
         notice, this list of conditions and the following disclaimer.
       * Redistributions in binary form must reproduce the above copyright
         notice, this list of conditions and the following disclaimer in the
         documentation and/or other materials provided with the distribution.
       * Neither the name of the Linaro nor the
         names of its contributors may be used to endorse or promote products
         derived from this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

/* Assumptions:
 *
 * ARMv8-a, AArch64
 */

#include <private/bionic_asm.h>

/* Byte-replicated 64-bit constants for the word-at-a-time NUL test.
   REP8_80 is not referenced by the code visible in this file; it is
   kept for completeness.  */
#define REP8_01 0x0101010101010101
#define REP8_7f 0x7f7f7f7f7f7f7f7f
#define REP8_80 0x8080808080808080

/* Parameters and result.  Note that result shares x0 with src1, so
   src1 is dead as soon as result is written.  */
#define src1		x0
#define src2		x1
#define result		x0

/* Internal variables.  */
#define data1		x2	/* Bytes most recently loaded from src1.  */
#define data1w		w2	/* 32-bit view of data1 (byte loop).  */
#define data2		x3	/* Bytes most recently loaded from src2.  */
#define data2w		w3	/* 32-bit view of data2 (byte loop).  */
#define has_nul		x4	/* Non-zero if data1 contains a NUL byte.  */
#define diff		x5	/* data1 ^ data2.  */
#define syndrome	x6	/* diff | has_nul.  */
#define tmp1		x7	/* Scratch.  */
#define tmp2		x8	/* Scratch.  */
#define tmp3		x9	/* Scratch (big-endian path only).  */
#define zeroones	x10	/* Holds REP8_01.  */
#define pos		x11	/* Leading-zero count of the syndrome.  */

/*
 * int strcmp(const char *s1, const char *s2)
 *
 * In:     x0 (src1) = s1, x1 (src2) = s2, both NUL-terminated.
 * Out:    x0 (result) is zero if the strings are equal, negative if s1
 *         sorts before s2 and positive if it sorts after, comparing
 *         bytes as unsigned values.  Only the sign is meaningful: the
 *         word-at-a-time paths do not always return the plain byte
 *         difference.
 * Clobb:  x0-x11 and the NZCV flags.  Leaf routine: no stack use, no
 *         stores to memory.
 *
 * Strategy:
 *   - If both pointers share the same offset within an 8-byte word,
 *     compare one aligned 64-bit word per iteration.  Aligned loads may
 *     read bytes before the start of the strings or beyond the
 *     terminating NUL, but never outside the aligned 8-byte word that
 *     holds a string byte.
 *   - Otherwise fall back to a byte-by-byte loop.
 */
	/* Start of performance-critical section  -- one 64B cache line.  */
ENTRY(strcmp)
	eor	tmp1, src1, src2
	mov	zeroones, #REP8_01
	tst	tmp1, #7
	b.ne	.Lmisaligned8		/* Low address bits differ.  */
	ands	tmp1, src1, #7		/* tmp1 = byte offset within the word.  */
	b.ne	.Lmutual_align
	/* NUL detection works on the principle that (X - 1) & (~X) & 0x80
	   (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
	   can be done in parallel across the entire word.  */
.Lloop_aligned:
	ldr	data1, [src1], #8
	ldr	data2, [src2], #8
.Lstart_realigned:
	sub	tmp1, data1, zeroones
	orr	tmp2, data1, #REP8_7f
	eor	diff, data1, data2	/* Non-zero if differences found.  */
	bic	has_nul, tmp1, tmp2	/* Non-zero if NUL terminator.  */
	orr	syndrome, diff, has_nul
	cbz	syndrome, .Lloop_aligned
	/* End of performance-critical section  -- one 64B cache line.  */

#ifndef	__AARCH64EB__
	/* Little-endian: the first string byte is the least significant
	   byte, so byte-reverse everything to put it at the top.  */
	rev	syndrome, syndrome
	rev	data1, data1
	/* The MS-non-zero bit of the syndrome marks either the first bit
	   that is different, or the top bit of the first zero byte.
	   Shifting left now will bring the critical information into the
	   top bits.  */
	clz	pos, syndrome
	rev	data2, data2
	lsl	data1, data1, pos
	lsl	data2, data2, pos
	/* But we need to zero-extend (char is unsigned) the value and then
	   perform a signed 32-bit subtraction.  The 8-bit window taken
	   here starts at the first differing bit, so it is not necessarily
	   byte-aligned; all earlier bits are equal, so the sign of the
	   difference is still correct.  */
	lsr	data1, data1, #56
	sub	result, data1, data2, lsr #56
	ret
#else
	/* For big-endian we cannot use the trick with the syndrome value
	   as carry-propagation can corrupt the upper bits if the trailing
	   bytes in the string contain 0x01.  */
	/* However, if there is no NUL byte in the dword, we can generate
	   the result directly.  We can't just subtract the bytes as the
	   MSB might be significant.  */
	cbnz	has_nul, 1f
	/* The syndrome is non-zero and there is no NUL, so the words
	   differ: return +1, or -1 if data1 is lower (unsigned).  */
	cmp	data1, data2
	cset	result, ne
	cneg	result, result, lo
	ret
1:
	/* Re-compute the NUL-byte detection, using a byte-reversed value.  */
	rev	tmp3, data1
	sub	tmp1, tmp3, zeroones
	orr	tmp2, tmp3, #REP8_7f
	bic	has_nul, tmp1, tmp2
	rev	has_nul, has_nul
	orr	syndrome, diff, has_nul
	clz	pos, syndrome
	/* The MS-non-zero bit of the syndrome marks either the first bit
	   that is different, or the top bit of the first zero byte.
	   Shifting left now will bring the critical information into the
	   top bits.  */
	lsl	data1, data1, pos
	lsl	data2, data2, pos
	/* But we need to zero-extend (char is unsigned) the value and then
	   perform a signed 32-bit subtraction.  */
	lsr	data1, data1, #56
	sub	result, data1, data2, lsr #56
	ret
#endif

.Lmutual_align:
	/* Sources are mutually aligned, but are not currently at an
	   alignment boundary.  Round down the addresses and then mask off
	   the bytes that precede the start point.  On entry tmp1 holds
	   src1 & 7, the number of such bytes.  */
	bic	src1, src1, #7
	bic	src2, src2, #7
	lsl	tmp1, tmp1, #3		/* Bytes beyond alignment -> bits.  */
	ldr	data1, [src1], #8
	neg	tmp1, tmp1		/* Bits to alignment -64.  */
	ldr	data2, [src2], #8
	mov	tmp2, #~0
#ifdef __AARCH64EB__
	/* Big-endian.  Early bytes are at MSB.  */
	lsl	tmp2, tmp2, tmp1	/* Shift (tmp1 & 63).  */
#else
	/* Little-endian.  Early bytes are at LSB.  */
	lsr	tmp2, tmp2, tmp1	/* Shift (tmp1 & 63).  */
#endif
	/* Force the bytes before the start point to 0xff in both words so
	   they compare equal and cannot be mistaken for a NUL.  */
	orr	data1, data1, tmp2
	orr	data2, data2, tmp2
	b	.Lstart_realigned

.Lmisaligned8:
	/* We can do better than this.  */
	/* Byte-at-a-time loop for pointers with different alignment.  */
	ldrb	data1w, [src1], #1
	ldrb	data2w, [src2], #1
	cmp	data1w, #1		/* Carry set iff data1w is not NUL.  */
	ccmp	data1w, data2w, #0, cs	/* NZCV = 0b0000.  */
	b.eq	.Lmisaligned8		/* Loop while non-NUL and equal.  */
	sub	result, data1, data2
	ret
END(strcmp)