/* Copyright (c) 2012, Linaro Limited
   All rights reserved.

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are met:
       * Redistributions of source code must retain the above copyright
         notice, this list of conditions and the following disclaimer.
       * Redistributions in binary form must reproduce the above copyright
         notice, this list of conditions and the following disclaimer in the
         documentation and/or other materials provided with the distribution.
       * Neither the name of the Linaro nor the
         names of its contributors may be used to endorse or promote products
         derived from this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

/* Assumptions:
 *
 * ARMv8-a, AArch64
 */

#include <private/bionic_asm.h>

/* Byte-replicated 64-bit constants: each macro repeats the byte in its
   name across all eight byte lanes of a doubleword.  REP8_01 and REP8_7f
   feed the word-at-a-time NUL test (X - REP8_01) & ~(X | REP8_7f).
   NOTE(review): REP8_80 is not referenced by the code in this file as
   seen here -- confirm before removing.  */
#define REP8_01 0x0101010101010101
#define REP8_7f 0x7f7f7f7f7f7f7f7f
#define REP8_80 0x8080808080808080

/* Parameters and result.  result shares x0 with src1, so src1 is dead
   once result has been written.  */
#define src1		x0
#define src2		x1
#define result		x0

/* Internal variables.
     data1/data2    bytes most recently loaded from src1/src2; the w-named
                    aliases are the 32-bit views used for single-byte loads.
     has_nul        non-zero when data1 contains a NUL byte.
     diff           data1 XOR data2.
     syndrome       diff OR has_nul; zero means "keep scanning".
     tmp1..tmp3     scratch.
     zeroones       holds REP8_01 for the NUL test.
     pos            leading-zero count of the syndrome.  */
#define data1		x2
#define data1w		w2
#define data2		x3
#define data2w		w3
#define has_nul		x4
#define diff		x5
#define syndrome	x6
#define tmp1		x7
#define tmp2		x8
#define tmp3		x9
#define zeroones	x10
#define pos		x11

587e4fa560999d07064d219a16ebb50d3691dd1b63Bernhard Rosenkraenzer	/* Start of performance-critical section  -- one 64B cache line.  */
597e4fa560999d07064d219a16ebb50d3691dd1b63Bernhard RosenkraenzerENTRY(strcmp)
609d150dd9a09132561a10c98de6b79b0b318d4e7dYuanyuan Zhong.p2align  6
617e4fa560999d07064d219a16ebb50d3691dd1b63Bernhard Rosenkraenzer	eor	tmp1, src1, src2
627e4fa560999d07064d219a16ebb50d3691dd1b63Bernhard Rosenkraenzer	mov	zeroones, #REP8_01
637e4fa560999d07064d219a16ebb50d3691dd1b63Bernhard Rosenkraenzer	tst	tmp1, #7
647e4fa560999d07064d219a16ebb50d3691dd1b63Bernhard Rosenkraenzer	b.ne	.Lmisaligned8
657e4fa560999d07064d219a16ebb50d3691dd1b63Bernhard Rosenkraenzer	ands	tmp1, src1, #7
667e4fa560999d07064d219a16ebb50d3691dd1b63Bernhard Rosenkraenzer	b.ne	.Lmutual_align
677e4fa560999d07064d219a16ebb50d3691dd1b63Bernhard Rosenkraenzer	/* NUL detection works on the principle that (X - 1) & (~X) & 0x80
687e4fa560999d07064d219a16ebb50d3691dd1b63Bernhard Rosenkraenzer	   (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
697e4fa560999d07064d219a16ebb50d3691dd1b63Bernhard Rosenkraenzer	   can be done in parallel across the entire word.  */
707e4fa560999d07064d219a16ebb50d3691dd1b63Bernhard Rosenkraenzer.Lloop_aligned:
717e4fa560999d07064d219a16ebb50d3691dd1b63Bernhard Rosenkraenzer	ldr	data1, [src1], #8
727e4fa560999d07064d219a16ebb50d3691dd1b63Bernhard Rosenkraenzer	ldr	data2, [src2], #8
737e4fa560999d07064d219a16ebb50d3691dd1b63Bernhard Rosenkraenzer.Lstart_realigned:
747e4fa560999d07064d219a16ebb50d3691dd1b63Bernhard Rosenkraenzer	sub	tmp1, data1, zeroones
757e4fa560999d07064d219a16ebb50d3691dd1b63Bernhard Rosenkraenzer	orr	tmp2, data1, #REP8_7f
767e4fa560999d07064d219a16ebb50d3691dd1b63Bernhard Rosenkraenzer	eor	diff, data1, data2	/* Non-zero if differences found.  */
777e4fa560999d07064d219a16ebb50d3691dd1b63Bernhard Rosenkraenzer	bic	has_nul, tmp1, tmp2	/* Non-zero if NUL terminator.  */
787e4fa560999d07064d219a16ebb50d3691dd1b63Bernhard Rosenkraenzer	orr	syndrome, diff, has_nul
797e4fa560999d07064d219a16ebb50d3691dd1b63Bernhard Rosenkraenzer	cbz	syndrome, .Lloop_aligned
807e4fa560999d07064d219a16ebb50d3691dd1b63Bernhard Rosenkraenzer	/* End of performance-critical section  -- one 64B cache line.  */
817e4fa560999d07064d219a16ebb50d3691dd1b63Bernhard Rosenkraenzer
827e4fa560999d07064d219a16ebb50d3691dd1b63Bernhard Rosenkraenzer#ifndef	__AARCH64EB__
837e4fa560999d07064d219a16ebb50d3691dd1b63Bernhard Rosenkraenzer	rev	syndrome, syndrome
847e4fa560999d07064d219a16ebb50d3691dd1b63Bernhard Rosenkraenzer	rev	data1, data1
857e4fa560999d07064d219a16ebb50d3691dd1b63Bernhard Rosenkraenzer	/* The MS-non-zero bit of the syndrome marks either the first bit
867e4fa560999d07064d219a16ebb50d3691dd1b63Bernhard Rosenkraenzer	   that is different, or the top bit of the first zero byte.
877e4fa560999d07064d219a16ebb50d3691dd1b63Bernhard Rosenkraenzer	   Shifting left now will bring the critical information into the
887e4fa560999d07064d219a16ebb50d3691dd1b63Bernhard Rosenkraenzer	   top bits.  */
897e4fa560999d07064d219a16ebb50d3691dd1b63Bernhard Rosenkraenzer	clz	pos, syndrome
907e4fa560999d07064d219a16ebb50d3691dd1b63Bernhard Rosenkraenzer	rev	data2, data2
917e4fa560999d07064d219a16ebb50d3691dd1b63Bernhard Rosenkraenzer	lsl	data1, data1, pos
927e4fa560999d07064d219a16ebb50d3691dd1b63Bernhard Rosenkraenzer	lsl	data2, data2, pos
937e4fa560999d07064d219a16ebb50d3691dd1b63Bernhard Rosenkraenzer	/* But we need to zero-extend (char is unsigned) the value and then
947e4fa560999d07064d219a16ebb50d3691dd1b63Bernhard Rosenkraenzer	   perform a signed 32-bit subtraction.  */
957e4fa560999d07064d219a16ebb50d3691dd1b63Bernhard Rosenkraenzer	lsr	data1, data1, #56
967e4fa560999d07064d219a16ebb50d3691dd1b63Bernhard Rosenkraenzer	sub	result, data1, data2, lsr #56
977e4fa560999d07064d219a16ebb50d3691dd1b63Bernhard Rosenkraenzer	ret
987e4fa560999d07064d219a16ebb50d3691dd1b63Bernhard Rosenkraenzer#else
997e4fa560999d07064d219a16ebb50d3691dd1b63Bernhard Rosenkraenzer	/* For big-endian we cannot use the trick with the syndrome value
1007e4fa560999d07064d219a16ebb50d3691dd1b63Bernhard Rosenkraenzer	   as carry-propagation can corrupt the upper bits if the trailing
1017e4fa560999d07064d219a16ebb50d3691dd1b63Bernhard Rosenkraenzer	   bytes in the string contain 0x01.  */
1027e4fa560999d07064d219a16ebb50d3691dd1b63Bernhard Rosenkraenzer	/* However, if there is no NUL byte in the dword, we can generate
1037e4fa560999d07064d219a16ebb50d3691dd1b63Bernhard Rosenkraenzer	   the result directly.  We can't just subtract the bytes as the
1047e4fa560999d07064d219a16ebb50d3691dd1b63Bernhard Rosenkraenzer	   MSB might be significant.  */
1057e4fa560999d07064d219a16ebb50d3691dd1b63Bernhard Rosenkraenzer	cbnz	has_nul, 1f
1067e4fa560999d07064d219a16ebb50d3691dd1b63Bernhard Rosenkraenzer	cmp	data1, data2
1077e4fa560999d07064d219a16ebb50d3691dd1b63Bernhard Rosenkraenzer	cset	result, ne
1087e4fa560999d07064d219a16ebb50d3691dd1b63Bernhard Rosenkraenzer	cneg	result, result, lo
1097e4fa560999d07064d219a16ebb50d3691dd1b63Bernhard Rosenkraenzer	ret
1107e4fa560999d07064d219a16ebb50d3691dd1b63Bernhard Rosenkraenzer1:
1117e4fa560999d07064d219a16ebb50d3691dd1b63Bernhard Rosenkraenzer	/* Re-compute the NUL-byte detection, using a byte-reversed value.  */
1127e4fa560999d07064d219a16ebb50d3691dd1b63Bernhard Rosenkraenzer	rev	tmp3, data1
1137e4fa560999d07064d219a16ebb50d3691dd1b63Bernhard Rosenkraenzer	sub	tmp1, tmp3, zeroones
1147e4fa560999d07064d219a16ebb50d3691dd1b63Bernhard Rosenkraenzer	orr	tmp2, tmp3, #REP8_7f
1157e4fa560999d07064d219a16ebb50d3691dd1b63Bernhard Rosenkraenzer	bic	has_nul, tmp1, tmp2
1167e4fa560999d07064d219a16ebb50d3691dd1b63Bernhard Rosenkraenzer	rev	has_nul, has_nul
1177e4fa560999d07064d219a16ebb50d3691dd1b63Bernhard Rosenkraenzer	orr	syndrome, diff, has_nul
1187e4fa560999d07064d219a16ebb50d3691dd1b63Bernhard Rosenkraenzer	clz	pos, syndrome
1197e4fa560999d07064d219a16ebb50d3691dd1b63Bernhard Rosenkraenzer	/* The MS-non-zero bit of the syndrome marks either the first bit
1207e4fa560999d07064d219a16ebb50d3691dd1b63Bernhard Rosenkraenzer	   that is different, or the top bit of the first zero byte.
1217e4fa560999d07064d219a16ebb50d3691dd1b63Bernhard Rosenkraenzer	   Shifting left now will bring the critical information into the
1227e4fa560999d07064d219a16ebb50d3691dd1b63Bernhard Rosenkraenzer	   top bits.  */
1237e4fa560999d07064d219a16ebb50d3691dd1b63Bernhard Rosenkraenzer	lsl	data1, data1, pos
1247e4fa560999d07064d219a16ebb50d3691dd1b63Bernhard Rosenkraenzer	lsl	data2, data2, pos
1257e4fa560999d07064d219a16ebb50d3691dd1b63Bernhard Rosenkraenzer	/* But we need to zero-extend (char is unsigned) the value and then
1267e4fa560999d07064d219a16ebb50d3691dd1b63Bernhard Rosenkraenzer	   perform a signed 32-bit subtraction.  */
1277e4fa560999d07064d219a16ebb50d3691dd1b63Bernhard Rosenkraenzer	lsr	data1, data1, #56
1287e4fa560999d07064d219a16ebb50d3691dd1b63Bernhard Rosenkraenzer	sub	result, data1, data2, lsr #56
1297e4fa560999d07064d219a16ebb50d3691dd1b63Bernhard Rosenkraenzer	ret
1307e4fa560999d07064d219a16ebb50d3691dd1b63Bernhard Rosenkraenzer#endif
1317e4fa560999d07064d219a16ebb50d3691dd1b63Bernhard Rosenkraenzer
1327e4fa560999d07064d219a16ebb50d3691dd1b63Bernhard Rosenkraenzer.Lmutual_align:
1337e4fa560999d07064d219a16ebb50d3691dd1b63Bernhard Rosenkraenzer	/* Sources are mutually aligned, but are not currently at an
1347e4fa560999d07064d219a16ebb50d3691dd1b63Bernhard Rosenkraenzer	   alignment boundary.  Round down the addresses and then mask off
1357e4fa560999d07064d219a16ebb50d3691dd1b63Bernhard Rosenkraenzer	   the bytes that preceed the start point.  */
1367e4fa560999d07064d219a16ebb50d3691dd1b63Bernhard Rosenkraenzer	bic	src1, src1, #7
1377e4fa560999d07064d219a16ebb50d3691dd1b63Bernhard Rosenkraenzer	bic	src2, src2, #7
1387e4fa560999d07064d219a16ebb50d3691dd1b63Bernhard Rosenkraenzer	lsl	tmp1, tmp1, #3		/* Bytes beyond alignment -> bits.  */
1397e4fa560999d07064d219a16ebb50d3691dd1b63Bernhard Rosenkraenzer	ldr	data1, [src1], #8
1407e4fa560999d07064d219a16ebb50d3691dd1b63Bernhard Rosenkraenzer	neg	tmp1, tmp1		/* Bits to alignment -64.  */
1417e4fa560999d07064d219a16ebb50d3691dd1b63Bernhard Rosenkraenzer	ldr	data2, [src2], #8
1427e4fa560999d07064d219a16ebb50d3691dd1b63Bernhard Rosenkraenzer	mov	tmp2, #~0
1437e4fa560999d07064d219a16ebb50d3691dd1b63Bernhard Rosenkraenzer#ifdef __AARCH64EB__
1447e4fa560999d07064d219a16ebb50d3691dd1b63Bernhard Rosenkraenzer	/* Big-endian.  Early bytes are at MSB.  */
1457e4fa560999d07064d219a16ebb50d3691dd1b63Bernhard Rosenkraenzer	lsl	tmp2, tmp2, tmp1	/* Shift (tmp1 & 63).  */
1467e4fa560999d07064d219a16ebb50d3691dd1b63Bernhard Rosenkraenzer#else
1477e4fa560999d07064d219a16ebb50d3691dd1b63Bernhard Rosenkraenzer	/* Little-endian.  Early bytes are at LSB.  */
1487e4fa560999d07064d219a16ebb50d3691dd1b63Bernhard Rosenkraenzer	lsr	tmp2, tmp2, tmp1	/* Shift (tmp1 & 63).  */
1497e4fa560999d07064d219a16ebb50d3691dd1b63Bernhard Rosenkraenzer#endif
1507e4fa560999d07064d219a16ebb50d3691dd1b63Bernhard Rosenkraenzer	orr	data1, data1, tmp2
1517e4fa560999d07064d219a16ebb50d3691dd1b63Bernhard Rosenkraenzer	orr	data2, data2, tmp2
1527e4fa560999d07064d219a16ebb50d3691dd1b63Bernhard Rosenkraenzer	b	.Lstart_realigned
1537e4fa560999d07064d219a16ebb50d3691dd1b63Bernhard Rosenkraenzer
1547e4fa560999d07064d219a16ebb50d3691dd1b63Bernhard Rosenkraenzer.Lmisaligned8:
1557e4fa560999d07064d219a16ebb50d3691dd1b63Bernhard Rosenkraenzer	/* We can do better than this.  */
1567e4fa560999d07064d219a16ebb50d3691dd1b63Bernhard Rosenkraenzer	ldrb	data1w, [src1], #1
1577e4fa560999d07064d219a16ebb50d3691dd1b63Bernhard Rosenkraenzer	ldrb	data2w, [src2], #1
1587e4fa560999d07064d219a16ebb50d3691dd1b63Bernhard Rosenkraenzer	cmp	data1w, #1
1597e4fa560999d07064d219a16ebb50d3691dd1b63Bernhard Rosenkraenzer	ccmp	data1w, data2w, #0, cs	/* NZCV = 0b0000.  */
1607e4fa560999d07064d219a16ebb50d3691dd1b63Bernhard Rosenkraenzer	b.eq	.Lmisaligned8
1617e4fa560999d07064d219a16ebb50d3691dd1b63Bernhard Rosenkraenzer	sub	result, data1, data2
1627e4fa560999d07064d219a16ebb50d3691dd1b63Bernhard Rosenkraenzer	ret
1637e4fa560999d07064d219a16ebb50d3691dd1b63Bernhard RosenkraenzerEND(strcmp)
164