/* Copyright (c) 2014, Linaro Limited
   All rights reserved.

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are met:
       * Redistributions of source code must retain the above copyright
         notice, this list of conditions and the following disclaimer.
       * Redistributions in binary form must reproduce the above copyright
         notice, this list of conditions and the following disclaimer in the
         documentation and/or other materials provided with the distribution.
       * Neither the name of the Linaro nor the
         names of its contributors may be used to endorse or promote products
         derived from this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

/* Assumptions:
 *
 * ARMv8-a, AArch64
 */

#include <private/bionic_asm.h>

/* Register aliases used by memcmp below.  Each alias is a plain
   preprocessor substitution for an AArch64 general-purpose register.  */

/* Parameters and result.  */
#define src1		x0	/* First buffer; advanced as it is read.  */
#define src2		x1	/* Second buffer; advanced as it is read.  */
#define limit		x2	/* Number of bytes to compare.  */
#define result		x0	/* Return value (shares x0 with src1).  */

/* Internal variables.  */
#define data1		x3	/* Bytes most recently loaded from src1.  */
#define data1w		w3	/* 32-bit view of data1.  */
#define data2		x4	/* Bytes most recently loaded from src2.  */
#define data2w		w4	/* 32-bit view of data2.  */
#define has_nul		x5	/* Not referenced by the code visible in this file.  */
#define diff		x6	/* data1 ^ data2; non-zero bits mark differences.  */
#define endloop		x7	/* Non-zero when the aligned loop must stop.  */
#define tmp1		x8	/* Scratch.  */
#define tmp2		x9	/* Scratch.  */
#define tmp3		x10	/* Not referenced by the code visible in this file.  */
#define pos		x11	/* Leading-zero count of diff.  */
#define limit_wd	x12	/* Remaining 8-byte words (limit rounded up).  */
#define mask		x13	/* Mask of bytes beyond the limit in the last word.  */

/*
 * memcmp -- compare two byte buffers.
 *
 * In:      src1 (x0), src2 (x1) = buffers; limit (x2) = number of bytes.
 * Out:     result (x0) = 0 when the first `limit` bytes are equal (or when
 *          limit is 0); otherwise negative/positive according to whether the
 *          first differing byte, taken as unsigned, is smaller/larger in
 *          src1.  Only the sign is meaningful: the 8-byte path subtracts two
 *          8-bit windows that start at the first differing *bit*, whereas the
 *          byte-loop path returns the plain byte difference.
 * Scratch: x3-x4, x6-x9, x11-x13 and the condition flags.  Leaf routine:
 *          no calls and no stack accesses.
 *
 * Strategy:
 *   - Pointers with different low three bits: byte-at-a-time loop
 *     (.Lmisaligned8).
 *   - Pointers with the same, non-zero, low three bits: round both down to
 *     an 8-byte boundary and neutralise the leading bytes (.Lmutual_align).
 *   - Otherwise compare 8 bytes per iteration (.Lloop_aligned).
 *
 * The 8-byte paths load whole aligned Dwords, so they may read up to 7 bytes
 * before the start or after the end of a buffer, but only from an aligned
 * Dword that also contains bytes inside the buffer.
 */
ENTRY(memcmp)
	cbz	limit, .Lret0		/* Nothing to compare => equal.  */
	eor	tmp1, src1, src2
	tst	tmp1, #7
	b.ne	.Lmisaligned8		/* Low three address bits differ.  */
	ands	tmp1, src1, #7		/* tmp1 = bytes past the 8-byte boundary.  */
	b.ne	.Lmutual_align
	add	limit_wd, limit, #7
	lsr	limit_wd, limit_wd, #3	/* Dwords to examine, rounded up.  */
	/* Start of performance-critical section  -- one 64B cache line.  */
.Lloop_aligned:
	ldr	data1, [src1], #8
	ldr	data2, [src2], #8
.Lstart_realigned:
	subs	limit_wd, limit_wd, #1
	eor	diff, data1, data2	/* Non-zero if differences found.  */
	/* endloop = diff while Dwords remain, all-ones on the last Dword.  */
	csinv	endloop, diff, xzr, ne	/* Last Dword or differences.  */
	cbz	endloop, .Lloop_aligned
	/* End of performance-critical section  -- one 64B cache line.  */

	/* Not reached the limit, must have found a diff.  */
	cbnz	limit_wd, .Lnot_limit

	/* This was the last Dword.  Limit % 8 == 0 => all bytes significant.  */
	ands	limit, limit, #7
	b.eq	.Lnot_limit

	/* Build a mask covering the bytes of the last Dword that lie beyond
	   the limit.  */
	lsl	limit, limit, #3	/* Bytes -> bits.  */
	mov	mask, #~0
#ifdef __AARCH64EB__
	lsr	mask, mask, limit
#else
	lsl	mask, mask, limit
#endif
	/* Clear the out-of-range bytes so that they compare equal...  */
	bic	data1, data1, mask
	bic	data2, data2, mask

	/* ...and flag them in DIFF so that they mark the end of the data.  */
	orr	diff, diff, mask
.Lnot_limit:

#ifndef	__AARCH64EB__
	/* Little-endian: byte-reverse so the lowest-addressed byte becomes
	   the most significant one.  */
	rev	diff, diff
	rev	data1, data1
	rev	data2, data2
#endif
	/* The MS-non-zero bit of DIFF marks either the first bit
	   that is different, or the end of the significant data.
	   Shifting left now will bring the critical information into the
	   top bits.  If DIFF is zero, CLZ yields 64 and the register shift
	   (taken modulo 64) leaves the equal Dwords untouched.  */
	clz	pos, diff
	lsl	data1, data1, pos
	lsl	data2, data2, pos
	/* Zero-extend the top 8 bits of each value (char is unsigned) and
	   subtract them; the difference fits in the low 32 bits of the
	   result register.  */
	lsr	data1, data1, #56
	sub	result, data1, data2, lsr #56
	ret

.Lmutual_align:
	/* Sources are mutually aligned, but are not currently at an
	   alignment boundary.  Round down the addresses and then mask off
	   the bytes that precede the start point.  */
	bic	src1, src1, #7
	bic	src2, src2, #7
	add	limit, limit, tmp1	/* Adjust the limit for the extra.  */
	lsl	tmp1, tmp1, #3		/* Bytes beyond alignment -> bits.  */
	ldr	data1, [src1], #8
	neg	tmp1, tmp1		/* Bits to alignment -64.  */
	ldr	data2, [src2], #8
	mov	tmp2, #~0
#ifdef __AARCH64EB__
	/* Big-endian.  Early bytes are at MSB.  */
	lsl	tmp2, tmp2, tmp1	/* Shift (tmp1 & 63).  */
#else
	/* Little-endian.  Early bytes are at LSB.  */
	lsr	tmp2, tmp2, tmp1	/* Shift (tmp1 & 63).  */
#endif
	add	limit_wd, limit, #7
	/* Force the bytes before the start point to all-ones in both Dwords
	   so that they can never register as a difference.  */
	orr	data1, data1, tmp2
	orr	data2, data2, tmp2
	lsr	limit_wd, limit_wd, #3
	b	.Lstart_realigned

.Lret0:
	mov	result, #0
	ret

	.p2align 6
.Lmisaligned8:
	/* Pre-decrement so that the SUBS below borrows (carry clear) on the
	   final byte.  */
	sub	limit, limit, #1
1:
	/* Perhaps we can do better than this.  */
	ldrb	data1w, [src1], #1
	ldrb	data2w, [src2], #1
	subs	limit, limit, #1
	/* Bytes remain (carry set): compare them.  Otherwise force
	   "not equal" so that the loop terminates.  */
	ccmp	data1w, data2w, #0, cs	/* NZCV = 0b0000.  */
	b.eq	1b
	/* LDRB zero-extended both bytes, so this is the unsigned byte
	   difference (zero if the limit was reached with equal bytes).  */
	sub	result, data1, data2
	ret
END(memcmp)