/* Copyright (c) 2012, Linaro Limited
   All rights reserved.

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are met:
       * Redistributions of source code must retain the above copyright
         notice, this list of conditions and the following disclaimer.
       * Redistributions in binary form must reproduce the above copyright
         notice, this list of conditions and the following disclaimer in the
         documentation and/or other materials provided with the distribution.
       * Neither the name of the Linaro nor the
         names of its contributors may be used to endorse or promote products
         derived from this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

/* Assumptions:
 *
 * ARMv8-a, AArch64
 * Unaligned accesses
 *
 */

#include <private/bionic_asm.h>

/* Symbolic register names used by the copy routine.
 *
 * dstin/src/count are the three incoming argument registers (x0-x2).
 * dst is a separate working copy of the destination pointer so that
 * dstin (x0) is never written.  tmpN/tmpNw are the 64-bit and 32-bit
 * views of the same scratch registers.  */
#define dstin	x0
#define src	x1
#define count	x2
#define tmp1	x3
#define tmp1w	w3
#define tmp2	x4
#define tmp2w	w4
#define tmp3	x5
#define tmp3w	w5
#define dst	x6

/* General-purpose register pairs for 16-byte ldp/stp transfers.  */
#define A_l	x7
#define A_h	x8
#define B_l	x9
#define B_h	x10
#define C_l	x11
#define C_h	x12
#define D_l	x13
#define D_h	x14

/* 128-bit SIMD register pairs for 32-byte ldp/stp transfers.  */
#define QA_l q0
#define QA_h q1
#define QB_l q2
#define QB_h q3

/*
 * void *memcpy(void *dst, const void *src, size_t count)
 *
 * ABI:    AAPCS64
 * In:     x0 = dstin, x1 = src, x2 = count (byte count, unsigned)
 * Out:    x0 = dstin (x0 is never written, so it is returned unchanged)
 * Clobb:  x1-x4, x6-x8, q0-q3, condition flags
 *
 * Strategy:
 *   count <= 15      : .Ltail15tiny, exact 8/4/2/1-byte moves.
 *   16 <= count < 64 : .Ltail63, 16-byte moves plus one overlapping
 *                      16-byte move for the final (count & 15) bytes.
 *   count >= 64      : advance SRC to 16-byte alignment, then copy 64-byte
 *                      blocks (software-pipelined loop for >= 128 bytes)
 *                      and finish in .Ltail63.
 *
 * count is a size_t, so every size test uses the unsigned conditions
 * (hs/ls/lo).  Signed conditions would treat a count with bit 63 set as
 * negative and send it down the short-copy path.
 */
ENTRY(memcpy)

	mov	dst, dstin
	cmp	count, #64
	b.hs	.Lcpy_not_short
	cmp	count, #15
	b.ls	.Ltail15tiny

	/* Deal with small copies quickly by dropping straight into the
	 * exit block.  */
.Ltail63:
	/* Copy up to 48 bytes of data.  At this point we only need the
	 * bottom 6 bits of count to be accurate.  */
	ands	tmp1, count, #0x30	/* tmp1 = 0, 16, 32 or 48.  */
	b.eq	.Ltail15
	/* Step both pointers past the whole 16-byte chunks, then copy
	 * those chunks using negative offsets.  */
	add	dst, dst, tmp1
	add	src, src, tmp1
	cmp	tmp1w, #0x20
	b.eq	1f			/* Exactly 32 bytes.  */
	b.lo	2f			/* Exactly 16 bytes.  */
	ldp	A_l, A_h, [src, #-48]
	stp	A_l, A_h, [dst, #-48]
1:
	ldp	A_l, A_h, [src, #-32]
	stp	A_l, A_h, [dst, #-32]
2:
	ldp	A_l, A_h, [src, #-16]
	stp	A_l, A_h, [dst, #-16]

.Ltail15:
	/* Copy the final (count & 15) bytes with one 16-byte move that
	 * ends exactly at the end of the buffers.  The move reaches back
	 * over bytes that precede src/dst; every path into this label has
	 * either already copied at least 16 bytes or entered .Ltail63
	 * with count >= 16, so those bytes are inside the buffers.  */
	ands	count, count, #15
	b.eq	1f
	add	src, src, count
	ldp	A_l, A_h, [src, #-16]
	add	dst, dst, count
	stp	A_l, A_h, [dst, #-16]
1:
	ret

.Ltail15tiny:
	/* Copy up to 15 bytes of data.  Does not assume additional data
	   being copied.  Each set bit of count selects one move of that
	   size, so no byte outside the buffers is touched.  */
	tbz	count, #3, 1f
	ldr	tmp1, [src], #8
	str	tmp1, [dst], #8
1:
	tbz	count, #2, 1f
	ldr	tmp1w, [src], #4
	str	tmp1w, [dst], #4
1:
	tbz	count, #1, 1f
	ldrh	tmp1w, [src], #2
	strh	tmp1w, [dst], #2
1:
	tbz	count, #0, 1f
	ldrb	tmp1w, [src]
	strb	tmp1w, [dst]
1:
	ret

.Lcpy_not_short:
	/* We don't much care about the alignment of DST, but we want SRC
	 * to be 128-bit (16 byte) aligned so that we don't cross cache line
	 * boundaries on both loads and stores.  */
	neg	tmp2, src
	ands	tmp2, tmp2, #15		/* Bytes to reach alignment.  */
	b.eq	2f
	sub	count, count, tmp2
	/* Copy more data than needed; it's faster than jumping
	 * around copying sub-Quadword quantities.  We know that
	 * it can't overrun: count was at least 64 on entry here.  */
	ldp	A_l, A_h, [src]
	add	src, src, tmp2
	stp	A_l, A_h, [dst]
	add	dst, dst, tmp2
	/* There may be less than 64 bytes to go now.  */
	cmp	count, #63
	b.ls	.Ltail63
2:
	/* From here on count is biased by -128; only its carry-out and
	 * its bottom 6 bits are used.  */
	subs	count, count, #128
	b.hs	.Lcpy_body_large	/* No borrow: at least 128 bytes.  */
	/* Less than 128 bytes to copy, so handle 64 here and then jump
	 * to the tail.  */
	ldp	QA_l, QA_h, [src]
	ldp	QB_l, QB_h, [src, #32]
	stp	QA_l, QA_h, [dst]
	stp	QB_l, QB_h, [dst, #32]
	tst	count, #0x3f		/* Any bytes left after this block?  */
	add	src, src, #64
	add	dst, dst, #64
	b.ne	.Ltail63
	ret

	/* Critical loop.  Start at a new cache line boundary.  Assuming
	 * 64 bytes per line this ensures the entire loop is in one line.  */
	.p2align 6
.Lcpy_body_large:
	/* There are at least 128 bytes to copy.  Preload the first 64
	 * bytes; each loop iteration stores the 64 bytes loaded previously
	 * and loads the next 64.  */
	ldp	QA_l, QA_h, [src, #0]
	sub	dst, dst, #32		/* Pre-bias.  */
	ldp	QB_l, QB_h, [src, #32]!	/* src += 64 - Pre-bias.  */
1:
	stp	QA_l, QA_h, [dst, #32]
	ldp	QA_l, QA_h, [src, #32]
	stp	QB_l, QB_h, [dst, #64]!
	ldp	QB_l, QB_h, [src, #64]!

	subs	count, count, #64
	b.hs	1b			/* No borrow: another full block follows.  */

	/* Drain the pipeline: store the last 64 bytes that were loaded and
	 * undo the pointer biases.  */
	stp	QA_l, QA_h, [dst, #32]
	stp	QB_l, QB_h, [dst, #64]
	add	src, src, #32
	add	dst, dst, #64 + 32
	tst	count, #0x3f
	b.ne	.Ltail63
	ret
END(memcpy)
180