/* Copyright (c) 2012, Linaro Limited
   All rights reserved.
   Copyright (c) 2014, NVIDIA Corporation.  All rights reserved.

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are met:
       * Redistributions of source code must retain the above copyright
         notice, this list of conditions and the following disclaimer.
       * Redistributions in binary form must reproduce the above copyright
         notice, this list of conditions and the following disclaimer in the
         documentation and/or other materials provided with the distribution.
       * Neither the name of the Linaro nor the
         names of its contributors may be used to endorse or promote products
         derived from this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

/* Assumptions:
 *
 * denver, ARMv8-a, AArch64
 * Unaligned accesses
 *
 */

#include <private/bionic_asm.h>

/* By default we assume that the DC instruction can be used to zero
   data blocks more efficiently.  In some circumstances this might be
   unsafe, for example in an asymmetric multiprocessor environment with
   different DC clear lengths (neither the upper nor lower lengths are
   safe to use).  The feature can be disabled by defining DONT_USE_DC.

   If code may be run in a virtualized environment, then define
   MAYBE_VIRT.  This will cause the code to cache the system register
   values rather than re-reading them each call.

   NOTE(review): in this Denver variant the branch into the DC path is
   written as "#	b.eq	.Lzero_mem" in memset, i.e. as a comment line, so
   the DC path appears to be unreachable whether or not DONT_USE_DC is
   defined.  Confirm before relying on either macro.  */

/* Incoming arguments of memset.  dstin (x0) is also the return value,
   so memset never writes to it.  */
#define dstin		x0
#define val		w1
#define count		x2

/* Scratch registers (x and w views of the same register share a name).  */
#define tmp1		x3
#define tmp1w		w3
#define tmp2		x4
#define tmp2w		w4

/* DC ZVA bookkeeping: block length in bytes (x and w views) and the
   matching alignment mask, block length - 1.  */
#define zva_len_x	x5
#define zva_len		w5
#define zva_bits_x	x6

/* A_l holds the fill byte replicated across 64 bits; dst is the working
   store pointer; tmp3w is a further scratch register.  */
#define A_l		x7
#define A_lw		w7
#define dst		x8
#define tmp3w		w9

/* 128-bit view of v0, which memset loads with the replicated fill byte.  */
#define QA_l		q0
/* void *memset(void *dstin, int val, size_t count)
 *
 * In:       x0 = dstin, w1 = val (only the low 8 bits are used), x2 = count.
 * Out:      x0 = dstin.  x0 is never written; all stores go through dst.
 * Clobbers: tmp1, tmp2, A_l, dst, v0 and the flags.  The DC ZVA path
 *           additionally uses zva_len_x, zva_bits_x and tmp3w.
 *
 * count is tested with signed conditions (b.ge/b.le/b.lt/b.gt), so sizes
 * with bit 63 set are not handled as large unsigned values.
 *
 * Strategy, by remaining size:
 *     0..15      .Ltail15tiny  - 8/4/2/1-byte stores chosen by the bits of count.
 *     16..255    .Ltail255     - 64-byte groups, then .Ltail63 / .Ltail15.
 *     >= 256     .Lnot_short   - align dst to 16, then 256 bytes per iteration.
 *     > 2 MiB    (label 3/4 in .Lnot_short) - 64 bytes per iteration using
 *                non-temporal store pairs (stnp).
 */
ENTRY(memset)

	mov	dst, dstin		/* Preserve return value.  */
	ands	A_lw, val, #255
	/* The line inside the #ifndef starts with '#', which the assembler
	 * treats as a comment, so the branch to .Lzero_mem is not emitted
	 * and the DC ZVA path below is currently dormant.  The flags set by
	 * the ands above are what that branch would consume.
	 * NOTE(review): the reason for disabling it is not recorded here.  */
#ifndef DONT_USE_DC
#	b.eq	.Lzero_mem
#endif
	/* Replicate the fill byte into all 8 bytes of A_l.  */
	orr	A_lw, A_lw, A_lw, lsl #8
	orr	A_lw, A_lw, A_lw, lsl #16
	orr	A_l, A_l, A_l, lsl #32
.Ltail_maybe_long:
	/* Size dispatch.  Safe entry point for any count and any dst.  */
	cmp	count, #256
	b.ge	.Lnot_short
.Ltail_maybe_tiny:
	cmp	count, #15
	b.le	.Ltail15tiny
.Ltail255:
	/* Bits 7:6 of count select 0, 64, 128 or 192 bytes, stored by
	 * falling through progressively fewer 64-byte groups.  */
	ands	tmp1, count, #0xC0
	b.eq	.Ltail63
	dup	v0.4s, A_lw
	cmp	tmp1w, #0x80
	b.eq	1f
	b.lt	2f
	stp	QA_l, QA_l, [dst], #32
	stp	QA_l, QA_l, [dst], #32
1:
	stp	QA_l, QA_l, [dst], #32
	stp	QA_l, QA_l, [dst], #32
2:
	stp	QA_l, QA_l, [dst], #32
	stp	QA_l, QA_l, [dst], #32
.Ltail63:
	/* Bits 5:4 of count select 0, 16, 32 or 48 bytes.  dst is advanced
	 * first and the stores are addressed backwards from the new dst.  */
	ands	tmp1, count, #0x30
	b.eq	.Ltail15
	add	dst, dst, tmp1
	cmp	tmp1w, #0x20
	b.eq	1f
	b.lt	2f
	stp	A_l, A_l, [dst, #-48]
1:
	stp	A_l, A_l, [dst, #-32]
2:
	stp	A_l, A_l, [dst, #-16]

.Ltail15:
	/* Final 0..15 bytes: one 16-byte store ending exactly at the end of
	 * the buffer.  It reaches back over bytes already written, so this
	 * label requires that at least 16 bytes precede the new dst.  */
	and	count, count, #15
	add	dst, dst, count
	stp	A_l, A_l, [dst, #-16]	/* Repeat some/all of last store.  */
	ret

.Ltail15tiny:
	/* Set up to 15 bytes.  Does not assume earlier memory
	   being set.  */
	tbz	count, #3, 1f
	str	A_l, [dst], #8
1:
	tbz	count, #2, 1f
	str	A_lw, [dst], #4
1:
	tbz	count, #1, 1f
	strh	A_lw, [dst], #2
1:
	tbz	count, #0, 1f
	strb	A_lw, [dst]
1:
	ret

	/* Bulk path.  Requires count >= 256 on entry: the main loop below
	 * stores 256 bytes before it first tests count.
	 * .Lnot_short starts on a 64-byte boundary.
	 * NOTE(review): the 256-byte loop begins 48 bytes after this label
	 * and is 40 bytes long, so it does not sit within a single 64-byte
	 * line as the original comment claimed.  */
	.p2align 6
.Lnot_short:
	dup	v0.4s, A_lw
	neg	tmp2, dst
	ands	tmp2, tmp2, #15
	b.eq	2f
	/* Bring DST to 128-bit (16-byte) alignment.  We know that there's
	 * more than that to set, so we simply store 16 bytes and advance by
	 * the amount required to reach alignment.  */
	sub	count, count, tmp2
	stp	A_l, A_l, [dst]
	add	dst, dst, tmp2
	/* There may be fewer than 256 bytes to go now.  */
	cmp	count, #255
	b.le	.Ltail255
2:
	/* Above 2 MiB switch to the non-temporal loop at 3:.  */
	cmp	count, #2097152
	b.gt	3f
1:
	/* Bias count by one iteration so the loop can exit on sign.  */
	sub	count, count, #256
2:
	stp	QA_l, QA_l, [dst], #32
	stp	QA_l, QA_l, [dst], #32
	stp	QA_l, QA_l, [dst], #32
	stp	QA_l, QA_l, [dst], #32
	stp	QA_l, QA_l, [dst], #32
	stp	QA_l, QA_l, [dst], #32
	stp	QA_l, QA_l, [dst], #32
	stp	QA_l, QA_l, [dst], #32
	subs	count, count, #256
	b.ge	2b
	/* count is negative here; its low 8 bits are the bytes left.  */
	tst	count, #0xff
	b.ne	.Ltail255
	ret
3:
	sub	count, count, #64
4:
	subs	count, count, #64
	stnp	QA_l, QA_l, [dst]
	stnp	QA_l, QA_l, [dst, #32]
	add	dst, dst, #64		/* add leaves the flags from subs intact.  */
	b.ge	4b
	/* count is negative here; its low 6 bits are the bytes left.  */
	tst	count, #0x3f
	b.ne	.Ltail63
	ret

#ifndef DONT_USE_DC
	/* For zeroing memory, check to see if we can use the ZVA feature to
	 * zero entire 'cache' lines.
	 *
	 * Every fallback out of this path goes to .Ltail_maybe_long rather
	 * than straight to .Lnot_short: this path can hold as few as 64
	 * bytes, while .Lnot_short stores 256 bytes unconditionally.  */
.Lzero_mem:
	mov	A_l, #0
	cmp	count, #63
	b.le	.Ltail_maybe_tiny
	neg	tmp2, dst
	ands	tmp2, tmp2, #15
	b.eq	1f
	sub	count, count, tmp2
	stp	A_l, A_l, [dst]
	add	dst, dst, tmp2
	cmp	count, #63
	b.le	.Ltail63
1:
	/* For zeroing small amounts of memory, it's not worth setting up
	 * the line-clear code.  */
	cmp	count, #128
	b.lt	.Ltail_maybe_long
#ifdef MAYBE_VIRT
	/* For efficiency when virtualized, we cache the ZVA capability.
	 * .Lcache_clear holds 0 (not yet probed), a value with bit 31 set
	 * (ZVA unavailable) or the ZVA block length in bytes.  */
	adrp	tmp2, .Lcache_clear
	ldr	zva_len, [tmp2, #:lo12:.Lcache_clear]
	tbnz	zva_len, #31, .Ltail_maybe_long
	cbnz	zva_len, .Lzero_by_line
	mrs	tmp1, dczid_el0
	tbz	tmp1, #4, 1f
	/* ZVA not available.  Remember this for next time.  */
	mov	zva_len, #~0
	str	zva_len, [tmp2, #:lo12:.Lcache_clear]
	b	.Ltail_maybe_long
1:
	/* Block length in bytes = 4 << DCZID_EL0[3:0].  */
	mov	tmp3w, #4
	and	zva_len, tmp1w, #15	/* Safety: other bits reserved.  */
	lsl	zva_len, tmp3w, zva_len
	str	zva_len, [tmp2, #:lo12:.Lcache_clear]
#else
	mrs	tmp1, dczid_el0
	tbnz	tmp1, #4, .Ltail_maybe_long
	/* Block length in bytes = 4 << DCZID_EL0[3:0].  */
	mov	tmp3w, #4
	and	zva_len, tmp1w, #15	/* Safety: other bits reserved.  */
	lsl	zva_len, tmp3w, zva_len
#endif

.Lzero_by_line:
	/* Compute how far we need to go to become suitably aligned.  We're
	 * already at quad-word alignment.  */
	cmp	count, zva_len_x
	b.lt	.Ltail_maybe_long	/* Not enough to reach alignment.  */
	sub	zva_bits_x, zva_len_x, #1
	neg	tmp2, dst
	ands	tmp2, tmp2, zva_bits_x
	b.eq	1f			/* Already aligned.  */
	/* Not aligned, check that there's enough to zero after alignment:
	 * tmp1 (bytes left after aligning) must be >= 64 and >= zva_len.
	 * If tmp1 < 64 the ccmp just forces N=1 so that b.lt is taken.  */
	sub	tmp1, count, tmp2
	cmp	tmp1, #64
	ccmp	tmp1, zva_len_x, #8, ge	/* NZCV=0b1000 */
	b.lt	.Ltail_maybe_long
	/* We know that there's at least 64 bytes to zero and that it's safe
	 * to overrun by 64 bytes.  */
	mov	count, tmp1
2:
	stp	A_l, A_l, [dst]
	stp	A_l, A_l, [dst, #16]
	stp	A_l, A_l, [dst, #32]
	subs	tmp2, tmp2, #64
	stp	A_l, A_l, [dst, #48]
	add	dst, dst, #64
	b.ge	2b
	/* We've overrun a bit, so adjust dst downwards.  */
	add	dst, dst, tmp2
1:
	sub	count, count, zva_len_x
3:
	dc	zva, dst
	add	dst, dst, zva_len_x
	subs	count, count, zva_len_x
	b.ge	3b
	ands	count, count, zva_bits_x
	b.ne	.Ltail_maybe_long
	ret
END(memset)

#ifdef MAYBE_VIRT
	/* One 32-bit word caching the ZVA probe result (see .Lzero_mem).  */
	.bss
	.p2align 2
.Lcache_clear:
	.space 4
#endif
#endif /* DONT_USE_DC */