/* Copyright (c) 2013-2015, Linaro Limited
   All rights reserved.

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are met:
       * Redistributions of source code must retain the above copyright
         notice, this list of conditions and the following disclaimer.
       * Redistributions in binary form must reproduce the above copyright
         notice, this list of conditions and the following disclaimer in the
         documentation and/or other materials provided with the distribution.
       * Neither the name of the Linaro nor the
         names of its contributors may be used to endorse or promote products
         derived from this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */

/* Assumptions:
 *
 * ARMv8-a, AArch64, unaligned accesses, min page size 4k.
 */

#include <private/bionic_asm.h>

/* To test the page crossing code path more thoroughly, compile with
   -DTEST_PAGE_CROSS - this will force all calls through the slower
   entry path.  This option is not intended for production use.  */

/* Arguments and results.  srcin and len share x0: the argument is dead
   by the time the result is written.  */
#define srcin		x0
#define len		x0

/* Locals and temporaries.  has_nul1/has_nul2 alias tmp1/tmp2, so the
   NUL syndromes overwrite those temporaries.  */
#define src		x1
#define data1		x2
#define data2		x3
#define has_nul1	x4
#define has_nul2	x5
#define tmp1		x4
#define tmp2		x5
#define tmp3		x6
#define tmp4		x7
#define zeroones	x8

#define L(l) .L ## l

	/* NUL detection works on the principle that (X - 1) & (~X) & 0x80
	   (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
	   can be done in parallel across the entire word.  A faster check
	   (X - 1) & 0x80 is zero for non-NUL ASCII characters, but gives
	   false hits for characters 129..255.  */

#define REP8_01 0x0101010101010101
#define REP8_7f 0x7f7f7f7f7f7f7f7f
#define REP8_80 0x8080808080808080

#ifdef TEST_PAGE_CROSS
# define MIN_PAGE_SIZE 15
#else
# define MIN_PAGE_SIZE 4096
#endif

	/* Since strings are short on average, we check the first 16 bytes
	   of the string for a NUL character.  In order to do an unaligned ldp
	   safely we have to do a page cross check first.  If there is a NUL
	   byte we calculate the length from the 2 8-byte words using
	   conditional select to reduce branch mispredictions (it is unlikely
	   strlen will be repeatedly called on strings with the same length).

	   If the string is longer than 16 bytes, we align src so we don't
	   need further page cross checks, and process 32 bytes per iteration
	   using the fast NUL check.  If we encounter non-ASCII characters,
	   fall back to a second loop using the full NUL check.

	   If the page cross check fails, we read 16 bytes from an aligned
	   address, remove any characters before the string, and continue
	   in the main loop using aligned loads.  Since strings crossing a
	   page in the first 16 bytes are rare (probability of
	   16/MIN_PAGE_SIZE ~= 0.4%), this case does not need to be optimized.

	   AArch64 systems have a minimum page size of 4k.  We don't bother
	   checking for larger page sizes - the cost of setting up the correct
	   page size is just not worth the extra gain from a small reduction in
	   the cases taking the slow path.  Note that we only care about
	   whether the first fetch, which may be misaligned, crosses a page
	   boundary.  */

/* size_t strlen (const char *s)

   In:       x0 (srcin) = address of the NUL-terminated string.
   Out:      x0 (len)   = number of bytes preceding the first NUL.
   Clobbers: x1-x8 and the NZCV condition flags.  */

ENTRY(strlen)
	/* Take the slow entry when the 16-byte load at srcin could run past
	   the end of the (minimum-sized) page containing srcin.  */
	and	tmp1, srcin, MIN_PAGE_SIZE - 1
	mov	zeroones, REP8_01
	cmp	tmp1, MIN_PAGE_SIZE - 16
	b.gt	L(page_cross)
	ldp	data1, data2, [srcin]
#ifdef __AARCH64EB__
	/* For big-endian, carry propagation (if the final byte in the
	   string is 0x01) means we cannot use has_nul1/2 directly.
	   Since we expect strings to be small and early-exit,
	   byte-swap the data now so has_nul1/2 will be correct.  */
	rev	data1, data1
	rev	data2, data2
#endif
	/* Accurate check on both words: (X - 1) & ~(X | 0x7f).  bics clears
	   C; ccmp then redoes the compare on has_nul2 only when has_nul1 is
	   zero (setting C), otherwise it forces NZCV to 0.  */
	sub	tmp1, data1, zeroones
	orr	tmp2, data1, REP8_7f
	sub	tmp3, data2, zeroones
	orr	tmp4, data2, REP8_7f
	bics	has_nul1, tmp1, tmp2
	bic	has_nul2, tmp3, tmp4
	ccmp	has_nul2, 0, 0, eq
	beq	L(main_loop_entry)

	/* Enter with C = has_nul1 == 0.  */
	csel	has_nul1, has_nul1, has_nul2, cc
	mov	len, 8
	/* rev + clz yields 8 * (index of the first NUL byte in the word).  */
	rev	has_nul1, has_nul1
	clz	tmp1, has_nul1
	csel	len, xzr, len, cc	/* Base is 0 for word 1, 8 for word 2.  */
	add	len, len, tmp1, lsr 3
	ret

	/* The inner loop processes 32 bytes per iteration and uses the fast
	   NUL check.  If we encounter non-ASCII characters, use a second
	   loop with the accurate NUL check.  */
	.p2align 4
L(main_loop_entry):
	/* Bias src so that the pre-indexed load below starts at the first
	   16-byte aligned address above srcin.  */
	bic	src, srcin, 15
	sub	src, src, 16
L(main_loop):
	ldp	data1, data2, [src, 32]!
L(page_cross_entry):
	sub	tmp1, data1, zeroones
	sub	tmp3, data2, zeroones
	orr	tmp2, tmp1, tmp3
	tst	tmp2, zeroones, lsl 7	/* zeroones << 7 == REP8_80.  */
	bne	1f
	ldp	data1, data2, [src, 16]
	sub	tmp1, data1, zeroones
	sub	tmp3, data2, zeroones
	orr	tmp2, tmp1, tmp3
	tst	tmp2, zeroones, lsl 7
	beq	L(main_loop)
	/* The hit is in the second 16 bytes: make src point at them.  */
	add	src, src, 16
1:
	/* The fast check failed, so do the slower, accurate NUL check.  */
	orr	tmp2, data1, REP8_7f
	orr	tmp4, data2, REP8_7f
	bics	has_nul1, tmp1, tmp2
	bic	has_nul2, tmp3, tmp4
	ccmp	has_nul2, 0, 0, eq
	beq	L(nonascii_loop)

	/* Enter with C = has_nul1 == 0.  */
L(tail):
#ifdef __AARCH64EB__
	/* For big-endian, carry propagation (if the final byte in the
	   string is 0x01) means we cannot use has_nul1/2 directly.  The
	   easiest way to get the correct byte is to byte-swap the data
	   and calculate the syndrome a second time.  */
	csel	data1, data1, data2, cc
	rev	data1, data1
	sub	tmp1, data1, zeroones
	orr	tmp2, data1, REP8_7f
	bic	has_nul1, tmp1, tmp2
#else
	csel	has_nul1, has_nul1, has_nul2, cc
#endif
	/* len = (src - srcin) + (0 or 8 for the word) + byte index.  */
	sub	len, src, srcin
	rev	has_nul1, has_nul1
	add	tmp2, len, 8
	clz	tmp1, has_nul1
	csel	len, len, tmp2, cc
	add	len, len, tmp1, lsr 3
	ret

	/* Accurate-check loop, 2 x 16 bytes per iteration.  src always
	   addresses the 16 bytes just loaded, as L(tail) expects.  */
L(nonascii_loop):
	ldp	data1, data2, [src, 16]!
	sub	tmp1, data1, zeroones
	orr	tmp2, data1, REP8_7f
	sub	tmp3, data2, zeroones
	orr	tmp4, data2, REP8_7f
	bics	has_nul1, tmp1, tmp2
	bic	has_nul2, tmp3, tmp4
	ccmp	has_nul2, 0, 0, eq
	bne	L(tail)
	ldp	data1, data2, [src, 16]!
	sub	tmp1, data1, zeroones
	orr	tmp2, data1, REP8_7f
	sub	tmp3, data2, zeroones
	orr	tmp4, data2, REP8_7f
	bics	has_nul1, tmp1, tmp2
	bic	has_nul2, tmp3, tmp4
	ccmp	has_nul2, 0, 0, eq
	beq	L(nonascii_loop)
	b	L(tail)

	/* Load 16 bytes from [srcin & ~15] and OR 0x7f into the bytes that
	   precede srcin, so they can never look like a NUL byte.  When srcin
	   lies in the second word, the whole first word is set to all-ones
	   instead.  Then continue in the aligned loop.  */
L(page_cross):
	bic	src, srcin, 15
	ldp	data1, data2, [src]
	lsl	tmp1, srcin, 3
	mov	tmp4, -1
#ifdef __AARCH64EB__
	/* Big-endian.  Early bytes are at MSB.  */
	lsr	tmp1, tmp4, tmp1	/* Shift (tmp1 & 63).  */
#else
	/* Little-endian.  Early bytes are at LSB.  */
	lsl	tmp1, tmp4, tmp1	/* Shift (tmp1 & 63).  */
#endif
	orr	tmp1, tmp1, REP8_80
	orn	data1, data1, tmp1
	orn	tmp2, data2, tmp1
	tst	srcin, 8
	csel	data1, data1, tmp4, eq
	csel	data2, data2, tmp2, eq
	b	L(page_cross_entry)

END(strlen)