memchr.S revision 61833de613990f2fdaf357bb3d854d72a4980890
1/* 2 * memchr - find a character in a memory zone 3 * 4 * Copyright (c) 2014, ARM Limited 5 * All rights Reserved. 6 * Copyright (c) 2014, Linaro Ltd. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions are met: 10 * * Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * * Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * * Neither the name of the company nor the names of its contributors 16 * may be used to endorse or promote products derived from this 17 * software without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 22 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 23 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 24 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 25 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32/* Assumptions: 33 * 34 * ARMv8-a, AArch64 35 * Neon Available. 36 */ 37 38#include <private/bionic_asm.h> 39 40/* Arguments and results. 
*/

/* Register aliases: arguments and result. */
#define srcin		x0
#define chrin		w1
#define cntin		x2

#define result		x0

/* Scratch registers (all caller-saved under AAPCS64). */
#define src		x3
#define tmp		x4
#define wtmp2		w5
#define synd		x6
#define soff		x9
#define cntrem		x10

/* NEON working registers (v0-v7 are caller-saved under AAPCS64). */
#define vrepchr		v0
#define vdata1		v1
#define vdata2		v2
#define vhas_chr1	v3
#define vhas_chr2	v4
#define vrepmask	v5
#define vend		v6

/*
 * Core algorithm:
 *
 * For each 32-byte chunk we calculate a 64-bit syndrome value, with two bits
 * per byte. For each tuple, bit 0 is set if the relevant byte matched the
 * requested character and bit 1 is not used (faster than using a 32bit
 * syndrome). Since the bits in the syndrome reflect exactly the order in which
 * things occur in the original string, counting trailing zeros allows to
 * identify exactly which byte has matched.
 */

/*
 * void *memchr(const void *s, int c, size_t n)
 *
 * In:   srcin  (x0) = s, pointer to the buffer to scan
 *       chrin  (w1) = c, byte value to look for (only the low 8 bits are
 *                     used, via the .16b dup below)
 *       cntin  (x2) = n, number of bytes to scan
 * Out:  result (x0) = pointer to the first matching byte, or NULL
 * Uses only caller-saved registers (x3-x6, x9, x10, v0-v6); leaf function,
 * no stack frame.
 *
 * NOTE(review): loads are always whole aligned 32-byte blocks (see the
 * `bic` below), so this may read bytes outside [s, s+n) — but never beyond
 * the aligned 32-byte block containing a valid byte, which is the usual
 * page-safety argument for this family of routines.
 */
ENTRY(memchr)
	/*
	 * Magic constant 0x40100401 allows us to identify which lane matches
	 * the requested byte: after `and` with a cmeq result, each group of
	 * four byte lanes contributes distinct bit positions, and the two
	 * addp reductions below fold 32 lanes into one 64-bit syndrome.
	 */
	cbz	cntin, .Lzero_length	/* n == 0: return NULL */
	mov	wtmp2, #0x0401
	movk	wtmp2, #0x4010, lsl #16
	dup	vrepchr.16b, chrin	/* splat the target byte across 16 lanes */
	/* Work with aligned 32-byte chunks */
	bic	src, srcin, #31		/* src = s rounded down to 32-byte block */
	dup	vrepmask.4s, wtmp2
	ands	soff, srcin, #31	/* soff = s % 32; also sets Z for b.eq */
	and	cntrem, cntin, #31	/* cntrem = n % 32, needed by .Lmasklast */
	b.eq	.Lloop			/* already 32-byte aligned: no head fixup */

	/*
	 * Input string is not 32-byte aligned. We calculate the syndrome
	 * value for the aligned 32 bytes block containing the first bytes
	 * and mask the irrelevant part.
	 */

	ld1	{vdata1.16b, vdata2.16b}, [src], #32
	sub	tmp, soff, #32		/* tmp = -(32 - soff), bytes consumed */
	adds	cntin, cntin, tmp	/* cntin -= (32 - soff); C/Z used by b.ls */
	cmeq	vhas_chr1.16b, vdata1.16b, vrepchr.16b
	cmeq	vhas_chr2.16b, vdata2.16b, vrepchr.16b
	and	vhas_chr1.16b, vhas_chr1.16b, vrepmask.16b
	and	vhas_chr2.16b, vhas_chr2.16b, vrepmask.16b
	addp	vend.16b, vhas_chr1.16b, vhas_chr2.16b	/* 256->128 */
	addp	vend.16b, vend.16b, vend.16b		/* 128->64 */
	mov	synd, vend.2d[0]
	/* Clear the soff*2 lower bits (bytes before the start of the buffer) */
	lsl	tmp, soff, #1
	lsr	synd, synd, tmp
	lsl	synd, synd, tmp
	/*
	 * The first block can also be the last: `ls` here means the adds
	 * above borrowed or hit zero, i.e. n <= 32 - soff, so the buffer
	 * ends inside this block and the tail must be masked too.
	 */
	b.ls	.Lmasklast
	/* Have we found something already? */
	cbnz	synd, .Ltail

.Lloop:
	ld1	{vdata1.16b, vdata2.16b}, [src], #32
	subs	cntin, cntin, #32	/* flags survive down to .Lend's b.hi */
	cmeq	vhas_chr1.16b, vdata1.16b, vrepchr.16b
	cmeq	vhas_chr2.16b, vdata2.16b, vrepchr.16b
	/* If we're out of data we finish regardless of the result */
	b.ls	.Lend
	/* Use a fast check for the termination condition: any lane nonzero? */
	orr	vend.16b, vhas_chr1.16b, vhas_chr2.16b
	addp	vend.2d, vend.2d, vend.2d
	mov	synd, vend.2d[0]
	/* We're not out of data, loop if we haven't found the character */
	cbz	synd, .Lloop

.Lend:
	/* Termination condition found, let's calculate the syndrome value */
	and	vhas_chr1.16b, vhas_chr1.16b, vrepmask.16b
	and	vhas_chr2.16b, vhas_chr2.16b, vrepmask.16b
	addp	vend.16b, vhas_chr1.16b, vhas_chr2.16b	/* 256->128 */
	addp	vend.16b, vend.16b, vend.16b		/* 128->64 */
	mov	synd, vend.2d[0]
	/*
	 * Only do the clear for the last possible block: NZCV is untouched
	 * since the loop's subs (NEON ops, mov and addressing don't write
	 * flags), so `hi` means data remained beyond this chunk — we got
	 * here via a match, and no upper-bit masking is required.
	 */
	b.hi	.Ltail

.Lmasklast:
	/* Clear the (32 - ((cntrem + soff) % 32)) * 2 upper bits, i.e. the
	 * lanes for bytes past the end of the buffer. */
	add	tmp, cntrem, soff
	and	tmp, tmp, #31
	sub	tmp, tmp, #32		/* tmp = ((n + soff) % 32) - 32, negative */
	neg	tmp, tmp, lsl #1	/* tmp = (32 - (n + soff) % 32) * 2 */
	/*
	 * If (cntrem + soff) % 32 == 0 the shift amount computes to 64;
	 * AArch64 variable shifts use the amount modulo 64, so the syndrome
	 * is left unchanged — which is correct, since the buffer then ends
	 * exactly on the block boundary and nothing needs masking.
	 */
	lsl	synd, synd, tmp
	lsr	synd, synd, tmp

.Ltail:
	/* Count the trailing zeros using bit reversing */
	rbit	synd, synd
	/* Compensate the last post-increment */
	sub	src, src, #32
	/* Check that we have found a character (sets Z for the csel below;
	 * clz preserves flags) */
	cmp	synd, #0
	/* And count the leading zeros (= trailing zeros of the syndrome) */
	clz	synd, synd
	/* Compute the potential result: two syndrome bits per byte, so the
	 * matching byte is at src + (ctz / 2) */
	add	result, src, synd, lsr #1
	/* Select result or NULL */
	csel	result, xzr, result, eq
	ret

.Lzero_length:
	mov	result, xzr		/* n == 0: nothing can match */
	ret
END(memchr)