memchr.S revision 8c20c13100d159ff505af9e6e19cab30f368a074
/*
 * memchr - find a character in a memory zone
 *
 * Copyright (c) 2014, ARM Limited
 * All rights Reserved.
 * Copyright (c) 2014, Linaro Ltd.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in the
 *       documentation and/or other materials provided with the distribution.
 *     * Neither the name of the company nor the names of its contributors
 *       may be used to endorse or promote products derived from this
 *       software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/* Assumptions:
 *
 * ARMv8-a, AArch64
 * Neon Available.
 */

#include <private/bionic_asm.h>

/*
 * void *memchr(const void *srcin, int chrin, size_t cntin)
 *
 * In:   x0 = srcin  (pointer to the buffer to scan)
 *       w1 = chrin  (byte value to search for; dup .16b uses the low byte)
 *       x2 = cntin  (number of bytes to scan)
 * Out:  x0 = pointer to the first matching byte, or NULL if none found
 *
 * Uses only caller-saved registers (x0-x10, v0-v6); no stack frame.
 * NOTE(review): reads are done in aligned 32-byte blocks, so the routine
 * may touch bytes outside [srcin, srcin+cntin) within the same 32-byte
 * aligned window; matches in those bytes are masked out of the syndrome.
 */

/* Arguments and results.  */
#define srcin		x0
#define chrin		w1
#define cntin		x2

#define result		x0

#define src		x3
#define tmp		x4
#define wtmp2		w5
#define synd		x6
#define soff		x9
#define cntrem		x10

#define vrepchr		v0
#define vdata1		v1
#define vdata2		v2
#define vhas_chr1	v3
#define vhas_chr2	v4
#define vrepmask	v5
#define vend		v6

/*
 * Core algorithm:
 *
 * For each 32-byte chunk we calculate a 64-bit syndrome value, with two bits
 * per byte. For each tuple, bit 0 is set if the relevant byte matched the
 * requested character and bit 1 is not used (faster than using a 32bit
 * syndrome). Since the bits in the syndrome reflect exactly the order in which
 * things occur in the original string, counting trailing zeros allows to
 * identify exactly which byte has matched.
 */

ENTRY(memchr)
	/*
	 * Magic constant 0x40100401 allows us to identify which lane matches
	 * the requested byte.
	 */
	mov	wtmp2, #0x0401
	movk	wtmp2, #0x4010, lsl #16		/* wtmp2 = 0x40100401 */
	dup	vrepchr.16b, chrin		/* Splat the target byte into all 16 lanes. */
	/* Work with aligned 32-byte chunks */
	bic	src, srcin, #31			/* src = srcin rounded down to a 32-byte boundary. */
	dup	vrepmask.4s, wtmp2
	ands	soff, srcin, #31		/* soff = srcin % 32; also sets Z for the b.eq below. */
	and	cntrem, cntin, #31		/* cntrem = cntin % 32, used to mask the final block. */
	b.eq	.Lloop				/* Already 32-byte aligned: skip the head fixup. */

	/*
	 * Input string is not 32-byte aligned. We calculate the syndrome
	 * value for the aligned 32 bytes block containing the first bytes
	 * and mask the irrelevant part.
	 */

	ld1	{vdata1.16b, vdata2.16b}, [src], #32
	sub	tmp, soff, #32			/* tmp = -(32 - soff), minus the bytes this block covers. */
	adds	cntin, cntin, tmp		/* cntin -= (32 - soff); flags consumed by b.ls below. */
	cmeq	vhas_chr1.16b, vdata1.16b, vrepchr.16b	/* 0xff per matching byte, 0x00 otherwise. */
	cmeq	vhas_chr2.16b, vdata2.16b, vrepchr.16b
	/* Reduce 32 lanes of 0x00/0xff match flags to a 64-bit, 2-bits-per-byte syndrome. */
	and	vhas_chr1.16b, vhas_chr1.16b, vrepmask.16b
	and	vhas_chr2.16b, vhas_chr2.16b, vrepmask.16b
	addp	vend.16b, vhas_chr1.16b, vhas_chr2.16b		/* 256->128 */
	addp	vend.16b, vend.16b, vend.16b			/* 128->64 */
	mov	synd, vend.2d[0]
	/* Clear the soff*2 lower bits */
	lsl	tmp, soff, #1
	lsr	synd, synd, tmp			/* Shift out matches before srcin... */
	lsl	synd, synd, tmp			/* ...and restore the remaining bit positions. */
	/* The first block can also be the last */
	b.ls	.Lmasklast			/* cntin <= 0 after the adds: no more data to read. */
	/* Have we found something already? */
	cbnz	synd, .Ltail

.Lloop:
	ld1	{vdata1.16b, vdata2.16b}, [src], #32
	subs	cntin, cntin, #32		/* Flags consumed by b.ls here and b.hi at .Lend. */
	cmeq	vhas_chr1.16b, vdata1.16b, vrepchr.16b
	cmeq	vhas_chr2.16b, vdata2.16b, vrepchr.16b
	/* If we're out of data we finish regardless of the result */
	b.ls	.Lend
	/* Use a fast check for the termination condition */
	orr	vend.16b, vhas_chr1.16b, vhas_chr2.16b	/* Cheap any-match test: skip the full reduce. */
	addp	vend.2d, vend.2d, vend.2d
	mov	synd, vend.2d[0]
	/* We're not out of data, loop if we haven't found the character */
	cbz	synd, .Lloop

.Lend:
	/* Termination condition found, let's calculate the syndrome value */
	and	vhas_chr1.16b, vhas_chr1.16b, vrepmask.16b
	and	vhas_chr2.16b, vhas_chr2.16b, vrepmask.16b
	addp	vend.16b, vhas_chr1.16b, vhas_chr2.16b		/* 256->128 */
	addp	vend.16b, vend.16b, vend.16b			/* 128->64 */
	mov	synd, vend.2d[0]
	/* Only do the clear for the last possible block */
	b.hi	.Ltail				/* cntin still > 0 (from the subs): block is full. */

.Lmasklast:
	/* Clear the (32 - ((cntrem + soff) % 32)) * 2 upper bits */
	add	tmp, cntrem, soff
	and	tmp, tmp, #31
	sub	tmp, tmp, #32
	neg	tmp, tmp, lsl #1		/* tmp = (32 - ((cntrem + soff) % 32)) * 2 */
	lsl	synd, synd, tmp			/* Shift out matches beyond srcin + cntin... */
	lsr	synd, synd, tmp			/* ...and restore the remaining bit positions. */

.Ltail:
	/* Count the trailing zeros using bit reversing */
	rbit	synd, synd
	/* Compensate the last post-increment */
	sub	src, src, #32
	/* Check that we have found a character */
	cmp	synd, #0			/* Sets Z for the csel; clz below leaves flags alone. */
	/* And count the leading zeros */
	clz	synd, synd			/* After rbit: index of first set bit = 2 * byte offset. */
	/* Compute the potential result */
	add	result, src, synd, lsr #1	/* Divide the bit index by 2 to get the byte offset. */
	/* Select result or NULL */
	csel	result, xzr, result, eq		/* synd == 0 means no match: return NULL. */
	ret
END(memchr)