105436638acc7c010349a69c3395f1a57c642dc62Ying Wang/* Searching in a string.
205436638acc7c010349a69c3395f1a57c642dc62Ying Wang   Copyright (C) 2003, 2007-2012 Free Software Foundation, Inc.
305436638acc7c010349a69c3395f1a57c642dc62Ying Wang
405436638acc7c010349a69c3395f1a57c642dc62Ying Wang   This program is free software: you can redistribute it and/or modify
505436638acc7c010349a69c3395f1a57c642dc62Ying Wang   it under the terms of the GNU General Public License as published by
605436638acc7c010349a69c3395f1a57c642dc62Ying Wang   the Free Software Foundation; either version 3 of the License, or
705436638acc7c010349a69c3395f1a57c642dc62Ying Wang   (at your option) any later version.
805436638acc7c010349a69c3395f1a57c642dc62Ying Wang
905436638acc7c010349a69c3395f1a57c642dc62Ying Wang   This program is distributed in the hope that it will be useful,
1005436638acc7c010349a69c3395f1a57c642dc62Ying Wang   but WITHOUT ANY WARRANTY; without even the implied warranty of
1105436638acc7c010349a69c3395f1a57c642dc62Ying Wang   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
1205436638acc7c010349a69c3395f1a57c642dc62Ying Wang   GNU General Public License for more details.
1305436638acc7c010349a69c3395f1a57c642dc62Ying Wang
1405436638acc7c010349a69c3395f1a57c642dc62Ying Wang   You should have received a copy of the GNU General Public License
1505436638acc7c010349a69c3395f1a57c642dc62Ying Wang   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
1605436638acc7c010349a69c3395f1a57c642dc62Ying Wang
1705436638acc7c010349a69c3395f1a57c642dc62Ying Wang#include <config.h>
1805436638acc7c010349a69c3395f1a57c642dc62Ying Wang
1905436638acc7c010349a69c3395f1a57c642dc62Ying Wang/* Specification.  */
2005436638acc7c010349a69c3395f1a57c642dc62Ying Wang#include <string.h>
2105436638acc7c010349a69c3395f1a57c642dc62Ying Wang
2205436638acc7c010349a69c3395f1a57c642dc62Ying Wang/* Find the first occurrence of C in S or the final NUL byte.  */
2305436638acc7c010349a69c3395f1a57c642dc62Ying Wangchar *
2405436638acc7c010349a69c3395f1a57c642dc62Ying Wangstrchrnul (const char *s, int c_in)
2505436638acc7c010349a69c3395f1a57c642dc62Ying Wang{
2605436638acc7c010349a69c3395f1a57c642dc62Ying Wang  /* On 32-bit hardware, choosing longword to be a 32-bit unsigned
2705436638acc7c010349a69c3395f1a57c642dc62Ying Wang     long instead of a 64-bit uintmax_t tends to give better
2805436638acc7c010349a69c3395f1a57c642dc62Ying Wang     performance.  On 64-bit hardware, unsigned long is generally 64
2905436638acc7c010349a69c3395f1a57c642dc62Ying Wang     bits already.  Change this typedef to experiment with
3005436638acc7c010349a69c3395f1a57c642dc62Ying Wang     performance.  */
3105436638acc7c010349a69c3395f1a57c642dc62Ying Wang  typedef unsigned long int longword;
3205436638acc7c010349a69c3395f1a57c642dc62Ying Wang
3305436638acc7c010349a69c3395f1a57c642dc62Ying Wang  const unsigned char *char_ptr;
3405436638acc7c010349a69c3395f1a57c642dc62Ying Wang  const longword *longword_ptr;
3505436638acc7c010349a69c3395f1a57c642dc62Ying Wang  longword repeated_one;
3605436638acc7c010349a69c3395f1a57c642dc62Ying Wang  longword repeated_c;
3705436638acc7c010349a69c3395f1a57c642dc62Ying Wang  unsigned char c;
3805436638acc7c010349a69c3395f1a57c642dc62Ying Wang
3905436638acc7c010349a69c3395f1a57c642dc62Ying Wang  c = (unsigned char) c_in;
4005436638acc7c010349a69c3395f1a57c642dc62Ying Wang  if (!c)
4105436638acc7c010349a69c3395f1a57c642dc62Ying Wang    return rawmemchr (s, 0);
4205436638acc7c010349a69c3395f1a57c642dc62Ying Wang
4305436638acc7c010349a69c3395f1a57c642dc62Ying Wang  /* Handle the first few bytes by reading one byte at a time.
4405436638acc7c010349a69c3395f1a57c642dc62Ying Wang     Do this until CHAR_PTR is aligned on a longword boundary.  */
4505436638acc7c010349a69c3395f1a57c642dc62Ying Wang  for (char_ptr = (const unsigned char *) s;
4605436638acc7c010349a69c3395f1a57c642dc62Ying Wang       (size_t) char_ptr % sizeof (longword) != 0;
4705436638acc7c010349a69c3395f1a57c642dc62Ying Wang       ++char_ptr)
4805436638acc7c010349a69c3395f1a57c642dc62Ying Wang    if (!*char_ptr || *char_ptr == c)
4905436638acc7c010349a69c3395f1a57c642dc62Ying Wang      return (char *) char_ptr;
5005436638acc7c010349a69c3395f1a57c642dc62Ying Wang
5105436638acc7c010349a69c3395f1a57c642dc62Ying Wang  longword_ptr = (const longword *) char_ptr;
5205436638acc7c010349a69c3395f1a57c642dc62Ying Wang
5305436638acc7c010349a69c3395f1a57c642dc62Ying Wang  /* All these elucidatory comments refer to 4-byte longwords,
5405436638acc7c010349a69c3395f1a57c642dc62Ying Wang     but the theory applies equally well to any size longwords.  */
5505436638acc7c010349a69c3395f1a57c642dc62Ying Wang
5605436638acc7c010349a69c3395f1a57c642dc62Ying Wang  /* Compute auxiliary longword values:
5705436638acc7c010349a69c3395f1a57c642dc62Ying Wang     repeated_one is a value which has a 1 in every byte.
5805436638acc7c010349a69c3395f1a57c642dc62Ying Wang     repeated_c has c in every byte.  */
5905436638acc7c010349a69c3395f1a57c642dc62Ying Wang  repeated_one = 0x01010101;
6005436638acc7c010349a69c3395f1a57c642dc62Ying Wang  repeated_c = c | (c << 8);
6105436638acc7c010349a69c3395f1a57c642dc62Ying Wang  repeated_c |= repeated_c << 16;
6205436638acc7c010349a69c3395f1a57c642dc62Ying Wang  if (0xffffffffU < (longword) -1)
6305436638acc7c010349a69c3395f1a57c642dc62Ying Wang    {
6405436638acc7c010349a69c3395f1a57c642dc62Ying Wang      repeated_one |= repeated_one << 31 << 1;
6505436638acc7c010349a69c3395f1a57c642dc62Ying Wang      repeated_c |= repeated_c << 31 << 1;
6605436638acc7c010349a69c3395f1a57c642dc62Ying Wang      if (8 < sizeof (longword))
6705436638acc7c010349a69c3395f1a57c642dc62Ying Wang        {
6805436638acc7c010349a69c3395f1a57c642dc62Ying Wang          size_t i;
6905436638acc7c010349a69c3395f1a57c642dc62Ying Wang
7005436638acc7c010349a69c3395f1a57c642dc62Ying Wang          for (i = 64; i < sizeof (longword) * 8; i *= 2)
7105436638acc7c010349a69c3395f1a57c642dc62Ying Wang            {
7205436638acc7c010349a69c3395f1a57c642dc62Ying Wang              repeated_one |= repeated_one << i;
7305436638acc7c010349a69c3395f1a57c642dc62Ying Wang              repeated_c |= repeated_c << i;
7405436638acc7c010349a69c3395f1a57c642dc62Ying Wang            }
7505436638acc7c010349a69c3395f1a57c642dc62Ying Wang        }
7605436638acc7c010349a69c3395f1a57c642dc62Ying Wang    }
7705436638acc7c010349a69c3395f1a57c642dc62Ying Wang
7805436638acc7c010349a69c3395f1a57c642dc62Ying Wang  /* Instead of the traditional loop which tests each byte, we will
7905436638acc7c010349a69c3395f1a57c642dc62Ying Wang     test a longword at a time.  The tricky part is testing if *any of
8005436638acc7c010349a69c3395f1a57c642dc62Ying Wang     the four* bytes in the longword in question are equal to NUL or
8105436638acc7c010349a69c3395f1a57c642dc62Ying Wang     c.  We first use an xor with repeated_c.  This reduces the task
8205436638acc7c010349a69c3395f1a57c642dc62Ying Wang     to testing whether *any of the four* bytes in longword1 or
8305436638acc7c010349a69c3395f1a57c642dc62Ying Wang     longword2 is zero.
8405436638acc7c010349a69c3395f1a57c642dc62Ying Wang
8505436638acc7c010349a69c3395f1a57c642dc62Ying Wang     Let's consider longword1.  We compute tmp =
8605436638acc7c010349a69c3395f1a57c642dc62Ying Wang       ((longword1 - repeated_one) & ~longword1) & (repeated_one << 7).
8705436638acc7c010349a69c3395f1a57c642dc62Ying Wang     That is, we perform the following operations:
8805436638acc7c010349a69c3395f1a57c642dc62Ying Wang       1. Subtract repeated_one.
8905436638acc7c010349a69c3395f1a57c642dc62Ying Wang       2. & ~longword1.
9005436638acc7c010349a69c3395f1a57c642dc62Ying Wang       3. & a mask consisting of 0x80 in every byte.
9105436638acc7c010349a69c3395f1a57c642dc62Ying Wang     Consider what happens in each byte:
9205436638acc7c010349a69c3395f1a57c642dc62Ying Wang       - If a byte of longword1 is zero, step 1 and 2 transform it into 0xff,
9305436638acc7c010349a69c3395f1a57c642dc62Ying Wang         and step 3 transforms it into 0x80.  A carry can also be propagated
9405436638acc7c010349a69c3395f1a57c642dc62Ying Wang         to more significant bytes.
9505436638acc7c010349a69c3395f1a57c642dc62Ying Wang       - If a byte of longword1 is nonzero, let its lowest 1 bit be at
9605436638acc7c010349a69c3395f1a57c642dc62Ying Wang         position k (0 <= k <= 7); so the lowest k bits are 0.  After step 1,
9705436638acc7c010349a69c3395f1a57c642dc62Ying Wang         the byte ends in a single bit of value 0 and k bits of value 1.
9805436638acc7c010349a69c3395f1a57c642dc62Ying Wang         After step 2, the result is just k bits of value 1: 2^k - 1.  After
9905436638acc7c010349a69c3395f1a57c642dc62Ying Wang         step 3, the result is 0.  And no carry is produced.
10005436638acc7c010349a69c3395f1a57c642dc62Ying Wang     So, if longword1 has only non-zero bytes, tmp is zero.
10105436638acc7c010349a69c3395f1a57c642dc62Ying Wang     Whereas if longword1 has a zero byte, call j the position of the least
10205436638acc7c010349a69c3395f1a57c642dc62Ying Wang     significant zero byte.  Then the result has a zero at positions 0, ...,
10305436638acc7c010349a69c3395f1a57c642dc62Ying Wang     j-1 and a 0x80 at position j.  We cannot predict the result at the more
10405436638acc7c010349a69c3395f1a57c642dc62Ying Wang     significant bytes (positions j+1..3), but it does not matter since we
10505436638acc7c010349a69c3395f1a57c642dc62Ying Wang     already have a non-zero bit at position 8*j+7.
10605436638acc7c010349a69c3395f1a57c642dc62Ying Wang
10705436638acc7c010349a69c3395f1a57c642dc62Ying Wang     The test whether any byte in longword1 or longword2 is zero is equivalent
10805436638acc7c010349a69c3395f1a57c642dc62Ying Wang     to testing whether tmp1 is nonzero or tmp2 is nonzero.  We can combine
10905436638acc7c010349a69c3395f1a57c642dc62Ying Wang     this into a single test, whether (tmp1 | tmp2) is nonzero.
11005436638acc7c010349a69c3395f1a57c642dc62Ying Wang
11105436638acc7c010349a69c3395f1a57c642dc62Ying Wang     This test can read more than one byte beyond the end of a string,
11205436638acc7c010349a69c3395f1a57c642dc62Ying Wang     depending on where the terminating NUL is encountered.  However,
11305436638acc7c010349a69c3395f1a57c642dc62Ying Wang     this is considered safe since the initialization phase ensured
11405436638acc7c010349a69c3395f1a57c642dc62Ying Wang     that the read will be aligned, therefore, the read will not cross
11505436638acc7c010349a69c3395f1a57c642dc62Ying Wang     page boundaries and will not cause a fault.  */
11605436638acc7c010349a69c3395f1a57c642dc62Ying Wang
11705436638acc7c010349a69c3395f1a57c642dc62Ying Wang  while (1)
11805436638acc7c010349a69c3395f1a57c642dc62Ying Wang    {
11905436638acc7c010349a69c3395f1a57c642dc62Ying Wang      longword longword1 = *longword_ptr ^ repeated_c;
12005436638acc7c010349a69c3395f1a57c642dc62Ying Wang      longword longword2 = *longword_ptr;
12105436638acc7c010349a69c3395f1a57c642dc62Ying Wang
12205436638acc7c010349a69c3395f1a57c642dc62Ying Wang      if (((((longword1 - repeated_one) & ~longword1)
12305436638acc7c010349a69c3395f1a57c642dc62Ying Wang            | ((longword2 - repeated_one) & ~longword2))
12405436638acc7c010349a69c3395f1a57c642dc62Ying Wang           & (repeated_one << 7)) != 0)
12505436638acc7c010349a69c3395f1a57c642dc62Ying Wang        break;
12605436638acc7c010349a69c3395f1a57c642dc62Ying Wang      longword_ptr++;
12705436638acc7c010349a69c3395f1a57c642dc62Ying Wang    }
12805436638acc7c010349a69c3395f1a57c642dc62Ying Wang
12905436638acc7c010349a69c3395f1a57c642dc62Ying Wang  char_ptr = (const unsigned char *) longword_ptr;
13005436638acc7c010349a69c3395f1a57c642dc62Ying Wang
13105436638acc7c010349a69c3395f1a57c642dc62Ying Wang  /* At this point, we know that one of the sizeof (longword) bytes
13205436638acc7c010349a69c3395f1a57c642dc62Ying Wang     starting at char_ptr is == 0 or == c.  On little-endian machines,
13305436638acc7c010349a69c3395f1a57c642dc62Ying Wang     we could determine the first such byte without any further memory
13405436638acc7c010349a69c3395f1a57c642dc62Ying Wang     accesses, just by looking at the tmp result from the last loop
13505436638acc7c010349a69c3395f1a57c642dc62Ying Wang     iteration.  But this does not work on big-endian machines.
13605436638acc7c010349a69c3395f1a57c642dc62Ying Wang     Choose code that works in both cases.  */
13705436638acc7c010349a69c3395f1a57c642dc62Ying Wang
13805436638acc7c010349a69c3395f1a57c642dc62Ying Wang  char_ptr = (unsigned char *) longword_ptr;
13905436638acc7c010349a69c3395f1a57c642dc62Ying Wang  while (*char_ptr && (*char_ptr != c))
14005436638acc7c010349a69c3395f1a57c642dc62Ying Wang    char_ptr++;
14105436638acc7c010349a69c3395f1a57c642dc62Ying Wang  return (char *) char_ptr;
14205436638acc7c010349a69c3395f1a57c642dc62Ying Wang}
143