/*
 * The authors of this software are Rob Pike and Ken Thompson.
 *              Copyright (c) 2002 by Lucent Technologies.
 * Permission to use, copy, modify, and distribute this software for any
 * purpose without fee is hereby granted, provided that this entire notice
 * is included in all copies of any software which is or includes a copy
 * or modification of this software and in all copies of the supporting
 * documentation for such software.
 * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
 * WARRANTY.  IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY
 * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
 * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
 */
142ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson#include <stdarg.h>
152ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson#include <string.h>
162ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson#include "util/utf.h"
172ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
182ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonnamespace re2 {
192ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
202ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonenum
212ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson{
222ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson	Bit1	= 7,
232ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson	Bitx	= 6,
242ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson	Bit2	= 5,
252ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson	Bit3	= 4,
262ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson	Bit4	= 3,
272ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson	Bit5	= 2,
282ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
292ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson	T1	= ((1<<(Bit1+1))-1) ^ 0xFF,	/* 0000 0000 */
302ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson	Tx	= ((1<<(Bitx+1))-1) ^ 0xFF,	/* 1000 0000 */
312ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson	T2	= ((1<<(Bit2+1))-1) ^ 0xFF,	/* 1100 0000 */
322ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson	T3	= ((1<<(Bit3+1))-1) ^ 0xFF,	/* 1110 0000 */
332ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson	T4	= ((1<<(Bit4+1))-1) ^ 0xFF,	/* 1111 0000 */
342ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson	T5	= ((1<<(Bit5+1))-1) ^ 0xFF,	/* 1111 1000 */
352ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
362ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson	Rune1	= (1<<(Bit1+0*Bitx))-1,		/* 0000 0000 0111 1111 */
372ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson	Rune2	= (1<<(Bit2+1*Bitx))-1,		/* 0000 0111 1111 1111 */
382ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson	Rune3	= (1<<(Bit3+2*Bitx))-1,		/* 1111 1111 1111 1111 */
392ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson	Rune4	= (1<<(Bit4+3*Bitx))-1,
402ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson                                        /* 0001 1111 1111 1111 1111 1111 */
412ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
422ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson	Maskx	= (1<<Bitx)-1,			/* 0011 1111 */
432ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson	Testx	= Maskx ^ 0xFF,			/* 1100 0000 */
442ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
452ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson	Bad	= Runeerror,
462ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson};
472ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
482ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonint
492ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonchartorune(Rune *rune, const char *str)
502ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson{
512ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson	int c, c1, c2, c3;
522ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson	long l;
532ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
542ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson	/*
552ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson	 * one character sequence
562ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson	 *	00000-0007F => T1
572ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson	 */
582ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson	c = *(unsigned char*)str;
592ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson	if(c < Tx) {
602ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson		*rune = c;
612ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson		return 1;
622ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson	}
632ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
642ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson	/*
652ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson	 * two character sequence
662ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson	 *	0080-07FF => T2 Tx
672ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson	 */
682ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson	c1 = *(unsigned char*)(str+1) ^ Tx;
692ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson	if(c1 & Testx)
702ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson		goto bad;
712ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson	if(c < T3) {
722ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson		if(c < T2)
732ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson			goto bad;
742ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson		l = ((c << Bitx) | c1) & Rune2;
752ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson		if(l <= Rune1)
762ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson			goto bad;
772ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson		*rune = l;
782ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson		return 2;
792ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson	}
802ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
812ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson	/*
822ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson	 * three character sequence
832ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson	 *	0800-FFFF => T3 Tx Tx
842ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson	 */
852ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson	c2 = *(unsigned char*)(str+2) ^ Tx;
862ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson	if(c2 & Testx)
872ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson		goto bad;
882ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson	if(c < T4) {
892ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson		l = ((((c << Bitx) | c1) << Bitx) | c2) & Rune3;
902ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson		if(l <= Rune2)
912ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson			goto bad;
922ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson		*rune = l;
932ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson		return 3;
942ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson	}
952ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
962ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson	/*
972ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson	 * four character sequence (21-bit value)
982ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson	 *	10000-1FFFFF => T4 Tx Tx Tx
992ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson	 */
1002ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson	c3 = *(unsigned char*)(str+3) ^ Tx;
1012ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson	if (c3 & Testx)
1022ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson		goto bad;
1032ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson	if (c < T5) {
1042ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson		l = ((((((c << Bitx) | c1) << Bitx) | c2) << Bitx) | c3) & Rune4;
1052ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson		if (l <= Rune3)
1062ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson			goto bad;
1072ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson		*rune = l;
1082ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson		return 4;
1092ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson	}
1102ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
1112ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson	/*
1122ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson	 * Support for 5-byte or longer UTF-8 would go here, but
1132ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson	 * since we don't have that, we'll just fall through to bad.
1142ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson	 */
1152ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
1162ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson	/*
1172ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson	 * bad decoding
1182ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson	 */
1192ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonbad:
1202ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson	*rune = Bad;
1212ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson	return 1;
1222ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson}
1232ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
1242ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonint
1252ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonrunetochar(char *str, const Rune *rune)
1262ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson{
1272ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson	/* Runes are signed, so convert to unsigned for range check. */
1282ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson	unsigned long c;
1292ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
1302ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson	/*
1312ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson	 * one character sequence
1322ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson	 *	00000-0007F => 00-7F
1332ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson	 */
1342ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson	c = *rune;
1352ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson	if(c <= Rune1) {
1362ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson		str[0] = c;
1372ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson		return 1;
1382ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson	}
1392ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
1402ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson	/*
1412ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson	 * two character sequence
1422ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson	 *	0080-07FF => T2 Tx
1432ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson	 */
1442ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson	if(c <= Rune2) {
1452ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson		str[0] = T2 | (c >> 1*Bitx);
1462ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson		str[1] = Tx | (c & Maskx);
1472ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson		return 2;
1482ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson	}
1492ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
1502ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson	/*
1512ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson	 * If the Rune is out of range, convert it to the error rune.
1522ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson	 * Do this test here because the error rune encodes to three bytes.
1532ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson	 * Doing it earlier would duplicate work, since an out of range
1542ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson	 * Rune wouldn't have fit in one or two bytes.
1552ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson	 */
1562ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson	if (c > Runemax)
1572ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson		c = Runeerror;
1582ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
1592ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson	/*
1602ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson	 * three character sequence
1612ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson	 *	0800-FFFF => T3 Tx Tx
1622ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson	 */
1632ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson	if (c <= Rune3) {
1642ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson		str[0] = T3 |  (c >> 2*Bitx);
1652ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson		str[1] = Tx | ((c >> 1*Bitx) & Maskx);
1662ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson		str[2] = Tx |  (c & Maskx);
1672ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson		return 3;
1682ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson	}
1692ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
1702ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson	/*
1712ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson	 * four character sequence (21-bit value)
1722ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson	 *     10000-1FFFFF => T4 Tx Tx Tx
1732ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson	 */
1742ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson	str[0] = T4 | (c >> 3*Bitx);
1752ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson	str[1] = Tx | ((c >> 2*Bitx) & Maskx);
1762ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson	str[2] = Tx | ((c >> 1*Bitx) & Maskx);
1772ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson	str[3] = Tx | (c & Maskx);
1782ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson	return 4;
1792ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson}
1802ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
1812ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonint
1822ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonrunelen(Rune rune)
1832ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson{
1842ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson	char str[10];
1852ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
1862ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson	return runetochar(str, &rune);
1872ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson}
1882ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
1892ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonint
1902ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonfullrune(const char *str, int n)
1912ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson{
1922ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson	if (n > 0) {
1932ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson		int c = *(unsigned char*)str;
1942ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson		if (c < Tx)
1952ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson			return 1;
1962ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson		if (n > 1) {
1972ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson			if (c < T3)
1982ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson				return 1;
1992ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson			if (n > 2) {
2002ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson				if (c < T4 || n > 3)
2012ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson					return 1;
2022ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson			}
2032ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson		}
2042ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson	}
2052ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson	return 0;
2062ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson}
2072ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
2082ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
2092ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonint
2102ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonutflen(const char *s)
2112ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson{
2122ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson	int c;
2132ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson	long n;
2142ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson	Rune rune;
2152ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
2162ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson	n = 0;
2172ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson	for(;;) {
2182ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson		c = *(unsigned char*)s;
2192ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson		if(c < Runeself) {
2202ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson			if(c == 0)
2212ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson				return n;
2222ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson			s++;
2232ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson		} else
2242ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson			s += chartorune(&rune, s);
2252ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson		n++;
2262ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson	}
2272ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson	return 0;
2282ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson}
2292ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
2302ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonchar*
2312ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonutfrune(const char *s, Rune c)
2322ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson{
2332ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson	long c1;
2342ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson	Rune r;
2352ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson	int n;
2362ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
2372ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson	if(c < Runesync)		/* not part of utf sequence */
2382ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson		return strchr((char*)s, c);
2392ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
2402ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson	for(;;) {
2412ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson		c1 = *(unsigned char*)s;
2422ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson		if(c1 < Runeself) {	/* one byte rune */
2432ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson			if(c1 == 0)
2442ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson				return 0;
2452ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson			if(c1 == c)
2462ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson				return (char*)s;
2472ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson			s++;
2482ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson			continue;
2492ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson		}
2502ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson		n = chartorune(&r, s);
2512ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson		if(r == c)
2522ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson			return (char*)s;
2532ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson		s += n;
2542ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson	}
2552ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson	return 0;
2562ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson}
2572ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
2582ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson}  // namespace re2
259