12ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson/* 22ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson * The authors of this software are Rob Pike and Ken Thompson. 32ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson * Copyright (c) 2002 by Lucent Technologies. 42ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson * Permission to use, copy, modify, and distribute this software for any 52ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson * purpose without fee is hereby granted, provided that this entire notice 62ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson * is included in all copies of any software which is or includes a copy 72ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson * or modification of this software and in all copies of the supporting 82ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson * documentation for such software. 92ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED 102ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY 112ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY 122ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. 132ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson */ 142ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson#include <stdarg.h> 152ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson#include <string.h> 162ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson#include "util/utf.h" 172ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 182ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonnamespace re2 { 192ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 202ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonenum 212ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson{ 222ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson Bit1 = 7, 232ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson Bitx = 6, 242ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson Bit2 = 5, 252ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson Bit3 = 4, 262ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson Bit4 = 3, 272ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson Bit5 = 2, 282ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 292ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson T1 = ((1<<(Bit1+1))-1) ^ 0xFF, /* 0000 0000 */ 302ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson Tx = ((1<<(Bitx+1))-1) ^ 0xFF, /* 1000 0000 */ 312ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson T2 = ((1<<(Bit2+1))-1) ^ 0xFF, /* 1100 0000 */ 322ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson T3 = ((1<<(Bit3+1))-1) ^ 0xFF, /* 1110 0000 */ 332ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson T4 = ((1<<(Bit4+1))-1) ^ 0xFF, /* 1111 0000 */ 342ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson T5 = ((1<<(Bit5+1))-1) ^ 0xFF, /* 1111 1000 */ 352ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 362ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson Rune1 = (1<<(Bit1+0*Bitx))-1, /* 0000 0000 0111 1111 */ 372ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson Rune2 = (1<<(Bit2+1*Bitx))-1, /* 0000 0111 1111 1111 */ 382ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson Rune3 = (1<<(Bit3+2*Bitx))-1, /* 1111 1111 1111 1111 */ 392ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson Rune4 = (1<<(Bit4+3*Bitx))-1, 402ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson /* 0001 1111 1111 1111 1111 1111 */ 412ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 422ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson Maskx = (1<<Bitx)-1, /* 0011 1111 */ 432ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson Testx = Maskx ^ 0xFF, /* 1100 0000 */ 442ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 452ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson Bad = Runeerror, 462ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson}; 472ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 482ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonint 492ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonchartorune(Rune *rune, const char *str) 502ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson{ 512ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int c, c1, c2, c3; 522ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson long l; 532ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 542ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson /* 552ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson * one character sequence 562ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson * 00000-0007F => T1 572ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson */ 582ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson c = *(unsigned char*)str; 592ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if(c < Tx) { 602ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson *rune = c; 612ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return 1; 622ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 632ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 642ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson /* 652ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson * two character sequence 662ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson * 0080-07FF => T2 Tx 672ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson */ 682ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson c1 = *(unsigned char*)(str+1) ^ Tx; 692ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if(c1 & Testx) 702ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson goto bad; 712ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if(c < T3) { 722ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if(c < T2) 732ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson goto bad; 742ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson l = ((c << Bitx) | c1) & Rune2; 752ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if(l <= Rune1) 762ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson goto bad; 772ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson *rune = l; 782ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return 2; 792ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 802ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 812ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson /* 822ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson * three character sequence 832ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson * 0800-FFFF => T3 Tx Tx 842ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson */ 852ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson c2 = *(unsigned char*)(str+2) ^ Tx; 862ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if(c2 & Testx) 872ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson goto bad; 882ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if(c < T4) { 892ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson l = ((((c << Bitx) | c1) << Bitx) | c2) & Rune3; 902ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if(l <= Rune2) 912ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson goto bad; 922ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson *rune = l; 932ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return 3; 942ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 952ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 962ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson /* 972ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson * four character sequence (21-bit value) 982ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson * 10000-1FFFFF => T4 Tx Tx Tx 992ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson */ 1002ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson c3 = *(unsigned char*)(str+3) ^ Tx; 1012ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (c3 & Testx) 1022ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson goto bad; 1032ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (c < T5) { 1042ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson l = ((((((c << Bitx) | c1) << Bitx) | c2) << Bitx) | c3) & Rune4; 1052ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (l <= Rune3) 1062ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson goto bad; 1072ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson *rune = l; 1082ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return 4; 1092ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 1102ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 1112ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson /* 1122ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson * Support for 5-byte or longer UTF-8 would go here, but 1132ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson * since we don't have that, we'll just fall through to bad. 1142ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson */ 1152ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 1162ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson /* 1172ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson * bad decoding 1182ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson */ 1192ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonbad: 1202ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson *rune = Bad; 1212ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return 1; 1222ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 1232ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 1242ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonint 1252ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonrunetochar(char *str, const Rune *rune) 1262ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson{ 1272ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson /* Runes are signed, so convert to unsigned for range check. */ 1282ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson unsigned long c; 1292ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 1302ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson /* 1312ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson * one character sequence 1322ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson * 00000-0007F => 00-7F 1332ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson */ 1342ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson c = *rune; 1352ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if(c <= Rune1) { 1362ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson str[0] = c; 1372ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return 1; 1382ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 1392ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 1402ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson /* 1412ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson * two character sequence 1422ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson * 0080-07FF => T2 Tx 1432ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson */ 1442ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if(c <= Rune2) { 1452ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson str[0] = T2 | (c >> 1*Bitx); 1462ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson str[1] = Tx | (c & Maskx); 1472ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return 2; 1482ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 1492ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 1502ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson /* 1512ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson * If the Rune is out of range, convert it to the error rune. 1522ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson * Do this test here because the error rune encodes to three bytes. 1532ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson * Doing it earlier would duplicate work, since an out of range 1542ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson * Rune wouldn't have fit in one or two bytes. 1552ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson */ 1562ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (c > Runemax) 1572ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson c = Runeerror; 1582ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 1592ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson /* 1602ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson * three character sequence 1612ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson * 0800-FFFF => T3 Tx Tx 1622ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson */ 1632ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (c <= Rune3) { 1642ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson str[0] = T3 | (c >> 2*Bitx); 1652ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson str[1] = Tx | ((c >> 1*Bitx) & Maskx); 1662ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson str[2] = Tx | (c & Maskx); 1672ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return 3; 1682ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 1692ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 1702ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson /* 1712ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson * four character sequence (21-bit value) 1722ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson * 10000-1FFFFF => T4 Tx Tx Tx 1732ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson */ 1742ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson str[0] = T4 | (c >> 3*Bitx); 1752ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson str[1] = Tx | ((c >> 2*Bitx) & Maskx); 1762ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson str[2] = Tx | ((c >> 1*Bitx) & Maskx); 1772ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson str[3] = Tx | (c & Maskx); 1782ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return 4; 1792ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 1802ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 1812ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonint 1822ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonrunelen(Rune rune) 1832ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson{ 1842ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson char str[10]; 1852ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 1862ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return runetochar(str, &rune); 1872ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 1882ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 1892ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonint 1902ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonfullrune(const char *str, int n) 1912ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson{ 1922ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (n > 0) { 1932ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int c = *(unsigned char*)str; 1942ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (c < Tx) 1952ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return 1; 1962ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (n > 1) { 1972ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (c < T3) 1982ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return 1; 1992ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (n > 2) { 2002ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (c < T4 || n > 3) 2012ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return 1; 2022ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 2032ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 2042ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 2052ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return 0; 2062ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 2072ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 2082ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 2092ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonint 2102ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonutflen(const char *s) 2112ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson{ 2122ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int c; 2132ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson long n; 2142ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson Rune rune; 2152ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 2162ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson n = 0; 2172ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson for(;;) { 2182ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson c = *(unsigned char*)s; 2192ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if(c < Runeself) { 2202ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if(c == 0) 2212ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return n; 2222ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson s++; 2232ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } else 2242ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson s += chartorune(&rune, s); 2252ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson n++; 2262ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 2272ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return 0; 2282ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 2292ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 2302ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonchar* 2312ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonutfrune(const char *s, Rune c) 2322ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson{ 2332ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson long c1; 2342ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson Rune r; 2352ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int n; 2362ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 2372ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if(c < Runesync) /* not part of utf sequence */ 2382ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return strchr((char*)s, c); 2392ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 2402ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson for(;;) { 2412ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson c1 = *(unsigned char*)s; 2422ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if(c1 < Runeself) { /* one byte rune */ 2432ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if(c1 == 0) 2442ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return 0; 2452ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if(c1 == c) 2462ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return (char*)s; 2472ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson s++; 2482ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson continue; 2492ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 2502ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson n = chartorune(&r, s); 2512ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if(r == c) 2522ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return (char*)s; 2532ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson s += n; 2542ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 2552ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return 0; 2562ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 2572ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 2582ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} // namespace re2 259