1ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk/************************************************* 2ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk* Perl-Compatible Regular Expressions * 3ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk*************************************************/ 4ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk 5ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk/* PCRE is a library of functions to support regular expressions whose syntax 6ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchoukand semantics are as close as possible to those of the Perl 5 language. 7ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk 8ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk Written by Philip Hazel 9ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk Copyright (c) 1997-2010 University of Cambridge 10ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk 11ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk----------------------------------------------------------------------------- 12ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex SakhartchoukRedistribution and use in source and binary forms, with or without 13ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchoukmodification, are permitted provided that the following conditions are met: 14ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk 15ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk * Redistributions of source code must retain the above copyright notice, 16ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk this list of conditions and the following disclaimer. 17ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk 18ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk * Redistributions in binary form must reproduce the above copyright 19ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk notice, this list of conditions and the following disclaimer in the 20ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk documentation and/or other materials provided with the distribution. 21ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk 22ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk * Neither the name of the University of Cambridge nor the names of its 23ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk contributors may be used to endorse or promote products derived from 24ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk this software without specific prior written permission. 25ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk 26ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex SakhartchoukTHIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 27ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex SakhartchoukAND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex SakhartchoukIMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex SakhartchoukARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 30ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex SakhartchoukLIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 31ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex SakhartchoukCONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 32ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex SakhartchoukSUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 33ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex SakhartchoukINTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 34ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex SakhartchoukCONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 35ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex SakhartchoukARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 36ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex SakhartchoukPOSSIBILITY OF SUCH DAMAGE. 37ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk----------------------------------------------------------------------------- 38ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk*/ 39ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk 40ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk 41ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk/* This module contains an internal function that is used to match an extended 42ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchoukclass. It is used by both pcre_exec() and pcre_def_exec(). */ 43ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk 44ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk 45ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk#ifdef HAVE_CONFIG_H 46ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk#include "config.h" 47ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk#endif 48ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk 49ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk#include "pcre_internal.h" 50ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk 51ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk 52ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk/************************************************* 53ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk* Match character against an XCLASS * 54ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk*************************************************/ 55ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk 56ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk/* This function is called to match a character against an extended class that 57ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchoukmight contain values > 255 and/or Unicode properties. 58ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk 59ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex SakhartchoukArguments: 60ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk c the character 61ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk data points to the flag byte of the XCLASS data 62ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk 63ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex SakhartchoukReturns: TRUE if character matches, else FALSE 64ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk*/ 65ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk 66ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex SakhartchoukBOOL 67ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk_pcre_xclass(int c, const uschar *data) 68ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk{ 69ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchoukint t; 70ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex SakhartchoukBOOL negated = (*data & XCL_NOT) != 0; 71ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk 72ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk/* Character values < 256 are matched against a bitmap, if one is present. If 73ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouknot, we still carry on, because there may be ranges that start below 256 in the 74ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchoukadditional data. */ 75ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk 76ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchoukif (c < 256) 77ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk { 78ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk if ((*data & XCL_MAP) != 0 && (data[1 + c/8] & (1 << (c&7))) != 0) 79ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk return !negated; /* char found */ 80ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk } 81ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk 82ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk/* First skip the bit map if present. Then match against the list of Unicode 83ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchoukproperties or large chars or ranges that end with a large char. We won't ever 84ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchoukencounter XCL_PROP or XCL_NOTPROP when UCP support is not compiled. */ 85ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk 86ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchoukif ((*data++ & XCL_MAP) != 0) data += 32; 87ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk 88ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchoukwhile ((t = *data++) != XCL_END) 89ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk { 90ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk int x, y; 91ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk if (t == XCL_SINGLE) 92ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk { 93ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk GETCHARINC(x, data); 94ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk if (c == x) return !negated; 95ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk } 96ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk else if (t == XCL_RANGE) 97ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk { 98ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk GETCHARINC(x, data); 99ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk GETCHARINC(y, data); 100ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk if (c >= x && c <= y) return !negated; 101ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk } 102ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk 103ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk#ifdef SUPPORT_UCP 104ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk else /* XCL_PROP & XCL_NOTPROP */ 105ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk { 106ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk const ucd_record *prop = GET_UCD(c); 107ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk 108ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk switch(*data) 109ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk { 110ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk case PT_ANY: 111ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk if (t == XCL_PROP) return !negated; 112ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk break; 113ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk 114ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk case PT_LAMP: 115ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk if ((prop->chartype == ucp_Lu || prop->chartype == ucp_Ll || 116ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk prop->chartype == ucp_Lt) == (t == XCL_PROP)) return !negated; 117ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk break; 118ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk 119ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk case PT_GC: 120ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk if ((data[1] == _pcre_ucp_gentype[prop->chartype]) == (t == XCL_PROP)) 121ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk return !negated; 122ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk break; 123ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk 124ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk case PT_PC: 125ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk if ((data[1] == prop->chartype) == (t == XCL_PROP)) return !negated; 126ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk break; 127ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk 128ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk case PT_SC: 129ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk if ((data[1] == prop->script) == (t == XCL_PROP)) return !negated; 130ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk break; 131ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk 132ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk case PT_ALNUM: 133ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk if ((_pcre_ucp_gentype[prop->chartype] == ucp_L || 134ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk _pcre_ucp_gentype[prop->chartype] == ucp_N) == (t == XCL_PROP)) 135ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk return !negated; 136ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk break; 137ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk 138ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk case PT_SPACE: /* Perl space */ 139ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z || 140ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR) 141ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk == (t == XCL_PROP)) 142ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk return !negated; 143ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk break; 144ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk 145ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk case PT_PXSPACE: /* POSIX space */ 146ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z || 147ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk c == CHAR_HT || c == CHAR_NL || c == CHAR_VT || 148ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk c == CHAR_FF || c == CHAR_CR) == (t == XCL_PROP)) 149ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk return !negated; 150ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk break; 151ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk 152ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk case PT_WORD: 153ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk if ((_pcre_ucp_gentype[prop->chartype] == ucp_L || 154ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk _pcre_ucp_gentype[prop->chartype] == ucp_N || c == CHAR_UNDERSCORE) 155ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk == (t == XCL_PROP)) 156ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk return !negated; 157ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk break; 158ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk 159ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk /* This should never occur, but compilers may mutter if there is no 160ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk default. */ 161ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk 162ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk default: 163ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk return FALSE; 164ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk } 165ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk 166ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk data += 2; 167ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk } 168ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk#endif /* SUPPORT_UCP */ 169ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk } 170ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk 171ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchoukreturn negated; /* char did not match */ 172ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk} 173ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk 174ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk/* End of pcre_xclass.c */ 175