1ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk/*************************************************
2ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk*      Perl-Compatible Regular Expressions       *
3ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk*************************************************/
4ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk
5ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk/* PCRE is a library of functions to support regular expressions whose syntax
6ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchoukand semantics are as close as possible to those of the Perl 5 language.
7ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk
8ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk                       Written by Philip Hazel
9ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk           Copyright (c) 1997-2010 University of Cambridge
10ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk
11ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk-----------------------------------------------------------------------------
12ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex SakhartchoukRedistribution and use in source and binary forms, with or without
13ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchoukmodification, are permitted provided that the following conditions are met:
14ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk
15ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk    * Redistributions of source code must retain the above copyright notice,
16ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk      this list of conditions and the following disclaimer.
17ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk
18ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk    * Redistributions in binary form must reproduce the above copyright
19ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk      notice, this list of conditions and the following disclaimer in the
20ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk      documentation and/or other materials provided with the distribution.
21ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk
22ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk    * Neither the name of the University of Cambridge nor the names of its
23ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk      contributors may be used to endorse or promote products derived from
24ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk      this software without specific prior written permission.
25ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk
26ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex SakhartchoukTHIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex SakhartchoukAND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex SakhartchoukIMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex SakhartchoukARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex SakhartchoukLIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex SakhartchoukCONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex SakhartchoukSUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex SakhartchoukINTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex SakhartchoukCONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex SakhartchoukARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex SakhartchoukPOSSIBILITY OF SUCH DAMAGE.
37ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk-----------------------------------------------------------------------------
38ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk*/
39ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk
40ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk
/* This module contains an internal function that is used to match an extended
class. It is used by both pcre_exec() and pcre_dfa_exec(). */
43ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk
44ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk
45ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk#ifdef HAVE_CONFIG_H
46ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk#include "config.h"
47ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk#endif
48ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk
49ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk#include "pcre_internal.h"
50ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk
51ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk
52ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk/*************************************************
53ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk*       Match character against an XCLASS        *
54ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk*************************************************/
55ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk
56ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk/* This function is called to match a character against an extended class that
57ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchoukmight contain values > 255 and/or Unicode properties.
58ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk
59ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex SakhartchoukArguments:
60ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk  c           the character
61ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk  data        points to the flag byte of the XCLASS data
62ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk
63ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex SakhartchoukReturns:      TRUE if character matches, else FALSE
64ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk*/
65ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk
66ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex SakhartchoukBOOL
67ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk_pcre_xclass(int c, const uschar *data)
68ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk{
69ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchoukint t;
70ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex SakhartchoukBOOL negated = (*data & XCL_NOT) != 0;
71ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk
72ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk/* Character values < 256 are matched against a bitmap, if one is present. If
73ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouknot, we still carry on, because there may be ranges that start below 256 in the
74ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchoukadditional data. */
75ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk
76ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchoukif (c < 256)
77ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk  {
78ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk  if ((*data & XCL_MAP) != 0 && (data[1 + c/8] & (1 << (c&7))) != 0)
79ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk    return !negated;   /* char found */
80ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk  }
81ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk
82ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk/* First skip the bit map if present. Then match against the list of Unicode
83ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchoukproperties or large chars or ranges that end with a large char. We won't ever
84ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchoukencounter XCL_PROP or XCL_NOTPROP when UCP support is not compiled. */
85ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk
86ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchoukif ((*data++ & XCL_MAP) != 0) data += 32;
87ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk
88ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchoukwhile ((t = *data++) != XCL_END)
89ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk  {
90ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk  int x, y;
91ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk  if (t == XCL_SINGLE)
92ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk    {
93ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk    GETCHARINC(x, data);
94ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk    if (c == x) return !negated;
95ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk    }
96ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk  else if (t == XCL_RANGE)
97ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk    {
98ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk    GETCHARINC(x, data);
99ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk    GETCHARINC(y, data);
100ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk    if (c >= x && c <= y) return !negated;
101ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk    }
102ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk
103ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk#ifdef SUPPORT_UCP
104ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk  else  /* XCL_PROP & XCL_NOTPROP */
105ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk    {
106ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk    const ucd_record *prop = GET_UCD(c);
107ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk
108ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk    switch(*data)
109ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk      {
110ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk      case PT_ANY:
111ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk      if (t == XCL_PROP) return !negated;
112ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk      break;
113ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk
114ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk      case PT_LAMP:
115ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk      if ((prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
116ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk           prop->chartype == ucp_Lt) == (t == XCL_PROP)) return !negated;
117ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk      break;
118ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk
119ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk      case PT_GC:
120ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk      if ((data[1] == _pcre_ucp_gentype[prop->chartype]) == (t == XCL_PROP))
121ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk        return !negated;
122ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk      break;
123ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk
124ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk      case PT_PC:
125ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk      if ((data[1] == prop->chartype) == (t == XCL_PROP)) return !negated;
126ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk      break;
127ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk
128ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk      case PT_SC:
129ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk      if ((data[1] == prop->script) == (t == XCL_PROP)) return !negated;
130ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk      break;
131ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk
132ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk      case PT_ALNUM:
133ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk      if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||
134ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk           _pcre_ucp_gentype[prop->chartype] == ucp_N) == (t == XCL_PROP))
135ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk        return !negated;
136ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk      break;
137ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk
138ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk      case PT_SPACE:    /* Perl space */
139ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk      if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||
140ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk           c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
141ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk             == (t == XCL_PROP))
142ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk        return !negated;
143ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk      break;
144ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk
145ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk      case PT_PXSPACE:  /* POSIX space */
146ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk      if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||
147ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk           c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
148ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk           c == CHAR_FF || c == CHAR_CR) == (t == XCL_PROP))
149ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk        return !negated;
150ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk      break;
151ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk
152ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk      case PT_WORD:
153ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk      if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||
154ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk           _pcre_ucp_gentype[prop->chartype] == ucp_N || c == CHAR_UNDERSCORE)
155ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk             == (t == XCL_PROP))
156ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk        return !negated;
157ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk      break;
158ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk
159ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk      /* This should never occur, but compilers may mutter if there is no
160ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk      default. */
161ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk
162ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk      default:
163ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk      return FALSE;
164ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk      }
165ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk
166ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk    data += 2;
167ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk    }
168ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk#endif  /* SUPPORT_UCP */
169ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk  }
170ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk
171ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchoukreturn negated;   /* char did not match */
172ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk}
173ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk
174ccf591e720ec7d70c1b8e732c2158ed304f4bf3cAlex Sakhartchouk/* End of pcre_xclass.c */
175