1// Copyright 2006 The RE2 Authors.  All Rights Reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// Test character class manipulations.
6
7#include "util/test.h"
8#include "re2/regexp.h"
9
10namespace re2 {
11
12struct CCTest {
13  struct {
14    Rune lo;
15    Rune hi;
16  } add[10];
17  int remove;
18  struct {
19    Rune lo;
20    Rune hi;
21  } final[10];
22};
23
24static CCTest tests[] = {
25  { { { 10, 20 }, {-1} }, -1,
26    { { 10, 20 }, {-1} } },
27
28  { { { 10, 20 }, { 20, 30 }, {-1} }, -1,
29    { { 10, 30 }, {-1} } },
30
31  { { { 10, 20 }, { 30, 40 }, { 20, 30 }, {-1} }, -1,
32    { { 10, 40 }, {-1} } },
33
34  { { { 0, 50 }, { 20, 30 }, {-1} }, -1,
35    { { 0, 50 }, {-1} } },
36
37  { { { 10, 11 }, { 13, 14 }, { 16, 17 }, { 19, 20 }, { 22, 23 }, {-1} }, -1,
38    { { 10, 11 }, { 13, 14 }, { 16, 17 }, { 19, 20 }, { 22, 23 }, {-1} } },
39
40  { { { 13, 14 }, { 10, 11 }, { 22, 23 }, { 19, 20 }, { 16, 17 }, {-1} }, -1,
41    { { 10, 11 }, { 13, 14 }, { 16, 17 }, { 19, 20 }, { 22, 23 }, {-1} } },
42
43  { { { 13, 14 }, { 10, 11 }, { 22, 23 }, { 19, 20 }, { 16, 17 }, {-1} }, -1,
44    { { 10, 11 }, { 13, 14 }, { 16, 17 }, { 19, 20 }, { 22, 23 }, {-1} } },
45
46  { { { 13, 14 }, { 10, 11 }, { 22, 23 }, { 19, 20 }, { 16, 17 }, { 5, 25 }, {-1} }, -1,
47    { { 5, 25 }, {-1} } },
48
49  { { { 13, 14 }, { 10, 11 }, { 22, 23 }, { 19, 20 }, { 16, 17 }, { 12, 21 }, {-1} }, -1,
50    { { 10, 23 }, {-1} } },
51
52  // These check boundary cases during negation.
53  { { { 0, Runemax }, {-1} }, -1,
54    { { 0, Runemax }, {-1} } },
55
56  { { { 0, 50 }, {-1} }, -1,
57    { { 0, 50 }, {-1} } },
58
59  { { { 50, Runemax }, {-1} }, -1,
60    { { 50, Runemax }, {-1} } },
61
62  // Check RemoveAbove.
63  { { { 50, Runemax }, {-1} }, 255,
64    { { 50, 255 }, {-1} } },
65
66  { { { 50, Runemax }, {-1} }, 65535,
67    { { 50, 65535 }, {-1} } },
68
69  { { { 50, Runemax }, {-1} }, Runemax,
70    { { 50, Runemax }, {-1} } },
71
72  { { { 50, 60 }, { 250, 260 }, { 350, 360 }, {-1} }, 255,
73    { { 50, 60 }, { 250, 255 }, {-1} } },
74
75  { { { 50, 60 }, {-1} }, 255,
76    { { 50, 60 }, {-1} } },
77
78  { { { 350, 360 }, {-1} }, 255,
79    { {-1} } },
80
81  { { {-1} }, 255,
82    { {-1} } },
83};
84
85template<class CharClass>
86static void Broke(const char *desc, const CCTest* t, CharClass* cc) {
87  if (t == NULL) {
88    printf("\t%s:", desc);
89  } else {
90    printf("\n");
91    printf("CharClass added: [%s]", desc);
92    for (int k = 0; t->add[k].lo >= 0; k++)
93      printf(" %d-%d", t->add[k].lo, t->add[k].hi);
94    printf("\n");
95    if (t->remove >= 0)
96      printf("Removed > %d\n", t->remove);
97    printf("\twant:");
98    for (int k = 0; t->final[k].lo >= 0; k++)
99      printf(" %d-%d", t->final[k].lo, t->final[k].hi);
100    printf("\n");
101    printf("\thave:");
102  }
103
104  for (typename CharClass::iterator it = cc->begin(); it != cc->end(); ++it)
105    printf(" %d-%d", it->lo, it->hi);
106  printf("\n");
107}
108
109bool ShouldContain(CCTest *t, int x) {
110  for (int j = 0; t->final[j].lo >= 0; j++)
111    if (t->final[j].lo <= x && x <= t->final[j].hi)
112      return true;
113  return false;
114}
115
116// Helpers to make templated CorrectCC work with both CharClass and CharClassBuilder.
117
118CharClass* Negate(CharClass *cc) {
119  return cc->Negate();
120}
121
122void Delete(CharClass* cc) {
123  cc->Delete();
124}
125
126CharClassBuilder* Negate(CharClassBuilder* cc) {
127  CharClassBuilder* ncc = cc->Copy();
128  ncc->Negate();
129  return ncc;
130}
131
132void Delete(CharClassBuilder* cc) {
133  delete cc;
134}
135
136template<class CharClass>
137bool CorrectCC(CharClass *cc, CCTest *t, const char *desc) {
138  typename CharClass::iterator it = cc->begin();
139  int size = 0;
140  for (int j = 0; t->final[j].lo >= 0; j++, ++it) {
141    if (it == cc->end() ||
142        it->lo != t->final[j].lo ||
143        it->hi != t->final[j].hi) {
144      Broke(desc, t, cc);
145      return false;
146    }
147    size += it->hi - it->lo + 1;
148  }
149  if (it != cc->end()) {
150    Broke(desc, t, cc);
151    return false;
152  }
153  if (cc->size() != size) {
154    Broke(desc, t, cc);
155    printf("wrong size: want %d have %d\n", size, cc->size());
156    return false;
157  }
158
159  for (int j = 0; j < 101; j++) {
160    if (j == 100)
161      j = Runemax;
162    if (ShouldContain(t, j) != cc->Contains(j)) {
163      Broke(desc, t, cc);
164      printf("want contains(%d)=%d, got %d\n",
165             j, ShouldContain(t, j), cc->Contains(j));
166      return false;
167    }
168  }
169
170  CharClass* ncc = Negate(cc);
171  for (int j = 0; j < 101; j++) {
172    if (j == 100)
173      j = Runemax;
174    if (ShouldContain(t, j) == ncc->Contains(j)) {
175      Broke(desc, t, cc);
176      Broke("ncc", NULL, ncc);
177      printf("want ncc contains(%d)!=%d, got %d\n",
178             j, ShouldContain(t, j), ncc->Contains(j));
179      Delete(ncc);
180      return false;
181    }
182    if (ncc->size() != Runemax+1 - cc->size()) {
183      Broke(desc, t, cc);
184      Broke("ncc", NULL, ncc);
185      printf("ncc size should be %d is %d\n",
186             Runemax+1 - cc->size(), ncc->size());
187      Delete(ncc);
188      return false;
189    }
190  }
191  Delete(ncc);
192  return true;
193}
194
195TEST(TestCharClassBuilder, Adds) {
196  int nfail = 0;
197  for (int i = 0; i < arraysize(tests); i++) {
198    CharClassBuilder ccb;
199    CCTest* t = &tests[i];
200    for (int j = 0; t->add[j].lo >= 0; j++)
201      ccb.AddRange(t->add[j].lo, t->add[j].hi);
202    if (t->remove >= 0)
203      ccb.RemoveAbove(t->remove);
204    if (!CorrectCC(&ccb, t, "before copy (CharClassBuilder)"))
205      nfail++;
206    CharClass* cc = ccb.GetCharClass();
207    if (!CorrectCC(cc, t, "before copy (CharClass)"))
208      nfail++;
209    cc->Delete();
210
211    CharClassBuilder *ccb1 = ccb.Copy();
212    if (!CorrectCC(ccb1, t, "after copy (CharClassBuilder)"))
213      nfail++;
214    cc = ccb.GetCharClass();
215    if (!CorrectCC(cc, t, "after copy (CharClass)"))
216      nfail++;
217    cc->Delete();
218    delete ccb1;
219  }
220  EXPECT_EQ(nfail, 0);
221}
222
223}  // namespace re2
224