1//===- unittests/Basic/CharInfoTest.cpp -- ASCII classification tests -----===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9
10#include "clang/Basic/CharInfo.h"
11#include "gtest/gtest.h"
12
13using namespace llvm;
14using namespace clang;
15
16// Check that the CharInfo table has been constructed reasonably.
17TEST(CharInfoTest, validateInfoTable) {
18  using namespace charinfo;
19  EXPECT_EQ((unsigned)CHAR_SPACE,   InfoTable[(unsigned)' ']);
20  EXPECT_EQ((unsigned)CHAR_HORZ_WS, InfoTable[(unsigned)'\t']);
21  EXPECT_EQ((unsigned)CHAR_HORZ_WS, InfoTable[(unsigned)'\f']); // ??
22  EXPECT_EQ((unsigned)CHAR_HORZ_WS, InfoTable[(unsigned)'\v']); // ??
23  EXPECT_EQ((unsigned)CHAR_VERT_WS, InfoTable[(unsigned)'\n']);
24  EXPECT_EQ((unsigned)CHAR_VERT_WS, InfoTable[(unsigned)'\r']);
25  EXPECT_EQ((unsigned)CHAR_UNDER,   InfoTable[(unsigned)'_']);
26  EXPECT_EQ((unsigned)CHAR_PERIOD,  InfoTable[(unsigned)'.']);
27
28  for (unsigned i = 'a'; i <= 'f'; ++i) {
29    EXPECT_EQ((unsigned)CHAR_XLOWER, InfoTable[i]);
30    EXPECT_EQ((unsigned)CHAR_XUPPER, InfoTable[i+'A'-'a']);
31  }
32
33  for (unsigned i = 'g'; i <= 'z'; ++i) {
34    EXPECT_EQ((unsigned)CHAR_LOWER, InfoTable[i]);
35    EXPECT_EQ((unsigned)CHAR_UPPER, InfoTable[i+'A'-'a']);
36  }
37
38  for (unsigned i = '0'; i <= '9'; ++i)
39    EXPECT_EQ((unsigned)CHAR_DIGIT, InfoTable[i]);
40}
41
42// Check various predicates.
43TEST(CharInfoTest, isASCII) {
44  EXPECT_TRUE(isASCII('\0'));
45  EXPECT_TRUE(isASCII('\n'));
46  EXPECT_TRUE(isASCII(' '));
47  EXPECT_TRUE(isASCII('a'));
48  EXPECT_TRUE(isASCII('\x7f'));
49  EXPECT_FALSE(isASCII('\x80'));
50  EXPECT_FALSE(isASCII('\xc2'));
51  EXPECT_FALSE(isASCII('\xff'));
52}
53
54TEST(CharInfoTest, isIdentifierHead) {
55  EXPECT_TRUE(isIdentifierHead('a'));
56  EXPECT_TRUE(isIdentifierHead('A'));
57  EXPECT_TRUE(isIdentifierHead('z'));
58  EXPECT_TRUE(isIdentifierHead('Z'));
59  EXPECT_TRUE(isIdentifierHead('_'));
60
61  EXPECT_FALSE(isIdentifierHead('0'));
62  EXPECT_FALSE(isIdentifierHead('.'));
63  EXPECT_FALSE(isIdentifierHead('`'));
64  EXPECT_FALSE(isIdentifierHead('\0'));
65
66  EXPECT_FALSE(isIdentifierHead('$'));
67  EXPECT_TRUE(isIdentifierHead('$', /*AllowDollar=*/true));
68
69  EXPECT_FALSE(isIdentifierHead('\x80'));
70  EXPECT_FALSE(isIdentifierHead('\xc2'));
71  EXPECT_FALSE(isIdentifierHead('\xff'));
72}
73
74TEST(CharInfoTest, isIdentifierBody) {
75  EXPECT_TRUE(isIdentifierBody('a'));
76  EXPECT_TRUE(isIdentifierBody('A'));
77  EXPECT_TRUE(isIdentifierBody('z'));
78  EXPECT_TRUE(isIdentifierBody('Z'));
79  EXPECT_TRUE(isIdentifierBody('_'));
80
81  EXPECT_TRUE(isIdentifierBody('0'));
82  EXPECT_FALSE(isIdentifierBody('.'));
83  EXPECT_FALSE(isIdentifierBody('`'));
84  EXPECT_FALSE(isIdentifierBody('\0'));
85
86  EXPECT_FALSE(isIdentifierBody('$'));
87  EXPECT_TRUE(isIdentifierBody('$', /*AllowDollar=*/true));
88
89  EXPECT_FALSE(isIdentifierBody('\x80'));
90  EXPECT_FALSE(isIdentifierBody('\xc2'));
91  EXPECT_FALSE(isIdentifierBody('\xff'));
92}
93
94TEST(CharInfoTest, isHorizontalWhitespace) {
95  EXPECT_FALSE(isHorizontalWhitespace('a'));
96  EXPECT_FALSE(isHorizontalWhitespace('_'));
97  EXPECT_FALSE(isHorizontalWhitespace('0'));
98  EXPECT_FALSE(isHorizontalWhitespace('.'));
99  EXPECT_FALSE(isHorizontalWhitespace('`'));
100  EXPECT_FALSE(isHorizontalWhitespace('\0'));
101  EXPECT_FALSE(isHorizontalWhitespace('\x7f'));
102
103  EXPECT_TRUE(isHorizontalWhitespace(' '));
104  EXPECT_TRUE(isHorizontalWhitespace('\t'));
105  EXPECT_TRUE(isHorizontalWhitespace('\f')); // ??
106  EXPECT_TRUE(isHorizontalWhitespace('\v')); // ??
107
108  EXPECT_FALSE(isHorizontalWhitespace('\n'));
109  EXPECT_FALSE(isHorizontalWhitespace('\r'));
110
111  EXPECT_FALSE(isHorizontalWhitespace('\x80'));
112  EXPECT_FALSE(isHorizontalWhitespace('\xc2'));
113  EXPECT_FALSE(isHorizontalWhitespace('\xff'));
114}
115
116TEST(CharInfoTest, isVerticalWhitespace) {
117  EXPECT_FALSE(isVerticalWhitespace('a'));
118  EXPECT_FALSE(isVerticalWhitespace('_'));
119  EXPECT_FALSE(isVerticalWhitespace('0'));
120  EXPECT_FALSE(isVerticalWhitespace('.'));
121  EXPECT_FALSE(isVerticalWhitespace('`'));
122  EXPECT_FALSE(isVerticalWhitespace('\0'));
123  EXPECT_FALSE(isVerticalWhitespace('\x7f'));
124
125  EXPECT_FALSE(isVerticalWhitespace(' '));
126  EXPECT_FALSE(isVerticalWhitespace('\t'));
127  EXPECT_FALSE(isVerticalWhitespace('\f')); // ??
128  EXPECT_FALSE(isVerticalWhitespace('\v')); // ??
129
130  EXPECT_TRUE(isVerticalWhitespace('\n'));
131  EXPECT_TRUE(isVerticalWhitespace('\r'));
132
133  EXPECT_FALSE(isVerticalWhitespace('\x80'));
134  EXPECT_FALSE(isVerticalWhitespace('\xc2'));
135  EXPECT_FALSE(isVerticalWhitespace('\xff'));
136}
137
138TEST(CharInfoTest, isWhitespace) {
139  EXPECT_FALSE(isWhitespace('a'));
140  EXPECT_FALSE(isWhitespace('_'));
141  EXPECT_FALSE(isWhitespace('0'));
142  EXPECT_FALSE(isWhitespace('.'));
143  EXPECT_FALSE(isWhitespace('`'));
144  EXPECT_FALSE(isWhitespace('\0'));
145  EXPECT_FALSE(isWhitespace('\x7f'));
146
147  EXPECT_TRUE(isWhitespace(' '));
148  EXPECT_TRUE(isWhitespace('\t'));
149  EXPECT_TRUE(isWhitespace('\f'));
150  EXPECT_TRUE(isWhitespace('\v'));
151
152  EXPECT_TRUE(isWhitespace('\n'));
153  EXPECT_TRUE(isWhitespace('\r'));
154
155  EXPECT_FALSE(isWhitespace('\x80'));
156  EXPECT_FALSE(isWhitespace('\xc2'));
157  EXPECT_FALSE(isWhitespace('\xff'));
158}
159
160TEST(CharInfoTest, isDigit) {
161  EXPECT_TRUE(isDigit('0'));
162  EXPECT_TRUE(isDigit('9'));
163
164  EXPECT_FALSE(isDigit('a'));
165  EXPECT_FALSE(isDigit('A'));
166
167  EXPECT_FALSE(isDigit('z'));
168  EXPECT_FALSE(isDigit('Z'));
169
170  EXPECT_FALSE(isDigit('.'));
171  EXPECT_FALSE(isDigit('_'));
172
173  EXPECT_FALSE(isDigit('/'));
174  EXPECT_FALSE(isDigit('\0'));
175
176  EXPECT_FALSE(isDigit('\x80'));
177  EXPECT_FALSE(isDigit('\xc2'));
178  EXPECT_FALSE(isDigit('\xff'));
179}
180
181TEST(CharInfoTest, isHexDigit) {
182  EXPECT_TRUE(isHexDigit('0'));
183  EXPECT_TRUE(isHexDigit('9'));
184
185  EXPECT_TRUE(isHexDigit('a'));
186  EXPECT_TRUE(isHexDigit('A'));
187
188  EXPECT_FALSE(isHexDigit('z'));
189  EXPECT_FALSE(isHexDigit('Z'));
190
191  EXPECT_FALSE(isHexDigit('.'));
192  EXPECT_FALSE(isHexDigit('_'));
193
194  EXPECT_FALSE(isHexDigit('/'));
195  EXPECT_FALSE(isHexDigit('\0'));
196
197  EXPECT_FALSE(isHexDigit('\x80'));
198  EXPECT_FALSE(isHexDigit('\xc2'));
199  EXPECT_FALSE(isHexDigit('\xff'));
200}
201
202TEST(CharInfoTest, isLetter) {
203  EXPECT_FALSE(isLetter('0'));
204  EXPECT_FALSE(isLetter('9'));
205
206  EXPECT_TRUE(isLetter('a'));
207  EXPECT_TRUE(isLetter('A'));
208
209  EXPECT_TRUE(isLetter('z'));
210  EXPECT_TRUE(isLetter('Z'));
211
212  EXPECT_FALSE(isLetter('.'));
213  EXPECT_FALSE(isLetter('_'));
214
215  EXPECT_FALSE(isLetter('/'));
216  EXPECT_FALSE(isLetter('('));
217  EXPECT_FALSE(isLetter('\0'));
218
219  EXPECT_FALSE(isLetter('\x80'));
220  EXPECT_FALSE(isLetter('\xc2'));
221  EXPECT_FALSE(isLetter('\xff'));
222}
223
224TEST(CharInfoTest, isLowercase) {
225  EXPECT_FALSE(isLowercase('0'));
226  EXPECT_FALSE(isLowercase('9'));
227
228  EXPECT_TRUE(isLowercase('a'));
229  EXPECT_FALSE(isLowercase('A'));
230
231  EXPECT_TRUE(isLowercase('z'));
232  EXPECT_FALSE(isLowercase('Z'));
233
234  EXPECT_FALSE(isLowercase('.'));
235  EXPECT_FALSE(isLowercase('_'));
236
237  EXPECT_FALSE(isLowercase('/'));
238  EXPECT_FALSE(isLowercase('('));
239  EXPECT_FALSE(isLowercase('\0'));
240
241  EXPECT_FALSE(isLowercase('\x80'));
242  EXPECT_FALSE(isLowercase('\xc2'));
243  EXPECT_FALSE(isLowercase('\xff'));
244}
245
246TEST(CharInfoTest, isUppercase) {
247  EXPECT_FALSE(isUppercase('0'));
248  EXPECT_FALSE(isUppercase('9'));
249
250  EXPECT_FALSE(isUppercase('a'));
251  EXPECT_TRUE(isUppercase('A'));
252
253  EXPECT_FALSE(isUppercase('z'));
254  EXPECT_TRUE(isUppercase('Z'));
255
256  EXPECT_FALSE(isUppercase('.'));
257  EXPECT_FALSE(isUppercase('_'));
258
259  EXPECT_FALSE(isUppercase('/'));
260  EXPECT_FALSE(isUppercase('('));
261  EXPECT_FALSE(isUppercase('\0'));
262
263  EXPECT_FALSE(isUppercase('\x80'));
264  EXPECT_FALSE(isUppercase('\xc2'));
265  EXPECT_FALSE(isUppercase('\xff'));
266}
267
268TEST(CharInfoTest, isAlphanumeric) {
269  EXPECT_TRUE(isAlphanumeric('0'));
270  EXPECT_TRUE(isAlphanumeric('9'));
271
272  EXPECT_TRUE(isAlphanumeric('a'));
273  EXPECT_TRUE(isAlphanumeric('A'));
274
275  EXPECT_TRUE(isAlphanumeric('z'));
276  EXPECT_TRUE(isAlphanumeric('Z'));
277
278  EXPECT_FALSE(isAlphanumeric('.'));
279  EXPECT_FALSE(isAlphanumeric('_'));
280
281  EXPECT_FALSE(isAlphanumeric('/'));
282  EXPECT_FALSE(isAlphanumeric('('));
283  EXPECT_FALSE(isAlphanumeric('\0'));
284
285  EXPECT_FALSE(isAlphanumeric('\x80'));
286  EXPECT_FALSE(isAlphanumeric('\xc2'));
287  EXPECT_FALSE(isAlphanumeric('\xff'));
288}
289
290TEST(CharInfoTest, isPunctuation) {
291  EXPECT_FALSE(isPunctuation('0'));
292  EXPECT_FALSE(isPunctuation('9'));
293
294  EXPECT_FALSE(isPunctuation('a'));
295  EXPECT_FALSE(isPunctuation('A'));
296
297  EXPECT_FALSE(isPunctuation('z'));
298  EXPECT_FALSE(isPunctuation('Z'));
299
300  EXPECT_TRUE(isPunctuation('.'));
301  EXPECT_TRUE(isPunctuation('_'));
302
303  EXPECT_TRUE(isPunctuation('/'));
304  EXPECT_TRUE(isPunctuation('('));
305
306  EXPECT_FALSE(isPunctuation(' '));
307  EXPECT_FALSE(isPunctuation('\n'));
308  EXPECT_FALSE(isPunctuation('\0'));
309
310  EXPECT_FALSE(isPunctuation('\x80'));
311  EXPECT_FALSE(isPunctuation('\xc2'));
312  EXPECT_FALSE(isPunctuation('\xff'));
313}
314
315TEST(CharInfoTest, isPrintable) {
316  EXPECT_TRUE(isPrintable('0'));
317  EXPECT_TRUE(isPrintable('9'));
318
319  EXPECT_TRUE(isPrintable('a'));
320  EXPECT_TRUE(isPrintable('A'));
321
322  EXPECT_TRUE(isPrintable('z'));
323  EXPECT_TRUE(isPrintable('Z'));
324
325  EXPECT_TRUE(isPrintable('.'));
326  EXPECT_TRUE(isPrintable('_'));
327
328  EXPECT_TRUE(isPrintable('/'));
329  EXPECT_TRUE(isPrintable('('));
330
331  EXPECT_TRUE(isPrintable(' '));
332  EXPECT_FALSE(isPrintable('\t'));
333  EXPECT_FALSE(isPrintable('\n'));
334  EXPECT_FALSE(isPrintable('\0'));
335
336  EXPECT_FALSE(isPrintable('\x80'));
337  EXPECT_FALSE(isPrintable('\xc2'));
338  EXPECT_FALSE(isPrintable('\xff'));
339}
340
341TEST(CharInfoTest, isPreprocessingNumberBody) {
342  EXPECT_TRUE(isPreprocessingNumberBody('0'));
343  EXPECT_TRUE(isPreprocessingNumberBody('9'));
344
345  EXPECT_TRUE(isPreprocessingNumberBody('a'));
346  EXPECT_TRUE(isPreprocessingNumberBody('A'));
347
348  EXPECT_TRUE(isPreprocessingNumberBody('z'));
349  EXPECT_TRUE(isPreprocessingNumberBody('Z'));
350  EXPECT_TRUE(isPreprocessingNumberBody('.'));
351  EXPECT_TRUE(isPreprocessingNumberBody('_'));
352
353  EXPECT_FALSE(isPreprocessingNumberBody('/'));
354  EXPECT_FALSE(isPreprocessingNumberBody('('));
355  EXPECT_FALSE(isPreprocessingNumberBody('\0'));
356
357  EXPECT_FALSE(isPreprocessingNumberBody('\x80'));
358  EXPECT_FALSE(isPreprocessingNumberBody('\xc2'));
359  EXPECT_FALSE(isPreprocessingNumberBody('\xff'));
360}
361
362TEST(CharInfoTest, isRawStringDelimBody) {
363  EXPECT_TRUE(isRawStringDelimBody('0'));
364  EXPECT_TRUE(isRawStringDelimBody('9'));
365
366  EXPECT_TRUE(isRawStringDelimBody('a'));
367  EXPECT_TRUE(isRawStringDelimBody('A'));
368
369  EXPECT_TRUE(isRawStringDelimBody('z'));
370  EXPECT_TRUE(isRawStringDelimBody('Z'));
371  EXPECT_TRUE(isRawStringDelimBody('.'));
372  EXPECT_TRUE(isRawStringDelimBody('_'));
373
374  EXPECT_TRUE(isRawStringDelimBody('/'));
375  EXPECT_FALSE(isRawStringDelimBody('('));
376  EXPECT_FALSE(isRawStringDelimBody('\0'));
377
378  EXPECT_FALSE(isRawStringDelimBody('\x80'));
379  EXPECT_FALSE(isRawStringDelimBody('\xc2'));
380  EXPECT_FALSE(isRawStringDelimBody('\xff'));
381}
382
383TEST(CharInfoTest, toLowercase) {
384  EXPECT_EQ('0', toLowercase('0'));
385  EXPECT_EQ('9', toLowercase('9'));
386
387  EXPECT_EQ('a', toLowercase('a'));
388  EXPECT_EQ('a', toLowercase('A'));
389
390  EXPECT_EQ('z', toLowercase('z'));
391  EXPECT_EQ('z', toLowercase('Z'));
392
393  EXPECT_EQ('.', toLowercase('.'));
394  EXPECT_EQ('_', toLowercase('_'));
395
396  EXPECT_EQ('/', toLowercase('/'));
397  EXPECT_EQ('\0', toLowercase('\0'));
398}
399
400TEST(CharInfoTest, toUppercase) {
401  EXPECT_EQ('0', toUppercase('0'));
402  EXPECT_EQ('9', toUppercase('9'));
403
404  EXPECT_EQ('A', toUppercase('a'));
405  EXPECT_EQ('A', toUppercase('A'));
406
407  EXPECT_EQ('Z', toUppercase('z'));
408  EXPECT_EQ('Z', toUppercase('Z'));
409
410  EXPECT_EQ('.', toUppercase('.'));
411  EXPECT_EQ('_', toUppercase('_'));
412
413  EXPECT_EQ('/', toUppercase('/'));
414  EXPECT_EQ('\0', toUppercase('\0'));
415}
416
417TEST(CharInfoTest, isValidIdentifier) {
418  EXPECT_FALSE(isValidIdentifier(""));
419
420  // 1 character
421  EXPECT_FALSE(isValidIdentifier("."));
422  EXPECT_FALSE(isValidIdentifier("\n"));
423  EXPECT_FALSE(isValidIdentifier(" "));
424  EXPECT_FALSE(isValidIdentifier("\x80"));
425  EXPECT_FALSE(isValidIdentifier("\xc2"));
426  EXPECT_FALSE(isValidIdentifier("\xff"));
427  EXPECT_FALSE(isValidIdentifier("$"));
428  EXPECT_FALSE(isValidIdentifier("1"));
429
430  EXPECT_TRUE(isValidIdentifier("_"));
431  EXPECT_TRUE(isValidIdentifier("a"));
432  EXPECT_TRUE(isValidIdentifier("z"));
433  EXPECT_TRUE(isValidIdentifier("A"));
434  EXPECT_TRUE(isValidIdentifier("Z"));
435
436  // 2 characters, '_' suffix
437  EXPECT_FALSE(isValidIdentifier("._"));
438  EXPECT_FALSE(isValidIdentifier("\n_"));
439  EXPECT_FALSE(isValidIdentifier(" _"));
440  EXPECT_FALSE(isValidIdentifier("\x80_"));
441  EXPECT_FALSE(isValidIdentifier("\xc2_"));
442  EXPECT_FALSE(isValidIdentifier("\xff_"));
443  EXPECT_FALSE(isValidIdentifier("$_"));
444  EXPECT_FALSE(isValidIdentifier("1_"));
445
446  EXPECT_TRUE(isValidIdentifier("__"));
447  EXPECT_TRUE(isValidIdentifier("a_"));
448  EXPECT_TRUE(isValidIdentifier("z_"));
449  EXPECT_TRUE(isValidIdentifier("A_"));
450  EXPECT_TRUE(isValidIdentifier("Z_"));
451
452  // 2 characters, '_' prefix
453  EXPECT_FALSE(isValidIdentifier("_."));
454  EXPECT_FALSE(isValidIdentifier("_\n"));
455  EXPECT_FALSE(isValidIdentifier("_ "));
456  EXPECT_FALSE(isValidIdentifier("_\x80"));
457  EXPECT_FALSE(isValidIdentifier("_\xc2"));
458  EXPECT_FALSE(isValidIdentifier("_\xff"));
459  EXPECT_FALSE(isValidIdentifier("_$"));
460  EXPECT_TRUE(isValidIdentifier("_1"));
461
462  EXPECT_TRUE(isValidIdentifier("__"));
463  EXPECT_TRUE(isValidIdentifier("_a"));
464  EXPECT_TRUE(isValidIdentifier("_z"));
465  EXPECT_TRUE(isValidIdentifier("_A"));
466  EXPECT_TRUE(isValidIdentifier("_Z"));
467
468  // 3 characters, '__' prefix
469  EXPECT_FALSE(isValidIdentifier("__."));
470  EXPECT_FALSE(isValidIdentifier("__\n"));
471  EXPECT_FALSE(isValidIdentifier("__ "));
472  EXPECT_FALSE(isValidIdentifier("__\x80"));
473  EXPECT_FALSE(isValidIdentifier("__\xc2"));
474  EXPECT_FALSE(isValidIdentifier("__\xff"));
475  EXPECT_FALSE(isValidIdentifier("__$"));
476  EXPECT_TRUE(isValidIdentifier("__1"));
477
478  EXPECT_TRUE(isValidIdentifier("___"));
479  EXPECT_TRUE(isValidIdentifier("__a"));
480  EXPECT_TRUE(isValidIdentifier("__z"));
481  EXPECT_TRUE(isValidIdentifier("__A"));
482  EXPECT_TRUE(isValidIdentifier("__Z"));
483
484  // 3 characters, '_' prefix and suffix
485  EXPECT_FALSE(isValidIdentifier("_._"));
486  EXPECT_FALSE(isValidIdentifier("_\n_"));
487  EXPECT_FALSE(isValidIdentifier("_ _"));
488  EXPECT_FALSE(isValidIdentifier("_\x80_"));
489  EXPECT_FALSE(isValidIdentifier("_\xc2_"));
490  EXPECT_FALSE(isValidIdentifier("_\xff_"));
491  EXPECT_FALSE(isValidIdentifier("_$_"));
492  EXPECT_TRUE(isValidIdentifier("_1_"));
493
494  EXPECT_TRUE(isValidIdentifier("___"));
495  EXPECT_TRUE(isValidIdentifier("_a_"));
496  EXPECT_TRUE(isValidIdentifier("_z_"));
497  EXPECT_TRUE(isValidIdentifier("_A_"));
498  EXPECT_TRUE(isValidIdentifier("_Z_"));
499}
500