1/*
2 * Copyright (C) 2007, 2008, 2009 Apple Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 *
8 * 1.  Redistributions of source code must retain the above copyright
9 *     notice, this list of conditions and the following disclaimer.
10 * 2.  Redistributions in binary form must reproduce the above copyright
11 *     notice, this list of conditions and the following disclaimer in the
12 *     documentation and/or other materials provided with the distribution.
13 * 3.  Neither the name of Apple Computer, Inc. ("Apple") nor the names of
14 *     its contributors may be used to endorse or promote products derived
15 *     from this software without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY
18 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20 * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY
21 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
24 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#ifndef WTF_ASCIICType_h
30#define WTF_ASCIICType_h
31
32#include <wtf/Assertions.h>
33
34// The behavior of many of the functions in the <ctype.h> header is dependent
35// on the current locale. But in the WebKit project, all uses of those functions
36// are in code processing something that's not locale-specific. These equivalents
37// for some of the <ctype.h> functions are named more explicitly, not dependent
38// on the C library locale, and we should also optimize them as needed.
39
// Apart from toASCIIHexValue, which requires its argument to be an ASCII hex digit,
// all functions return false or leave the character unchanged if passed a character
// that is outside the range 0-7F. So they can be used on Unicode strings or
// characters if the intent is to do processing only if the character is ASCII.
43
44namespace WTF {
45
46    inline bool isASCII(char c) { return !(c & ~0x7F); }
47    inline bool isASCII(unsigned short c) { return !(c & ~0x7F); }
48#if !COMPILER(MSVC) || defined(_NATIVE_WCHAR_T_DEFINED)
49    inline bool isASCII(wchar_t c) { return !(c & ~0x7F); }
50#endif
51    inline bool isASCII(int c) { return !(c & ~0x7F); }
52    inline bool isASCII(unsigned c) { return !(c & ~0x7F); }
53
54    inline bool isASCIIAlpha(char c) { return (c | 0x20) >= 'a' && (c | 0x20) <= 'z'; }
55    inline bool isASCIIAlpha(unsigned short c) { return (c | 0x20) >= 'a' && (c | 0x20) <= 'z'; }
56#if !COMPILER(MSVC) || defined(_NATIVE_WCHAR_T_DEFINED)
57    inline bool isASCIIAlpha(wchar_t c) { return (c | 0x20) >= 'a' && (c | 0x20) <= 'z'; }
58#endif
59    inline bool isASCIIAlpha(int c) { return (c | 0x20) >= 'a' && (c | 0x20) <= 'z'; }
60    inline bool isASCIIAlpha(unsigned c) { return (c | 0x20) >= 'a' && (c | 0x20) <= 'z'; }
61
62    inline bool isASCIIAlphanumeric(char c) { return (c >= '0' && c <= '9') || ((c | 0x20) >= 'a' && (c | 0x20) <= 'z'); }
63    inline bool isASCIIAlphanumeric(unsigned short c) { return (c >= '0' && c <= '9') || ((c | 0x20) >= 'a' && (c | 0x20) <= 'z'); }
64#if !COMPILER(MSVC) || defined(_NATIVE_WCHAR_T_DEFINED)
65    inline bool isASCIIAlphanumeric(wchar_t c) { return (c >= '0' && c <= '9') || ((c | 0x20) >= 'a' && (c | 0x20) <= 'z'); }
66#endif
67    inline bool isASCIIAlphanumeric(int c) { return (c >= '0' && c <= '9') || ((c | 0x20) >= 'a' && (c | 0x20) <= 'z'); }
68    inline bool isASCIIAlphanumeric(unsigned c) { return (c >= '0' && c <= '9') || ((c | 0x20) >= 'a' && (c | 0x20) <= 'z'); }
69
70    inline bool isASCIIDigit(char c) { return (c >= '0') & (c <= '9'); }
71    inline bool isASCIIDigit(unsigned short c) { return (c >= '0') & (c <= '9'); }
72#if !COMPILER(MSVC) || defined(_NATIVE_WCHAR_T_DEFINED)
73    inline bool isASCIIDigit(wchar_t c) { return (c >= '0') & (c <= '9'); }
74#endif
75    inline bool isASCIIDigit(int c) { return (c >= '0') & (c <= '9'); }
76    inline bool isASCIIDigit(unsigned c) { return (c >= '0') & (c <= '9'); }
77
78    inline bool isASCIIHexDigit(char c) { return (c >= '0' && c <= '9') || ((c | 0x20) >= 'a' && (c | 0x20) <= 'f'); }
79    inline bool isASCIIHexDigit(unsigned short c) { return (c >= '0' && c <= '9') || ((c | 0x20) >= 'a' && (c | 0x20) <= 'f'); }
80#if !COMPILER(MSVC) || defined(_NATIVE_WCHAR_T_DEFINED)
81    inline bool isASCIIHexDigit(wchar_t c) { return (c >= '0' && c <= '9') || ((c | 0x20) >= 'a' && (c | 0x20) <= 'f'); }
82#endif
83    inline bool isASCIIHexDigit(int c) { return (c >= '0' && c <= '9') || ((c | 0x20) >= 'a' && (c | 0x20) <= 'f'); }
84    inline bool isASCIIHexDigit(unsigned c) { return (c >= '0' && c <= '9') || ((c | 0x20) >= 'a' && (c | 0x20) <= 'f'); }
85
86    inline bool isASCIIOctalDigit(char c) { return (c >= '0') & (c <= '7'); }
87    inline bool isASCIIOctalDigit(unsigned short c) { return (c >= '0') & (c <= '7'); }
88#if !COMPILER(MSVC) || defined(_NATIVE_WCHAR_T_DEFINED)
89    inline bool isASCIIOctalDigit(wchar_t c) { return (c >= '0') & (c <= '7'); }
90#endif
91    inline bool isASCIIOctalDigit(int c) { return (c >= '0') & (c <= '7'); }
92    inline bool isASCIIOctalDigit(unsigned c) { return (c >= '0') & (c <= '7'); }
93
94    inline bool isASCIILower(char c) { return c >= 'a' && c <= 'z'; }
95    inline bool isASCIILower(unsigned short c) { return c >= 'a' && c <= 'z'; }
96#if !COMPILER(MSVC) || defined(_NATIVE_WCHAR_T_DEFINED)
97    inline bool isASCIILower(wchar_t c) { return c >= 'a' && c <= 'z'; }
98#endif
99    inline bool isASCIILower(int c) { return c >= 'a' && c <= 'z'; }
100    inline bool isASCIILower(unsigned c) { return c >= 'a' && c <= 'z'; }
101
102    inline bool isASCIIUpper(char c) { return c >= 'A' && c <= 'Z'; }
103    inline bool isASCIIUpper(unsigned short c) { return c >= 'A' && c <= 'Z'; }
104#if !COMPILER(MSVC) || defined(_NATIVE_WCHAR_T_DEFINED)
105    inline bool isASCIIUpper(wchar_t c) { return c >= 'A' && c <= 'Z'; }
106#endif
107    inline bool isASCIIUpper(int c) { return c >= 'A' && c <= 'Z'; }
108    inline bool isASCIIUpper(unsigned c) { return c >= 'A' && c <= 'Z'; }
109
110    /*
111        Statistics from a run of Apple's page load test for callers of isASCIISpace:
112
113            character          count
114            ---------          -----
115            non-spaces         689383
116        20  space              294720
117        0A  \n                 89059
118        09  \t                 28320
119        0D  \r                 0
120        0C  \f                 0
121        0B  \v                 0
122    */
123    inline bool isASCIISpace(char c) { return c <= ' ' && (c == ' ' || (c <= 0xD && c >= 0x9)); }
124    inline bool isASCIISpace(unsigned short c) { return c <= ' ' && (c == ' ' || (c <= 0xD && c >= 0x9)); }
125#if !COMPILER(MSVC) || defined(_NATIVE_WCHAR_T_DEFINED)
126    inline bool isASCIISpace(wchar_t c) { return c <= ' ' && (c == ' ' || (c <= 0xD && c >= 0x9)); }
127#endif
128    inline bool isASCIISpace(int c) { return c <= ' ' && (c == ' ' || (c <= 0xD && c >= 0x9)); }
129    inline bool isASCIISpace(unsigned c) { return c <= ' ' && (c == ' ' || (c <= 0xD && c >= 0x9)); }
130
131    inline char toASCIILower(char c) { return c | ((c >= 'A' && c <= 'Z') << 5); }
132    inline unsigned short toASCIILower(unsigned short c) { return c | ((c >= 'A' && c <= 'Z') << 5); }
133#if !COMPILER(MSVC) || defined(_NATIVE_WCHAR_T_DEFINED)
134    inline wchar_t toASCIILower(wchar_t c) { return c | ((c >= 'A' && c <= 'Z') << 5); }
135#endif
136    inline int toASCIILower(int c) { return c | ((c >= 'A' && c <= 'Z') << 5); }
137    inline unsigned toASCIILower(unsigned c) { return c | ((c >= 'A' && c <= 'Z') << 5); }
138
139    // FIXME: Why do these need static_cast?
140    inline char toASCIIUpper(char c) { return static_cast<char>(c & ~((c >= 'a' && c <= 'z') << 5)); }
141    inline unsigned short toASCIIUpper(unsigned short c) { return static_cast<unsigned short>(c & ~((c >= 'a' && c <= 'z') << 5)); }
142#if !COMPILER(MSVC) || defined(_NATIVE_WCHAR_T_DEFINED)
143    inline wchar_t toASCIIUpper(wchar_t c) { return static_cast<wchar_t>(c & ~((c >= 'a' && c <= 'z') << 5)); }
144#endif
145    inline int toASCIIUpper(int c) { return static_cast<int>(c & ~((c >= 'a' && c <= 'z') << 5)); }
146    inline unsigned toASCIIUpper(unsigned c) { return static_cast<unsigned>(c & ~((c >= 'a' && c <= 'z') << 5)); }
147
148    inline int toASCIIHexValue(char c) { ASSERT(isASCIIHexDigit(c)); return c < 'A' ? c - '0' : (c - 'A' + 10) & 0xF; }
149    inline int toASCIIHexValue(unsigned short c) { ASSERT(isASCIIHexDigit(c)); return c < 'A' ? c - '0' : (c - 'A' + 10) & 0xF; }
150#if !COMPILER(MSVC) || defined(_NATIVE_WCHAR_T_DEFINED)
151    inline int toASCIIHexValue(wchar_t c) { ASSERT(isASCIIHexDigit(c)); return c < 'A' ? c - '0' : (c - 'A' + 10) & 0xF; }
152#endif
153    inline int toASCIIHexValue(int c) { ASSERT(isASCIIHexDigit(c)); return c < 'A' ? c - '0' : (c - 'A' + 10) & 0xF; }
154    inline int toASCIIHexValue(unsigned c) { ASSERT(isASCIIHexDigit(c)); return c < 'A' ? c - '0' : (c - 'A' + 10) & 0xF; }
155
156    inline bool isASCIIPrintable(char c) { return c >= ' ' && c <= '~'; }
157    inline bool isASCIIPrintable(unsigned short c) { return c >= ' ' && c <= '~'; }
158#if !COMPILER(MSVC) || defined(_NATIVE_WCHAR_T_DEFINED)
159    inline bool isASCIIPrintable(wchar_t c) { return c >= ' ' && c <= '~'; }
160#endif
161    inline bool isASCIIPrintable(int c) { return c >= ' ' && c <= '~'; }
162    inline bool isASCIIPrintable(unsigned c) { return c >= ' ' && c <= '~'; }
163}
164
// Bring the WTF ASCII helpers into the enclosing (global) namespace so that code including
// this header can call them without the WTF:: qualifier. Kept in alphabetical order.
using WTF::isASCII;
using WTF::isASCIIAlpha;
using WTF::isASCIIAlphanumeric;
using WTF::isASCIIDigit;
using WTF::isASCIIHexDigit;
using WTF::isASCIILower;
using WTF::isASCIIOctalDigit;
using WTF::isASCIIPrintable;
using WTF::isASCIISpace;
using WTF::isASCIIUpper;
using WTF::toASCIIHexValue;
using WTF::toASCIILower;
using WTF::toASCIIUpper;
178
179#endif
180