1/*
2 *  Copyright (C) 2006 George Staikos <staikos@kde.org>
3 *  Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com>
4 *  Copyright (C) 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
5 *
6 *  This library is free software; you can redistribute it and/or
7 *  modify it under the terms of the GNU Library General Public
8 *  License as published by the Free Software Foundation; either
9 *  version 2 of the License, or (at your option) any later version.
10 *
11 *  This library is distributed in the hope that it will be useful,
12 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
13 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14 *  Library General Public License for more details.
15 *
16 *  You should have received a copy of the GNU Library General Public License
17 *  along with this library; see the file COPYING.LIB.  If not, write to
18 *  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
19 *  Boston, MA 02110-1301, USA.
20 *
21 */
22
23#ifndef WTF_UNICODE_ICU_H
24#define WTF_UNICODE_ICU_H
25
26#include <unicode/brkiter.h>
27#include <unicode/rbbi.h>
28#include <unicode/uchar.h>
29#include <unicode/uscript.h>
30#include <unicode/ustring.h>
31#include <unicode/utf16.h>
32
33namespace WTF {
34
35namespace Unicode {
36
37enum Direction {
38    LeftToRight = U_LEFT_TO_RIGHT,
39    RightToLeft = U_RIGHT_TO_LEFT,
40    EuropeanNumber = U_EUROPEAN_NUMBER,
41    EuropeanNumberSeparator = U_EUROPEAN_NUMBER_SEPARATOR,
42    EuropeanNumberTerminator = U_EUROPEAN_NUMBER_TERMINATOR,
43    ArabicNumber = U_ARABIC_NUMBER,
44    CommonNumberSeparator = U_COMMON_NUMBER_SEPARATOR,
45    BlockSeparator = U_BLOCK_SEPARATOR,
46    SegmentSeparator = U_SEGMENT_SEPARATOR,
47    WhiteSpaceNeutral = U_WHITE_SPACE_NEUTRAL,
48    OtherNeutral = U_OTHER_NEUTRAL,
49    LeftToRightEmbedding = U_LEFT_TO_RIGHT_EMBEDDING,
50    LeftToRightOverride = U_LEFT_TO_RIGHT_OVERRIDE,
51    RightToLeftArabic = U_RIGHT_TO_LEFT_ARABIC,
52    RightToLeftEmbedding = U_RIGHT_TO_LEFT_EMBEDDING,
53    RightToLeftOverride = U_RIGHT_TO_LEFT_OVERRIDE,
54    PopDirectionalFormat = U_POP_DIRECTIONAL_FORMAT,
55    NonSpacingMark = U_DIR_NON_SPACING_MARK,
56    BoundaryNeutral = U_BOUNDARY_NEUTRAL
57};
58
59enum DecompositionType {
60    DecompositionNone = U_DT_NONE,
61    DecompositionCanonical = U_DT_CANONICAL,
62    DecompositionCompat = U_DT_COMPAT,
63    DecompositionCircle = U_DT_CIRCLE,
64    DecompositionFinal = U_DT_FINAL,
65    DecompositionFont = U_DT_FONT,
66    DecompositionFraction = U_DT_FRACTION,
67    DecompositionInitial = U_DT_INITIAL,
68    DecompositionIsolated = U_DT_ISOLATED,
69    DecompositionMedial = U_DT_MEDIAL,
70    DecompositionNarrow = U_DT_NARROW,
71    DecompositionNoBreak = U_DT_NOBREAK,
72    DecompositionSmall = U_DT_SMALL,
73    DecompositionSquare = U_DT_SQUARE,
74    DecompositionSub = U_DT_SUB,
75    DecompositionSuper = U_DT_SUPER,
76    DecompositionVertical = U_DT_VERTICAL,
77    DecompositionWide = U_DT_WIDE,
78};
79
80enum CharCategory {
81    NoCategory =  0,
82    Other_NotAssigned = U_MASK(U_GENERAL_OTHER_TYPES),
83    Letter_Uppercase = U_MASK(U_UPPERCASE_LETTER),
84    Letter_Lowercase = U_MASK(U_LOWERCASE_LETTER),
85    Letter_Titlecase = U_MASK(U_TITLECASE_LETTER),
86    Letter_Modifier = U_MASK(U_MODIFIER_LETTER),
87    Letter_Other = U_MASK(U_OTHER_LETTER),
88
89    Mark_NonSpacing = U_MASK(U_NON_SPACING_MARK),
90    Mark_Enclosing = U_MASK(U_ENCLOSING_MARK),
91    Mark_SpacingCombining = U_MASK(U_COMBINING_SPACING_MARK),
92
93    Number_DecimalDigit = U_MASK(U_DECIMAL_DIGIT_NUMBER),
94    Number_Letter = U_MASK(U_LETTER_NUMBER),
95    Number_Other = U_MASK(U_OTHER_NUMBER),
96
97    Separator_Space = U_MASK(U_SPACE_SEPARATOR),
98    Separator_Line = U_MASK(U_LINE_SEPARATOR),
99    Separator_Paragraph = U_MASK(U_PARAGRAPH_SEPARATOR),
100
101    Other_Control = U_MASK(U_CONTROL_CHAR),
102    Other_Format = U_MASK(U_FORMAT_CHAR),
103    Other_PrivateUse = U_MASK(U_PRIVATE_USE_CHAR),
104    Other_Surrogate = U_MASK(U_SURROGATE),
105
106    Punctuation_Dash = U_MASK(U_DASH_PUNCTUATION),
107    Punctuation_Open = U_MASK(U_START_PUNCTUATION),
108    Punctuation_Close = U_MASK(U_END_PUNCTUATION),
109    Punctuation_Connector = U_MASK(U_CONNECTOR_PUNCTUATION),
110    Punctuation_Other = U_MASK(U_OTHER_PUNCTUATION),
111
112    Symbol_Math = U_MASK(U_MATH_SYMBOL),
113    Symbol_Currency = U_MASK(U_CURRENCY_SYMBOL),
114    Symbol_Modifier = U_MASK(U_MODIFIER_SYMBOL),
115    Symbol_Other = U_MASK(U_OTHER_SYMBOL),
116
117    Punctuation_InitialQuote = U_MASK(U_INITIAL_PUNCTUATION),
118    Punctuation_FinalQuote = U_MASK(U_FINAL_PUNCTUATION)
119};
120
121inline UChar32 foldCase(UChar32 c)
122{
123    return u_foldCase(c, U_FOLD_CASE_DEFAULT);
124}
125
126inline int foldCase(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error)
127{
128    UErrorCode status = U_ZERO_ERROR;
129    int realLength = u_strFoldCase(result, resultLength, src, srcLength, U_FOLD_CASE_DEFAULT, &status);
130    *error = !U_SUCCESS(status);
131    return realLength;
132}
133
134inline int toLower(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error)
135{
136    UErrorCode status = U_ZERO_ERROR;
137    int realLength = u_strToLower(result, resultLength, src, srcLength, "", &status);
138    *error = !!U_FAILURE(status);
139    return realLength;
140}
141
142inline UChar32 toLower(UChar32 c)
143{
144    return u_tolower(c);
145}
146
147inline UChar32 toUpper(UChar32 c)
148{
149    return u_toupper(c);
150}
151
152inline int toUpper(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error)
153{
154    UErrorCode status = U_ZERO_ERROR;
155    int realLength = u_strToUpper(result, resultLength, src, srcLength, "", &status);
156    *error = !!U_FAILURE(status);
157    return realLength;
158}
159
160inline UChar32 toTitleCase(UChar32 c)
161{
162    return u_totitle(c);
163}
164
165inline bool isArabicChar(UChar32 c)
166{
167      return ublock_getCode(c) == UBLOCK_ARABIC;
168}
169
170inline bool isAlphanumeric(UChar32 c)
171{
172    return u_isalnum(c);
173}
174
175inline bool isSeparatorSpace(UChar32 c)
176{
177    return u_charType(c) == U_SPACE_SEPARATOR;
178}
179
180inline bool isPrintableChar(UChar32 c)
181{
182    return !!u_isprint(c);
183}
184
185inline bool isPunct(UChar32 c)
186{
187    return !!u_ispunct(c);
188}
189
190inline bool hasLineBreakingPropertyComplexContext(UChar32 c)
191{
192    return u_getIntPropertyValue(c, UCHAR_LINE_BREAK) == U_LB_COMPLEX_CONTEXT;
193}
194
195inline UChar32 mirroredChar(UChar32 c)
196{
197    return u_charMirror(c);
198}
199
200inline CharCategory category(UChar32 c)
201{
202    return static_cast<CharCategory>(U_GET_GC_MASK(c));
203}
204
205inline Direction direction(UChar32 c)
206{
207    return static_cast<Direction>(u_charDirection(c));
208}
209
210inline bool isLower(UChar32 c)
211{
212    return !!u_islower(c);
213}
214
215inline uint8_t combiningClass(UChar32 c)
216{
217    return u_getCombiningClass(c);
218}
219
220inline DecompositionType decompositionType(UChar32 c)
221{
222    return static_cast<DecompositionType>(u_getIntPropertyValue(c, UCHAR_DECOMPOSITION_TYPE));
223}
224
225inline int umemcasecmp(const UChar* a, const UChar* b, int len)
226{
227    return u_memcasecmp(a, b, len, U_FOLD_CASE_DEFAULT);
228}
229
230} // namespace Unicode
231
232} // namespace WTF
233
234#endif // WTF_UNICODE_ICU_H
235