1/*
2 *  Copyright (C) 2006 George Staikos <staikos@kde.org>
3 *  Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com>
4 *  Copyright (C) 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
5 *
6 *  This library is free software; you can redistribute it and/or
7 *  modify it under the terms of the GNU Library General Public
8 *  License as published by the Free Software Foundation; either
9 *  version 2 of the License, or (at your option) any later version.
10 *
11 *  This library is distributed in the hope that it will be useful,
12 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
13 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14 *  Library General Public License for more details.
15 *
16 *  You should have received a copy of the GNU Library General Public License
17 *  along with this library; see the file COPYING.LIB.  If not, write to
18 *  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
19 *  Boston, MA 02110-1301, USA.
20 *
21 */
22
23#ifndef WTF_UNICODE_ICU_H
24#define WTF_UNICODE_ICU_H
25
26#include <stdlib.h>
27#include <unicode/uchar.h>
28#include <unicode/ustring.h>
29#include <unicode/utf16.h>
30
31namespace WTF {
32namespace Unicode {
33
34enum Direction {
35    LeftToRight = U_LEFT_TO_RIGHT,
36    RightToLeft = U_RIGHT_TO_LEFT,
37    EuropeanNumber = U_EUROPEAN_NUMBER,
38    EuropeanNumberSeparator = U_EUROPEAN_NUMBER_SEPARATOR,
39    EuropeanNumberTerminator = U_EUROPEAN_NUMBER_TERMINATOR,
40    ArabicNumber = U_ARABIC_NUMBER,
41    CommonNumberSeparator = U_COMMON_NUMBER_SEPARATOR,
42    BlockSeparator = U_BLOCK_SEPARATOR,
43    SegmentSeparator = U_SEGMENT_SEPARATOR,
44    WhiteSpaceNeutral = U_WHITE_SPACE_NEUTRAL,
45    OtherNeutral = U_OTHER_NEUTRAL,
46    LeftToRightEmbedding = U_LEFT_TO_RIGHT_EMBEDDING,
47    LeftToRightOverride = U_LEFT_TO_RIGHT_OVERRIDE,
48    RightToLeftArabic = U_RIGHT_TO_LEFT_ARABIC,
49    RightToLeftEmbedding = U_RIGHT_TO_LEFT_EMBEDDING,
50    RightToLeftOverride = U_RIGHT_TO_LEFT_OVERRIDE,
51    PopDirectionalFormat = U_POP_DIRECTIONAL_FORMAT,
52    NonSpacingMark = U_DIR_NON_SPACING_MARK,
53    BoundaryNeutral = U_BOUNDARY_NEUTRAL
54};
55
56enum DecompositionType {
57    DecompositionNone = U_DT_NONE,
58    DecompositionCanonical = U_DT_CANONICAL,
59    DecompositionCompat = U_DT_COMPAT,
60    DecompositionCircle = U_DT_CIRCLE,
61    DecompositionFinal = U_DT_FINAL,
62    DecompositionFont = U_DT_FONT,
63    DecompositionFraction = U_DT_FRACTION,
64    DecompositionInitial = U_DT_INITIAL,
65    DecompositionIsolated = U_DT_ISOLATED,
66    DecompositionMedial = U_DT_MEDIAL,
67    DecompositionNarrow = U_DT_NARROW,
68    DecompositionNoBreak = U_DT_NOBREAK,
69    DecompositionSmall = U_DT_SMALL,
70    DecompositionSquare = U_DT_SQUARE,
71    DecompositionSub = U_DT_SUB,
72    DecompositionSuper = U_DT_SUPER,
73    DecompositionVertical = U_DT_VERTICAL,
74    DecompositionWide = U_DT_WIDE,
75};
76
77enum CharCategory {
78    NoCategory =  0,
79    Other_NotAssigned = U_MASK(U_GENERAL_OTHER_TYPES),
80    Letter_Uppercase = U_MASK(U_UPPERCASE_LETTER),
81    Letter_Lowercase = U_MASK(U_LOWERCASE_LETTER),
82    Letter_Titlecase = U_MASK(U_TITLECASE_LETTER),
83    Letter_Modifier = U_MASK(U_MODIFIER_LETTER),
84    Letter_Other = U_MASK(U_OTHER_LETTER),
85
86    Mark_NonSpacing = U_MASK(U_NON_SPACING_MARK),
87    Mark_Enclosing = U_MASK(U_ENCLOSING_MARK),
88    Mark_SpacingCombining = U_MASK(U_COMBINING_SPACING_MARK),
89
90    Number_DecimalDigit = U_MASK(U_DECIMAL_DIGIT_NUMBER),
91    Number_Letter = U_MASK(U_LETTER_NUMBER),
92    Number_Other = U_MASK(U_OTHER_NUMBER),
93
94    Separator_Space = U_MASK(U_SPACE_SEPARATOR),
95    Separator_Line = U_MASK(U_LINE_SEPARATOR),
96    Separator_Paragraph = U_MASK(U_PARAGRAPH_SEPARATOR),
97
98    Other_Control = U_MASK(U_CONTROL_CHAR),
99    Other_Format = U_MASK(U_FORMAT_CHAR),
100    Other_PrivateUse = U_MASK(U_PRIVATE_USE_CHAR),
101    Other_Surrogate = U_MASK(U_SURROGATE),
102
103    Punctuation_Dash = U_MASK(U_DASH_PUNCTUATION),
104    Punctuation_Open = U_MASK(U_START_PUNCTUATION),
105    Punctuation_Close = U_MASK(U_END_PUNCTUATION),
106    Punctuation_Connector = U_MASK(U_CONNECTOR_PUNCTUATION),
107    Punctuation_Other = U_MASK(U_OTHER_PUNCTUATION),
108
109    Symbol_Math = U_MASK(U_MATH_SYMBOL),
110    Symbol_Currency = U_MASK(U_CURRENCY_SYMBOL),
111    Symbol_Modifier = U_MASK(U_MODIFIER_SYMBOL),
112    Symbol_Other = U_MASK(U_OTHER_SYMBOL),
113
114    Punctuation_InitialQuote = U_MASK(U_INITIAL_PUNCTUATION),
115    Punctuation_FinalQuote = U_MASK(U_FINAL_PUNCTUATION)
116};
117
118inline UChar32 foldCase(UChar32 c)
119{
120    return u_foldCase(c, U_FOLD_CASE_DEFAULT);
121}
122
123inline int foldCase(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error)
124{
125    UErrorCode status = U_ZERO_ERROR;
126    int realLength = u_strFoldCase(result, resultLength, src, srcLength, U_FOLD_CASE_DEFAULT, &status);
127    *error = !U_SUCCESS(status);
128    return realLength;
129}
130
131inline int toLower(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error)
132{
133    UErrorCode status = U_ZERO_ERROR;
134    int realLength = u_strToLower(result, resultLength, src, srcLength, "", &status);
135    *error = !!U_FAILURE(status);
136    return realLength;
137}
138
139inline UChar32 toLower(UChar32 c)
140{
141    return u_tolower(c);
142}
143
144inline UChar32 toUpper(UChar32 c)
145{
146    return u_toupper(c);
147}
148
149inline int toUpper(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error)
150{
151    UErrorCode status = U_ZERO_ERROR;
152    int realLength = u_strToUpper(result, resultLength, src, srcLength, "", &status);
153    *error = !!U_FAILURE(status);
154    return realLength;
155}
156
157inline UChar32 toTitleCase(UChar32 c)
158{
159    return u_totitle(c);
160}
161
162inline bool isArabicChar(UChar32 c)
163{
164      return ublock_getCode(c) == UBLOCK_ARABIC;
165}
166
167inline bool isAlphanumeric(UChar32 c)
168{
169    return u_isalnum(c);
170}
171
172inline bool isSeparatorSpace(UChar32 c)
173{
174    return u_charType(c) == U_SPACE_SEPARATOR;
175}
176
177inline bool isPrintableChar(UChar32 c)
178{
179    return !!u_isprint(c);
180}
181
182inline bool isPunct(UChar32 c)
183{
184    return !!u_ispunct(c);
185}
186
187inline bool hasLineBreakingPropertyComplexContext(UChar32 c)
188{
189    return u_getIntPropertyValue(c, UCHAR_LINE_BREAK) == U_LB_COMPLEX_CONTEXT;
190}
191
192inline bool hasLineBreakingPropertyComplexContextOrIdeographic(UChar32 c)
193{
194    int32_t prop = u_getIntPropertyValue(c, UCHAR_LINE_BREAK);
195    return prop == U_LB_COMPLEX_CONTEXT || prop == U_LB_IDEOGRAPHIC;
196}
197
198inline UChar32 mirroredChar(UChar32 c)
199{
200    return u_charMirror(c);
201}
202
203inline CharCategory category(UChar32 c)
204{
205    return static_cast<CharCategory>(U_GET_GC_MASK(c));
206}
207
208inline Direction direction(UChar32 c)
209{
210    return static_cast<Direction>(u_charDirection(c));
211}
212
213inline bool isLower(UChar32 c)
214{
215    return !!u_islower(c);
216}
217
218inline uint8_t combiningClass(UChar32 c)
219{
220    return u_getCombiningClass(c);
221}
222
223inline DecompositionType decompositionType(UChar32 c)
224{
225    return static_cast<DecompositionType>(u_getIntPropertyValue(c, UCHAR_DECOMPOSITION_TYPE));
226}
227
228inline int umemcasecmp(const UChar* a, const UChar* b, int len)
229{
230    return u_memcasecmp(a, b, len, U_FOLD_CASE_DEFAULT);
231}
232
233} }
234
235#endif // WTF_UNICODE_ICU_H
236