1/*
2 *  Copyright (C) 2006 George Staikos <staikos@kde.org>
3 *  Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com>
4 *  Copyright (C) 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
5 *
6 *  This library is free software; you can redistribute it and/or
7 *  modify it under the terms of the GNU Library General Public
8 *  License as published by the Free Software Foundation; either
9 *  version 2 of the License, or (at your option) any later version.
10 *
11 *  This library is distributed in the hope that it will be useful,
12 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
13 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14 *  Library General Public License for more details.
15 *
16 *  You should have received a copy of the GNU Library General Public License
17 *  along with this library; see the file COPYING.LIB.  If not, write to
18 *  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
19 *  Boston, MA 02110-1301, USA.
20 *
21 */
22
23#ifndef WTF_UNICODE_ICU_H
24#define WTF_UNICODE_ICU_H
25
26#include <unicode/uchar.h>
27#include <unicode/ustring.h>
28
29namespace WTF {
30
31namespace Unicode {
32
33enum Direction {
34    LeftToRight = U_LEFT_TO_RIGHT,
35    RightToLeft = U_RIGHT_TO_LEFT,
36    EuropeanNumber = U_EUROPEAN_NUMBER,
37    EuropeanNumberSeparator = U_EUROPEAN_NUMBER_SEPARATOR,
38    EuropeanNumberTerminator = U_EUROPEAN_NUMBER_TERMINATOR,
39    ArabicNumber = U_ARABIC_NUMBER,
40    CommonNumberSeparator = U_COMMON_NUMBER_SEPARATOR,
41    BlockSeparator = U_BLOCK_SEPARATOR,
42    SegmentSeparator = U_SEGMENT_SEPARATOR,
43    WhiteSpaceNeutral = U_WHITE_SPACE_NEUTRAL,
44    OtherNeutral = U_OTHER_NEUTRAL,
45    LeftToRightEmbedding = U_LEFT_TO_RIGHT_EMBEDDING,
46    LeftToRightOverride = U_LEFT_TO_RIGHT_OVERRIDE,
47    RightToLeftArabic = U_RIGHT_TO_LEFT_ARABIC,
48    RightToLeftEmbedding = U_RIGHT_TO_LEFT_EMBEDDING,
49    RightToLeftOverride = U_RIGHT_TO_LEFT_OVERRIDE,
50    PopDirectionalFormat = U_POP_DIRECTIONAL_FORMAT,
51    NonSpacingMark = U_DIR_NON_SPACING_MARK,
52    BoundaryNeutral = U_BOUNDARY_NEUTRAL
53};
54
55enum DecompositionType {
56    DecompositionNone = U_DT_NONE,
57    DecompositionCanonical = U_DT_CANONICAL,
58    DecompositionCompat = U_DT_COMPAT,
59    DecompositionCircle = U_DT_CIRCLE,
60    DecompositionFinal = U_DT_FINAL,
61    DecompositionFont = U_DT_FONT,
62    DecompositionFraction = U_DT_FRACTION,
63    DecompositionInitial = U_DT_INITIAL,
64    DecompositionIsolated = U_DT_ISOLATED,
65    DecompositionMedial = U_DT_MEDIAL,
66    DecompositionNarrow = U_DT_NARROW,
67    DecompositionNoBreak = U_DT_NOBREAK,
68    DecompositionSmall = U_DT_SMALL,
69    DecompositionSquare = U_DT_SQUARE,
70    DecompositionSub = U_DT_SUB,
71    DecompositionSuper = U_DT_SUPER,
72    DecompositionVertical = U_DT_VERTICAL,
73    DecompositionWide = U_DT_WIDE,
74};
75
76enum CharCategory {
77    NoCategory =  0,
78    Other_NotAssigned = U_MASK(U_GENERAL_OTHER_TYPES),
79    Letter_Uppercase = U_MASK(U_UPPERCASE_LETTER),
80    Letter_Lowercase = U_MASK(U_LOWERCASE_LETTER),
81    Letter_Titlecase = U_MASK(U_TITLECASE_LETTER),
82    Letter_Modifier = U_MASK(U_MODIFIER_LETTER),
83    Letter_Other = U_MASK(U_OTHER_LETTER),
84
85    Mark_NonSpacing = U_MASK(U_NON_SPACING_MARK),
86    Mark_Enclosing = U_MASK(U_ENCLOSING_MARK),
87    Mark_SpacingCombining = U_MASK(U_COMBINING_SPACING_MARK),
88
89    Number_DecimalDigit = U_MASK(U_DECIMAL_DIGIT_NUMBER),
90    Number_Letter = U_MASK(U_LETTER_NUMBER),
91    Number_Other = U_MASK(U_OTHER_NUMBER),
92
93    Separator_Space = U_MASK(U_SPACE_SEPARATOR),
94    Separator_Line = U_MASK(U_LINE_SEPARATOR),
95    Separator_Paragraph = U_MASK(U_PARAGRAPH_SEPARATOR),
96
97    Other_Control = U_MASK(U_CONTROL_CHAR),
98    Other_Format = U_MASK(U_FORMAT_CHAR),
99    Other_PrivateUse = U_MASK(U_PRIVATE_USE_CHAR),
100    Other_Surrogate = U_MASK(U_SURROGATE),
101
102    Punctuation_Dash = U_MASK(U_DASH_PUNCTUATION),
103    Punctuation_Open = U_MASK(U_START_PUNCTUATION),
104    Punctuation_Close = U_MASK(U_END_PUNCTUATION),
105    Punctuation_Connector = U_MASK(U_CONNECTOR_PUNCTUATION),
106    Punctuation_Other = U_MASK(U_OTHER_PUNCTUATION),
107
108    Symbol_Math = U_MASK(U_MATH_SYMBOL),
109    Symbol_Currency = U_MASK(U_CURRENCY_SYMBOL),
110    Symbol_Modifier = U_MASK(U_MODIFIER_SYMBOL),
111    Symbol_Other = U_MASK(U_OTHER_SYMBOL),
112
113    Punctuation_InitialQuote = U_MASK(U_INITIAL_PUNCTUATION),
114    Punctuation_FinalQuote = U_MASK(U_FINAL_PUNCTUATION)
115};
116
117inline UChar32 foldCase(UChar32 c)
118{
119    return u_foldCase(c, U_FOLD_CASE_DEFAULT);
120}
121
122inline int foldCase(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error)
123{
124    UErrorCode status = U_ZERO_ERROR;
125    int realLength = u_strFoldCase(result, resultLength, src, srcLength, U_FOLD_CASE_DEFAULT, &status);
126    *error = !U_SUCCESS(status);
127    return realLength;
128}
129
130inline int toLower(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error)
131{
132    UErrorCode status = U_ZERO_ERROR;
133    int realLength = u_strToLower(result, resultLength, src, srcLength, "", &status);
134    *error = !!U_FAILURE(status);
135    return realLength;
136}
137
138inline UChar32 toLower(UChar32 c)
139{
140    return u_tolower(c);
141}
142
143inline UChar32 toUpper(UChar32 c)
144{
145    return u_toupper(c);
146}
147
148inline int toUpper(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error)
149{
150    UErrorCode status = U_ZERO_ERROR;
151    int realLength = u_strToUpper(result, resultLength, src, srcLength, "", &status);
152    *error = !!U_FAILURE(status);
153    return realLength;
154}
155
156inline UChar32 toTitleCase(UChar32 c)
157{
158    return u_totitle(c);
159}
160
161inline bool isArabicChar(UChar32 c)
162{
163      return ublock_getCode(c) == UBLOCK_ARABIC;
164}
165
166inline bool isAlphanumeric(UChar32 c)
167{
168    return u_isalnum(c);
169}
170
171inline bool isSeparatorSpace(UChar32 c)
172{
173    return u_charType(c) == U_SPACE_SEPARATOR;
174}
175
176inline bool isPrintableChar(UChar32 c)
177{
178    return !!u_isprint(c);
179}
180
181inline bool isPunct(UChar32 c)
182{
183    return !!u_ispunct(c);
184}
185
186inline bool hasLineBreakingPropertyComplexContext(UChar32 c)
187{
188    return u_getIntPropertyValue(c, UCHAR_LINE_BREAK) == U_LB_COMPLEX_CONTEXT;
189}
190
191inline UChar32 mirroredChar(UChar32 c)
192{
193    return u_charMirror(c);
194}
195
196inline CharCategory category(UChar32 c)
197{
198    return static_cast<CharCategory>(U_GET_GC_MASK(c));
199}
200
201inline Direction direction(UChar32 c)
202{
203    return static_cast<Direction>(u_charDirection(c));
204}
205
206inline bool isLower(UChar32 c)
207{
208    return !!u_islower(c);
209}
210
211inline uint8_t combiningClass(UChar32 c)
212{
213    return u_getCombiningClass(c);
214}
215
216inline DecompositionType decompositionType(UChar32 c)
217{
218    return static_cast<DecompositionType>(u_getIntPropertyValue(c, UCHAR_DECOMPOSITION_TYPE));
219}
220
221inline int umemcasecmp(const UChar* a, const UChar* b, int len)
222{
223    return u_memcasecmp(a, b, len, U_FOLD_CASE_DEFAULT);
224}
225
226} // namespace Unicode
227
228} // namespace WTF
229
230#endif // WTF_UNICODE_ICU_H
231