UnicodeGLib.h revision cad810f21b803229eb11403f9209855525a25d57
1/*
2 *  Copyright (C) 2006 George Staikos <staikos@kde.org>
3 *  Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com>
4 *  Copyright (C) 2007 Apple Computer, Inc. All rights reserved.
5 *  Copyright (C) 2008 Jürg Billeter <j@bitron.ch>
6 *  Copyright (C) 2008 Dominik Röttsches <dominik.roettsches@access-company.com>
7 *
8 *  This library is free software; you can redistribute it and/or
9 *  modify it under the terms of the GNU Library General Public
10 *  License as published by the Free Software Foundation; either
11 *  version 2 of the License, or (at your option) any later version.
12 *
13 *  This library is distributed in the hope that it will be useful,
14 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
15 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16 *  Library General Public License for more details.
17 *
18 *  You should have received a copy of the GNU Library General Public License
19 *  along with this library; see the file COPYING.LIB.  If not, write to
20 *  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
21 *  Boston, MA 02110-1301, USA.
22 *
23 */
24
25#ifndef UnicodeGLib_h
26#define UnicodeGLib_h
27
28#include "UnicodeMacrosFromICU.h"
29#include "GOwnPtr.h"
30
31#include <glib.h>
32#include <pango/pango.h>
33#include <stdint.h>
34#include <stdlib.h>
35#include <string.h>
36
// 16-bit unsigned unit used for the string buffers (UChar*) taken by the
// conversion helpers below. Presumably a UTF-16 code unit, matching ICU's
// UChar -- TODO confirm.
typedef uint16_t UChar;
// 32-bit signed value used for the single characters handed to the
// per-character helpers below (which forward it to GLib's g_unichar_* calls).
typedef int32_t UChar32;
39
40namespace WTF {
41namespace Unicode {
42
// Bidirectional character classes reported by direction().
// The numeric values are spelled out so that they cannot shift if an
// enumerator is ever inserted or moved. NOTE(review): the order looks like
// ICU's UCharDirection -- confirm before relying on that equivalence.
enum Direction {
    // Strong directional types.
    LeftToRight = 0,
    RightToLeft = 1,

    // Number-related (weak) types.
    EuropeanNumber = 2,
    EuropeanNumberSeparator = 3,
    EuropeanNumberTerminator = 4,
    ArabicNumber = 5,
    CommonNumberSeparator = 6,

    // Separators and neutrals.
    BlockSeparator = 7,
    SegmentSeparator = 8,
    WhiteSpaceNeutral = 9,
    OtherNeutral = 10,

    // Explicit embeddings/overrides and the remaining types.
    LeftToRightEmbedding = 11,
    LeftToRightOverride = 12,
    RightToLeftArabic = 13,
    RightToLeftEmbedding = 14,
    RightToLeftOverride = 15,
    PopDirectionalFormat = 16,
    NonSpacingMark = 17,
    BoundaryNeutral = 18
};
64
// Kinds of character decomposition returned by decompositionType().
// Values are implicit and start at 0 (DecompositionNone).
// The last enumerator deliberately has no trailing comma: a trailing comma in
// an enumerator list is not valid C++98/03, and the other enums in this header
// do not use one either.
enum DecompositionType {
    DecompositionNone,
    DecompositionCanonical,
    DecompositionCompat,
    DecompositionCircle,
    DecompositionFinal,
    DecompositionFont,
    DecompositionFraction,
    DecompositionInitial,
    DecompositionIsolated,
    DecompositionMedial,
    DecompositionNarrow,
    DecompositionNoBreak,
    DecompositionSmall,
    DecompositionSquare,
    DecompositionSub,
    DecompositionSuper,
    DecompositionVertical,
    DecompositionWide
};
85
86enum CharCategory {
87    NoCategory =  0,
88    Other_NotAssigned = U_MASK(G_UNICODE_UNASSIGNED),
89    Letter_Uppercase = U_MASK(G_UNICODE_UPPERCASE_LETTER),
90    Letter_Lowercase = U_MASK(G_UNICODE_LOWERCASE_LETTER),
91    Letter_Titlecase = U_MASK(G_UNICODE_TITLECASE_LETTER),
92    Letter_Modifier = U_MASK(G_UNICODE_MODIFIER_LETTER),
93    Letter_Other = U_MASK(G_UNICODE_OTHER_LETTER),
94
95    Mark_NonSpacing = U_MASK(G_UNICODE_NON_SPACING_MARK),
96    Mark_Enclosing = U_MASK(G_UNICODE_ENCLOSING_MARK),
97    Mark_SpacingCombining = U_MASK(G_UNICODE_COMBINING_MARK),
98
99    Number_DecimalDigit = U_MASK(G_UNICODE_DECIMAL_NUMBER),
100    Number_Letter = U_MASK(G_UNICODE_LETTER_NUMBER),
101    Number_Other = U_MASK(G_UNICODE_OTHER_NUMBER),
102
103    Separator_Space = U_MASK(G_UNICODE_SPACE_SEPARATOR),
104    Separator_Line = U_MASK(G_UNICODE_LINE_SEPARATOR),
105    Separator_Paragraph = U_MASK(G_UNICODE_PARAGRAPH_SEPARATOR),
106
107    Other_Control = U_MASK(G_UNICODE_CONTROL),
108    Other_Format = U_MASK(G_UNICODE_FORMAT),
109    Other_PrivateUse = U_MASK(G_UNICODE_PRIVATE_USE),
110    Other_Surrogate = U_MASK(G_UNICODE_SURROGATE),
111
112    Punctuation_Dash = U_MASK(G_UNICODE_DASH_PUNCTUATION),
113    Punctuation_Open = U_MASK(G_UNICODE_OPEN_PUNCTUATION),
114    Punctuation_Close = U_MASK(G_UNICODE_CLOSE_PUNCTUATION),
115    Punctuation_Connector = U_MASK(G_UNICODE_CONNECT_PUNCTUATION),
116    Punctuation_Other = U_MASK(G_UNICODE_OTHER_PUNCTUATION),
117
118    Symbol_Math = U_MASK(G_UNICODE_MATH_SYMBOL),
119    Symbol_Currency = U_MASK(G_UNICODE_CURRENCY_SYMBOL),
120    Symbol_Modifier = U_MASK(G_UNICODE_MODIFIER_SYMBOL),
121    Symbol_Other = U_MASK(G_UNICODE_OTHER_SYMBOL),
122
123    Punctuation_InitialQuote = U_MASK(G_UNICODE_INITIAL_PUNCTUATION),
124    Punctuation_FinalQuote = U_MASK(G_UNICODE_FINAL_PUNCTUATION)
125};
126
127UChar32 foldCase(UChar32);
128
129int foldCase(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error);
130
131int toLower(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error);
132
133inline UChar32 toLower(UChar32 c)
134{
135    return g_unichar_tolower(c);
136}
137
138inline UChar32 toUpper(UChar32 c)
139{
140    return g_unichar_toupper(c);
141}
142
143int toUpper(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error);
144
145inline UChar32 toTitleCase(UChar32 c)
146{
147    return g_unichar_totitle(c);
148}
149
150inline bool isArabicChar(UChar32 c)
151{
152    return c >= 0x0600 && c <= 0x06FF;
153}
154
155inline bool isAlphanumeric(UChar32 c)
156{
157    return g_unichar_isalnum(c);
158}
159
160inline bool isFormatChar(UChar32 c)
161{
162    return g_unichar_type(c) == G_UNICODE_FORMAT;
163}
164
165inline bool isSeparatorSpace(UChar32 c)
166{
167    return g_unichar_type(c) == G_UNICODE_SPACE_SEPARATOR;
168}
169
170inline bool isPrintableChar(UChar32 c)
171{
172    return g_unichar_isprint(c);
173}
174
175inline bool isDigit(UChar32 c)
176{
177    return g_unichar_isdigit(c);
178}
179
180inline bool isPunct(UChar32 c)
181{
182    return g_unichar_ispunct(c);
183}
184
185inline bool hasLineBreakingPropertyComplexContext(UChar32 c)
186{
187    // FIXME
188    return false;
189}
190
191inline bool hasLineBreakingPropertyComplexContextOrIdeographic(UChar32 c)
192{
193    // FIXME
194    return false;
195}
196
197inline UChar32 mirroredChar(UChar32 c)
198{
199    gunichar mirror = 0;
200    g_unichar_get_mirror_char(c, &mirror);
201    return mirror;
202}
203
204inline CharCategory category(UChar32 c)
205{
206    if (c > 0xffff)
207        return NoCategory;
208
209    return (CharCategory) U_MASK(g_unichar_type(c));
210}
211
212Direction direction(UChar32);
213
214inline bool isLower(UChar32 c)
215{
216    return g_unichar_islower(c);
217}
218
219inline int digitValue(UChar32 c)
220{
221    return g_unichar_digit_value(c);
222}
223
224inline uint8_t combiningClass(UChar32 c)
225{
226    // FIXME
227    // return g_unichar_combining_class(c);
228    return 0;
229}
230
231inline DecompositionType decompositionType(UChar32 c)
232{
233    // FIXME
234    return DecompositionNone;
235}
236
237int umemcasecmp(const UChar*, const UChar*, int len);
238
239}
240}
241
242#endif
243
244