1/* 2 * Copyright (C) 2006 George Staikos <staikos@kde.org> 3 * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com> 4 * Copyright (C) 2006, 2007, 2008, 2009 Apple Inc. All rights reserved. 5 * 6 * This library is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Library General Public 8 * License as published by the Free Software Foundation; either 9 * version 2 of the License, or (at your option) any later version. 10 * 11 * This library is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * Library General Public License for more details. 15 * 16 * You should have received a copy of the GNU Library General Public License 17 * along with this library; see the file COPYING.LIB. If not, write to 18 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 19 * Boston, MA 02110-1301, USA. 20 * 21 */ 22 23#ifndef WTF_UNICODE_ICU_H 24#define WTF_UNICODE_ICU_H 25 26#include <stdlib.h> 27#include <unicode/uchar.h> 28#include <unicode/ustring.h> 29#include <unicode/utf16.h> 30 31namespace WTF { 32namespace Unicode { 33 34enum Direction { 35 LeftToRight = U_LEFT_TO_RIGHT, 36 RightToLeft = U_RIGHT_TO_LEFT, 37 EuropeanNumber = U_EUROPEAN_NUMBER, 38 EuropeanNumberSeparator = U_EUROPEAN_NUMBER_SEPARATOR, 39 EuropeanNumberTerminator = U_EUROPEAN_NUMBER_TERMINATOR, 40 ArabicNumber = U_ARABIC_NUMBER, 41 CommonNumberSeparator = U_COMMON_NUMBER_SEPARATOR, 42 BlockSeparator = U_BLOCK_SEPARATOR, 43 SegmentSeparator = U_SEGMENT_SEPARATOR, 44 WhiteSpaceNeutral = U_WHITE_SPACE_NEUTRAL, 45 OtherNeutral = U_OTHER_NEUTRAL, 46 LeftToRightEmbedding = U_LEFT_TO_RIGHT_EMBEDDING, 47 LeftToRightOverride = U_LEFT_TO_RIGHT_OVERRIDE, 48 RightToLeftArabic = U_RIGHT_TO_LEFT_ARABIC, 49 RightToLeftEmbedding = U_RIGHT_TO_LEFT_EMBEDDING, 50 RightToLeftOverride = U_RIGHT_TO_LEFT_OVERRIDE, 51 PopDirectionalFormat = U_POP_DIRECTIONAL_FORMAT, 52 NonSpacingMark = U_DIR_NON_SPACING_MARK, 53 BoundaryNeutral = U_BOUNDARY_NEUTRAL 54}; 55 56enum DecompositionType { 57 DecompositionNone = U_DT_NONE, 58 DecompositionCanonical = U_DT_CANONICAL, 59 DecompositionCompat = U_DT_COMPAT, 60 DecompositionCircle = U_DT_CIRCLE, 61 DecompositionFinal = U_DT_FINAL, 62 DecompositionFont = U_DT_FONT, 63 DecompositionFraction = U_DT_FRACTION, 64 DecompositionInitial = U_DT_INITIAL, 65 DecompositionIsolated = U_DT_ISOLATED, 66 DecompositionMedial = U_DT_MEDIAL, 67 DecompositionNarrow = U_DT_NARROW, 68 DecompositionNoBreak = U_DT_NOBREAK, 69 DecompositionSmall = U_DT_SMALL, 70 DecompositionSquare = U_DT_SQUARE, 71 DecompositionSub = U_DT_SUB, 72 DecompositionSuper = U_DT_SUPER, 73 DecompositionVertical = U_DT_VERTICAL, 74 DecompositionWide = U_DT_WIDE, 75}; 76 77enum CharCategory { 78 NoCategory = 0, 79 Other_NotAssigned = U_MASK(U_GENERAL_OTHER_TYPES), 80 Letter_Uppercase = U_MASK(U_UPPERCASE_LETTER), 81 Letter_Lowercase = U_MASK(U_LOWERCASE_LETTER), 82 Letter_Titlecase = U_MASK(U_TITLECASE_LETTER), 83 Letter_Modifier = U_MASK(U_MODIFIER_LETTER), 84 Letter_Other = U_MASK(U_OTHER_LETTER), 85 86 Mark_NonSpacing = U_MASK(U_NON_SPACING_MARK), 87 Mark_Enclosing = U_MASK(U_ENCLOSING_MARK), 88 Mark_SpacingCombining = U_MASK(U_COMBINING_SPACING_MARK), 89 90 Number_DecimalDigit = U_MASK(U_DECIMAL_DIGIT_NUMBER), 91 Number_Letter = U_MASK(U_LETTER_NUMBER), 92 Number_Other = U_MASK(U_OTHER_NUMBER), 93 94 Separator_Space = U_MASK(U_SPACE_SEPARATOR), 95 Separator_Line = U_MASK(U_LINE_SEPARATOR), 96 Separator_Paragraph = U_MASK(U_PARAGRAPH_SEPARATOR), 97 98 Other_Control = U_MASK(U_CONTROL_CHAR), 99 Other_Format = U_MASK(U_FORMAT_CHAR), 100 Other_PrivateUse = U_MASK(U_PRIVATE_USE_CHAR), 101 Other_Surrogate = U_MASK(U_SURROGATE), 102 103 Punctuation_Dash = U_MASK(U_DASH_PUNCTUATION), 104 Punctuation_Open = U_MASK(U_START_PUNCTUATION), 105 Punctuation_Close = U_MASK(U_END_PUNCTUATION), 106 Punctuation_Connector = U_MASK(U_CONNECTOR_PUNCTUATION), 107 Punctuation_Other = U_MASK(U_OTHER_PUNCTUATION), 108 109 Symbol_Math = U_MASK(U_MATH_SYMBOL), 110 Symbol_Currency = U_MASK(U_CURRENCY_SYMBOL), 111 Symbol_Modifier = U_MASK(U_MODIFIER_SYMBOL), 112 Symbol_Other = U_MASK(U_OTHER_SYMBOL), 113 114 Punctuation_InitialQuote = U_MASK(U_INITIAL_PUNCTUATION), 115 Punctuation_FinalQuote = U_MASK(U_FINAL_PUNCTUATION) 116}; 117 118inline UChar32 foldCase(UChar32 c) 119{ 120 return u_foldCase(c, U_FOLD_CASE_DEFAULT); 121} 122 123inline int foldCase(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error) 124{ 125 UErrorCode status = U_ZERO_ERROR; 126 int realLength = u_strFoldCase(result, resultLength, src, srcLength, U_FOLD_CASE_DEFAULT, &status); 127 *error = !U_SUCCESS(status); 128 return realLength; 129} 130 131inline int toLower(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error) 132{ 133 UErrorCode status = U_ZERO_ERROR; 134 int realLength = u_strToLower(result, resultLength, src, srcLength, "", &status); 135 *error = !!U_FAILURE(status); 136 return realLength; 137} 138 139inline UChar32 toLower(UChar32 c) 140{ 141 return u_tolower(c); 142} 143 144inline UChar32 toUpper(UChar32 c) 145{ 146 return u_toupper(c); 147} 148 149inline int toUpper(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error) 150{ 151 UErrorCode status = U_ZERO_ERROR; 152 int realLength = u_strToUpper(result, resultLength, src, srcLength, "", &status); 153 *error = !!U_FAILURE(status); 154 return realLength; 155} 156 157inline UChar32 toTitleCase(UChar32 c) 158{ 159 return u_totitle(c); 160} 161 162inline bool isArabicChar(UChar32 c) 163{ 164 return ublock_getCode(c) == UBLOCK_ARABIC; 165} 166 167inline bool isAlphanumeric(UChar32 c) 168{ 169 return u_isalnum(c); 170} 171 172inline bool isSeparatorSpace(UChar32 c) 173{ 174 return u_charType(c) == U_SPACE_SEPARATOR; 175} 176 177inline bool isPrintableChar(UChar32 c) 178{ 179 return !!u_isprint(c); 180} 181 182inline bool isPunct(UChar32 c) 183{ 184 return !!u_ispunct(c); 185} 186 187inline bool hasLineBreakingPropertyComplexContext(UChar32 c) 188{ 189 return u_getIntPropertyValue(c, UCHAR_LINE_BREAK) == U_LB_COMPLEX_CONTEXT; 190} 191 192inline bool hasLineBreakingPropertyComplexContextOrIdeographic(UChar32 c) 193{ 194 int32_t prop = u_getIntPropertyValue(c, UCHAR_LINE_BREAK); 195 return prop == U_LB_COMPLEX_CONTEXT || prop == U_LB_IDEOGRAPHIC; 196} 197 198inline UChar32 mirroredChar(UChar32 c) 199{ 200 return u_charMirror(c); 201} 202 203inline CharCategory category(UChar32 c) 204{ 205 return static_cast<CharCategory>(U_GET_GC_MASK(c)); 206} 207 208inline Direction direction(UChar32 c) 209{ 210 return static_cast<Direction>(u_charDirection(c)); 211} 212 213inline bool isLower(UChar32 c) 214{ 215 return !!u_islower(c); 216} 217 218inline uint8_t combiningClass(UChar32 c) 219{ 220 return u_getCombiningClass(c); 221} 222 223inline DecompositionType decompositionType(UChar32 c) 224{ 225 return static_cast<DecompositionType>(u_getIntPropertyValue(c, UCHAR_DECOMPOSITION_TYPE)); 226} 227 228inline int umemcasecmp(const UChar* a, const UChar* b, int len) 229{ 230 return u_memcasecmp(a, b, len, U_FOLD_CASE_DEFAULT); 231} 232 233} } 234 235#endif // WTF_UNICODE_ICU_H 236