16f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 26f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Copyright (C) 2012 International Business Machines Corporation 36f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// and others. All rights reserved. 46f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 56f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// file: regeximp.cpp 66f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 76f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// ICU Regular Expressions, 86f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// miscellaneous implementation functions. 96f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/utypes.h" 126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#if !UCONFIG_NO_REGULAR_EXPRESSIONS 146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "regeximp.h" 156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/utf16.h" 166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_NAMESPACE_BEGIN 186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgCaseFoldingUTextIterator::CaseFoldingUTextIterator(UText &text) : 206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fUText(text), fcsp(NULL), fFoldChars(NULL), fFoldLength(0) { 216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fcsp = ucase_getSingleton(); 226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgCaseFoldingUTextIterator::~CaseFoldingUTextIterator() {} 256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUChar32 CaseFoldingUTextIterator::next() { 276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 foldedC; 286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 originalC; 296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fFoldChars == NULL) { 306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // We are not in a string folding of an earlier character. 316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Start handling the next char from the input UText. 326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org originalC = UTEXT_NEXT32(&fUText); 336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (originalC == U_SENTINEL) { 346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return originalC; 356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fFoldLength = ucase_toFullFolding(fcsp, originalC, &fFoldChars, U_FOLD_CASE_DEFAULT); 376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fFoldLength >= UCASE_MAX_STRING_LENGTH || fFoldLength < 0) { 386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // input code point folds to a single code point, possibly itself. 396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // See comment in ucase.h for explanation of return values from ucase_toFullFoldings. 406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fFoldLength < 0) { 416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fFoldLength = ~fFoldLength; 426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org foldedC = (UChar32)fFoldLength; 446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fFoldChars = NULL; 456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return foldedC; 466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // String foldings fall through here. 486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fFoldIndex = 0; 496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U16_NEXT(fFoldChars, fFoldIndex, fFoldLength, foldedC); 526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fFoldIndex >= fFoldLength) { 536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fFoldChars = NULL; 546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return foldedC; 566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUBool CaseFoldingUTextIterator::inExpansion() { 606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return fFoldChars != NULL; 616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgCaseFoldingUCharIterator::CaseFoldingUCharIterator(const UChar *chars, int64_t start, int64_t limit) : 666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fChars(chars), fIndex(start), fLimit(limit), fcsp(NULL), fFoldChars(NULL), fFoldLength(0) { 676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fcsp = ucase_getSingleton(); 686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgCaseFoldingUCharIterator::~CaseFoldingUCharIterator() {} 726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUChar32 CaseFoldingUCharIterator::next() { 756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 foldedC; 766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 originalC; 776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fFoldChars == NULL) { 786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // We are not in a string folding of an earlier character. 796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Start handling the next char from the input UText. 806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fIndex >= fLimit) { 816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return U_SENTINEL; 826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U16_NEXT(fChars, fIndex, fLimit, originalC); 846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fFoldLength = ucase_toFullFolding(fcsp, originalC, &fFoldChars, U_FOLD_CASE_DEFAULT); 866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fFoldLength >= UCASE_MAX_STRING_LENGTH || fFoldLength < 0) { 876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // input code point folds to a single code point, possibly itself. 886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // See comment in ucase.h for explanation of return values from ucase_toFullFoldings. 896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fFoldLength < 0) { 906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fFoldLength = ~fFoldLength; 916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org foldedC = (UChar32)fFoldLength; 936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fFoldChars = NULL; 946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return foldedC; 956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // String foldings fall through here. 976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fFoldIndex = 0; 986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U16_NEXT(fFoldChars, fFoldIndex, fFoldLength, foldedC); 1016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fFoldIndex >= fFoldLength) { 1026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fFoldChars = NULL; 1036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return foldedC; 1056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 1066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUBool CaseFoldingUCharIterator::inExpansion() { 1096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return fFoldChars != NULL; 1106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 1116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgint64_t CaseFoldingUCharIterator::getIndex() { 1136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return fIndex; 1146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 1156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_NAMESPACE_END 1186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#endif 1206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 121