183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius// 283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius// Copyright (C) 2012 International Business Machines Corporation 383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius// and others. All rights reserved. 483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius// 583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius// file: regeximp.cpp 683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius// 783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius// ICU Regular Expressions, 883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius// miscellaneous implementation functions. 983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius// 1083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius 1183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius#include "unicode/utypes.h" 1283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius 1383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius#if !UCONFIG_NO_REGULAR_EXPRESSIONS 1483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius#include "regeximp.h" 1583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius#include "unicode/utf16.h" 1683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius 1783a171d1a62abf406f7f44ae671823d5ec20db7dCraig CorneliusU_NAMESPACE_BEGIN 1883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius 1983a171d1a62abf406f7f44ae671823d5ec20db7dCraig CorneliusCaseFoldingUTextIterator::CaseFoldingUTextIterator(UText &text) : 2083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius fUText(text), fcsp(NULL), fFoldChars(NULL), fFoldLength(0) { 2183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius fcsp = ucase_getSingleton(); 2283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius} 2383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius 2483a171d1a62abf406f7f44ae671823d5ec20db7dCraig CorneliusCaseFoldingUTextIterator::~CaseFoldingUTextIterator() {} 2583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius 2683a171d1a62abf406f7f44ae671823d5ec20db7dCraig CorneliusUChar32 CaseFoldingUTextIterator::next() { 2783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius UChar32 foldedC; 2883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius UChar32 originalC; 2983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius if (fFoldChars == NULL) { 3083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius // We are not in a string folding of an earlier character. 3183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius // Start handling the next char from the input UText. 3283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius originalC = UTEXT_NEXT32(&fUText); 3383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius if (originalC == U_SENTINEL) { 3483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius return originalC; 3583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius } 3683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius fFoldLength = ucase_toFullFolding(fcsp, originalC, &fFoldChars, U_FOLD_CASE_DEFAULT); 3783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius if (fFoldLength >= UCASE_MAX_STRING_LENGTH || fFoldLength < 0) { 3883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius // input code point folds to a single code point, possibly itself. 3983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius // See comment in ucase.h for explanation of return values from ucase_toFullFoldings. 4083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius if (fFoldLength < 0) { 4183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius fFoldLength = ~fFoldLength; 4283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius } 4383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius foldedC = (UChar32)fFoldLength; 4483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius fFoldChars = NULL; 4583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius return foldedC; 4683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius } 4783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius // String foldings fall through here. 4883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius fFoldIndex = 0; 4983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius } 5083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius 5183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius U16_NEXT(fFoldChars, fFoldIndex, fFoldLength, foldedC); 5283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius if (fFoldIndex >= fFoldLength) { 5383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius fFoldChars = NULL; 5483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius } 5583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius return foldedC; 5683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius} 5783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius 5883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius 5983a171d1a62abf406f7f44ae671823d5ec20db7dCraig CorneliusUBool CaseFoldingUTextIterator::inExpansion() { 6083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius return fFoldChars != NULL; 6183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius} 6283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius 6383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius 6483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius 6583a171d1a62abf406f7f44ae671823d5ec20db7dCraig CorneliusCaseFoldingUCharIterator::CaseFoldingUCharIterator(const UChar *chars, int64_t start, int64_t limit) : 6683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius fChars(chars), fIndex(start), fLimit(limit), fcsp(NULL), fFoldChars(NULL), fFoldLength(0) { 6783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius fcsp = ucase_getSingleton(); 6883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius} 6983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius 7083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius 7183a171d1a62abf406f7f44ae671823d5ec20db7dCraig CorneliusCaseFoldingUCharIterator::~CaseFoldingUCharIterator() {} 7283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius 7383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius 7483a171d1a62abf406f7f44ae671823d5ec20db7dCraig CorneliusUChar32 CaseFoldingUCharIterator::next() { 7583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius UChar32 foldedC; 7683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius UChar32 originalC; 7783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius if (fFoldChars == NULL) { 7883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius // We are not in a string folding of an earlier character. 7983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius // Start handling the next char from the input UText. 8083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius if (fIndex >= fLimit) { 8183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius return U_SENTINEL; 8283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius } 8383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius U16_NEXT(fChars, fIndex, fLimit, originalC); 8483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius 8583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius fFoldLength = ucase_toFullFolding(fcsp, originalC, &fFoldChars, U_FOLD_CASE_DEFAULT); 8683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius if (fFoldLength >= UCASE_MAX_STRING_LENGTH || fFoldLength < 0) { 8783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius // input code point folds to a single code point, possibly itself. 8883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius // See comment in ucase.h for explanation of return values from ucase_toFullFoldings. 8983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius if (fFoldLength < 0) { 9083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius fFoldLength = ~fFoldLength; 9183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius } 9283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius foldedC = (UChar32)fFoldLength; 9383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius fFoldChars = NULL; 9483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius return foldedC; 9583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius } 9683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius // String foldings fall through here. 9783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius fFoldIndex = 0; 9883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius } 9983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius 10083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius U16_NEXT(fFoldChars, fFoldIndex, fFoldLength, foldedC); 10183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius if (fFoldIndex >= fFoldLength) { 10283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius fFoldChars = NULL; 10383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius } 10483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius return foldedC; 10583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius} 10683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius 10783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius 10883a171d1a62abf406f7f44ae671823d5ec20db7dCraig CorneliusUBool CaseFoldingUCharIterator::inExpansion() { 10983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius return fFoldChars != NULL; 11083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius} 11183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius 11283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Corneliusint64_t CaseFoldingUCharIterator::getIndex() { 11383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius return fIndex; 11483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius} 11583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius 11683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius 11783a171d1a62abf406f7f44ae671823d5ec20db7dCraig CorneliusU_NAMESPACE_END 11883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius 11983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius#endif 12083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius 121