183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius//
283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius//   Copyright (C) 2012 International Business Machines Corporation
383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius//   and others. All rights reserved.
483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius//
583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius//   file:  regeximp.cpp
683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius//
783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius//           ICU Regular Expressions,
883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius//             miscellaneous implementation functions.
983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius//
1083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius
1183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius#include "unicode/utypes.h"
1283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius
1383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius#if !UCONFIG_NO_REGULAR_EXPRESSIONS
1483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius#include "regeximp.h"
1583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius#include "unicode/utf16.h"
1683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius
1783a171d1a62abf406f7f44ae671823d5ec20db7dCraig CorneliusU_NAMESPACE_BEGIN
1883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius
1983a171d1a62abf406f7f44ae671823d5ec20db7dCraig CorneliusCaseFoldingUTextIterator::CaseFoldingUTextIterator(UText &text) :
2083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius   fUText(text), fcsp(NULL), fFoldChars(NULL), fFoldLength(0) {
2183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius   fcsp = ucase_getSingleton();
2283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius}
2383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius
2483a171d1a62abf406f7f44ae671823d5ec20db7dCraig CorneliusCaseFoldingUTextIterator::~CaseFoldingUTextIterator() {}
2583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius
2683a171d1a62abf406f7f44ae671823d5ec20db7dCraig CorneliusUChar32 CaseFoldingUTextIterator::next() {
2783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius    UChar32  foldedC;
2883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius    UChar32  originalC;
2983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius    if (fFoldChars == NULL) {
3083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius        // We are not in a string folding of an earlier character.
3183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius        // Start handling the next char from the input UText.
3283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius        originalC = UTEXT_NEXT32(&fUText);
3383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius        if (originalC == U_SENTINEL) {
3483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius            return originalC;
3583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius        }
3683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius        fFoldLength = ucase_toFullFolding(fcsp, originalC, &fFoldChars, U_FOLD_CASE_DEFAULT);
3783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius        if (fFoldLength >= UCASE_MAX_STRING_LENGTH || fFoldLength < 0) {
3883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius            // input code point folds to a single code point, possibly itself.
3983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius            // See comment in ucase.h for explanation of return values from ucase_toFullFoldings.
4083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius            if (fFoldLength < 0) {
4183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius                fFoldLength = ~fFoldLength;
4283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius            }
4383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius            foldedC = (UChar32)fFoldLength;
4483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius            fFoldChars = NULL;
4583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius            return foldedC;
4683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius        }
4783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius        // String foldings fall through here.
4883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius        fFoldIndex = 0;
4983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius    }
5083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius
5183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius    U16_NEXT(fFoldChars, fFoldIndex, fFoldLength, foldedC);
5283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius    if (fFoldIndex >= fFoldLength) {
5383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius        fFoldChars = NULL;
5483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius    }
5583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius    return foldedC;
5683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius}
5783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius
5883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius
5983a171d1a62abf406f7f44ae671823d5ec20db7dCraig CorneliusUBool CaseFoldingUTextIterator::inExpansion() {
6083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius    return fFoldChars != NULL;
6183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius}
6283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius
6383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius
6483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius
6583a171d1a62abf406f7f44ae671823d5ec20db7dCraig CorneliusCaseFoldingUCharIterator::CaseFoldingUCharIterator(const UChar *chars, int64_t start, int64_t limit) :
6683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius   fChars(chars), fIndex(start), fLimit(limit), fcsp(NULL), fFoldChars(NULL), fFoldLength(0) {
6783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius   fcsp = ucase_getSingleton();
6883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius}
6983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius
7083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius
7183a171d1a62abf406f7f44ae671823d5ec20db7dCraig CorneliusCaseFoldingUCharIterator::~CaseFoldingUCharIterator() {}
7283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius
7383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius
7483a171d1a62abf406f7f44ae671823d5ec20db7dCraig CorneliusUChar32 CaseFoldingUCharIterator::next() {
7583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius    UChar32  foldedC;
7683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius    UChar32  originalC;
7783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius    if (fFoldChars == NULL) {
7883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius        // We are not in a string folding of an earlier character.
7983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius        // Start handling the next char from the input UText.
8083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius        if (fIndex >= fLimit) {
8183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius            return U_SENTINEL;
8283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius        }
8383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius        U16_NEXT(fChars, fIndex, fLimit, originalC);
8483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius
8583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius        fFoldLength = ucase_toFullFolding(fcsp, originalC, &fFoldChars, U_FOLD_CASE_DEFAULT);
8683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius        if (fFoldLength >= UCASE_MAX_STRING_LENGTH || fFoldLength < 0) {
8783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius            // input code point folds to a single code point, possibly itself.
8883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius            // See comment in ucase.h for explanation of return values from ucase_toFullFoldings.
8983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius            if (fFoldLength < 0) {
9083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius                fFoldLength = ~fFoldLength;
9183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius            }
9283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius            foldedC = (UChar32)fFoldLength;
9383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius            fFoldChars = NULL;
9483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius            return foldedC;
9583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius        }
9683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius        // String foldings fall through here.
9783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius        fFoldIndex = 0;
9883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius    }
9983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius
10083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius    U16_NEXT(fFoldChars, fFoldIndex, fFoldLength, foldedC);
10183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius    if (fFoldIndex >= fFoldLength) {
10283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius        fFoldChars = NULL;
10383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius    }
10483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius    return foldedC;
10583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius}
10683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius
10783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius
10883a171d1a62abf406f7f44ae671823d5ec20db7dCraig CorneliusUBool CaseFoldingUCharIterator::inExpansion() {
10983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius    return fFoldChars != NULL;
11083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius}
11183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius
11283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Corneliusint64_t CaseFoldingUCharIterator::getIndex() {
11383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius    return fIndex;
11483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius}
11583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius
11683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius
11783a171d1a62abf406f7f44ae671823d5ec20db7dCraig CorneliusU_NAMESPACE_END
11883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius
11983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius#endif
12083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius
121