1b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* 2b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru******************************************************************************* 359d709d503bab6e2b61931737e662dd293b40578ccornelius* Copyright (C) 2004-2013, International Business Machines 4b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Corporation and others. All Rights Reserved. 5b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru******************************************************************************* 659d709d503bab6e2b61931737e662dd293b40578ccornelius* file name: uregex.cpp 7b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/ 8b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 9b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/utypes.h" 10b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 11b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if !UCONFIG_NO_REGULAR_EXPRESSIONS 12b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 13b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/regex.h" 14b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/uregex.h" 15b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/unistr.h" 16b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/ustring.h" 17b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/uchar.h" 18b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/uobject.h" 19103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius#include "unicode/utf16.h" 20b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "umutex.h" 21b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "uassert.h" 22b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "cmemory.h" 23b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "regextxt.h" 2550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 2650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include <stdio.h> 2750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 2850294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_NAMESPACE_BEGIN 2950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 3050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#define REMAINING_CAPACITY(idx,len) ((((len)-(idx))>0)?((len)-(idx)):0) 31b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehostruct RegularExpression: public UMemory { 33b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querupublic: 3450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegularExpression(); 3550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ~RegularExpression(); 36b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t fMagic; 37b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexPattern *fPat; 3859d709d503bab6e2b61931737e662dd293b40578ccornelius u_atomic_int32_t *fPatRefCount; 39b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar *fPatString; 40b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t fPatStringLen; 41b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexMatcher *fMatcher; 42b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *fText; // Text from setText() 43b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t fTextLength; // Length provided by user with setText(), which 44b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // may be -1. 4550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UBool fOwnsText; 46b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}; 47b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 48b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const int32_t REXP_MAGIC = 0x72657870; // "rexp" in ASCII 49b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 5050294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoRegularExpression::RegularExpression() { 51b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fMagic = REXP_MAGIC; 52b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fPat = NULL; 53b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fPatRefCount = NULL; 54b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fPatString = NULL; 55b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fPatStringLen = 0; 56b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fMatcher = NULL; 57b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fText = NULL; 58b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fTextLength = 0; 5950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fOwnsText = FALSE; 60b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 61b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 6250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoRegularExpression::~RegularExpression() { 63b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete fMatcher; 64b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fMatcher = NULL; 65b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (fPatRefCount!=NULL && umtx_atomic_dec(fPatRefCount)==0) { 66b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete fPat; 67b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_free(fPatString); 6859d709d503bab6e2b61931737e662dd293b40578ccornelius uprv_free((void *)fPatRefCount); 69b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 7050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fOwnsText && fText!=NULL) { 7150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uprv_free((void *)fText); 7250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 73b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fMagic = 0; 74b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 75b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7650294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_NAMESPACE_END 7750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 7850294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_NAMESPACE_USE 7950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 80b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//---------------------------------------------------------------------------------------- 81b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 82b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// validateRE Do boilerplate style checks on API function parameters. 83b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Return TRUE if they look OK. 84b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//---------------------------------------------------------------------------------------- 85b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehostatic UBool validateRE(const RegularExpression *re, UBool requiresText, UErrorCode *status) { 86b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(*status)) { 87b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return FALSE; 88b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 89b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (re == NULL || re->fMagic != REXP_MAGIC) { 90b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *status = U_ILLEGAL_ARGUMENT_ERROR; 91b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return FALSE; 92b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 9350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // !!! Not sure how to update this with the new UText backing, which is stored in re->fMatcher anyway 9450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (requiresText && re->fText == NULL && !re->fOwnsText) { 95b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *status = U_REGEX_INVALID_STATE; 96b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return FALSE; 97b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 98b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return TRUE; 99b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 100b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 101b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//---------------------------------------------------------------------------------------- 102b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 103b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// uregex_open 104b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 105b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//---------------------------------------------------------------------------------------- 106b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI URegularExpression * U_EXPORT2 107b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruuregex_open( const UChar *pattern, 108b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t patternLength, 109b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t flags, 110b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UParseError *pe, 111b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode *status) { 112b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 113b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(*status)) { 114b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 115b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 116b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (pattern == NULL || patternLength < -1 || patternLength == 0) { 117b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *status = U_ILLEGAL_ARGUMENT_ERROR; 118b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 119b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 120b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t actualPatLen = patternLength; 121b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (actualPatLen == -1) { 122b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru actualPatLen = u_strlen(pattern); 123b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 124b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 12559d709d503bab6e2b61931737e662dd293b40578ccornelius RegularExpression *re = new RegularExpression; 12659d709d503bab6e2b61931737e662dd293b40578ccornelius u_atomic_int32_t *refC = (u_atomic_int32_t *)uprv_malloc(sizeof(int32_t)); 127b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar *patBuf = (UChar *)uprv_malloc(sizeof(UChar)*(actualPatLen+1)); 128b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (re == NULL || refC == NULL || patBuf == NULL) { 129b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *status = U_MEMORY_ALLOCATION_ERROR; 130b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete re; 13159d709d503bab6e2b61931737e662dd293b40578ccornelius uprv_free((void *)refC); 132b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_free(patBuf); 133b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 134b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 135b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru re->fPatRefCount = refC; 136b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *re->fPatRefCount = 1; 137b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 138b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 139b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Make a copy of the pattern string, so we can return it later if asked. 14050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // For compiling the pattern, we will use a UText wrapper around 14150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // this local copy, to avoid making even more copies. 142b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 143b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru re->fPatString = patBuf; 144b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru re->fPatStringLen = patternLength; 145b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru u_memcpy(patBuf, pattern, actualPatLen); 146b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru patBuf[actualPatLen] = 0; 14750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 14850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText patText = UTEXT_INITIALIZER; 14950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_openUChars(&patText, patBuf, patternLength, status); 150b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 151b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 152b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Compile the pattern 153b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 154b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (pe != NULL) { 15550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho re->fPat = RegexPattern::compile(&patText, flags, *pe, *status); 156b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 15750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho re->fPat = RegexPattern::compile(&patText, flags, *status); 158b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 15950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&patText); 16050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 161b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(*status)) { 162b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru goto ErrorExit; 163b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 164b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 165b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 166b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Create the matcher object 167b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 168b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru re->fMatcher = re->fPat->matcher(*status); 169b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_SUCCESS(*status)) { 17050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return (URegularExpression*)re; 171b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 172b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 173b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruErrorExit: 174b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete re; 175b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 176b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 177b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 178b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 179b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//---------------------------------------------------------------------------------------- 180b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 18150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// uregex_openUText 18250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 18350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//---------------------------------------------------------------------------------------- 18450294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_CAPI URegularExpression * U_EXPORT2 18550294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehouregex_openUText(UText *pattern, 18650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uint32_t flags, 18750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UParseError *pe, 18850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode *status) { 18950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 19050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(*status)) { 19150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return NULL; 19250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 19350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (pattern == NULL) { 19450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *status = U_ILLEGAL_ARGUMENT_ERROR; 19550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return NULL; 19650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 19750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 19850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t patternNativeLength = utext_nativeLength(pattern); 19950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 20050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (patternNativeLength == 0) { 20150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *status = U_ILLEGAL_ARGUMENT_ERROR; 20250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return NULL; 20350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 20450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 20550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegularExpression *re = new RegularExpression; 20650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 20750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode lengthStatus = U_ZERO_ERROR; 20850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t pattern16Length = utext_extract(pattern, 0, patternNativeLength, NULL, 0, &lengthStatus); 20950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 21059d709d503bab6e2b61931737e662dd293b40578ccornelius u_atomic_int32_t *refC = (u_atomic_int32_t *)uprv_malloc(sizeof(int32_t)); 21150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar *patBuf = (UChar *)uprv_malloc(sizeof(UChar)*(pattern16Length+1)); 21250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (re == NULL || refC == NULL || patBuf == NULL) { 21350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *status = U_MEMORY_ALLOCATION_ERROR; 21450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete re; 21559d709d503bab6e2b61931737e662dd293b40578ccornelius uprv_free((void *)refC); 21650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uprv_free(patBuf); 21750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return NULL; 21850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 21950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho re->fPatRefCount = refC; 22050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *re->fPatRefCount = 1; 22150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 22250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 22350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Make a copy of the pattern string, so we can return it later if asked. 22450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // For compiling the pattern, we will use a read-only UText wrapper 22550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // around this local copy, to avoid making even more copies. 22650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 22750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho re->fPatString = patBuf; 22850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho re->fPatStringLen = pattern16Length; 22950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_extract(pattern, 0, patternNativeLength, patBuf, pattern16Length+1, status); 23050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 23150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText patText = UTEXT_INITIALIZER; 23250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_openUChars(&patText, patBuf, pattern16Length, status); 23350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 23450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 23550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Compile the pattern 23650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 23750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (pe != NULL) { 23850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho re->fPat = RegexPattern::compile(&patText, flags, *pe, *status); 23950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 24050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho re->fPat = RegexPattern::compile(&patText, flags, *status); 24150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 24250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&patText); 24350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 24450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(*status)) { 24550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho goto ErrorExit; 24650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 24750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 24850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 24950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Create the matcher object 25050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 25150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho re->fMatcher = re->fPat->matcher(*status); 25250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_SUCCESS(*status)) { 25350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return (URegularExpression*)re; 25450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 25550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 25650294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoErrorExit: 25750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete re; 25850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return NULL; 25950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 26050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 26150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 26250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//---------------------------------------------------------------------------------------- 26350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 264b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// uregex_close 265b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 266b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//---------------------------------------------------------------------------------------- 267b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI void U_EXPORT2 26850294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehouregex_close(URegularExpression *re2) { 26950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegularExpression *re = (RegularExpression*)re2; 270b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 271b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (validateRE(re, FALSE, &status) == FALSE) { 272b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 273b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 274b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete re; 275b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 276b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 277b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 278b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//---------------------------------------------------------------------------------------- 279b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 280b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// uregex_clone 281b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 282b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//---------------------------------------------------------------------------------------- 283b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI URegularExpression * U_EXPORT2 28450294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehouregex_clone(const URegularExpression *source2, UErrorCode *status) { 28550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegularExpression *source = (RegularExpression*)source2; 286b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (validateRE(source, FALSE, status) == FALSE) { 287b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 288b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 289b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 29050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegularExpression *clone = new RegularExpression; 291b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (clone == NULL) { 292b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *status = U_MEMORY_ALLOCATION_ERROR; 293b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 294b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 295b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 296b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru clone->fMatcher = source->fPat->matcher(*status); 297b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(*status)) { 298b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete clone; 299b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 300b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 301b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 302b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru clone->fPat = source->fPat; 303b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru clone->fPatRefCount = source->fPatRefCount; 304b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru clone->fPatString = source->fPatString; 305b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru clone->fPatStringLen = source->fPatStringLen; 306b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru umtx_atomic_inc(source->fPatRefCount); 307b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Note: fText is not cloned. 308b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 30950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return (URegularExpression*)clone; 310b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 311b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 312b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 313b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 314b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 315b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//------------------------------------------------------------------------------ 316b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 317b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// uregex_pattern 318b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 319b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//------------------------------------------------------------------------------ 320b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI const UChar * U_EXPORT2 32150294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehouregex_pattern(const URegularExpression *regexp2, 32250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t *patLength, 32350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode *status) { 32450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegularExpression *regexp = (RegularExpression*)regexp2; 325b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 326b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (validateRE(regexp, FALSE, status) == FALSE) { 327b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 328b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 329b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (patLength != NULL) { 330b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *patLength = regexp->fPatStringLen; 331b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 332b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return regexp->fPatString; 333b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 334b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 335b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 336b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//------------------------------------------------------------------------------ 337b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 33850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// uregex_patternUText 33950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 34050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//------------------------------------------------------------------------------ 34150294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_CAPI UText * U_EXPORT2 34250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehouregex_patternUText(const URegularExpression *regexp2, 34350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode *status) { 34450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegularExpression *regexp = (RegularExpression*)regexp2; 34527f654740f2a26ad62a5c155af9199af9e69b889claireho return regexp->fPat->patternText(*status); 34650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 34750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 34850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 34950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//------------------------------------------------------------------------------ 35050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 351b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// uregex_flags 352b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 353b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//------------------------------------------------------------------------------ 354b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI int32_t U_EXPORT2 35550294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehouregex_flags(const URegularExpression *regexp2, UErrorCode *status) { 35650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegularExpression *regexp = (RegularExpression*)regexp2; 357b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (validateRE(regexp, FALSE, status) == FALSE) { 358b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 359b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 360b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t flags = regexp->fPat->flags(); 361b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return flags; 362b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 363b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 364b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 365b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//------------------------------------------------------------------------------ 366b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 367b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// uregex_setText 368b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 369b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//------------------------------------------------------------------------------ 370b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI void U_EXPORT2 37150294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehouregex_setText(URegularExpression *regexp2, 372b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *text, 373b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t textLength, 374b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode *status) { 37550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegularExpression *regexp = (RegularExpression*)regexp2; 376b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (validateRE(regexp, FALSE, status) == FALSE) { 377b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 378b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 379b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (text == NULL || textLength < -1) { 380b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *status = U_ILLEGAL_ARGUMENT_ERROR; 381b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 382b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 38350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 38450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (regexp->fOwnsText && regexp->fText != NULL) { 38550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uprv_free((void *)regexp->fText); 38650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 38750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 388b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru regexp->fText = text; 389b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru regexp->fTextLength = textLength; 39050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho regexp->fOwnsText = FALSE; 39150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 39250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText input = UTEXT_INITIALIZER; 39350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_openUChars(&input, text, textLength, status); 39450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho regexp->fMatcher->reset(&input); 39550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&input); // reset() made a shallow clone, so we don't need this copy 39650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 39750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 398b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 39950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//------------------------------------------------------------------------------ 40050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 40150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// uregex_setUText 40250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 40350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//------------------------------------------------------------------------------ 40450294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_CAPI void U_EXPORT2 40550294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehouregex_setUText(URegularExpression *regexp2, 40650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText *text, 40750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode *status) { 40850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegularExpression *regexp = (RegularExpression*)regexp2; 409b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (validateRE(regexp, FALSE, status) == FALSE) { 41050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return; 41150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 41250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (text == NULL) { 41350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *status = U_ILLEGAL_ARGUMENT_ERROR; 41450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return; 41550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 41650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 41750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (regexp->fOwnsText && regexp->fText != NULL) { 41850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uprv_free((void *)regexp->fText); 41950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 42050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 42150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho regexp->fText = NULL; // only fill it in on request 42250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho regexp->fTextLength = -1; 42350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho regexp->fOwnsText = TRUE; 42450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho regexp->fMatcher->reset(text); 425b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 426b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 427b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 428b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 429b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//------------------------------------------------------------------------------ 430b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 431b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// uregex_getText 432b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 433b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//------------------------------------------------------------------------------ 434b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI const UChar * U_EXPORT2 43550294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehouregex_getText(URegularExpression *regexp2, 436b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t *textLength, 437b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode *status) { 43850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegularExpression *regexp = (RegularExpression*)regexp2; 439b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (validateRE(regexp, FALSE, status) == FALSE) { 440b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 441b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 44250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 44350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (regexp->fText == NULL) { 44450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // need to fill in the text 44550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText *inputText = regexp->fMatcher->inputText(); 44650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t inputNativeLength = utext_nativeLength(inputText); 44750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (UTEXT_FULL_TEXT_IN_CHUNK(inputText, inputNativeLength)) { 44850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho regexp->fText = inputText->chunkContents; 44950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho regexp->fTextLength = (int32_t)inputNativeLength; 45050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho regexp->fOwnsText = FALSE; // because the UText owns it 45150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 45250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode lengthStatus = U_ZERO_ERROR; 45350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho regexp->fTextLength = utext_extract(inputText, 0, inputNativeLength, NULL, 0, &lengthStatus); // buffer overflow error 45450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar *inputChars = (UChar *)uprv_malloc(sizeof(UChar)*(regexp->fTextLength+1)); 45550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 45650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_extract(inputText, 0, inputNativeLength, inputChars, regexp->fTextLength+1, status); 45750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho regexp->fText = inputChars; 45850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho regexp->fOwnsText = TRUE; // should already be set but just in case 45950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 46050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 46150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 462b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (textLength != NULL) { 463b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *textLength = regexp->fTextLength; 464b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 465b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return regexp->fText; 466b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 467b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 468b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 469b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//------------------------------------------------------------------------------ 470b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 47150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// uregex_getUText 47250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 47350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//------------------------------------------------------------------------------ 47427f654740f2a26ad62a5c155af9199af9e69b889clairehoU_CAPI UText * U_EXPORT2 47550294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehouregex_getUText(URegularExpression *regexp2, 47650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText *dest, 47750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode *status) { 47850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegularExpression *regexp = (RegularExpression*)regexp2; 479b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (validateRE(regexp, FALSE, status) == FALSE) { 48050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return dest; 48150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 48227f654740f2a26ad62a5c155af9199af9e69b889claireho return regexp->fMatcher->getInput(dest, *status); 48350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 48450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 485b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 4862e615e9896b12236afe0ff2695e8afc2ee73f961claireho//------------------------------------------------------------------------------ 4872e615e9896b12236afe0ff2695e8afc2ee73f961claireho// 4882e615e9896b12236afe0ff2695e8afc2ee73f961claireho// uregex_refreshUText 4892e615e9896b12236afe0ff2695e8afc2ee73f961claireho// 4902e615e9896b12236afe0ff2695e8afc2ee73f961claireho//------------------------------------------------------------------------------ 491b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoU_CAPI void U_EXPORT2 4922e615e9896b12236afe0ff2695e8afc2ee73f961clairehouregex_refreshUText(URegularExpression *regexp2, 4932e615e9896b12236afe0ff2695e8afc2ee73f961claireho UText *text, 4942e615e9896b12236afe0ff2695e8afc2ee73f961claireho UErrorCode *status) { 4952e615e9896b12236afe0ff2695e8afc2ee73f961claireho RegularExpression *regexp = (RegularExpression*)regexp2; 496b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (validateRE(regexp, FALSE, status) == FALSE) { 4972e615e9896b12236afe0ff2695e8afc2ee73f961claireho return; 4982e615e9896b12236afe0ff2695e8afc2ee73f961claireho } 4992e615e9896b12236afe0ff2695e8afc2ee73f961claireho regexp->fMatcher->refreshInputText(text, *status); 5002e615e9896b12236afe0ff2695e8afc2ee73f961claireho} 501b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 5022e615e9896b12236afe0ff2695e8afc2ee73f961claireho 50350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//------------------------------------------------------------------------------ 50450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 505b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// uregex_matches 506b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 507b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//------------------------------------------------------------------------------ 508b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI UBool U_EXPORT2 50950294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehouregex_matches(URegularExpression *regexp2, 51027f654740f2a26ad62a5c155af9199af9e69b889claireho int32_t startIndex, 51127f654740f2a26ad62a5c155af9199af9e69b889claireho UErrorCode *status) { 51227f654740f2a26ad62a5c155af9199af9e69b889claireho return uregex_matches64( regexp2, (int64_t)startIndex, status); 51327f654740f2a26ad62a5c155af9199af9e69b889claireho} 51427f654740f2a26ad62a5c155af9199af9e69b889claireho 51527f654740f2a26ad62a5c155af9199af9e69b889clairehoU_CAPI UBool U_EXPORT2 51627f654740f2a26ad62a5c155af9199af9e69b889clairehouregex_matches64(URegularExpression *regexp2, 51727f654740f2a26ad62a5c155af9199af9e69b889claireho int64_t startIndex, 51827f654740f2a26ad62a5c155af9199af9e69b889claireho UErrorCode *status) { 51950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegularExpression *regexp = (RegularExpression*)regexp2; 520c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UBool result = FALSE; 521b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (validateRE(regexp, TRUE, status) == FALSE) { 522c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return result; 523c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 524c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (startIndex == -1) { 525c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru result = regexp->fMatcher->matches(*status); 526c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 527c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru result = regexp->fMatcher->matches(startIndex, *status); 528b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 529b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return result; 530b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 531b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 532b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 533b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//------------------------------------------------------------------------------ 534b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 535b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// uregex_lookingAt 536b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 537b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//------------------------------------------------------------------------------ 538b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI UBool U_EXPORT2 53950294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehouregex_lookingAt(URegularExpression *regexp2, 540b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t startIndex, 541b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode *status) { 54227f654740f2a26ad62a5c155af9199af9e69b889claireho return uregex_lookingAt64( regexp2, (int64_t)startIndex, status); 54327f654740f2a26ad62a5c155af9199af9e69b889claireho} 54427f654740f2a26ad62a5c155af9199af9e69b889claireho 54527f654740f2a26ad62a5c155af9199af9e69b889clairehoU_CAPI UBool U_EXPORT2 54627f654740f2a26ad62a5c155af9199af9e69b889clairehouregex_lookingAt64(URegularExpression *regexp2, 54727f654740f2a26ad62a5c155af9199af9e69b889claireho int64_t startIndex, 54827f654740f2a26ad62a5c155af9199af9e69b889claireho UErrorCode *status) { 54950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegularExpression *regexp = (RegularExpression*)regexp2; 550c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UBool result = FALSE; 551b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (validateRE(regexp, TRUE, status) == FALSE) { 552c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return result; 553c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 554c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (startIndex == -1) { 555c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru result = regexp->fMatcher->lookingAt(*status); 556c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 557c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru result = regexp->fMatcher->lookingAt(startIndex, *status); 558b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 559b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return result; 560b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 561b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 562b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 563b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 564b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//------------------------------------------------------------------------------ 565b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 566b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// uregex_find 567b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 568b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//------------------------------------------------------------------------------ 569b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI UBool U_EXPORT2 57050294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehouregex_find(URegularExpression *regexp2, 571b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t startIndex, 572b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode *status) { 57327f654740f2a26ad62a5c155af9199af9e69b889claireho return uregex_find64( regexp2, (int64_t)startIndex, status); 57427f654740f2a26ad62a5c155af9199af9e69b889claireho} 57527f654740f2a26ad62a5c155af9199af9e69b889claireho 57627f654740f2a26ad62a5c155af9199af9e69b889clairehoU_CAPI UBool U_EXPORT2 57727f654740f2a26ad62a5c155af9199af9e69b889clairehouregex_find64(URegularExpression *regexp2, 57827f654740f2a26ad62a5c155af9199af9e69b889claireho int64_t startIndex, 57927f654740f2a26ad62a5c155af9199af9e69b889claireho UErrorCode *status) { 58050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegularExpression *regexp = (RegularExpression*)regexp2; 581c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UBool result = FALSE; 582b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (validateRE(regexp, TRUE, status) == FALSE) { 583c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return result; 584c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 585c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (startIndex == -1) { 586c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru regexp->fMatcher->resetPreserveRegion(); 587c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru result = regexp->fMatcher->find(); 588c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 589c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru result = regexp->fMatcher->find(startIndex, *status); 590b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 591b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return result; 592b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 593b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 59427f654740f2a26ad62a5c155af9199af9e69b889claireho 595b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//------------------------------------------------------------------------------ 596b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 597b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// uregex_findNext 598b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 599b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//------------------------------------------------------------------------------ 600b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI UBool U_EXPORT2 60150294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehouregex_findNext(URegularExpression *regexp2, 602b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode *status) { 60350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegularExpression *regexp = (RegularExpression*)regexp2; 604b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (validateRE(regexp, TRUE, status) == FALSE) { 605b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return FALSE; 606b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 607b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool result = regexp->fMatcher->find(); 608b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return result; 609b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 610b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 611b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//------------------------------------------------------------------------------ 612b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 613b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// uregex_groupCount 614b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 615b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//------------------------------------------------------------------------------ 616b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI int32_t U_EXPORT2 61750294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehouregex_groupCount(URegularExpression *regexp2, 618b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode *status) { 61950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegularExpression *regexp = (RegularExpression*)regexp2; 620b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (validateRE(regexp, FALSE, status) == FALSE) { 621b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 622b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 623b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t result = regexp->fMatcher->groupCount(); 624b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return result; 625b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 626b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 627b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 628b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//------------------------------------------------------------------------------ 629b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 630b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// uregex_group 631b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 632b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//------------------------------------------------------------------------------ 633b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI int32_t U_EXPORT2 63450294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehouregex_group(URegularExpression *regexp2, 635b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t groupNum, 636b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar *dest, 637b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t destCapacity, 638b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode *status) { 63950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegularExpression *regexp = (RegularExpression*)regexp2; 640b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (validateRE(regexp, TRUE, status) == FALSE) { 641b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 642b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 643b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (destCapacity < 0 || (destCapacity > 0 && dest == NULL)) { 644b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *status = U_ILLEGAL_ARGUMENT_ERROR; 645b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 646b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 64750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 64850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (destCapacity == 0 || regexp->fText != NULL) { 64950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // If preflighting or if we already have the text as UChars, 65027f654740f2a26ad62a5c155af9199af9e69b889claireho // this is a little cheaper than going through uregex_groupUTextDeep() 65150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 65250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 65350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Pick up the range of characters from the matcher 65450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 65550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t startIx = regexp->fMatcher->start(groupNum, *status); 65650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t endIx = regexp->fMatcher->end (groupNum, *status); 65750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(*status)) { 65850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return 0; 65950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 660b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 66150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 66250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Trim length based on buffer capacity 66350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 66450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t fullLength = endIx - startIx; 66550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t copyLength = fullLength; 66650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (copyLength < destCapacity) { 66750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho dest[copyLength] = 0; 66850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else if (copyLength == destCapacity) { 66950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *status = U_STRING_NOT_TERMINATED_WARNING; 67050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 67150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho copyLength = destCapacity; 67250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *status = U_BUFFER_OVERFLOW_ERROR; 67350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 67450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 67550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 67650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Copy capture group to user's buffer 67750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 67850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (copyLength > 0) { 67950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho u_memcpy(dest, ®exp->fText[startIx], copyLength); 68050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 68150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return fullLength; 68250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 683fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t result = 0; 68427f654740f2a26ad62a5c155af9199af9e69b889claireho UText *groupText = uregex_groupUTextDeep(regexp2, groupNum, NULL, status); 685fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if (U_SUCCESS(*status)) { 686fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius result = utext_extract(groupText, 0, utext_nativeLength(groupText), dest, destCapacity, status); 687fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 68850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(groupText); 68950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return result; 690b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 69150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 692b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 69350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 69450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//------------------------------------------------------------------------------ 69550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 69650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// uregex_groupUText 69750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 69850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//------------------------------------------------------------------------------ 69950294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_CAPI UText * U_EXPORT2 70050294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehouregex_groupUText(URegularExpression *regexp2, 70150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t groupNum, 70250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText *dest, 70327f654740f2a26ad62a5c155af9199af9e69b889claireho int64_t *groupLength, 70427f654740f2a26ad62a5c155af9199af9e69b889claireho UErrorCode *status) { 70527f654740f2a26ad62a5c155af9199af9e69b889claireho RegularExpression *regexp = (RegularExpression*)regexp2; 706b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (validateRE(regexp, TRUE, status) == FALSE) { 70727f654740f2a26ad62a5c155af9199af9e69b889claireho UErrorCode emptyTextStatus = U_ZERO_ERROR; 70827f654740f2a26ad62a5c155af9199af9e69b889claireho return (dest ? dest : utext_openUChars(NULL, NULL, 0, &emptyTextStatus)); 70927f654740f2a26ad62a5c155af9199af9e69b889claireho } 71027f654740f2a26ad62a5c155af9199af9e69b889claireho 71127f654740f2a26ad62a5c155af9199af9e69b889claireho return regexp->fMatcher->group(groupNum, dest, *groupLength, *status); 71227f654740f2a26ad62a5c155af9199af9e69b889claireho} 71327f654740f2a26ad62a5c155af9199af9e69b889claireho 71427f654740f2a26ad62a5c155af9199af9e69b889claireho//------------------------------------------------------------------------------ 71527f654740f2a26ad62a5c155af9199af9e69b889claireho// 71627f654740f2a26ad62a5c155af9199af9e69b889claireho// uregex_groupUTextDeep 71727f654740f2a26ad62a5c155af9199af9e69b889claireho// 71827f654740f2a26ad62a5c155af9199af9e69b889claireho//------------------------------------------------------------------------------ 71927f654740f2a26ad62a5c155af9199af9e69b889clairehoU_CAPI UText * U_EXPORT2 72027f654740f2a26ad62a5c155af9199af9e69b889clairehouregex_groupUTextDeep(URegularExpression *regexp2, 72127f654740f2a26ad62a5c155af9199af9e69b889claireho int32_t groupNum, 72227f654740f2a26ad62a5c155af9199af9e69b889claireho UText *dest, 72350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode *status) { 72450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegularExpression *regexp = (RegularExpression*)regexp2; 725b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (validateRE(regexp, TRUE, status) == FALSE) { 72650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode emptyTextStatus = U_ZERO_ERROR; 72750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return (dest ? dest : utext_openUChars(NULL, NULL, 0, &emptyTextStatus)); 728b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 729b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 73050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (regexp->fText != NULL) { 73150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 73250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Pick up the range of characters from the matcher 73350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // and use our already-extracted characters 73450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 73550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t startIx = regexp->fMatcher->start(groupNum, *status); 73650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t endIx = regexp->fMatcher->end (groupNum, *status); 73750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(*status)) { 73850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode emptyTextStatus = U_ZERO_ERROR; 73950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return (dest ? dest : utext_openUChars(NULL, NULL, 0, &emptyTextStatus)); 74050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 74150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 74250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (dest) { 74350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_replace(dest, 0, utext_nativeLength(dest), ®exp->fText[startIx], endIx - startIx, status); 74450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 74550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText groupText = UTEXT_INITIALIZER; 74650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_openUChars(&groupText, ®exp->fText[startIx], endIx - startIx, status); 74750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho dest = utext_clone(NULL, &groupText, TRUE, FALSE, status); 74850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&groupText); 74950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 75050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 75150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return dest; 75250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 75350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return regexp->fMatcher->group(groupNum, dest, *status); 754b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 755b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 756b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 757b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//------------------------------------------------------------------------------ 758b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 759b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// uregex_start 760b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 761b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//------------------------------------------------------------------------------ 762b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI int32_t U_EXPORT2 76350294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehouregex_start(URegularExpression *regexp2, 764b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t groupNum, 765b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode *status) { 76627f654740f2a26ad62a5c155af9199af9e69b889claireho return (int32_t)uregex_start64( regexp2, groupNum, status); 76727f654740f2a26ad62a5c155af9199af9e69b889claireho} 76827f654740f2a26ad62a5c155af9199af9e69b889claireho 76927f654740f2a26ad62a5c155af9199af9e69b889clairehoU_CAPI int64_t U_EXPORT2 77027f654740f2a26ad62a5c155af9199af9e69b889clairehouregex_start64(URegularExpression *regexp2, 77127f654740f2a26ad62a5c155af9199af9e69b889claireho int32_t groupNum, 77227f654740f2a26ad62a5c155af9199af9e69b889claireho UErrorCode *status) { 77350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegularExpression *regexp = (RegularExpression*)regexp2; 774b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (validateRE(regexp, TRUE, status) == FALSE) { 775b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 776b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 777b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t result = regexp->fMatcher->start(groupNum, *status); 778b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return result; 779b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 780b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 781b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//------------------------------------------------------------------------------ 782b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 783b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// uregex_end 784b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 785b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//------------------------------------------------------------------------------ 786b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI int32_t U_EXPORT2 78750294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehouregex_end(URegularExpression *regexp2, 788b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t groupNum, 789b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode *status) { 79027f654740f2a26ad62a5c155af9199af9e69b889claireho return (int32_t)uregex_end64( regexp2, groupNum, status); 79127f654740f2a26ad62a5c155af9199af9e69b889claireho} 79227f654740f2a26ad62a5c155af9199af9e69b889claireho 79327f654740f2a26ad62a5c155af9199af9e69b889clairehoU_CAPI int64_t U_EXPORT2 79427f654740f2a26ad62a5c155af9199af9e69b889clairehouregex_end64(URegularExpression *regexp2, 79527f654740f2a26ad62a5c155af9199af9e69b889claireho int32_t groupNum, 79627f654740f2a26ad62a5c155af9199af9e69b889claireho UErrorCode *status) { 79750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegularExpression *regexp = (RegularExpression*)regexp2; 798b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (validateRE(regexp, TRUE, status) == FALSE) { 799b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 800b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 801b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t result = regexp->fMatcher->end(groupNum, *status); 802b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return result; 803b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 804b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 805b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//------------------------------------------------------------------------------ 806b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 807b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// uregex_reset 808b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 809b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//------------------------------------------------------------------------------ 810b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI void U_EXPORT2 81150294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehouregex_reset(URegularExpression *regexp2, 812b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t index, 813b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode *status) { 81427f654740f2a26ad62a5c155af9199af9e69b889claireho uregex_reset64( regexp2, (int64_t)index, status); 81527f654740f2a26ad62a5c155af9199af9e69b889claireho} 81627f654740f2a26ad62a5c155af9199af9e69b889claireho 81727f654740f2a26ad62a5c155af9199af9e69b889clairehoU_CAPI void U_EXPORT2 81827f654740f2a26ad62a5c155af9199af9e69b889clairehouregex_reset64(URegularExpression *regexp2, 81927f654740f2a26ad62a5c155af9199af9e69b889claireho int64_t index, 82027f654740f2a26ad62a5c155af9199af9e69b889claireho UErrorCode *status) { 82150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegularExpression *regexp = (RegularExpression*)regexp2; 822b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (validateRE(regexp, TRUE, status) == FALSE) { 823b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 824b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 825b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru regexp->fMatcher->reset(index, *status); 826b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 827b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 828b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 829b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//------------------------------------------------------------------------------ 830b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 831c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// uregex_setRegion 832c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 833c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//------------------------------------------------------------------------------ 834c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste QueruU_CAPI void U_EXPORT2 83550294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehouregex_setRegion(URegularExpression *regexp2, 836c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t regionStart, 837c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t regionLimit, 838c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UErrorCode *status) { 83927f654740f2a26ad62a5c155af9199af9e69b889claireho uregex_setRegion64( regexp2, (int64_t)regionStart, (int64_t)regionLimit, status); 84027f654740f2a26ad62a5c155af9199af9e69b889claireho} 84127f654740f2a26ad62a5c155af9199af9e69b889claireho 84227f654740f2a26ad62a5c155af9199af9e69b889clairehoU_CAPI void U_EXPORT2 84327f654740f2a26ad62a5c155af9199af9e69b889clairehouregex_setRegion64(URegularExpression *regexp2, 84427f654740f2a26ad62a5c155af9199af9e69b889claireho int64_t regionStart, 84527f654740f2a26ad62a5c155af9199af9e69b889claireho int64_t regionLimit, 84627f654740f2a26ad62a5c155af9199af9e69b889claireho UErrorCode *status) { 84750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegularExpression *regexp = (RegularExpression*)regexp2; 848b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (validateRE(regexp, TRUE, status) == FALSE) { 849c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return; 850c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 851c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru regexp->fMatcher->region(regionStart, regionLimit, *status); 852c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 853c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 854c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 855c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//------------------------------------------------------------------------------ 856c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 85727f654740f2a26ad62a5c155af9199af9e69b889claireho// uregex_setRegionAndStart 85827f654740f2a26ad62a5c155af9199af9e69b889claireho// 85927f654740f2a26ad62a5c155af9199af9e69b889claireho//------------------------------------------------------------------------------ 86054dcd9b6a06071f647dac967e9e267abb9410720Craig CorneliusU_CAPI void U_EXPORT2 86127f654740f2a26ad62a5c155af9199af9e69b889clairehouregex_setRegionAndStart(URegularExpression *regexp2, 86227f654740f2a26ad62a5c155af9199af9e69b889claireho int64_t regionStart, 86327f654740f2a26ad62a5c155af9199af9e69b889claireho int64_t regionLimit, 86427f654740f2a26ad62a5c155af9199af9e69b889claireho int64_t startIndex, 86527f654740f2a26ad62a5c155af9199af9e69b889claireho UErrorCode *status) { 86627f654740f2a26ad62a5c155af9199af9e69b889claireho RegularExpression *regexp = (RegularExpression*)regexp2; 867b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (validateRE(regexp, TRUE, status) == FALSE) { 86827f654740f2a26ad62a5c155af9199af9e69b889claireho return; 86927f654740f2a26ad62a5c155af9199af9e69b889claireho } 87027f654740f2a26ad62a5c155af9199af9e69b889claireho regexp->fMatcher->region(regionStart, regionLimit, startIndex, *status); 87127f654740f2a26ad62a5c155af9199af9e69b889claireho} 87227f654740f2a26ad62a5c155af9199af9e69b889claireho 87327f654740f2a26ad62a5c155af9199af9e69b889claireho//------------------------------------------------------------------------------ 87427f654740f2a26ad62a5c155af9199af9e69b889claireho// 875c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// uregex_regionStart 876c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 877c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//------------------------------------------------------------------------------ 878c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste QueruU_CAPI int32_t U_EXPORT2 87950294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehouregex_regionStart(const URegularExpression *regexp2, 880c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UErrorCode *status) { 88127f654740f2a26ad62a5c155af9199af9e69b889claireho return (int32_t)uregex_regionStart64(regexp2, status); 88227f654740f2a26ad62a5c155af9199af9e69b889claireho} 88327f654740f2a26ad62a5c155af9199af9e69b889claireho 88427f654740f2a26ad62a5c155af9199af9e69b889clairehoU_CAPI int64_t U_EXPORT2 88527f654740f2a26ad62a5c155af9199af9e69b889clairehouregex_regionStart64(const URegularExpression *regexp2, 88627f654740f2a26ad62a5c155af9199af9e69b889claireho UErrorCode *status) { 88750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegularExpression *regexp = (RegularExpression*)regexp2; 888b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (validateRE(regexp, TRUE, status) == FALSE) { 889c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return 0; 890c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 891c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return regexp->fMatcher->regionStart(); 892c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 893c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 894c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 895c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//------------------------------------------------------------------------------ 896c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 897c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// uregex_regionEnd 898c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 899c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//------------------------------------------------------------------------------ 900c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste QueruU_CAPI int32_t U_EXPORT2 90150294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehouregex_regionEnd(const URegularExpression *regexp2, 902c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UErrorCode *status) { 90327f654740f2a26ad62a5c155af9199af9e69b889claireho return (int32_t)uregex_regionEnd64(regexp2, status); 90427f654740f2a26ad62a5c155af9199af9e69b889claireho} 90527f654740f2a26ad62a5c155af9199af9e69b889claireho 90627f654740f2a26ad62a5c155af9199af9e69b889clairehoU_CAPI int64_t U_EXPORT2 90727f654740f2a26ad62a5c155af9199af9e69b889clairehouregex_regionEnd64(const URegularExpression *regexp2, 90827f654740f2a26ad62a5c155af9199af9e69b889claireho UErrorCode *status) { 90950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegularExpression *regexp = (RegularExpression*)regexp2; 910b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (validateRE(regexp, TRUE, status) == FALSE) { 911c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return 0; 912c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 913c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return regexp->fMatcher->regionEnd(); 914c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 915c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 916c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 917c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//------------------------------------------------------------------------------ 918c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 919c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// uregex_hasTransparentBounds 920c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 921c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//------------------------------------------------------------------------------ 922c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste QueruU_CAPI UBool U_EXPORT2 92350294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehouregex_hasTransparentBounds(const URegularExpression *regexp2, 924c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UErrorCode *status) { 92550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegularExpression *regexp = (RegularExpression*)regexp2; 926b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (validateRE(regexp, FALSE, status) == FALSE) { 927c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return FALSE; 928c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 929c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return regexp->fMatcher->hasTransparentBounds(); 930c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 931c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 932c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 933c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//------------------------------------------------------------------------------ 934c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 935c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// uregex_useTransparentBounds 936c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 937c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//------------------------------------------------------------------------------ 938c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste QueruU_CAPI void U_EXPORT2 93950294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehouregex_useTransparentBounds(URegularExpression *regexp2, 94050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UBool b, 94150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode *status) { 94250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegularExpression *regexp = (RegularExpression*)regexp2; 943b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (validateRE(regexp, FALSE, status) == FALSE) { 944c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return; 945c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 946c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru regexp->fMatcher->useTransparentBounds(b); 947c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 948c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 949c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 950c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//------------------------------------------------------------------------------ 951c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 952c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// uregex_hasAnchoringBounds 953c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 954c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//------------------------------------------------------------------------------ 955c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste QueruU_CAPI UBool U_EXPORT2 95650294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehouregex_hasAnchoringBounds(const URegularExpression *regexp2, 95750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode *status) { 95850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegularExpression *regexp = (RegularExpression*)regexp2; 959b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (validateRE(regexp, FALSE, status) == FALSE) { 960c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return FALSE; 961c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 962c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return regexp->fMatcher->hasAnchoringBounds(); 963c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 964c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 965c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 966c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//------------------------------------------------------------------------------ 967c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 968c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// uregex_useAnchoringBounds 969c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 970c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//------------------------------------------------------------------------------ 971c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste QueruU_CAPI void U_EXPORT2 97250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehouregex_useAnchoringBounds(URegularExpression *regexp2, 97350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UBool b, 97450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode *status) { 97550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegularExpression *regexp = (RegularExpression*)regexp2; 976b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (validateRE(regexp, FALSE, status) == FALSE) { 977c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return; 978c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 979c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru regexp->fMatcher->useAnchoringBounds(b); 980c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 981c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 982c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 983c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//------------------------------------------------------------------------------ 984c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 985c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// uregex_hitEnd 986c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 987c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//------------------------------------------------------------------------------ 988c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste QueruU_CAPI UBool U_EXPORT2 98950294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehouregex_hitEnd(const URegularExpression *regexp2, 990c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UErrorCode *status) { 99150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegularExpression *regexp = (RegularExpression*)regexp2; 992b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (validateRE(regexp, TRUE, status) == FALSE) { 993c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return FALSE; 994c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 995c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return regexp->fMatcher->hitEnd(); 996c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 997c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 998c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 999c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//------------------------------------------------------------------------------ 1000c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 1001c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// uregex_requireEnd 1002c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 1003c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//------------------------------------------------------------------------------ 1004c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste QueruU_CAPI UBool U_EXPORT2 100550294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehouregex_requireEnd(const URegularExpression *regexp2, 1006c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UErrorCode *status) { 100750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegularExpression *regexp = (RegularExpression*)regexp2; 1008b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (validateRE(regexp, TRUE, status) == FALSE) { 1009c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return FALSE; 1010c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1011c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return regexp->fMatcher->requireEnd(); 1012c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 1013c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1014c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1015c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//------------------------------------------------------------------------------ 1016c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 1017c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// uregex_setTimeLimit 1018c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 1019c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//------------------------------------------------------------------------------ 1020c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste QueruU_CAPI void U_EXPORT2 102150294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehouregex_setTimeLimit(URegularExpression *regexp2, 1022c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t limit, 1023c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UErrorCode *status) { 102450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegularExpression *regexp = (RegularExpression*)regexp2; 1025b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (validateRE(regexp, FALSE, status)) { 1026c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru regexp->fMatcher->setTimeLimit(limit, *status); 1027c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1028c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 1029c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1030c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1031c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1032c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//------------------------------------------------------------------------------ 1033c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 1034c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// uregex_getTimeLimit 1035c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 1036c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//------------------------------------------------------------------------------ 1037c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste QueruU_CAPI int32_t U_EXPORT2 103850294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehouregex_getTimeLimit(const URegularExpression *regexp2, 1039c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UErrorCode *status) { 1040c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t retVal = 0; 104150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegularExpression *regexp = (RegularExpression*)regexp2; 1042b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (validateRE(regexp, FALSE, status)) { 1043c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru retVal = regexp->fMatcher->getTimeLimit(); 1044c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1045c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return retVal; 1046c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 1047c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1048c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1049c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1050c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//------------------------------------------------------------------------------ 1051c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 1052c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// uregex_setStackLimit 1053c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 1054c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//------------------------------------------------------------------------------ 1055c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste QueruU_CAPI void U_EXPORT2 105650294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehouregex_setStackLimit(URegularExpression *regexp2, 105750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t limit, 105850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode *status) { 105950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegularExpression *regexp = (RegularExpression*)regexp2; 1060b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (validateRE(regexp, FALSE, status)) { 1061c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru regexp->fMatcher->setStackLimit(limit, *status); 1062c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1063c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 1064c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1065c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1066c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1067c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//------------------------------------------------------------------------------ 1068c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 1069c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// uregex_getStackLimit 1070c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 1071c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//------------------------------------------------------------------------------ 1072c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste QueruU_CAPI int32_t U_EXPORT2 107350294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehouregex_getStackLimit(const URegularExpression *regexp2, 107450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode *status) { 1075c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t retVal = 0; 107650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegularExpression *regexp = (RegularExpression*)regexp2; 1077b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (validateRE(regexp, FALSE, status)) { 1078c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru retVal = regexp->fMatcher->getStackLimit(); 1079c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1080c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return retVal; 1081c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 1082c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1083c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1084c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//------------------------------------------------------------------------------ 1085c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 1086c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// uregex_setMatchCallback 1087c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 1088c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//------------------------------------------------------------------------------ 1089c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste QueruU_CAPI void U_EXPORT2 109050294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehouregex_setMatchCallback(URegularExpression *regexp2, 1091c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru URegexMatchCallback *callback, 1092c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru const void *context, 1093c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UErrorCode *status) { 109450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegularExpression *regexp = (RegularExpression*)regexp2; 1095b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (validateRE(regexp, FALSE, status)) { 109650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho regexp->fMatcher->setMatchCallback(callback, context, *status); 1097c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1098c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 1099c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1100c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1101c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//------------------------------------------------------------------------------ 1102c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 1103c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// uregex_getMatchCallback 1104c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 1105c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//------------------------------------------------------------------------------ 1106c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste QueruU_CAPI void U_EXPORT2 110750294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehouregex_getMatchCallback(const URegularExpression *regexp2, 1108c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru URegexMatchCallback **callback, 1109c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru const void **context, 1110c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UErrorCode *status) { 111150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegularExpression *regexp = (RegularExpression*)regexp2; 1112b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (validateRE(regexp, FALSE, status)) { 1113c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru regexp->fMatcher->getMatchCallback(*callback, *context, *status); 1114c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1115c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 1116c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1117c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1118c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//------------------------------------------------------------------------------ 1119c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 112027f654740f2a26ad62a5c155af9199af9e69b889claireho// uregex_setMatchProgressCallback 112127f654740f2a26ad62a5c155af9199af9e69b889claireho// 112227f654740f2a26ad62a5c155af9199af9e69b889claireho//------------------------------------------------------------------------------ 112327f654740f2a26ad62a5c155af9199af9e69b889clairehoU_CAPI void U_EXPORT2 112427f654740f2a26ad62a5c155af9199af9e69b889clairehouregex_setFindProgressCallback(URegularExpression *regexp2, 112527f654740f2a26ad62a5c155af9199af9e69b889claireho URegexFindProgressCallback *callback, 112627f654740f2a26ad62a5c155af9199af9e69b889claireho const void *context, 112727f654740f2a26ad62a5c155af9199af9e69b889claireho UErrorCode *status) { 112827f654740f2a26ad62a5c155af9199af9e69b889claireho RegularExpression *regexp = (RegularExpression*)regexp2; 1129b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (validateRE(regexp, FALSE, status)) { 113027f654740f2a26ad62a5c155af9199af9e69b889claireho regexp->fMatcher->setFindProgressCallback(callback, context, *status); 113127f654740f2a26ad62a5c155af9199af9e69b889claireho } 113227f654740f2a26ad62a5c155af9199af9e69b889claireho} 113327f654740f2a26ad62a5c155af9199af9e69b889claireho 113427f654740f2a26ad62a5c155af9199af9e69b889claireho 113527f654740f2a26ad62a5c155af9199af9e69b889claireho//------------------------------------------------------------------------------ 113627f654740f2a26ad62a5c155af9199af9e69b889claireho// 113727f654740f2a26ad62a5c155af9199af9e69b889claireho// uregex_getMatchCallback 113827f654740f2a26ad62a5c155af9199af9e69b889claireho// 113927f654740f2a26ad62a5c155af9199af9e69b889claireho//------------------------------------------------------------------------------ 114027f654740f2a26ad62a5c155af9199af9e69b889clairehoU_CAPI void U_EXPORT2 114127f654740f2a26ad62a5c155af9199af9e69b889clairehouregex_getFindProgressCallback(const URegularExpression *regexp2, 114227f654740f2a26ad62a5c155af9199af9e69b889claireho URegexFindProgressCallback **callback, 114327f654740f2a26ad62a5c155af9199af9e69b889claireho const void **context, 114427f654740f2a26ad62a5c155af9199af9e69b889claireho UErrorCode *status) { 114527f654740f2a26ad62a5c155af9199af9e69b889claireho RegularExpression *regexp = (RegularExpression*)regexp2; 1146b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (validateRE(regexp, FALSE, status)) { 114727f654740f2a26ad62a5c155af9199af9e69b889claireho regexp->fMatcher->getFindProgressCallback(*callback, *context, *status); 114827f654740f2a26ad62a5c155af9199af9e69b889claireho } 114927f654740f2a26ad62a5c155af9199af9e69b889claireho} 115027f654740f2a26ad62a5c155af9199af9e69b889claireho 115127f654740f2a26ad62a5c155af9199af9e69b889claireho 115227f654740f2a26ad62a5c155af9199af9e69b889claireho//------------------------------------------------------------------------------ 115327f654740f2a26ad62a5c155af9199af9e69b889claireho// 1154b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// uregex_replaceAll 1155b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 1156b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//------------------------------------------------------------------------------ 1157b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI int32_t U_EXPORT2 115850294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehouregex_replaceAll(URegularExpression *regexp2, 1159b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *replacementText, 1160b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t replacementLength, 1161b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar *destBuf, 1162b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t destCapacity, 1163b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode *status) { 116450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegularExpression *regexp = (RegularExpression*)regexp2; 1165b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (validateRE(regexp, TRUE, status) == FALSE) { 1166b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 1167b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1168b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (replacementText == NULL || replacementLength < -1 || 116927f654740f2a26ad62a5c155af9199af9e69b889claireho (destBuf == NULL && destCapacity > 0) || 1170b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru destCapacity < 0) { 1171b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *status = U_ILLEGAL_ARGUMENT_ERROR; 1172b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 1173b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1174b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1175b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t len = 0; 1176b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 117750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uregex_reset(regexp2, 0, status); 1178b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 1179b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // Note: Seperate error code variables for findNext() and appendReplacement() 1180b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // are used so that destination buffer overflow errors 1181b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // in appendReplacement won't stop findNext() from working. 1182b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // appendReplacement() and appendTail() special case incoming buffer 1183b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // overflow errors, continuing to return the correct length. 1184b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UErrorCode findStatus = *status; 118550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho while (uregex_findNext(regexp2, &findStatus)) { 118650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho len += uregex_appendReplacement(regexp2, replacementText, replacementLength, 1187b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru &destBuf, &destCapacity, status); 1188b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 118950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho len += uregex_appendTail(regexp2, &destBuf, &destCapacity, status); 1190b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 1191b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (U_FAILURE(findStatus)) { 1192b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // If anything went wrong with the findNext(), make that error trump 1193b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // whatever may have happened with the append() operations. 1194b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // Errors in findNext() are not expected. 1195b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *status = findStatus; 1196b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 1197b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1198b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return len; 1199b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1200b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1201b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1202b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//------------------------------------------------------------------------------ 1203b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 120450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// uregex_replaceAllUText 120550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 120650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//------------------------------------------------------------------------------ 120750294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_CAPI UText * U_EXPORT2 120850294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehouregex_replaceAllUText(URegularExpression *regexp2, 120950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText *replacementText, 121050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText *dest, 121150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode *status) { 121250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegularExpression *regexp = (RegularExpression*)regexp2; 1213b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (validateRE(regexp, TRUE, status) == FALSE) { 121450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return 0; 121550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 121650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (replacementText == NULL) { 121750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *status = U_ILLEGAL_ARGUMENT_ERROR; 121850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return 0; 121950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 122050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 122150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho dest = regexp->fMatcher->replaceAll(replacementText, dest, *status); 122250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return dest; 122350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 122450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 122550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 122650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//------------------------------------------------------------------------------ 122750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 1228b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// uregex_replaceFirst 1229b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 1230b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//------------------------------------------------------------------------------ 1231b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI int32_t U_EXPORT2 123250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehouregex_replaceFirst(URegularExpression *regexp2, 1233b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *replacementText, 1234b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t replacementLength, 1235b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar *destBuf, 1236b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t destCapacity, 1237b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode *status) { 123850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegularExpression *regexp = (RegularExpression*)regexp2; 1239b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (validateRE(regexp, TRUE, status) == FALSE) { 1240b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 1241b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1242b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (replacementText == NULL || replacementLength < -1 || 124327f654740f2a26ad62a5c155af9199af9e69b889claireho (destBuf == NULL && destCapacity > 0) || 1244b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru destCapacity < 0) { 1245b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *status = U_ILLEGAL_ARGUMENT_ERROR; 1246b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 1247b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1248b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1249b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t len = 0; 1250b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool findSucceeded; 125150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uregex_reset(regexp2, 0, status); 125250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho findSucceeded = uregex_find(regexp2, 0, status); 1253b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (findSucceeded) { 125450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho len = uregex_appendReplacement(regexp2, replacementText, replacementLength, 1255b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru &destBuf, &destCapacity, status); 1256b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 125750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho len += uregex_appendTail(regexp2, &destBuf, &destCapacity, status); 1258b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1259b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return len; 1260b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1261b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1262b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1263b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//------------------------------------------------------------------------------ 1264b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 126550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// uregex_replaceFirstUText 1266b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 1267b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//------------------------------------------------------------------------------ 126850294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_CAPI UText * U_EXPORT2 126950294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehouregex_replaceFirstUText(URegularExpression *regexp2, 127050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText *replacementText, 127150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText *dest, 127250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode *status) { 127350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegularExpression *regexp = (RegularExpression*)regexp2; 1274b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (validateRE(regexp, TRUE, status) == FALSE) { 127550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return 0; 127650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 127750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (replacementText == NULL) { 127850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *status = U_ILLEGAL_ARGUMENT_ERROR; 127950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return 0; 128050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 128150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 128250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho dest = regexp->fMatcher->replaceFirst(replacementText, dest, *status); 128350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return dest; 128450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 1285b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1286b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 128750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//------------------------------------------------------------------------------ 128850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 128950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// uregex_appendReplacement 129050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 129150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//------------------------------------------------------------------------------ 129250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 129350294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_NAMESPACE_BEGIN 1294b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 1295b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Dummy class, because these functions need to be friends of class RegexMatcher, 1296b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// and stand-alone C functions don't work as friends 1297b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 1298b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruclass RegexCImpl { 1299b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru public: 130050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho inline static int32_t appendReplacement(RegularExpression *regexp, 1301b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *replacementText, 1302b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t replacementLength, 1303b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar **destBuf, 1304b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t *destCapacity, 1305b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode *status); 1306b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 130750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho inline static int32_t appendTail(RegularExpression *regexp, 130850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar **destBuf, 130950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t *destCapacity, 131050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode *status); 131150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 131250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho inline static int32_t split(RegularExpression *regexp, 131350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar *destBuf, 131450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t destCapacity, 131550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t *requiredCapacity, 131650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar *destFields[], 131750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t destFieldsCapacity, 131850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode *status); 1319b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}; 1320b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 132150294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_NAMESPACE_END 1322b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1323b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1324b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1325b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar BACKSLASH = 0x5c; 1326b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar DOLLARSIGN = 0x24; 1327b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1328b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 1329b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Move a character to an output buffer, with bounds checking on the index. 1330b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Index advances even if capacity is exceeded, for preflight size computations. 1331b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// This little sequence is used a LOT. 1332b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 1333b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic inline void appendToBuf(UChar c, int32_t *idx, UChar *buf, int32_t bufCapacity) { 1334b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (*idx < bufCapacity) { 1335b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru buf[*idx] = c; 1336b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1337b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (*idx)++; 1338b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1339b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1340b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1341b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 1342b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// appendReplacement, the actual implementation. 1343b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 134450294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoint32_t RegexCImpl::appendReplacement(RegularExpression *regexp, 134550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UChar *replacementText, 134650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t replacementLength, 134750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar **destBuf, 134850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t *destCapacity, 134950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode *status) { 1350b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1351b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // If we come in with a buffer overflow error, don't suppress the operation. 1352b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // A series of appendReplacements, appendTail need to correctly preflight 1353b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // the buffer size when an overflow happens somewhere in the middle. 1354b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool pendingBufferOverflow = FALSE; 1355b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (*status == U_BUFFER_OVERFLOW_ERROR && destCapacity != NULL && *destCapacity == 0) { 1356b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pendingBufferOverflow = TRUE; 1357b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *status = U_ZERO_ERROR; 1358b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1359b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1360b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 1361b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Validate all paramters 1362b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 1363b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (validateRE(regexp, TRUE, status) == FALSE) { 1364b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 1365b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1366b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (replacementText == NULL || replacementLength < -1 || 1367b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru destCapacity == NULL || destBuf == NULL || 136827f654740f2a26ad62a5c155af9199af9e69b889claireho (*destBuf == NULL && *destCapacity > 0) || 1369b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *destCapacity < 0) { 1370b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *status = U_ILLEGAL_ARGUMENT_ERROR; 1371b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 1372b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1373b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1374b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexMatcher *m = regexp->fMatcher; 1375b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (m->fMatch == FALSE) { 1376b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *status = U_REGEX_INVALID_STATE; 1377b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 1378b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1379b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1380b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar *dest = *destBuf; 1381b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t capacity = *destCapacity; 1382b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t destIdx = 0; 1383b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t i; 1384b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1385b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // If it wasn't supplied by the caller, get the length of the replacement text. 1386b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // TODO: slightly smarter logic in the copy loop could watch for the NUL on 1387b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // the fly and avoid this step. 1388b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (replacementLength == -1) { 1389b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru replacementLength = u_strlen(replacementText); 1390b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1391b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1392b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Copy input string from the end of previous match to start of current match 139350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (regexp->fText != NULL) { 139450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t matchStart; 139550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t lastMatchEnd; 139650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (UTEXT_USES_U16(m->fInputText)) { 139750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho lastMatchEnd = (int32_t)m->fLastMatchEnd; 139850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho matchStart = (int32_t)m->fMatchStart; 139950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 140050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // !!!: Would like a better way to do this! 140150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode status = U_ZERO_ERROR; 140250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho lastMatchEnd = utext_extract(m->fInputText, 0, m->fLastMatchEnd, NULL, 0, &status); 140350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_ZERO_ERROR; 140450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho matchStart = lastMatchEnd + utext_extract(m->fInputText, m->fLastMatchEnd, m->fMatchStart, NULL, 0, &status); 140550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 140650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho for (i=lastMatchEnd; i<matchStart; i++) { 140750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho appendToBuf(regexp->fText[i], &destIdx, dest, capacity); 140850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 140950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 141050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode possibleOverflowError = U_ZERO_ERROR; // ignore 141150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho destIdx += utext_extract(m->fInputText, m->fLastMatchEnd, m->fMatchStart, 1412103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius dest==NULL?NULL:&dest[destIdx], REMAINING_CAPACITY(destIdx, capacity), 1413103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius &possibleOverflowError); 1414b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1415103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius U_ASSERT(destIdx >= 0); 1416b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1417b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // scan the replacement text, looking for substitutions ($n) and \escapes. 1418b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t replIdx = 0; 1419b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while (replIdx < replacementLength) { 1420b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar c = replacementText[replIdx]; 1421b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru replIdx++; 1422b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (c != DOLLARSIGN && c != BACKSLASH) { 1423b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Common case, no substitution, no escaping, 1424b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // just copy the char to the dest buf. 1425b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru appendToBuf(c, &destIdx, dest, capacity); 1426b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 1427b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1428b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1429b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (c == BACKSLASH) { 1430b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Backslash Escape. Copy the following char out without further checks. 1431b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Note: Surrogate pairs don't need any special handling 1432b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // The second half wont be a '$' or a '\', and 1433b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // will move to the dest normally on the next 1434b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // loop iteration. 1435b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (replIdx >= replacementLength) { 1436b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 1437b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1438b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c = replacementText[replIdx]; 1439b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1440b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (c==0x55/*U*/ || c==0x75/*u*/) { 1441b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // We have a \udddd or \Udddddddd escape sequence. 1442b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 escapedChar = 144350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho u_unescapeAt(uregex_ucstr_unescape_charAt, 1444b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru &replIdx, // Index is updated by unescapeAt 1445b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru replacementLength, // Length of replacement text 1446b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (void *)replacementText); 1447b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1448b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (escapedChar != (UChar32)0xFFFFFFFF) { 1449b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (escapedChar <= 0xffff) { 1450b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru appendToBuf((UChar)escapedChar, &destIdx, dest, capacity); 1451b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1452b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru appendToBuf(U16_LEAD(escapedChar), &destIdx, dest, capacity); 1453b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru appendToBuf(U16_TRAIL(escapedChar), &destIdx, dest, capacity); 1454b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1455b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 1456b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1457b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Note: if the \u escape was invalid, just fall through and 1458b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // treat it as a plain \<anything> escape. 1459b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1460b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1461b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Plain backslash escape. Just put out the escaped character. 1462b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru appendToBuf(c, &destIdx, dest, capacity); 1463b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1464b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru replIdx++; 1465b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 1466b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1467b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1468b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1469b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1470b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // We've got a $. Pick up a capture group number if one follows. 1471b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Consume at most the number of digits necessary for the largest capture 1472b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // number that is valid for this pattern. 1473b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1474b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t numDigits = 0; 1475b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t groupNum = 0; 1476b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 digitC; 1477b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (;;) { 1478b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (replIdx >= replacementLength) { 1479b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 1480b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1481b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U16_GET(replacementText, 0, replIdx, replacementLength, digitC); 1482b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (u_isdigit(digitC) == FALSE) { 1483b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 1484b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1485b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1486b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U16_FWD_1(replacementText, replIdx, replacementLength); 1487b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru groupNum=groupNum*10 + u_charDigitValue(digitC); 1488b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru numDigits++; 1489b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (numDigits >= m->fPattern->fMaxCaptureDigits) { 1490b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 1491b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1492b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1493b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1494b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1495b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (numDigits == 0) { 1496b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // The $ didn't introduce a group number at all. 1497b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Treat it as just part of the substitution text. 1498b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru appendToBuf(DOLLARSIGN, &destIdx, dest, capacity); 1499b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 1500b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1501b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1502b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Finally, append the capture group data to the destination. 1503103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius destIdx += uregex_group((URegularExpression*)regexp, groupNum, 1504103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius dest==NULL?NULL:&dest[destIdx], REMAINING_CAPACITY(destIdx, capacity), status); 1505b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (*status == U_BUFFER_OVERFLOW_ERROR) { 1506b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Ignore buffer overflow when extracting the group. We need to 1507b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // continue on to get full size of the untruncated result. We will 1508b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // raise our own buffer overflow error at the end. 1509b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *status = U_ZERO_ERROR; 1510b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1511b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1512b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(*status)) { 1513b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Can fail if group number is out of range. 1514b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 1515b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1516b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1517b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1518b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1519b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 1520b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Nul Terminate the dest buffer if possible. 1521b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Set the appropriate buffer overflow or not terminated error, if needed. 1522b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 1523b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (destIdx < capacity) { 1524b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru dest[destIdx] = 0; 1525b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if (destIdx == *destCapacity) { 1526b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *status = U_STRING_NOT_TERMINATED_WARNING; 1527b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1528b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *status = U_BUFFER_OVERFLOW_ERROR; 1529b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1530b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1531b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 1532b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Return an updated dest buffer and capacity to the caller. 1533b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 1534b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (destIdx > 0 && *destCapacity > 0) { 1535b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (destIdx < capacity) { 1536b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *destBuf += destIdx; 1537b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *destCapacity -= destIdx; 1538b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1539b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *destBuf += capacity; 1540b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *destCapacity = 0; 1541b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1542b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1543b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1544b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // If we came in with a buffer overflow, make sure we go out with one also. 1545b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // (A zero length match right at the end of the previous match could 1546b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // make this function succeed even though a previous call had overflowed the buf) 1547b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (pendingBufferOverflow && U_SUCCESS(*status)) { 1548b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *status = U_BUFFER_OVERFLOW_ERROR; 1549b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1550b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1551b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return destIdx; 1552b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1553b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1554b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 155550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// appendReplacement the actual API function, 1556b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 1557b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI int32_t U_EXPORT2 155850294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehouregex_appendReplacement(URegularExpression *regexp2, 155950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UChar *replacementText, 156050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t replacementLength, 156150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar **destBuf, 156250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t *destCapacity, 156350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode *status) { 156450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 156550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegularExpression *regexp = (RegularExpression*)regexp2; 1566b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return RegexCImpl::appendReplacement( 1567b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru regexp, replacementText, replacementLength,destBuf, destCapacity, status); 1568b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1569b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 157050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 157150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// uregex_appendReplacementUText...can just use the normal C++ method 157250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 157350294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_CAPI void U_EXPORT2 157450294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehouregex_appendReplacementUText(URegularExpression *regexp2, 157550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText *replText, 157650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText *dest, 157750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode *status) { 157850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegularExpression *regexp = (RegularExpression*)regexp2; 157950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho regexp->fMatcher->appendReplacement(dest, replText, *status); 158050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 158150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 1582b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1583b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//------------------------------------------------------------------------------ 1584b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 1585b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// uregex_appendTail 1586b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 1587b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//------------------------------------------------------------------------------ 158850294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoint32_t RegexCImpl::appendTail(RegularExpression *regexp, 158950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar **destBuf, 159050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t *destCapacity, 159150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode *status) 1592c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru{ 1593b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1594b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // If we come in with a buffer overflow error, don't suppress the operation. 1595b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // A series of appendReplacements, appendTail need to correctly preflight 1596b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // the buffer size when an overflow happens somewhere in the middle. 1597b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool pendingBufferOverflow = FALSE; 1598b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (*status == U_BUFFER_OVERFLOW_ERROR && destCapacity != NULL && *destCapacity == 0) { 1599b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pendingBufferOverflow = TRUE; 1600b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *status = U_ZERO_ERROR; 1601b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1602b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1603b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (validateRE(regexp, TRUE, status) == FALSE) { 1604b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 1605b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1606b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 1607b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (destCapacity == NULL || destBuf == NULL || 160827f654740f2a26ad62a5c155af9199af9e69b889claireho (*destBuf == NULL && *destCapacity > 0) || 1609b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *destCapacity < 0) 1610b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru { 1611b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *status = U_ILLEGAL_ARGUMENT_ERROR; 1612b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return 0; 1613b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 1614b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 1615b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexMatcher *m = regexp->fMatcher; 1616b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1617b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t destIdx = 0; 1618b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t destCap = *destCapacity; 1619b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar *dest = *destBuf; 162050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 162150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (regexp->fText != NULL) { 162250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t srcIdx; 162350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t nativeIdx = (m->fMatch ? m->fMatchEnd : m->fLastMatchEnd); 162450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (nativeIdx == -1) { 162550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho srcIdx = 0; 162650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else if (UTEXT_USES_U16(m->fInputText)) { 162750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho srcIdx = (int32_t)nativeIdx; 1628b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 162950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode status = U_ZERO_ERROR; 163050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho srcIdx = utext_extract(m->fInputText, 0, nativeIdx, NULL, 0, &status); 163150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 163250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 163350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho for (;;) { 1634103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius U_ASSERT(destIdx >= 0); 1635103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius 163650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (srcIdx == regexp->fTextLength) { 1637b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 1638b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 163950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar c = regexp->fText[srcIdx]; 164050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (c == 0 && regexp->fTextLength == -1) { 164150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho regexp->fTextLength = srcIdx; 164250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 164350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 1644103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius 164550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (destIdx < destCap) { 164650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho dest[destIdx] = c; 164750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 164850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // We've overflowed the dest buffer. 164950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // If the total input string length is known, we can 165050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // compute the total buffer size needed without scanning through the string. 165150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (regexp->fTextLength > 0) { 165250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho destIdx += (regexp->fTextLength - srcIdx); 165350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 165450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 165550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 165650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho srcIdx++; 165750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho destIdx++; 165850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 165950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 166050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t srcIdx; 166150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (m->fMatch) { 166250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // The most recent call to find() succeeded. 166350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho srcIdx = m->fMatchEnd; 166450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 166550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // The last call to find() on this matcher failed(). 166650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Look back to the end of the last find() that succeeded for src index. 166750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho srcIdx = m->fLastMatchEnd; 166850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (srcIdx == -1) { 166950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // There has been no successful match with this matcher. 167050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // We want to copy the whole string. 167150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho srcIdx = 0; 167250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 1673b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 167450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 167550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho destIdx = utext_extract(m->fInputText, srcIdx, m->fInputLength, dest, destCap, status); 1676b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1677b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1678b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 1679b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // NUL terminate the output string, if possible, otherwise issue the 1680b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // appropriate error or warning. 1681b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 1682b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (destIdx < destCap) { 1683b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru dest[destIdx] = 0; 1684b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if (destIdx == destCap) { 1685b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *status = U_STRING_NOT_TERMINATED_WARNING; 1686b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1687b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *status = U_BUFFER_OVERFLOW_ERROR; 1688b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1689b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1690b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 1691b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Update the user's buffer ptr and capacity vars to reflect the 1692b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // amount used. 1693b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 1694b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (destIdx < destCap) { 1695b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *destBuf += destIdx; 1696b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *destCapacity -= destIdx; 1697103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius } else if (*destBuf != NULL) { 1698b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *destBuf += destCap; 1699b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *destCapacity = 0; 1700b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1701b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1702b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (pendingBufferOverflow && U_SUCCESS(*status)) { 1703b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *status = U_BUFFER_OVERFLOW_ERROR; 1704b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1705b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1706b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return destIdx; 1707b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1708b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1709b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 171050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 171150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// appendTail the actual API function 171250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 1713b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI int32_t U_EXPORT2 171450294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehouregex_appendTail(URegularExpression *regexp2, 1715b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar **destBuf, 1716b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t *destCapacity, 1717b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode *status) { 171850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegularExpression *regexp = (RegularExpression*)regexp2; 1719b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return RegexCImpl::appendTail(regexp, destBuf, destCapacity, status); 1720b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1721b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1722b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 172350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 172450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// uregex_appendTailUText...can just use the normal C++ method 172550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 172650294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_CAPI UText * U_EXPORT2 172750294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehouregex_appendTailUText(URegularExpression *regexp2, 172827f654740f2a26ad62a5c155af9199af9e69b889claireho UText *dest, 172927f654740f2a26ad62a5c155af9199af9e69b889claireho UErrorCode *status) { 173050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegularExpression *regexp = (RegularExpression*)regexp2; 173127f654740f2a26ad62a5c155af9199af9e69b889claireho return regexp->fMatcher->appendTail(dest, *status); 173250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 173350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 173450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 1735b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//------------------------------------------------------------------------------ 1736b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 1737b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// copyString Internal utility to copy a string to an output buffer, 1738b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// while managing buffer overflow and preflight size 1739b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// computation. NUL termination is added to destination, 1740b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// and the NUL is counted in the output size. 1741b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 1742b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//------------------------------------------------------------------------------ 174350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#if 0 1744b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic void copyString(UChar *destBuffer, // Destination buffer. 1745b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t destCapacity, // Total capacity of dest buffer 1746b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t *destIndex, // Index into dest buffer. Updated on return. 1747b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Update not clipped to destCapacity. 1748b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *srcPtr, // Pointer to source string 1749b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t srcLen) // Source string len. 1750b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 1751b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t si; 1752b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t di = *destIndex; 1753b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar c; 1754b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1755b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (si=0; si<srcLen; si++) { 1756b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c = srcPtr[si]; 1757b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (di < destCapacity) { 1758b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru destBuffer[di] = c; 1759b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru di++; 1760b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1761b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru di += srcLen - si; 1762b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 1763b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1764b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1765b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (di<destCapacity) { 1766b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru destBuffer[di] = 0; 1767b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1768b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru di++; 1769b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *destIndex = di; 1770b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 177150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#endif 1772b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1773b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//------------------------------------------------------------------------------ 1774b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 1775b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// uregex_split 1776b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 1777b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//------------------------------------------------------------------------------ 177850294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoint32_t RegexCImpl::split(RegularExpression *regexp, 177950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar *destBuf, 178050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t destCapacity, 178150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t *requiredCapacity, 178250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar *destFields[], 178350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t destFieldsCapacity, 178450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode *status) { 1785b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 1786b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Reset for the input text 1787b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 1788b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru regexp->fMatcher->reset(); 178950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText *inputText = regexp->fMatcher->fInputText; 179050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t nextOutputStringStart = 0; 179150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t inputLen = regexp->fMatcher->fInputLength; 1792b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (inputLen == 0) { 1793b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 1794b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1795b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1796b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 1797b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Loop through the input text, searching for the delimiter pattern 1798b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 1799b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t i; // Index of the field being processed. 1800b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t destIdx = 0; // Next available position in destBuf; 1801b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t numCaptureGroups = regexp->fMatcher->groupCount(); 180250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode tStatus = U_ZERO_ERROR; // Want to ignore any buffer overflow errors so that the strings are still counted 1803b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (i=0; ; i++) { 1804b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (i>=destFieldsCapacity-1) { 180550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // There are one or zero output strings left. 1806b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Fill the last output string with whatever is left from the input, then exit the loop. 1807b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // ( i will be == destFieldsCapacity if we filled the output array while processing 1808b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // capture groups of the delimiter expression, in which case we will discard the 1809b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // last capture group saved in favor of the unprocessed remainder of the 1810b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // input string.) 181150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (inputLen > nextOutputStringStart) { 181250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (i != destFieldsCapacity-1) { 181350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // No fields are left. Recycle the last one for holding the trailing part of 181450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // the input string. 181550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho i = destFieldsCapacity-1; 181650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho destIdx = (int32_t)(destFields[i] - destFields[0]); 181750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 181850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 181950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho destFields[i] = &destBuf[destIdx]; 182050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho destIdx += 1 + utext_extract(inputText, nextOutputStringStart, inputLen, 182150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho &destBuf[destIdx], REMAINING_CAPACITY(destIdx, destCapacity), status); 1822b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1823b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 1824b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1825b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1826b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (regexp->fMatcher->find()) { 1827b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // We found another delimiter. Move everything from where we started looking 1828b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // up until the start of the delimiter into the next output string. 1829b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru destFields[i] = &destBuf[destIdx]; 183050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 183150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho destIdx += 1 + utext_extract(inputText, nextOutputStringStart, regexp->fMatcher->fMatchStart, 183250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho &destBuf[destIdx], REMAINING_CAPACITY(destIdx, destCapacity), &tStatus); 183350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (tStatus == U_BUFFER_OVERFLOW_ERROR) { 183450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho tStatus = U_ZERO_ERROR; 183550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 183650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *status = tStatus; 183750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 183850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho nextOutputStringStart = regexp->fMatcher->fMatchEnd; 1839b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1840b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // If the delimiter pattern has capturing parentheses, the captured 1841b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // text goes out into the next n destination strings. 1842b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t groupNum; 1843b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (groupNum=1; groupNum<=numCaptureGroups; groupNum++) { 1844b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // If we've run out of output string slots, bail out. 1845b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (i==destFieldsCapacity-1) { 1846b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 1847b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1848b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru i++; 1849b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1850b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Set up to extract the capture group contents into the dest buffer. 1851b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru destFields[i] = &destBuf[destIdx]; 185250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho tStatus = U_ZERO_ERROR; 1853b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t t = uregex_group((URegularExpression*)regexp, 1854b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho groupNum, 1855b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho destFields[i], 1856b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho REMAINING_CAPACITY(destIdx, destCapacity), 1857b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho &tStatus); 1858b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru destIdx += t + 1; // Record the space used in the output string buffer. 1859b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // +1 for the NUL that terminates the string. 186050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (tStatus == U_BUFFER_OVERFLOW_ERROR) { 186150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho tStatus = U_ZERO_ERROR; 186250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 186350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *status = tStatus; 186450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 1865b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1866b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1867b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (nextOutputStringStart == inputLen) { 1868b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // The delimiter was at the end of the string. 1869b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // Output an empty string, and then we are done. 1870b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (destIdx < destCapacity) { 1871b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho destBuf[destIdx] = 0; 1872b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 1873b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (i < destFieldsCapacity-1) { 1874b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho ++i; 1875b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 1876b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (destIdx < destCapacity) { 1877b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho destFields[i] = destBuf + destIdx; 1878b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 1879b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho ++destIdx; 1880b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 1881b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1882b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1883b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1884b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru else 1885b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 1886b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // We ran off the end of the input while looking for the next delimiter. 1887b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // All the remaining text goes into the current output string. 1888b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru destFields[i] = &destBuf[destIdx]; 188950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho destIdx += 1 + utext_extract(inputText, nextOutputStringStart, inputLen, 189050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho &destBuf[destIdx], REMAINING_CAPACITY(destIdx, destCapacity), status); 1891b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 1892b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1893b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1894b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1895b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Zero out any unused portion of the destFields array 1896b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int j; 1897b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (j=i+1; j<destFieldsCapacity; j++) { 1898b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru destFields[j] = NULL; 1899b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1900b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1901b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (requiredCapacity != NULL) { 1902b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *requiredCapacity = destIdx; 1903b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1904b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (destIdx > destCapacity) { 1905b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *status = U_BUFFER_OVERFLOW_ERROR; 1906b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1907b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return i+1; 1908b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1909b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 191050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 191150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// uregex_split The actual API function 191250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 191350294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_CAPI int32_t U_EXPORT2 191450294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehouregex_split(URegularExpression *regexp2, 191550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar *destBuf, 191650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t destCapacity, 191750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t *requiredCapacity, 191850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar *destFields[], 191950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t destFieldsCapacity, 192050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode *status) { 192150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegularExpression *regexp = (RegularExpression*)regexp2; 1922b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (validateRE(regexp, TRUE, status) == FALSE) { 192350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return 0; 192450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 192527f654740f2a26ad62a5c155af9199af9e69b889claireho if ((destBuf == NULL && destCapacity > 0) || 192650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho destCapacity < 0 || 192750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho destFields == NULL || 192850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho destFieldsCapacity < 1 ) { 192950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *status = U_ILLEGAL_ARGUMENT_ERROR; 193050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return 0; 193150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 193250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 193350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return RegexCImpl::split(regexp, destBuf, destCapacity, requiredCapacity, destFields, destFieldsCapacity, status); 193450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 193550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 193650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 193750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 193850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// uregex_splitUText...can just use the normal C++ method 193950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 194050294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_CAPI int32_t U_EXPORT2 194150294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehouregex_splitUText(URegularExpression *regexp2, 194250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText *destFields[], 194350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t destFieldsCapacity, 194450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode *status) { 194550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegularExpression *regexp = (RegularExpression*)regexp2; 194650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return regexp->fMatcher->split(regexp->fMatcher->inputText(), destFields, destFieldsCapacity, *status); 194750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 194850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 1949b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1950b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif // !UCONFIG_NO_REGULAR_EXPRESSIONS 1951b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1952