regex.h revision 8393335b955da7340c9f19b1b4b2d6c0c2c04be7
1b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* 2b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru********************************************************************** 38393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius* Copyright (C) 2002-2013, International Business Machines 4b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Corporation and others. All Rights Reserved. 5b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru********************************************************************** 6b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* file name: regex.h 7b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* encoding: US-ASCII 8b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* indentation:4 9b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* 10b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* created on: 2002oct22 11b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* created by: Andy Heninger 12b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* 13b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* ICU Regular Expressions, API for C++ 14b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/ 15b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 16b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#ifndef REGEX_H 17b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define REGEX_H 18b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 19b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//#define REGEX_DEBUG 20b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 21b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 22b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * \file 23b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * \brief C++ API: Regular Expressions 24b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 
25b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <h2>Regular Expression API</h2> 26b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 27b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <p>The ICU API for processing regular expressions consists of two classes, 28b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <code>RegexPattern</code> and <code>RegexMatcher</code>. 29b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <code>RegexPattern</code> objects represent a pre-processed, or compiled 30b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * regular expression. They are created from a regular expression pattern string, 31b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * and can be used to create <code>RegexMatcher</code> objects for the pattern.</p> 32b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 33b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <p>Class <code>RegexMatcher</code> bundles together a regular expression 34b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * pattern and a target string to which the search pattern will be applied. 35b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <code>RegexMatcher</code> includes API for doing plain find or search 36b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * operations, for search and replace operations, and for obtaining detailed 37b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * information about bounds of a match. 
</p> 38b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 39c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * <p>Note that by constructing <code>RegexMatcher</code> objects directly from regular 40b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * expression pattern strings application code can be simplified and the explicit 41b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * need for <code>RegexPattern</code> objects can usually be eliminated. 42b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * </p> 43b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 44b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 45b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/utypes.h" 46b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 47b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if !UCONFIG_NO_REGULAR_EXPRESSIONS 48b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 49b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/uobject.h" 50b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/unistr.h" 5150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "unicode/utext.h" 52b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/parseerr.h" 53b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 54b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/uregex.h" 55b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 56103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius// Forward Declarations 57b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 58103e9ffba2cba345d0078eb8b8db33249f81840aCraig CorneliusU_NAMESPACE_BEGIN 59b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 60b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustruct Regex8BitSet; 61b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruclass 
RegexCImpl; 62103e9ffba2cba345d0078eb8b8db33249f81840aCraig Corneliusclass RegexMatcher; 63103e9ffba2cba345d0078eb8b8db33249f81840aCraig Corneliusclass RegexPattern; 64103e9ffba2cba345d0078eb8b8db33249f81840aCraig Corneliusstruct REStackFrame; 65103e9ffba2cba345d0078eb8b8db33249f81840aCraig Corneliusclass RuleBasedBreakIterator; 66103e9ffba2cba345d0078eb8b8db33249f81840aCraig Corneliusclass UnicodeSet; 67103e9ffba2cba345d0078eb8b8db33249f81840aCraig Corneliusclass UVector; 68103e9ffba2cba345d0078eb8b8db33249f81840aCraig Corneliusclass UVector32; 69103e9ffba2cba345d0078eb8b8db33249f81840aCraig Corneliusclass UVector64; 70b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 718393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius#ifndef U_HIDE_INTERNAL_API 72b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 73b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * RegexPatternDump Debug function, displays the compiled form of a pattern. 74b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @internal 75b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 76b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#ifdef REGEX_DEBUG 77b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_INTERNAL void U_EXPORT2 78b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexPatternDump(const RegexPattern *pat); 79b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#else 8050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho #undef RegexPatternDump 81b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru #define RegexPatternDump(pat) 82b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif 838393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius#endif /* U_HIDE_INTERNAL_API */ 84b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 85b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 86b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
87b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 88b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Class <code>RegexPattern</code> represents a compiled regular expression. It includes 89b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * factory methods for creating a RegexPattern object from the source (string) form 90b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * of a regular expression, methods for creating RegexMatchers that allow the pattern 91b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * to be applied to input text, and a few convenience methods for simple common 92b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * uses of regular expressions. 93b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 94b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <p>Class RegexPattern is not intended to be subclassed.</p> 95b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 96b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @stable ICU 2.4 97b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 98b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruclass U_I18N_API RegexPattern: public UObject { 99b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querupublic: 100b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 101b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /** 102b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * default constructor. Create a RegexPattern object that refers to no actual 103b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * pattern. Not normally needed; RegexPattern objects are usually 104b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * created using the factory method <code>compile()</code>. 
105b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 106b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @stable ICU 2.4 107b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 108b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexPattern(); 109b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 110b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /** 111b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Copy Constructor. Create a new RegexPattern object that is equivalent 112b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * to the source object. 113b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param source the pattern object to be copied. 114b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @stable ICU 2.4 115b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 116b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexPattern(const RegexPattern &source); 117b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 118b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /** 119b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Destructor. Note that a RegexPattern object must persist so long as any 120b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * RegexMatcher objects that were created from the RegexPattern are active. 121b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @stable ICU 2.4 122b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 123b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru virtual ~RegexPattern(); 124b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 125b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /** 126b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Comparison operator. 
Two RegexPattern objects are considered equal if they 127b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * were constructed from identical source patterns using the same match flag 128b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * settings. 129b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param that a RegexPattern object to compare with "this". 130b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return TRUE if the objects are equivalent. 131b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @stable ICU 2.4 132b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 133b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool operator==(const RegexPattern& that) const; 134b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 135b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /** 136b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Comparison operator. Two RegexPattern objects are considered equal if they 137b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * were constructed from identical source patterns using the same match flag 138b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * settings. 139b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param that a RegexPattern object to compare with "this". 140b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return TRUE if the objects are different. 141b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @stable ICU 2.4 142b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 143b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho inline UBool operator!=(const RegexPattern& that) const {return ! operator ==(that);} 144b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 145b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /** 146b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Assignment operator. 
After assignment, this RegexPattern will behave identically 147b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * to the source object. 148b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @stable ICU 2.4 149b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 150b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexPattern &operator =(const RegexPattern &source); 151b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 152b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /** 153b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Create an exact copy of this RegexPattern object. Since RegexPattern is not 154b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * intended to be subclassed, <code>clone()</code> and the copy construction are 155b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * equivalent operations. 156b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return the copy of this RegexPattern 157b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @stable ICU 2.4 158b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 159b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru virtual RegexPattern *clone() const; 160b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 161b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 162b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /** 163b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Compiles the regular expression in string form into a RegexPattern 164b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * object. These compile methods, rather than the constructors, are the usual 165b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * way that RegexPattern objects are created. 
166b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 167b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <p>Note that RegexPattern objects must not be deleted while RegexMatcher 168b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * objects created from the pattern are active. RegexMatchers keep a pointer 169b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * back to their pattern, so premature deletion of the pattern is a 170b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * catastrophic error.</p> 171b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 172b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <p>All pattern match mode flags are set to their default values.</p> 173b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 174b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <p>Note that it is often more convenient to construct a RegexMatcher directly 175b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * from a pattern string rather than separately compiling the pattern and 176b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * then creating a RegexMatcher object from the pattern.</p> 177b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 178b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param regex The regular expression to be compiled. 179b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param pe Receives the position (line and column numbers) of any error 180b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * within the regular expression. 181b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param status A reference to a UErrorCode to receive any errors. 182b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return A RegexPattern object for the compiled pattern. 
183b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 184b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @stable ICU 2.4 185b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 186b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru static RegexPattern * U_EXPORT2 compile( const UnicodeString ®ex, 187b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UParseError &pe, 188b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode &status); 189b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 19050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /** 19150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Compiles the regular expression in string form into a RegexPattern 19250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * object. These compile methods, rather than the constructors, are the usual 19350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * way that RegexPattern objects are created. 19450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * 19550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * <p>Note that RegexPattern objects must not be deleted while RegexMatcher 19650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * objects created from the pattern are active. 
RegexMatchers keep a pointer 19750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * back to their pattern, so premature deletion of the pattern is a 19850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * catastrophic error.</p> 19950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * 20050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * <p>All pattern match mode flags are set to their default values.</p> 20150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * 20250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * <p>Note that it is often more convenient to construct a RegexMatcher directly 20350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * from a pattern string rather than separately compiling the pattern and 20450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * then creating a RegexMatcher object from the pattern.</p> 20550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * 20650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param regex The regular expression to be compiled. Note, the text referred 20750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * to by this UText must not be deleted during the lifetime of the 20850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * RegexPattern object or any RegexMatcher object created from it. 20950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param pe Receives the position (line and column numbers) of any error 21050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * within the regular expression. 21150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param status A reference to a UErrorCode to receive any errors. 21250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @return A RegexPattern object for the compiled pattern. 
21350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * 214103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * @stable ICU 4.6 21550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 21650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho static RegexPattern * U_EXPORT2 compile( UText *regex, 21750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UParseError &pe, 21850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode &status); 21950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 220b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /** 221b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Compiles the regular expression in string form into a RegexPattern 222b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * object using the specified match mode flags. These compile methods, 223b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * rather than the constructors, are the usual way that RegexPattern objects 224b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * are created. 225b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 226b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <p>Note that RegexPattern objects must not be deleted while RegexMatcher 227b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * objects created from the pattern are active. 
RegexMatchers keep a pointer 228b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * back to their pattern, so premature deletion of the pattern is a 229b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * catastrophic error.</p> 230b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 231b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <p>Note that it is often more convenient to construct a RegexMatcher directly 232b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * from a pattern string rather than separately compiling the pattern and 233b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * then creating a RegexMatcher object from the pattern.</p> 234b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 235b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param regex The regular expression to be compiled. 236b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param flags The match mode flags to be used. 23750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param pe Receives the position (line and column numbers) of any error 238b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * within the regular expression. 239b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param status A reference to a UErrorCode to receive any errors. 240b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return A RegexPattern object for the compiled pattern. 
241b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 242b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @stable ICU 2.4 243b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 244b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru static RegexPattern * U_EXPORT2 compile( const UnicodeString ®ex, 245b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t flags, 246b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UParseError &pe, 247b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode &status); 248103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius 24950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /** 25050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Compiles the regular expression in string form into a RegexPattern 25150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * object using the specified match mode flags. These compile methods, 25250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * rather than the constructors, are the usual way that RegexPattern objects 25350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * are created. 25450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * 25550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * <p>Note that RegexPattern objects must not be deleted while RegexMatcher 25650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * objects created from the pattern are active. 
RegexMatchers keep a pointer 25750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * back to their pattern, so premature deletion of the pattern is a 25850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * catastrophic error.</p> 25950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * 26050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * <p>Note that it is often more convenient to construct a RegexMatcher directly 26150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * from a pattern string rather than separately compiling the pattern and 26250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * then creating a RegexMatcher object from the pattern.</p> 26350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * 26450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param regex The regular expression to be compiled. Note, the text referred 26550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * to by this UText must not be deleted during the lifetime of the 26650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * RegexPattern object or any RegexMatcher object created from it. 26750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param flags The match mode flags to be used. 26850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param pe Receives the position (line and column numbers) of any error 26950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * within the regular expression. 27050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param status A reference to a UErrorCode to receive any errors. 27150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @return A RegexPattern object for the compiled pattern. 
27250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * 273103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * @stable ICU 4.6 27450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 27550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho static RegexPattern * U_EXPORT2 compile( UText *regex, 27650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uint32_t flags, 27750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UParseError &pe, 27850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode &status); 279b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 280b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /** 281b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Compiles the regular expression in string form into a RegexPattern 282b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * object using the specified match mode flags. These compile methods, 283b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * rather than the constructors, are the usual way that RegexPattern objects 284b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * are created. 285b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 286b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <p>Note that RegexPattern objects must not be deleted while RegexMatcher 287b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * objects created from the pattern are active. 
RegexMatchers keep a pointer 288b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * back to their pattern, so premature deletion of the pattern is a 289b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * catastrophic error.</p> 290b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 291b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <p>Note that it is often more convenient to construct a RegexMatcher directly 292b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * from a pattern string rather than separately compiling the pattern and 293b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * then creating a RegexMatcher object from the pattern.</p> 294b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 295b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param regex The regular expression to be compiled. 296b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param flags The match mode flags to be used. 297b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param status A reference to a UErrorCode to receive any errors. 298b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return A RegexPattern object for the compiled pattern. 
299b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 300b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @stable ICU 2.6 301b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 302b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru static RegexPattern * U_EXPORT2 compile( const UnicodeString ®ex, 303b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t flags, 304b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode &status); 305b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 306b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /** 30750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Compiles the regular expression in string form into a RegexPattern 30850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * object using the specified match mode flags. These compile methods, 30950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * rather than the constructors, are the usual way that RegexPattern objects 31050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * are created. 31150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * 31250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * <p>Note that RegexPattern objects must not be deleted while RegexMatcher 31350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * objects created from the pattern are active. 
RegexMatchers keep a pointer 31450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * back to their pattern, so premature deletion of the pattern is a 31550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * catastrophic error.</p> 31650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * 31750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * <p>Note that it is often more convenient to construct a RegexMatcher directly 31850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * from a pattern string rather than separately compiling the pattern and 31950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * then creating a RegexMatcher object from the pattern.</p> 32050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * 32150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param regex The regular expression to be compiled. Note, the text referred 32250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * to by this UText must not be deleted during the lifetime of the 32350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * RegexPattern object or any RegexMatcher object created from it. 32450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param flags The match mode flags to be used. 32550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param status A reference to a UErrorCode to receive any errors. 32650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @return A RegexPattern object for the compiled pattern. 
32750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * 328103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * @stable ICU 4.6 32950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 33050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho static RegexPattern * U_EXPORT2 compile( UText *regex, 33150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uint32_t flags, 33250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode &status); 33350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 33450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /** 335b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Get the match mode flags that were used when compiling this pattern. 336b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return the match mode flags 337b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @stable ICU 2.4 338b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 339b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru virtual uint32_t flags() const; 340b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 341b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /** 342b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Creates a RegexMatcher that will match the given input against this pattern. The 343b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * RegexMatcher can then be used to perform match, find or replace operations 344b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * on the input. Note that a RegexPattern object must not be deleted while 345b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * RegexMatchers created from it still exist and might possibly be used again. 
346b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <p> 347b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * The matcher will retain a reference to the supplied input string, and all regexp 348b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * pattern matching operations happen directly on this original string. It is 349b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * critical that the string not be altered or deleted before use by the regular 350b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * expression operations is complete. 351b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 352b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param input The input string to which the regular expression will be applied. 353b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param status A reference to a UErrorCode to receive any errors. 354b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return A RegexMatcher object for this pattern and input. 
355b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 356b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @stable ICU 2.4 357b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 358b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru virtual RegexMatcher *matcher(const UnicodeString &input, 359b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode &status) const; 36050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 361b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruprivate: 362b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /** 363b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * Cause a compilation error if an application accidentally attempts to 364b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * create a matcher with a (UChar *) string as input rather than 365b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * a UnicodeString. Avoids a dangling reference to a temporary string. 366b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <p> 367b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * To efficiently work with UChar *strings, wrap the data in a UnicodeString 368b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * using one of the aliasing constructors, such as 369b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <code>UnicodeString(UBool isTerminated, const UChar *text, int32_t textLength);</code> 37050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * or in a UText, using 37150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * <code>utext_openUChars(UText *ut, const UChar *text, int64_t textLength, UErrorCode *status);</code> 372b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 373b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @internal 374b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 375b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexMatcher *matcher(const 
UChar *input, 376b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode &status) const; 377b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querupublic: 378b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 379b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 380b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /** 381b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Creates a RegexMatcher that will match against this pattern. The 382b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * RegexMatcher can be used to perform match, find or replace operations. 383b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Note that a RegexPattern object must not be deleted while 384b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * RegexMatchers created from it still exist and might possibly be used again. 385b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 386b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param status A reference to a UErrorCode to receive any errors. 387b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return A RegexMatcher object for this pattern and input. 388b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 389b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @stable ICU 2.6 390b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 391b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru virtual RegexMatcher *matcher(UErrorCode &status) const; 392b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 393b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 394b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /** 395b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Test whether a string matches a regular expression. 
This convenience function 396b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * both compiles the regular expression and applies it in a single operation. 397b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Note that if the same pattern needs to be applied repeatedly, this method will be 398b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * less efficient than creating and reusing a RegexMatcher object. 399b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 400b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param regex The regular expression 401b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param input The string data to be matched 402b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param pe Receives the position of any syntax errors within the regular expression 403b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param status A reference to a UErrorCode to receive any errors. 404b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return True if the regular expression exactly matches the full input string. 405b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 406b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @stable ICU 2.4 407b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 408b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru static UBool U_EXPORT2 matches(const UnicodeString &regex, 409b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeString &input, 41050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UParseError &pe, 41150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode &status); 41250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 41350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /** 41450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Test whether a string matches a regular expression. 
This convenience function 415b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * both compiles the regular expression and applies it in a single operation. 41650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Note that if the same pattern needs to be applied repeatedly, this method will be 41750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * less efficient than creating and reusing a RegexMatcher object. 41850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * 41950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param regex The regular expression 42050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param input The string data to be matched 42150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param pe Receives the position of any syntax errors within the regular expression 42250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param status A reference to a UErrorCode to receive any errors. 42350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @return True if the regular expression exactly matches the full input string. 42450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * 425103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * @stable ICU 4.6 42650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 42750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho static UBool U_EXPORT2 matches(UText *regex, 42850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText *input, 429b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UParseError &pe, 430b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode &status); 431b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 432b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /** 43350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Returns the regular expression from which this pattern was compiled. This method will work 43450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * even if the pattern was compiled from a UText. 
43550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * 43650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Note: If the pattern was originally compiled from a UText, and that UText was modified, 43750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * the returned string may no longer reflect the RegexPattern object. 43850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @stable ICU 2.4 439b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 440b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru virtual UnicodeString pattern() const; 44150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 44250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 44350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /** 44450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Returns the regular expression from which this pattern was compiled. This method will work 44550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * even if the pattern was compiled from a UnicodeString. 44650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * 44750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Note: This is the original input, not a clone. If the pattern was originally compiled from a 44850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * UText, and that UText was modified, the returned UText may no longer reflect the RegexPattern 44950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * object. 45050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * 451103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * @stable ICU 4.6 45250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 45327f654740f2a26ad62a5c155af9199af9e69b889claireho virtual UText *patternText(UErrorCode &status) const; 454b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 455b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 456b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /** 457b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * Split a string into fields. Somewhat like split() from Perl or Java. 
458b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * Pattern matches identify delimiters that separate the input 459b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * into fields. The input data between the delimiters becomes the 460b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * fields themselves. 461b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * 462b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * If the delimiter pattern includes capture groups, the captured text will 463b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * also appear in the destination array of output strings, interspersed 464b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * with the fields. This is similar to Perl, but differs from Java, 465b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * which ignores the presence of capture groups in the pattern. 466b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * 467b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * Trailing empty fields will always be returned, assuming sufficient 468b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * destination capacity. This differs from the default behavior for Java 469b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * and Perl where trailing empty fields are not returned. 470b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * 471b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * The number of strings produced by the split operation is returned. 472b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * This count includes the strings from capture groups in the delimiter pattern. 473b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * This behavior differs from Java, which ignores capture groups. 
474b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * 475b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * For the best performance on split() operations, 476b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * <code>RegexMatcher::split</code> is preferable to this function 477b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 478b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param input The string to be split into fields. The field delimiters 479b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * match the pattern (in the "this" object) 480b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param dest An array of UnicodeStrings to receive the results of the split. 481b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * This is an array of actual UnicodeString objects, not an 482b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * array of pointers to strings. Local (stack based) arrays can 483b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * work well here. 484b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param destCapacity The number of elements in the destination array. 485b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * If the number of fields found is less than destCapacity, the 486b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * extra strings in the destination array are not altered. 487b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * If the number of destination strings is less than the number 488b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * of fields, the trailing part of the input string, including any 489b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * field delimiters, is placed in the last destination string. 490b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param status A reference to a UErrorCode to receive any errors. 
491b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return The number of fields into which the input string was split. 492b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @stable ICU 2.4 493b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 494b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru virtual int32_t split(const UnicodeString &input, 495b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString dest[], 496b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t destCapacity, 497b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode &status) const; 498b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 499b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 500b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /** 501b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * Split a string into fields. Somewhat like split() from Perl or Java. 502b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * Pattern matches identify delimiters that separate the input 503b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * into fields. The input data between the delimiters becomes the 504b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * fields themselves. 505b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * 506b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * If the delimiter pattern includes capture groups, the captured text will 507b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * also appear in the destination array of output strings, interspersed 508b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * with the fields. This is similar to Perl, but differs from Java, 509b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * which ignores the presence of capture groups in the pattern. 
510b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * 511b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * Trailing empty fields will always be returned, assuming sufficient 512b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * destination capacity. This differs from the default behavior for Java 513b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * and Perl where trailing empty fields are not returned. 514b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * 515b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * The number of strings produced by the split operation is returned. 516b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * This count includes the strings from capture groups in the delimiter pattern. 517b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * This behavior differs from Java, which ignores capture groups. 518b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * 51950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * For the best performance on split() operations, 520b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * <code>RegexMatcher::split</code> is preferable to this function 52150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * 52250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param input The string to be split into fields. The field delimiters 52350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * match the pattern (in the "this" object) 52450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param dest An array of mutable UText structs to receive the results of the split. 52550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * If a field is NULL, a new UText is allocated to contain the results for 52650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * that field. This new UText is not guaranteed to be mutable. 52750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param destCapacity The number of elements in the destination array. 
52850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * If the number of fields found is less than destCapacity, the 52950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * extra strings in the destination array are not altered. 53050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * If the number of destination strings is less than the number 53150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * of fields, the trailing part of the input string, including any 53250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * field delimiters, is placed in the last destination string. 53350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param status A reference to a UErrorCode to receive any errors. 534b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * @return The number of destination strings used. 53550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * 536103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * @stable ICU 4.6 53750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 53850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho virtual int32_t split(UText *input, 53950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText *dest[], 54050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t destCapacity, 54150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode &status) const; 54250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 54350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 54450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /** 545b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * ICU "poor man's RTTI", returns a UClassID for the actual class. 
546b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 547b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @stable ICU 2.4 548b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 549b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru virtual UClassID getDynamicClassID() const; 550b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 551b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /** 552b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * ICU "poor man's RTTI", returns a UClassID for this class. 553b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 554b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @stable ICU 2.4 555b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 556b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru static UClassID U_EXPORT2 getStaticClassID(); 557b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 558b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruprivate: 559b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 560b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Implementation Data 561b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 56250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText *fPattern; // The original pattern string. 56350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString *fPatternString; // The original pattern UnicodeString if relevant 564b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t fFlags; // The flags used when compiling the pattern. 565b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 56650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UVector64 *fCompiledPat; // The compiled pattern p-code. 
567b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString fLiteralText; // Any literal string data from the pattern, 568b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // after un-escaping, for use during the match. 569b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 570b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UVector *fSets; // Any UnicodeSets referenced from the pattern. 571b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru Regex8BitSet *fSets8; // (and fast sets for latin-1 range.) 572b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 573b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 574b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode fDeferredStatus; // status if some prior error has left this 575b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // RegexPattern in an unusable state. 576b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 577b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t fMinMatchLen; // Minimum Match Length. All matches will have length 578b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // >= this value. For some patterns, this calculated 579b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // value may be less than the true shortest 580b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // possible match. 58150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 582b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t fFrameSize; // Size of a state stack frame in the 583b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // execution engine. 
584b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 585b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t fDataSize; // The size of the data needed by the pattern that 586b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // does not go on the state stack, but has just 587b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // a single copy per matcher. 588b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 589b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UVector32 *fGroupMap; // Map from capture group number to position of 590b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // the group's variables in the matcher stack frame. 591b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 592b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t fMaxCaptureDigits; 593b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 594b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet **fStaticSets; // Ptr to static (shared) sets for predefined 595b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // regex character classes, e.g. Word. 596b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 597b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru Regex8BitSet *fStaticSets8; // Ptr to the static (shared) latin-1 only 598b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // sets for predefined regex classes. 599b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 600b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t fStartType; // Info on how a match must start. 
601b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t fInitialStringIdx; // 602b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t fInitialStringLen; 603b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet *fInitialChars; 604b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 fInitialChar; 605b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru Regex8BitSet *fInitialChars8; 60650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UBool fNeedsAltInput; 607b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 608b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru friend class RegexCompile; 609b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru friend class RegexMatcher; 610b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru friend class RegexCImpl; 611b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 612b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 613b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Implementation Methods 614b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 615b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru void init(); // Common initialization, for use by constructors. 
616b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru void zap(); // Common cleanup 617b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#ifdef REGEX_DEBUG 618b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru void dumpOp(int32_t index) const; 619b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru friend void U_EXPORT2 RegexPatternDump(const RegexPattern *); 620b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif 621b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 622b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}; 623b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 624b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 625b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 626b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 627b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * class RegexMatcher bundles together a regular expression pattern and 628b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * input text to which the expression can be applied. It includes methods 629b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * for testing for matches, and for find and replace operations. 
630b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 631b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <p>Class RegexMatcher is not intended to be subclassed.</p> 632b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 633b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @stable ICU 2.4 634b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 635b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruclass U_I18N_API RegexMatcher: public UObject { 636b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querupublic: 637b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 638b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /** 639b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Construct a RegexMatcher for a regular expression. 640b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * This is a convenience method that avoids the need to explicitly create 641b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * a RegexPattern object. Note that if several RegexMatchers need to be 642b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * created for the same expression, it will be more efficient to 643b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * separately create and cache a RegexPattern object, and use 644b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * its matcher() method to create the RegexMatcher objects. 645b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 646b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param regexp The Regular Expression to be compiled. 647b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param flags Regular expression options, such as case insensitive matching. 
648b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @see UREGEX_CASE_INSENSITIVE 649b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param status Any errors are reported by setting this UErrorCode variable. 650b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @stable ICU 2.6 651b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 652b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexMatcher(const UnicodeString &regexp, uint32_t flags, UErrorCode &status); 653b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 654b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /** 655b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Construct a RegexMatcher for a regular expression. 656b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * This is a convenience method that avoids the need to explicitly create 657b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * a RegexPattern object. Note that if several RegexMatchers need to be 658b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * created for the same expression, it will be more efficient to 659b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * separately create and cache a RegexPattern object, and use 660b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * its matcher() method to create the RegexMatcher objects. 66150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * 66250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param regexp The regular expression to be compiled. 66350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param flags Regular expression options, such as case insensitive matching. 66450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @see UREGEX_CASE_INSENSITIVE 66550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param status Any errors are reported by setting this UErrorCode variable. 
66650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * 667103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * @stable ICU 4.6 66850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 66950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexMatcher(UText *regexp, uint32_t flags, UErrorCode &status); 670103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius 67150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /** 67250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Construct a RegexMatcher for a regular expression. 67350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * This is a convenience method that avoids the need to explicitly create 67450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * a RegexPattern object. Note that if several RegexMatchers need to be 67550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * created for the same expression, it will be more efficient to 67650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * separately create and cache a RegexPattern object, and use 67750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * its matcher() method to create the RegexMatcher objects. 678b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <p> 679b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * The matcher will retain a reference to the supplied input string, and all regexp 680b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * pattern matching operations happen directly on the original string. It is 681b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * critical that the string not be altered or deleted before use by the regular 682b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * expression operations is complete. 683b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 684c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * @param regexp The Regular Expression to be compiled. 685b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param input The string to match. 
The matcher retains a reference to the 686b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * caller's string; no copy is made. 687b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param flags Regular expression options, such as case insensitive matching. 688b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @see UREGEX_CASE_INSENSITIVE 689b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param status Any errors are reported by setting this UErrorCode variable. 690b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @stable ICU 2.6 691b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 692b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexMatcher(const UnicodeString &regexp, const UnicodeString &input, 693b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t flags, UErrorCode &status); 694b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 69550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /** 69650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Construct a RegexMatcher for a regular expression. 69750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * This is a convenience method that avoids the need to explicitly create 69850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * a RegexPattern object. Note that if several RegexMatchers need to be 69950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * created for the same expression, it will be more efficient to 70050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * separately create and cache a RegexPattern object, and use 70150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * its matcher() method to create the RegexMatcher objects. 70250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * <p> 70350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * The matcher will make a shallow clone of the supplied input text, and all regexp 70450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * pattern matching operations happen on this clone. 
While read-only operations on 70550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * the supplied text are permitted, it is critical that the underlying string not be 70650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * altered or deleted before use by the regular expression operations is complete. 70750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * 70850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param regexp The Regular Expression to be compiled. 70950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param input The string to match. The matcher retains a shallow clone of the text. 71050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param flags Regular expression options, such as case insensitive matching. 71150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @see UREGEX_CASE_INSENSITIVE 71250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param status Any errors are reported by setting this UErrorCode variable. 71350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * 714103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * @stable ICU 4.6 71550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 71650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexMatcher(UText *regexp, UText *input, 71750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uint32_t flags, UErrorCode &status); 71850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 719b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruprivate: 720b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /** 721b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * Cause a compilation error if an application accidentally attempts to 722b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * create a matcher with a (UChar *) string as input rather than 723b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * a UnicodeString. Avoids a dangling reference to a temporary string. 
724b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <p> 725b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * To efficiently work with UChar *strings, wrap the data in a UnicodeString 726b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * using one of the aliasing constructors, such as 727b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <code>UnicodeString(UBool isTerminated, const UChar *text, int32_t textLength);</code> 72850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * or in a UText, using 72950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * <code>utext_openUChars(UText *ut, const UChar *text, int64_t textLength, UErrorCode *status);</code> 730b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 731b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @internal 732b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 733b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexMatcher(const UnicodeString &regexp, const UChar *input, 734b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t flags, UErrorCode &status); 735b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querupublic: 736b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 737b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 738b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /** 739b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Destructor. 
740b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 741b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @stable ICU 2.4 742b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 743b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru virtual ~RegexMatcher(); 744b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 745b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 746b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /** 747c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * Attempts to match the entire input region against the pattern. 748b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param status A reference to a UErrorCode to receive any errors. 749b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return TRUE if there is a match 750b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @stable ICU 2.4 751b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 752b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru virtual UBool matches(UErrorCode &status); 753b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 75450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 755b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /** 756c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * Resets the matcher, then attempts to match the input beginning 757c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * at the specified startIndex, and extending to the end of the input. 758c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * The input region is reset to include the entire input string. 759c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * A successful match must extend to the end of the input. 76027f654740f2a26ad62a5c155af9199af9e69b889claireho * @param startIndex The input string (native) index at which to begin matching. 
761b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param status A reference to a UErrorCode to receive any errors. 762b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return TRUE if there is a match 763b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @stable ICU 2.8 764b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 76527f654740f2a26ad62a5c155af9199af9e69b889claireho virtual UBool matches(int64_t startIndex, UErrorCode &status); 766b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 767b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 768b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /** 769c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * Attempts to match the input string, starting from the beginning of the region, 770c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * against the pattern. Like the matches() method, this function 771c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * always starts at the beginning of the input region; 772c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * unlike that function, it does not require that the entire region be matched. 773b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 774b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <p>If the match succeeds then more information can be obtained via the <code>start()</code>, 775b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <code>end()</code>, and <code>group()</code> functions.</p> 776b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 777b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param status A reference to a UErrorCode to receive any errors. 778b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return TRUE if there is a match at the start of the input string. 
779b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @stable ICU 2.4 780b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 781b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru virtual UBool lookingAt(UErrorCode &status); 782b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 783b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 784b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /** 785b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Attempts to match the input string, starting from the specified index, against the pattern. 786b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * The match may be of any length, and is not required to extend to the end 787b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * of the input string. Contrast with matches(). 788b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 789b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <p>If the match succeeds then more information can be obtained via the <code>start()</code>, 790b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <code>end()</code>, and <code>group()</code> functions.</p> 791b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 79227f654740f2a26ad62a5c155af9199af9e69b889claireho * @param startIndex The input string (native) index at which to begin matching. 793b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param status A reference to a UErrorCode to receive any errors. 794b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return TRUE if there is a match. 
795b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @stable ICU 2.8 796b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 79727f654740f2a26ad62a5c155af9199af9e69b889claireho virtual UBool lookingAt(int64_t startIndex, UErrorCode &status); 798b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 79950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 800b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /** 801b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Find the next pattern match in the input string. 802b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * The find begins searching the input at the location following the end of 803b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * the previous match, or at the start of the string if there is no previous match. 804b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * If a match is found, <code>start(), end()</code> and <code>group()</code> 805b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * will provide more information regarding the match. 806b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <p>Note that if the input string is changed by the application, 807b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * use find(startPos, status) instead of find(), because the saved starting 808b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * position may not be valid with the altered input string.</p> 809b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return TRUE if a match is found. 
810b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @stable ICU 2.4 811b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 812b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru virtual UBool find(); 813b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 814b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 815b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /** 816b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Resets this RegexMatcher and then attempts to find the next substring of the 817b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * input string that matches the pattern, starting at the specified index. 818b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 81927f654740f2a26ad62a5c155af9199af9e69b889claireho * @param start The (native) index in the input string to begin the search. 820b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param status A reference to a UErrorCode to receive any errors. 821b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return TRUE if a match is found. 822b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @stable ICU 2.4 823b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 82427f654740f2a26ad62a5c155af9199af9e69b889claireho virtual UBool find(int64_t start, UErrorCode &status); 825b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 826b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 827b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /** 828b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Returns a string containing the text matched by the previous match. 829b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * If the pattern can match an empty string, an empty string may be returned. 830b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param status A reference to a UErrorCode to receive any errors. 
831b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Possible errors are U_REGEX_INVALID_STATE if no match 832b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * has been attempted or the last match failed. 833b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return a string containing the matched input text. 834b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @stable ICU 2.4 835b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 836b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru virtual UnicodeString group(UErrorCode &status) const; 837b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 838b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 839b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /** 840b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Returns a string containing the text captured by the given group 841b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * during the previous match operation. Group(0) is the entire match. 842b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 843b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param groupNum the capture group number 844b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param status A reference to a UErrorCode to receive any errors. 845b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Possible errors are U_REGEX_INVALID_STATE if no match 846b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * has been attempted or the last match failed and 847b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * U_INDEX_OUTOFBOUNDS_ERROR for a bad capture group number. 
848b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return the captured text 849b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @stable ICU 2.4 850b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 851b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru virtual UnicodeString group(int32_t groupNum, UErrorCode &status) const; 852b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 853b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 854b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /** 85527f654740f2a26ad62a5c155af9199af9e69b889claireho * Returns the number of capturing groups in this matcher's pattern. 85627f654740f2a26ad62a5c155af9199af9e69b889claireho * @return the number of capture groups 85727f654740f2a26ad62a5c155af9199af9e69b889claireho * @stable ICU 2.4 85827f654740f2a26ad62a5c155af9199af9e69b889claireho */ 85927f654740f2a26ad62a5c155af9199af9e69b889claireho virtual int32_t groupCount() const; 86027f654740f2a26ad62a5c155af9199af9e69b889claireho 86127f654740f2a26ad62a5c155af9199af9e69b889claireho 86227f654740f2a26ad62a5c155af9199af9e69b889claireho /** 86327f654740f2a26ad62a5c155af9199af9e69b889claireho * Returns a shallow clone of the entire live input string with the UText current native index 86427f654740f2a26ad62a5c155af9199af9e69b889claireho * set to the beginning of the requested group. 865b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * 866b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * @param dest The UText into which the input should be cloned, or NULL to create a new UText 86727f654740f2a26ad62a5c155af9199af9e69b889claireho * @param group_len A reference to receive the length of the desired capture group 86827f654740f2a26ad62a5c155af9199af9e69b889claireho * @param status A reference to a UErrorCode to receive any errors. 
86927f654740f2a26ad62a5c155af9199af9e69b889claireho * Possible errors are U_REGEX_INVALID_STATE if no match 87027f654740f2a26ad62a5c155af9199af9e69b889claireho * has been attempted or the last match failed and 87127f654740f2a26ad62a5c155af9199af9e69b889claireho * U_INDEX_OUTOFBOUNDS_ERROR for a bad capture group number. 87227f654740f2a26ad62a5c155af9199af9e69b889claireho * @return dest if non-NULL, a shallow copy of the input text otherwise 87327f654740f2a26ad62a5c155af9199af9e69b889claireho * 874103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * @stable ICU 4.6 87527f654740f2a26ad62a5c155af9199af9e69b889claireho */ 87627f654740f2a26ad62a5c155af9199af9e69b889claireho virtual UText *group(UText *dest, int64_t &group_len, UErrorCode &status) const; 87727f654740f2a26ad62a5c155af9199af9e69b889claireho 87827f654740f2a26ad62a5c155af9199af9e69b889claireho /** 879b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * Returns a shallow clone of the entire live input string with the UText current native index 880b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * set to the beginning of the requested group. 881b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * 882b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * @param groupNum The capture group number. 883b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * @param dest The UText into which the input should be cloned, or NULL to create a new UText. 884b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * @param group_len A reference to receive the length of the desired capture group 885b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * @param status A reference to a UErrorCode to receive any errors. 886b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * Possible errors are U_REGEX_INVALID_STATE if no match 887b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * has been attempted or the last match failed and 888b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * U_INDEX_OUTOFBOUNDS_ERROR for a bad capture group number. 
889b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * @return dest if non-NULL, a shallow copy of the input text otherwise 890b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * 891103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * @stable ICU 4.6 89227f654740f2a26ad62a5c155af9199af9e69b889claireho */ 89327f654740f2a26ad62a5c155af9199af9e69b889claireho virtual UText *group(int32_t groupNum, UText *dest, int64_t &group_len, UErrorCode &status) const; 89427f654740f2a26ad62a5c155af9199af9e69b889claireho 89527f654740f2a26ad62a5c155af9199af9e69b889claireho /** 89650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Returns a string containing the text captured by the given group 89750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * during the previous match operation. Group(0) is the entire match. 89850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * 89950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param groupNum the capture group number 90050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param dest A mutable UText in which the matching text is placed. 90150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * If NULL, a new UText will be created (which may not be mutable). 90250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param status A reference to a UErrorCode to receive any errors. 90350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Possible errors are U_REGEX_INVALID_STATE if no match 90450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * has been attempted or the last match failed. 90550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @return A string containing the matched input text. If a pre-allocated UText 90650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * was provided, it will always be used and returned. 
90750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * 90850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @internal ICU 4.4 technology preview 90950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 91050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho virtual UText *group(int32_t groupNum, UText *dest, UErrorCode &status) const; 91150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 91250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 91350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /** 914b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Returns the index in the input string of the start of the text matched 915b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * during the previous match operation. 916b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param status a reference to a UErrorCode to receive any errors. 91727f654740f2a26ad62a5c155af9199af9e69b889claireho * @return The (native) position in the input string of the start of the last match. 918b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @stable ICU 2.4 919b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 920b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru virtual int32_t start(UErrorCode &status) const; 921b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 92227f654740f2a26ad62a5c155af9199af9e69b889claireho /** 923b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * Returns the index in the input string of the start of the text matched 924b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * during the previous match operation. 925b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * @param status a reference to a UErrorCode to receive any errors. 926b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * @return The (native) position in the input string of the start of the last match. 
927103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * @stable ICU 4.6 92827f654740f2a26ad62a5c155af9199af9e69b889claireho */ 92927f654740f2a26ad62a5c155af9199af9e69b889claireho virtual int64_t start64(UErrorCode &status) const; 93027f654740f2a26ad62a5c155af9199af9e69b889claireho 931b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 932b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /** 933b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Returns the index in the input string of the start of the text matched by the 934b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * specified capture group during the previous match operation. Return -1 if 935b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * the capture group exists in the pattern, but was not part of the last match. 936b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 937b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param group the capture group number 938b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param status A reference to a UErrorCode to receive any errors. Possible 939b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * errors are U_REGEX_INVALID_STATE if no match has been 940b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * attempted or the last match failed, and 941b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * U_INDEX_OUTOFBOUNDS_ERROR for a bad capture group number 94227f654740f2a26ad62a5c155af9199af9e69b889claireho * @return the (native) start position of substring matched by the specified group. 
943b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @stable ICU 2.4 944b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 945b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru virtual int32_t start(int32_t group, UErrorCode &status) const; 946b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 94727f654740f2a26ad62a5c155af9199af9e69b889claireho /** 948b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * Returns the index in the input string of the start of the text matched by the 949b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * specified capture group during the previous match operation. Return -1 if 950b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * the capture group exists in the pattern, but was not part of the last match. 951b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * 952b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * @param group the capture group number. 953b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * @param status A reference to a UErrorCode to receive any errors. Possible 954b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * errors are U_REGEX_INVALID_STATE if no match has been 955b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * attempted or the last match failed, and 956b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * U_INDEX_OUTOFBOUNDS_ERROR for a bad capture group number. 957b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * @return the (native) start position of substring matched by the specified group. 
958103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * @stable ICU 4.6 95927f654740f2a26ad62a5c155af9199af9e69b889claireho */ 96027f654740f2a26ad62a5c155af9199af9e69b889claireho virtual int64_t start64(int32_t group, UErrorCode &status) const; 96127f654740f2a26ad62a5c155af9199af9e69b889claireho 962b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 963b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /** 964b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Returns the index in the input string of the first character following the 965b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * text matched during the previous match operation. 966b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * 967b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param status A reference to a UErrorCode to receive any errors. Possible 968b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * errors are U_REGEX_INVALID_STATE if no match has been 969b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * attempted or the last match failed. 970b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return the index of the last character matched, plus one. 97127f654740f2a26ad62a5c155af9199af9e69b889claireho * The index value returned is a native index, corresponding to 97227f654740f2a26ad62a5c155af9199af9e69b889claireho * code units for the underlying encoding type, for example, 973b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * a byte index for UTF-8. 
974b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @stable ICU 2.4 975b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 976b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru virtual int32_t end(UErrorCode &status) const; 977b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 97827f654740f2a26ad62a5c155af9199af9e69b889claireho /** 979b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * Returns the index in the input string of the first character following the 980b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * text matched during the previous match operation. 981b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * 982b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * @param status A reference to a UErrorCode to receive any errors. Possible 983b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * errors are U_REGEX_INVALID_STATE if no match has been 984b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * attempted or the last match failed. 985b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * @return the index of the last character matched, plus one. 986b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * The index value returned is a native index, corresponding to 987b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * code units for the underlying encoding type, for example, 988b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * a byte index for UTF-8. 
989103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * @stable ICU 4.6 99027f654740f2a26ad62a5c155af9199af9e69b889claireho */ 99127f654740f2a26ad62a5c155af9199af9e69b889claireho virtual int64_t end64(UErrorCode &status) const; 99227f654740f2a26ad62a5c155af9199af9e69b889claireho 993b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 994b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /** 995b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Returns the index in the input string of the character following the 996b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * text matched by the specified capture group during the previous match operation. 997b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * 998b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param group the capture group number 999b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param status A reference to a UErrorCode to receive any errors. Possible 1000b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * errors are U_REGEX_INVALID_STATE if no match has been 1001b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * attempted or the last match failed and 1002b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * U_INDEX_OUTOFBOUNDS_ERROR for a bad capture group number 1003b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return the index of the first character following the text 1004b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * captured by the specified group during the previous match operation. 1005b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Return -1 if the capture group exists in the pattern but was not part of the match. 
100627f654740f2a26ad62a5c155af9199af9e69b889claireho * The index value returned is a native index, corresponding to 100727f654740f2a26ad62a5c155af9199af9e69b889claireho * code units for the underlying encoding type, for example, 100827f654740f2a26ad62a5c155af9199af9e69b889claireho * a byte index for UTF-8. 1009b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @stable ICU 2.4 1010b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1011b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru virtual int32_t end(int32_t group, UErrorCode &status) const; 1012b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 101327f654740f2a26ad62a5c155af9199af9e69b889claireho /** 1014b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * Returns the index in the input string of the character following the 1015b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * text matched by the specified capture group during the previous match operation. 1016b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * 1017b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * @param group the capture group number 1018b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * @param status A reference to a UErrorCode to receive any errors. Possible 1019b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * errors are U_REGEX_INVALID_STATE if no match has been 1020b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * attempted or the last match failed and 1021b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * U_INDEX_OUTOFBOUNDS_ERROR for a bad capture group number 1022b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * @return the index of the first character following the text 1023b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * captured by the specified group during the previous match operation. 1024b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * Return -1 if the capture group exists in the pattern but was not part of the match. 
1025b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * The index value returned is a native index, corresponding to 1026b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * code units for the underlying encoding type, for example, 1027b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * a byte index for UTF8. 1028103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * @stable ICU 4.6 102927f654740f2a26ad62a5c155af9199af9e69b889claireho */ 103027f654740f2a26ad62a5c155af9199af9e69b889claireho virtual int64_t end64(int32_t group, UErrorCode &status) const; 103127f654740f2a26ad62a5c155af9199af9e69b889claireho 1032b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1033b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /** 1034b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Resets this matcher. The effect is to remove any memory of previous matches, 1035b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * and to cause subsequent find() operations to begin at the beginning of 1036b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * the input string. 1037b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 1038b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return this RegexMatcher. 1039b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @stable ICU 2.4 1040b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1041b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru virtual RegexMatcher &reset(); 1042b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1043b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1044b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /** 1045b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Resets this matcher, and set the current input position. 
1046b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * The effect is to remove any memory of previous matches, 1047b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * and to cause subsequent find() operations to begin at 104827f654740f2a26ad62a5c155af9199af9e69b889claireho * the specified (native) position in the input string. 1049c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * <p> 1050c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * The matcher's region is reset to its default, which is the entire 1051c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * input string. 1052c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * <p> 1053c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * An alternative to this function is to set a match region 1054c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * beginning at the desired index. 1055b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 1056b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return this RegexMatcher. 1057b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @stable ICU 2.8 1058b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 105927f654740f2a26ad62a5c155af9199af9e69b889claireho virtual RegexMatcher &reset(int64_t index, UErrorCode &status); 1060b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1061b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1062b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /** 1063b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Resets this matcher with a new input string. This allows instances of RegexMatcher 1064b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * to be reused, which is more efficient than creating a new RegexMatcher for 1065c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * each input string to be processed. 
1066b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param input The new string on which subsequent pattern matches will operate. 1067b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * The matcher retains a reference to the caller's string, and operates 1068b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * directly on that. Ownership of the string remains with the caller. 1069b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Because no copy of the string is made, it is essential that the 1070b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * caller not delete the string until after regexp operations on it 1071c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * are done. 107250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Note that while a reset on the matcher with an input string that is then 107350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * modified across/during matcher operations may be supported currently for UnicodeString, 107450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * this was not originally intended behavior, and support for this is not guaranteed 107550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * in upcoming versions of ICU. 1076b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return this RegexMatcher. 1077b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @stable ICU 2.4 1078b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1079b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru virtual RegexMatcher &reset(const UnicodeString &input); 1080b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 108150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 108250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /** 108350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Resets this matcher with a new input string. 
This allows instances of RegexMatcher 108450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * to be reused, which is more efficient than creating a new RegexMatcher for 108550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * each input string to be processed. 108650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param input The new string on which subsequent pattern matches will operate. 108750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * The matcher makes a shallow clone of the given text; ownership of the 108850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * original string remains with the caller. Because no deep copy of the 108950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * text is made, it is essential that the caller not modify the string 109050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * until after regexp operations on it are done. 109150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @return this RegexMatcher. 109250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * 1093103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * @stable ICU 4.6 109450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 109550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho virtual RegexMatcher &reset(UText *input); 109650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 109727f654740f2a26ad62a5c155af9199af9e69b889claireho 10982e615e9896b12236afe0ff2695e8afc2ee73f961claireho /** 10992e615e9896b12236afe0ff2695e8afc2ee73f961claireho * Set the subject text string upon which the regular expression is looking for matches 11002e615e9896b12236afe0ff2695e8afc2ee73f961claireho * without changing any other aspect of the matching state. 11012e615e9896b12236afe0ff2695e8afc2ee73f961claireho * The new and previous text strings must have the same content. 
11022e615e9896b12236afe0ff2695e8afc2ee73f961claireho * 1103b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * This function is intended for use in environments where ICU is operating on 11042e615e9896b12236afe0ff2695e8afc2ee73f961claireho * strings that may move around in memory. It provides a mechanism for notifying 11052e615e9896b12236afe0ff2695e8afc2ee73f961claireho * ICU that the string has been relocated, and providing a new UText to access the 11062e615e9896b12236afe0ff2695e8afc2ee73f961claireho * string in its new position. 11072e615e9896b12236afe0ff2695e8afc2ee73f961claireho * 1108b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * Note that the regular expression implementation never copies the underlying text 1109b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * of a string being matched, but always operates directly on the original text 1110b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * provided by the user. Refreshing simply drops the references to the old text 1111b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * and replaces them with references to the new. 1112b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * 11132e615e9896b12236afe0ff2695e8afc2ee73f961claireho * Caution: this function is normally used only by very specialized, 1114b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * system-level code. One example use case is with garbage collection that moves 1115b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * the text in memory. 11162e615e9896b12236afe0ff2695e8afc2ee73f961claireho * 11172e615e9896b12236afe0ff2695e8afc2ee73f961claireho * @param input The new (moved) text string. 11182e615e9896b12236afe0ff2695e8afc2ee73f961claireho * @param status Receives errors detected by this function. 
11192e615e9896b12236afe0ff2695e8afc2ee73f961claireho * 1120103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * @stable ICU 4.8 11212e615e9896b12236afe0ff2695e8afc2ee73f961claireho */ 11222e615e9896b12236afe0ff2695e8afc2ee73f961claireho virtual RegexMatcher &refreshInputText(UText *input, UErrorCode &status); 11232e615e9896b12236afe0ff2695e8afc2ee73f961claireho 1124b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruprivate: 1125b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /** 1126b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * Cause a compilation error if an application accidentally attempts to 1127b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * reset a matcher with a (UChar *) string as input rather than 1128b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * a UnicodeString. Avoids a dangling reference to a temporary string. 1129b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <p> 1130b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * To efficiently work with UChar *strings, wrap the data in a UnicodeString 1131b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * using one of the aliasing constructors, such as 1132b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <code>UnicodeString(UBool isTerminated, const UChar *text, int32_t textLength);</code> 113350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * or in a UText, using 113450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * <code>utext_openUChars(UText *ut, const UChar *text, int64_t textLength, UErrorCode *status);</code> 1135b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 1136b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @internal 1137b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1138b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexMatcher &reset(const UChar *input); 1139b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querupublic: 
1140b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1141b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /** 114250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Returns the input string being matched. Ownership of the string belongs to 114350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * the matcher; it should not be altered or deleted. This method will work even if the input 114450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * was originally supplied as a UText. 1145b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return the input string 1146b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @stable ICU 2.4 1147b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1148b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru virtual const UnicodeString &input() const; 1149c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 115050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /** 115150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Returns the input string being matched. This is the live input text; it should not be 115250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * altered or deleted. This method will work even if the input was originally supplied as 115350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * a UnicodeString. 
115450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @return the input text 115550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * 1156103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * @stable ICU 4.6 115750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 115850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho virtual UText *inputText() const; 115950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 116050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /** 116150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Returns the input string being matched, either by copying it into the provided 116250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * UText parameter or by returning a shallow clone of the live input. Note that copying 116350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * the entire input may cause significant performance and memory issues. 116450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param dest The UText into which the input should be copied, or NULL to create a new UText 1165b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * @param status error code 116650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @return dest if non-NULL, a shallow copy of the input text otherwise 116750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * 1168103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * @stable ICU 4.6 116950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 117027f654740f2a26ad62a5c155af9199af9e69b889claireho virtual UText *getInput(UText *dest, UErrorCode &status) const; 1171c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1172c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1173c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /** Sets the limits of this matcher's region. 1174c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * The region is the part of the input string that will be searched to find a match. 
1175c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * Invoking this method resets the matcher, and then sets the region to start 1176c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * at the index specified by the start parameter and end at the index specified 1177c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * by the end parameter. 1178c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * 1179c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * Depending on the transparency and anchoring being used (see useTransparentBounds 1180c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * and useAnchoringBounds), certain constructs such as anchors may behave differently 1181c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * at or around the boundaries of the region 1182c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * 1183c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * The function will fail if start is greater than limit, or if either index 1184c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * is less than zero or greater than the length of the string being matched. 1185c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * 118627f654740f2a26ad62a5c155af9199af9e69b889claireho * @param start The (native) index to begin searches at. 1187c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * @param limit The index to end searches at (exclusive). 1188c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * @param status A reference to a UErrorCode to receive any errors. 
1189b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * @stable ICU 4.0 1190c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru */ 119127f654740f2a26ad62a5c155af9199af9e69b889claireho virtual RegexMatcher &region(int64_t start, int64_t limit, UErrorCode &status); 1192c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 119327f654740f2a26ad62a5c155af9199af9e69b889claireho /** 119427f654740f2a26ad62a5c155af9199af9e69b889claireho * Identical to region(start, limit, status) but also allows a start position without 119527f654740f2a26ad62a5c155af9199af9e69b889claireho * resetting the region state. 1196b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * @param regionStart The region start 1197b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * @param regionLimit the limit of the region 119827f654740f2a26ad62a5c155af9199af9e69b889claireho * @param startIndex The (native) index within the region bounds at which to begin searches. 119927f654740f2a26ad62a5c155af9199af9e69b889claireho * @param status A reference to a UErrorCode to receive any errors. 120027f654740f2a26ad62a5c155af9199af9e69b889claireho * If startIndex is not within the specified region bounds, 120127f654740f2a26ad62a5c155af9199af9e69b889claireho * U_INDEX_OUTOFBOUNDS_ERROR is returned. 1202103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * @stable ICU 4.6 120327f654740f2a26ad62a5c155af9199af9e69b889claireho */ 120427f654740f2a26ad62a5c155af9199af9e69b889claireho virtual RegexMatcher &region(int64_t regionStart, int64_t regionLimit, int64_t startIndex, UErrorCode &status); 1205c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1206c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /** 1207c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * Reports the start index of this matcher's region. 
The searches this matcher 1208c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * conducts are limited to finding matches within regionStart (inclusive) and 1209c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * regionEnd (exclusive). 1210c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * 121127f654740f2a26ad62a5c155af9199af9e69b889claireho * @return The starting (native) index of this matcher's region. 1212b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * @stable ICU 4.0 1213c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru */ 1214c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru virtual int32_t regionStart() const; 1215c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 121627f654740f2a26ad62a5c155af9199af9e69b889claireho /** 1217b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * Reports the start index of this matcher's region. The searches this matcher 1218b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * conducts are limited to finding matches within regionStart (inclusive) and 1219b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * regionEnd (exclusive). 1220b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * 1221b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * @return The starting (native) index of this matcher's region. 1222103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * @stable ICU 4.6 1223b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho */ 122427f654740f2a26ad62a5c155af9199af9e69b889claireho virtual int64_t regionStart64() const; 122527f654740f2a26ad62a5c155af9199af9e69b889claireho 1226c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1227c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /** 1228c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * Reports the end (limit) index (exclusive) of this matcher's region. 
The searches 1229c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * this matcher conducts are limited to finding matches within regionStart 1230c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * (inclusive) and regionEnd (exclusive). 1231c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * 123227f654740f2a26ad62a5c155af9199af9e69b889claireho * @return The ending point (native) of this matcher's region. 1233b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * @stable ICU 4.0 1234c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru */ 1235c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru virtual int32_t regionEnd() const; 1236c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 123727f654740f2a26ad62a5c155af9199af9e69b889claireho /** 1238b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * Reports the end (limit) index (exclusive) of this matcher's region. The searches 1239b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * this matcher conducts are limited to finding matches within regionStart 1240b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * (inclusive) and regionEnd (exclusive). 1241b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * 1242b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * @return The ending point (native) of this matcher's region. 1243103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * @stable ICU 4.6 1244b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho */ 124527f654740f2a26ad62a5c155af9199af9e69b889claireho virtual int64_t regionEnd64() const; 124627f654740f2a26ad62a5c155af9199af9e69b889claireho 1247c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /** 1248c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * Queries the transparency of region bounds for this matcher. 1249c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * See useTransparentBounds for a description of transparent and opaque bounds. 
1250c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * By default, a matcher uses opaque region boundaries. 1251c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * 1252c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * @return TRUE if this matcher is using transparent bounds, false if it is not. 1253b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * @stable ICU 4.0 1254c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru */ 1255c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru virtual UBool hasTransparentBounds() const; 1256c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1257c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /** 1258c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * Sets the transparency of region bounds for this matcher. 1259c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * Invoking this function with an argument of true will set this matcher to use transparent bounds. 1260c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * If the boolean argument is false, then opaque bounds will be used. 1261c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * 1262c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * Using transparent bounds, the boundaries of this matcher's region are transparent 1263c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * to lookahead, lookbehind, and boundary matching constructs. Those constructs can 1264c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * see text beyond the boundaries of the region while checking for a match. 1265c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * 1266c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * With opaque bounds, no text outside of the matcher's region is visible to lookahead, 1267c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * lookbehind, and boundary matching constructs. 
1268c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * 1269c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * By default, a matcher uses opaque bounds. 1270c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * 1271c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * @param b TRUE for transparent bounds; FALSE for opaque bounds 1272c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * @return This Matcher; 1273b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * @stable ICU 4.0 1274c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru **/ 1275c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru virtual RegexMatcher &useTransparentBounds(UBool b); 1276c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1277c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1278c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /** 1279c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * Return true if this matcher is using anchoring bounds. 1280b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * By default, matchers use anchoring region bounds. 1281c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * 1282c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * @return TRUE if this matcher is using anchoring bounds. 1283b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * @stable ICU 4.0 1284c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru */ 1285c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru virtual UBool hasAnchoringBounds() const; 1286c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 128750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 1288c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /** 1289c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * Set whether this matcher is using Anchoring Bounds for its region. 
1290c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * With anchoring bounds, pattern anchors such as ^ and $ will match at the start 1291c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * and end of the region. Without Anchoring Bounds, anchors will only match at 1292c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * the positions they would in the complete text. 1293c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * 1294c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * Anchoring Bounds are the default for regions. 1295c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * 1296c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * @param b TRUE to enable anchoring bounds; FALSE to disable them. 1297c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * @return This Matcher 1298b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * @stable ICU 4.0 1299c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru */ 1300c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru virtual RegexMatcher &useAnchoringBounds(UBool b); 1301c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 130250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 1303c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /** 1304103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * Return TRUE if the most recent matching operation attempted to access 1305103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * additional input beyond the available input text. 1306103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * In this case, additional input text could change the results of the match. 1307c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * 1308c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * hitEnd() is defined for both successful and unsuccessful matches. 
1309c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * In either case hitEnd() will return TRUE if the end of the text was 1310c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * reached at any point during the matching process. 1311c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * 1312c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * @return TRUE if the most recent match hit the end of input 1313b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * @stable ICU 4.0 1314c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru */ 1315c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru virtual UBool hitEnd() const; 1316c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1317c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /** 1318c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * Return TRUE if the most recent match succeeded and additional input could cause 1319c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * it to fail. If this method returns false and a match was found, then more input 1320c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * might change the match but the match won't be lost. If a match was not found, 1321c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * then requireEnd has no meaning. 1322c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * 1323c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * @return TRUE if more input could cause the most recent match to no longer match. 
1324b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * @stable ICU 4.0 1325c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru */ 1326c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru virtual UBool requireEnd() const; 1327c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1328c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1329b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /** 1330b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Returns the pattern that is interpreted by this matcher. 1331b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return the RegexPattern for this RegexMatcher 1332b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @stable ICU 2.4 1333b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1334b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru virtual const RegexPattern &pattern() const; 1335b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1336b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1337b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /** 1338b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Replaces every substring of the input that matches the pattern 1339b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * with the given replacement string. This is a convenience function that 1340b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * provides a complete find-and-replace-all operation. 1341b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 1342b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * This method first resets this matcher. It then scans the input string 1343b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * looking for matches of the pattern. 
Input that is not part of any 1344b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * match is left unchanged; each match is replaced in the result by the 1345b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * replacement string. The replacement string may contain references to 1346b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * capture groups. 1347b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 1348b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param replacement a string containing the replacement text. 1349b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param status a reference to a UErrorCode to receive any errors. 1350b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return a string containing the results of the find and replace. 1351b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @stable ICU 2.4 1352b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1353b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru virtual UnicodeString replaceAll(const UnicodeString &replacement, UErrorCode &status); 1354b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1355b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1356b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /** 135750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Replaces every substring of the input that matches the pattern 135850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * with the given replacement string. This is a convenience function that 135950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * provides a complete find-and-replace-all operation. 136050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * 136150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * This method first resets this matcher. It then scans the input string 136250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * looking for matches of the pattern. 
Input that is not part of any 136350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * match is left unchanged; each match is replaced in the result by the 136450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * replacement string. The replacement string may contain references to 136550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * capture groups. 136650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * 136750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param replacement a string containing the replacement text. 136850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param dest a mutable UText in which the results are placed. 136950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * If NULL, a new UText will be created (which may not be mutable). 137050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param status a reference to a UErrorCode to receive any errors. 137150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @return a string containing the results of the find and replace. 137250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * If a pre-allocated UText was provided, it will always be used and returned. 137350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * 1374103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * @stable ICU 4.6 137550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 137650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho virtual UText *replaceAll(UText *replacement, UText *dest, UErrorCode &status); 137750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 137850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 137950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /** 1380b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Replaces the first substring of the input that matches 1381b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * the pattern with the replacement string. This is a convenience 1382b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * function that provides a complete find-and-replace operation. 
1383b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 1384b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <p>This function first resets this RegexMatcher. It then scans the input string 1385b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * looking for a match of the pattern. Input that is not part 1386b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * of the match is appended directly to the result string; the match is replaced 1387b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * in the result by the replacement string. The replacement string may contain 1388b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * references to captured groups.</p> 1389b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 1390b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <p>The state of the matcher (the position at which a subsequent find() 1391b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * would begin) after completing a replaceFirst() is not specified. The 1392b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * RegexMatcher should be reset before doing additional find() operations.</p> 1393b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 1394b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param replacement a string containing the replacement text. 1395b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param status a reference to a UErrorCode to receive any errors. 1396b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return a string containing the results of the find and replace. 
1397b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @stable ICU 2.4 1398b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1399b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru virtual UnicodeString replaceFirst(const UnicodeString &replacement, UErrorCode &status); 140050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 1401b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1402b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /** 140350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Replaces the first substring of the input that matches 140450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * the pattern with the replacement string. This is a convenience 140550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * function that provides a complete find-and-replace operation. 140650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * 140750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * <p>This function first resets this RegexMatcher. It then scans the input string 140850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * looking for a match of the pattern. Input that is not part 140950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * of the match is appended directly to the result string; the match is replaced 141050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * in the result by the replacement string. The replacement string may contain 141150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * references to captured groups.</p> 141250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * 141350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * <p>The state of the matcher (the position at which a subsequent find() 141450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * would begin) after completing a replaceFirst() is not specified. 
The 141550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * RegexMatcher should be reset before doing additional find() operations.</p> 141650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * 141750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param replacement a string containing the replacement text. 141850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param dest a mutable UText in which the results are placed. 141950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * If NULL, a new UText will be created (which may not be mutable). 142050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param status a reference to a UErrorCode to receive any errors. 142150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @return a string containing the results of the find and replace. 142250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * If a pre-allocated UText was provided, it will always be used and returned. 142350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * 1424103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * @stable ICU 4.6 142550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 142650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho virtual UText *replaceFirst(UText *replacement, UText *dest, UErrorCode &status); 142750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 142850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 142950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /** 1430b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Implements a replace operation intended to be used as part of an 1431b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * incremental find-and-replace. 1432b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 1433b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <p>The input string, starting from the end of the previous replacement and ending at 1434b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * the start of the current match, is appended to the destination string. 
Then the 1435b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * replacement string is appended to the output string, 1436b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * including handling any substitutions of captured text.</p> 1437b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 1438b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <p>For simple, prepackaged, non-incremental find-and-replace 1439b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * operations, see replaceFirst() or replaceAll().</p> 1440b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 1441b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param dest A UnicodeString to which the results of the find-and-replace are appended. 1442b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param replacement A UnicodeString that provides the text to be substituted for 1443b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * the input text that matched the regexp pattern. The replacement 1444b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * text may contain references to captured text from the 1445b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * input. 1446b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param status A reference to a UErrorCode to receive any errors. Possible 1447b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * errors are U_REGEX_INVALID_STATE if no match has been 1448b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * attempted or the last match failed, and U_INDEX_OUTOFBOUNDS_ERROR 1449b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * if the replacement text specifies a capture group that 1450b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * does not exist in the pattern. 
1451b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 1452b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return this RegexMatcher 1453b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @stable ICU 2.4 1454b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 1455b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1456b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru virtual RegexMatcher &appendReplacement(UnicodeString &dest, 1457b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeString &replacement, UErrorCode &status); 145850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 145950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 146050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /** 146150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Implements a replace operation intended to be used as part of an 146250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * incremental find-and-replace. 146350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * 146450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * <p>The input string, starting from the end of the previous replacement and ending at 146550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * the start of the current match, is appended to the destination string. 
Then the 146650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * replacement string is appended to the output string, 146750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * including handling any substitutions of captured text.</p> 146850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * 146950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * <p>For simple, prepackaged, non-incremental find-and-replace 147050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * operations, see replaceFirst() or replaceAll().</p> 147150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * 147250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param dest A mutable UText to which the results of the find-and-replace are appended. 147350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Must not be NULL. 147450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param replacement A UText that provides the text to be substituted for 147550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * the input text that matched the regexp pattern. The replacement 147650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * text may contain references to captured text from the input. 147750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param status A reference to a UErrorCode to receive any errors. Possible 147850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * errors are U_REGEX_INVALID_STATE if no match has been 147950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * attempted or the last match failed, and U_INDEX_OUTOFBOUNDS_ERROR 148050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * if the replacement text specifies a capture group that 148150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * does not exist in the pattern. 
148250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * 148350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @return this RegexMatcher 148450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * 1485103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * @stable ICU 4.6 148650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 148750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho virtual RegexMatcher &appendReplacement(UText *dest, 148850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText *replacement, UErrorCode &status); 1489b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1490b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1491b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /** 1492b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * As the final step in a find-and-replace operation, append the remainder 1493b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * of the input string, starting at the position following the last appendReplacement(), 1494b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * to the destination string. <code>appendTail()</code> is intended to be invoked after one 1495b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * or more invocations of the <code>RegexMatcher::appendReplacement()</code>. 1496b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 1497b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param dest A UnicodeString to which the results of the find-and-replace are appended. 1498b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return the destination string. 
1499b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @stable ICU 2.4 1500b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1501b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru virtual UnicodeString &appendTail(UnicodeString &dest); 1502b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1503b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 150450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /** 150550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * As the final step in a find-and-replace operation, append the remainder 150650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * of the input string, starting at the position following the last appendReplacement(), 150750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * to the destination string. <code>appendTail()</code> is intended to be invoked after one 150850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * or more invocations of the <code>RegexMatcher::appendReplacement()</code>. 150950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * 151050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param dest A mutable UText to which the results of the find-and-replace are appended. 151150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Must not be NULL. 1512b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * @param status error code 151350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @return the destination string. 
151450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * 1515103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * @stable ICU 4.6 151650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 151727f654740f2a26ad62a5c155af9199af9e69b889claireho virtual UText *appendTail(UText *dest, UErrorCode &status); 151850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 1519b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1520b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /** 1521b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Split a string into fields. Somewhat like split() from Perl. 1522b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * The pattern matches identify delimiters that separate the input 1523b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * into fields. The input data between the matches becomes the 1524b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * fields themselves. 1525b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 1526b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param input The string to be split into fields. The field delimiters 1527b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * match the pattern (in the "this" object). This matcher 1528b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * will be reset to this input string. 1529b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param dest An array of UnicodeStrings to receive the results of the split. 1530b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * This is an array of actual UnicodeString objects, not an 1531b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * array of pointers to strings. Local (stack based) arrays can 1532b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * work well here. 1533b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param destCapacity The number of elements in the destination array. 
1534b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * If the number of fields found is less than destCapacity, the 1535b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * extra strings in the destination array are not altered. 1536b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * If the number of destination strings is less than the number 1537b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * of fields, the trailing part of the input string, including any 1538b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * field delimiters, is placed in the last destination string. 1539b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param status A reference to a UErrorCode to receive any errors. 1540b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return The number of fields into which the input string was split. 1541b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @stable ICU 2.6 1542b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1543b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru virtual int32_t split(const UnicodeString &input, 1544b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString dest[], 1545b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t destCapacity, 1546b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode &status); 1547b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 154850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 154950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /** 155050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Split a string into fields. Somewhat like split() from Perl. 155150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * The pattern matches identify delimiters that separate the input 155250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * into fields. 
The input data between the matches becomes the 155350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * fields themselves. 155450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * 155550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param input The string to be split into fields. The field delimiters 155650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * match the pattern (in the "this" object). This matcher 155750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * will be reset to this input string. 155850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param dest An array of mutable UText structs to receive the results of the split. 155950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * If a field is NULL, a new UText is allocated to contain the results for 156050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * that field. This new UText is not guaranteed to be mutable. 156150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param destCapacity The number of elements in the destination array. 156250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * If the number of fields found is less than destCapacity, the 156350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * extra strings in the destination array are not altered. 156450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * If the number of destination strings is less than the number 156550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * of fields, the trailing part of the input string, including any 156650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * field delimiters, is placed in the last destination string. 156750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param status A reference to a UErrorCode to receive any errors. 156850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @return The number of fields into which the input string was split. 
156950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * 1570103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * @stable ICU 4.6 157150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 157250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho virtual int32_t split(UText *input, 157350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText *dest[], 157450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t destCapacity, 157550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode &status); 157650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 1577c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /** 1578c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * Set a processing time limit for match operations with this Matcher. 1579c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * 1580c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * Some patterns, when matching certain strings, can run in exponential time. 1581c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * For practical purposes, the match operation may appear to be in an 1582c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * infinite loop. 1583c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * When a limit is set a match operation will fail with an error if the 1584c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * limit is exceeded. 1585c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * <p> 1586c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * The units of the limit are steps of the match engine. 1587c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * Correspondence with actual processor time will depend on the speed 1588c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * of the processor and the details of the specific pattern, but will 1589c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * typically be on the order of milliseconds. 
1590c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * <p> 1591c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * By default, the matching time is not limited. 1592c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * <p> 1593c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * 1594c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * @param limit The limit value, or 0 for no limit. 1595c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * @param status A reference to a UErrorCode to receive any errors. 1596b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * @stable ICU 4.0 1597c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru */ 1598c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru virtual void setTimeLimit(int32_t limit, UErrorCode &status); 1599c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1600c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /** 1601c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * Get the time limit, if any, for match operations made with this Matcher. 1602c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * 1603c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * @return the maximum allowed time for a match, in units of processing steps. 1604b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * @stable ICU 4.0 1605c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru */ 1606c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru virtual int32_t getTimeLimit() const; 1607c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1608c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /** 1609b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * Set the amount of heap storage available for use by the match backtracking stack. 1610c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * The matcher is also reset, discarding any results from previous matches. 
1611c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * <p> 1612c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * ICU uses a backtracking regular expression engine, with the backtrack stack 1613c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * maintained on the heap. This function sets the limit on the amount of memory 1614c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * that can be used for this purpose. A backtracking stack overflow will 1615c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * result in an error from the match operation that caused it. 1616c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * <p> 1617c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * A limit is desirable because a malicious or poorly designed pattern can use 1618c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * excessive memory, potentially crashing the process. A limit is enabled 1619c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * by default. 1620c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * <p> 1621c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * @param limit The maximum size, in bytes, of the matching backtrack stack. 1622c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * A value of zero means no limit. 1623c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * The limit must be greater than or equal to zero. 1624c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * 1625c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * @param status A reference to a UErrorCode to receive any errors. 
1626c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * 1627b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * @stable ICU 4.0 1628c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru */ 1629c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru virtual void setStackLimit(int32_t limit, UErrorCode &status); 1630c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1631c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /** 1632c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * Get the size of the heap storage available for use by the back tracking stack. 1633c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * 1634c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * @return the maximum backtracking stack size, in bytes, or zero if the 1635c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * stack size is unlimited. 1636b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * @stable ICU 4.0 1637c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru */ 1638c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru virtual int32_t getStackLimit() const; 1639c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1640c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1641c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /** 1642c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * Set a callback function for use with this Matcher. 1643c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * During matching operations the function will be called periodically, 1644c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * giving the application the opportunity to terminate a long-running 1645c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * match. 
1646c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * 1647c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * @param callback A pointer to the user-supplied callback function. 1648c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * @param context User context pointer. The value supplied at the 1649c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * time the callback function is set will be saved 1650c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * and passed to the callback each time that it is called. 1651c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * @param status A reference to a UErrorCode to receive any errors. 1652b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * @stable ICU 4.0 1653c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru */ 1654c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru virtual void setMatchCallback(URegexMatchCallback *callback, 1655c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru const void *context, 1656c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UErrorCode &status); 1657c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1658c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1659c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /** 1660c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * Get the callback function for this RegexMatcher. 1661c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * 1662b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * @param callback Out parameter, receives a pointer to the user-supplied 1663c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * callback function. 1664c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * @param context Out parameter, receives the user context pointer that 1665c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * was set when setMatchCallback() was called. 
1666c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * @param status A reference to a UErrorCode to receive any errors. 1667b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * @stable ICU 4.0 1668c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru */ 1669c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru virtual void getMatchCallback(URegexMatchCallback *&callback, 1670c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru const void *&context, 1671c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UErrorCode &status); 1672b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1673b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 167427f654740f2a26ad62a5c155af9199af9e69b889claireho /** 167527f654740f2a26ad62a5c155af9199af9e69b889claireho * Set a progress callback function for use with find operations on this Matcher. 167627f654740f2a26ad62a5c155af9199af9e69b889claireho * During find operations, the callback will be invoked after each return from a 167727f654740f2a26ad62a5c155af9199af9e69b889claireho * match attempt, giving the application the opportunity to terminate a long-running 167827f654740f2a26ad62a5c155af9199af9e69b889claireho * find operation. 167927f654740f2a26ad62a5c155af9199af9e69b889claireho * 168027f654740f2a26ad62a5c155af9199af9e69b889claireho * @param callback A pointer to the user-supplied callback function. 168127f654740f2a26ad62a5c155af9199af9e69b889claireho * @param context User context pointer. The value supplied at the 168227f654740f2a26ad62a5c155af9199af9e69b889claireho * time the callback function is set will be saved 168327f654740f2a26ad62a5c155af9199af9e69b889claireho * and passed to the callback each time that it is called. 168427f654740f2a26ad62a5c155af9199af9e69b889claireho * @param status A reference to a UErrorCode to receive any errors. 
1685103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * @stable ICU 4.6 168627f654740f2a26ad62a5c155af9199af9e69b889claireho */ 168727f654740f2a26ad62a5c155af9199af9e69b889claireho virtual void setFindProgressCallback(URegexFindProgressCallback *callback, 168827f654740f2a26ad62a5c155af9199af9e69b889claireho const void *context, 168927f654740f2a26ad62a5c155af9199af9e69b889claireho UErrorCode &status); 169027f654740f2a26ad62a5c155af9199af9e69b889claireho 169127f654740f2a26ad62a5c155af9199af9e69b889claireho 169227f654740f2a26ad62a5c155af9199af9e69b889claireho /** 169327f654740f2a26ad62a5c155af9199af9e69b889claireho * Get the find progress callback function for this URegularExpression. 169427f654740f2a26ad62a5c155af9199af9e69b889claireho * 1695b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * @param callback Out parameter, receives a pointer to the user-supplied 169627f654740f2a26ad62a5c155af9199af9e69b889claireho * callback function. 169727f654740f2a26ad62a5c155af9199af9e69b889claireho * @param context Out parameter, receives the user context pointer that 169827f654740f2a26ad62a5c155af9199af9e69b889claireho * was set when uregex_setFindProgressCallback() was called. 169927f654740f2a26ad62a5c155af9199af9e69b889claireho * @param status A reference to a UErrorCode to receive any errors. 
1700103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * @stable ICU 4.6 170127f654740f2a26ad62a5c155af9199af9e69b889claireho */ 170227f654740f2a26ad62a5c155af9199af9e69b889claireho virtual void getFindProgressCallback(URegexFindProgressCallback *&callback, 170327f654740f2a26ad62a5c155af9199af9e69b889claireho const void *&context, 170427f654740f2a26ad62a5c155af9199af9e69b889claireho UErrorCode &status); 170527f654740f2a26ad62a5c155af9199af9e69b889claireho 1706103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius#ifndef U_HIDE_INTERNAL_API 1707b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /** 1708b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * setTrace Debug function, enable/disable tracing of the matching engine. 1709b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * For internal ICU development use only. DO NO USE!!!! 1710b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @internal 1711b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1712b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru void setTrace(UBool state); 1713103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius#endif /* U_HIDE_INTERNAL_API */ 1714b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1715b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /** 1716b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * ICU "poor man's RTTI", returns a UClassID for this class. 
1717b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 1718b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @stable ICU 2.2 1719b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1720b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru static UClassID U_EXPORT2 getStaticClassID(); 1721b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1722b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /** 1723b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * ICU "poor man's RTTI", returns a UClassID for the actual class. 1724b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 1725b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @stable ICU 2.2 1726b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1727b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru virtual UClassID getDynamicClassID() const; 1728b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1729b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruprivate: 1730b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Constructors and other object boilerplate are private. 1731b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Instances of RegexMatcher can not be assigned, copied, cloned, etc. 1732c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru RegexMatcher(); // default constructor not implemented 1733b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexMatcher(const RegexPattern *pat); 1734b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexMatcher(const RegexMatcher &other); 1735b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexMatcher &operator =(const RegexMatcher &rhs); 1736c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru void init(UErrorCode &status); // Common initialization 173750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho void init2(UText *t, UErrorCode &e); // Common initialization, part 2. 
1738c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1739b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru friend class RegexPattern; 1740b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru friend class RegexCImpl; 1741c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Querupublic: 1742103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius#ifndef U_HIDE_INTERNAL_API 1743c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /** @internal */ 1744c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru void resetPreserveRegion(); // Reset matcher state, but preserve any region. 1745103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius#endif /* U_HIDE_INTERNAL_API */ 1746c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queruprivate: 1747b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1748b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 1749b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // MatchAt This is the internal interface to the match engine itself. 1750b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Match status comes back in matcher member variables. 
1751b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 175250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho void MatchAt(int64_t startIdx, UBool toEnd, UErrorCode &status); 175350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho inline void backTrack(int64_t &inputIdx, int32_t &patIdx); 175450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UBool isWordBoundary(int64_t pos); // perform Perl-like \b test 175550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UBool isUWordBoundary(int64_t pos); // perform RBBI based \b test 1756b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REStackFrame *resetStack(); 175750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho inline REStackFrame *StateSave(REStackFrame *fp, int64_t savePatIdx, UErrorCode &status); 1758c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru void IncrementTime(UErrorCode &status); 175927f654740f2a26ad62a5c155af9199af9e69b889claireho UBool ReportFindProgress(int64_t matchIndex, UErrorCode &status); 176050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 176150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t appendGroup(int32_t groupNum, UText *dest, UErrorCode &status) const; 176250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /* NOTE(review): the Chunk variants below take int32_t indices, while MatchAt and isWordBoundary above take int64_t. Presumably they are a fast path over a directly indexable chunk of the input; confirm against the implementation. */ 176350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UBool findUsingChunk(); 176450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho void MatchChunkAt(int32_t startIdx, UBool toEnd, UErrorCode &status); 176550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UBool isChunkWordBoundary(int32_t pos); 1766b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1767b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const RegexPattern *fPattern; 1768b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexPattern *fPatternOwned; // Non-NULL if this matcher owns the pattern, and 1769b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // should delete it when through. 
1770b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 177150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UnicodeString *fInput; // The string being matched. Only used for input() 177250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText *fInputText; // The text being matched. Is never NULL. 177350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText *fAltInputText; // A shallow copy of the text being matched. 177450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Only created if the pattern contains backreferences. 177550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t fInputLength; // Full length of the input text. 1776c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t fFrameSize; // The size of a frame in the backtrack stack. 1777c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 177850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t fRegionStart; // Start of the input region, default = 0. 177950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t fRegionLimit; // End of input region, default to input.length. 1780c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 178150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t fAnchorStart; // Region bounds for anchoring operations (^ or $). 178250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t fAnchorLimit; // See useAnchoringBounds 1783c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 178450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t fLookStart; // Region bounds for look-ahead/behind and 178550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t fLookLimit; // other boundary tests. See 1786c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // useTransparentBounds 1787c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 178850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t fActiveStart; // Currently active bounds for matching. 
178950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t fActiveLimit; // Usually is the same as region, but 1790c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // is changed to fLookStart/Limit when 1791c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // entering look around regions. 1792c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1793c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UBool fTransparentBounds; // True if using transparent bounds. 1794c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UBool fAnchoringBounds; // True if using anchoring bounds. 1795c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1796c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UBool fMatch; // True if the last attempted match was successful. 179750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t fMatchStart; // Position of the start of the most recent match 179850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t fMatchEnd; // First position after the end of the most recent match 1799c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Zero if no previous match, even when a region 1800c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // is active. 180150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t fLastMatchEnd; // First position after the end of the previous match, 1802b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // or -1 if there was no previous match. 180350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t fAppendPosition; // First position after the end of the previous 1804c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // appendReplacement(). 
As described by the 1805c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // JavaDoc for Java Matcher, where it is called 1806c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // "append position" 1807c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UBool fHitEnd; // True if the last match touched the end of input. 1808c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UBool fRequireEnd; // True if the last match required end-of-input 1809c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // (matched $ or Z) 1810b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 181150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UVector64 *fStack; 1812c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru REStackFrame *fFrame; // After finding a match, the last active stack frame, 1813c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // which will contain the capture group results. 1814b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // NOT valid while match engine is running. 1815b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 181650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t *fData; // Data area for use by the compiled pattern. 181750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t fSmallData[8]; // Use this for data if it's enough. 1818b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1819c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t fTimeLimit; // Max time (in arbitrary steps) to let the 1820c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // match engine run. Zero for unlimited. 1821c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1822c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t fTime; // Match time, accumulates while matching. 1823c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t fTickCounter; // Low bits counter for time. Counts down StateSaves. 
1824c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Kept separately from fTime to keep as much 1825c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // code as possible out of the inline 1826c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // StateSave function. 1827c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1828c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t fStackLimit; // Maximum memory size to use for the backtrack 1829c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // stack, in bytes. Zero for unlimited. 1830c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1831c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru URegexMatchCallback *fCallbackFn; // Pointer to match progress callback function. 1832c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // NULL if there is no callback. 1833c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru const void *fCallbackContext; // User Context ptr for callback function. 1834c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 183527f654740f2a26ad62a5c155af9199af9e69b889claireho URegexFindProgressCallback *fFindProgressCallbackFn; // Pointer to find progress callback function. 183627f654740f2a26ad62a5c155af9199af9e69b889claireho // NULL if there is no callback. 183727f654740f2a26ad62a5c155af9199af9e69b889claireho const void *fFindProgressCallbackContext; // User Context ptr for callback function. 183827f654740f2a26ad62a5c155af9199af9e69b889claireho 183927f654740f2a26ad62a5c155af9199af9e69b889claireho 184050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UBool fInputUniStrMaybeMutable; // Set when fInputText wraps a UnicodeString that may be mutable - compatibility. 184150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 1842b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool fTraceDebug; // Set true for debug tracing of match engine. 
1843b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1844c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UErrorCode fDeferredStatus; // Save error state that cannot be immediately 1845b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // reported, or that permanently disables this matcher. 1846b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1847b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RuleBasedBreakIterator *fWordBreakItr; 1848b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}; 1849b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1850b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_NAMESPACE_END 1851b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif // UCONFIG_NO_REGULAR_EXPRESSIONS 1852b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif 1853