regex.h revision b0ac937921a2c196d8b9da665135bf6ba01a1ccf
1b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*
2b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru**********************************************************************
3b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru*   Copyright (C) 2002-2009, International Business Machines
4b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*   Corporation and others.  All Rights Reserved.
5b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru**********************************************************************
6b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*   file name:  regex.h
7b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*   encoding:   US-ASCII
8b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*   indentation:4
9b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*
10b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*   created on: 2002oct22
11b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*   created by: Andy Heninger
12b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*
13b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*   ICU Regular Expressions, API for C++
14b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/
15b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
16b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#ifndef REGEX_H
17b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define REGEX_H
18b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
19b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//#define REGEX_DEBUG
20b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
21b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
22b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * \file
23b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * \brief  C++ API:  Regular Expressions
24b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
25b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <h2>Regular Expression API</h2>
26b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
27b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <p>The ICU API for processing regular expressions consists of two classes,
28b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *  <code>RegexPattern</code> and <code>RegexMatcher</code>.
29b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *  <code>RegexPattern</code> objects represent a pre-processed, or compiled
30b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *  regular expression.  They are created from a regular expression pattern string,
31b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *  and can be used to create <code>RegexMatcher</code> objects for the pattern.</p>
32b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
33b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <p>Class <code>RegexMatcher</code> bundles together a regular expression
34b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *  pattern and a target string to which the search pattern will be applied.
35b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *  <code>RegexMatcher</code> includes API for doing plain find or search
36b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *  operations, for search and replace operations, and for obtaining detailed
37b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *  information about bounds of a match. </p>
38b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
39c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * <p>Note that by constructing <code>RegexMatcher</code> objects directly from regular
40b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * expression pattern strings application code can be simplified and the explicit
41b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * need for <code>RegexPattern</code> objects can usually be eliminated.
42b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * </p>
43b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
44b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
45b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/utypes.h"
46b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
47b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if !UCONFIG_NO_REGULAR_EXPRESSIONS
48b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
49b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/uobject.h"
50b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/unistr.h"
51b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/parseerr.h"
52b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
53b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/uregex.h"
54b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
55b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_NAMESPACE_BEGIN
56b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
57b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
58b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Forward Declarations...
59b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
60b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruclass RegexMatcher;
61b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruclass RegexPattern;
62b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruclass UVector;
63b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruclass UVector32;
64b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruclass UnicodeSet;
65b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustruct REStackFrame;
66b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustruct Regex8BitSet;
67b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruclass  RuleBasedBreakIterator;
68b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruclass  RegexCImpl;
69b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
70b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
71b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
72b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
73b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
74b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *   RegexPatternDump   Debug function, displays the compiled form of a pattern.
 *   Declared as a function only when REGEX_DEBUG is defined; otherwise
 *   RegexPatternDump(pat) is a macro that expands to nothing.
75b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *   @internal
76b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
77b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#ifdef REGEX_DEBUG
78b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_INTERNAL void U_EXPORT2
79b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    RegexPatternDump(const RegexPattern *pat);
80b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#else
81b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    #define RegexPatternDump(pat)
82b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif
83b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
84b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
85b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
86b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
87b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  * Class <code>RegexPattern</code> represents a compiled regular expression.  It includes
88b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  * factory methods for creating a RegexPattern object from the source (string) form
89b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  * of a regular expression, methods for creating RegexMatchers that allow the pattern
90b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  * to be applied to input text, and a few convenience methods for simple common
91b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  * uses of regular expressions.
92b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  *
93b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  * <p>Class RegexPattern is not intended to be subclassed.</p>
94b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  *
95b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  * @stable ICU 2.4
96b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  */
97b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruclass U_I18N_API RegexPattern: public UObject {
98b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querupublic:
99b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
100b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /**
101b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * default constructor.  Create a RegexPattern object that refers to no actual
102b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     *   pattern.  Not normally needed; RegexPattern objects are usually
103b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     *   created using the factory method <code>compile()</code>.
104b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     *
105b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * @stable ICU 2.4
106b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     */
107b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    RegexPattern();
108b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
109b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /**
110b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * Copy Constructor.  Create a new RegexPattern object that is equivalent
111b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     *                    to the source object.
112b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * @param source the pattern object to be copied.
113b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * @stable ICU 2.4
114b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     */
115b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    RegexPattern(const RegexPattern &source);
116b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
117b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /**
118b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * Destructor.  Note that a RegexPattern object must persist so long as any
119b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     *  RegexMatcher objects that were created from the RegexPattern are active.
120b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * @stable ICU 2.4
121b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     */
122b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    virtual ~RegexPattern();
123b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
124b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /**
125b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * Comparison operator.  Two RegexPattern objects are considered equal if they
126b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * were constructed from identical source patterns using the same match flag
127b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * settings.
128b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * @param that a RegexPattern object to compare with "this".
129b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * @return TRUE if the objects are equivalent.
130b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * @stable ICU 2.4
131b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     */
132b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool           operator==(const RegexPattern& that) const;
133b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
134b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /**
135b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * Inequality operator, defined as the negation of <code>operator==</code>.
136b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * Two RegexPattern objects are considered equal if they were constructed from
137b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * identical source patterns using the same match flag settings.
138b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * @param that a RegexPattern object to compare with "this".
139b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * @return TRUE if the objects are different.
140b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * @stable ICU 2.4
141b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     */
142b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    inline UBool    operator!=(const RegexPattern& that) const {return ! operator ==(that);};
143b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
144b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /**
145b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * Assignment operator.  After assignment, this RegexPattern will behave identically
146b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     *     to the source object.
     * @param source the pattern object whose state is assigned to this object.
147b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * @stable ICU 2.4
148b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     */
149b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    RegexPattern  &operator =(const RegexPattern &source);
150b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
151b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /**
152b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * Create an exact copy of this RegexPattern object.  Since RegexPattern is not
153b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * intended to be subclassed, <code>clone()</code> and copy construction are
154b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * equivalent operations.
155b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * @return the copy of this RegexPattern
156b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * @stable ICU 2.4
157b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     */
158b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    virtual RegexPattern  *clone() const;
159b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
160b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
161b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   /**
162b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    * Compiles the regular expression in string form into a RegexPattern
163b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    * object.  These compile methods, rather than the constructors, are the usual
164b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    * way that RegexPattern objects are created.
165b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *
166b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    * <p>Note that RegexPattern objects must not be deleted while RegexMatcher
167b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    * objects created from the pattern are active.  RegexMatchers keep a pointer
168b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    * back to their pattern, so premature deletion of the pattern is a
169b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    * catastrophic error.</p>
170b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *
171b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    * <p>All pattern match mode flags are set to their default values.</p>
172b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *
173b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    * <p>Note that it is often more convenient to construct a RegexMatcher directly
174b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *    from a pattern string rather than separately compiling the pattern and
175b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *    then creating a RegexMatcher object from the pattern.</p>
176b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *
177b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    * @param regex The regular expression to be compiled.
178b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    * @param pe    Receives the position (line and column numbers) of any error
179b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *              within the regular expression.
180b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    * @param status A reference to a UErrorCode to receive any errors.
181b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    * @return      A RegexPattern object for the compiled pattern.
182b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *
183b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    * @stable ICU 2.4
184b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    */
185b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    static RegexPattern * U_EXPORT2 compile( const UnicodeString &regex,
186b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UParseError          &pe,
187b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UErrorCode           &status);
188b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
189b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   /**
190b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    * Compiles the regular expression in string form into a RegexPattern
191b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    * object using the specified match mode flags.  These compile methods,
192b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    * rather than the constructors, are the usual way that RegexPattern objects
193b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    * are created.
194b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *
195b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    * <p>Note that RegexPattern objects must not be deleted while RegexMatcher
196b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    * objects created from the pattern are active.  RegexMatchers keep a pointer
197b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    * back to their pattern, so premature deletion of the pattern is a
198b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    * catastrophic error.</p>
199b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *
200b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    * <p>Note that it is often more convenient to construct a RegexMatcher directly
201b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *    from a pattern string rather than separately compiling the pattern and
202b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *    then creating a RegexMatcher object from the pattern.</p>
203b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *
204b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    * @param regex The regular expression to be compiled.
205b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    * @param flags The match mode flags to be used.
206b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    * @param pe    Receives the position (line and column numbers) of any error
207b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *              within the regular expression.
208b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    * @param status   A reference to a UErrorCode to receive any errors.
209b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    * @return      A RegexPattern object for the compiled pattern.
210b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *
211b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    * @stable ICU 2.4
212b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    */
213b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    static RegexPattern * U_EXPORT2 compile( const UnicodeString &regex,
214b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        uint32_t             flags,
215b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UParseError          &pe,
216b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UErrorCode           &status);
217b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
218b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
219b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   /**
220b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    * Compiles the regular expression in string form into a RegexPattern
221b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    * object using the specified match mode flags.  These compile methods,
222b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    * rather than the constructors, are the usual way that RegexPattern objects
223b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    * are created.
224b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *
225b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    * <p>Note that RegexPattern objects must not be deleted while RegexMatcher
226b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    * objects created from the pattern are active.  RegexMatchers keep a pointer
227b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    * back to their pattern, so premature deletion of the pattern is a
228b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    * catastrophic error.</p>
229b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *
230b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    * <p>Note that it is often more convenient to construct a RegexMatcher directly
231b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *    from a pattern string rather than separately compiling the pattern and
232b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *    then creating a RegexMatcher object from the pattern.</p>
233b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *
234b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    * @param regex The regular expression to be compiled.
235b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    * @param flags The match mode flags to be used.
236b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    * @param status   A reference to a UErrorCode to receive any errors.
237b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    * @return      A RegexPattern object for the compiled pattern.
238b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *
239b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    * @stable ICU 2.6
240b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    */
241b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    static RegexPattern * U_EXPORT2 compile( const UnicodeString &regex,
242b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        uint32_t             flags,
243b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UErrorCode           &status);
244b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
245b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
246b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   /**
247b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    * Get the match mode flags that were used when compiling this pattern.
248b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    * @return  the match mode flags
249b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    * @stable ICU 2.4
250b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    */
251b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    virtual uint32_t flags() const;
252b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
253b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   /**
254b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    * Creates a RegexMatcher that will match the given input against this pattern.  The
255b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    * RegexMatcher can then be used to perform match, find or replace operations
256b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    * on the input.  Note that a RegexPattern object must not be deleted while
257b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    * RegexMatchers created from it still exist and might possibly be used again.
258b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    * <p>
259b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    * The matcher will retain a reference to the supplied input string, and all regexp
260b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    * pattern matching operations happen directly on this original string.  It is
261b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    * critical that the string not be altered or deleted before use by the regular
262b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    * expression operations is complete.
263b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *
264b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    * @param input    The input string to which the regular expression will be applied.
265b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    * @param status   A reference to a UErrorCode to receive any errors.
266b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    * @return         A RegexMatcher object for this pattern and input.
267b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *
268b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    * @stable ICU 2.4
269b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    */
270b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    virtual RegexMatcher *matcher(const UnicodeString &input,
271b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UErrorCode          &status) const;
272b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
273b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruprivate:
274b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /**
275b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * Cause a compilation error if an application accidentally attempts to
276b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     *   create a matcher with a (UChar *) string as input rather than
277b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     *   a UnicodeString.  Avoids a dangling reference to a temporary string.
278b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * <p>
279b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * To efficiently work with UChar * strings, wrap the data in a UnicodeString
280b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * using one of the aliasing constructors, such as
281b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * <code>UnicodeString(UBool isTerminated, const UChar *text, int32_t textLength);</code>
282b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     *
283b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * @internal
284b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     */
285b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    RegexMatcher *matcher(const UChar *input,
286b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UErrorCode          &status) const;
287b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querupublic:
288b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
289b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
290b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   /**
291b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    * Creates a RegexMatcher that will match against this pattern.  The
292b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    * RegexMatcher can be used to perform match, find or replace operations.
293b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    * Note that a RegexPattern object must not be deleted while
294b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    * RegexMatchers created from it still exist and might possibly be used again.
295b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *
296b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    * @param status   A reference to a UErrorCode to receive any errors.
297b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    * @return      A RegexMatcher object for this pattern.
298b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *
299b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    * @stable ICU 2.6
300b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    */
301b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    virtual RegexMatcher *matcher(UErrorCode  &status) const;
302b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
303b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
304b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   /**
305b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    * Test whether a string matches a regular expression.  This convenience function
306b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    * both compiles the regular expression and applies it in a single operation.
307b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    * Note that if the same pattern needs to be applied repeatedly, this method will be
308b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    * less efficient than creating and reusing a RegexMatcher object.
309b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *
310b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    * @param regex The regular expression
311b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    * @param input The string data to be matched
312b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    * @param pe Receives the position of any syntax errors within the regular expression
313b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    * @param status A reference to a UErrorCode to receive any errors.
314b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    * @return True if the regular expression exactly matches the full input string.
315b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *
316b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    * @stable ICU 2.4
317b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    */
318b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    static UBool U_EXPORT2 matches(const UnicodeString   &regex,
319b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        const UnicodeString   &input,
320b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UParseError     &pe,
321b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UErrorCode      &status);
322b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
323b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
324b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   /**
325b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *    Returns the regular expression from which this pattern was compiled.
326b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *    @stable ICU 2.4
327b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    */
328b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    virtual UnicodeString pattern() const;
329b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
330b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
331b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /**
332b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * Split a string into fields.  Somewhat like split() from Perl.
333b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * The pattern matches identify delimiters that separate the input
334b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     *  into fields.  The input data between the matches becomes the
335b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     *  fields themselves.
336b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * <p>
337b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     *  For the best performance on split() operations,
338b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     *  <code>RegexMatcher::split</code> is preferable to this function.
339b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     *
340b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * @param input   The string to be split into fields.  The field delimiters
341b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     *                match the pattern (in the "this" object)
342b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * @param dest    An array of UnicodeStrings to receive the results of the split.
343b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     *                This is an array of actual UnicodeString objects, not an
344b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     *                array of pointers to strings.  Local (stack based) arrays can
345b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     *                work well here.
346b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * @param destCapacity  The number of elements in the destination array.
347b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     *                If the number of fields found is less than destCapacity, the
348b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     *                extra strings in the destination array are not altered.
349b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     *                If the number of destination strings is less than the number
350b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     *                of fields, the trailing part of the input string, including any
351b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     *                field delimiters, is placed in the last destination string.
352b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * @param status  A reference to a UErrorCode to receive any errors.
353b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * @return        The number of fields into which the input string was split.
354b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * @stable ICU 2.4
355b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     */
356b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    virtual int32_t  split(const UnicodeString &input,
357b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeString    dest[],
358b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        int32_t          destCapacity,
359b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UErrorCode       &status) const;
360b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
361b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
362b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /**
363b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * ICU "poor man's RTTI", returns a UClassID for the actual class.
364b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     *
365b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * @stable ICU 2.4
366b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     */
367b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    virtual UClassID getDynamicClassID() const;
368b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
369b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /**
370b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * ICU "poor man's RTTI", returns a UClassID for this class.
371b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     *
372b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * @stable ICU 2.4
373b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     */
374b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    static UClassID U_EXPORT2 getStaticClassID();
375b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
376b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruprivate:
377b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
378b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //  Implementation Data
379b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
380b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString   fPattern;      // The original pattern string.
381b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint32_t        fFlags;        // The flags used when compiling the pattern.
382b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                   //
383b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UVector32       *fCompiledPat; // The compiled pattern p-code.
384b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString   fLiteralText;  // Any literal string data from the pattern,
385b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                   //   after un-escaping, for use during the match.
386b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
387b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UVector         *fSets;        // Any UnicodeSets referenced from the pattern.
388b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    Regex8BitSet    *fSets8;       //      (and fast sets for latin-1 range.)
389b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
390b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
391b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UErrorCode      fDeferredStatus; // status if some prior error has left this
392b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                   //  RegexPattern in an unusable state.
393b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
394b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t         fMinMatchLen;  // Minimum Match Length.  All matches will have length
395b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                   //   >= this value.  For some patterns, this calculated
396b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                   //   value may be less than the true shortest
397b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                   //   possible match.
398b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
399b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t         fFrameSize;    // Size of a state stack frame in the
400b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                   //   execution engine.
401b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
402b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t         fDataSize;     // The size of the data needed by the pattern that
403b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                   //   does not go on the state stack, but has just
404b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                   //   a single copy per matcher.
405b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
406b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UVector32       *fGroupMap;    // Map from capture group number to position of
407b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                   //   the group's variables in the matcher stack frame.
408b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
409b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t         fMaxCaptureDigits;  // NOTE(review): undocumented; presumably the digit count of the largest capture group number - confirm.
410b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
411b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeSet     **fStaticSets;  // Ptr to static (shared) sets for predefined
412b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                   //   regex character classes, e.g. Word.
413b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
414b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    Regex8BitSet   *fStaticSets8;  // Ptr to the static (shared) latin-1 only
415b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                   //  sets for predefined regex classes.
416b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
417b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t         fStartType;    // Info on how a match must start.
418b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t         fInitialStringIdx;     // NOTE(review): presumably the index (into fLiteralText?) of a required initial string - confirm.
419b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t         fInitialStringLen;     // NOTE(review): presumably the length of that initial string - confirm.
420b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeSet     *fInitialChars;         // NOTE(review): presumably the set of characters that can begin a match - confirm.
421b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UChar32         fInitialChar;          // NOTE(review): presumably the single required first character, when applicable - confirm.
422b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    Regex8BitSet   *fInitialChars8;        // NOTE(review): presumably the latin-1 fast-set counterpart of fInitialChars - confirm.
423b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
424b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    friend class RegexCompile;
425b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    friend class RegexMatcher;
426b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    friend class RegexCImpl;
427b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
428b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
429b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //  Implementation Methods
430b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
431b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    void        init();            // Common initialization, for use by constructors.
432b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    void        zap();             // Common cleanup.
433b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#ifdef REGEX_DEBUG                 // Debug-only declarations; REGEX_DEBUG is commented out near the top of this header.
434b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    void        dumpOp(int32_t index) const;   // Debug helper. NOTE(review): index presumably selects an op in fCompiledPat - confirm.
435b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    friend     void U_EXPORT2 RegexPatternDump(const RegexPattern *);   // Free function granted access to private members.
436b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif                             // REGEX_DEBUG
437b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
438b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru};
439b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
440b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
441b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
442b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
443b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *  class RegexMatcher bundles together a regular expression pattern and
444b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *  input text to which the expression can be applied.  It includes methods
445b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *  for testing for matches, and for find and replace operations.
446b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
447b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <p>Class RegexMatcher is not intended to be subclassed.</p>
448b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
449b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @stable ICU 2.4
450b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
451b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruclass U_I18N_API RegexMatcher: public UObject {
452b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querupublic:
453b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
454b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /**
455b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      * Construct a RegexMatcher for a regular expression.
456b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      * This is a convenience method that avoids the need to explicitly create
457b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      * a RegexPattern object.  Note that if several RegexMatchers need to be
458b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      * created for the same expression, it will be more efficient to
459b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      * separately create and cache a RegexPattern object, and use
460b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      * its matcher() method to create the RegexMatcher objects.
461b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      *
462b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      *  @param regexp The Regular Expression to be compiled.
463b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      *  @param flags  Regular expression options, such as case insensitive matching.
464b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      *                @see UREGEX_CASE_INSENSITIVE
465b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      *  @param status Any errors are reported by setting this UErrorCode variable.
466b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      *  @stable ICU 2.6
467b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      */
468b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    RegexMatcher(const UnicodeString &regexp, uint32_t flags, UErrorCode &status);
469b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
470b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /**
471b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      * Construct a RegexMatcher for a regular expression.
472b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      * This is a convenience method that avoids the need to explicitly create
473b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      * a RegexPattern object.  Note that if several RegexMatchers need to be
474b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      * created for the same expression, it will be more efficient to
475b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      * separately create and cache a RegexPattern object, and use
476b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      * its matcher() method to create the RegexMatcher objects.
477b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      * <p>
478b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      * The matcher will retain a reference to the supplied input string, and all regexp
479b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      * pattern matching operations happen directly on the original string.  It is
480b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      * critical that the string not be altered or deleted before use by the regular
481b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      * expression operations is complete.
482b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      *
483c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru      *  @param regexp The Regular Expression to be compiled.
484b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      *  @param input  The string to match.  The matcher retains a reference to the
485b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      *                caller's string; no copy is made.
486b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      *  @param flags  Regular expression options, such as case insensitive matching.
487b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      *                @see UREGEX_CASE_INSENSITIVE
488b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      *  @param status Any errors are reported by setting this UErrorCode variable.
489b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      *  @stable ICU 2.6
490b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      */
491b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    RegexMatcher(const UnicodeString &regexp, const UnicodeString &input,
492b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        uint32_t flags, UErrorCode &status);
493b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
494b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruprivate:
495b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /**
496b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * Cause a compilation error if an application accidentally attempts to
497b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     *   create a matcher with a (UChar *) string as input rather than
498b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     *   a UnicodeString.    Avoids a dangling reference to a temporary string.
499b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * <p>
500b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * To efficiently work with UChar *strings, wrap the data in a UnicodeString
501b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * using one of the aliasing constructors, such as
502b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * <code>UnicodeString(UBool isTerminated, const UChar *text, int32_t textLength);</code>
503b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     *
504b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * @internal
505b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     */
506b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    RegexMatcher(const UnicodeString &regexp, const UChar *input,
507b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        uint32_t flags, UErrorCode &status);
508b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querupublic:
509b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
510b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
511b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   /**
512b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *   Destructor.
513b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *
514b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *  @stable ICU 2.4
515b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    */
516b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    virtual ~RegexMatcher();
517b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
518b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
519b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   /**
520c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    *   Attempts to match the entire input region against the pattern.
521b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *    @param   status     A reference to a UErrorCode to receive any errors.
522b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *    @return TRUE if there is a match
523b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *    @stable ICU 2.4
524b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    */
525b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    virtual UBool matches(UErrorCode &status);
526b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
527b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   /**
528c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    *   Resets the matcher, then attempts to match the input beginning
529c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    *   at the specified startIndex, and extending to the end of the input.
530c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    *   The input region is reset to include the entire input string.
531c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    *   A successful match must extend to the end of the input.
532b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *    @param   startIndex The input string index at which to begin matching.
533b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *    @param   status     A reference to a UErrorCode to receive any errors.
534b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *    @return TRUE if there is a match
535b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *    @stable ICU 2.8
536b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    */
537b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    virtual UBool matches(int32_t startIndex, UErrorCode &status);
538b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
539b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
540b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
541b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
542b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   /**
543c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    *   Attempts to match the input string, starting from the beginning of the region,
544c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    *   against the pattern.  Like the matches() method, this function
545c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    *   always starts at the beginning of the input region;
546c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    *   unlike that function, it does not require that the entire region be matched.
547b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *
548b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *   <p>If the match succeeds then more information can be obtained via the <code>start()</code>,
549b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *     <code>end()</code>, and <code>group()</code> functions.</p>
550b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *
551b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *    @param   status     A reference to a UErrorCode to receive any errors.
552b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *    @return  TRUE if there is a match at the start of the input string.
553b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *    @stable ICU 2.4
554b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    */
555b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    virtual UBool lookingAt(UErrorCode &status);
556b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
557b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
558b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  /**
559b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *   Attempts to match the input string, starting from the specified index, against the pattern.
560b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *   The match may be of any length, and is not required to extend to the end
561b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *   of the input string.  Contrast with matches().
562b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *
563b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *   <p>If the match succeeds then more information can be obtained via the <code>start()</code>,
564b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *     <code>end()</code>, and <code>group()</code> functions.</p>
565b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *
566b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *    @param   startIndex The input string index at which to begin matching.
567b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *    @param   status     A reference to a UErrorCode to receive any errors.
568b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *    @return  TRUE if there is a match.
569b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *    @stable ICU 2.8
570b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    */
571b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    virtual UBool lookingAt(int32_t startIndex, UErrorCode &status);
572b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
573b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   /**
574b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *  Find the next pattern match in the input string.
575b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *  The find begins searching the input at the location following the end of
576b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *  the previous match, or at the start of the string if there is no previous match.
577b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *  If a match is found, <code>start(), end()</code> and <code>group()</code>
578b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *  will provide more information regarding the match.
579b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *  <p>Note that if the input string is changed by the application,
580b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *     use find(startPos, status) instead of find(), because the saved starting
581b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *     position may not be valid with the altered input string.</p>
582b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *  @return  TRUE if a match is found.
583b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *  @stable ICU 2.4
584b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    */
585b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    virtual UBool find();
586b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
587b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
588b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   /**
589b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *   Resets this RegexMatcher and then attempts to find the next substring of the
590b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *   input string that matches the pattern, starting at the specified index.
591b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *
592b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *   @param   start     the position in the input string to begin the search
593b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *   @param   status    A reference to a UErrorCode to receive any errors.
594b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *   @return  TRUE if a match is found.
595b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *   @stable ICU 2.4
596b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    */
597b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    virtual UBool find(int32_t start, UErrorCode &status);
598b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
599b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
600b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   /**
601b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *   Returns a string containing the text matched by the previous match.
602b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *   If the pattern can match an empty string, an empty string may be returned.
603b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *   @param   status      A reference to a UErrorCode to receive any errors.
604b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *                        Possible errors are  U_REGEX_INVALID_STATE if no match
605b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *                        has been attempted or the last match failed.
606b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *   @return  a string containing the matched input text.
607b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *   @stable ICU 2.4
608b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    */
609b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    virtual UnicodeString group(UErrorCode &status) const;
610b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
611b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
612b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   /**
613b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *    Returns a string containing the text captured by the given group
614b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *    during the previous match operation.  Group(0) is the entire match.
615b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *
616b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *    @param groupNum the capture group number
617b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *    @param   status     A reference to a UErrorCode to receive any errors.
618b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *                        Possible errors are  U_REGEX_INVALID_STATE if no match
619b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *                        has been attempted or the last match failed and
620b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *                        U_INDEX_OUTOFBOUNDS_ERROR for a bad capture group number.
621b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *    @return the captured text
622b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *    @stable ICU 2.4
623b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    */
624b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    virtual UnicodeString group(int32_t groupNum, UErrorCode &status) const;
625b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
626b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
627b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   /**
628b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *   Returns the number of capturing groups in this matcher's pattern.
629b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *   @return the number of capture groups
630b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *   @stable ICU 2.4
631b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    */
632b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    virtual int32_t groupCount() const;
633b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
634b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
635b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   /**
636b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *   Returns the index in the input string of the start of the text matched
637b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *   during the previous match operation.
638b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *    @param   status      a reference to a UErrorCode to receive any errors.
    *                         Possible errors are  U_REGEX_INVALID_STATE if no match
    *                         has been attempted or the last match failed.
639b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *    @return              The position in the input string of the start of the last match.
640b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *    @stable ICU 2.4
641b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    */
642b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    virtual int32_t start(UErrorCode &status) const;
643b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
644b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
645b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   /**
646b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *   Returns the index in the input string of the start of the text matched by the
647b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *    specified capture group during the previous match operation.  Return -1 if
648b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *    the capture group exists in the pattern, but was not part of the last match.
649b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *
650b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *    @param  group       the capture group number
651b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *    @param  status      A reference to a UErrorCode to receive any errors.  Possible
652b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *                        errors are  U_REGEX_INVALID_STATE if no match has been
653b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *                        attempted or the last match failed, and
654b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *                        U_INDEX_OUTOFBOUNDS_ERROR for a bad capture group number
655b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *    @return the start position of substring matched by the specified group.
656b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *    @stable ICU 2.4
657b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    */
658b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    virtual int32_t start(int32_t group, UErrorCode &status) const;
659b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
660b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
661b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   /**
662b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *    Returns the index in the input string of the first character following the
663b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *    text matched during the previous match operation.
664b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *   @param   status      A reference to a UErrorCode to receive any errors.  Possible
665b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *                        errors are  U_REGEX_INVALID_STATE if no match has been
666b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *                        attempted or the last match failed.
667b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *    @return the index of the last character matched, plus one.
668b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *   @stable ICU 2.4
669b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    */
670b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    virtual int32_t end(UErrorCode &status) const;
671b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
672b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
673b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   /**
674b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *    Returns the index in the input string of the character following the
675b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *    text matched by the specified capture group during the previous match operation.
676b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *    @param group  the capture group number
677b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *    @param   status      A reference to a UErrorCode to receive any errors.  Possible
678b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *                        errors are  U_REGEX_INVALID_STATE if no match has been
679b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *                        attempted or the last match failed and
680b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *                        U_INDEX_OUTOFBOUNDS_ERROR for a bad capture group number
681b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *    @return  the index of the first character following the text
682b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *              captured by the specified group during the previous match operation.
683b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *              Return -1 if the capture group exists in the pattern but was not part of the match.
684b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *    @stable ICU 2.4
685b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    */
686b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    virtual int32_t end(int32_t group, UErrorCode &status) const;
687b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
688b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
689b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   /**
690b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *   Resets this matcher.  The effect is to remove any memory of previous matches,
691b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *       and to cause subsequent find() operations to begin at the beginning of
692b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *       the input string.
693b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *
694b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *   @return this RegexMatcher.
695b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *   @stable ICU 2.4
696b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    */
697b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    virtual RegexMatcher &reset();
698b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
699b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
700b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   /**
701b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *   Resets this matcher, and sets the current input position.
702b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *   The effect is to remove any memory of previous matches,
703b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *       and to cause subsequent find() operations to begin at
704b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *       the specified position in the input string.
705c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    * <p>
706c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    *   The matcher's region is reset to its default, which is the entire
707c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    *   input string.
708c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    * <p>
709c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    *   An alternative to this function is to set a match region
710c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    *   beginning at the desired index.
711b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *
    *   @param   index   The position in the input string at which subsequent
    *                    find() operations will begin.
    *   @param   status  A reference to a UErrorCode to receive any errors.
712b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *   @return this RegexMatcher.
713b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *   @stable ICU 2.8
714b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    */
715b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    virtual RegexMatcher &reset(int32_t index, UErrorCode &status);
716b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
717b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
718b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   /**
719b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *   Resets this matcher with a new input string.  This allows instances of RegexMatcher
720b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *     to be reused, which is more efficient than creating a new RegexMatcher for
721c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    *     each input string to be processed.
722b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *   @param input The new string on which subsequent pattern matches will operate.
723b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *                The matcher retains a reference to the caller's string, and operates
724b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *                directly on that.  Ownership of the string remains with the caller.
725b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *                Because no copy of the string is made, it is essential that the
726b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *                caller not delete the string until after regexp operations on it
727c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    *                are done.
728b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *   @return this RegexMatcher.
729b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *   @stable ICU 2.4
730b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    */
731b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    virtual RegexMatcher &reset(const UnicodeString &input);
732b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
733b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruprivate:
734b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /**
735b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * Cause a compilation error if an application accidentally attempts to
736b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     *   reset a matcher with a (UChar *) string as input rather than
737b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     *   a UnicodeString.    Avoids a dangling reference to a temporary string.
738b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * <p>
739b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * To efficiently work with UChar *strings, wrap the data in a UnicodeString
740b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * using one of the aliasing constructors, such as
741b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * <code>UnicodeString(UBool isTerminated, const UChar *text, int32_t textLength);</code>
742b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     *
743b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * @internal
744b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     */
745b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    RegexMatcher &reset(const UChar *input);
746b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querupublic:
747b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
748b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   /**
749b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *   Returns the input string being matched.  The returned string is not a copy,
750b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *   but the live input string.  It should not be altered or deleted.
751b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *   @return the input string
752b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *   @stable ICU 2.4
753b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    */
754b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    virtual const UnicodeString &input() const;
755c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
756c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
757c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
758c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru   /** Sets the limits of this matcher's region.
759c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru     * The region is the part of the input string that will be searched to find a match.
760c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru     * Invoking this method resets the matcher, and then sets the region to start
761c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru     * at the index specified by the start parameter and end at the index specified
762c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru     * by the limit parameter.
763c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru     *
764c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru     * Depending on the transparency and anchoring being used (see useTransparentBounds
765c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru     * and useAnchoringBounds), certain constructs such as anchors may behave differently
766c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru     * at or around the boundaries of the region.
767c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru     *
768c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru     * The function will fail if start is greater than limit, or if either index
769c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru     *  is less than zero or greater than the length of the string being matched.
770c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru     *
771c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru     * @param start  The index to begin searches at.
772c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru     * @param limit  The index to end searches at (exclusive).
773c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru     * @param status A reference to a UErrorCode to receive any errors.
     * @return this RegexMatcher.
774b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru     * @stable ICU 4.0
775c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru     */
776c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru     virtual RegexMatcher &region(int32_t start, int32_t limit, UErrorCode &status);
777c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
778c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
779c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru   /**
780c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru     * Reports the start index of this matcher's region. The searches this matcher
781c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru     * conducts are limited to finding matches within regionStart (inclusive) and
782c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru     * regionEnd (exclusive).
783c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru     *
784c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru     * @return The starting index of this matcher's region.
785b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru     * @stable ICU 4.0
786c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru     */
787c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru     virtual int32_t regionStart() const;
788c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
789c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
790c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    /**
791c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru      * Reports the end (limit) index (exclusive) of this matcher's region. The searches
792c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru      * this matcher conducts are limited to finding matches within regionStart
793c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru      * (inclusive) and regionEnd (exclusive).
794c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru      *
795c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru      * @return The ending point of this matcher's region.
796b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru      * @stable ICU 4.0
797c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru      */
798c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru      virtual int32_t regionEnd() const;
799c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
800c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    /**
801c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru      * Queries the transparency of region bounds for this matcher.
802c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru      * See useTransparentBounds for a description of transparent and opaque bounds.
803c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru      * By default, a matcher uses opaque region boundaries.
804c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru      *
805c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru      * @return TRUE if this matcher is using transparent bounds, FALSE if it is using opaque bounds.
806b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru      * @stable ICU 4.0
807c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru      */
808c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru      virtual UBool hasTransparentBounds() const;
809c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
810c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    /**
811c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru      * Sets the transparency of region bounds for this matcher.
812c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru      * Invoking this function with an argument of true will set this matcher to use transparent bounds.
813c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru      * If the boolean argument is false, then opaque bounds will be used.
814c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru      *
815c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru      * Using transparent bounds, the boundaries of this matcher's region are transparent
816c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru      * to lookahead, lookbehind, and boundary matching constructs. Those constructs can
817c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru      * see text beyond the boundaries of the region while checking for a match.
818c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru      *
819c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru      * With opaque bounds, no text outside of the matcher's region is visible to lookahead,
820c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru      * lookbehind, and boundary matching constructs.
821c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru      *
822c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru      * By default, a matcher uses opaque bounds.
823c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru      *
824c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru      * @param   b TRUE for transparent bounds; FALSE for opaque bounds
825c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru      * @return  This Matcher
826b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru      * @stable ICU 4.0
827c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru      **/
828c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru      virtual RegexMatcher &useTransparentBounds(UBool b);
829c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
830c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
831c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    /**
832c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru      * Return true if this matcher is using anchoring bounds.
833c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru      * By default, matchers use anchoring region bounds.
834c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru      *
835c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru      * @return TRUE if this matcher is using anchoring bounds.
836b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru      * @stable ICU 4.0
837c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru      */
838c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru      virtual UBool hasAnchoringBounds() const;
839c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
840c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    /**
841c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru      * Set whether this matcher is using Anchoring Bounds for its region.
842c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru      * With anchoring bounds, pattern anchors such as ^ and $ will match at the start
843c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru      * and end of the region.  Without Anchoring Bounds, anchors will only match at
844c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru      * the positions they would in the complete text.
845c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru      *
846c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru      * Anchoring Bounds are the default for regions.
847c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru      *
848c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru      * @param b TRUE to enable anchoring bounds; FALSE to disable them.
849c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru      * @return  This Matcher
850b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru      * @stable ICU 4.0
851c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru      */
852c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru      virtual RegexMatcher &useAnchoringBounds(UBool b);
853c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
854c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    /**
855c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru      * Return TRUE if the most recent matching operation touched the
856c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru      *  end of the text being processed.  In this case, additional input text could
857c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru      *  change the results of that match.
858c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru      *
859c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru      *  hitEnd() is defined for both successful and unsuccessful matches.
860c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru      *  In either case hitEnd() will return TRUE if the end of the text was
861c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru      *  reached at any point during the matching process.
862c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru      *
863c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru      *  @return  TRUE if the most recent match hit the end of input
864b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru      *  @stable ICU 4.0
865c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru      */
866c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru      virtual UBool hitEnd() const;
867c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
868c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    /**
869c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru      * Return TRUE if the most recent match succeeded and additional input could cause
870c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru      * it to fail. If this method returns false and a match was found, then more input
871c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru      * might change the match but the match won't be lost. If a match was not found,
872c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru      * then requireEnd has no meaning.
873c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru      *
874c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru      * @return TRUE if more input could cause the most recent match to no longer match.
875b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru      * @stable ICU 4.0
876c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru      */
877c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru      virtual UBool requireEnd() const;
878c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
879c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
880c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
881b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
882b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
883b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   /**
884b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *    Returns the pattern that is interpreted by this matcher.
885b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *    @return  the RegexPattern for this RegexMatcher
886b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *    @stable ICU 2.4
887b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    */
888b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    virtual const RegexPattern &pattern() const;
889b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
890b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
891b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   /**
892b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *    Replaces every substring of the input that matches the pattern
893b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *    with the given replacement string.  This is a convenience function that
894b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *    provides a complete find-and-replace-all operation.
895b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *
896b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *    This method first resets this matcher. It then scans the input string
897b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *    looking for matches of the pattern. Input that is not part of any
898b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *    match is left unchanged; each match is replaced in the result by the
899b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *    replacement string. The replacement string may contain references to
900b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *    capture groups.
901b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *
902b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *    @param   replacement a string containing the replacement text.
903b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *    @param   status      a reference to a UErrorCode to receive any errors.
904b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *    @return              a string containing the results of the find and replace.
905b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *    @stable ICU 2.4
906b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    */
907b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    virtual UnicodeString replaceAll(const UnicodeString &replacement, UErrorCode &status);
908b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
909b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
910b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   /**
911b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    * Replaces the first substring of the input that matches
912b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    * the pattern with the replacement string.   This is a convenience
913b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    * function that provides a complete find-and-replace operation.
914b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *
915b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    * <p>This function first resets this RegexMatcher. It then scans the input string
916b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    * looking for a match of the pattern. Input that is not part
917b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    * of the match is appended directly to the result string; the match is replaced
918b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    * in the result by the replacement string. The replacement string may contain
919b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    * references to captured groups.</p>
920b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *
921b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    * <p>The state of the matcher (the position at which a subsequent find()
922b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *    would begin) after completing a replaceFirst() is not specified.  The
923b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *    RegexMatcher should be reset before doing additional find() operations.</p>
924b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *
925b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *    @param   replacement a string containing the replacement text.
926b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *    @param   status      a reference to a UErrorCode to receive any errors.
927b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *    @return              a string containing the results of the find and replace.
928b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *    @stable ICU 2.4
929b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    */
930b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    virtual UnicodeString replaceFirst(const UnicodeString &replacement, UErrorCode &status);
931b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
932b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   /**
933b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *   Implements a replace operation intended to be used as part of an
934b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *   incremental find-and-replace.
935b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *
936b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *   <p>The input string, starting from the end of the previous replacement and ending at
937b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *   the start of the current match, is appended to the destination string.  Then the
938b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *   replacement string is appended to the output string,
939b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *   including handling any substitutions of captured text.</p>
940b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *
941b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *   <p>For simple, prepackaged, non-incremental find-and-replace
942b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *   operations, see replaceFirst() or replaceAll().</p>
943b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *
944b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *   @param   dest        A UnicodeString to which the results of the find-and-replace are appended.
945b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *   @param   replacement A UnicodeString that provides the text to be substituted for
946b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *                        the input text that matched the regexp pattern.  The replacement
947b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *                        text may contain references to captured text from the
948b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *                        input.
949b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *   @param   status      A reference to a UErrorCode to receive any errors.  Possible
950b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *                        errors are  U_REGEX_INVALID_STATE if no match has been
951b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *                        attempted or the last match failed, and U_INDEX_OUTOFBOUNDS_ERROR
952b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *                        if the replacement text specifies a capture group that
953b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *                        does not exist in the pattern.
954b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *
955b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *   @return  this  RegexMatcher
956b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *   @stable ICU 2.4
957b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *
958b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    */
959b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    virtual RegexMatcher &appendReplacement(UnicodeString &dest,
960b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        const UnicodeString &replacement, UErrorCode &status);
961b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
962b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
963b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   /**
964b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    * As the final step in a find-and-replace operation, append the remainder
965b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    * of the input string, starting at the position following the last appendReplacement(),
966b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    * to the destination string. <code>appendTail()</code> is intended to be invoked after one
967b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    * or more invocations of the <code>RegexMatcher::appendReplacement()</code>.
968b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *
969b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *  @param dest A UnicodeString to which the results of the find-and-replace are appended.
970b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *  @return  the destination string.
971b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *  @stable ICU 2.4
972b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    */
973b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    virtual UnicodeString &appendTail(UnicodeString &dest);
974b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
975b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
976b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
977b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /**
978b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * Split a string into fields.  Somewhat like split() from Perl.
979b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * The pattern matches identify delimiters that separate the input
980b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     *  into fields.  The input data between the matches becomes the
981b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     *  fields themselves.
982b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * <p>
983b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     *
984b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * @param input   The string to be split into fields.  The field delimiters
985b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     *                match the pattern (in the "this" object).  This matcher
986b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     *                will be reset to this input string.
987b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * @param dest    An array of UnicodeStrings to receive the results of the split.
988b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     *                This is an array of actual UnicodeString objects, not an
989b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     *                array of pointers to strings.  Local (stack based) arrays can
990b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     *                work well here.
991b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * @param destCapacity  The number of elements in the destination array.
992b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     *                If the number of fields found is less than destCapacity, the
993b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     *                extra strings in the destination array are not altered.
994b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     *                If the number of destination strings is less than the number
995b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     *                of fields, the trailing part of the input string, including any
996b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     *                field delimiters, is placed in the last destination string.
997b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * @param status  A reference to a UErrorCode to receive any errors.
998b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * @return        The number of fields into which the input string was split.
999b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * @stable ICU 2.6
1000b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     */
1001b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    virtual int32_t  split(const UnicodeString &input,
1002b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeString    dest[],
1003b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        int32_t          destCapacity,
1004b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UErrorCode       &status);
1005b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1006c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru  /**
1007c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    *   Set a processing time limit for match operations with this Matcher.
1008c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    *
1009c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    *   Some patterns, when matching certain strings, can run in exponential time.
1010c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    *   For practical purposes, the match operation may appear to be in an
1011c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    *   infinite loop.
1012c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    *   When a limit is set a match operation will fail with an error if the
1013c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    *   limit is exceeded.
1014c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    *   <p>
1015c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    *   The units of the limit are steps of the match engine.
1016c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    *   Correspondence with actual processor time will depend on the speed
1017c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    *   of the processor and the details of the specific pattern, but will
1018c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    *   typically be on the order of milliseconds.
1019c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    *   <p>
1020c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    *   By default, the matching time is not limited.
1021c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    *   <p>
1022c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    *
1023c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    *   @param   limit       The limit value, or 0 for no limit.
1024c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    *   @param   status      A reference to a UErrorCode to receive any errors.
1025b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    *   @stable ICU 4.0
1026c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    */
1027c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    virtual void setTimeLimit(int32_t limit, UErrorCode &status);
1028c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
1029c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru  /**
1030c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    * Get the time limit, if any, for match operations made with this Matcher.
1031c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    *
1032c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    *   @return the maximum allowed time for a match, in units of processing steps.
1033b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    *   @stable ICU 4.0
1034c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    */
1035c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    virtual int32_t getTimeLimit() const;
1036c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
1037c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru  /**
1038c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    *  Set the amount of heap storage available for use by the match backtracking stack.
1039c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    *  The matcher is also reset, discarding any results from previous matches.
1040c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    *  <p>
1041c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    *  ICU uses a backtracking regular expression engine, with the backtrack stack
1042c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    *  maintained on the heap.  This function sets the limit to the amount of memory
1043c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    *  that can be used  for this purpose.  A backtracking stack overflow will
1044c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    *  result in an error from the match operation that caused it.
1045c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    *  <p>
1046c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    *  A limit is desirable because a malicious or poorly designed pattern can use
1047c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    *  excessive memory, potentially crashing the process.  A limit is enabled
1048c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    *  by default.
1049c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    *  <p>
1050c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    *  @param limit  The maximum size, in bytes, of the matching backtrack stack.
1051c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    *                A value of zero means no limit.
1052c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    *                The limit must be greater than or equal to zero.
1053c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    *
1054c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    *  @param status   A reference to a UErrorCode to receive any errors.
1055c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    *
1056b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    *  @stable ICU 4.0
1057c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    */
1058c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    virtual void setStackLimit(int32_t  limit, UErrorCode &status);
1059c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
1060c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru  /**
1061c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    *  Get the size of the heap storage available for use by the backtracking stack.
1062c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    *
1063c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    *  @return  the maximum backtracking stack size, in bytes, or zero if the
1064c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    *           stack size is unlimited.
1065b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    *  @stable ICU 4.0
1066c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    */
1067c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    virtual int32_t  getStackLimit() const;
1068c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
1069c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
1070c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru  /**
1071c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    * Set a callback function for use with this Matcher.
1072c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    * During matching operations the function will be called periodically,
1073c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    * giving the application the opportunity to terminate a long-running
1074c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    * match.
1075c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    *
1076c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    *    @param   callback    A pointer to the user-supplied callback function.
1077c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    *    @param   context     User context pointer.  The value supplied at the
1078c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    *                         time the callback function is set will be saved
1079c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    *                         and passed to the callback each time that it is called.
1080c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    *    @param   status      A reference to a UErrorCode to receive any errors.
1081b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    *  @stable ICU 4.0
1082c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    */
1083c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    virtual void setMatchCallback(URegexMatchCallback     *callback,
1084c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                  const void              *context,
1085c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                  UErrorCode              &status);
1086c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
1087c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
1088c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
1089c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru  /**
1090c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    *  Get the callback function for this Matcher.
1091c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    *
1092c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    *    @param   callback    Out parameter, receives a pointer to the user-supplied
1093c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    *                         callback function.
1094c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    *    @param   context     Out parameter, receives the user context pointer that
1095c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    *                         was set when setMatchCallback() was called.
1096c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    *    @param   status      A reference to a UErrorCode to receive any errors.
1097b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    *    @stable ICU 4.0
1098c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    */
1099c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    virtual void getMatchCallback(URegexMatchCallback     *&callback,
1100c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                  const void              *&context,
1101c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                  UErrorCode              &status);
1102b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1103b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1104b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   /**
1105b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     *   setTrace   Debug function, enable/disable tracing of the matching engine.
1106b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     *              For internal ICU development use only.  DO NOT USE!!!!
1107b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     *   @internal
1108b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     */
1109b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    void setTrace(UBool state);
1110b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1111b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1112b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /**
1113b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    * ICU "poor man's RTTI", returns a UClassID for this class.
1114b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *
1115b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    * @stable ICU 2.2
1116b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    */
1117b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    static UClassID U_EXPORT2 getStaticClassID();
1118b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1119b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /**
1120b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * ICU "poor man's RTTI", returns a UClassID for the actual class.
1121b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     *
1122b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * @stable ICU 2.2
1123b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     */
1124b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    virtual UClassID getDynamicClassID() const;
1125b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1126b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruprivate:
1127b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Constructors and other object boilerplate are private.
1128b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Instances of RegexMatcher can not be assigned, copied, cloned, etc.
1129c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    RegexMatcher();                  // default constructor not implemented
1130b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    RegexMatcher(const RegexPattern *pat);
1131b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    RegexMatcher(const RegexMatcher &other);
1132b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    RegexMatcher &operator =(const RegexMatcher &rhs);
1133c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    void init(UErrorCode &status);                      // Common initialization
1134c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    void init2(const UnicodeString &s, UErrorCode &e);  // Common initialization, part 2.
1135c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
1136b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    friend class RegexPattern;
1137b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    friend class RegexCImpl;
1138c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Querupublic:
1139c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    /** @internal  */
1140c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    void resetPreserveRegion();  // Reset matcher state, but preserve any region.
1141c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queruprivate:
1142b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1143b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
1144b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //  MatchAt   This is the internal interface to the match engine itself.
1145b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //            Match status comes back in matcher member variables.
1146b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
1147c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    void                 MatchAt(int32_t startIdx, UBool toEnd, UErrorCode &status);
1148b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    inline void          backTrack(int32_t &inputIdx, int32_t &patIdx);
1149b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool                isWordBoundary(int32_t pos);         // perform Perl-like  \b test
1150b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool                isUWordBoundary(int32_t pos);        // perform RBBI based \b test
1151b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REStackFrame        *resetStack();
1152c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    inline REStackFrame *StateSave(REStackFrame *fp, int32_t savePatIdx, UErrorCode &status);
1153c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    void                 IncrementTime(UErrorCode &status);
1154b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1155b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1156b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const RegexPattern  *fPattern;
1157b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    RegexPattern        *fPatternOwned;    // Non-NULL if this matcher owns the pattern, and
1158b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                           //   should delete it when through.
1159b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1160c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    const UnicodeString *fInput;           // The text being matched. Is never NULL.
1161c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    int32_t              fFrameSize;       // The size of a frame in the backtrack stack.
1162c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
1163c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    int32_t              fRegionStart;     // Start of the input region, default = 0.
1164c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    int32_t              fRegionLimit;     // End of input region, default to input.length.
1165c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
1166c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    int32_t              fAnchorStart;     // Region bounds for anchoring operations (^ or $).
1167c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    int32_t              fAnchorLimit;     //   See useAnchoringBounds
1168c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
1169c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    int32_t              fLookStart;       // Region bounds for look-ahead/behind and
1170c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    int32_t              fLookLimit;       //   other boundary tests.  See
1171c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                           //   useTransparentBounds
1172c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
1173c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    int32_t              fActiveStart;     // Currently active bounds for matching.
1174c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    int32_t              fActiveLimit;     //   Usually is the same as region, but
1175c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                           //   is changed to fLookStart/Limit when
1176c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                           //   entering look around regions.
1177c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
1178c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    UBool                fTransparentBounds;  // True if using transparent bounds.
1179c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    UBool                fAnchoringBounds; // True if using anchoring bounds.
1180c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
1181c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    UBool                fMatch;           // True if the last attempted match was successful.
1182b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t              fMatchStart;      // Position of the start of the most recent match
1183b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t              fMatchEnd;        // First position after the end of the most recent match
1184c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                           //   Zero if no previous match, even when a region
1185c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                           //   is active.
1186b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t              fLastMatchEnd;    // First position after the end of the previous match,
1187b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                           //   or -1 if there was no previous match.
1188c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    int32_t              fAppendPosition;  // First position after the end of the previous
1189c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                           //   appendReplacement().  As described by the
1190c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                           //   JavaDoc for Java Matcher, where it is called
1191c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                           //   "append position"
1192c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    UBool                fHitEnd;          // True if the last match touched the end of input.
1193c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    UBool                fRequireEnd;      // True if the last match required end-of-input
1194c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                           //    (matched $ or Z)
1195b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1196b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UVector32           *fStack;
1197c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    REStackFrame        *fFrame;           // After finding a match, the last active stack frame,
1198c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                           //   which will contain the capture group results.
1199b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                           //   NOT valid while match engine is running.
1200b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1201b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t             *fData;            // Data area for use by the compiled pattern.
1202b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t             fSmallData[8];     //   Use this for data if it's enough.
1203b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1204c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    int32_t             fTimeLimit;        // Max time (in arbitrary steps) to let the
1205c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                           //   match engine run.  Zero for unlimited.
1206c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
1207c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    int32_t             fTime;             // Match time, accumulates while matching.
1208c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    int32_t             fTickCounter;      // Low bits counter for time.  Counts down StateSaves.
1209c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                           //   Kept separately from fTime to keep as much
1210c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                           //   code as possible out of the inline
1211c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                           //   StateSave function.
1212c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
1213c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    int32_t             fStackLimit;       // Maximum memory size to use for the backtrack
1214c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                           //   stack, in bytes.  Zero for unlimited.
1215c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
1216c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    URegexMatchCallback *fCallbackFn;       // Pointer to match progress callback funct.
1217c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                           //   NULL if there is no callback.
1218c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    const void         *fCallbackContext;  // User Context ptr for callback function.
1219c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
1220b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool               fTraceDebug;       // Set true for debug tracing of match engine.
1221b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1222c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    UErrorCode          fDeferredStatus;   // Save error state that cannot be immediately
1223b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                           //   reported, or that permanently disables this matcher.
1224b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1225b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    RuleBasedBreakIterator  *fWordBreakItr;
1226b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1227b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1228b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru};
1229b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1230b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_NAMESPACE_END
1231b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif  // UCONFIG_NO_REGULAR_EXPRESSIONS
1232b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif
1233