/*
*******************************************************************************
*
*   Copyright (C) 2004-2005, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*   file name:  xmlparser.h
*   encoding:   US-ASCII
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2004jul21
*   created by: Andy Heninger
*
* Tiny XML parser using ICU and intended for use in ICU tests and in build tools.
* Not suitable for production use. Not supported.
* Not conformant. Not efficient.
* But very small.
*/
21ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
22ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#ifndef __XMLPARSER_H__
23ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define __XMLPARSER_H__
24ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
25ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/uobject.h"
26ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/unistr.h"
27ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/regex.h"
28ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "uvector.h"
29ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "hash.h"
30ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
31ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#if !UCONFIG_NO_REGULAR_EXPRESSIONS && !UCONFIG_NO_CONVERSION
32ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
/**
 * Type tag for a node in a parsed XML tree.
 */
enum UXMLNodeType {
    /** Node type string (text contents), stored as a UnicodeString. */
    UXML_NODE_TYPE_STRING,
    /** Node type element, stored as a UXMLElement. */
    UXML_NODE_TYPE_ELEMENT,
    /** Number of node types defined above; not itself a node type. */
    UXML_NODE_TYPE_COUNT
};
40ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
41ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_NAMESPACE_BEGIN
42ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
43ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruclass UXMLParser;
44ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
45ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/**
46ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * This class represents an element node in a parsed XML tree.
47ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
48ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruclass U_TOOLUTIL_API UXMLElement : public UObject {
49ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querupublic:
50ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
51ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Destructor.
52ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
53ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    virtual ~UXMLElement();
54ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
55ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
56ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Get the tag name of this element.
57ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
58ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    const UnicodeString &getTagName() const;
59ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
60ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Get the text contents of the element.
61ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Append the contents of all text child nodes.
62ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @param recurse If TRUE, also recursively appends the contents of all
63ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     *        text child nodes of element children.
64ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @return The text contents.
65ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
66ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UnicodeString getText(UBool recurse) const;
67ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
68ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Get the number of attributes.
69ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
70ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t countAttributes() const;
71ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
72ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Get the i-th attribute.
73ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @param i Index of the attribute.
74ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @param name Output parameter, receives the attribute name.
75ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @param value Output parameter, receives the attribute value.
76ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @return A pointer to the attribute value (may be &value or a pointer to an
77ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     *         internal string object), or NULL if i is out of bounds.
78ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
79ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    const UnicodeString *getAttribute(int32_t i, UnicodeString &name, UnicodeString &value) const;
80ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
81ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Get the value of the attribute with the given name.
82ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @param name Attribute name to be looked up.
83ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @return A pointer to the attribute value, or NULL if this element
84ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * does not have this attribute.
85ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
86ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    const UnicodeString *getAttribute(const UnicodeString &name) const;
87ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
88ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Get the number of child nodes.
89ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
90ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t countChildren() const;
91ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
92ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Get the i-th child node.
93ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @param i Index of the child node.
94ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @param type The child node type.
95ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @return A pointer to the child node object, or NULL if i is out of bounds.
96ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
97ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    const UObject *getChild(int32_t i, UXMLNodeType &type) const;
98ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
99ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Get the next child element node, skipping non-element child nodes.
100ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @param i Enumeration index; initialize to 0 before getting the first child element.
101ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @return A pointer to the next child element, or NULL if there is none.
102ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
103ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    const UXMLElement *nextChildElement(int32_t &i) const;
104ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
105ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Get the immediate child element with the given name.
106ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * If there are multiple child elements with this name, then return
107ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * the first one.
108ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @param name Element name to be looked up.
109ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @return A pointer to the element node, or NULL if this element
110ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * does not have this immediate child element.
111ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
112ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    const UXMLElement *getChildElement(const UnicodeString &name) const;
113ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
114ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
115ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * ICU "poor man's RTTI", returns a UClassID for the actual class.
116ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
117ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    virtual UClassID getDynamicClassID() const;
118ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
119ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
120ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * ICU "poor man's RTTI", returns a UClassID for this class.
121ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
122ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    static UClassID U_EXPORT2 getStaticClassID();
123ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
124ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruprivate:
125ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // prevent default construction etc.
126ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UXMLElement();
127ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UXMLElement(const UXMLElement &other);
128ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UXMLElement &operator=(const UXMLElement &other);
129ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
130ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    void appendText(UnicodeString &text, UBool recurse) const;
131ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
132ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    friend class UXMLParser;
133ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
134ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UXMLElement(const UXMLParser *parser, const UnicodeString *name, UErrorCode &errorCode);
135ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
136ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    const UXMLParser *fParser;
137ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    const UnicodeString *fName;          // The tag name of this element (owned by the UXMLParser)
138ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UnicodeString       fContent;        // The text content of this node.  All element content is
139ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                                         //   concatenated even when there are intervening nested elements
140ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                                         //   (which doesn't happen with most xml files we care about)
141ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                                         //   Sections of content containing only white space are dropped,
142ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                                         //   which gets rid  the bogus white space content from
143ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                                         //   elements which are primarily containers for nested elements.
144ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UVector             fAttNames;       // A vector containing the names of this element's attributes
145ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                                         //    The names are UnicodeString objects, owned by the UXMLParser.
146ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UVector             fAttValues;      // A vector containing the attribute values for
147ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                                         //    this element's attributes.  The order is the same
148ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                                         //    as that of the attribute name vector.
149ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
150ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UVector             fChildren;       // The child nodes of this element (a Vector)
151ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
152ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UXMLElement        *fParent;         // A pointer to the parent element of this element.
153ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru};
154ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
155ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/**
156ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * A simple XML parser; it is neither efficient nor conformant and only useful for
157ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * restricted types of XML documents.
158ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *
159ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The parse methods parse whole documents and return the parse trees via their
160ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * root elements.
161ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
162ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruclass U_TOOLUTIL_API UXMLParser : public UObject {
163ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querupublic:
164ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
165ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Create an XML parser.
166ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
167ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    static UXMLParser *createParser(UErrorCode &errorCode);
168ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
169ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Destructor.
170ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
171ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    virtual ~UXMLParser();
172ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
173ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
174ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Parse an XML document, create the entire document tree, and
175ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * return a pointer to the root element of the parsed tree.
176ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * The caller must delete the element.
177ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
178ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UXMLElement *parse(const UnicodeString &src, UErrorCode &errorCode);
179ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
180ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Parse an XML file, create the entire document tree, and
181ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * return a pointer to the root element of the parsed tree.
182ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * The caller must delete the element.
183ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
184ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UXMLElement *parseFile(const char *filename, UErrorCode &errorCode);
185ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
186ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
187ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * ICU "poor man's RTTI", returns a UClassID for the actual class.
188ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
189ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    virtual UClassID getDynamicClassID() const;
190ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
191ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
192ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * ICU "poor man's RTTI", returns a UClassID for this class.
193ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
194ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    static UClassID U_EXPORT2 getStaticClassID();
195ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
196ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruprivate:
197ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // prevent default construction etc.
198ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UXMLParser();
199ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UXMLParser(const UXMLParser &other);
200ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UXMLParser &operator=(const UXMLParser &other);
201ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
202ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // constructor
203ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UXMLParser(UErrorCode &status);
204ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
205ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    void           parseMisc(UErrorCode &status);
206ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UXMLElement   *createElement(RegexMatcher &mEl, UErrorCode &status);
207ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    void           error(const char *message, UErrorCode &status);
208ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UnicodeString  scanContent(UErrorCode &status);
209ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    void           replaceCharRefs(UnicodeString &s, UErrorCode &status);
210ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
211ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    const UnicodeString *intern(const UnicodeString &s, UErrorCode &errorCode);
212ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querupublic:
213ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // public for UXMLElement only
214ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    const UnicodeString *findName(const UnicodeString &s) const;
215ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruprivate:
216ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
217ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // There is one ICU regex matcher for each of the major XML syntax items
218ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    //  that are recognized.
219ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    RegexMatcher mXMLDecl;
220ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    RegexMatcher mXMLComment;
221ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    RegexMatcher mXMLSP;
222ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    RegexMatcher mXMLDoctype;
223ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    RegexMatcher mXMLPI;
224ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    RegexMatcher mXMLElemStart;
225ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    RegexMatcher mXMLElemEnd;
226ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    RegexMatcher mXMLElemEmpty;
227ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    RegexMatcher mXMLCharData;
228ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    RegexMatcher mAttrValue;
229ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    RegexMatcher mAttrNormalizer;
230ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    RegexMatcher mNewLineNormalizer;
231ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    RegexMatcher mAmps;
232ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
233ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    Hashtable             fNames;           // interned element/attribute name strings
234ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UStack                fElementStack;    // Stack holds the parent elements when nested
235ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                                            //    elements are being parsed.  All items on this
236ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                                            //    stack are of type UXMLElement.
237ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t               fPos;             // String index of the current scan position in
238ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                                            //    xml source (in fSrc).
239ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UnicodeString         fOneLF;
240ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru};
241ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
242ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_NAMESPACE_END
243ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS */
244ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
245ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif
246