1f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/*
2f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*******************************************************************************
3f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*
4f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*   Copyright (C) 2004-2005, International Business Machines
5f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*   Corporation and others.  All Rights Reserved.
6f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*
7f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*******************************************************************************
8f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*   file name:  xmlparser.h
9f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*   encoding:   US-ASCII
10f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*   tab size:   8 (not used)
11f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*   indentation:4
12f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*
13f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*   created on: 2004jul21
14f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*   created by: Andy Heninger
15f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*
16f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* Tiny XML parser using ICU and intended for use in ICU tests and in build tools.
17f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* Not suitable for production use. Not supported.
18f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* Not conformant. Not efficient.
19f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* But very small.
20f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*/
21f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
22f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#ifndef __XMLPARSER_H__
23f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define __XMLPARSER_H__
24f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
25f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/uobject.h"
26f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/unistr.h"
27f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/regex.h"
28f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "uvector.h"
29f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "hash.h"
30f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
31f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#if !UCONFIG_NO_REGULAR_EXPRESSIONS && !UCONFIG_NO_CONVERSION
32f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
33f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)enum UXMLNodeType {
34f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /** Node type string (text contents), stored as a UnicodeString. */
35f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UXML_NODE_TYPE_STRING,
36f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /** Node type element, stored as a UXMLElement. */
37f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UXML_NODE_TYPE_ELEMENT,
38f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UXML_NODE_TYPE_COUNT
39f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)};
40f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
41f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_NAMESPACE_BEGIN
42f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
43f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)class UXMLParser;
44f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
45f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/**
46f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * This class represents an element node in a parsed XML tree.
47f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
48f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)class U_TOOLUTIL_API UXMLElement : public UObject {
49f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)public:
50f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /**
51f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * Destructor.
52f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     */
53f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    virtual ~UXMLElement();
54f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
55f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /**
56f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * Get the tag name of this element.
57f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     */
58f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    const UnicodeString &getTagName() const;
59f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /**
60f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * Get the text contents of the element.
61f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * Append the contents of all text child nodes.
62f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * @param recurse If TRUE, also recursively appends the contents of all
63f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     *        text child nodes of element children.
64f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * @return The text contents.
65f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     */
66f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeString getText(UBool recurse) const;
67f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /**
68f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * Get the number of attributes.
69f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     */
70f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t countAttributes() const;
71f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /**
72f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * Get the i-th attribute.
73f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * @param i Index of the attribute.
74f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * @param name Output parameter, receives the attribute name.
75f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * @param value Output parameter, receives the attribute value.
76f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * @return A pointer to the attribute value (may be &value or a pointer to an
77f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     *         internal string object), or NULL if i is out of bounds.
78f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     */
79f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    const UnicodeString *getAttribute(int32_t i, UnicodeString &name, UnicodeString &value) const;
80f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /**
81f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * Get the value of the attribute with the given name.
82f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * @param name Attribute name to be looked up.
83f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * @return A pointer to the attribute value, or NULL if this element
84f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * does not have this attribute.
85f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     */
86f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    const UnicodeString *getAttribute(const UnicodeString &name) const;
87f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /**
88f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * Get the number of child nodes.
89f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     */
90f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t countChildren() const;
91f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /**
92f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * Get the i-th child node.
93f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * @param i Index of the child node.
94f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * @param type The child node type.
95f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * @return A pointer to the child node object, or NULL if i is out of bounds.
96f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     */
97f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    const UObject *getChild(int32_t i, UXMLNodeType &type) const;
98f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /**
99f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * Get the next child element node, skipping non-element child nodes.
100f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * @param i Enumeration index; initialize to 0 before getting the first child element.
101f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * @return A pointer to the next child element, or NULL if there is none.
102f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     */
103f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    const UXMLElement *nextChildElement(int32_t &i) const;
104f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /**
105f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * Get the immediate child element with the given name.
106f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * If there are multiple child elements with this name, then return
107f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * the first one.
108f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * @param name Element name to be looked up.
109f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * @return A pointer to the element node, or NULL if this element
110f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * does not have this immediate child element.
111f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     */
112f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    const UXMLElement *getChildElement(const UnicodeString &name) const;
113f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
114f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /**
115f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * ICU "poor man's RTTI", returns a UClassID for the actual class.
116f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     */
117f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    virtual UClassID getDynamicClassID() const;
118f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
119f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /**
120f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * ICU "poor man's RTTI", returns a UClassID for this class.
121f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     */
122f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    static UClassID U_EXPORT2 getStaticClassID();
123f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
124f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)private:
125f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // prevent default construction etc.
126f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UXMLElement();
127f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UXMLElement(const UXMLElement &other);
128f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UXMLElement &operator=(const UXMLElement &other);
129f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
130f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    void appendText(UnicodeString &text, UBool recurse) const;
131f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
132f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    friend class UXMLParser;
133f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
134f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UXMLElement(const UXMLParser *parser, const UnicodeString *name, UErrorCode &errorCode);
135f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
136f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    const UXMLParser *fParser;
137f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    const UnicodeString *fName;          // The tag name of this element (owned by the UXMLParser)
138f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeString       fContent;        // The text content of this node.  All element content is
139f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                         //   concatenated even when there are intervening nested elements
140f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                         //   (which doesn't happen with most xml files we care about)
141f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                         //   Sections of content containing only white space are dropped,
142f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                         //   which gets rid  the bogus white space content from
143f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                         //   elements which are primarily containers for nested elements.
144f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UVector             fAttNames;       // A vector containing the names of this element's attributes
145f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                         //    The names are UnicodeString objects, owned by the UXMLParser.
146f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UVector             fAttValues;      // A vector containing the attribute values for
147f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                         //    this element's attributes.  The order is the same
148f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                         //    as that of the attribute name vector.
149f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
150f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UVector             fChildren;       // The child nodes of this element (a Vector)
151f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
152f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UXMLElement        *fParent;         // A pointer to the parent element of this element.
153f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)};
154f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
155f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/**
156f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * A simple XML parser; it is neither efficient nor conformant and only useful for
157f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * restricted types of XML documents.
158f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *
159f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * The parse methods parse whole documents and return the parse trees via their
160f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * root elements.
161f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
162f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)class U_TOOLUTIL_API UXMLParser : public UObject {
163f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)public:
164f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /**
165f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * Create an XML parser.
166f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     */
167f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    static UXMLParser *createParser(UErrorCode &errorCode);
168f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /**
169f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * Destructor.
170f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     */
171f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    virtual ~UXMLParser();
172f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
173f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /**
174f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * Parse an XML document, create the entire document tree, and
175f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * return a pointer to the root element of the parsed tree.
176f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * The caller must delete the element.
177f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     */
178f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UXMLElement *parse(const UnicodeString &src, UErrorCode &errorCode);
179f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /**
180f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * Parse an XML file, create the entire document tree, and
181f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * return a pointer to the root element of the parsed tree.
182f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * The caller must delete the element.
183f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     */
184f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UXMLElement *parseFile(const char *filename, UErrorCode &errorCode);
185f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
186f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /**
187f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * ICU "poor man's RTTI", returns a UClassID for the actual class.
188f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     */
189f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    virtual UClassID getDynamicClassID() const;
190f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
191f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /**
192f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * ICU "poor man's RTTI", returns a UClassID for this class.
193f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     */
194f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    static UClassID U_EXPORT2 getStaticClassID();
195f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
196f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)private:
197f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // prevent default construction etc.
198f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UXMLParser();
199f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UXMLParser(const UXMLParser &other);
200f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UXMLParser &operator=(const UXMLParser &other);
201f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
202f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // constructor
203f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UXMLParser(UErrorCode &status);
204f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
205f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    void           parseMisc(UErrorCode &status);
206f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UXMLElement   *createElement(RegexMatcher &mEl, UErrorCode &status);
207f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    void           error(const char *message, UErrorCode &status);
208f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeString  scanContent(UErrorCode &status);
209f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    void           replaceCharRefs(UnicodeString &s, UErrorCode &status);
210f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
211f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    const UnicodeString *intern(const UnicodeString &s, UErrorCode &errorCode);
212f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)public:
213f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // public for UXMLElement only
214f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    const UnicodeString *findName(const UnicodeString &s) const;
215f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)private:
216f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
217f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // There is one ICU regex matcher for each of the major XML syntax items
218f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //  that are recognized.
219f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    RegexMatcher mXMLDecl;
220f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    RegexMatcher mXMLComment;
221f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    RegexMatcher mXMLSP;
222f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    RegexMatcher mXMLDoctype;
223f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    RegexMatcher mXMLPI;
224f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    RegexMatcher mXMLElemStart;
225f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    RegexMatcher mXMLElemEnd;
226f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    RegexMatcher mXMLElemEmpty;
227f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    RegexMatcher mXMLCharData;
228f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    RegexMatcher mAttrValue;
229f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    RegexMatcher mAttrNormalizer;
230f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    RegexMatcher mNewLineNormalizer;
231f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    RegexMatcher mAmps;
232f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
233f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    Hashtable             fNames;           // interned element/attribute name strings
234f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UStack                fElementStack;    // Stack holds the parent elements when nested
235f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                            //    elements are being parsed.  All items on this
236f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                            //    stack are of type UXMLElement.
237f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t               fPos;             // String index of the current scan position in
238f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                            //    xml source (in fSrc).
239f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeString         fOneLF;
240f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)};
241f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
242f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_NAMESPACE_END
243f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS */
244f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
245f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif
246