1f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* 2f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)******************************************************************************* 3f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* 4f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* Copyright (C) 2004-2005, International Business Machines 5f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* Corporation and others. All Rights Reserved. 6f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* 7f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)******************************************************************************* 8f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* file name: xmlparser.h 9f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* encoding: US-ASCII 10f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* tab size: 8 (not used) 11f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* indentation:4 12f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* 13f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* created on: 2004jul21 14f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* created by: Andy Heninger 15f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* 16f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* Tiny XML parser using ICU and intended for use in ICU tests and in build tools. 17f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* Not suitable for production use. Not supported. 18f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* Not conformant. Not efficient. 19f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* But very small. 20f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*/ 21f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 22f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#ifndef __XMLPARSER_H__ 23f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define __XMLPARSER_H__ 24f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 25f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/uobject.h" 26f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/unistr.h" 27f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/regex.h" 28f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "uvector.h" 29f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "hash.h" 30f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 31f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#if !UCONFIG_NO_REGULAR_EXPRESSIONS && !UCONFIG_NO_CONVERSION 32f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 33f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)enum UXMLNodeType { 34f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /** Node type string (text contents), stored as a UnicodeString. */ 35f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UXML_NODE_TYPE_STRING, 36f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /** Node type element, stored as a UXMLElement. */ 37f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UXML_NODE_TYPE_ELEMENT, 38f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UXML_NODE_TYPE_COUNT 39f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}; 40f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 41f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_NAMESPACE_BEGIN 42f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 43f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)class UXMLParser; 44f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 45f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/** 46f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * This class represents an element node in a parsed XML tree. 47f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 48f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)class U_TOOLUTIL_API UXMLElement : public UObject { 49f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)public: 50f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /** 51f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Destructor. 52f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 53f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) virtual ~UXMLElement(); 54f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 55f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /** 56f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Get the tag name of this element. 57f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 58f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const UnicodeString &getTagName() const; 59f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /** 60f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Get the text contents of the element. 61f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Append the contents of all text child nodes. 62f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param recurse If TRUE, also recursively appends the contents of all 63f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * text child nodes of element children. 64f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @return The text contents. 65f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 66f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeString getText(UBool recurse) const; 67f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /** 68f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Get the number of attributes. 69f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 70f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t countAttributes() const; 71f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /** 72f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Get the i-th attribute. 73f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param i Index of the attribute. 74f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param name Output parameter, receives the attribute name. 75f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param value Output parameter, receives the attribute value. 76f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @return A pointer to the attribute value (may be &value or a pointer to an 77f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * internal string object), or NULL if i is out of bounds. 78f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 79f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const UnicodeString *getAttribute(int32_t i, UnicodeString &name, UnicodeString &value) const; 80f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /** 81f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Get the value of the attribute with the given name. 82f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param name Attribute name to be looked up. 83f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @return A pointer to the attribute value, or NULL if this element 84f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * does not have this attribute. 85f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 86f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const UnicodeString *getAttribute(const UnicodeString &name) const; 87f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /** 88f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Get the number of child nodes. 89f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 90f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t countChildren() const; 91f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /** 92f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Get the i-th child node. 93f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param i Index of the child node. 94f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param type The child node type. 95f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @return A pointer to the child node object, or NULL if i is out of bounds. 96f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 97f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const UObject *getChild(int32_t i, UXMLNodeType &type) const; 98f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /** 99f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Get the next child element node, skipping non-element child nodes. 100f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param i Enumeration index; initialize to 0 before getting the first child element. 101f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @return A pointer to the next child element, or NULL if there is none. 102f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 103f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const UXMLElement *nextChildElement(int32_t &i) const; 104f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /** 105f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Get the immediate child element with the given name. 106f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * If there are multiple child elements with this name, then return 107f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * the first one. 108f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param name Element name to be looked up. 109f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @return A pointer to the element node, or NULL if this element 110f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * does not have this immediate child element. 111f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 112f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const UXMLElement *getChildElement(const UnicodeString &name) const; 113f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 114f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /** 115f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * ICU "poor man's RTTI", returns a UClassID for the actual class. 116f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 117f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) virtual UClassID getDynamicClassID() const; 118f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 119f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /** 120f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * ICU "poor man's RTTI", returns a UClassID for this class. 121f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 122f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) static UClassID U_EXPORT2 getStaticClassID(); 123f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 124f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)private: 125f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // prevent default construction etc. 126f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UXMLElement(); 127f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UXMLElement(const UXMLElement &other); 128f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UXMLElement &operator=(const UXMLElement &other); 129f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 130f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) void appendText(UnicodeString &text, UBool recurse) const; 131f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 132f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) friend class UXMLParser; 133f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 134f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UXMLElement(const UXMLParser *parser, const UnicodeString *name, UErrorCode &errorCode); 135f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 136f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const UXMLParser *fParser; 137f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const UnicodeString *fName; // The tag name of this element (owned by the UXMLParser) 138f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeString fContent; // The text content of this node. All element content is 139f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // concatenated even when there are intervening nested elements 140f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // (which doesn't happen with most xml files we care about) 141f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Sections of content containing only white space are dropped, 142f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // which gets rid the bogus white space content from 143f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // elements which are primarily containers for nested elements. 144f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UVector fAttNames; // A vector containing the names of this element's attributes 145f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // The names are UnicodeString objects, owned by the UXMLParser. 146f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UVector fAttValues; // A vector containing the attribute values for 147f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // this element's attributes. The order is the same 148f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // as that of the attribute name vector. 149f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 150f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UVector fChildren; // The child nodes of this element (a Vector) 151f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 152f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UXMLElement *fParent; // A pointer to the parent element of this element. 153f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}; 154f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 155f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/** 156f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * A simple XML parser; it is neither efficient nor conformant and only useful for 157f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * restricted types of XML documents. 158f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * 159f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * The parse methods parse whole documents and return the parse trees via their 160f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * root elements. 161f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 162f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)class U_TOOLUTIL_API UXMLParser : public UObject { 163f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)public: 164f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /** 165f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Create an XML parser. 166f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 167f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) static UXMLParser *createParser(UErrorCode &errorCode); 168f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /** 169f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Destructor. 170f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 171f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) virtual ~UXMLParser(); 172f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 173f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /** 174f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Parse an XML document, create the entire document tree, and 175f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * return a pointer to the root element of the parsed tree. 176f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * The caller must delete the element. 177f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 178f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UXMLElement *parse(const UnicodeString &src, UErrorCode &errorCode); 179f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /** 180f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Parse an XML file, create the entire document tree, and 181f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * return a pointer to the root element of the parsed tree. 182f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * The caller must delete the element. 183f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 184f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UXMLElement *parseFile(const char *filename, UErrorCode &errorCode); 185f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 186f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /** 187f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * ICU "poor man's RTTI", returns a UClassID for the actual class. 188f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 189f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) virtual UClassID getDynamicClassID() const; 190f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 191f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /** 192f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * ICU "poor man's RTTI", returns a UClassID for this class. 193f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 194f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) static UClassID U_EXPORT2 getStaticClassID(); 195f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 196f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)private: 197f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // prevent default construction etc. 198f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UXMLParser(); 199f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UXMLParser(const UXMLParser &other); 200f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UXMLParser &operator=(const UXMLParser &other); 201f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 202f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // constructor 203f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UXMLParser(UErrorCode &status); 204f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 205f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) void parseMisc(UErrorCode &status); 206f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UXMLElement *createElement(RegexMatcher &mEl, UErrorCode &status); 207f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) void error(const char *message, UErrorCode &status); 208f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeString scanContent(UErrorCode &status); 209f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) void replaceCharRefs(UnicodeString &s, UErrorCode &status); 210f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 211f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const UnicodeString *intern(const UnicodeString &s, UErrorCode &errorCode); 212f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)public: 213f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // public for UXMLElement only 214f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const UnicodeString *findName(const UnicodeString &s) const; 215f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)private: 216f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 217f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // There is one ICU regex matcher for each of the major XML syntax items 218f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // that are recognized. 219f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) RegexMatcher mXMLDecl; 220f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) RegexMatcher mXMLComment; 221f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) RegexMatcher mXMLSP; 222f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) RegexMatcher mXMLDoctype; 223f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) RegexMatcher mXMLPI; 224f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) RegexMatcher mXMLElemStart; 225f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) RegexMatcher mXMLElemEnd; 226f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) RegexMatcher mXMLElemEmpty; 227f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) RegexMatcher mXMLCharData; 228f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) RegexMatcher mAttrValue; 229f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) RegexMatcher mAttrNormalizer; 230f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) RegexMatcher mNewLineNormalizer; 231f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) RegexMatcher mAmps; 232f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 233f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) Hashtable fNames; // interned element/attribute name strings 234f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UStack fElementStack; // Stack holds the parent elements when nested 235f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // elements are being parsed. All items on this 236f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // stack are of type UXMLElement. 237f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t fPos; // String index of the current scan position in 238f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // xml source (in fSrc). 239f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeString fOneLF; 240f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}; 241f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 242f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_NAMESPACE_END 243f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS */ 244f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 245f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif 246