1ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 2ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru******************************************************************************* 3ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 4ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* Copyright (C) 2004-2005, International Business Machines 5ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* Corporation and others. All Rights Reserved. 6ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 7ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru******************************************************************************* 8ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* file name: xmlparser.h 9ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* encoding: US-ASCII 10ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* tab size: 8 (not used) 11ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* indentation:4 12ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 13ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* created on: 2004jul21 14ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* created by: Andy Heninger 15ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 16ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* Tiny XML parser using ICU and intended for use in ICU tests and in build tools. 17ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* Not suitable for production use. Not supported. 18ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* Not conformant. Not efficient. 19ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* But very small. 20ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*/ 21ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 22ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#ifndef __XMLPARSER_H__ 23ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define __XMLPARSER_H__ 24ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 25ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/uobject.h" 26ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/unistr.h" 27ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/regex.h" 28ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "uvector.h" 29ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "hash.h" 30ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 31ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#if !UCONFIG_NO_REGULAR_EXPRESSIONS && !UCONFIG_NO_CONVERSION 32ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 33ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruenum UXMLNodeType { 34ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** Node type string (text contents), stored as a UnicodeString. */ 35ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UXML_NODE_TYPE_STRING, 36ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** Node type element, stored as a UXMLElement. */ 37ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UXML_NODE_TYPE_ELEMENT, 38ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UXML_NODE_TYPE_COUNT 39ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 40ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 41ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_NAMESPACE_BEGIN 42ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 43ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruclass UXMLParser; 44ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 45ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 46ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * This class represents an element node in a parsed XML tree. 47ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 48ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruclass U_TOOLUTIL_API UXMLElement : public UObject { 49ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querupublic: 50ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 51ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Destructor. 52ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 53ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru virtual ~UXMLElement(); 54ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 55ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 56ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Get the tag name of this element. 57ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 58ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const UnicodeString &getTagName() const; 59ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 60ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Get the text contents of the element. 61ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Append the contents of all text child nodes. 62ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param recurse If TRUE, also recursively appends the contents of all 63ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * text child nodes of element children. 64ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return The text contents. 65ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 66ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UnicodeString getText(UBool recurse) const; 67ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 68ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Get the number of attributes. 69ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 70ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t countAttributes() const; 71ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 72ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Get the i-th attribute. 73ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param i Index of the attribute. 74ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param name Output parameter, receives the attribute name. 75ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param value Output parameter, receives the attribute value. 76ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return A pointer to the attribute value (may be &value or a pointer to an 77ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * internal string object), or NULL if i is out of bounds. 78ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 79ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const UnicodeString *getAttribute(int32_t i, UnicodeString &name, UnicodeString &value) const; 80ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 81ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Get the value of the attribute with the given name. 82ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param name Attribute name to be looked up. 83ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return A pointer to the attribute value, or NULL if this element 84ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * does not have this attribute. 85ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 86ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const UnicodeString *getAttribute(const UnicodeString &name) const; 87ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 88ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Get the number of child nodes. 89ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 90ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t countChildren() const; 91ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 92ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Get the i-th child node. 93ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param i Index of the child node. 94ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param type The child node type. 95ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return A pointer to the child node object, or NULL if i is out of bounds. 96ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 97ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const UObject *getChild(int32_t i, UXMLNodeType &type) const; 98ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 99ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Get the next child element node, skipping non-element child nodes. 100ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param i Enumeration index; initialize to 0 before getting the first child element. 101ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return A pointer to the next child element, or NULL if there is none. 102ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 103ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const UXMLElement *nextChildElement(int32_t &i) const; 104ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 105ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Get the immediate child element with the given name. 106ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * If there are multiple child elements with this name, then return 107ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * the first one. 108ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param name Element name to be looked up. 109ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return A pointer to the element node, or NULL if this element 110ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * does not have this immediate child element. 111ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 112ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const UXMLElement *getChildElement(const UnicodeString &name) const; 113ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 114ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 115ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * ICU "poor man's RTTI", returns a UClassID for the actual class. 116ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 117ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru virtual UClassID getDynamicClassID() const; 118ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 119ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 120ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * ICU "poor man's RTTI", returns a UClassID for this class. 121ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 122ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru static UClassID U_EXPORT2 getStaticClassID(); 123ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 124ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruprivate: 125ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // prevent default construction etc. 126ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UXMLElement(); 127ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UXMLElement(const UXMLElement &other); 128ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UXMLElement &operator=(const UXMLElement &other); 129ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 130ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru void appendText(UnicodeString &text, UBool recurse) const; 131ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 132ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru friend class UXMLParser; 133ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 134ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UXMLElement(const UXMLParser *parser, const UnicodeString *name, UErrorCode &errorCode); 135ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 136ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const UXMLParser *fParser; 137ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const UnicodeString *fName; // The tag name of this element (owned by the UXMLParser) 138ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UnicodeString fContent; // The text content of this node. All element content is 139ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // concatenated even when there are intervening nested elements 140ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // (which doesn't happen with most xml files we care about) 141ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Sections of content containing only white space are dropped, 142ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // which gets rid the bogus white space content from 143ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // elements which are primarily containers for nested elements. 144ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UVector fAttNames; // A vector containing the names of this element's attributes 145ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // The names are UnicodeString objects, owned by the UXMLParser. 146ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UVector fAttValues; // A vector containing the attribute values for 147ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // this element's attributes. The order is the same 148ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // as that of the attribute name vector. 149ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 150ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UVector fChildren; // The child nodes of this element (a Vector) 151ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 152ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UXMLElement *fParent; // A pointer to the parent element of this element. 153ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 154ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 155ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 156ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * A simple XML parser; it is neither efficient nor conformant and only useful for 157ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * restricted types of XML documents. 158ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 159ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The parse methods parse whole documents and return the parse trees via their 160ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * root elements. 161ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 162ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruclass U_TOOLUTIL_API UXMLParser : public UObject { 163ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querupublic: 164ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 165ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Create an XML parser. 166ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 167ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru static UXMLParser *createParser(UErrorCode &errorCode); 168ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 169ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Destructor. 170ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 171ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru virtual ~UXMLParser(); 172ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 173ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 174ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Parse an XML document, create the entire document tree, and 175ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * return a pointer to the root element of the parsed tree. 176ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The caller must delete the element. 177ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 178ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UXMLElement *parse(const UnicodeString &src, UErrorCode &errorCode); 179ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 180ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Parse an XML file, create the entire document tree, and 181ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * return a pointer to the root element of the parsed tree. 182ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The caller must delete the element. 183ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 184ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UXMLElement *parseFile(const char *filename, UErrorCode &errorCode); 185ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 186ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 187ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * ICU "poor man's RTTI", returns a UClassID for the actual class. 188ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 189ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru virtual UClassID getDynamicClassID() const; 190ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 191ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 192ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * ICU "poor man's RTTI", returns a UClassID for this class. 193ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 194ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru static UClassID U_EXPORT2 getStaticClassID(); 195ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 196ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruprivate: 197ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // prevent default construction etc. 198ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UXMLParser(); 199ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UXMLParser(const UXMLParser &other); 200ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UXMLParser &operator=(const UXMLParser &other); 201ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 202ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // constructor 203ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UXMLParser(UErrorCode &status); 204ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 205ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru void parseMisc(UErrorCode &status); 206ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UXMLElement *createElement(RegexMatcher &mEl, UErrorCode &status); 207ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru void error(const char *message, UErrorCode &status); 208ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UnicodeString scanContent(UErrorCode &status); 209ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru void replaceCharRefs(UnicodeString &s, UErrorCode &status); 210ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 211ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const UnicodeString *intern(const UnicodeString &s, UErrorCode &errorCode); 212ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querupublic: 213ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // public for UXMLElement only 214ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const UnicodeString *findName(const UnicodeString &s) const; 215ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruprivate: 216ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 217ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // There is one ICU regex matcher for each of the major XML syntax items 218ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // that are recognized. 219ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru RegexMatcher mXMLDecl; 220ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru RegexMatcher mXMLComment; 221ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru RegexMatcher mXMLSP; 222ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru RegexMatcher mXMLDoctype; 223ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru RegexMatcher mXMLPI; 224ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru RegexMatcher mXMLElemStart; 225ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru RegexMatcher mXMLElemEnd; 226ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru RegexMatcher mXMLElemEmpty; 227ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru RegexMatcher mXMLCharData; 228ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru RegexMatcher mAttrValue; 229ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru RegexMatcher mAttrNormalizer; 230ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru RegexMatcher mNewLineNormalizer; 231ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru RegexMatcher mAmps; 232ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 233ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru Hashtable fNames; // interned element/attribute name strings 234ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UStack fElementStack; // Stack holds the parent elements when nested 235ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // elements are being parsed. All items on this 236ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // stack are of type UXMLElement. 237ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t fPos; // String index of the current scan position in 238ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // xml source (in fSrc). 239ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UnicodeString fOneLF; 240ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 241ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 242ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_NAMESPACE_END 243ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS */ 244ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 245ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif 246