10596faeddefbf198de137d5e893708495ab1584cFredrik Roubert// © 2016 and later: Unicode, Inc. and others.
264339d36f8bd4db5025fe2988eda22b491a9219cFredrik Roubert// License & terms of use: http://www.unicode.org/copyright.html
3b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*
4b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*******************************************************************************
5b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*
650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho*   Copyright (C) 2004-2010, International Business Machines
7b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*   Corporation and others.  All Rights Reserved.
8b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*
9b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*******************************************************************************
10b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*   file name:  xmlparser.cpp
110596faeddefbf198de137d5e893708495ab1584cFredrik Roubert*   encoding:   UTF-8
12b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*   tab size:   8 (not used)
13b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*   indentation:4
14b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*
15b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*   created on: 2004jul21
16b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*   created by: Andy Heninger
17b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/
18b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
19b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include <stdio.h>
20b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/uchar.h"
21b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/ucnv.h"
22b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/regex.h"
23b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "filestrm.h"
24b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "xmlparser.h"
25b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
26b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if !UCONFIG_NO_REGULAR_EXPRESSIONS && !UCONFIG_NO_CONVERSION
27b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
// Character constants.
// Each value is the code point of the character shown in the trailing comment,
// spelled numerically rather than as a character literal.
enum {
    x_QUOT = 0x22,    // "
    x_AMP  = 0x26,    // &
    x_APOS = 0x27,    // '
    x_LT   = 0x3c,    // <
    x_GT   = 0x3e,    // >
    x_l    = 0x6c     // l
};
37b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
// Regular-expression building blocks.  Each macro expands to a string literal
// so that the pieces can be joined by adjacent-literal concatenation.

// Set matching one XML white space character: space, tab, CR or LF.
#define  XML_SPACES "[ \\u0009\\u000d\\u000a]"

// XML #4: set matching one character that may start a name.
#define  XML_NAMESTARTCHAR \
    "[" \
        "[A-Z]:_[a-z]" \
        "[\\u00c0-\\u00d6]" \
        "[\\u00d8-\\u00f6]" \
        "[\\u00f8-\\u02ff]" \
        "[\\u0370-\\u037d]" \
        "[\\u037F-\\u1FFF]" \
        "[\\u200C-\\u200D]" \
        "[\\u2070-\\u218F]" \
        "[\\u2C00-\\u2FEF]" \
        "[\\u3001-\\uD7FF]" \
        "[\\uF900-\\uFDCF]" \
        "[\\uFDF0-\\uFFFD]" \
        "[\\U00010000-\\U000EFFFF]" \
    "]"

// XML #5: set matching one character that may appear after the first
//         character of a name (the start characters plus a few more).
#define  XML_NAMECHAR \
    "[" \
        XML_NAMESTARTCHAR \
        "\\-.[0-9]" \
        "\\u00b7" \
        "[\\u0300-\\u036f]" \
        "[\\u203f-\\u2040]" \
    "]"

// XML #6: a complete name, i.e. one start character followed by any number
//         of name characters.
#define  XML_NAME    XML_NAMESTARTCHAR "(?:" XML_NAMECHAR ")*"
51b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
52b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_NAMESPACE_BEGIN
53b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
54b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUOBJECT_DEFINE_RTTI_IMPLEMENTATION(UXMLParser)
55b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUOBJECT_DEFINE_RTTI_IMPLEMENTATION(UXMLElement)
56b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
57b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
58b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//   UXMLParser constructor.   Mostly just initializes the ICU regexes that are
59b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//                             used for parsing.
60b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
61b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUXMLParser::UXMLParser(UErrorCode &status) :
62b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      //  XML Declaration.  XML Production #23.
63b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      //      example:  "<?xml version=1.0 encoding="utf-16" ?>
64b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      //      This is a sloppy implementation - just look for the leading <?xml and the closing ?>
65b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      //            allow for a possible leading BOM.
66c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru      mXMLDecl(UnicodeString("(?s)\\uFEFF?<\\?xml.+?\\?>", -1, US_INV), 0, status),
67b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
68b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      //  XML Comment   production #15
69b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      //     example:  "<!-- whatever -->
70b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      //       note, does not detect an illegal "--" within comments
71c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru      mXMLComment(UnicodeString("(?s)<!--.+?-->", -1, US_INV), 0, status),
72b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
73b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      //  XML Spaces
74b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      //      production [3]
75c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru      mXMLSP(UnicodeString(XML_SPACES "+", -1, US_INV), 0, status),
76b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
77b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      //  XML Doctype decl  production #28
78b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      //     example   "<!DOCTYPE foo SYSTEM "somewhere" >
79c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru      //       or      "<!DOCTYPE foo [internal dtd]>
80b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      //    TODO:  we don't actually parse the DOCTYPE or internal subsets.
81b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      //           Some internal dtd subsets could confuse this simple-minded
82c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru      //           attempt at skipping over them, specifically, occcurences
83c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru      //           of closeing square brackets.  These could appear in comments,
84c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru      //           or in parameter entity declarations, for example.
85c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru      mXMLDoctype(UnicodeString(
86c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru           "(?s)<!DOCTYPE.*?(>|\\[.*?\\].*?>)", -1, US_INV
87c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru           ), 0, status),
88b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
89b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      //  XML PI     production #16
90b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      //     example   "<?target stuff?>
91c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru      mXMLPI(UnicodeString("(?s)<\\?.+?\\?>", -1, US_INV), 0, status),
92b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
93b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      //  XML Element Start   Productions #40, #41
94b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      //          example   <foo att1='abc'  att2="d e f" >
95b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      //      capture #1:  the tag name
96b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      //
97b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      mXMLElemStart (UnicodeString("(?s)<(" XML_NAME ")"                                 // match  "<tag_name"
98b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru          "(?:"
99b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                XML_SPACES "+" XML_NAME XML_SPACES "*=" XML_SPACES "*"     // match  "ATTR_NAME = "
100b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                "(?:(?:\\\'[^<\\\']*?\\\')|(?:\\\"[^<\\\"]*?\\\"))"        // match  '"attribute value"'
101b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru          ")*"                                                             //   * for zero or more attributes.
102c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru          XML_SPACES "*?>", -1, US_INV), 0, status),                               // match " >"
103b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
104b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      //  XML Element End     production #42
105b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      //     example   </foo>
106c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru      mXMLElemEnd (UnicodeString("</(" XML_NAME ")" XML_SPACES "*>", -1, US_INV), 0, status),
107b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
108b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      // XML Element Empty    production #44
109b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      //     example   <foo att1="abc"   att2="d e f" />
110b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      mXMLElemEmpty (UnicodeString("(?s)<(" XML_NAME ")"                                 // match  "<tag_name"
111b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru          "(?:"
112b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                XML_SPACES "+" XML_NAME XML_SPACES "*=" XML_SPACES "*"     // match  "ATTR_NAME = "
113b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                "(?:(?:\\\'[^<\\\']*?\\\')|(?:\\\"[^<\\\"]*?\\\"))"        // match  '"attribute value"'
114b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru          ")*"                                                             //   * for zero or more attributes.
115c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru          XML_SPACES "*?/>", -1, US_INV), 0, status),                              // match " />"
116b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
117b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
118b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      // XMLCharData.  Everything but '<'.  Note that & will be dealt with later.
119c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru      mXMLCharData(UnicodeString("(?s)[^<]*", -1, US_INV), 0, status),
120b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
121b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      // Attribute name = "value".  XML Productions 10, 40/41
122b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      //  Capture group 1 is name,
123b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      //                2 is the attribute value, including the quotes.
124b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      //
125b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      //   Note that attributes are scanned twice.  The first time is with
126b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      //        the regex for an entire element start.  There, the attributes
127b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      //        are checked syntactically, but not separted out one by one.
128b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      //        Here, we match a single attribute, and make its name and
129b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      //        attribute value available to the parser code.
130b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      mAttrValue(UnicodeString(XML_SPACES "+("  XML_NAME ")"  XML_SPACES "*=" XML_SPACES "*"
131c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru         "((?:\\\'[^<\\\']*?\\\')|(?:\\\"[^<\\\"]*?\\\"))", -1, US_INV), 0, status),
132b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
133b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
134c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru      mAttrNormalizer(UnicodeString(XML_SPACES, -1, US_INV), 0, status),
135b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
136b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      // Match any of the new-line sequences in content.
137b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      //   All are changed to \u000a.
138c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru      mNewLineNormalizer(UnicodeString("\\u000d\\u000a|\\u000d\\u0085|\\u000a|\\u000d|\\u0085|\\u2028", -1, US_INV), 0, status),
139b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
140b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      // & char references
141b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      //   We will figure out what we've got based on which capture group has content.
142b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      //   The last one is a catchall for unrecognized entity references..
143b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      //             1     2     3      4      5           6                    7          8
144b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      mAmps(UnicodeString("&(?:(amp;)|(lt;)|(gt;)|(apos;)|(quot;)|#x([0-9A-Fa-f]{1,8});|#([0-9]{1,8});|(.))"),
145b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                0, status),
146b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
147b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      fNames(status),
148b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      fElementStack(status),
149b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      fOneLF((UChar)0x0a)        // Plain new-line string, used in new line normalization.
150b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      {
151b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      }
152b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
153b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUXMLParser *
154b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUXMLParser::createParser(UErrorCode &errorCode) {
155b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(errorCode)) {
156b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return NULL;
157b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
158b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return new UXMLParser(errorCode);
159b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
160b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
161b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
162b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUXMLParser::~UXMLParser() {}
163b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
164b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUXMLElement *
165b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUXMLParser::parseFile(const char *filename, UErrorCode &errorCode) {
166b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    char bytes[4096], charsetBuffer[100];
167b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    FileStream *f;
168b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const char *charset, *pb;
169b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString src;
170b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UConverter *cnv;
171b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UChar *buffer, *pu;
172b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t fileLength, bytesLength, length, capacity;
173b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool flush;
174b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
175b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(U_FAILURE(errorCode)) {
176b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return NULL;
177b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
178b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
179b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    f=T_FileStream_open(filename, "rb");
180b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(f==NULL) {
181b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errorCode=U_FILE_ACCESS_ERROR;
182b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return NULL;
183b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
184b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
185b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    bytesLength=T_FileStream_read(f, bytes, (int32_t)sizeof(bytes));
186b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(bytesLength<(int32_t)sizeof(bytes)) {
187b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // we have already read the entire file
188b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        fileLength=bytesLength;
189b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
190b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // get the file length
191b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        fileLength=T_FileStream_size(f);
192b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
193b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
194b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /*
195b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * get the charset:
196b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * 1. Unicode signature
197b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * 2. treat as ISO-8859-1 and read XML encoding="charser"
198b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * 3. default to UTF-8
199b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     */
200b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    charset=ucnv_detectUnicodeSignature(bytes, bytesLength, NULL, &errorCode);
201b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(U_SUCCESS(errorCode) && charset!=NULL) {
202b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // open converter according to Unicode signature
203b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        cnv=ucnv_open(charset, &errorCode);
204b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
205b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // read as Latin-1 and parse the XML declaration and encoding
206b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        cnv=ucnv_open("ISO-8859-1", &errorCode);
207b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(U_FAILURE(errorCode)) {
208b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // unexpected error opening Latin-1 converter
209b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            goto exit;
210b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
211b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2120596faeddefbf198de137d5e893708495ab1584cFredrik Roubert        buffer=toUCharPtr(src.getBuffer(bytesLength));
213b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(buffer==NULL) {
214b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // unexpected failure to reserve some string capacity
215b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            errorCode=U_MEMORY_ALLOCATION_ERROR;
216b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            goto exit;
217b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
218b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        pb=bytes;
219b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        pu=buffer;
220b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        ucnv_toUnicode(
221b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            cnv,
222b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            &pu, buffer+src.getCapacity(),
223b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            &pb, bytes+bytesLength,
224b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            NULL, TRUE, &errorCode);
225b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        src.releaseBuffer(U_SUCCESS(errorCode) ? (int32_t)(pu-buffer) : 0);
226b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        ucnv_close(cnv);
227b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        cnv=NULL;
228b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(U_FAILURE(errorCode)) {
229b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // unexpected error in conversion from Latin-1
230b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            src.remove();
231b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            goto exit;
232b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
233b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
234b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // parse XML declaration
235b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(mXMLDecl.reset(src).lookingAt(0, errorCode)) {
236b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            int32_t declEnd=mXMLDecl.end(errorCode);
237b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // go beyond <?xml
238b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            int32_t pos=src.indexOf((UChar)x_l)+1;
239b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
240b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            mAttrValue.reset(src);
241b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            while(pos<declEnd && mAttrValue.lookingAt(pos, errorCode)) {  // loop runs once per attribute on this element.
242b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                UnicodeString attName  = mAttrValue.group(1, errorCode);
243b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                UnicodeString attValue = mAttrValue.group(2, errorCode);
244b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
245b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // Trim the quotes from the att value.  These are left over from the original regex
246b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                //   that parsed the attribue, which couldn't conveniently strip them.
247b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                attValue.remove(0,1);                    // one char from the beginning
248b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                attValue.truncate(attValue.length()-1);  // and one from the end.
249b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
250b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if(attName==UNICODE_STRING("encoding", 8)) {
251b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    length=attValue.extract(0, 0x7fffffff, charsetBuffer, (int32_t)sizeof(charsetBuffer));
252b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    charset=charsetBuffer;
253b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    break;
254b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
255b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                pos = mAttrValue.end(2, errorCode);
256b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
257b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
258b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(charset==NULL) {
259b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // default to UTF-8
260b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                charset="UTF-8";
261b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
262b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            cnv=ucnv_open(charset, &errorCode);
263b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
264b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
265b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
266b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(U_FAILURE(errorCode)) {
267b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // unable to open the converter
268b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        goto exit;
269b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
270b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
271b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // convert the file contents
272b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    capacity=fileLength;        // estimated capacity
273b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    src.getBuffer(capacity);
274b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    src.releaseBuffer(0);       // zero length
275b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    flush=FALSE;
276b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for(;;) {
277b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // convert contents of bytes[bytesLength]
278b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        pb=bytes;
279b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        for(;;) {
280b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            length=src.length();
2810596faeddefbf198de137d5e893708495ab1584cFredrik Roubert            buffer=toUCharPtr(src.getBuffer(capacity));
282b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(buffer==NULL) {
283b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // unexpected failure to reserve some string capacity
284b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                errorCode=U_MEMORY_ALLOCATION_ERROR;
285b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                goto exit;
286b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
287b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
288b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            pu=buffer+length;
289b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            ucnv_toUnicode(
290b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                cnv, &pu, buffer+src.getCapacity(),
291b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                &pb, bytes+bytesLength,
292b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                NULL, FALSE, &errorCode);
293b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            src.releaseBuffer(U_SUCCESS(errorCode) ? (int32_t)(pu-buffer) : 0);
294b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
295b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                errorCode=U_ZERO_ERROR;
296b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                capacity=(3*src.getCapacity())/2; // increase capacity by 50%
297b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else {
298b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                break;
299b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
300b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
301b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
302b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(U_FAILURE(errorCode)) {
303b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break; // conversion error
304b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
305b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
306b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(flush) {
307b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break; // completely converted the file
308b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
309b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
310b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // read next block
311b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        bytesLength=T_FileStream_read(f, bytes, (int32_t)sizeof(bytes));
312b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(bytesLength==0) {
313b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // reached end of file, convert once more to flush the converter
314b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            flush=TRUE;
315b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
316b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    };
317b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
318b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruexit:
319b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    ucnv_close(cnv);
320b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    T_FileStream_close(f);
321b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
322b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(U_SUCCESS(errorCode)) {
323b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return parse(src, errorCode);
324b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
325b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return NULL;
326b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
327b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
328b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
329b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUXMLElement *
330b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUXMLParser::parse(const UnicodeString &src, UErrorCode &status) {
331b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(U_FAILURE(status)) {
332b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return NULL;
333b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
334b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
335b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UXMLElement   *root = NULL;
336b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    fPos = 0; // TODO use just a local pos variable and pass it into functions
337b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru              // where necessary?
338b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
339b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // set all matchers to work on the input string
340b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    mXMLDecl.reset(src);
341b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    mXMLComment.reset(src);
342b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    mXMLSP.reset(src);
343b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    mXMLDoctype.reset(src);
344b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    mXMLPI.reset(src);
345b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    mXMLElemStart.reset(src);
346b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    mXMLElemEnd.reset(src);
347b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    mXMLElemEmpty.reset(src);
348b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    mXMLCharData.reset(src);
349b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    mAttrValue.reset(src);
350b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    mAttrNormalizer.reset(src);
351b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    mNewLineNormalizer.reset(src);
352b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    mAmps.reset(src);
353b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
354b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Consume the XML Declaration, if present.
355b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (mXMLDecl.lookingAt(fPos, status)) {
356b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        fPos = mXMLDecl.end(status);
357b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
358b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
359b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Consume "misc" [XML production 27] appearing before DocType
360b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    parseMisc(status);
361b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
362b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Consume a DocType declaration, if present.
363b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (mXMLDoctype.lookingAt(fPos, status)) {
364b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        fPos = mXMLDoctype.end(status);
365b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
366b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
367b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Consume additional "misc" [XML production 27] appearing after the DocType
368b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    parseMisc(status);
369b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
370b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Get the root element
371b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (mXMLElemEmpty.lookingAt(fPos, status)) {
372b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Root is an empty element (no nested elements or content)
373b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        root = createElement(mXMLElemEmpty, status);
374b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        fPos = mXMLElemEmpty.end(status);
375b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
376b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (mXMLElemStart.lookingAt(fPos, status) == FALSE) {
377b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            error("Root Element expected", status);
378b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            goto errorExit;
379b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
380b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        root = createElement(mXMLElemStart, status);
381b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UXMLElement  *el = root;
382b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
383b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //
384b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // This is the loop that consumes the root element of the document,
385b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //      including all nested content.   Nested elements are handled by
386b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //      explicit pushes/pops of the element stack; there is no recursion
387b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //      in the control flow of this code.
388b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //      "el" always refers to the current element, the one to which content
389b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //      is being added.  It is above the top of the element stack.
390b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        for (;;) {
391b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // Nested Element Start
392b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (mXMLElemStart.lookingAt(fPos, status)) {
393b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                UXMLElement *t = createElement(mXMLElemStart, status);
394b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                el->fChildren.addElement(t, status);
395b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                t->fParent = el;
396b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                fElementStack.push(el, status);
397b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                el = t;
398b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                continue;
399b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
400b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
401b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // Text Content.  String is concatenated onto the current node's content,
402b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            //                but only if it contains something other than spaces.
403b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            UnicodeString s = scanContent(status);
404b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (s.length() > 0) {
405b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                mXMLSP.reset(s);
406b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (mXMLSP.matches(status) == FALSE) {
407b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    // This chunk of text contains something other than just
408b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    //  white space. Make a child node for it.
409b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    replaceCharRefs(s, status);
410b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    el->fChildren.addElement(s.clone(), status);
411b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
412b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                mXMLSP.reset(src);    // The matchers need to stay set to the main input string.
413b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                continue;
414b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
415b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
416b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // Comments.  Discard.
417b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (mXMLComment.lookingAt(fPos, status)) {
418b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                fPos = mXMLComment.end(status);
419b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                continue;
420b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
421b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
422b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // PIs.  Discard.
423b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (mXMLPI.lookingAt(fPos, status)) {
424b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                fPos = mXMLPI.end(status);
425b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                continue;
426b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
427b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
428b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // Element End
429b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (mXMLElemEnd.lookingAt(fPos, status)) {
430b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                fPos = mXMLElemEnd.end(0, status);
431b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                const UnicodeString name = mXMLElemEnd.group(1, status);
432b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (name != *el->fName) {
433b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    error("Element start / end tag mismatch", status);
434b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    goto errorExit;
435b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
436b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (fElementStack.empty()) {
437b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    // Close of the root element.  We're done with the doc.
438b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    el = NULL;
439b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    break;
440b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
441b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                el = (UXMLElement *)fElementStack.pop();
442b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                continue;
443b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
444b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
445b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // Empty Element.  Stored as a child of the current element, but not stacked.
446b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (mXMLElemEmpty.lookingAt(fPos, status)) {
447b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                UXMLElement *t = createElement(mXMLElemEmpty, status);
448b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                el->fChildren.addElement(t, status);
449b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                continue;
450b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
451b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
452b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // Hit something within the document that doesn't match anything.
453b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            //   It's an error.
454b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            error("Unrecognized markup", status);
455b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
456b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
457b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
458b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (el != NULL || !fElementStack.empty()) {
459b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // We bailed out early, for some reason.
460b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            error("Root element not closed.", status);
461b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            goto errorExit;
462b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
463b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
464b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
465b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Root Element parse is complete.
466b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Consume the annoying xml "Misc" that can appear at the end of the doc.
467b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    parseMisc(status);
468b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
469b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // We should have reached the end of the input
470b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (fPos != src.length()) {
471b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        error("Extra content at the end of the document", status);
472b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        goto errorExit;
473b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
474b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
475b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Success!
476b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return root;
477b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
478b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruerrorExit:
479b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    delete root;
480b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return NULL;
481b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
482b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
483b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
484b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//  createElement
485b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//      We've just matched an element start tag.  Create and fill in a UXMLElement object
486b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//      for it.
487b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
488b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUXMLElement *
489b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUXMLParser::createElement(RegexMatcher  &mEl, UErrorCode &status) {
490b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // First capture group is the element's name.
491b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UXMLElement *el = new UXMLElement(this, intern(mEl.group(1, status), status), status);
492b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
493b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Scan for attributes.
494b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t   pos = mEl.end(1, status);  // The position after the end of the tag name
495b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
496b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    while (mAttrValue.lookingAt(pos, status)) {  // loop runs once per attribute on this element.
497b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeString attName  = mAttrValue.group(1, status);
498b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeString attValue = mAttrValue.group(2, status);
499b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
500b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Trim the quotes from the att value.  These are left over from the original regex
501b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //   that parsed the attribue, which couldn't conveniently strip them.
502b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        attValue.remove(0,1);                    // one char from the beginning
503b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        attValue.truncate(attValue.length()-1);  // and one from the end.
504b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
505b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // XML Attribue value normalization.
506b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // This is one of the really screwy parts of the XML spec.
507b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // See http://www.w3.org/TR/2004/REC-xml11-20040204/#AVNormalize
508b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Note that non-validating parsers must treat all entities as type CDATA
509b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //   which simplifies things some.
510b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
511b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Att normalization step 1:  normalize any newlines in the attribute value
512b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        mNewLineNormalizer.reset(attValue);
513b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        attValue = mNewLineNormalizer.replaceAll(fOneLF, status);
514b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
515b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Next change all xml white space chars to plain \u0020 spaces.
516b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        mAttrNormalizer.reset(attValue);
517b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeString oneSpace((UChar)0x0020);
518b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        attValue = mAttrNormalizer.replaceAll(oneSpace, status);
519b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
520b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Replace character entities.
521b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        replaceCharRefs(attValue, status);
522b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
523b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Save the attribute name and value in our document structure.
524b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        el->fAttNames.addElement((void *)intern(attName, status), status);
525b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        el->fAttValues.addElement(attValue.clone(), status);
526b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        pos = mAttrValue.end(2, status);
527b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
528b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    fPos = mEl.end(0, status);
529b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return el;
530b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
531b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
532b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
533b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//  parseMisc
534b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//     Consume XML "Misc" [production #27]
535b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//        which is any combination of space, PI and comments
536b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//      Need to watch end-of-input because xml MISC stuff is allowed after
537b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//        the document element, so we WILL scan off the end in this function
538b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
539b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid
540b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUXMLParser::parseMisc(UErrorCode &status)  {
541b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for (;;) {
542b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (fPos >= mXMLPI.input().length()) {
543b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
544b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
545b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (mXMLPI.lookingAt(fPos, status)) {
546b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            fPos = mXMLPI.end(status);
547b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            continue;
548b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
549b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (mXMLSP.lookingAt(fPos, status)) {
550b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            fPos = mXMLSP.end(status);
551b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            continue;
552b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
553b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (mXMLComment.lookingAt(fPos, status)) {
554b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            fPos = mXMLComment.end(status);
555b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            continue;
556b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
557b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        break;
558b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
559b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
560b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
561b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
562b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//  Scan for document content.
563b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
564b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeString
565b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUXMLParser::scanContent(UErrorCode &status) {
566b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString  result;
567b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (mXMLCharData.lookingAt(fPos, status)) {
56850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        result = mXMLCharData.group((int32_t)0, status);
569b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Normalize the new-lines.  (Before char ref substitution)
570b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        mNewLineNormalizer.reset(result);
571b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        result = mNewLineNormalizer.replaceAll(fOneLF, status);
572b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
573b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // TODO:  handle CDATA
574b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        fPos = mXMLCharData.end(0, status);
575b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
576b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
577b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return result;
578b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
579b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
580b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
581b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//   replaceCharRefs
582b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
583b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//      replace the char entities &lt;  &amp; &#123; &#x12ab; etc. in a string
584b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//       with the corresponding actual character.
585b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
586b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid
587b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUXMLParser::replaceCharRefs(UnicodeString &s, UErrorCode &status) {
588b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString result;
589b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString replacement;
590b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int     i;
591b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
592b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    mAmps.reset(s);
593b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // See the initialization for the regex matcher mAmps.
594b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //    Which entity we've matched is determined by which capture group has content,
595b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //      which is flaged by start() of that group not being -1.
596b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    while (mAmps.find()) {
597b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (mAmps.start(1, status) != -1) {
598b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            replacement.setTo((UChar)x_AMP);
599b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else if (mAmps.start(2, status) != -1) {
600b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            replacement.setTo((UChar)x_LT);
601b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else if (mAmps.start(3, status) != -1) {
602b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            replacement.setTo((UChar)x_GT);
603b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else if (mAmps.start(4, status) != -1) {
604b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            replacement.setTo((UChar)x_APOS);
605b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else if (mAmps.start(5, status) != -1) {
606b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            replacement.setTo((UChar)x_QUOT);
607b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else if (mAmps.start(6, status) != -1) {
608b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            UnicodeString hexString = mAmps.group(6, status);
609b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            UChar32 val = 0;
610b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            for (i=0; i<hexString.length(); i++) {
611b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                val = (val << 4) + u_digit(hexString.charAt(i), 16);
612b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
613b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // TODO:  some verification that the character is valid
614b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            replacement.setTo(val);
615b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else if (mAmps.start(7, status) != -1) {
616b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            UnicodeString decimalString = mAmps.group(7, status);
617b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            UChar32 val = 0;
618b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            for (i=0; i<decimalString.length(); i++) {
619b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                val = val*10 + u_digit(decimalString.charAt(i), 10);
620b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
621b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // TODO:  some verification that the character is valid
622b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            replacement.setTo(val);
623b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else {
624b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // An unrecognized &entity;  Leave it alone.
625b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            //  TODO:  check that it really looks like an entity, and is not some
626b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            //         random & in the text.
62750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            replacement = mAmps.group((int32_t)0, status);
628b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
629b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        mAmps.appendReplacement(result, replacement, status);
630b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
631b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    mAmps.appendTail(result);
632b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    s = result;
633b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
634b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
635b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid
636b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUXMLParser::error(const char *message, UErrorCode &status) {
637b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // TODO:  something better here...
638b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const UnicodeString &src=mXMLDecl.input();
639b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int  line = 0;
640b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int  ci = 0;
641b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    while (ci < fPos && ci>=0) {
642b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        ci = src.indexOf((UChar)0x0a, ci+1);
643b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        line++;
644b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
645b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    fprintf(stderr, "Error: %s at line %d\n", message, line);
646b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_SUCCESS(status)) {
647b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        status = U_PARSE_ERROR;
648b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
649b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
650b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
651b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// intern strings like in Java
652b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
653b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruconst UnicodeString *
654b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUXMLParser::intern(const UnicodeString &s, UErrorCode &errorCode) {
655b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const UHashElement *he=fNames.find(s);
656b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(he!=NULL) {
657b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // already a known name, return its hashed key pointer
658b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return (const UnicodeString *)he->key.pointer;
659b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
660b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // add this new name and return its hashed key pointer
661b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        fNames.puti(s, 0, errorCode);
662b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        he=fNames.find(s);
663b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return (const UnicodeString *)he->key.pointer;
664b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
665b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
666b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
667b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruconst UnicodeString *
668b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUXMLParser::findName(const UnicodeString &s) const {
669b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const UHashElement *he=fNames.find(s);
670b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(he!=NULL) {
671b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // a known name, return its hashed key pointer
672b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return (const UnicodeString *)he->key.pointer;
673b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
674b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // unknown name
675b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return NULL;
676b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
677b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
678b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
679b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// UXMLElement ------------------------------------------------------------- ***
680b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
681b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUXMLElement::UXMLElement(const UXMLParser *parser, const UnicodeString *name, UErrorCode &errorCode) :
682b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   fParser(parser),
683b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   fName(name),
684b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   fAttNames(errorCode),
685b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   fAttValues(errorCode),
686b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   fChildren(errorCode),
687b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   fParent(NULL)
688b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
689b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
690b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
691b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUXMLElement::~UXMLElement() {
692b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int   i;
693b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // attribute names are owned by the UXMLParser, don't delete them here
694b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for (i=fAttValues.size()-1; i>=0; i--) {
695b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        delete (UObject *)fAttValues.elementAt(i);
696b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
697b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for (i=fChildren.size()-1; i>=0; i--) {
698b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        delete (UObject *)fChildren.elementAt(i);
699b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
700b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
701b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
702b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruconst UnicodeString &
703b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUXMLElement::getTagName() const {
704b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return *fName;
705b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
706b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
707b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeString
708b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUXMLElement::getText(UBool recurse) const {
709b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString text;
710b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    appendText(text, recurse);
711b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return text;
712b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
713b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
714b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid
715b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUXMLElement::appendText(UnicodeString &text, UBool recurse) const {
716b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const UObject *node;
717b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t i, count=fChildren.size();
718b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for(i=0; i<count; ++i) {
719b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        node=(const UObject *)fChildren.elementAt(i);
72027f654740f2a26ad62a5c155af9199af9e69b889claireho        const UnicodeString *s=dynamic_cast<const UnicodeString *>(node);
72127f654740f2a26ad62a5c155af9199af9e69b889claireho        if(s!=NULL) {
72227f654740f2a26ad62a5c155af9199af9e69b889claireho            text.append(*s);
723b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else if(recurse) /* must be a UXMLElement */ {
724b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            ((const UXMLElement *)node)->appendText(text, recurse);
725b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
726b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
727b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
728b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
729b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruint32_t
730b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUXMLElement::countAttributes() const {
731b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return fAttNames.size();
732b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
733b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
734b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruconst UnicodeString *
735b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUXMLElement::getAttribute(int32_t i, UnicodeString &name, UnicodeString &value) const {
736b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(0<=i && i<fAttNames.size()) {
737b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        name.setTo(*(const UnicodeString *)fAttNames.elementAt(i));
738b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        value.setTo(*(const UnicodeString *)fAttValues.elementAt(i));
739b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return &value; // or return (UnicodeString *)fAttValues.elementAt(i);
740b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
741b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return NULL;
742b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
743b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
744b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
745b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruconst UnicodeString *
746b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUXMLElement::getAttribute(const UnicodeString &name) const {
747b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // search for the attribute name by comparing the interned pointer,
748b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // not the string contents
749b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const UnicodeString *p=fParser->findName(name);
750b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(p==NULL) {
751b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return NULL; // no such attribute seen by the parser at all
752b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
753b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
754b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t i, count=fAttNames.size();
755b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for(i=0; i<count; ++i) {
756b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(p==(const UnicodeString *)fAttNames.elementAt(i)) {
757b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return (const UnicodeString *)fAttValues.elementAt(i);
758b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
759b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
760b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return NULL;
761b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
762b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
763b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruint32_t
764b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUXMLElement::countChildren() const {
765b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return fChildren.size();
766b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
767b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
768b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruconst UObject *
769b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUXMLElement::getChild(int32_t i, UXMLNodeType &type) const {
770b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(0<=i && i<fChildren.size()) {
771b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        const UObject *node=(const UObject *)fChildren.elementAt(i);
77227f654740f2a26ad62a5c155af9199af9e69b889claireho        if(dynamic_cast<const UXMLElement *>(node)!=NULL) {
773b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            type=UXML_NODE_TYPE_ELEMENT;
774b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else {
775b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            type=UXML_NODE_TYPE_STRING;
776b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
777b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return node;
778b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
779b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return NULL;
780b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
781b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
782b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
783b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruconst UXMLElement *
784b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUXMLElement::nextChildElement(int32_t &i) const {
785b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(i<0) {
786b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return NULL;
787b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
788b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
789b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const UObject *node;
790b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t count=fChildren.size();
791b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    while(i<count) {
792b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        node=(const UObject *)fChildren.elementAt(i++);
79327f654740f2a26ad62a5c155af9199af9e69b889claireho        const UXMLElement *elem=dynamic_cast<const UXMLElement *>(node);
79427f654740f2a26ad62a5c155af9199af9e69b889claireho        if(elem!=NULL) {
79527f654740f2a26ad62a5c155af9199af9e69b889claireho            return elem;
796b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
797b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
798b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return NULL;
799b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
800b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
801b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruconst UXMLElement *
802b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUXMLElement::getChildElement(const UnicodeString &name) const {
803b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // search for the element name by comparing the interned pointer,
804b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // not the string contents
805b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const UnicodeString *p=fParser->findName(name);
806b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(p==NULL) {
807b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return NULL; // no such element seen by the parser at all
808b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
809b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
810b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const UObject *node;
811b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t i, count=fChildren.size();
812b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for(i=0; i<count; ++i) {
813b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        node=(const UObject *)fChildren.elementAt(i);
81427f654740f2a26ad62a5c155af9199af9e69b889claireho        const UXMLElement *elem=dynamic_cast<const UXMLElement *>(node);
81527f654740f2a26ad62a5c155af9199af9e69b889claireho        if(elem!=NULL) {
816b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(p==elem->fName) {
817b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                return elem;
818b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
819b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
820b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
821b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return NULL;
822b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
823b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
824b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_NAMESPACE_END
825b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
826b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS */
827b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
828