1/* 2 * Copyright (C) 2000 Peter Kelly (pmk@post.com) 3 * Copyright (C) 2005, 2006, 2007 Apple Inc. All rights reserved. 4 * Copyright (C) 2007 Samuel Weinig (sam@webkit.org) 5 * Copyright (C) 2008 Nokia Corporation and/or its subsidiary(-ies) 6 * Copyright (C) 2008, 2009 Torch Mobile Inc. All rights reserved. (http://www.torchmobile.com/) 7 * 8 * This library is free software; you can redistribute it and/or 9 * modify it under the terms of the GNU Library General Public 10 * License as published by the Free Software Foundation; either 11 * version 2 of the License, or (at your option) any later version. 12 * 13 * This library is distributed in the hope that it will be useful, 14 * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 16 * Library General Public License for more details. 17 * 18 * You should have received a copy of the GNU Library General Public License 19 * along with this library; see the file COPYING.LIB. If not, write to 20 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 21 * Boston, MA 02110-1301, USA. 22 * 23 */ 24 25#ifndef XMLTokenizer_h 26#define XMLTokenizer_h 27 28#if USE(EXPAT) 29 30#include "CachedResourceClient.h" 31#include "SegmentedString.h" 32#include "StringHash.h" 33#include "Tokenizer.h" 34#include <libexpat/expat.h> 35#include <wtf/HashMap.h> 36#include <wtf/OwnPtr.h> 37 38namespace WebCore { 39 40 class Node; 41 class CachedScript; 42 class DocumentFragment; 43 class Document; 44 class Element; 45 class FrameView; 46 class PendingCallbacks; 47 48 class XMLTokenizer : public Tokenizer, public CachedResourceClient { 49 public: 50 XMLTokenizer(Document *, FrameView * = 0); 51 XMLTokenizer(DocumentFragment *, Element *); 52 ~XMLTokenizer(); 53 54 enum ErrorType { warning, nonFatal, fatal }; 55 56 // from Tokenizer 57 virtual bool write(const SegmentedString &str, bool); 58 virtual void finish(); 59 virtual bool isWaitingForScripts() const; 60 virtual void stopParsing(); 61 virtual bool wellFormed() const { return !m_sawError; } 62 virtual int lineNumber() const; 63 virtual int columnNumber() const; 64 65 // from CachedObjectClient 66 virtual void notifyFinished(CachedResource *finishedObj); 67 68 // callbacks from parser expat 69 void startElementNs(const XML_Char *name, const XML_Char **atts); 70 void endElementNs(); 71 void characters(const XML_Char *s, int len); 72 void processingInstruction(const XML_Char *target, const XML_Char *data); 73 void comment(const XML_Char *s); 74 void startCdata(); 75 void endCdata(); 76 77 void error(ErrorType type, const char* m, int lineNumber, int columnNumber); 78 79 // utilities 80 XML_Parser getXMLParser() const { return m_parser; } 81 void setXMLParser(XML_Parser parser) { m_parser = parser; } 82 83 private: 84 void setCurrentNode(Node*); 85 86 void end(); 87 88 void pauseParsing(); 89 void resumeParsing(); 90 91 void reportError(); 92 void insertErrorMessageBlock(); 93 94 bool enterText(); 95 void exitText(); 96 97 Document *m_doc; 98 FrameView *m_view; 99 100 XML_Parser m_parser; 101 102 Node *m_currentNode; 103 bool m_currentNodeIsReferenced; 104 105 bool m_sawError; 106 bool m_sawXSLTransform; 107 bool m_sawFirstElement; 108 109 bool m_parserPaused; 110 bool m_requestingScript; 111 bool m_finishCalled; 112 113 int m_errorCount; 114 String m_errorMessages; 115 116 CachedScript *m_pendingScript; 117 RefPtr<Element> m_scriptElement; 118 int m_scriptStartLine; 119 120 bool m_parsingFragment; 121 String m_defaultNamespaceURI; 122 123 typedef HashMap<String, String> PrefixForNamespaceMap; 124 PrefixForNamespaceMap m_prefixToNamespaceMap; 125 126 OwnPtr<PendingCallbacks> m_pendingCallbacks; 127 SegmentedString m_pendingSrc; 128 }; 129 130HashMap<String, String> parseAttributes(const String&, bool& attrsOK); 131bool parseXMLDocumentFragment(const String&, DocumentFragment*, Element* parent = 0); 132 133} // namespace WebCore 134 135#else // USE(EXPAT) 136 137#include "CachedResourceClient.h" 138#include "CachedResourceHandle.h" 139#include "MappedAttributeEntry.h" 140#include "SegmentedString.h" 141#include "StringHash.h" 142#include "Tokenizer.h" 143#include <wtf/HashMap.h> 144#include <wtf/OwnPtr.h> 145 146#if USE(QXMLSTREAM) 147#include <qxmlstream.h> 148#else 149#include <libxml/tree.h> 150#include <libxml/xmlstring.h> 151#endif 152 153namespace WebCore { 154 155 class Node; 156 class CachedScript; 157 class DocLoader; 158 class DocumentFragment; 159 class Document; 160 class Element; 161 class FrameView; 162 class PendingCallbacks; 163 class ScriptElement; 164 165#if !USE(QXMLSTREAM) 166 class XMLParserContext : public RefCounted<XMLParserContext> { 167 public: 168 static PassRefPtr<XMLParserContext> createMemoryParser(xmlSAXHandlerPtr, void*, const char*); 169 static PassRefPtr<XMLParserContext> createStringParser(xmlSAXHandlerPtr, void*); 170 ~XMLParserContext(); 171 xmlParserCtxtPtr context() const { return m_context; } 172 173 private: 174 XMLParserContext(xmlParserCtxtPtr context) 175 : m_context(context) 176 { 177 } 178 xmlParserCtxtPtr m_context; 179 }; 180#endif 181 182 class XMLTokenizer : public Tokenizer, public CachedResourceClient { 183 public: 184 XMLTokenizer(Document*, FrameView* = 0); 185 XMLTokenizer(DocumentFragment*, Element*, FragmentScriptingPermission); 186 ~XMLTokenizer(); 187 188 enum ErrorType { warning, nonFatal, fatal }; 189 190 // from Tokenizer 191 virtual void write(const SegmentedString&, bool appendData); 192 virtual void finish(); 193 virtual bool isWaitingForScripts() const; 194 virtual void stopParsing(); 195 196 void end(); 197 198 void pauseParsing(); 199 void resumeParsing(); 200 201 void setIsXHTMLDocument(bool isXHTML) { m_isXHTMLDocument = isXHTML; } 202 bool isXHTMLDocument() const { return m_isXHTMLDocument; } 203#if ENABLE(XHTMLMP) 204 void setIsXHTMLMPDocument(bool isXHTML) { m_isXHTMLMPDocument = isXHTML; } 205 bool isXHTMLMPDocument() const { return m_isXHTMLMPDocument; } 206#endif 207#if ENABLE(WML) 208 bool isWMLDocument() const; 209#endif 210 211 // from CachedResourceClient 212 virtual void notifyFinished(CachedResource* finishedObj); 213 214 215 void handleError(ErrorType type, const char* m, int lineNumber, int columnNumber); 216 217 virtual bool wellFormed() const { return !m_sawError; } 218 219 int lineNumber() const; 220 int columnNumber() const; 221 222#if USE(QXMLSTREAM) 223private: 224 void parse(); 225 void startDocument(); 226 void parseStartElement(); 227 void parseEndElement(); 228 void parseCharacters(); 229 void parseProcessingInstruction(); 230 void parseCdata(); 231 void parseComment(); 232 void endDocument(); 233 void parseDtd(); 234 bool hasError() const; 235#else 236public: 237 // callbacks from parser SAX 238 void error(ErrorType, const char* message, va_list args) WTF_ATTRIBUTE_PRINTF(3, 0); 239 void startElementNs(const xmlChar* xmlLocalName, const xmlChar* xmlPrefix, const xmlChar* xmlURI, int nb_namespaces, 240 const xmlChar** namespaces, int nb_attributes, int nb_defaulted, const xmlChar** libxmlAttributes); 241 void endElementNs(); 242 void characters(const xmlChar* s, int len); 243 void processingInstruction(const xmlChar* target, const xmlChar* data); 244 void cdataBlock(const xmlChar* s, int len); 245 void comment(const xmlChar* s); 246 void startDocument(const xmlChar* version, const xmlChar* encoding, int standalone); 247 void internalSubset(const xmlChar* name, const xmlChar* externalID, const xmlChar* systemID); 248 void endDocument(); 249#endif 250 private: 251 friend bool parseXMLDocumentFragment(const String&, DocumentFragment*, Element*, FragmentScriptingPermission); 252 253 void initializeParserContext(const char* chunk = 0); 254 255 void pushCurrentNode(Node*); 256 void popCurrentNode(); 257 void clearCurrentNodeStack(); 258 259 void insertErrorMessageBlock(); 260 261 bool enterText(); 262 void exitText(); 263 264 void doWrite(const String&); 265 void doEnd(); 266 267 Document* m_doc; 268 FrameView* m_view; 269 270 String m_originalSourceForTransform; 271 272#if USE(QXMLSTREAM) 273 QXmlStreamReader m_stream; 274 bool m_wroteText; 275#else 276 xmlParserCtxtPtr context() const { return m_context ? m_context->context() : 0; }; 277 RefPtr<XMLParserContext> m_context; 278 OwnPtr<PendingCallbacks> m_pendingCallbacks; 279 Vector<xmlChar> m_bufferedText; 280#endif 281 Node* m_currentNode; 282 Vector<Node*> m_currentNodeStack; 283 284 bool m_sawError; 285 bool m_sawXSLTransform; 286 bool m_sawFirstElement; 287 bool m_isXHTMLDocument; 288#if ENABLE(XHTMLMP) 289 bool m_isXHTMLMPDocument; 290 bool m_hasDocTypeDeclaration; 291#endif 292 293 bool m_parserPaused; 294 bool m_requestingScript; 295 bool m_finishCalled; 296 297 int m_errorCount; 298 int m_lastErrorLine; 299 int m_lastErrorColumn; 300 String m_errorMessages; 301 302 CachedResourceHandle<CachedScript> m_pendingScript; 303 RefPtr<Element> m_scriptElement; 304 int m_scriptStartLine; 305 306 bool m_parsingFragment; 307 String m_defaultNamespaceURI; 308 309 typedef HashMap<String, String> PrefixForNamespaceMap; 310 PrefixForNamespaceMap m_prefixToNamespaceMap; 311 SegmentedString m_pendingSrc; 312 FragmentScriptingPermission m_scriptingPermission; 313 }; 314 315#if ENABLE(XSLT) 316void* xmlDocPtrForString(DocLoader*, const String& source, const String& url); 317#endif 318 319HashMap<String, String> parseAttributes(const String&, bool& attrsOK); 320bool parseXMLDocumentFragment(const String&, DocumentFragment*, Element* parent = 0, FragmentScriptingPermission = FragmentScriptingAllowed); 321 322} // namespace WebCore 323 324#endif // USE(EXPAT) 325 326#endif // XMLTokenizer_h 327