1/*
2 * Copyright (C) 2000 Peter Kelly (pmk@post.com)
3 * Copyright (C) 2005, 2006, 2007 Apple Inc. All rights reserved.
4 * Copyright (C) 2007 Samuel Weinig (sam@webkit.org)
5 * Copyright (C) 2008 Nokia Corporation and/or its subsidiary(-ies)
6 * Copyright (C) 2008, 2009 Torch Mobile Inc. All rights reserved. (http://www.torchmobile.com/)
7 *
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Library General Public
10 * License as published by the Free Software Foundation; either
11 * version 2 of the License, or (at your option) any later version.
12 *
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16 * Library General Public License for more details.
17 *
18 * You should have received a copy of the GNU Library General Public License
19 * along with this library; see the file COPYING.LIB.  If not, write to
20 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
21 * Boston, MA 02110-1301, USA.
22 *
23 */
24
25#ifndef XMLTokenizer_h
26#define XMLTokenizer_h
27
28#if USE(EXPAT)
29
30#include "CachedResourceClient.h"
31#include "SegmentedString.h"
32#include "StringHash.h"
33#include "Tokenizer.h"
34#include <libexpat/expat.h>
35#include <wtf/HashMap.h>
36#include <wtf/OwnPtr.h>
37
38namespace WebCore {
39
40    class Node;
41    class CachedScript;
42    class DocumentFragment;
43    class Document;
44    class Element;
45    class FrameView;
46    class PendingCallbacks;
47
48    class XMLTokenizer : public Tokenizer, public CachedResourceClient {
49    public:
50        XMLTokenizer(Document *, FrameView * = 0);
51        XMLTokenizer(DocumentFragment *, Element *);
52        ~XMLTokenizer();
53
54        enum ErrorType { warning, nonFatal, fatal };
55
56        // from Tokenizer
57        virtual bool write(const SegmentedString &str, bool);
58        virtual void finish();
59        virtual bool isWaitingForScripts() const;
60        virtual void stopParsing();
61        virtual bool wellFormed() const { return !m_sawError; }
62        virtual int lineNumber() const;
63        virtual int columnNumber() const;
64
65        // from CachedObjectClient
66        virtual void notifyFinished(CachedResource *finishedObj);
67
68        // callbacks from parser expat
69        void startElementNs(const XML_Char *name, const XML_Char **atts);
70        void endElementNs();
71        void characters(const XML_Char *s, int len);
72        void processingInstruction(const XML_Char *target, const XML_Char *data);
73        void comment(const XML_Char *s);
74        void startCdata();
75        void endCdata();
76
77        void error(ErrorType type, const char* m, int lineNumber, int columnNumber);
78
79        // utilities
80        XML_Parser getXMLParser() const { return m_parser; }
81        void setXMLParser(XML_Parser parser) { m_parser = parser; }
82
83    private:
84        void setCurrentNode(Node*);
85
86        void end();
87
88        void pauseParsing();
89        void resumeParsing();
90
91        void reportError();
92        void insertErrorMessageBlock();
93
94        bool enterText();
95        void exitText();
96
97        Document *m_doc;
98        FrameView *m_view;
99
100        XML_Parser m_parser;
101
102        Node *m_currentNode;
103        bool m_currentNodeIsReferenced;
104
105        bool m_sawError;
106        bool m_sawXSLTransform;
107        bool m_sawFirstElement;
108
109        bool m_parserPaused;
110        bool m_requestingScript;
111        bool m_finishCalled;
112
113        int m_errorCount;
114        String m_errorMessages;
115
116        CachedScript *m_pendingScript;
117        RefPtr<Element> m_scriptElement;
118        int m_scriptStartLine;
119
120        bool m_parsingFragment;
121        String m_defaultNamespaceURI;
122
123        typedef HashMap<String, String> PrefixForNamespaceMap;
124        PrefixForNamespaceMap m_prefixToNamespaceMap;
125
126        OwnPtr<PendingCallbacks> m_pendingCallbacks;
127        SegmentedString m_pendingSrc;
128    };
129
130HashMap<String, String> parseAttributes(const String&, bool& attrsOK);
131bool parseXMLDocumentFragment(const String&, DocumentFragment*, Element* parent = 0);
132
133} // namespace WebCore
134
135#else   // USE(EXPAT)
136
137#include "CachedResourceClient.h"
138#include "CachedResourceHandle.h"
139#include "MappedAttributeEntry.h"
140#include "SegmentedString.h"
141#include "StringHash.h"
142#include "Tokenizer.h"
143#include <wtf/HashMap.h>
144#include <wtf/OwnPtr.h>
145
146#if USE(QXMLSTREAM)
147#include <qxmlstream.h>
148#else
149#include <libxml/tree.h>
150#include <libxml/xmlstring.h>
151#endif
152
153namespace WebCore {
154
155    class Node;
156    class CachedScript;
157    class DocLoader;
158    class DocumentFragment;
159    class Document;
160    class Element;
161    class FrameView;
162    class PendingCallbacks;
163    class ScriptElement;
164
165#if !USE(QXMLSTREAM)
166    class XMLParserContext : public RefCounted<XMLParserContext> {
167    public:
168        static PassRefPtr<XMLParserContext> createMemoryParser(xmlSAXHandlerPtr, void*, const char*);
169        static PassRefPtr<XMLParserContext> createStringParser(xmlSAXHandlerPtr, void*);
170        ~XMLParserContext();
171        xmlParserCtxtPtr context() const { return m_context; }
172
173    private:
174        XMLParserContext(xmlParserCtxtPtr context)
175            : m_context(context)
176        {
177        }
178        xmlParserCtxtPtr m_context;
179    };
180#endif
181
182    class XMLTokenizer : public Tokenizer, public CachedResourceClient {
183    public:
184        XMLTokenizer(Document*, FrameView* = 0);
185        XMLTokenizer(DocumentFragment*, Element*, FragmentScriptingPermission);
186        ~XMLTokenizer();
187
188        enum ErrorType { warning, nonFatal, fatal };
189
190        // from Tokenizer
191        virtual void write(const SegmentedString&, bool appendData);
192        virtual void finish();
193        virtual bool isWaitingForScripts() const;
194        virtual void stopParsing();
195
196        void end();
197
198        void pauseParsing();
199        void resumeParsing();
200
201        void setIsXHTMLDocument(bool isXHTML) { m_isXHTMLDocument = isXHTML; }
202        bool isXHTMLDocument() const { return m_isXHTMLDocument; }
203#if ENABLE(XHTMLMP)
204        void setIsXHTMLMPDocument(bool isXHTML) { m_isXHTMLMPDocument = isXHTML; }
205        bool isXHTMLMPDocument() const { return m_isXHTMLMPDocument; }
206#endif
207#if ENABLE(WML)
208        bool isWMLDocument() const;
209#endif
210
211        // from CachedResourceClient
212        virtual void notifyFinished(CachedResource* finishedObj);
213
214
215        void handleError(ErrorType type, const char* m, int lineNumber, int columnNumber);
216
217        virtual bool wellFormed() const { return !m_sawError; }
218
219        int lineNumber() const;
220        int columnNumber() const;
221
222#if USE(QXMLSTREAM)
223private:
224        void parse();
225        void startDocument();
226        void parseStartElement();
227        void parseEndElement();
228        void parseCharacters();
229        void parseProcessingInstruction();
230        void parseCdata();
231        void parseComment();
232        void endDocument();
233        void parseDtd();
234        bool hasError() const;
235#else
236public:
237        // callbacks from parser SAX
238        void error(ErrorType, const char* message, va_list args) WTF_ATTRIBUTE_PRINTF(3, 0);
239        void startElementNs(const xmlChar* xmlLocalName, const xmlChar* xmlPrefix, const xmlChar* xmlURI, int nb_namespaces,
240                            const xmlChar** namespaces, int nb_attributes, int nb_defaulted, const xmlChar** libxmlAttributes);
241        void endElementNs();
242        void characters(const xmlChar* s, int len);
243        void processingInstruction(const xmlChar* target, const xmlChar* data);
244        void cdataBlock(const xmlChar* s, int len);
245        void comment(const xmlChar* s);
246        void startDocument(const xmlChar* version, const xmlChar* encoding, int standalone);
247        void internalSubset(const xmlChar* name, const xmlChar* externalID, const xmlChar* systemID);
248        void endDocument();
249#endif
250    private:
251        friend bool parseXMLDocumentFragment(const String&, DocumentFragment*, Element*, FragmentScriptingPermission);
252
253        void initializeParserContext(const char* chunk = 0);
254
255        void pushCurrentNode(Node*);
256        void popCurrentNode();
257        void clearCurrentNodeStack();
258
259        void insertErrorMessageBlock();
260
261        bool enterText();
262        void exitText();
263
264        void doWrite(const String&);
265        void doEnd();
266
267        Document* m_doc;
268        FrameView* m_view;
269
270        String m_originalSourceForTransform;
271
272#if USE(QXMLSTREAM)
273        QXmlStreamReader m_stream;
274        bool m_wroteText;
275#else
276        xmlParserCtxtPtr context() const { return m_context ? m_context->context() : 0; };
277        RefPtr<XMLParserContext> m_context;
278        OwnPtr<PendingCallbacks> m_pendingCallbacks;
279        Vector<xmlChar> m_bufferedText;
280#endif
281        Node* m_currentNode;
282        Vector<Node*> m_currentNodeStack;
283
284        bool m_sawError;
285        bool m_sawXSLTransform;
286        bool m_sawFirstElement;
287        bool m_isXHTMLDocument;
288#if ENABLE(XHTMLMP)
289        bool m_isXHTMLMPDocument;
290        bool m_hasDocTypeDeclaration;
291#endif
292
293        bool m_parserPaused;
294        bool m_requestingScript;
295        bool m_finishCalled;
296
297        int m_errorCount;
298        int m_lastErrorLine;
299        int m_lastErrorColumn;
300        String m_errorMessages;
301
302        CachedResourceHandle<CachedScript> m_pendingScript;
303        RefPtr<Element> m_scriptElement;
304        int m_scriptStartLine;
305
306        bool m_parsingFragment;
307        String m_defaultNamespaceURI;
308
309        typedef HashMap<String, String> PrefixForNamespaceMap;
310        PrefixForNamespaceMap m_prefixToNamespaceMap;
311        SegmentedString m_pendingSrc;
312        FragmentScriptingPermission m_scriptingPermission;
313    };
314
315#if ENABLE(XSLT)
316void* xmlDocPtrForString(DocLoader*, const String& source, const String& url);
317#endif
318
319HashMap<String, String> parseAttributes(const String&, bool& attrsOK);
320bool parseXMLDocumentFragment(const String&, DocumentFragment*, Element* parent = 0, FragmentScriptingPermission = FragmentScriptingAllowed);
321
322} // namespace WebCore
323
324#endif // USE(EXPAT)
325
326#endif // XMLTokenizer_h
327