1/**
2 * This file is part of the DOM implementation for KDE.
3 *
4 * Copyright (C) 2000 Peter Kelly (pmk@post.com)
5 * Copyright (C) 2005, 2006 Apple Computer, Inc.
6 * Copyright (C) 2006 Alexey Proskuryakov (ap@webkit.org)
7 * Copyright (C) 2007 Samuel Weinig (sam@webkit.org)
8 * Copyright (C) 2007 The Android Open Source Project
9 *
10 * This library is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Library General Public
12 * License as published by the Free Software Foundation; either
13 * version 2 of the License, or (at your option) any later version.
14 *
15 * This library is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18 * Library General Public License for more details.
19 *
20 * You should have received a copy of the GNU Library General Public License
21 * along with this library; see the file COPYING.LIB.  If not, write to
22 * the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23 * Boston, MA 02111-1307, USA.
24 */
25
26#include "config.h"
27#include "XMLTokenizer.h"
28
29#include "CDATASection.h"
30#include "CachedScript.h"
31#include "Comment.h"
32#include "CString.h"
33#include "DocLoader.h"
34#include "Document.h"
35#include "DocumentFragment.h"
36#include "Frame.h"
37#include "FrameLoader.h"
38#include "FrameView.h"
39#include "HTMLNames.h"
40#include "HTMLScriptElement.h"
41#include "HTMLTableSectionElement.h"
42#include "HTMLTokenizer.h"
43#include "ProcessingInstruction.h"
44#include "EventNames.h"
45
// strndup is not available everywhere, so this is a portable stand-in.
// Returns a malloc()ed, NUL-terminated copy of at most |len| characters of
// |src| (copying stops early at the first NUL), or NULL if allocation fails.
// The caller releases the result with free().
static char* portable_strndup(const char src[], size_t len)
{
    char* copy = (char*)malloc(len + 1);
    if (!copy)
        return NULL;

    size_t copied = 0;
    for (; copied < len && src[copied] != 0; ++copied)
        copy[copied] = src[copied];

    copy[copied] = 0;
    return copy;
}
61
62namespace WebCore {
63
64using namespace EventNames;
65using namespace HTMLNames;
66
// Cap on the number of diagnostics XMLTokenizer::error() appends to the error
// text; once m_errorCount reaches it, only fatal errors are still recorded.
const int maxErrors = 25;
68
69class PendingCallbacks {
70public:
71    PendingCallbacks() {
72        m_callbacks.setAutoDelete(true);
73    }
74
75    void appendStartElementNSCallback(const XML_Char* name, const XML_Char** atts) {
76        PendingStartElementNSCallback* callback = new PendingStartElementNSCallback;
77
78        callback->name = strdup(name);
79        callback->count = 0;
80        while (atts[callback->count])
81            callback->count++;
82        callback->atts = (XML_Char**)malloc(sizeof(XML_Char*) * (callback->count+1));
83        for (int i=0; i<callback->count; i++)
84            callback->atts[i] = strdup(atts[i]);
85        callback->atts[callback->count] = NULL;
86
87        m_callbacks.append(callback);
88    }
89
90    void appendEndElementNSCallback() {
91        PendingEndElementNSCallback* callback = new PendingEndElementNSCallback;
92
93        m_callbacks.append(callback);
94    }
95
96    void appendCharactersCallback(const XML_Char* s, int len) {
97        PendingCharactersCallback* callback = new PendingCharactersCallback;
98
99        callback->s = portable_strndup(s, len);
100        callback->len = len;
101
102        m_callbacks.append(callback);
103    }
104
105    void appendProcessingInstructionCallback(const XML_Char* target, const XML_Char* data) {
106        PendingProcessingInstructionCallback* callback = new PendingProcessingInstructionCallback;
107
108        callback->target = strdup(target);
109        callback->data = strdup(data);
110
111        m_callbacks.append(callback);
112    }
113
114    void appendStartCDATABlockCallback() {
115        PendingStartCDATABlockCallback* callback = new PendingStartCDATABlockCallback;
116
117        m_callbacks.append(callback);
118    }
119
120    void appendEndCDATABlockCallback() {
121        PendingEndCDATABlockCallback* callback = new PendingEndCDATABlockCallback;
122
123        m_callbacks.append(callback);
124    }
125
126    void appendCommentCallback(const XML_Char* s) {
127        PendingCommentCallback* callback = new PendingCommentCallback;
128
129        callback->s = strdup(s);
130
131        m_callbacks.append(callback);
132    }
133
134    void appendErrorCallback(XMLTokenizer::ErrorType type, const char* message, int lineNumber, int columnNumber) {
135        PendingErrorCallback* callback = new PendingErrorCallback;
136
137        callback->message = strdup(message);
138        callback->type = type;
139        callback->lineNumber = lineNumber;
140        callback->columnNumber = columnNumber;
141
142        m_callbacks.append(callback);
143    }
144
145    void callAndRemoveFirstCallback(XMLTokenizer* tokenizer) {
146        PendingCallback* cb = m_callbacks.getFirst();
147
148        cb->call(tokenizer);
149        m_callbacks.removeFirst();
150    }
151
152    bool isEmpty() const { return m_callbacks.isEmpty(); }
153
154private:
155    struct PendingCallback {
156
157        virtual ~PendingCallback() { }
158
159        virtual void call(XMLTokenizer* tokenizer) = 0;
160    };
161
162    struct PendingStartElementNSCallback : public PendingCallback {
163        virtual ~PendingStartElementNSCallback() {
164            free(name);
165            for (int i=0; i<count; i++)
166                free(atts[i]);
167            free(atts);
168        }
169
170        virtual void call(XMLTokenizer* tokenizer) {
171            tokenizer->startElementNs(name, (const XML_Char**)(atts));
172        }
173
174        XML_Char* name;
175        int count;
176        XML_Char** atts;
177    };
178
179    struct PendingEndElementNSCallback : public PendingCallback {
180        virtual void call(XMLTokenizer* tokenizer) {
181            tokenizer->endElementNs();
182        }
183    };
184
185    struct PendingCharactersCallback : public PendingCallback {
186        virtual ~PendingCharactersCallback() {
187            free(s);
188        }
189
190        virtual void call(XMLTokenizer* tokenizer) {
191            tokenizer->characters(s, len);
192        }
193
194        XML_Char* s;
195        int len;
196    };
197
198    struct PendingProcessingInstructionCallback : public PendingCallback {
199        virtual ~PendingProcessingInstructionCallback() {
200            free(target);
201            free(data);
202        }
203
204        virtual void call(XMLTokenizer* tokenizer) {
205            tokenizer->processingInstruction(target, data);
206        }
207
208        XML_Char* target;
209        XML_Char* data;
210    };
211
212    struct PendingStartCDATABlockCallback : public PendingCallback {
213        virtual void call(XMLTokenizer* tokenizer) {
214            tokenizer->startCdata();
215        }
216    };
217
218    struct PendingEndCDATABlockCallback : public PendingCallback {
219        virtual void call(XMLTokenizer* tokenizer) {
220            tokenizer->endCdata();
221        }
222    };
223
224    struct PendingCommentCallback : public PendingCallback {
225        virtual ~PendingCommentCallback() {
226            free(s);
227        }
228
229        virtual void call(XMLTokenizer* tokenizer) {
230            tokenizer->comment(s);
231        }
232
233        XML_Char* s;
234    };
235
236    struct PendingErrorCallback: public PendingCallback {
237        virtual ~PendingErrorCallback() {
238            free (message);
239        }
240
241        virtual void call(XMLTokenizer* tokenizer) {
242            tokenizer->error(type, message, lineNumber, columnNumber);
243        }
244
245        XMLTokenizer::ErrorType type;
246        char* message;
247        int lineNumber;
248        int columnNumber;
249    };
250
251public:
252    DeprecatedPtrList<PendingCallback> m_callbacks;
253};
254
255// --------------------------------
256
257XMLTokenizer::XMLTokenizer(Document *_doc, FrameView *_view)
258    : m_doc(_doc)
259    , m_view(_view)
260    , m_parser(0)
261    , m_currentNode(_doc)
262    , m_currentNodeIsReferenced(false)
263    , m_sawError(false)
264    , m_sawXSLTransform(false)
265    , m_sawFirstElement(false)
266    , m_parserPaused(false)
267    , m_requestingScript(false)
268    , m_finishCalled(false)
269    , m_errorCount(0)
270    , m_pendingScript(0)
271    , m_scriptStartLine(0)
272    , m_parsingFragment(false)
273    , m_pendingCallbacks(new PendingCallbacks)
274{
275}
276
277XMLTokenizer::XMLTokenizer(DocumentFragment *fragment, Element *parentElement)
278    : m_doc(fragment->document())
279    , m_view(0)
280    , m_parser(0)
281    , m_currentNode(fragment)
282    , m_currentNodeIsReferenced(fragment)
283    , m_sawError(false)
284    , m_sawXSLTransform(false)
285    , m_sawFirstElement(false)
286    , m_parserPaused(false)
287    , m_requestingScript(false)
288    , m_finishCalled(false)
289    , m_errorCount(0)
290    , m_pendingScript(0)
291    , m_scriptStartLine(0)
292    , m_parsingFragment(true)
293    , m_pendingCallbacks(new PendingCallbacks)
294{
295    if (fragment)
296        fragment->ref();
297    if (m_doc)
298        m_doc->ref();
299
300    // Add namespaces based on the parent node
301    Vector<Element*> elemStack;
302    while (parentElement) {
303        elemStack.append(parentElement);
304
305        Node* n = parentElement->parentNode();
306        if (!n || !n->isElementNode())
307            break;
308        parentElement = static_cast<Element*>(n);
309    }
310
311    if (elemStack.isEmpty())
312        return;
313
314    for (Element* element = elemStack.last(); !elemStack.isEmpty(); elemStack.removeLast()) {
315        if (NamedAttrMap* attrs = element->attributes()) {
316            for (unsigned i = 0; i < attrs->length(); i++) {
317                Attribute* attr = attrs->attributeItem(i);
318                if (attr->localName() == "xmlns")
319                    m_defaultNamespaceURI = attr->value();
320                else if (attr->prefix() == "xmlns")
321                    m_prefixToNamespaceMap.set(attr->localName(), attr->value());
322            }
323        }
324    }
325}
326
327XMLTokenizer::~XMLTokenizer()
328{
329    setCurrentNode(0);
330    if (m_parsingFragment && m_doc)
331        m_doc->deref();
332    if (m_pendingScript)
333        m_pendingScript->deref(this);
334}
335
336void XMLTokenizer::setCurrentNode(Node* n)
337{
338    bool nodeNeedsReference = n && n != m_doc;
339    if (nodeNeedsReference)
340        n->ref();
341    if (m_currentNodeIsReferenced)
342        m_currentNode->deref();
343    m_currentNode = n;
344    m_currentNodeIsReferenced = nodeNeedsReference;
345}
346
// Separator passed to XML_ParserCreateNS() and used by splitTriplet() to take
// names apart again. A space is used instead of ':' because ':' can occur
// inside a URI.
const XML_Char tripletSep=' ';
349
350inline DeprecatedString toQString(const XML_Char* str, unsigned int len)
351{
352    return DeprecatedString::fromUtf8(reinterpret_cast<const char *>(str), len);
353}
354
355inline DeprecatedString toQString(const XML_Char* str)
356{
357    return DeprecatedString::fromUtf8(str ? reinterpret_cast<const char *>(str) : "");
358}
359
360// triplet is formatted as URI + sep + local_name + sep + prefix.
361static inline void splitTriplet(const XML_Char *name, String &uri, String &localname, String &prefix)
362{
363    String string[3];
364    int found = 0;
365    const char *start = reinterpret_cast<const char *>(name);
366
367    while(start && (found < 3)) {
368        char *next = strchr(start, tripletSep);
369        if (next) {
370            string[found++] = toQString(start, (next-start));
371            start = next+1;
372        } else {
373            string[found++] = toQString(start);
374            break;
375        }
376    }
377
378    switch(found) {
379    case 1:
380        localname = string[0];
381        break;
382    case 2:
383        uri = string[0];
384        localname = string[1];
385        break;
386    case 3:
387        uri = string[0];
388        localname = string[1];
389        prefix = string[2];
390        break;
391    }
392}
393
394static inline void handleElementNamespaces(Element *newElement, const String &uri, const String &prefix, ExceptionCode &exceptioncode)
395{
396    if (uri.isEmpty())
397        return;
398
399    String namespaceQName("xmlns");
400    if(!prefix.isEmpty())
401        namespaceQName += String(":")+ prefix;
402    newElement->setAttributeNS(String("http://www.w3.org/2000/xmlns/"), namespaceQName, uri, exceptioncode);
403}
404
405static inline void handleElementAttributes(Element *newElement, const XML_Char **atts, ExceptionCode &exceptioncode)
406{
407    for (int i = 0; atts[i]; i += 2) {
408        String attrURI, attrLocalName, attrPrefix;
409        splitTriplet(atts[i], attrURI, attrLocalName, attrPrefix);
410        String attrQName = attrPrefix.isEmpty() ? attrLocalName : attrPrefix + String(":") + attrLocalName;
411        String attrValue = toQString(atts[i+1]);
412        newElement->setAttributeNS(attrURI, attrQName, attrValue, exceptioncode);
413        if (exceptioncode) // exception while setting attributes
414            return;
415    }
416}
417
418void XMLTokenizer::startElementNs(const XML_Char *name, const XML_Char **atts)
419{
420    if (m_parserStopped)
421        return;
422
423    if (m_parserPaused) {
424        m_pendingCallbacks->appendStartElementNSCallback(name, atts);
425        return;
426    }
427
428    m_sawFirstElement = true;
429
430    exitText();
431
432    String uri, localName, prefix;
433    splitTriplet(name, uri, localName, prefix);
434    String qName = prefix.isEmpty() ? localName : prefix + ":" + localName;
435
436    if (m_parsingFragment && uri.isEmpty()) {
437        if (!prefix.isEmpty())
438            uri = String(m_prefixToNamespaceMap.get(prefix.impl()));
439        else
440            uri = m_defaultNamespaceURI;
441    }
442
443    ExceptionCode ec = 0;
444    RefPtr<Element> newElement = m_doc->createElementNS(uri, qName, ec);
445    if (!newElement) {
446        stopParsing();
447        return;
448    }
449
450    handleElementNamespaces(newElement.get(), uri, prefix, ec);
451    if (ec) {
452        stopParsing();
453        return;
454    }
455
456    handleElementAttributes(newElement.get(), atts, ec);
457    if (ec) {
458        stopParsing();
459        return;
460    }
461
462    if (newElement->hasTagName(scriptTag))
463        static_cast<HTMLScriptElement*>(newElement.get())->setCreatedByParser(true);
464
465    if (newElement->hasTagName(HTMLNames::scriptTag))
466        m_scriptStartLine = lineNumber();
467
468    if (!m_currentNode->addChild(newElement.get())) {
469        stopParsing();
470        return;
471    }
472
473    setCurrentNode(newElement.get());
474    if (m_view && !newElement->attached())
475        newElement->attach();
476}
477
478void XMLTokenizer::endElementNs()
479{
480    if (m_parserStopped)
481        return;
482
483    if (m_parserPaused) {
484        m_pendingCallbacks->appendEndElementNSCallback();
485        return;
486    }
487
488    exitText();
489
490    Node* n = m_currentNode;
491    RefPtr<Node> parent = n->parentNode();
492    n->finishedParsing();
493
494    // don't load external scripts for standalone documents (for now)
495    if (n->isElementNode() && m_view && static_cast<Element*>(n)->hasTagName(scriptTag)) {
496        ASSERT(!m_pendingScript);
497
498        m_requestingScript = true;
499
500        Element* scriptElement = static_cast<Element*>(n);
501        String scriptHref;
502
503        if (static_cast<Element*>(n)->hasTagName(scriptTag))
504            scriptHref = scriptElement->getAttribute(srcAttr);
505
506        if (!scriptHref.isEmpty()) {
507            // we have a src attribute
508            const AtomicString& charset = scriptElement->getAttribute(charsetAttr);
509            if ((m_pendingScript = m_doc->docLoader()->requestScript(scriptHref, charset))) {
510                m_scriptElement = scriptElement;
511                m_pendingScript->ref(this);
512
513                // m_pendingScript will be 0 if script was already loaded and ref() executed it
514                if (m_pendingScript)
515                    pauseParsing();
516            } else
517                m_scriptElement = 0;
518
519        } else {
520            String scriptCode = "";
521            for (Node* child = scriptElement->firstChild(); child; child = child->nextSibling()) {
522                if (child->isTextNode() || child->nodeType() == Node::CDATA_SECTION_NODE)
523                    scriptCode += static_cast<CharacterData*>(child)->data();
524            }
525            m_view->frame()->loader()->executeScript(m_doc->URL(), m_scriptStartLine - 1, scriptCode);
526        }
527
528        m_requestingScript = false;
529    }
530
531    setCurrentNode(parent.get());
532}
533
534void XMLTokenizer::characters(const XML_Char *s, int len)
535{
536    if (m_parserStopped)
537        return;
538
539    if (m_parserPaused) {
540        m_pendingCallbacks->appendCharactersCallback(s, len);
541        return;
542    }
543
544    if (m_currentNode->isTextNode() || enterText()) {
545        ExceptionCode ec = 0;
546        static_cast<Text*>(m_currentNode)->appendData(toQString(s, len), ec);
547    }
548}
549
550bool XMLTokenizer::enterText()
551{
552    RefPtr<Node> newNode = new Text(m_doc, "");
553    if (!m_currentNode->addChild(newNode.get()))
554        return false;
555    setCurrentNode(newNode.get());
556    return true;
557}
558
559void XMLTokenizer::exitText()
560{
561    if (m_parserStopped)
562        return;
563
564    if (!m_currentNode || !m_currentNode->isTextNode())
565        return;
566
567    if (m_view && m_currentNode && !m_currentNode->attached())
568        m_currentNode->attach();
569
570    // FIXME: What's the right thing to do if the parent is really 0?
571    // Just leaving the current node set to the text node doesn't make much sense.
572    if (Node* par = m_currentNode->parentNode())
573        setCurrentNode(par);
574}
575
576void XMLTokenizer::processingInstruction(const XML_Char *target, const XML_Char *data)
577{
578    if (m_parserStopped)
579        return;
580
581    if (m_parserPaused) {
582        m_pendingCallbacks->appendProcessingInstructionCallback(target, data);
583        return;
584    }
585
586    exitText();
587
588    // ### handle exceptions
589    int exception = 0;
590    RefPtr<ProcessingInstruction> pi = m_doc->createProcessingInstruction(
591        toQString(target), toQString(data), exception);
592    if (exception)
593        return;
594
595    if (!m_currentNode->addChild(pi.get()))
596        return;
597    if (m_view && !pi->attached())
598        pi->attach();
599
600    // don't load stylesheets for standalone documents
601    if (m_doc->frame()) {
602        m_sawXSLTransform = !m_sawFirstElement && !pi->checkStyleSheet();
603        if (m_sawXSLTransform)
604            stopParsing();
605    }
606}
607
608void XMLTokenizer::comment(const XML_Char *s)
609{
610    if (m_parserStopped)
611        return;
612
613    if (m_parserPaused) {
614        m_pendingCallbacks->appendCommentCallback(s);
615        return;
616    }
617
618    exitText();
619
620    RefPtr<Node> newNode = m_doc->createComment(toQString(s));
621    m_currentNode->addChild(newNode.get());
622    if (m_view && !newNode->attached())
623        newNode->attach();
624}
625
626void XMLTokenizer::startCdata()
627{
628    if (m_parserStopped)
629        return;
630
631    if (m_parserPaused) {
632        m_pendingCallbacks->appendStartCDATABlockCallback();
633        return;
634    }
635
636    exitText();
637
638    RefPtr<Node> newNode = new CDATASection(m_doc, "");
639    if (!m_currentNode->addChild(newNode.get()))
640        return;
641    if (m_view && !newNode->attached())
642        newNode->attach();
643    setCurrentNode(newNode.get());
644}
645
646void XMLTokenizer::endCdata()
647{
648    if (m_parserStopped)
649        return;
650
651    if (m_parserPaused) {
652        m_pendingCallbacks->appendEndCDATABlockCallback();
653        return;
654    }
655
656    if (m_currentNode->parentNode() != 0)
657        setCurrentNode(m_currentNode->parentNode());
658}
659
660static void XMLCALL startElementHandler(void *userdata, const XML_Char *name, const XML_Char **atts)
661{
662    XMLTokenizer *tokenizer = static_cast<XMLTokenizer *>(userdata);
663    tokenizer->startElementNs(name, atts);
664}
665
666static void XMLCALL endElementHandler(void *userdata, const XML_Char *name)
667{
668    XMLTokenizer *tokenizer = static_cast<XMLTokenizer *>(userdata);
669    tokenizer->endElementNs();
670}
671
672static void charactersHandler(void *userdata, const XML_Char *s, int len)
673{
674    XMLTokenizer *tokenizer = static_cast<XMLTokenizer *>(userdata);
675    tokenizer->characters(s, len);
676}
677
678static void processingInstructionHandler(void *userdata, const XML_Char *target, const XML_Char *data)
679{
680    XMLTokenizer *tokenizer = static_cast<XMLTokenizer *>(userdata);
681    tokenizer->processingInstruction(target, data);
682}
683
684static void commentHandler(void *userdata, const XML_Char *comment)
685{
686    XMLTokenizer *tokenizer = static_cast<XMLTokenizer *>(userdata);
687    tokenizer->comment(comment);
688}
689
690static void startCdataHandler(void *userdata)
691{
692    XMLTokenizer *tokenizer = static_cast<XMLTokenizer *>(userdata);
693    tokenizer->startCdata();
694}
695
696static void endCdataHandler(void *userdata)
697{
698    XMLTokenizer *tokenizer = static_cast<XMLTokenizer *>(userdata);
699    tokenizer->endCdata();
700}
701
702static int unknownEncodingHandler(void *userdata, const XML_Char *name, XML_Encoding *info)
703{
704    // Expat doesn't like latin1 so we have to build this map
705    // to do conversion correctly.
706    // FIXME: Create a wrapper for expat that looks like libxml.
707    if (strcasecmp(name, "latin1") == 0)
708    {
709        for (int i=0; i<256; i++) {
710            info->map[i] = i;
711        }
712        return XML_STATUS_OK;
713    }
714    return XML_STATUS_ERROR;
715}
716
717bool XMLTokenizer::write(const SegmentedString&s, bool /*appendData*/ )
718{
719    String parseString = s.toString();
720
721    if (m_parserStopped || m_sawXSLTransform)
722        return false;
723
724    if (m_parserPaused) {
725        m_pendingSrc.append(s);
726        return false;
727    }
728
729    if (!m_parser) {
730        static const UChar BOM = 0xFEFF;
731        static const unsigned char BOMHighByte = *reinterpret_cast<const unsigned char*>(&BOM);
732        m_parser = XML_ParserCreateNS(BOMHighByte == 0xFF ? "UTF-16LE" : "UTF-16BE", tripletSep);
733        XML_SetUserData(m_parser, (void *)this);
734        XML_SetReturnNSTriplet(m_parser, true);
735
736        XML_SetStartElementHandler(m_parser, startElementHandler);
737        XML_SetEndElementHandler(m_parser, endElementHandler);
738        XML_SetCharacterDataHandler(m_parser, charactersHandler);
739        XML_SetProcessingInstructionHandler(m_parser, processingInstructionHandler);
740        XML_SetCommentHandler(m_parser, commentHandler);
741        XML_SetStartCdataSectionHandler(m_parser, startCdataHandler);
742        XML_SetEndCdataSectionHandler(m_parser, endCdataHandler);
743        XML_SetUnknownEncodingHandler(m_parser, unknownEncodingHandler, NULL);
744    }
745
746    enum XML_Status result = XML_Parse(m_parser, (const char*)parseString.characters(), sizeof(UChar) * parseString.length(), false);
747    if (result == XML_STATUS_ERROR) {
748        reportError();
749        return false;
750    }
751
752    return true;
753}
754
755void XMLTokenizer::end()
756{
757    if (m_parser) {
758        XML_Parse(m_parser, 0, 0, true);
759        XML_ParserFree(m_parser);
760        m_parser = 0;
761    }
762
763    if (m_sawError)
764        insertErrorMessageBlock();
765    else {
766        exitText();
767        m_doc->updateStyleSelector();
768    }
769
770    setCurrentNode(0);
771    m_doc->finishedParsing();
772}
773
774void XMLTokenizer::finish()
775{
776    if (m_parserPaused)
777        m_finishCalled = true;
778    else
779        end();
780}
781
782void XMLTokenizer::reportError()
783{
784    ErrorType type = nonFatal;
785    enum XML_Error code = XML_GetErrorCode(m_parser);
786    switch (code) {
787        case XML_ERROR_NO_MEMORY:
788            type = fatal;
789            break;
790        case XML_ERROR_FINISHED:
791            type = warning;
792            break;
793        default:
794            type = nonFatal;
795    }
796    error(type, XML_ErrorString(code), lineNumber(), columnNumber());
797}
798
799void XMLTokenizer::error(ErrorType type, const char* m, int lineNumber, int columnNumber)
800{
801    if (type == fatal || m_errorCount < maxErrors) {
802        switch (type) {
803            case warning:
804                m_errorMessages += String::format("warning on line %d at column %d: %s", lineNumber, columnNumber, m);
805                break;
806            case fatal:
807            case nonFatal:
808                m_errorMessages += String::format("error on line %d at column %d: %s", lineNumber, columnNumber, m);
809        }
810        ++m_errorCount;
811    }
812
813    if (type != warning)
814        m_sawError = true;
815
816    if (type == fatal)
817        stopParsing();
818}
819
820static inline RefPtr<Element> createXHTMLParserErrorHeader(Document* doc, const String& errorMessages)
821{
822    ExceptionCode ec = 0;
823    RefPtr<Element> reportElement = doc->createElementNS(xhtmlNamespaceURI, "parsererror", ec);
824    reportElement->setAttribute(styleAttr, "display:block; pre; border: 2px solid #c77; padding: 0 1em 0 1em; margin: 1em; background-color: #fdd; color: black");
825
826    RefPtr<Element> h3 = doc->createElementNS(xhtmlNamespaceURI, "h3", ec);
827    reportElement->appendChild(h3.get(), ec);
828    h3->appendChild(doc->createTextNode("This page contains the following errors:"), ec);
829
830    RefPtr<Element> fixed = doc->createElementNS(xhtmlNamespaceURI, "div", ec);
831    reportElement->appendChild(fixed.get(), ec);
832    fixed->setAttribute(styleAttr, "font-family:monospace;font-size:12px");
833    fixed->appendChild(doc->createTextNode(errorMessages), ec);
834
835    h3 = doc->createElementNS(xhtmlNamespaceURI, "h3", ec);
836    reportElement->appendChild(h3.get(), ec);
837    h3->appendChild(doc->createTextNode("Below is a rendering of the page up to the first error."), ec);
838
839    return reportElement;
840}
841
842void XMLTokenizer::insertErrorMessageBlock()
843{
844    // One or more errors occurred during parsing of the code. Display an error block to the user above
845    // the normal content (the DOM tree is created manually and includes line/col info regarding
846    // where the errors are located)
847
848    // Create elements for display
849    ExceptionCode ec = 0;
850    Document* doc = m_doc;
851    Node* documentElement = doc->documentElement();
852    if (!documentElement) {
853        RefPtr<Node> rootElement = doc->createElementNS(xhtmlNamespaceURI, "html", ec);
854        doc->appendChild(rootElement, ec);
855        RefPtr<Node> body = doc->createElementNS(xhtmlNamespaceURI, "body", ec);
856        rootElement->appendChild(body, ec);
857        documentElement = body.get();
858    }
859
860    RefPtr<Element> reportElement = createXHTMLParserErrorHeader(doc, m_errorMessages);
861    documentElement->insertBefore(reportElement, documentElement->firstChild(), ec);
862    doc->updateRendering();
863}
864
865void XMLTokenizer::notifyFinished(CachedResource *finishedObj)
866{
867    ASSERT(m_pendingScript == finishedObj);
868
869    String cachedScriptUrl = m_pendingScript->url();
870    String scriptSource = m_pendingScript->script();
871    bool errorOccurred = m_pendingScript->errorOccurred();
872    m_pendingScript->deref(this);
873    m_pendingScript = 0;
874
875    RefPtr<Element> e = m_scriptElement;
876    m_scriptElement = 0;
877
878    if (errorOccurred)
879        EventTargetNodeCast(e.get())->dispatchHTMLEvent(errorEvent, true, false);
880    else {
881        m_view->frame()->loader()->executeScript(cachedScriptUrl, 0, scriptSource);
882        EventTargetNodeCast(e.get())->dispatchHTMLEvent(loadEvent, false, false);
883    }
884
885    m_scriptElement = 0;
886
887    if (!m_requestingScript)
888        resumeParsing();
889}
890
891bool XMLTokenizer::isWaitingForScripts() const
892{
893    return m_pendingScript != 0;
894}
895
896Tokenizer *newXMLTokenizer(Document *d, FrameView *v)
897{
898    return new XMLTokenizer(d, v);
899}
900
901int XMLTokenizer::lineNumber() const
902{
903    return XML_GetCurrentLineNumber(m_parser);
904}
905
906int XMLTokenizer::columnNumber() const
907{
908    return XML_GetCurrentColumnNumber(m_parser);
909}
910
911void XMLTokenizer::stopParsing()
912{
913    Tokenizer::stopParsing();
914    if (m_parser)
915        XML_StopParser(m_parser, 0);
916}
917
918void XMLTokenizer::pauseParsing()
919{
920    if (m_parsingFragment)
921        return;
922
923    m_parserPaused = true;
924}
925
926void XMLTokenizer::resumeParsing()
927{
928    ASSERT(m_parserPaused);
929
930    m_parserPaused = false;
931
932    // First, execute any pending callbacks
933    while (!m_pendingCallbacks->isEmpty()) {
934        m_pendingCallbacks->callAndRemoveFirstCallback(this);
935
936        // A callback paused the parser
937        if (m_parserPaused)
938            return;
939    }
940
941    // Then, write any pending data
942    SegmentedString rest = m_pendingSrc;
943    m_pendingSrc.clear();
944    write(rest, false);
945
946    // Finally, if finish() has been called and write() didn't result
947    // in any further callbacks being queued, call end()
948    if (m_finishCalled && m_pendingCallbacks->isEmpty())
949        end();
950}
951
952// --------------------------------
953
954bool parseXMLDocumentFragment(const String &string, DocumentFragment *fragment, Element *parent)
955{
956    XMLTokenizer tokenizer(fragment, parent);
957
958    XML_Parser parser = XML_ParserCreateNS(NULL, tripletSep);
959    tokenizer.setXMLParser(parser);
960
961    XML_SetUserData(parser, (void *)&tokenizer);
962    XML_SetReturnNSTriplet(parser, true);
963
964    XML_SetStartElementHandler(parser, startElementHandler);
965    XML_SetEndElementHandler(parser, endElementHandler);
966    XML_SetCharacterDataHandler(parser, charactersHandler);
967    XML_SetProcessingInstructionHandler(parser, processingInstructionHandler);
968    XML_SetCommentHandler(parser, commentHandler);
969    XML_SetStartCdataSectionHandler(parser, startCdataHandler);
970    XML_SetEndCdataSectionHandler(parser, endCdataHandler);
971
972    CString cString = string.utf8();
973    int result = XML_Parse(parser, cString.data(), cString.length(), true);
974
975    XML_ParserFree(parser);
976    tokenizer.setXMLParser(0);
977
978    return result != XML_STATUS_ERROR;
979}
980
981// --------------------------------
982
983struct AttributeParseState {
984    HashMap<String, String> attributes;
985    bool gotAttributes;
986};
987
988static void attributesStartElementHandler(void *userData, const XML_Char *name, const XML_Char **atts)
989{
990    if (strcmp(name, "attrs") != 0)
991        return;
992
993    if (atts[0] == 0 )
994        return;
995
996    AttributeParseState *state = static_cast<AttributeParseState *>(userData);
997    state->gotAttributes = true;
998
999    for (int i = 0; atts[i]; i += 2) {
1000        DeprecatedString attrName = toQString(atts[i]);
1001        DeprecatedString attrValue = toQString(atts[i+1]);
1002        state->attributes.set(attrName, attrValue);
1003    }
1004}
1005
1006HashMap<String, String> parseAttributes(const String& string, bool& attrsOK)
1007{
1008    AttributeParseState state;
1009    state.gotAttributes = false;
1010
1011    XML_Parser parser = XML_ParserCreateNS(NULL, tripletSep);
1012    XML_SetUserData(parser, (void *)&state);
1013    XML_SetReturnNSTriplet(parser, true);
1014
1015    XML_SetStartElementHandler(parser, attributesStartElementHandler);
1016    String input = "<?xml version=\"1.0\"?><attrs " + string.deprecatedString() + " />";
1017    CString cString = input.deprecatedString().utf8();
1018    if ( XML_Parse(parser, cString.data(), cString.length(), true) != XML_STATUS_ERROR )
1019        attrsOK = state.gotAttributes;
1020    XML_ParserFree(parser);
1021
1022    return state.attributes;
1023}
1024
1025}
1026