1/*
2 * Copyright (C) 2000 Peter Kelly (pmk@post.com)
3 * Copyright (C) 2005, 2006, 2008 Apple Inc. All rights reserved.
4 * Copyright (C) 2006 Alexey Proskuryakov (ap@webkit.org)
5 * Copyright (C) 2007 Samuel Weinig (sam@webkit.org)
6 * Copyright (C) 2008 Nokia Corporation and/or its subsidiary(-ies)
7 * Copyright (C) 2008 Holger Hans Peter Freyther
8 * Copyright (C) 2008 Torch Mobile Inc. All rights reserved. (http://www.torchmobile.com/)
9 *
10 * This library is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Library General Public
12 * License as published by the Free Software Foundation; either
13 * version 2 of the License, or (at your option) any later version.
14 *
15 * This library is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18 * Library General Public License for more details.
19 *
20 * You should have received a copy of the GNU Library General Public License
21 * along with this library; see the file COPYING.LIB.  If not, write to
22 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
23 * Boston, MA 02110-1301, USA.
24 */
25
26#include "config.h"
27#include "core/xml/parser/XMLDocumentParser.h"
28
29#include <libxml/catalog.h>
30#include <libxml/parser.h>
31#include <libxml/parserInternals.h>
32#include <libxslt/xslt.h>
33#include "FetchInitiatorTypeNames.h"
34#include "HTMLNames.h"
35#include "RuntimeEnabledFeatures.h"
36#include "XMLNSNames.h"
37#include "bindings/v8/ExceptionState.h"
38#include "bindings/v8/ExceptionStatePlaceholder.h"
39#include "bindings/v8/ScriptController.h"
40#include "bindings/v8/ScriptSourceCode.h"
41#include "core/dom/CDATASection.h"
42#include "core/dom/Comment.h"
43#include "core/dom/Document.h"
44#include "core/dom/DocumentFragment.h"
45#include "core/dom/DocumentType.h"
46#include "core/dom/ProcessingInstruction.h"
47#include "core/dom/ScriptLoader.h"
48#include "core/dom/TransformSource.h"
49#include "core/fetch/ResourceFetcher.h"
50#include "core/fetch/ScriptResource.h"
51#include "core/fetch/TextResourceDecoder.h"
52#include "core/frame/Frame.h"
53#include "core/html/HTMLHtmlElement.h"
54#include "core/html/HTMLTemplateElement.h"
55#include "core/html/parser/HTMLEntityParser.h"
56#include "core/loader/FrameLoader.h"
57#include "core/loader/ImageLoader.h"
58#include "core/frame/UseCounter.h"
59#include "core/xml/XMLTreeViewer.h"
60#include "core/xml/parser/XMLDocumentParserScope.h"
61#include "core/xml/parser/XMLParserInput.h"
62#include "platform/SharedBuffer.h"
63#include "platform/network/ResourceError.h"
64#include "platform/network/ResourceRequest.h"
65#include "platform/network/ResourceResponse.h"
66#include "platform/weborigin/SecurityOrigin.h"
67#include "wtf/StringExtras.h"
68#include "wtf/TemporaryChange.h"
69#include "wtf/Threading.h"
70#include "wtf/Vector.h"
71#include "wtf/unicode/UTF8.h"
72
73using namespace std;
74
75namespace WebCore {
76
77using namespace HTMLNames;
78
// Upper bound on the open-node stack: pushCurrentNode() reports a fatal
// "Excessive node nesting." error once the stack grows past this size.
// FIXME: HTMLConstructionSite has a limit of 512, should these match?
static const unsigned maxXMLTreeDepth = 5000;
81
82static inline String toString(const xmlChar* string, size_t length)
83{
84    return String::fromUTF8(reinterpret_cast<const char*>(string), length);
85}
86
87static inline String toString(const xmlChar* string)
88{
89    return String::fromUTF8(reinterpret_cast<const char*>(string));
90}
91
92static inline AtomicString toAtomicString(const xmlChar* string, size_t length)
93{
94    return AtomicString::fromUTF8(reinterpret_cast<const char*>(string), length);
95}
96
97static inline AtomicString toAtomicString(const xmlChar* string)
98{
99    return AtomicString::fromUTF8(reinterpret_cast<const char*>(string));
100}
101
102static inline bool hasNoStyleInformation(Document* document)
103{
104    if (document->sawElementsInKnownNamespaces() || document->transformSourceDocument())
105        return false;
106
107    if (!document->frame() || !document->frame()->page())
108        return false;
109
110    if (document->frame()->tree().parent())
111        return false; // This document is not in a top frame
112
113    return true;
114}
115
116class PendingStartElementNSCallback FINAL : public XMLDocumentParser::PendingCallback {
117public:
118    PendingStartElementNSCallback(const AtomicString& localName, const AtomicString& prefix, const AtomicString& uri,
119        int namespaceCount, const xmlChar** namespaces, int attributeCount, int defaultedCount, const xmlChar** attributes)
120        : m_localName(localName)
121        , m_prefix(prefix)
122        , m_uri(uri)
123        , m_namespaceCount(namespaceCount)
124        , m_attributeCount(attributeCount)
125        , m_defaultedCount(defaultedCount)
126    {
127        m_namespaces = static_cast<xmlChar**>(xmlMalloc(sizeof(xmlChar*) * namespaceCount * 2));
128        for (int i = 0; i < namespaceCount * 2 ; i++)
129            m_namespaces[i] = xmlStrdup(namespaces[i]);
130        m_attributes = static_cast<xmlChar**>(xmlMalloc(sizeof(xmlChar*) * attributeCount * 5));
131        for (int i = 0; i < attributeCount; i++) {
132            // Each attribute has 5 elements in the array:
133            // name, prefix, uri, value and an end pointer.
134            for (int j = 0; j < 3; j++)
135                m_attributes[i * 5 + j] = xmlStrdup(attributes[i * 5 + j]);
136            int length = attributes[i * 5 + 4] - attributes[i * 5 + 3];
137            m_attributes[i * 5 + 3] = xmlStrndup(attributes[i * 5 + 3], length);
138            m_attributes[i * 5 + 4] = m_attributes[i * 5 + 3] + length;
139        }
140    }
141
142    virtual ~PendingStartElementNSCallback()
143    {
144        for (int i = 0; i < m_namespaceCount * 2; i++)
145            xmlFree(m_namespaces[i]);
146        xmlFree(m_namespaces);
147        for (int i = 0; i < m_attributeCount; i++)
148            for (int j = 0; j < 4; j++)
149                xmlFree(m_attributes[i * 5 + j]);
150        xmlFree(m_attributes);
151    }
152
153    virtual void call(XMLDocumentParser* parser) OVERRIDE
154    {
155        parser->startElementNs(m_localName, m_prefix, m_uri,
156                                  m_namespaceCount, const_cast<const xmlChar**>(m_namespaces),
157                                  m_attributeCount, m_defaultedCount, const_cast<const xmlChar**>(m_attributes));
158    }
159
160private:
161    AtomicString m_localName;
162    AtomicString m_prefix;
163    AtomicString m_uri;
164    int m_namespaceCount;
165    xmlChar** m_namespaces;
166    int m_attributeCount;
167    int m_defaultedCount;
168    xmlChar** m_attributes;
169};
170
171class PendingEndElementNSCallback FINAL : public XMLDocumentParser::PendingCallback {
172public:
173    virtual void call(XMLDocumentParser* parser) OVERRIDE
174    {
175        parser->endElementNs();
176    }
177};
178
179class PendingCharactersCallback FINAL : public XMLDocumentParser::PendingCallback {
180public:
181    PendingCharactersCallback(const xmlChar* chars, int length)
182        : m_chars(xmlStrndup(chars, length))
183        , m_length(length)
184    {
185    }
186
187    virtual ~PendingCharactersCallback()
188    {
189        xmlFree(m_chars);
190    }
191
192    virtual void call(XMLDocumentParser* parser) OVERRIDE
193    {
194        parser->characters(m_chars, m_length);
195    }
196
197private:
198    xmlChar* m_chars;
199    int m_length;
200};
201
202class PendingProcessingInstructionCallback FINAL : public XMLDocumentParser::PendingCallback {
203public:
204    PendingProcessingInstructionCallback(const String& target, const String& data)
205        : m_target(target)
206        , m_data(data)
207    {
208    }
209
210    virtual void call(XMLDocumentParser* parser) OVERRIDE
211    {
212        parser->processingInstruction(m_target, m_data);
213    }
214
215private:
216    String m_target;
217    String m_data;
218};
219
220class PendingCDATABlockCallback FINAL : public XMLDocumentParser::PendingCallback {
221public:
222    explicit PendingCDATABlockCallback(const String& text) : m_text(text) { }
223
224    virtual void call(XMLDocumentParser* parser) OVERRIDE
225    {
226        parser->cdataBlock(m_text);
227    }
228
229private:
230    String m_text;
231};
232
233class PendingCommentCallback FINAL : public XMLDocumentParser::PendingCallback {
234public:
235    explicit PendingCommentCallback(const String& text) : m_text(text) { }
236
237    virtual void call(XMLDocumentParser* parser) OVERRIDE
238    {
239        parser->comment(m_text);
240    }
241
242private:
243    String m_text;
244};
245
246class PendingInternalSubsetCallback FINAL : public XMLDocumentParser::PendingCallback {
247public:
248    PendingInternalSubsetCallback(const String& name, const String& externalID, const String& systemID)
249        : m_name(name)
250        , m_externalID(externalID)
251        , m_systemID(systemID)
252    {
253    }
254
255    virtual void call(XMLDocumentParser* parser) OVERRIDE
256    {
257        parser->internalSubset(m_name, m_externalID, m_systemID);
258    }
259
260private:
261    String m_name;
262    String m_externalID;
263    String m_systemID;
264};
265
266class PendingErrorCallback FINAL : public XMLDocumentParser::PendingCallback {
267public:
268    PendingErrorCallback(XMLErrors::ErrorType type, const xmlChar* message, OrdinalNumber lineNumber, OrdinalNumber columnNumber)
269        : m_type(type)
270        , m_message(xmlStrdup(message))
271        , m_lineNumber(lineNumber)
272        , m_columnNumber(columnNumber)
273    {
274    }
275
276    virtual ~PendingErrorCallback()
277    {
278        xmlFree(m_message);
279    }
280
281    virtual void call(XMLDocumentParser* parser) OVERRIDE
282    {
283        parser->handleError(m_type, reinterpret_cast<char*>(m_message), TextPosition(m_lineNumber, m_columnNumber));
284    }
285
286private:
287    XMLErrors::ErrorType m_type;
288    xmlChar* m_message;
289    OrdinalNumber m_lineNumber;
290    OrdinalNumber m_columnNumber;
291};
292
293void XMLDocumentParser::pushCurrentNode(ContainerNode* n)
294{
295    ASSERT(n);
296    ASSERT(m_currentNode);
297    if (n != document())
298        n->ref();
299    m_currentNodeStack.append(m_currentNode);
300    m_currentNode = n;
301    if (m_currentNodeStack.size() > maxXMLTreeDepth)
302        handleError(XMLErrors::fatal, "Excessive node nesting.", textPosition());
303}
304
305void XMLDocumentParser::popCurrentNode()
306{
307    if (!m_currentNode)
308        return;
309    ASSERT(m_currentNodeStack.size());
310
311    if (m_currentNode != document())
312        m_currentNode->deref();
313
314    m_currentNode = m_currentNodeStack.last();
315    m_currentNodeStack.removeLast();
316}
317
318void XMLDocumentParser::clearCurrentNodeStack()
319{
320    if (m_currentNode && m_currentNode != document())
321        m_currentNode->deref();
322    m_currentNode = 0;
323    m_leafTextNode = 0;
324
325    if (m_currentNodeStack.size()) { // Aborted parsing.
326        for (size_t i = m_currentNodeStack.size() - 1; i != 0; --i)
327            m_currentNodeStack[i]->deref();
328        if (m_currentNodeStack[0] && m_currentNodeStack[0] != document())
329            m_currentNodeStack[0]->deref();
330        m_currentNodeStack.clear();
331    }
332}
333
334void XMLDocumentParser::insert(const SegmentedString&)
335{
336    ASSERT_NOT_REACHED();
337}
338
339void XMLDocumentParser::append(PassRefPtr<StringImpl> inputSource)
340{
341    SegmentedString source(inputSource);
342    if (m_sawXSLTransform || !m_sawFirstElement)
343        m_originalSourceForTransform.append(source);
344
345    if (isStopped() || m_sawXSLTransform)
346        return;
347
348    if (m_parserPaused) {
349        m_pendingSrc.append(source);
350        return;
351    }
352
353    // JavaScript can detach the parser. Make sure this is not released
354    // before the end of this method.
355    RefPtr<XMLDocumentParser> protect(this);
356
357    doWrite(source.toString());
358
359    if (isStopped())
360        return;
361
362    if (document()->frame() && document()->frame()->script().canExecuteScripts(NotAboutToExecuteScript))
363        ImageLoader::dispatchPendingBeforeLoadEvents();
364}
365
366void XMLDocumentParser::handleError(XMLErrors::ErrorType type, const char* formattedMessage, TextPosition position)
367{
368    m_xmlErrors.handleError(type, formattedMessage, position);
369    if (type != XMLErrors::warning)
370        m_sawError = true;
371    if (type == XMLErrors::fatal)
372        stopParsing();
373}
374
375void XMLDocumentParser::enterText()
376{
377    ASSERT(m_bufferedText.size() == 0);
378    ASSERT(!m_leafTextNode);
379    m_leafTextNode = Text::create(m_currentNode->document(), "");
380    m_currentNode->parserAppendChild(m_leafTextNode.get());
381}
382
383void XMLDocumentParser::exitText()
384{
385    if (isStopped())
386        return;
387
388    if (!m_leafTextNode)
389        return;
390
391    m_leafTextNode->appendData(toString(m_bufferedText.data(), m_bufferedText.size()));
392    m_bufferedText.clear();
393    m_leafTextNode = 0;
394}
395
396void XMLDocumentParser::detach()
397{
398    clearCurrentNodeStack();
399    ScriptableDocumentParser::detach();
400}
401
402void XMLDocumentParser::end()
403{
404    // XMLDocumentParserLibxml2 will do bad things to the document if doEnd() is called.
405    // I don't believe XMLDocumentParserQt needs doEnd called in the fragment case.
406    ASSERT(!m_parsingFragment);
407
408    doEnd();
409
410    // doEnd() call above can detach the parser and null out its document.
411    // In that case, we just bail out.
412    if (isDetached())
413        return;
414
415    // doEnd() could process a script tag, thus pausing parsing.
416    if (m_parserPaused)
417        return;
418
419    if (m_sawError)
420        insertErrorMessageBlock();
421    else {
422        exitText();
423        document()->styleResolverChanged(RecalcStyleImmediately);
424    }
425
426    if (isParsing())
427        prepareToStopParsing();
428    document()->setReadyState(Document::Interactive);
429    clearCurrentNodeStack();
430    document()->finishedParsing();
431}
432
433void XMLDocumentParser::finish()
434{
435    // FIXME: We should ASSERT(!m_parserStopped) here, since it does not
436    // makes sense to call any methods on DocumentParser once it's been stopped.
437    // However, FrameLoader::stop calls DocumentParser::finish unconditionally.
438
439    if (m_parserPaused)
440        m_finishCalled = true;
441    else
442        end();
443}
444
445void XMLDocumentParser::insertErrorMessageBlock()
446{
447    m_xmlErrors.insertErrorMessageBlock();
448}
449
450void XMLDocumentParser::notifyFinished(Resource* unusedResource)
451{
452    ASSERT_UNUSED(unusedResource, unusedResource == m_pendingScript);
453    ASSERT(m_pendingScript->accessCount() > 0);
454
455    ScriptSourceCode sourceCode(m_pendingScript.get());
456    bool errorOccurred = m_pendingScript->errorOccurred();
457    bool wasCanceled = m_pendingScript->wasCanceled();
458
459    m_pendingScript->removeClient(this);
460    m_pendingScript = 0;
461
462    RefPtr<Element> e = m_scriptElement;
463    m_scriptElement = 0;
464
465    ScriptLoader* scriptLoader = toScriptLoaderIfPossible(e.get());
466    ASSERT(scriptLoader);
467
468    // JavaScript can detach this parser, make sure it's kept alive even if detached.
469    RefPtr<XMLDocumentParser> protect(this);
470
471    if (errorOccurred)
472        scriptLoader->dispatchErrorEvent();
473    else if (!wasCanceled) {
474        if (scriptLoader->executePotentiallyCrossOriginScript(sourceCode))
475            scriptLoader->dispatchLoadEvent();
476    }
477
478    m_scriptElement = 0;
479
480    if (!isDetached() && !m_requestingScript)
481        resumeParsing();
482}
483
484bool XMLDocumentParser::isWaitingForScripts() const
485{
486    return m_pendingScript;
487}
488
489void XMLDocumentParser::pauseParsing()
490{
491    if (m_parsingFragment)
492        return;
493
494    m_parserPaused = true;
495}
496
497bool XMLDocumentParser::parseDocumentFragment(const String& chunk, DocumentFragment* fragment, Element* contextElement, ParserContentPolicy parserContentPolicy)
498{
499    if (!chunk.length())
500        return true;
501
502    // FIXME: We need to implement the HTML5 XML Fragment parsing algorithm:
503    // http://www.whatwg.org/specs/web-apps/current-work/multipage/the-xhtml-syntax.html#xml-fragment-parsing-algorithm
504    // For now we have a hack for script/style innerHTML support:
505    if (contextElement && (contextElement->hasLocalName(HTMLNames::scriptTag) || contextElement->hasLocalName(HTMLNames::styleTag))) {
506        fragment->parserAppendChild(fragment->document().createTextNode(chunk));
507        return true;
508    }
509
510    RefPtr<XMLDocumentParser> parser = XMLDocumentParser::create(fragment, contextElement, parserContentPolicy);
511    bool wellFormed = parser->appendFragmentSource(chunk);
512    // Do not call finish().  Current finish() and doEnd() implementations touch the main Document/loader
513    // and can cause crashes in the fragment case.
514    parser->detach(); // Allows ~DocumentParser to assert it was detached before destruction.
515    return wellFormed; // appendFragmentSource()'s wellFormed is more permissive than wellFormed().
516}
517
// Placeholder descriptor: openFunc() returns its address for loads that are
// not allowed, and readFunc()/closeFunc() compare against that address to
// read zero bytes and to skip deletion.
static int globalDescriptor = 0;
// Thread that ran initializeLibXMLIfNecessary(); matchFunc() only accepts
// loads issued from this thread.
static ThreadIdentifier libxmlLoaderThread = 0;
520
521static int matchFunc(const char*)
522{
523    // Only match loads initiated due to uses of libxml2 from within XMLDocumentParser to avoid
524    // interfering with client applications that also use libxml2.  http://bugs.webkit.org/show_bug.cgi?id=17353
525    return XMLDocumentParserScope::currentFetcher && currentThread() == libxmlLoaderThread;
526}
527
528class OffsetBuffer {
529    WTF_MAKE_FAST_ALLOCATED;
530public:
531    OffsetBuffer(const Vector<char>& b) : m_buffer(b), m_currentOffset(0) { }
532
533    int readOutBytes(char* outputBuffer, unsigned askedToRead)
534    {
535        unsigned bytesLeft = m_buffer.size() - m_currentOffset;
536        unsigned lenToCopy = min(askedToRead, bytesLeft);
537        if (lenToCopy) {
538            memcpy(outputBuffer, m_buffer.data() + m_currentOffset, lenToCopy);
539            m_currentOffset += lenToCopy;
540        }
541        return lenToCopy;
542    }
543
544private:
545    Vector<char> m_buffer;
546    unsigned m_currentOffset;
547};
548
549static inline void setAttributes(Element* element, Vector<Attribute>& attributeVector, ParserContentPolicy parserContentPolicy)
550{
551    if (!scriptingContentIsAllowed(parserContentPolicy))
552        element->stripScriptingAttributes(attributeVector);
553    element->parserSetAttributes(attributeVector);
554}
555
556static void switchEncoding(xmlParserCtxtPtr ctxt, bool is8Bit)
557{
558    // Hack around libxml2's lack of encoding overide support by manually
559    // resetting the encoding to UTF-16 before every chunk.  Otherwise libxml
560    // will detect <?xml version="1.0" encoding="<encoding name>"?> blocks
561    // and switch encodings, causing the parse to fail.
562    if (is8Bit) {
563        xmlSwitchEncoding(ctxt, XML_CHAR_ENCODING_8859_1);
564        return;
565    }
566
567    const UChar BOM = 0xFEFF;
568    const unsigned char BOMHighByte = *reinterpret_cast<const unsigned char*>(&BOM);
569    xmlSwitchEncoding(ctxt, BOMHighByte == 0xFF ? XML_CHAR_ENCODING_UTF16LE : XML_CHAR_ENCODING_UTF16BE);
570}
571
572static void parseChunk(xmlParserCtxtPtr ctxt, const String& chunk)
573{
574    bool is8Bit = chunk.is8Bit();
575    switchEncoding(ctxt, is8Bit);
576    if (is8Bit)
577        xmlParseChunk(ctxt, reinterpret_cast<const char*>(chunk.characters8()), sizeof(LChar) * chunk.length(), 0);
578    else
579        xmlParseChunk(ctxt, reinterpret_cast<const char*>(chunk.characters16()), sizeof(UChar) * chunk.length(), 0);
580}
581
582static void finishParsing(xmlParserCtxtPtr ctxt)
583{
584    xmlParseChunk(ctxt, 0, 0, 1);
585}
586
// From this point on, any direct use of xmlParseChunk expands to tokens that
// do not compile; code below must go through parseChunk()/finishParsing().
#define xmlParseChunk #error "Use parseChunk instead to select the correct encoding."
588
589static bool isLibxmlDefaultCatalogFile(const String& urlString)
590{
591    // On non-Windows platforms libxml asks for this URL, the
592    // "XML_XML_DEFAULT_CATALOG", on initialization.
593    if (urlString == "file:///etc/xml/catalog")
594        return true;
595
596    // On Windows, libxml computes a URL relative to where its DLL resides.
597    if (urlString.startsWith("file:///", false) && urlString.endsWith("/etc/catalog", false))
598        return true;
599    return false;
600}
601
602static bool shouldAllowExternalLoad(const KURL& url)
603{
604    String urlString = url.string();
605
606    // This isn't really necessary now that initializeLibXMLIfNecessary
607    // disables catalog support in libxml, but keeping it for defense in depth.
608    if (isLibxmlDefaultCatalogFile(url))
609        return false;
610
611    // The most common DTD.  There isn't much point in hammering www.w3c.org
612    // by requesting this URL for every XHTML document.
613    if (urlString.startsWith("http://www.w3.org/TR/xhtml", false))
614        return false;
615
616    // Similarly, there isn't much point in requesting the SVG DTD.
617    if (urlString.startsWith("http://www.w3.org/Graphics/SVG", false))
618        return false;
619
620    // The libxml doesn't give us a lot of context for deciding whether to
621    // allow this request.  In the worst case, this load could be for an
622    // external entity and the resulting document could simply read the
623    // retrieved content.  If we had more context, we could potentially allow
624    // the parser to load a DTD.  As things stand, we take the conservative
625    // route and allow same-origin requests only.
626    if (!XMLDocumentParserScope::currentFetcher->document()->securityOrigin()->canRequest(url)) {
627        XMLDocumentParserScope::currentFetcher->printAccessDeniedMessage(url);
628        return false;
629    }
630
631    return true;
632}
633
// libxml I/O "open" callback (registered in initializeLibXMLIfNecessary()).
// Synchronously fetches |uri| through the current fetcher and returns a
// heap-allocated OffsetBuffer holding the data; closeFunc() deletes it.
// For loads that are not allowed (before or after redirects) it returns
// &globalDescriptor instead, which readFunc() treats as an empty stream.
static void* openFunc(const char* uri)
{
    ASSERT(XMLDocumentParserScope::currentFetcher);
    ASSERT(currentThread() == libxmlLoaderThread);

    KURL url(KURL(), uri);

    if (!shouldAllowExternalLoad(url))
        return &globalDescriptor;

    KURL finalURL;
    Vector<char> data;

    {
        // Grab the fetcher before the nested scope below is constructed with 0.
        // NOTE(review): presumably XMLDocumentParserScope(0) clears the current
        // fetcher for the duration of the synchronous load - confirm.
        ResourceFetcher* fetcher = XMLDocumentParserScope::currentFetcher;
        XMLDocumentParserScope scope(0);
        // FIXME: We should restore the original global error handler as well.

        if (fetcher->frame()) {
            FetchRequest request(ResourceRequest(url), FetchInitiatorTypeNames::xml, ResourceFetcher::defaultResourceOptions());
            ResourcePtr<Resource> resource = fetcher->fetchSynchronously(request);
            if (resource && !resource->errorOccurred()) {
                resource->resourceBuffer()->moveTo(data);
                finalURL = resource->response().url();
            }
        }
    }

    // We have to check the URL again after the load to catch redirects.
    // See <https://bugs.webkit.org/show_bug.cgi?id=21963>.
    // NOTE(review): when there is no frame or the fetch failed, finalURL is
    // still default-constructed here and is checked anyway - confirm intended.
    if (!shouldAllowExternalLoad(finalURL))
        return &globalDescriptor;

    return new OffsetBuffer(data);
}
669
670static int readFunc(void* context, char* buffer, int len)
671{
672    // Do 0-byte reads in case of a null descriptor
673    if (context == &globalDescriptor)
674        return 0;
675
676    OffsetBuffer* data = static_cast<OffsetBuffer*>(context);
677    return data->readOutBytes(buffer, len);
678}
679
// libxml I/O "write" callback: accepts nothing and always reports that zero
// bytes were written.
static int writeFunc(void*, const char*, int)
{
    const int bytesWritten = 0;
    return bytesWritten;
}
685
686static int closeFunc(void* context)
687{
688    if (context != &globalDescriptor) {
689        OffsetBuffer* data = static_cast<OffsetBuffer*>(context);
690        delete data;
691    }
692    return 0;
693}
694
// printf-style error sink that ignores all of its arguments.
// NOTE(review): presumably installed as a libxml generic error handler
// elsewhere in this file - not visible from here.
static void errorFunc(void*, const char*, ...)
{
    // FIXME: It would be nice to display error messages somewhere.
}
699
700static void initializeLibXMLIfNecessary()
701{
702    static bool didInit = false;
703    if (didInit)
704        return;
705
706    // We don't want libxml to try and load catalogs.
707    // FIXME: It's not nice to set global settings in libxml, embedders of Blink
708    // could be trying to use libxml themselves.
709    xmlCatalogSetDefaults(XML_CATA_ALLOW_NONE);
710    xmlInitParser();
711    xmlRegisterInputCallbacks(matchFunc, openFunc, readFunc, closeFunc);
712    xmlRegisterOutputCallbacks(matchFunc, openFunc, writeFunc, closeFunc);
713    libxmlLoaderThread = currentThread();
714    didInit = true;
715}
716
717
718PassRefPtr<XMLParserContext> XMLParserContext::createStringParser(xmlSAXHandlerPtr handlers, void* userData)
719{
720    initializeLibXMLIfNecessary();
721    xmlParserCtxtPtr parser = xmlCreatePushParserCtxt(handlers, 0, 0, 0, 0);
722    parser->_private = userData;
723    parser->replaceEntities = true;
724    return adoptRef(new XMLParserContext(parser));
725}
726
727// Chunk should be encoded in UTF-8
728PassRefPtr<XMLParserContext> XMLParserContext::createMemoryParser(xmlSAXHandlerPtr handlers, void* userData, const CString& chunk)
729{
730    initializeLibXMLIfNecessary();
731
732    // appendFragmentSource() checks that the length doesn't overflow an int.
733    xmlParserCtxtPtr parser = xmlCreateMemoryParserCtxt(chunk.data(), chunk.length());
734
735    if (!parser)
736        return 0;
737
738    // Copy the sax handler
739    memcpy(parser->sax, handlers, sizeof(xmlSAXHandler));
740
741    // Set parser options.
742    // XML_PARSE_NODICT: default dictionary option.
743    // XML_PARSE_NOENT: force entities substitutions.
744    xmlCtxtUseOptions(parser, XML_PARSE_NODICT | XML_PARSE_NOENT);
745
746    // Internal initialization
747    parser->sax2 = 1;
748    parser->instate = XML_PARSER_CONTENT; // We are parsing a CONTENT
749    parser->depth = 0;
750    parser->str_xml = xmlDictLookup(parser->dict, BAD_CAST "xml", 3);
751    parser->str_xmlns = xmlDictLookup(parser->dict, BAD_CAST "xmlns", 5);
752    parser->str_xml_ns = xmlDictLookup(parser->dict, XML_XML_NAMESPACE, 36);
753    parser->_private = userData;
754
755    return adoptRef(new XMLParserContext(parser));
756}
757
758// --------------------------------
759
760bool XMLDocumentParser::supportsXMLVersion(const String& version)
761{
762    return version == "1.0";
763}
764
// Constructs a parser for a whole document: the document itself is the
// initial current node, all state flags start cleared and m_parsingFragment
// is false. Records the XMLDocument use counter for |document|.
XMLDocumentParser::XMLDocumentParser(Document* document, FrameView* frameView)
    : ScriptableDocumentParser(document)
    , m_view(frameView)
    , m_context(0)
    , m_currentNode(document)
    , m_isCurrentlyParsing8BitChunk(false)
    , m_sawError(false)
    , m_sawCSS(false)
    , m_sawXSLTransform(false)
    , m_sawFirstElement(false)
    , m_isXHTMLDocument(false)
    , m_parserPaused(false)
    , m_requestingScript(false)
    , m_finishCalled(false)
    , m_xmlErrors(document)
    , m_pendingScript(0)
    , m_scriptStartPosition(TextPosition::belowRangePosition())
    , m_parsingFragment(false)
{
    // This is XML being used as a document resource.
    UseCounter::count(*document, UseCounter::XMLDocument);
}
787
788XMLDocumentParser::XMLDocumentParser(DocumentFragment* fragment, Element* parentElement, ParserContentPolicy parserContentPolicy)
789    : ScriptableDocumentParser(&fragment->document(), parserContentPolicy)
790    , m_view(0)
791    , m_context(0)
792    , m_currentNode(fragment)
793    , m_isCurrentlyParsing8BitChunk(false)
794    , m_sawError(false)
795    , m_sawCSS(false)
796    , m_sawXSLTransform(false)
797    , m_sawFirstElement(false)
798    , m_isXHTMLDocument(false)
799    , m_parserPaused(false)
800    , m_requestingScript(false)
801    , m_finishCalled(false)
802    , m_xmlErrors(&fragment->document())
803    , m_pendingScript(0)
804    , m_scriptStartPosition(TextPosition::belowRangePosition())
805    , m_parsingFragment(true)
806{
807    fragment->ref();
808
809    // Add namespaces based on the parent node
810    Vector<Element*> elemStack;
811    while (parentElement) {
812        elemStack.append(parentElement);
813
814        ContainerNode* n = parentElement->parentNode();
815        if (!n || !n->isElementNode())
816            break;
817        parentElement = toElement(n);
818    }
819
820    if (elemStack.isEmpty())
821        return;
822
823    for (; !elemStack.isEmpty(); elemStack.removeLast()) {
824        Element* element = elemStack.last();
825        if (element->hasAttributes()) {
826            for (unsigned i = 0; i < element->attributeCount(); i++) {
827                const Attribute* attribute = element->attributeItem(i);
828                if (attribute->localName() == xmlnsAtom)
829                    m_defaultNamespaceURI = attribute->value();
830                else if (attribute->prefix() == xmlnsAtom)
831                    m_prefixToNamespaceMap.set(attribute->localName(), attribute->value());
832            }
833        }
834    }
835
836    // If the parent element is not in document tree, there may be no xmlns attribute; just default to the parent's namespace.
837    if (m_defaultNamespaceURI.isNull() && !parentElement->inDocument())
838        m_defaultNamespaceURI = parentElement->namespaceURI();
839}
840
841XMLParserContext::~XMLParserContext()
842{
843    if (m_context->myDoc)
844        xmlFreeDoc(m_context->myDoc);
845    xmlFreeParserCtxt(m_context);
846}
847
848XMLDocumentParser::~XMLDocumentParser()
849{
850    // The XMLDocumentParser will always be detached before being destroyed.
851    ASSERT(m_currentNodeStack.isEmpty());
852    ASSERT(!m_currentNode);
853
854    // FIXME: m_pendingScript handling should be moved into XMLDocumentParser.cpp!
855    if (m_pendingScript)
856        m_pendingScript->removeClient(this);
857}
858
859void XMLDocumentParser::doWrite(const String& parseString)
860{
861    ASSERT(!isDetached());
862    if (!m_context)
863        initializeParserContext();
864
865    // Protect the libxml context from deletion during a callback
866    RefPtr<XMLParserContext> context = m_context;
867
868    // libXML throws an error if you try to switch the encoding for an empty string.
869    if (parseString.length()) {
870        // JavaScript may cause the parser to detach during parseChunk
871        // keep this alive until this function is done.
872        RefPtr<XMLDocumentParser> protect(this);
873
874        XMLDocumentParserScope scope(document()->fetcher());
875        TemporaryChange<bool> encodingScope(m_isCurrentlyParsing8BitChunk, parseString.is8Bit());
876        parseChunk(context->context(), parseString);
877
878        // JavaScript (which may be run under the parseChunk callstack) may
879        // cause the parser to be stopped or detached.
880        if (isStopped())
881            return;
882    }
883
884    // FIXME: Why is this here?  And why is it after we process the passed source?
885    if (document()->sawDecodingError()) {
886        // If the decoder saw an error, report it as fatal (stops parsing)
887        TextPosition position(OrdinalNumber::fromOneBasedInt(context->context()->input->line), OrdinalNumber::fromOneBasedInt(context->context()->input->col));
888        handleError(XMLErrors::fatal, "Encoding error", position);
889    }
890}
891
// View over one entry of the namespace array passed to startElementNs:
// handleNamespaceAttributes() reinterprets the flat const xmlChar** array as
// an array of these, so the member order and count must not change.
struct _xmlSAX2Namespace {
    const xmlChar* prefix; // Null for the default namespace declaration.
    const xmlChar* uri;
};
typedef struct _xmlSAX2Namespace xmlSAX2Namespace;
897
898static inline void handleNamespaceAttributes(Vector<Attribute>& prefixedAttributes, const xmlChar** libxmlNamespaces, int nbNamespaces, ExceptionState& exceptionState)
899{
900    xmlSAX2Namespace* namespaces = reinterpret_cast<xmlSAX2Namespace*>(libxmlNamespaces);
901    for (int i = 0; i < nbNamespaces; i++) {
902        AtomicString namespaceQName = xmlnsAtom;
903        AtomicString namespaceURI = toAtomicString(namespaces[i].uri);
904        if (namespaces[i].prefix)
905            namespaceQName = "xmlns:" + toString(namespaces[i].prefix);
906
907        QualifiedName parsedName = anyName;
908        if (!Element::parseAttributeName(parsedName, XMLNSNames::xmlnsNamespaceURI, namespaceQName, exceptionState))
909            return;
910
911        prefixedAttributes.append(Attribute(parsedName, namespaceURI));
912    }
913}
914
915struct _xmlSAX2Attributes {
916    const xmlChar* localname;
917    const xmlChar* prefix;
918    const xmlChar* uri;
919    const xmlChar* value;
920    const xmlChar* end;
921};
922typedef struct _xmlSAX2Attributes xmlSAX2Attributes;
923
924static inline void handleElementAttributes(Vector<Attribute>& prefixedAttributes, const xmlChar** libxmlAttributes, int nbAttributes, ExceptionState& exceptionState)
925{
926    xmlSAX2Attributes* attributes = reinterpret_cast<xmlSAX2Attributes*>(libxmlAttributes);
927    for (int i = 0; i < nbAttributes; i++) {
928        int valueLength = static_cast<int>(attributes[i].end - attributes[i].value);
929        AtomicString attrValue = toAtomicString(attributes[i].value, valueLength);
930        String attrPrefix = toString(attributes[i].prefix);
931        AtomicString attrURI = attrPrefix.isEmpty() ? AtomicString() : toAtomicString(attributes[i].uri);
932        AtomicString attrQName = attrPrefix.isEmpty() ? toAtomicString(attributes[i].localname) : attrPrefix + ":" + toString(attributes[i].localname);
933
934        QualifiedName parsedName = anyName;
935        if (!Element::parseAttributeName(parsedName, attrURI, attrQName, exceptionState))
936            return;
937
938        prefixedAttributes.append(Attribute(parsedName, attrValue));
939    }
940}
941
// SAX2 start-element callback. Creates the element described by
// |localName|/|prefix|/|uri|, attaches its namespace declarations and
// attributes, appends it to the current node and makes it (or, for
// <template>, its content fragment) the new current node.
//
// Does nothing once the parser is stopped. While the parser is paused the
// call is queued in m_pendingCallbacks instead of being processed. Parsing is
// stopped if the element cannot be created or if an attribute name is
// rejected.
void XMLDocumentParser::startElementNs(const AtomicString& localName, const AtomicString& prefix, const AtomicString& uri, int nbNamespaces,
    const xmlChar** libxmlNamespaces, int nbAttributes, int nbDefaulted, const xmlChar** libxmlAttributes)
{
    if (isStopped())
        return;

    if (m_parserPaused) {
        m_pendingCallbacks.append(adoptPtr(new PendingStartElementNSCallback(localName, prefix, uri, nbNamespaces, libxmlNamespaces,
            nbAttributes, nbDefaulted, libxmlAttributes)));
        return;
    }

    exitText();

    // When parsing a fragment and libxml2 supplied no namespace URI, fall back
    // to the prefix map (prefixed names) or the default namespace (unprefixed
    // names) held by this parser.
    AtomicString adjustedURI = uri;
    if (m_parsingFragment && adjustedURI.isNull()) {
        if (!prefix.isNull())
            adjustedURI = m_prefixToNamespaceMap.get(prefix);
        else
            adjustedURI = m_defaultNamespaceURI;
    }

    // Remember whether this is the first element seen; used at the end to
    // decide whether to dispatch the document-element-available notification.
    bool isFirstElement = !m_sawFirstElement;
    m_sawFirstElement = true;

    QualifiedName qName(prefix, localName, adjustedURI);
    RefPtr<Element> newElement = m_currentNode->document().createElement(qName, true);
    if (!newElement) {
        stopParsing();
        return;
    }

    // Namespace declarations come first. If one of them is rejected, the
    // attributes collected so far are still applied before parsing stops.
    Vector<Attribute> prefixedAttributes;
    TrackExceptionState exceptionState;
    handleNamespaceAttributes(prefixedAttributes, libxmlNamespaces, nbNamespaces, exceptionState);
    if (exceptionState.hadException()) {
        setAttributes(newElement.get(), prefixedAttributes, parserContentPolicy());
        stopParsing();
        return;
    }

    // Regular attributes are appended to the same vector; again, whatever was
    // collected is applied even when an attribute name turns out to be invalid.
    handleElementAttributes(prefixedAttributes, libxmlAttributes, nbAttributes, exceptionState);
    setAttributes(newElement.get(), prefixedAttributes, parserContentPolicy());
    if (exceptionState.hadException()) {
        stopParsing();
        return;
    }

    newElement->beginParsingChildren();

    // For script elements, record where the element started; endElementNs()
    // passes this position on when preparing and executing the script.
    ScriptLoader* scriptLoader = toScriptLoaderIfPossible(newElement.get());
    if (scriptLoader)
        m_scriptStartPosition = textPosition();

    m_currentNode->parserAppendChild(newElement.get());

    // Children of a <template> element are parsed into its content fragment
    // rather than into the element itself.
    if (newElement->hasTagName(HTMLNames::templateTag))
        pushCurrentNode(toHTMLTemplateElement(newElement.get())->content());
    else
        pushCurrentNode(newElement.get());

    if (isHTMLHtmlElement(newElement.get()))
        toHTMLHtmlElement(newElement)->insertedByParser();

    // Only full-document parses with a frame notify the loader, and only for
    // the first element.
    if (!m_parsingFragment && isFirstElement && document()->frame())
        document()->frame()->loader().dispatchDocumentElementAvailable();
}
1009
// SAX2 end-element callback. Finishes the current node and pops it from the
// node stack. When the node is a script element that is in the document (and
// the parser has a view), the script is prepared and then either executed
// immediately or registered as a pending script, pausing the parser until it
// has loaded.
//
// Does nothing once the parser is stopped. While the parser is paused the
// call is queued in m_pendingCallbacks instead of being processed.
void XMLDocumentParser::endElementNs()
{
    if (isStopped())
        return;

    if (m_parserPaused) {
        m_pendingCallbacks.append(adoptPtr(new PendingEndElementNSCallback()));
        return;
    }

    // JavaScript can detach the parser.  Make sure this is not released
    // before the end of this method.
    RefPtr<XMLDocumentParser> protect(this);

    exitText();

    RefPtr<ContainerNode> n = m_currentNode;
    n->finishParsingChildren();

    // When the content policy disallows scripting, script elements are
    // removed from the tree again instead of being processed.
    if (!scriptingContentIsAllowed(parserContentPolicy()) && n->isElementNode() && toScriptLoaderIfPossible(toElement(n))) {
        popCurrentNode();
        n->remove(IGNORE_EXCEPTION);
        return;
    }

    // Nothing more to do for non-element nodes or when there is no view.
    if (!n->isElementNode() || !m_view) {
        popCurrentNode();
        return;
    }

    Element* element = toElement(n);

    // The element's parent may have already been removed from document.
    // Parsing continues in this case, but scripts aren't executed.
    if (!element->inDocument()) {
        popCurrentNode();
        return;
    }

    // Everything below applies to script elements only.
    ScriptLoader* scriptLoader = toScriptLoaderIfPossible(element);
    if (!scriptLoader) {
        popCurrentNode();
        return;
    }

    // Don't load external scripts for standalone documents (for now).
    ASSERT(!m_pendingScript);
    m_requestingScript = true;

    if (scriptLoader->prepareScript(m_scriptStartPosition, ScriptLoader::AllowLegacyTypeInTypeAttribute)) {
        // FIXME: Script execution should be shared between
        // the libxml2 and Qt XMLDocumentParser implementations.

        if (scriptLoader->readyToBeParserExecuted()) {
            // The script content is already available: run it right away.
            scriptLoader->executeScript(ScriptSourceCode(scriptLoader->scriptContent(), document()->url(), m_scriptStartPosition));
        } else if (scriptLoader->willBeParserExecuted()) {
            // The script still has to load: register as a client of its
            // resource and remember which element it belongs to.
            m_pendingScript = scriptLoader->resource();
            m_scriptElement = element;
            m_pendingScript->addClient(this);

            // m_pendingScript will be 0 if script was already loaded and addClient() executed it.
            if (m_pendingScript)
                pauseParsing();
        } else {
            m_scriptElement = 0;
        }

        // JavaScript may have detached the parser
        if (isDetached())
            return;
    }
    m_requestingScript = false;
    popCurrentNode();
}
1084
1085void XMLDocumentParser::characters(const xmlChar* chars, int length)
1086{
1087    if (isStopped())
1088        return;
1089
1090    if (m_parserPaused) {
1091        m_pendingCallbacks.append(adoptPtr(new PendingCharactersCallback(chars, length)));
1092        return;
1093    }
1094
1095    if (!m_leafTextNode)
1096        enterText();
1097    m_bufferedText.append(chars, length);
1098}
1099
1100void XMLDocumentParser::error(XMLErrors::ErrorType type, const char* message, va_list args)
1101{
1102    if (isStopped())
1103        return;
1104
1105#if HAVE(VASPRINTF)
1106    char* formattedMessage;
1107    if (vasprintf(&formattedMessage, message, args) == -1)
1108        return;
1109#else
1110    char formattedMessage[1024];
1111    vsnprintf(formattedMessage, sizeof(formattedMessage) - 1, message, args);
1112#endif
1113
1114    if (m_parserPaused) {
1115        m_pendingCallbacks.append(adoptPtr(new PendingErrorCallback(type, reinterpret_cast<const xmlChar*>(formattedMessage), lineNumber(), columnNumber())));
1116#if HAVE(VASPRINTF)
1117        free(formattedMessage);
1118#endif
1119        return;
1120    }
1121
1122    handleError(type, formattedMessage, textPosition());
1123
1124#if HAVE(VASPRINTF)
1125    free(formattedMessage);
1126#endif
1127}
1128
1129void XMLDocumentParser::processingInstruction(const String& target, const String& data)
1130{
1131    if (isStopped())
1132        return;
1133
1134    if (m_parserPaused) {
1135        m_pendingCallbacks.append(adoptPtr(new PendingProcessingInstructionCallback(target ,data)));
1136        return;
1137    }
1138
1139    exitText();
1140
1141    // ### handle exceptions
1142    TrackExceptionState exceptionState;
1143    RefPtr<ProcessingInstruction> pi = m_currentNode->document().createProcessingInstruction(target, data, exceptionState);
1144    if (exceptionState.hadException())
1145        return;
1146
1147    pi->setCreatedByParser(true);
1148
1149    m_currentNode->parserAppendChild(pi.get());
1150
1151    pi->finishParsingChildren();
1152
1153    if (pi->isCSS())
1154        m_sawCSS = true;
1155
1156    if (!RuntimeEnabledFeatures::xsltEnabled())
1157        return;
1158
1159    m_sawXSLTransform = !m_sawFirstElement && pi->isXSL();
1160    if (m_sawXSLTransform && !document()->transformSourceDocument()) {
1161        // This behavior is very tricky. We call stopParsing() here because we want to stop processing the document
1162        // until we're ready to apply the transform, but we actually still want to be fed decoded string pieces to
1163        // accumulate in m_originalSourceForTransform. So, we call stopParsing() here and
1164        // check isStopped() in element callbacks.
1165        // FIXME: This contradicts the contract of DocumentParser.
1166        stopParsing();
1167    }
1168}
1169
1170void XMLDocumentParser::cdataBlock(const String& text)
1171{
1172    if (isStopped())
1173        return;
1174
1175    if (m_parserPaused) {
1176        m_pendingCallbacks.append(adoptPtr(new PendingCDATABlockCallback(text)));
1177        return;
1178    }
1179
1180    exitText();
1181
1182    RefPtr<CDATASection> newNode = CDATASection::create(m_currentNode->document(), text);
1183    m_currentNode->parserAppendChild(newNode.get());
1184}
1185
1186void XMLDocumentParser::comment(const String& text)
1187{
1188    if (isStopped())
1189        return;
1190
1191    if (m_parserPaused) {
1192        m_pendingCallbacks.append(adoptPtr(new PendingCommentCallback(text)));
1193        return;
1194    }
1195
1196    exitText();
1197
1198    RefPtr<Comment> newNode = Comment::create(m_currentNode->document(), text);
1199    m_currentNode->parserAppendChild(newNode.get());
1200}
1201
// Interpretation of the integer "standalone" value that startDocument()
// receives. The numeric values are spelled out because that integer is cast
// straight to this enum.
enum StandaloneInfo {
    StandaloneUnspecified = -2,
    NoXMlDeclaration = -1,
    StandaloneNo = 0,
    StandaloneYes = 1
};
1208
1209void XMLDocumentParser::startDocument(const String& version, const String& encoding, int standalone)
1210{
1211    StandaloneInfo standaloneInfo = (StandaloneInfo)standalone;
1212    if (standaloneInfo == NoXMlDeclaration) {
1213        document()->setHasXMLDeclaration(false);
1214        return;
1215    }
1216
1217    if (!version.isNull())
1218        document()->setXMLVersion(version, ASSERT_NO_EXCEPTION);
1219    if (standalone != StandaloneUnspecified)
1220        document()->setXMLStandalone(standaloneInfo == StandaloneYes, ASSERT_NO_EXCEPTION);
1221    if (!encoding.isNull())
1222        document()->setXMLEncoding(encoding);
1223    document()->setHasXMLDeclaration(true);
1224}
1225
// Called when libxml2 reports the end of the document, and directly by
// appendFragmentSource() after a fragment has been parsed. Closes any text
// node that is still open.
void XMLDocumentParser::endDocument()
{
    exitText();
}
1230
1231void XMLDocumentParser::internalSubset(const String& name, const String& externalID, const String& systemID)
1232{
1233    if (isStopped())
1234        return;
1235
1236    if (m_parserPaused) {
1237        m_pendingCallbacks.append(adoptPtr(new PendingInternalSubsetCallback(name, externalID, systemID)));
1238        return;
1239    }
1240
1241    if (document())
1242        document()->parserAppendChild(DocumentType::create(document(), name, externalID, systemID));
1243}
1244
1245static inline XMLDocumentParser* getParser(void* closure)
1246{
1247    xmlParserCtxtPtr ctxt = static_cast<xmlParserCtxtPtr>(closure);
1248    return static_cast<XMLDocumentParser*>(ctxt->_private);
1249}
1250
1251static void startElementNsHandler(void* closure, const xmlChar* localName, const xmlChar* prefix, const xmlChar* uri, int nbNamespaces, const xmlChar** namespaces, int nbAttributes, int nbDefaulted, const xmlChar** libxmlAttributes)
1252{
1253    getParser(closure)->startElementNs(toAtomicString(localName), toAtomicString(prefix), toAtomicString(uri), nbNamespaces, namespaces, nbAttributes, nbDefaulted, libxmlAttributes);
1254}
1255
1256static void endElementNsHandler(void* closure, const xmlChar*, const xmlChar*, const xmlChar*)
1257{
1258    getParser(closure)->endElementNs();
1259}
1260
1261static void charactersHandler(void* closure, const xmlChar* chars, int length)
1262{
1263    getParser(closure)->characters(chars, length);
1264}
1265
1266static void processingInstructionHandler(void* closure, const xmlChar* target, const xmlChar* data)
1267{
1268    getParser(closure)->processingInstruction(toString(target), toString(data));
1269}
1270
1271static void cdataBlockHandler(void* closure, const xmlChar* text, int length)
1272{
1273    getParser(closure)->cdataBlock(toString(text, length));
1274}
1275
1276static void commentHandler(void* closure, const xmlChar* text)
1277{
1278    getParser(closure)->comment(toString(text));
1279}
1280
1281WTF_ATTRIBUTE_PRINTF(2, 3)
1282static void warningHandler(void* closure, const char* message, ...)
1283{
1284    va_list args;
1285    va_start(args, message);
1286    getParser(closure)->error(XMLErrors::warning, message, args);
1287    va_end(args);
1288}
1289
1290WTF_ATTRIBUTE_PRINTF(2, 3)
1291static void fatalErrorHandler(void* closure, const char* message, ...)
1292{
1293    va_list args;
1294    va_start(args, message);
1295    getParser(closure)->error(XMLErrors::fatal, message, args);
1296    va_end(args);
1297}
1298
1299WTF_ATTRIBUTE_PRINTF(2, 3)
1300static void normalErrorHandler(void* closure, const char* message, ...)
1301{
1302    va_list args;
1303    va_start(args, message);
1304    getParser(closure)->error(XMLErrors::nonFatal, message, args);
1305    va_end(args);
1306}
1307
1308// Using a static entity and marking it XML_INTERNAL_PREDEFINED_ENTITY is
1309// a hack to avoid malloc/free. Using a global variable like this could cause trouble
1310// if libxml implementation details were to change
1311static xmlChar sharedXHTMLEntityResult[9] = {0, 0, 0, 0, 0, 0, 0, 0, 0};
1312
1313static xmlEntityPtr sharedXHTMLEntity()
1314{
1315    static xmlEntity entity;
1316    if (!entity.type) {
1317        entity.type = XML_ENTITY_DECL;
1318        entity.orig = sharedXHTMLEntityResult;
1319        entity.content = sharedXHTMLEntityResult;
1320        entity.etype = XML_INTERNAL_PREDEFINED_ENTITY;
1321    }
1322    return &entity;
1323}
1324
1325static size_t convertUTF16EntityToUTF8(const UChar* utf16Entity, size_t numberOfCodeUnits, char* target, size_t targetSize)
1326{
1327    const char* originalTarget = target;
1328    WTF::Unicode::ConversionResult conversionResult = WTF::Unicode::convertUTF16ToUTF8(&utf16Entity,
1329        utf16Entity + numberOfCodeUnits, &target, target + targetSize);
1330    if (conversionResult != WTF::Unicode::conversionOK)
1331        return 0;
1332
1333    // Even though we must pass the length, libxml expects the entity string to be null terminated.
1334    ASSERT(target > originalTarget + 1);
1335    *target = '\0';
1336    return target - originalTarget;
1337}
1338
1339static xmlEntityPtr getXHTMLEntity(const xmlChar* name)
1340{
1341    UChar utf16DecodedEntity[4];
1342    size_t numberOfCodeUnits = decodeNamedEntityToUCharArray(reinterpret_cast<const char*>(name), utf16DecodedEntity);
1343    if (!numberOfCodeUnits)
1344        return 0;
1345
1346    ASSERT(numberOfCodeUnits <= 4);
1347    size_t entityLengthInUTF8 = convertUTF16EntityToUTF8(utf16DecodedEntity, numberOfCodeUnits,
1348        reinterpret_cast<char*>(sharedXHTMLEntityResult), WTF_ARRAY_LENGTH(sharedXHTMLEntityResult));
1349    if (!entityLengthInUTF8)
1350        return 0;
1351
1352    xmlEntityPtr entity = sharedXHTMLEntity();
1353    entity->length = entityLengthInUTF8;
1354    entity->name = name;
1355    return entity;
1356}
1357
1358static xmlEntityPtr getEntityHandler(void* closure, const xmlChar* name)
1359{
1360    xmlParserCtxtPtr ctxt = static_cast<xmlParserCtxtPtr>(closure);
1361    xmlEntityPtr ent = xmlGetPredefinedEntity(name);
1362    if (ent) {
1363        ent->etype = XML_INTERNAL_PREDEFINED_ENTITY;
1364        return ent;
1365    }
1366
1367    ent = xmlGetDocEntity(ctxt->myDoc, name);
1368    if (!ent && getParser(closure)->isXHTMLDocument()) {
1369        ent = getXHTMLEntity(name);
1370        if (ent)
1371            ent->etype = XML_INTERNAL_GENERAL_ENTITY;
1372    }
1373
1374    return ent;
1375}
1376
1377static void startDocumentHandler(void* closure)
1378{
1379    xmlParserCtxt* ctxt = static_cast<xmlParserCtxt*>(closure);
1380    XMLDocumentParser* parser = getParser(closure);
1381    switchEncoding(ctxt, parser->isCurrentlyParsing8BitChunk());
1382    parser->startDocument(toString(ctxt->version), toString(ctxt->encoding), ctxt->standalone);
1383    xmlSAX2StartDocument(closure);
1384}
1385
1386static void endDocumentHandler(void* closure)
1387{
1388    getParser(closure)->endDocument();
1389    xmlSAX2EndDocument(closure);
1390}
1391
1392static void internalSubsetHandler(void* closure, const xmlChar* name, const xmlChar* externalID, const xmlChar* systemID)
1393{
1394    getParser(closure)->internalSubset(toString(name), toString(externalID), toString(systemID));
1395    xmlSAX2InternalSubset(closure, name, externalID, systemID);
1396}
1397
1398static void externalSubsetHandler(void* closure, const xmlChar*, const xmlChar* externalId, const xmlChar*)
1399{
1400    String extId = toString(externalId);
1401    if ((extId == "-//W3C//DTD XHTML 1.0 Transitional//EN")
1402        || (extId == "-//W3C//DTD XHTML 1.1//EN")
1403        || (extId == "-//W3C//DTD XHTML 1.0 Strict//EN")
1404        || (extId == "-//W3C//DTD XHTML 1.0 Frameset//EN")
1405        || (extId == "-//W3C//DTD XHTML Basic 1.0//EN")
1406        || (extId == "-//W3C//DTD XHTML 1.1 plus MathML 2.0//EN")
1407        || (extId == "-//W3C//DTD XHTML 1.1 plus MathML 2.0 plus SVG 1.1//EN")
1408        || (extId == "-//WAPFORUM//DTD XHTML Mobile 1.0//EN")
1409        || (extId == "-//WAPFORUM//DTD XHTML Mobile 1.1//EN")
1410        || (extId == "-//WAPFORUM//DTD XHTML Mobile 1.2//EN"))
1411        getParser(closure)->setIsXHTMLDocument(true); // controls if we replace entities or not.
1412}
1413
// libxml2 ignorableWhitespace callback. Deliberately empty: it is installed
// only so that the SAX handler slot is not left null (see the bug links
// below).
static void ignorableWhitespaceHandler(void*, const xmlChar*, int)
{
    // nothing to do, but we need this to work around a crasher
    // http://bugzilla.gnome.org/show_bug.cgi?id=172255
    // http://bugs.webkit.org/show_bug.cgi?id=5792
}
1420
1421void XMLDocumentParser::initializeParserContext(const CString& chunk)
1422{
1423    xmlSAXHandler sax;
1424    memset(&sax, 0, sizeof(sax));
1425
1426    sax.error = normalErrorHandler;
1427    sax.fatalError = fatalErrorHandler;
1428    sax.characters = charactersHandler;
1429    sax.processingInstruction = processingInstructionHandler;
1430    sax.cdataBlock = cdataBlockHandler;
1431    sax.comment = commentHandler;
1432    sax.warning = warningHandler;
1433    sax.startElementNs = startElementNsHandler;
1434    sax.endElementNs = endElementNsHandler;
1435    sax.getEntity = getEntityHandler;
1436    sax.startDocument = startDocumentHandler;
1437    sax.endDocument = endDocumentHandler;
1438    sax.internalSubset = internalSubsetHandler;
1439    sax.externalSubset = externalSubsetHandler;
1440    sax.ignorableWhitespace = ignorableWhitespaceHandler;
1441    sax.entityDecl = xmlSAX2EntityDecl;
1442    sax.initialized = XML_SAX2_MAGIC;
1443    DocumentParser::startParsing();
1444    m_sawError = false;
1445    m_sawCSS = false;
1446    m_sawXSLTransform = false;
1447    m_sawFirstElement = false;
1448
1449    XMLDocumentParserScope scope(document()->fetcher());
1450    if (m_parsingFragment)
1451        m_context = XMLParserContext::createMemoryParser(&sax, this, chunk);
1452    else {
1453        ASSERT(!chunk.data());
1454        m_context = XMLParserContext::createStringParser(&sax, this);
1455    }
1456}
1457
1458void XMLDocumentParser::doEnd()
1459{
1460    if (!isStopped()) {
1461        if (m_context) {
1462            // Tell libxml we're done.
1463            {
1464                XMLDocumentParserScope scope(document()->fetcher());
1465                finishParsing(context());
1466            }
1467
1468            m_context = 0;
1469        }
1470    }
1471
1472    bool xmlViewerMode = !m_sawError && !m_sawCSS && !m_sawXSLTransform && hasNoStyleInformation(document());
1473    if (xmlViewerMode) {
1474        XMLTreeViewer xmlTreeViewer(document());
1475        xmlTreeViewer.transformDocumentToTreeView();
1476    } else if (m_sawXSLTransform) {
1477        xmlDocPtr doc = xmlDocPtrForString(document()->fetcher(), m_originalSourceForTransform.toString(), document()->url().string());
1478        document()->setTransformSource(adoptPtr(new TransformSource(doc)));
1479
1480        document()->setParsing(false); // Make the document think it's done, so it will apply XSL stylesheets.
1481        document()->styleResolverChanged(RecalcStyleImmediately);
1482
1483        // styleResolverChanged() call can detach the parser and null out its document.
1484        // In that case, we just bail out.
1485        if (isDetached())
1486            return;
1487
1488        document()->setParsing(true);
1489        DocumentParser::stopParsing();
1490    }
1491}
1492
1493xmlDocPtr xmlDocPtrForString(ResourceFetcher* fetcher, const String& source, const String& url)
1494{
1495    if (source.isEmpty())
1496        return 0;
1497    // Parse in a single chunk into an xmlDocPtr
1498    // FIXME: Hook up error handlers so that a failure to parse the main document results in
1499    // good error messages.
1500    XMLDocumentParserScope scope(fetcher, errorFunc, 0);
1501    XMLParserInput input(source);
1502    return xmlReadMemory(input.data(), input.size(), url.latin1().data(), input.encoding(), XSLT_PARSE_OPTIONS);
1503}
1504
1505OrdinalNumber XMLDocumentParser::lineNumber() const
1506{
1507    return OrdinalNumber::fromOneBasedInt(context() ? context()->input->line : 1);
1508}
1509
1510OrdinalNumber XMLDocumentParser::columnNumber() const
1511{
1512    return OrdinalNumber::fromOneBasedInt(context() ? context()->input->col : 1);
1513}
1514
1515TextPosition XMLDocumentParser::textPosition() const
1516{
1517    xmlParserCtxtPtr context = this->context();
1518    if (!context)
1519        return TextPosition::minimumPosition();
1520    return TextPosition(OrdinalNumber::fromOneBasedInt(context->input->line),
1521                        OrdinalNumber::fromOneBasedInt(context->input->col));
1522}
1523
1524void XMLDocumentParser::stopParsing()
1525{
1526    DocumentParser::stopParsing();
1527    if (context())
1528        xmlStopParser(context());
1529}
1530
1531void XMLDocumentParser::resumeParsing()
1532{
1533    ASSERT(!isDetached());
1534    ASSERT(m_parserPaused);
1535
1536    m_parserPaused = false;
1537
1538    // First, execute any pending callbacks
1539    while (!m_pendingCallbacks.isEmpty()) {
1540        OwnPtr<PendingCallback> callback = m_pendingCallbacks.takeFirst();
1541        callback->call(this);
1542
1543        // A callback paused the parser
1544        if (m_parserPaused)
1545            return;
1546    }
1547
1548    // Then, write any pending data
1549    SegmentedString rest = m_pendingSrc;
1550    m_pendingSrc.clear();
1551    // There is normally only one string left, so toString() shouldn't copy.
1552    // In any case, the XML parser runs on the main thread and it's OK if
1553    // the passed string has more than one reference.
1554    append(rest.toString().impl());
1555
1556    // Finally, if finish() has been called and write() didn't result
1557    // in any further callbacks being queued, call end()
1558    if (m_finishCalled && m_pendingCallbacks.isEmpty())
1559        end();
1560}
1561
1562bool XMLDocumentParser::appendFragmentSource(const String& chunk)
1563{
1564    ASSERT(!m_context);
1565    ASSERT(m_parsingFragment);
1566
1567    CString chunkAsUtf8 = chunk.utf8();
1568
1569    // libxml2 takes an int for a length, and therefore can't handle XML chunks larger than 2 GiB.
1570    if (chunkAsUtf8.length() > INT_MAX)
1571        return false;
1572
1573    initializeParserContext(chunkAsUtf8);
1574    xmlParseContent(context());
1575    endDocument(); // Close any open text nodes.
1576
1577    // FIXME: If this code is actually needed, it should probably move to finish()
1578    // XMLDocumentParserQt has a similar check (m_stream.error() == QXmlStreamReader::PrematureEndOfDocumentError) in doEnd().
1579    // Check if all the chunk has been processed.
1580    long bytesProcessed = xmlByteConsumed(context());
1581    if (bytesProcessed == -1 || ((unsigned long)bytesProcessed) != chunkAsUtf8.length()) {
1582        // FIXME: I don't believe we can hit this case without also having seen an error or a null byte.
1583        // If we hit this ASSERT, we've found a test case which demonstrates the need for this code.
1584        ASSERT(m_sawError || (bytesProcessed >= 0 && !chunkAsUtf8.data()[bytesProcessed]));
1585        return false;
1586    }
1587
1588    // No error if the chunk is well formed or it is not but we have no error.
1589    return context()->wellFormed || !xmlCtxtGetLastError(context());
1590}
1591
1592// --------------------------------
1593
// State shared between parseAttributes() and its SAX start-element handler,
// which reaches it through the libxml2 context's _private pointer.
struct AttributeParseState {
    // Qualified attribute name -> value, filled in by the handler.
    HashMap<String, String> attributes;
    // Set to true once the handler has seen the "attrs" element.
    bool gotAttributes;
};
1598
1599static void attributesStartElementNsHandler(void* closure, const xmlChar* xmlLocalName, const xmlChar* /*xmlPrefix*/,
1600    const xmlChar* /*xmlURI*/, int /*nbNamespaces*/, const xmlChar** /*namespaces*/,
1601    int nbAttributes, int /*nbDefaulted*/, const xmlChar** libxmlAttributes)
1602{
1603    if (strcmp(reinterpret_cast<const char*>(xmlLocalName), "attrs") != 0)
1604        return;
1605
1606    xmlParserCtxtPtr ctxt = static_cast<xmlParserCtxtPtr>(closure);
1607    AttributeParseState* state = static_cast<AttributeParseState*>(ctxt->_private);
1608
1609    state->gotAttributes = true;
1610
1611    xmlSAX2Attributes* attributes = reinterpret_cast<xmlSAX2Attributes*>(libxmlAttributes);
1612    for (int i = 0; i < nbAttributes; i++) {
1613        String attrLocalName = toString(attributes[i].localname);
1614        int valueLength = (int) (attributes[i].end - attributes[i].value);
1615        String attrValue = toString(attributes[i].value, valueLength);
1616        String attrPrefix = toString(attributes[i].prefix);
1617        String attrQName = attrPrefix.isEmpty() ? attrLocalName : attrPrefix + ":" + attrLocalName;
1618
1619        state->attributes.set(attrQName, attrValue);
1620    }
1621}
1622
1623HashMap<String, String> parseAttributes(const String& string, bool& attrsOK)
1624{
1625    AttributeParseState state;
1626    state.gotAttributes = false;
1627
1628    xmlSAXHandler sax;
1629    memset(&sax, 0, sizeof(sax));
1630    sax.startElementNs = attributesStartElementNsHandler;
1631    sax.initialized = XML_SAX2_MAGIC;
1632    RefPtr<XMLParserContext> parser = XMLParserContext::createStringParser(&sax, &state);
1633    String parseString = "<?xml version=\"1.0\"?><attrs " + string + " />";
1634    parseChunk(parser->context(), parseString);
1635    finishParsing(parser->context());
1636    attrsOK = state.gotAttributes;
1637    return state.attributes;
1638}
1639
1640} // namespace WebCore
1641