1/*
2 * Copyright (C) 2000 Peter Kelly (pmk@post.com)
3 * Copyright (C) 2005, 2006, 2008, 2014 Apple Inc. All rights reserved.
4 * Copyright (C) 2006 Alexey Proskuryakov (ap@webkit.org)
5 * Copyright (C) 2007 Samuel Weinig (sam@webkit.org)
6 * Copyright (C) 2008 Nokia Corporation and/or its subsidiary(-ies)
7 * Copyright (C) 2008 Holger Hans Peter Freyther
8 * Copyright (C) 2008 Torch Mobile Inc. All rights reserved. (http://www.torchmobile.com/)
9 *
10 * This library is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Library General Public
12 * License as published by the Free Software Foundation; either
13 * version 2 of the License, or (at your option) any later version.
14 *
15 * This library is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18 * Library General Public License for more details.
19 *
20 * You should have received a copy of the GNU Library General Public License
21 * along with this library; see the file COPYING.LIB.  If not, write to
22 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
23 * Boston, MA 02110-1301, USA.
24 */
25
26#include "config.h"
27#include "core/xml/parser/XMLDocumentParser.h"
28
29#include "bindings/core/v8/ExceptionState.h"
30#include "bindings/core/v8/ExceptionStatePlaceholder.h"
31#include "bindings/core/v8/ScriptController.h"
32#include "bindings/core/v8/ScriptSourceCode.h"
33#include "bindings/core/v8/V8Document.h"
34#include "core/FetchInitiatorTypeNames.h"
35#include "core/HTMLNames.h"
36#include "core/XMLNSNames.h"
37#include "core/dom/CDATASection.h"
38#include "core/dom/Comment.h"
39#include "core/dom/Document.h"
40#include "core/dom/DocumentFragment.h"
41#include "core/dom/DocumentType.h"
42#include "core/dom/ProcessingInstruction.h"
43#include "core/dom/ScriptLoader.h"
44#include "core/dom/TransformSource.h"
45#include "core/fetch/ResourceFetcher.h"
46#include "core/fetch/ScriptResource.h"
47#include "core/frame/LocalFrame.h"
48#include "core/frame/UseCounter.h"
49#include "core/html/HTMLHtmlElement.h"
50#include "core/html/HTMLTemplateElement.h"
51#include "core/html/parser/HTMLEntityParser.h"
52#include "core/html/parser/TextResourceDecoder.h"
53#include "core/loader/FrameLoader.h"
54#include "core/loader/ImageLoader.h"
55#include "core/svg/graphics/SVGImage.h"
56#include "core/xml/parser/SharedBufferReader.h"
57#include "core/xml/parser/XMLDocumentParserScope.h"
58#include "core/xml/parser/XMLParserInput.h"
59#include "platform/RuntimeEnabledFeatures.h"
60#include "platform/SharedBuffer.h"
61#include "platform/TraceEvent.h"
62#include "platform/network/ResourceError.h"
63#include "platform/network/ResourceRequest.h"
64#include "platform/network/ResourceResponse.h"
65#include "platform/weborigin/SecurityOrigin.h"
66#include "wtf/StringExtras.h"
67#include "wtf/TemporaryChange.h"
68#include "wtf/Threading.h"
69#include "wtf/Vector.h"
70#include "wtf/unicode/UTF8.h"
71#include <libxml/catalog.h>
72#include <libxml/parser.h>
73#include <libxml/parserInternals.h>
74#include <libxslt/xslt.h>
75
76namespace blink {
77
78using namespace HTMLNames;
79
80// FIXME: HTMLConstructionSite has a limit of 512, should these match?
81static const unsigned maxXMLTreeDepth = 5000;
82
83static inline String toString(const xmlChar* string, size_t length)
84{
85    return String::fromUTF8(reinterpret_cast<const char*>(string), length);
86}
87
88static inline String toString(const xmlChar* string)
89{
90    return String::fromUTF8(reinterpret_cast<const char*>(string));
91}
92
93static inline AtomicString toAtomicString(const xmlChar* string, size_t length)
94{
95    return AtomicString::fromUTF8(reinterpret_cast<const char*>(string), length);
96}
97
98static inline AtomicString toAtomicString(const xmlChar* string)
99{
100    return AtomicString::fromUTF8(reinterpret_cast<const char*>(string));
101}
102
103static inline bool hasNoStyleInformation(Document* document)
104{
105    if (document->sawElementsInKnownNamespaces() || document->transformSourceDocument())
106        return false;
107
108    if (!document->frame() || !document->frame()->page())
109        return false;
110
111    if (document->frame()->tree().parent())
112        return false; // This document is not in a top frame
113
114    if (SVGImage::isInSVGImage(document))
115        return false;
116
117    return true;
118}
119
120class PendingStartElementNSCallback FINAL : public XMLDocumentParser::PendingCallback {
121public:
122    PendingStartElementNSCallback(const AtomicString& localName, const AtomicString& prefix, const AtomicString& uri,
123        int namespaceCount, const xmlChar** namespaces, int attributeCount, int defaultedCount, const xmlChar** attributes)
124        : m_localName(localName)
125        , m_prefix(prefix)
126        , m_uri(uri)
127        , m_namespaceCount(namespaceCount)
128        , m_attributeCount(attributeCount)
129        , m_defaultedCount(defaultedCount)
130    {
131        m_namespaces = static_cast<xmlChar**>(xmlMalloc(sizeof(xmlChar*) * namespaceCount * 2));
132        for (int i = 0; i < namespaceCount * 2 ; ++i)
133            m_namespaces[i] = xmlStrdup(namespaces[i]);
134        m_attributes = static_cast<xmlChar**>(xmlMalloc(sizeof(xmlChar*) * attributeCount * 5));
135        for (int i = 0; i < attributeCount; ++i) {
136            // Each attribute has 5 elements in the array:
137            // name, prefix, uri, value and an end pointer.
138            for (int j = 0; j < 3; ++j)
139                m_attributes[i * 5 + j] = xmlStrdup(attributes[i * 5 + j]);
140            int length = attributes[i * 5 + 4] - attributes[i * 5 + 3];
141            m_attributes[i * 5 + 3] = xmlStrndup(attributes[i * 5 + 3], length);
142            m_attributes[i * 5 + 4] = m_attributes[i * 5 + 3] + length;
143        }
144    }
145
146    virtual ~PendingStartElementNSCallback()
147    {
148        for (int i = 0; i < m_namespaceCount * 2; ++i)
149            xmlFree(m_namespaces[i]);
150        xmlFree(m_namespaces);
151        for (int i = 0; i < m_attributeCount; ++i)
152            for (int j = 0; j < 4; ++j)
153                xmlFree(m_attributes[i * 5 + j]);
154        xmlFree(m_attributes);
155    }
156
157    virtual void call(XMLDocumentParser* parser) OVERRIDE
158    {
159        parser->startElementNs(m_localName, m_prefix, m_uri,
160            m_namespaceCount, const_cast<const xmlChar**>(m_namespaces),
161            m_attributeCount, m_defaultedCount, const_cast<const xmlChar**>(m_attributes));
162    }
163
164private:
165    AtomicString m_localName;
166    AtomicString m_prefix;
167    AtomicString m_uri;
168    int m_namespaceCount;
169    xmlChar** m_namespaces;
170    int m_attributeCount;
171    int m_defaultedCount;
172    xmlChar** m_attributes;
173};
174
175class PendingEndElementNSCallback FINAL : public XMLDocumentParser::PendingCallback {
176public:
177    virtual void call(XMLDocumentParser* parser) OVERRIDE
178    {
179        parser->endElementNs();
180    }
181};
182
183class PendingCharactersCallback FINAL : public XMLDocumentParser::PendingCallback {
184public:
185    PendingCharactersCallback(const xmlChar* chars, int length)
186        : m_chars(xmlStrndup(chars, length))
187        , m_length(length)
188    {
189    }
190
191    virtual ~PendingCharactersCallback()
192    {
193        xmlFree(m_chars);
194    }
195
196    virtual void call(XMLDocumentParser* parser) OVERRIDE
197    {
198        parser->characters(m_chars, m_length);
199    }
200
201private:
202    xmlChar* m_chars;
203    int m_length;
204};
205
206class PendingProcessingInstructionCallback FINAL : public XMLDocumentParser::PendingCallback {
207public:
208    PendingProcessingInstructionCallback(const String& target, const String& data)
209        : m_target(target)
210        , m_data(data)
211    {
212    }
213
214    virtual void call(XMLDocumentParser* parser) OVERRIDE
215    {
216        parser->processingInstruction(m_target, m_data);
217    }
218
219private:
220    String m_target;
221    String m_data;
222};
223
224class PendingCDATABlockCallback FINAL : public XMLDocumentParser::PendingCallback {
225public:
226    explicit PendingCDATABlockCallback(const String& text) : m_text(text) { }
227
228    virtual void call(XMLDocumentParser* parser) OVERRIDE
229    {
230        parser->cdataBlock(m_text);
231    }
232
233private:
234    String m_text;
235};
236
237class PendingCommentCallback FINAL : public XMLDocumentParser::PendingCallback {
238public:
239    explicit PendingCommentCallback(const String& text) : m_text(text) { }
240
241    virtual void call(XMLDocumentParser* parser) OVERRIDE
242    {
243        parser->comment(m_text);
244    }
245
246private:
247    String m_text;
248};
249
250class PendingInternalSubsetCallback FINAL : public XMLDocumentParser::PendingCallback {
251public:
252    PendingInternalSubsetCallback(const String& name, const String& externalID, const String& systemID)
253        : m_name(name)
254        , m_externalID(externalID)
255        , m_systemID(systemID)
256    {
257    }
258
259    virtual void call(XMLDocumentParser* parser) OVERRIDE
260    {
261        parser->internalSubset(m_name, m_externalID, m_systemID);
262    }
263
264private:
265    String m_name;
266    String m_externalID;
267    String m_systemID;
268};
269
270class PendingErrorCallback FINAL : public XMLDocumentParser::PendingCallback {
271public:
272    PendingErrorCallback(XMLErrors::ErrorType type, const xmlChar* message, OrdinalNumber lineNumber, OrdinalNumber columnNumber)
273        : m_type(type)
274        , m_message(xmlStrdup(message))
275        , m_lineNumber(lineNumber)
276        , m_columnNumber(columnNumber)
277    {
278    }
279
280    virtual ~PendingErrorCallback()
281    {
282        xmlFree(m_message);
283    }
284
285    virtual void call(XMLDocumentParser* parser) OVERRIDE
286    {
287        parser->handleError(m_type, reinterpret_cast<char*>(m_message), TextPosition(m_lineNumber, m_columnNumber));
288    }
289
290private:
291    XMLErrors::ErrorType m_type;
292    xmlChar* m_message;
293    OrdinalNumber m_lineNumber;
294    OrdinalNumber m_columnNumber;
295};
296
297void XMLDocumentParser::pushCurrentNode(ContainerNode* n)
298{
299    ASSERT(n);
300    ASSERT(m_currentNode);
301#if !ENABLE(OILPAN)
302    if (n != document())
303        n->ref();
304#endif
305    m_currentNodeStack.append(m_currentNode);
306    m_currentNode = n;
307    if (m_currentNodeStack.size() > maxXMLTreeDepth)
308        handleError(XMLErrors::ErrorTypeFatal, "Excessive node nesting.", textPosition());
309}
310
311void XMLDocumentParser::popCurrentNode()
312{
313    if (!m_currentNode)
314        return;
315    ASSERT(m_currentNodeStack.size());
316#if !ENABLE(OILPAN)
317    if (m_currentNode != document())
318        m_currentNode->deref();
319#endif
320    m_currentNode = m_currentNodeStack.last();
321    m_currentNodeStack.removeLast();
322}
323
324void XMLDocumentParser::clearCurrentNodeStack()
325{
326#if !ENABLE(OILPAN)
327    if (m_currentNode && m_currentNode != document())
328        m_currentNode->deref();
329#endif
330    m_currentNode = nullptr;
331    m_leafTextNode = nullptr;
332
333    if (m_currentNodeStack.size()) { // Aborted parsing.
334#if !ENABLE(OILPAN)
335        for (size_t i = m_currentNodeStack.size() - 1; i != 0; --i)
336            m_currentNodeStack[i]->deref();
337        if (m_currentNodeStack[0] && m_currentNodeStack[0] != document())
338            m_currentNodeStack[0]->deref();
339#endif
340        m_currentNodeStack.clear();
341    }
342}
343
// Inserting source is not expected to happen for this parser: the argument
// is ignored and reaching this function trips ASSERT_NOT_REACHED().
void XMLDocumentParser::insert(const SegmentedString&)
{
    ASSERT_NOT_REACHED();
}
348
349void XMLDocumentParser::append(PassRefPtr<StringImpl> inputSource)
350{
351    SegmentedString source(inputSource);
352    if (m_sawXSLTransform || !m_sawFirstElement)
353        m_originalSourceForTransform.append(source);
354
355    if (isStopped() || m_sawXSLTransform)
356        return;
357
358    if (m_parserPaused) {
359        m_pendingSrc.append(source);
360        return;
361    }
362
363    // JavaScript can detach the parser. Make sure this is not released
364    // before the end of this method.
365    RefPtrWillBeRawPtr<XMLDocumentParser> protect(this);
366
367    doWrite(source.toString());
368}
369
370void XMLDocumentParser::handleError(XMLErrors::ErrorType type, const char* formattedMessage, TextPosition position)
371{
372    m_xmlErrors.handleError(type, formattedMessage, position);
373    if (type != XMLErrors::ErrorTypeWarning)
374        m_sawError = true;
375    if (type == XMLErrors::ErrorTypeFatal)
376        stopParsing();
377}
378
379void XMLDocumentParser::enterText()
380{
381    ASSERT(m_bufferedText.size() == 0);
382    ASSERT(!m_leafTextNode);
383    m_leafTextNode = Text::create(m_currentNode->document(), "");
384    m_currentNode->parserAppendChild(m_leafTextNode.get());
385}
386
387void XMLDocumentParser::exitText()
388{
389    if (isStopped())
390        return;
391
392    if (!m_leafTextNode)
393        return;
394
395    m_leafTextNode->appendData(toString(m_bufferedText.data(), m_bufferedText.size()));
396    m_bufferedText.clear();
397    m_leafTextNode = nullptr;
398}
399
// Detaches the parser: clears the current node and the node stack first,
// then runs the base-class detach.
void XMLDocumentParser::detach()
{
    clearCurrentNodeStack();
    ScriptableDocumentParser::detach();
}
405
406void XMLDocumentParser::end()
407{
408    TRACE_EVENT0("blink", "XMLDocumentParser::end");
409    // XMLDocumentParserLibxml2 will do bad things to the document if doEnd() is called.
410    // I don't believe XMLDocumentParserQt needs doEnd called in the fragment case.
411    ASSERT(!m_parsingFragment);
412
413    doEnd();
414
415    // doEnd() call above can detach the parser and null out its document.
416    // In that case, we just bail out.
417    if (isDetached())
418        return;
419
420    // doEnd() could process a script tag, thus pausing parsing.
421    if (m_parserPaused)
422        return;
423
424    if (m_sawError) {
425        insertErrorMessageBlock();
426    } else {
427        exitText();
428        document()->styleResolverChanged();
429    }
430
431    if (isParsing())
432        prepareToStopParsing();
433    document()->setReadyState(Document::Interactive);
434    clearCurrentNodeStack();
435    document()->finishedParsing();
436}
437
438void XMLDocumentParser::finish()
439{
440    // FIXME: We should ASSERT(!m_parserStopped) here, since it does not
441    // makes sense to call any methods on DocumentParser once it's been stopped.
442    // However, FrameLoader::stop calls DocumentParser::finish unconditionally.
443
444    // flush may ending up executing arbitrary script, and possibly detach the parser.
445    RefPtrWillBeRawPtr<XMLDocumentParser> protect(this);
446    flush();
447    if (isDetached())
448        return;
449
450    if (m_parserPaused)
451        m_finishCalled = true;
452    else
453        end();
454}
455
// Forwards to XMLErrors::insertErrorMessageBlock() on the parser's error
// collector.
void XMLDocumentParser::insertErrorMessageBlock()
{
    m_xmlErrors.insertErrorMessageBlock();
}
460
461void XMLDocumentParser::notifyFinished(Resource* unusedResource)
462{
463    ASSERT_UNUSED(unusedResource, unusedResource == m_pendingScript);
464
465    ScriptSourceCode sourceCode(m_pendingScript.get());
466    bool errorOccurred = m_pendingScript->errorOccurred();
467    bool wasCanceled = m_pendingScript->wasCanceled();
468
469    m_pendingScript->removeClient(this);
470    m_pendingScript = 0;
471
472    RefPtrWillBeRawPtr<Element> e = m_scriptElement;
473    m_scriptElement = nullptr;
474
475    ScriptLoader* scriptLoader = toScriptLoaderIfPossible(e.get());
476    ASSERT(scriptLoader);
477
478    // JavaScript can detach this parser, make sure it's kept alive even if
479    // detached.
480    RefPtrWillBeRawPtr<XMLDocumentParser> protect(this);
481
482    if (errorOccurred) {
483        scriptLoader->dispatchErrorEvent();
484    } else if (!wasCanceled) {
485        scriptLoader->executeScript(sourceCode);
486        scriptLoader->dispatchLoadEvent();
487    }
488
489    m_scriptElement = nullptr;
490
491    if (!isDetached() && !m_requestingScript)
492        resumeParsing();
493}
494
495bool XMLDocumentParser::isWaitingForScripts() const
496{
497    return m_pendingScript;
498}
499
500void XMLDocumentParser::pauseParsing()
501{
502    if (!m_parsingFragment)
503        m_parserPaused = true;
504}
505
506bool XMLDocumentParser::parseDocumentFragment(const String& chunk, DocumentFragment* fragment, Element* contextElement, ParserContentPolicy parserContentPolicy)
507{
508    if (!chunk.length())
509        return true;
510
511    // FIXME: We need to implement the HTML5 XML Fragment parsing algorithm:
512    // http://www.whatwg.org/specs/web-apps/current-work/multipage/the-xhtml-syntax.html#xml-fragment-parsing-algorithm
513    // For now we have a hack for script/style innerHTML support:
514    if (contextElement && (contextElement->hasLocalName(scriptTag.localName()) || contextElement->hasLocalName(styleTag.localName()))) {
515        fragment->parserAppendChild(fragment->document().createTextNode(chunk));
516        return true;
517    }
518
519    RefPtrWillBeRawPtr<XMLDocumentParser> parser = XMLDocumentParser::create(fragment, contextElement, parserContentPolicy);
520    bool wellFormed = parser->appendFragmentSource(chunk);
521
522    // Do not call finish(). Current finish() and doEnd() implementations touch
523    // the main Document/loader and can cause crashes in the fragment case.
524
525    // Allows ~DocumentParser to assert it was detached before destruction.
526    parser->detach();
527    // appendFragmentSource()'s wellFormed is more permissive than wellFormed().
528    return wellFormed;
529}
530
531static int globalDescriptor = 0;
532static ThreadIdentifier libxmlLoaderThread = 0;
533
534static int matchFunc(const char*)
535{
536    // Only match loads initiated due to uses of libxml2 from within
537    // XMLDocumentParser to avoid interfering with client applications that also
538    // use libxml2. http://bugs.webkit.org/show_bug.cgi?id=17353
539    return XMLDocumentParserScope::currentFetcher && currentThread() == libxmlLoaderThread;
540}
541
542static inline void setAttributes(Element* element, Vector<Attribute>& attributeVector, ParserContentPolicy parserContentPolicy)
543{
544    if (!scriptingContentIsAllowed(parserContentPolicy))
545        element->stripScriptingAttributes(attributeVector);
546    element->parserSetAttributes(attributeVector);
547}
548
549static void switchEncoding(xmlParserCtxtPtr ctxt, bool is8Bit)
550{
551    // Hack around libxml2's lack of encoding overide support by manually
552    // resetting the encoding to UTF-16 before every chunk. Otherwise libxml
553    // will detect <?xml version="1.0" encoding="<encoding name>"?> blocks and
554    // switch encodings, causing the parse to fail.
555    if (is8Bit) {
556        xmlSwitchEncoding(ctxt, XML_CHAR_ENCODING_8859_1);
557        return;
558    }
559
560    const UChar BOM = 0xFEFF;
561    const unsigned char BOMHighByte = *reinterpret_cast<const unsigned char*>(&BOM);
562    xmlSwitchEncoding(ctxt, BOMHighByte == 0xFF ? XML_CHAR_ENCODING_UTF16LE : XML_CHAR_ENCODING_UTF16BE);
563}
564
565static void parseChunk(xmlParserCtxtPtr ctxt, const String& chunk)
566{
567    bool is8Bit = chunk.is8Bit();
568    switchEncoding(ctxt, is8Bit);
569    if (is8Bit)
570        xmlParseChunk(ctxt, reinterpret_cast<const char*>(chunk.characters8()), sizeof(LChar) * chunk.length(), 0);
571    else
572        xmlParseChunk(ctxt, reinterpret_cast<const char*>(chunk.characters16()), sizeof(UChar) * chunk.length(), 0);
573}
574
575static void finishParsing(xmlParserCtxtPtr ctxt)
576{
577    xmlParseChunk(ctxt, 0, 0, 1);
578}
579
580#define xmlParseChunk #error "Use parseChunk instead to select the correct encoding."
581
582static bool isLibxmlDefaultCatalogFile(const String& urlString)
583{
584    // On non-Windows platforms libxml asks for this URL, the
585    // "XML_XML_DEFAULT_CATALOG", on initialization.
586    if (urlString == "file:///etc/xml/catalog")
587        return true;
588
589    // On Windows, libxml computes a URL relative to where its DLL resides.
590    if (urlString.startsWith("file:///", false) && urlString.endsWith("/etc/catalog", false))
591        return true;
592    return false;
593}
594
595static bool shouldAllowExternalLoad(const KURL& url)
596{
597    String urlString = url.string();
598
599    // This isn't really necessary now that initializeLibXMLIfNecessary
600    // disables catalog support in libxml, but keeping it for defense in depth.
601    if (isLibxmlDefaultCatalogFile(url))
602        return false;
603
604    // The most common DTD. There isn't much point in hammering www.w3c.org by
605    // requesting this URL for every XHTML document.
606    if (urlString.startsWith("http://www.w3.org/TR/xhtml", false))
607        return false;
608
609    // Similarly, there isn't much point in requesting the SVG DTD.
610    if (urlString.startsWith("http://www.w3.org/Graphics/SVG", false))
611        return false;
612
613    // The libxml doesn't give us a lot of context for deciding whether to allow
614    // this request. In the worst case, this load could be for an external
615    // entity and the resulting document could simply read the retrieved
616    // content. If we had more context, we could potentially allow the parser to
617    // load a DTD. As things stand, we take the conservative route and allow
618    // same-origin requests only.
619    if (!XMLDocumentParserScope::currentFetcher->document()->securityOrigin()->canRequest(url)) {
620        XMLDocumentParserScope::currentFetcher->printAccessDeniedMessage(url);
621        return false;
622    }
623
624    return true;
625}
626
627static void* openFunc(const char* uri)
628{
629    ASSERT(XMLDocumentParserScope::currentFetcher);
630    ASSERT(currentThread() == libxmlLoaderThread);
631
632    KURL url(KURL(), uri);
633
634    if (!shouldAllowExternalLoad(url))
635        return &globalDescriptor;
636
637    KURL finalURL;
638    RefPtr<SharedBuffer> data;
639
640    {
641        ResourceFetcher* fetcher = XMLDocumentParserScope::currentFetcher;
642        XMLDocumentParserScope scope(0);
643        // FIXME: We should restore the original global error handler as well.
644
645        if (fetcher->frame()) {
646            FetchRequest request(ResourceRequest(url), FetchInitiatorTypeNames::xml, ResourceFetcher::defaultResourceOptions());
647            ResourcePtr<Resource> resource = fetcher->fetchSynchronously(request);
648            if (resource && !resource->errorOccurred()) {
649                data = resource->resourceBuffer();
650                finalURL = resource->response().url();
651            }
652        }
653    }
654
655    // We have to check the URL again after the load to catch redirects.
656    // See <https://bugs.webkit.org/show_bug.cgi?id=21963>.
657    if (!shouldAllowExternalLoad(finalURL))
658        return &globalDescriptor;
659
660    UseCounter::count(XMLDocumentParserScope::currentFetcher->document(), UseCounter::XMLExternalResourceLoad);
661
662    return new SharedBufferReader(data);
663}
664
665static int readFunc(void* context, char* buffer, int len)
666{
667    // Do 0-byte reads in case of a null descriptor
668    if (context == &globalDescriptor)
669        return 0;
670
671    SharedBufferReader* data = static_cast<SharedBufferReader*>(context);
672    return data->readData(buffer, len);
673}
674
// libxml2 I/O "write" callback. Nothing is ever written: every call reports
// a 0-byte write regardless of its arguments.
static int writeFunc(void* /* context */, const char* /* buffer */, int /* length */)
{
    const int bytesWritten = 0;
    return bytesWritten;
}
680
681static int closeFunc(void* context)
682{
683    if (context != &globalDescriptor) {
684        SharedBufferReader* data = static_cast<SharedBufferReader*>(context);
685        delete data;
686    }
687    return 0;
688}
689
// Error callback that deliberately discards the message and its arguments.
static void errorFunc(void* /* context */, const char* /* format */, ...)
{
    // FIXME: It would be nice to display error messages somewhere.
}
694
695static void initializeLibXMLIfNecessary()
696{
697    static bool didInit = false;
698    if (didInit)
699        return;
700
701    // We don't want libxml to try and load catalogs.
702    // FIXME: It's not nice to set global settings in libxml, embedders of Blink
703    // could be trying to use libxml themselves.
704    xmlCatalogSetDefaults(XML_CATA_ALLOW_NONE);
705    xmlInitParser();
706    xmlRegisterInputCallbacks(matchFunc, openFunc, readFunc, closeFunc);
707    xmlRegisterOutputCallbacks(matchFunc, openFunc, writeFunc, closeFunc);
708    libxmlLoaderThread = currentThread();
709    didInit = true;
710}
711
712
713PassRefPtr<XMLParserContext> XMLParserContext::createStringParser(xmlSAXHandlerPtr handlers, void* userData)
714{
715    initializeLibXMLIfNecessary();
716    xmlParserCtxtPtr parser = xmlCreatePushParserCtxt(handlers, 0, 0, 0, 0);
717    parser->_private = userData;
718    parser->replaceEntities = true;
719    return adoptRef(new XMLParserContext(parser));
720}
721
722// Chunk should be encoded in UTF-8
723PassRefPtr<XMLParserContext> XMLParserContext::createMemoryParser(xmlSAXHandlerPtr handlers, void* userData, const CString& chunk)
724{
725    initializeLibXMLIfNecessary();
726
727    // appendFragmentSource() checks that the length doesn't overflow an int.
728    xmlParserCtxtPtr parser = xmlCreateMemoryParserCtxt(chunk.data(), chunk.length());
729
730    if (!parser)
731        return nullptr;
732
733    // Copy the sax handler
734    memcpy(parser->sax, handlers, sizeof(xmlSAXHandler));
735
736    // Set parser options.
737    // XML_PARSE_NODICT: default dictionary option.
738    // XML_PARSE_NOENT: force entities substitutions.
739    xmlCtxtUseOptions(parser, XML_PARSE_NODICT | XML_PARSE_NOENT);
740
741    // Internal initialization
742    parser->sax2 = 1;
743    parser->instate = XML_PARSER_CONTENT; // We are parsing a CONTENT
744    parser->depth = 0;
745    parser->str_xml = xmlDictLookup(parser->dict, BAD_CAST "xml", 3);
746    parser->str_xmlns = xmlDictLookup(parser->dict, BAD_CAST "xmlns", 5);
747    parser->str_xml_ns = xmlDictLookup(parser->dict, XML_XML_NAMESPACE, 36);
748    parser->_private = userData;
749
750    return adoptRef(new XMLParserContext(parser));
751}
752
753// --------------------------------
754
755bool XMLDocumentParser::supportsXMLVersion(const String& version)
756{
757    return version == "1.0";
758}
759
760XMLDocumentParser::XMLDocumentParser(Document& document, FrameView* frameView)
761    : ScriptableDocumentParser(document)
762    , m_hasView(frameView)
763    , m_context(nullptr)
764    , m_currentNode(&document)
765    , m_isCurrentlyParsing8BitChunk(false)
766    , m_sawError(false)
767    , m_sawCSS(false)
768    , m_sawXSLTransform(false)
769    , m_sawFirstElement(false)
770    , m_isXHTMLDocument(false)
771    , m_parserPaused(false)
772    , m_requestingScript(false)
773    , m_finishCalled(false)
774    , m_xmlErrors(&document)
775    , m_pendingScript(0)
776    , m_scriptStartPosition(TextPosition::belowRangePosition())
777    , m_parsingFragment(false)
778{
779    // This is XML being used as a document resource.
780    if (frameView && document.isXMLDocument())
781        UseCounter::count(document, UseCounter::XMLDocument);
782}
783
784XMLDocumentParser::XMLDocumentParser(DocumentFragment* fragment, Element* parentElement, ParserContentPolicy parserContentPolicy)
785    : ScriptableDocumentParser(fragment->document(), parserContentPolicy)
786    , m_hasView(false)
787    , m_context(nullptr)
788    , m_currentNode(fragment)
789    , m_isCurrentlyParsing8BitChunk(false)
790    , m_sawError(false)
791    , m_sawCSS(false)
792    , m_sawXSLTransform(false)
793    , m_sawFirstElement(false)
794    , m_isXHTMLDocument(false)
795    , m_parserPaused(false)
796    , m_requestingScript(false)
797    , m_finishCalled(false)
798    , m_xmlErrors(&fragment->document())
799    , m_pendingScript(0)
800    , m_scriptStartPosition(TextPosition::belowRangePosition())
801    , m_parsingFragment(true)
802{
803#if !ENABLE(OILPAN)
804    fragment->ref();
805#endif
806
807    // Add namespaces based on the parent node
808    WillBeHeapVector<RawPtrWillBeMember<Element> > elemStack;
809    while (parentElement) {
810        elemStack.append(parentElement);
811
812        Element* grandParentElement = parentElement->parentElement();
813        if (!grandParentElement)
814            break;
815        parentElement = grandParentElement;
816    }
817
818    if (elemStack.isEmpty())
819        return;
820
821    for (; !elemStack.isEmpty(); elemStack.removeLast()) {
822        Element* element = elemStack.last();
823        AttributeCollection attributes = element->attributes();
824        AttributeCollection::iterator end = attributes.end();
825        for (AttributeCollection::iterator it = attributes.begin(); it != end; ++it) {
826            if (it->localName() == xmlnsAtom)
827                m_defaultNamespaceURI = it->value();
828            else if (it->prefix() == xmlnsAtom)
829                m_prefixToNamespaceMap.set(it->localName(), it->value());
830        }
831    }
832
833    // If the parent element is not in document tree, there may be no xmlns attribute; just default to the parent's namespace.
834    if (m_defaultNamespaceURI.isNull() && !parentElement->inDocument())
835        m_defaultNamespaceURI = parentElement->namespaceURI();
836}
837
838XMLParserContext::~XMLParserContext()
839{
840    if (m_context->myDoc)
841        xmlFreeDoc(m_context->myDoc);
842    xmlFreeParserCtxt(m_context);
843}
844
845XMLDocumentParser::~XMLDocumentParser()
846{
847#if !ENABLE(OILPAN)
848    // The XMLDocumentParser will always be detached before being destroyed.
849    ASSERT(m_currentNodeStack.isEmpty());
850    ASSERT(!m_currentNode);
851#endif
852
853    // FIXME: m_pendingScript handling should be moved into XMLDocumentParser.cpp!
854    if (m_pendingScript)
855        m_pendingScript->removeClient(this);
856}
857
// Reports the parser's node references to |visitor| and then traces the
// base class. The node stack is only traced in Oilpan builds.
void XMLDocumentParser::trace(Visitor* visitor)
{
    visitor->trace(m_currentNode);
#if ENABLE(OILPAN)
    visitor->trace(m_currentNodeStack);
#endif
    visitor->trace(m_leafTextNode);
    visitor->trace(m_xmlErrors);
    visitor->trace(m_scriptElement);
    ScriptableDocumentParser::trace(visitor);
}
869
870void XMLDocumentParser::doWrite(const String& parseString)
871{
872    TRACE_EVENT0("blink", "XMLDocumentParser::doWrite");
873    ASSERT(!isDetached());
874    if (!m_context)
875        initializeParserContext();
876
877    // Protect the libxml context from deletion during a callback
878    RefPtr<XMLParserContext> context = m_context;
879
880    // libXML throws an error if you try to switch the encoding for an empty
881    // string.
882    if (parseString.length()) {
883        // JavaScript may cause the parser to detach during parseChunk
884        // keep this alive until this function is done.
885        RefPtrWillBeRawPtr<XMLDocumentParser> protect(this);
886
887        XMLDocumentParserScope scope(document()->fetcher());
888        TemporaryChange<bool> encodingScope(m_isCurrentlyParsing8BitChunk, parseString.is8Bit());
889        parseChunk(context->context(), parseString);
890
891        // JavaScript (which may be run under the parseChunk callstack) may
892        // cause the parser to be stopped or detached.
893        if (isStopped())
894            return;
895    }
896
897    // FIXME: Why is this here? And why is it after we process the passed
898    // source?
899    if (document()->sawDecodingError()) {
900        // If the decoder saw an error, report it as fatal (stops parsing)
901        TextPosition position(OrdinalNumber::fromOneBasedInt(context->context()->input->line), OrdinalNumber::fromOneBasedInt(context->context()->input->col));
902        handleError(XMLErrors::ErrorTypeFatal, "Encoding error", position);
903    }
904}
905
// View of one namespace declaration in the flat xmlChar* array that libxml
// hands to the startElementNs callback; handleNamespaceAttributes() casts that
// array to this layout, so the field order must not change.
struct xmlSAX2Namespace {
    // Declared prefix; null when no prefix was given.
    const xmlChar* prefix;
    // Namespace URI bound by the declaration.
    const xmlChar* uri;
};
910
911static inline void handleNamespaceAttributes(Vector<Attribute>& prefixedAttributes, const xmlChar** libxmlNamespaces, int nbNamespaces, ExceptionState& exceptionState)
912{
913    xmlSAX2Namespace* namespaces = reinterpret_cast<xmlSAX2Namespace*>(libxmlNamespaces);
914    for (int i = 0; i < nbNamespaces; ++i) {
915        AtomicString namespaceQName = xmlnsAtom;
916        AtomicString namespaceURI = toAtomicString(namespaces[i].uri);
917        if (namespaces[i].prefix)
918            namespaceQName = WTF::xmlnsWithColon + namespaces[i].prefix;
919
920        QualifiedName parsedName = anyName;
921        if (!Element::parseAttributeName(parsedName, XMLNSNames::xmlnsNamespaceURI, namespaceQName, exceptionState))
922            return;
923
924        prefixedAttributes.append(Attribute(parsedName, namespaceURI));
925    }
926}
927
// View of one attribute in the flat xmlChar* array that libxml hands to the
// startElementNs callback; the handlers in this file cast that array to this
// layout, so the field order must not change.
struct xmlSAX2Attributes {
    const xmlChar* localname;
    const xmlChar* prefix;
    const xmlChar* uri;
    // |value| and |end| delimit the attribute value; callers compute its
    // length as (end - value).
    const xmlChar* value;
    const xmlChar* end;
};
935
936static inline void handleElementAttributes(Vector<Attribute>& prefixedAttributes, const xmlChar** libxmlAttributes, int nbAttributes, ExceptionState& exceptionState)
937{
938    xmlSAX2Attributes* attributes = reinterpret_cast<xmlSAX2Attributes*>(libxmlAttributes);
939    for (int i = 0; i < nbAttributes; ++i) {
940        int valueLength = static_cast<int>(attributes[i].end - attributes[i].value);
941        AtomicString attrValue = toAtomicString(attributes[i].value, valueLength);
942        String attrPrefix = toString(attributes[i].prefix);
943        AtomicString attrURI = attrPrefix.isEmpty() ? AtomicString() : toAtomicString(attributes[i].uri);
944        AtomicString attrQName = attrPrefix.isEmpty() ? toAtomicString(attributes[i].localname) : attrPrefix + ":" + toString(attributes[i].localname);
945
946        QualifiedName parsedName = anyName;
947        if (!Element::parseAttributeName(parsedName, attrURI, attrQName, exceptionState))
948            return;
949
950        prefixedAttributes.append(Attribute(parsedName, attrValue));
951    }
952}
953
954void XMLDocumentParser::startElementNs(const AtomicString& localName, const AtomicString& prefix, const AtomicString& uri, int nbNamespaces,
955    const xmlChar** libxmlNamespaces, int nbAttributes, int nbDefaulted, const xmlChar** libxmlAttributes)
956{
957    if (isStopped())
958        return;
959
960    if (m_parserPaused) {
961        m_pendingCallbacks.append(adoptPtr(new PendingStartElementNSCallback(localName, prefix, uri, nbNamespaces, libxmlNamespaces,
962            nbAttributes, nbDefaulted, libxmlAttributes)));
963        return;
964    }
965
966    exitText();
967
968    AtomicString adjustedURI = uri;
969    if (m_parsingFragment && adjustedURI.isNull()) {
970        if (!prefix.isNull())
971            adjustedURI = m_prefixToNamespaceMap.get(prefix);
972        else
973            adjustedURI = m_defaultNamespaceURI;
974    }
975
976    bool isFirstElement = !m_sawFirstElement;
977    m_sawFirstElement = true;
978
979    QualifiedName qName(prefix, localName, adjustedURI);
980    RefPtrWillBeRawPtr<Element> newElement = m_currentNode->document().createElement(qName, true);
981    if (!newElement) {
982        stopParsing();
983        return;
984    }
985
986    Vector<Attribute> prefixedAttributes;
987    TrackExceptionState exceptionState;
988    handleNamespaceAttributes(prefixedAttributes, libxmlNamespaces, nbNamespaces, exceptionState);
989    if (exceptionState.hadException()) {
990        setAttributes(newElement.get(), prefixedAttributes, parserContentPolicy());
991        stopParsing();
992        return;
993    }
994
995    handleElementAttributes(prefixedAttributes, libxmlAttributes, nbAttributes, exceptionState);
996    setAttributes(newElement.get(), prefixedAttributes, parserContentPolicy());
997    if (exceptionState.hadException()) {
998        stopParsing();
999        return;
1000    }
1001
1002    newElement->beginParsingChildren();
1003
1004    ScriptLoader* scriptLoader = toScriptLoaderIfPossible(newElement.get());
1005    if (scriptLoader)
1006        m_scriptStartPosition = textPosition();
1007
1008    m_currentNode->parserAppendChild(newElement.get());
1009
1010    // Event handlers may synchronously trigger removal of the
1011    // document and cancellation of this parser.
1012    if (isStopped()) {
1013        stopParsing();
1014        return;
1015    }
1016
1017    if (isHTMLTemplateElement(*newElement))
1018        pushCurrentNode(toHTMLTemplateElement(*newElement).content());
1019    else
1020        pushCurrentNode(newElement.get());
1021
1022    if (isHTMLHtmlElement(*newElement))
1023        toHTMLHtmlElement(*newElement).insertedByParser();
1024
1025    if (!m_parsingFragment && isFirstElement && document()->frame())
1026        document()->frame()->loader().dispatchDocumentElementAvailable();
1027}
1028
// SAX end-tag callback. Finishes the current node, pops it off the node stack
// and, when the node is a script element that is in the document and the
// parser has a view, prepares and runs (or waits for) the script. While the
// parser is paused the call is queued instead.
void XMLDocumentParser::endElementNs()
{
    if (isStopped())
        return;

    if (m_parserPaused) {
        m_pendingCallbacks.append(adoptPtr(new PendingEndElementNSCallback()));
        return;
    }

    // JavaScript can detach the parser. Make sure this is not released before
    // the end of this method.
    RefPtrWillBeRawPtr<XMLDocumentParser> protect(this);

    exitText();

    // Keep a reference to the node being closed; it is popped from the stack
    // on every path below.
    RefPtrWillBeRawPtr<ContainerNode> n = m_currentNode;
    if (m_currentNode->isElementNode())
        toElement(n.get())->finishParsingChildren();

    // When the content policy disallows scripting, script elements are
    // removed from the tree as soon as they are closed.
    if (!scriptingContentIsAllowed(parserContentPolicy()) && n->isElementNode() && toScriptLoaderIfPossible(toElement(n))) {
        popCurrentNode();
        n->remove(IGNORE_EXCEPTION);
        return;
    }

    // Nothing more to do for non-elements or when there is no view.
    if (!n->isElementNode() || !m_hasView) {
        popCurrentNode();
        return;
    }

    Element* element = toElement(n);

    // The element's parent may have already been removed from document.
    // Parsing continues in this case, but scripts aren't executed.
    if (!element->inDocument()) {
        popCurrentNode();
        return;
    }

    // Only script elements need the handling below.
    ScriptLoader* scriptLoader = toScriptLoaderIfPossible(element);
    if (!scriptLoader) {
        popCurrentNode();
        return;
    }

    // Don't load external scripts for standalone documents (for now).
    ASSERT(!m_pendingScript);
    m_requestingScript = true;

    if (scriptLoader->prepareScript(m_scriptStartPosition, ScriptLoader::AllowLegacyTypeInTypeAttribute)) {
        // FIXME: Script execution should be shared between
        // the libxml2 and Qt XMLDocumentParser implementations.

        if (scriptLoader->readyToBeParserExecuted()) {
            // The script content is available now: run it immediately.
            scriptLoader->executeScript(ScriptSourceCode(scriptLoader->scriptContent(), document()->url(), m_scriptStartPosition));
        } else if (scriptLoader->willBeParserExecuted()) {
            // Register as a client of the script resource and remember which
            // element it belongs to.
            m_pendingScript = scriptLoader->resource();
            m_scriptElement = element;
            m_pendingScript->addClient(this);

            // m_pendingScript will be 0 if script was already loaded and
            // addClient() executed it.
            if (m_pendingScript)
                pauseParsing();
        } else {
            m_scriptElement = nullptr;
        }

        // JavaScript may have detached the parser
        // NOTE(review): this early return leaves m_requestingScript set and
        // skips popCurrentNode(); presumably harmless once detached - confirm.
        if (isDetached())
            return;
    }
    m_requestingScript = false;
    popCurrentNode();
}
1105
1106void XMLDocumentParser::characters(const xmlChar* chars, int length)
1107{
1108    if (isStopped())
1109        return;
1110
1111    if (m_parserPaused) {
1112        m_pendingCallbacks.append(adoptPtr(new PendingCharactersCallback(chars, length)));
1113        return;
1114    }
1115
1116    if (!m_leafTextNode)
1117        enterText();
1118    m_bufferedText.append(chars, length);
1119}
1120
1121void XMLDocumentParser::error(XMLErrors::ErrorType type, const char* message, va_list args)
1122{
1123    if (isStopped())
1124        return;
1125
1126    char formattedMessage[1024];
1127    vsnprintf(formattedMessage, sizeof(formattedMessage) - 1, message, args);
1128
1129    if (m_parserPaused) {
1130        m_pendingCallbacks.append(adoptPtr(new PendingErrorCallback(type, reinterpret_cast<const xmlChar*>(formattedMessage), lineNumber(), columnNumber())));
1131        return;
1132    }
1133
1134    handleError(type, formattedMessage, textPosition());
1135}
1136
1137void XMLDocumentParser::processingInstruction(const String& target, const String& data)
1138{
1139    if (isStopped())
1140        return;
1141
1142    if (m_parserPaused) {
1143        m_pendingCallbacks.append(adoptPtr(new PendingProcessingInstructionCallback(target, data)));
1144        return;
1145    }
1146
1147    exitText();
1148
1149    // ### handle exceptions
1150    TrackExceptionState exceptionState;
1151    RefPtrWillBeRawPtr<ProcessingInstruction> pi = m_currentNode->document().createProcessingInstruction(target, data, exceptionState);
1152    if (exceptionState.hadException())
1153        return;
1154
1155    pi->setCreatedByParser(true);
1156
1157    m_currentNode->parserAppendChild(pi.get());
1158
1159    pi->setCreatedByParser(false);
1160
1161    if (pi->isCSS())
1162        m_sawCSS = true;
1163
1164    if (!RuntimeEnabledFeatures::xsltEnabled())
1165        return;
1166
1167    m_sawXSLTransform = !m_sawFirstElement && pi->isXSL();
1168    if (m_sawXSLTransform && !document()->transformSourceDocument()) {
1169        // This behavior is very tricky. We call stopParsing() here because we
1170        // want to stop processing the document until we're ready to apply the
1171        // transform, but we actually still want to be fed decoded string pieces
1172        // to accumulate in m_originalSourceForTransform. So, we call
1173        // stopParsing() here and check isStopped() in element callbacks.
1174        // FIXME: This contradicts the contract of DocumentParser.
1175        stopParsing();
1176    }
1177}
1178
1179void XMLDocumentParser::cdataBlock(const String& text)
1180{
1181    if (isStopped())
1182        return;
1183
1184    if (m_parserPaused) {
1185        m_pendingCallbacks.append(adoptPtr(new PendingCDATABlockCallback(text)));
1186        return;
1187    }
1188
1189    exitText();
1190
1191    m_currentNode->parserAppendChild(CDATASection::create(m_currentNode->document(), text));
1192}
1193
1194void XMLDocumentParser::comment(const String& text)
1195{
1196    if (isStopped())
1197        return;
1198
1199    if (m_parserPaused) {
1200        m_pendingCallbacks.append(adoptPtr(new PendingCommentCallback(text)));
1201        return;
1202    }
1203
1204    exitText();
1205
1206    m_currentNode->parserAppendChild(Comment::create(m_currentNode->document(), text));
1207}
1208
// Interpretation of the integer "standalone" value that startDocument()
// receives from the libxml parser context.
enum StandaloneInfo {
    StandaloneUnspecified = -2,
    NoXMlDeclaration = -1,
    StandaloneNo = 0,
    StandaloneYes = 1
};
1215
1216void XMLDocumentParser::startDocument(const String& version, const String& encoding, int standalone)
1217{
1218    StandaloneInfo standaloneInfo = static_cast<StandaloneInfo>(standalone);
1219    if (standaloneInfo == NoXMlDeclaration) {
1220        document()->setHasXMLDeclaration(false);
1221        return;
1222    }
1223
1224    if (!version.isNull())
1225        document()->setXMLVersion(version, ASSERT_NO_EXCEPTION);
1226    if (standalone != StandaloneUnspecified)
1227        document()->setXMLStandalone(standaloneInfo == StandaloneYes, ASSERT_NO_EXCEPTION);
1228    if (!encoding.isNull())
1229        document()->setXMLEncoding(encoding);
1230    document()->setHasXMLDeclaration(true);
1231}
1232
// End-of-document callback; also called by appendFragmentSource() to close
// any open text node.
void XMLDocumentParser::endDocument()
{
    exitText();
}
1237
1238void XMLDocumentParser::internalSubset(const String& name, const String& externalID, const String& systemID)
1239{
1240    if (isStopped())
1241        return;
1242
1243    if (m_parserPaused) {
1244        m_pendingCallbacks.append(adoptPtr(new PendingInternalSubsetCallback(name, externalID, systemID)));
1245        return;
1246    }
1247
1248    if (document())
1249        document()->parserAppendChild(DocumentType::create(document(), name, externalID, systemID));
1250}
1251
1252static inline XMLDocumentParser* getParser(void* closure)
1253{
1254    xmlParserCtxtPtr ctxt = static_cast<xmlParserCtxtPtr>(closure);
1255    return static_cast<XMLDocumentParser*>(ctxt->_private);
1256}
1257
1258static void startElementNsHandler(void* closure, const xmlChar* localName, const xmlChar* prefix, const xmlChar* uri, int nbNamespaces, const xmlChar** namespaces, int nbAttributes, int nbDefaulted, const xmlChar** libxmlAttributes)
1259{
1260    getParser(closure)->startElementNs(toAtomicString(localName), toAtomicString(prefix), toAtomicString(uri), nbNamespaces, namespaces, nbAttributes, nbDefaulted, libxmlAttributes);
1261}
1262
1263static void endElementNsHandler(void* closure, const xmlChar*, const xmlChar*, const xmlChar*)
1264{
1265    getParser(closure)->endElementNs();
1266}
1267
1268static void charactersHandler(void* closure, const xmlChar* chars, int length)
1269{
1270    getParser(closure)->characters(chars, length);
1271}
1272
1273static void processingInstructionHandler(void* closure, const xmlChar* target, const xmlChar* data)
1274{
1275    getParser(closure)->processingInstruction(toString(target), toString(data));
1276}
1277
1278static void cdataBlockHandler(void* closure, const xmlChar* text, int length)
1279{
1280    getParser(closure)->cdataBlock(toString(text, length));
1281}
1282
1283static void commentHandler(void* closure, const xmlChar* text)
1284{
1285    getParser(closure)->comment(toString(text));
1286}
1287
1288WTF_ATTRIBUTE_PRINTF(2, 3)
1289static void warningHandler(void* closure, const char* message, ...)
1290{
1291    va_list args;
1292    va_start(args, message);
1293    getParser(closure)->error(XMLErrors::ErrorTypeWarning, message, args);
1294    va_end(args);
1295}
1296
1297WTF_ATTRIBUTE_PRINTF(2, 3)
1298static void fatalErrorHandler(void* closure, const char* message, ...)
1299{
1300    va_list args;
1301    va_start(args, message);
1302    getParser(closure)->error(XMLErrors::ErrorTypeFatal, message, args);
1303    va_end(args);
1304}
1305
1306WTF_ATTRIBUTE_PRINTF(2, 3)
1307static void normalErrorHandler(void* closure, const char* message, ...)
1308{
1309    va_list args;
1310    va_start(args, message);
1311    getParser(closure)->error(XMLErrors::ErrorTypeNonFatal, message, args);
1312    va_end(args);
1313}
1314
1315// Using a static entity and marking it XML_INTERNAL_PREDEFINED_ENTITY is a hack
1316// to avoid malloc/free. Using a global variable like this could cause trouble
1317// if libxml implementation details were to change
1318static xmlChar sharedXHTMLEntityResult[9] = {0, 0, 0, 0, 0, 0, 0, 0, 0};
1319
1320static xmlEntityPtr sharedXHTMLEntity()
1321{
1322    static xmlEntity entity;
1323    if (!entity.type) {
1324        entity.type = XML_ENTITY_DECL;
1325        entity.orig = sharedXHTMLEntityResult;
1326        entity.content = sharedXHTMLEntityResult;
1327        entity.etype = XML_INTERNAL_PREDEFINED_ENTITY;
1328    }
1329    return &entity;
1330}
1331
1332static size_t convertUTF16EntityToUTF8(const UChar* utf16Entity, size_t numberOfCodeUnits, char* target, size_t targetSize)
1333{
1334    const char* originalTarget = target;
1335    WTF::Unicode::ConversionResult conversionResult = WTF::Unicode::convertUTF16ToUTF8(&utf16Entity,
1336        utf16Entity + numberOfCodeUnits, &target, target + targetSize);
1337    if (conversionResult != WTF::Unicode::conversionOK)
1338        return 0;
1339
1340    // Even though we must pass the length, libxml expects the entity string to be null terminated.
1341    ASSERT(target > originalTarget + 1);
1342    *target = '\0';
1343    return target - originalTarget;
1344}
1345
1346static xmlEntityPtr getXHTMLEntity(const xmlChar* name)
1347{
1348    UChar utf16DecodedEntity[4];
1349    size_t numberOfCodeUnits = decodeNamedEntityToUCharArray(reinterpret_cast<const char*>(name), utf16DecodedEntity);
1350    if (!numberOfCodeUnits)
1351        return 0;
1352
1353    ASSERT(numberOfCodeUnits <= 4);
1354    size_t entityLengthInUTF8 = convertUTF16EntityToUTF8(utf16DecodedEntity, numberOfCodeUnits,
1355        reinterpret_cast<char*>(sharedXHTMLEntityResult), WTF_ARRAY_LENGTH(sharedXHTMLEntityResult));
1356    if (!entityLengthInUTF8)
1357        return 0;
1358
1359    xmlEntityPtr entity = sharedXHTMLEntity();
1360    entity->length = entityLengthInUTF8;
1361    entity->name = name;
1362    return entity;
1363}
1364
1365static xmlEntityPtr getEntityHandler(void* closure, const xmlChar* name)
1366{
1367    xmlParserCtxtPtr ctxt = static_cast<xmlParserCtxtPtr>(closure);
1368    xmlEntityPtr ent = xmlGetPredefinedEntity(name);
1369    if (ent) {
1370        ent->etype = XML_INTERNAL_PREDEFINED_ENTITY;
1371        return ent;
1372    }
1373
1374    ent = xmlGetDocEntity(ctxt->myDoc, name);
1375    if (!ent && getParser(closure)->isXHTMLDocument()) {
1376        ent = getXHTMLEntity(name);
1377        if (ent)
1378            ent->etype = XML_INTERNAL_GENERAL_ENTITY;
1379    }
1380
1381    return ent;
1382}
1383
1384static void startDocumentHandler(void* closure)
1385{
1386    xmlParserCtxt* ctxt = static_cast<xmlParserCtxt*>(closure);
1387    XMLDocumentParser* parser = getParser(closure);
1388    switchEncoding(ctxt, parser->isCurrentlyParsing8BitChunk());
1389    parser->startDocument(toString(ctxt->version), toString(ctxt->encoding), ctxt->standalone);
1390    xmlSAX2StartDocument(closure);
1391}
1392
1393static void endDocumentHandler(void* closure)
1394{
1395    getParser(closure)->endDocument();
1396    xmlSAX2EndDocument(closure);
1397}
1398
1399static void internalSubsetHandler(void* closure, const xmlChar* name, const xmlChar* externalID, const xmlChar* systemID)
1400{
1401    getParser(closure)->internalSubset(toString(name), toString(externalID), toString(systemID));
1402    xmlSAX2InternalSubset(closure, name, externalID, systemID);
1403}
1404
1405static void externalSubsetHandler(void* closure, const xmlChar*, const xmlChar* externalId, const xmlChar*)
1406{
1407    String extId = toString(externalId);
1408    if (extId == "-//W3C//DTD XHTML 1.0 Transitional//EN"
1409        || extId == "-//W3C//DTD XHTML 1.1//EN"
1410        || extId == "-//W3C//DTD XHTML 1.0 Strict//EN"
1411        || extId == "-//W3C//DTD XHTML 1.0 Frameset//EN"
1412        || extId == "-//W3C//DTD XHTML Basic 1.0//EN"
1413        || extId == "-//W3C//DTD XHTML 1.1 plus MathML 2.0//EN"
1414        || extId == "-//W3C//DTD XHTML 1.1 plus MathML 2.0 plus SVG 1.1//EN"
1415        || extId == "-//WAPFORUM//DTD XHTML Mobile 1.0//EN"
1416        || extId == "-//WAPFORUM//DTD XHTML Mobile 1.1//EN"
1417        || extId == "-//WAPFORUM//DTD XHTML Mobile 1.2//EN") {
1418        // Controls if we replace entities or not.
1419        getParser(closure)->setIsXHTMLDocument(true);
1420    }
1421}
1422
// Intentionally empty SAX ignorable-whitespace callback; all arguments are
// ignored. It is installed only so that libxml has a handler to call.
static void ignorableWhitespaceHandler(void*, const xmlChar*, int)
{
    // Nothing to do, but we need this to work around a crasher.
    // http://bugzilla.gnome.org/show_bug.cgi?id=172255
    // http://bugs.webkit.org/show_bug.cgi?id=5792
}
1429
1430void XMLDocumentParser::initializeParserContext(const CString& chunk)
1431{
1432    xmlSAXHandler sax;
1433    memset(&sax, 0, sizeof(sax));
1434
1435    sax.error = normalErrorHandler;
1436    sax.fatalError = fatalErrorHandler;
1437    sax.characters = charactersHandler;
1438    sax.processingInstruction = processingInstructionHandler;
1439    sax.cdataBlock = cdataBlockHandler;
1440    sax.comment = commentHandler;
1441    sax.warning = warningHandler;
1442    sax.startElementNs = startElementNsHandler;
1443    sax.endElementNs = endElementNsHandler;
1444    sax.getEntity = getEntityHandler;
1445    sax.startDocument = startDocumentHandler;
1446    sax.endDocument = endDocumentHandler;
1447    sax.internalSubset = internalSubsetHandler;
1448    sax.externalSubset = externalSubsetHandler;
1449    sax.ignorableWhitespace = ignorableWhitespaceHandler;
1450    sax.entityDecl = xmlSAX2EntityDecl;
1451    sax.initialized = XML_SAX2_MAGIC;
1452    m_sawError = false;
1453    m_sawCSS = false;
1454    m_sawXSLTransform = false;
1455    m_sawFirstElement = false;
1456
1457    XMLDocumentParserScope scope(document()->fetcher());
1458    if (m_parsingFragment) {
1459        m_context = XMLParserContext::createMemoryParser(&sax, this, chunk);
1460    } else {
1461        ASSERT(!chunk.data());
1462        m_context = XMLParserContext::createStringParser(&sax, this);
1463    }
1464}
1465
1466void XMLDocumentParser::doEnd()
1467{
1468    if (!isStopped()) {
1469        if (m_context) {
1470            // Tell libxml we're done.
1471            {
1472                XMLDocumentParserScope scope(document()->fetcher());
1473                finishParsing(context());
1474            }
1475
1476            m_context = nullptr;
1477        }
1478    }
1479
1480    bool xmlViewerMode = !m_sawError && !m_sawCSS && !m_sawXSLTransform && hasNoStyleInformation(document());
1481    if (xmlViewerMode) {
1482        const char noStyleMessage[] = "This XML file does not appear to have any style information associated with it. The document tree is shown below.";
1483        document()->setIsViewSource(true);
1484        V8Document::PrivateScript::transformDocumentToTreeViewMethod(document()->frame(), document(), noStyleMessage);
1485    } else if (m_sawXSLTransform) {
1486        xmlDocPtr doc = xmlDocPtrForString(document()->fetcher(), m_originalSourceForTransform.toString(), document()->url().string());
1487        document()->setTransformSource(adoptPtr(new TransformSource(doc)));
1488        // Make the document think it's done, so it will apply XSL stylesheets.
1489        document()->setParsing(false);
1490        document()->styleResolverChanged();
1491
1492        // styleResolverChanged() call can detach the parser and null out its
1493        // document. In that case, we just bail out.
1494        if (isDetached())
1495            return;
1496
1497        document()->setParsing(true);
1498        DocumentParser::stopParsing();
1499    }
1500}
1501
1502xmlDocPtr xmlDocPtrForString(ResourceFetcher* fetcher, const String& source, const String& url)
1503{
1504    if (source.isEmpty())
1505        return 0;
1506    // Parse in a single chunk into an xmlDocPtr
1507    // FIXME: Hook up error handlers so that a failure to parse the main
1508    // document results in good error messages.
1509    XMLDocumentParserScope scope(fetcher, errorFunc, 0);
1510    XMLParserInput input(source);
1511    return xmlReadMemory(input.data(), input.size(), url.latin1().data(), input.encoding(), XSLT_PARSE_OPTIONS);
1512}
1513
1514OrdinalNumber XMLDocumentParser::lineNumber() const
1515{
1516    return OrdinalNumber::fromOneBasedInt(context() ? context()->input->line : 1);
1517}
1518
1519OrdinalNumber XMLDocumentParser::columnNumber() const
1520{
1521    return OrdinalNumber::fromOneBasedInt(context() ? context()->input->col : 1);
1522}
1523
1524TextPosition XMLDocumentParser::textPosition() const
1525{
1526    xmlParserCtxtPtr context = this->context();
1527    if (!context)
1528        return TextPosition::minimumPosition();
1529    return TextPosition(OrdinalNumber::fromOneBasedInt(context->input->line), OrdinalNumber::fromOneBasedInt(context->input->col));
1530}
1531
1532void XMLDocumentParser::stopParsing()
1533{
1534    DocumentParser::stopParsing();
1535    if (context())
1536        xmlStopParser(context());
1537}
1538
1539void XMLDocumentParser::resumeParsing()
1540{
1541    ASSERT(!isDetached());
1542    ASSERT(m_parserPaused);
1543
1544    m_parserPaused = false;
1545
1546    // First, execute any pending callbacks
1547    while (!m_pendingCallbacks.isEmpty()) {
1548        OwnPtr<PendingCallback> callback = m_pendingCallbacks.takeFirst();
1549        callback->call(this);
1550
1551        // A callback paused the parser
1552        if (m_parserPaused)
1553            return;
1554    }
1555
1556    // Then, write any pending data
1557    SegmentedString rest = m_pendingSrc;
1558    m_pendingSrc.clear();
1559    // There is normally only one string left, so toString() shouldn't copy.
1560    // In any case, the XML parser runs on the main thread and it's OK if
1561    // the passed string has more than one reference.
1562    append(rest.toString().impl());
1563
1564    // Finally, if finish() has been called and write() didn't result
1565    // in any further callbacks being queued, call end()
1566    if (m_finishCalled && m_pendingCallbacks.isEmpty())
1567        end();
1568}
1569
1570bool XMLDocumentParser::appendFragmentSource(const String& chunk)
1571{
1572    ASSERT(!m_context);
1573    ASSERT(m_parsingFragment);
1574
1575    CString chunkAsUtf8 = chunk.utf8();
1576
1577    // libxml2 takes an int for a length, and therefore can't handle XML chunks
1578    // larger than 2 GiB.
1579    if (chunkAsUtf8.length() > INT_MAX)
1580        return false;
1581
1582    TRACE_EVENT0("blink", "XMLDocumentParser::appendFragmentSource");
1583    initializeParserContext(chunkAsUtf8);
1584    xmlParseContent(context());
1585    endDocument(); // Close any open text nodes.
1586
1587    // FIXME: If this code is actually needed, it should probably move to
1588    // finish()
1589    // XMLDocumentParserQt has a similar check (m_stream.error() ==
1590    // QXmlStreamReader::PrematureEndOfDocumentError) in doEnd(). Check if all
1591    // the chunk has been processed.
1592    long bytesProcessed = xmlByteConsumed(context());
1593    if (bytesProcessed == -1 || static_cast<unsigned long>(bytesProcessed) != chunkAsUtf8.length()) {
1594        // FIXME: I don't believe we can hit this case without also having seen
1595        // an error or a null byte. If we hit this ASSERT, we've found a test
1596        // case which demonstrates the need for this code.
1597        ASSERT(m_sawError || (bytesProcessed >= 0 && !chunkAsUtf8.data()[bytesProcessed]));
1598        return false;
1599    }
1600
1601    // No error if the chunk is well formed or it is not but we have no error.
1602    return context()->wellFormed || !xmlCtxtGetLastError(context());
1603}
1604
1605// --------------------------------
1606
// Result accumulator for parseAttributes(); the SAX handler reaches it
// through the parser context's _private pointer.
struct AttributeParseState {
    // Qualified attribute name -> attribute value.
    HashMap<String, String> attributes;
    // Set to true once an "attrs" element has been seen.
    bool gotAttributes;
};
1611
1612static void attributesStartElementNsHandler(void* closure, const xmlChar* xmlLocalName, const xmlChar* /*xmlPrefix*/,
1613    const xmlChar* /*xmlURI*/, int /*nbNamespaces*/, const xmlChar** /*namespaces*/,
1614    int nbAttributes, int /*nbDefaulted*/, const xmlChar** libxmlAttributes)
1615{
1616    if (strcmp(reinterpret_cast<const char*>(xmlLocalName), "attrs") != 0)
1617        return;
1618
1619    xmlParserCtxtPtr ctxt = static_cast<xmlParserCtxtPtr>(closure);
1620    AttributeParseState* state = static_cast<AttributeParseState*>(ctxt->_private);
1621
1622    state->gotAttributes = true;
1623
1624    xmlSAX2Attributes* attributes = reinterpret_cast<xmlSAX2Attributes*>(libxmlAttributes);
1625    for (int i = 0; i < nbAttributes; ++i) {
1626        String attrLocalName = toString(attributes[i].localname);
1627        int valueLength = (int) (attributes[i].end - attributes[i].value);
1628        String attrValue = toString(attributes[i].value, valueLength);
1629        String attrPrefix = toString(attributes[i].prefix);
1630        String attrQName = attrPrefix.isEmpty() ? attrLocalName : attrPrefix + ":" + attrLocalName;
1631
1632        state->attributes.set(attrQName, attrValue);
1633    }
1634}
1635
1636HashMap<String, String> parseAttributes(const String& string, bool& attrsOK)
1637{
1638    AttributeParseState state;
1639    state.gotAttributes = false;
1640
1641    xmlSAXHandler sax;
1642    memset(&sax, 0, sizeof(sax));
1643    sax.startElementNs = attributesStartElementNsHandler;
1644    sax.initialized = XML_SAX2_MAGIC;
1645    RefPtr<XMLParserContext> parser = XMLParserContext::createStringParser(&sax, &state);
1646    String parseString = "<?xml version=\"1.0\"?><attrs " + string + " />";
1647    parseChunk(parser->context(), parseString);
1648    finishParsing(parser->context());
1649    attrsOK = state.gotAttributes;
1650    return state.attributes;
1651}
1652
1653} // namespace blink
1654