1/*
2 * Copyright (C) 2010 Google, Inc. All Rights Reserved.
3 * Copyright (C) 2011 Apple Inc. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY
15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL GOOGLE INC. OR
18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27#include "config.h"
28#include "core/html/parser/HTMLConstructionSite.h"
29
30#include "core/HTMLElementFactory.h"
31#include "core/HTMLNames.h"
32#include "core/dom/Comment.h"
33#include "core/dom/DocumentFragment.h"
34#include "core/dom/DocumentType.h"
35#include "core/dom/Element.h"
36#include "core/dom/ScriptLoader.h"
37#include "core/dom/Text.h"
38#include "core/frame/LocalFrame.h"
39#include "core/html/HTMLFormElement.h"
40#include "core/html/HTMLHtmlElement.h"
41#include "core/html/HTMLPlugInElement.h"
42#include "core/html/HTMLScriptElement.h"
43#include "core/html/HTMLTemplateElement.h"
44#include "core/html/parser/AtomicHTMLToken.h"
45#include "core/html/parser/HTMLParserIdioms.h"
46#include "core/html/parser/HTMLStackItem.h"
47#include "core/html/parser/HTMLToken.h"
48#include "core/loader/FrameLoader.h"
49#include "core/loader/FrameLoaderClient.h"
50#include "core/svg/SVGScriptElement.h"
51#include "platform/NotImplemented.h"
52#include "platform/text/TextBreakIterator.h"
53#include <limits>
54
55namespace blink {
56
57using namespace HTMLNames;
58
// Open-element stack depth beyond which attachLater() stops nesting new nodes
// deeper and instead inserts them under the intended parent's own parent.
static const unsigned maximumHTMLParserDOMTreeDepth = 512;
60
61static inline void setAttributes(Element* element, AtomicHTMLToken* token, ParserContentPolicy parserContentPolicy)
62{
63    if (!scriptingContentIsAllowed(parserContentPolicy))
64        element->stripScriptingAttributes(token->attributes());
65    element->parserSetAttributes(token->attributes());
66}
67
68static bool hasImpliedEndTag(const HTMLStackItem* item)
69{
70    return item->hasTagName(ddTag)
71        || item->hasTagName(dtTag)
72        || item->hasTagName(liTag)
73        || item->hasTagName(optionTag)
74        || item->hasTagName(optgroupTag)
75        || item->hasTagName(pTag)
76        || item->hasTagName(rbTag)
77        || item->hasTagName(rpTag)
78        || item->hasTagName(rtTag)
79        || item->hasTagName(rtcTag);
80}
81
82static bool shouldUseLengthLimit(const ContainerNode& node)
83{
84    return !isHTMLScriptElement(node)
85        && !isHTMLStyleElement(node)
86        && !isSVGScriptElement(node);
87}
88
89static unsigned textLengthLimitForContainer(const ContainerNode& node)
90{
91    return shouldUseLengthLimit(node) ? Text::defaultLengthLimit : std::numeric_limits<unsigned>::max();
92}
93
94static inline bool isAllWhitespace(const String& string)
95{
96    return string.isAllSpecialCharacters<isHTMLSpace<UChar> >();
97}
98
99static inline void insert(HTMLConstructionSiteTask& task)
100{
101    if (isHTMLTemplateElement(*task.parent))
102        task.parent = toHTMLTemplateElement(task.parent.get())->content();
103
104    if (ContainerNode* parent = task.child->parentNode())
105        parent->parserRemoveChild(*task.child);
106
107    if (task.nextChild)
108        task.parent->parserInsertBefore(task.child.get(), *task.nextChild);
109    else
110        task.parent->parserAppendChild(task.child.get());
111}
112
113static inline void executeInsertTask(HTMLConstructionSiteTask& task)
114{
115    ASSERT(task.operation == HTMLConstructionSiteTask::Insert);
116
117    insert(task);
118
119    if (task.child->isElementNode()) {
120        Element& child = toElement(*task.child);
121        child.beginParsingChildren();
122        if (task.selfClosing)
123            child.finishParsingChildren();
124    }
125}
126
127static inline void executeInsertTextTask(HTMLConstructionSiteTask& task)
128{
129    ASSERT(task.operation == HTMLConstructionSiteTask::InsertText);
130    ASSERT(task.child->isTextNode());
131
132    // Merge text nodes into previous ones if possible:
133    // http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#insert-a-character
134    Text* newText = toText(task.child.get());
135    Node* previousChild = task.nextChild ? task.nextChild->previousSibling() : task.parent->lastChild();
136    if (previousChild && previousChild->isTextNode()) {
137        Text* previousText = toText(previousChild);
138        unsigned lengthLimit = textLengthLimitForContainer(*task.parent);
139        if (previousText->length() + newText->length() < lengthLimit) {
140            previousText->parserAppendData(newText->data());
141            return;
142        }
143    }
144
145    insert(task);
146}
147
148static inline void executeReparentTask(HTMLConstructionSiteTask& task)
149{
150    ASSERT(task.operation == HTMLConstructionSiteTask::Reparent);
151
152    if (ContainerNode* parent = task.child->parentNode())
153        parent->parserRemoveChild(*task.child);
154
155    task.parent->parserAppendChild(task.child);
156}
157
// Executes an InsertAlreadyParsedChild task. Unlike executeInsertTask(), this
// only performs the insertion and does not send the begin/finish
// parsing-children notifications to the child.
static inline void executeInsertAlreadyParsedChildTask(HTMLConstructionSiteTask& task)
{
    ASSERT(task.operation == HTMLConstructionSiteTask::InsertAlreadyParsedChild);

    insert(task);
}
164
// Executes a TakeAllChildren task: asks task.parent to take over all children
// of the node returned by task.oldParent().
static inline void executeTakeAllChildrenTask(HTMLConstructionSiteTask& task)
{
    ASSERT(task.operation == HTMLConstructionSiteTask::TakeAllChildren);

    task.parent->parserTakeAllChildrenFrom(*task.oldParent());
}
171
172void HTMLConstructionSite::executeTask(HTMLConstructionSiteTask& task)
173{
174    ASSERT(m_taskQueue.isEmpty());
175    if (task.operation == HTMLConstructionSiteTask::Insert)
176        return executeInsertTask(task);
177
178    if (task.operation == HTMLConstructionSiteTask::InsertText)
179        return executeInsertTextTask(task);
180
181    // All the cases below this point are only used by the adoption agency.
182
183    if (task.operation == HTMLConstructionSiteTask::InsertAlreadyParsedChild)
184        return executeInsertAlreadyParsedChildTask(task);
185
186    if (task.operation == HTMLConstructionSiteTask::Reparent)
187        return executeReparentTask(task);
188
189    if (task.operation == HTMLConstructionSiteTask::TakeAllChildren)
190        return executeTakeAllChildrenTask(task);
191
192    ASSERT_NOT_REACHED();
193}
194
195// This is only needed for TextDocuments where we might have text nodes
196// approaching the default length limit (~64k) and we don't want to
197// break a text node in the middle of a combining character.
198static unsigned findBreakIndexBetween(const StringBuilder& string, unsigned currentPosition, unsigned proposedBreakIndex)
199{
200    ASSERT(currentPosition < proposedBreakIndex);
201    ASSERT(proposedBreakIndex <= string.length());
202    // The end of the string is always a valid break.
203    if (proposedBreakIndex == string.length())
204        return proposedBreakIndex;
205
206    // Latin-1 does not have breakable boundaries. If we ever moved to a differnet 8-bit encoding this could be wrong.
207    if (string.is8Bit())
208        return proposedBreakIndex;
209
210    const UChar* breakSearchCharacters = string.characters16() + currentPosition;
211    // We need at least two characters look-ahead to account for UTF-16 surrogates, but can't search off the end of the buffer!
212    unsigned breakSearchLength = std::min(proposedBreakIndex - currentPosition + 2, string.length() - currentPosition);
213    NonSharedCharacterBreakIterator it(breakSearchCharacters, breakSearchLength);
214
215    if (it.isBreak(proposedBreakIndex - currentPosition))
216        return proposedBreakIndex;
217
218    int adjustedBreakIndexInSubstring = it.preceding(proposedBreakIndex - currentPosition);
219    if (adjustedBreakIndexInSubstring > 0)
220        return currentPosition + adjustedBreakIndexInSubstring;
221    // We failed to find a breakable point, let the caller figure out what to do.
222    return 0;
223}
224
225static String atomizeIfAllWhitespace(const String& string, WhitespaceMode whitespaceMode)
226{
227    // Strings composed entirely of whitespace are likely to be repeated.
228    // Turn them into AtomicString so we share a single string for each.
229    if (whitespaceMode == AllWhitespace || (whitespaceMode == WhitespaceUnknown && isAllWhitespace(string)))
230        return AtomicString(string).string();
231    return string;
232}
233
234void HTMLConstructionSite::flushPendingText(FlushMode mode)
235{
236    if (m_pendingText.isEmpty())
237        return;
238
239    if (mode == FlushIfAtTextLimit
240        && !shouldUseLengthLimit(*m_pendingText.parent))
241        return;
242
243    PendingText pendingText;
244    // Hold onto the current pending text on the stack so that queueTask doesn't recurse infinitely.
245    m_pendingText.swap(pendingText);
246    ASSERT(m_pendingText.isEmpty());
247
248    // Splitting text nodes into smaller chunks contradicts HTML5 spec, but is necessary
249    // for performance, see: https://bugs.webkit.org/show_bug.cgi?id=55898
250    unsigned lengthLimit = textLengthLimitForContainer(*pendingText.parent);
251
252    unsigned currentPosition = 0;
253    const StringBuilder& string = pendingText.stringBuilder;
254    while (currentPosition < string.length()) {
255        unsigned proposedBreakIndex = std::min(currentPosition + lengthLimit, string.length());
256        unsigned breakIndex = findBreakIndexBetween(string, currentPosition, proposedBreakIndex);
257        ASSERT(breakIndex <= string.length());
258        String substring = string.substring(currentPosition, breakIndex - currentPosition);
259        substring = atomizeIfAllWhitespace(substring, pendingText.whitespaceMode);
260
261        HTMLConstructionSiteTask task(HTMLConstructionSiteTask::InsertText);
262        task.parent = pendingText.parent;
263        task.nextChild = pendingText.nextChild;
264        task.child = Text::create(task.parent->document(), substring);
265        queueTask(task);
266
267        ASSERT(breakIndex > currentPosition);
268        ASSERT(breakIndex - currentPosition == substring.length());
269        ASSERT(toText(task.child.get())->length() == substring.length());
270        currentPosition = breakIndex;
271    }
272}
273
// Appends |task| to the task queue. Any pending text is flushed into the queue
// first, so text tasks are queued ahead of |task|.
void HTMLConstructionSite::queueTask(const HTMLConstructionSiteTask& task)
{
    flushPendingText(FlushAlways);
    ASSERT(m_pendingText.isEmpty());
    m_taskQueue.append(task);
}
280
281void HTMLConstructionSite::attachLater(ContainerNode* parent, PassRefPtrWillBeRawPtr<Node> prpChild, bool selfClosing)
282{
283    ASSERT(scriptingContentIsAllowed(m_parserContentPolicy) || !prpChild.get()->isElementNode() || !toScriptLoaderIfPossible(toElement(prpChild.get())));
284    ASSERT(pluginContentIsAllowed(m_parserContentPolicy) || !isHTMLPlugInElement(prpChild));
285
286    HTMLConstructionSiteTask task(HTMLConstructionSiteTask::Insert);
287    task.parent = parent;
288    task.child = prpChild;
289    task.selfClosing = selfClosing;
290
291    if (shouldFosterParent()) {
292        fosterParent(task.child);
293        return;
294    }
295
296    // Add as a sibling of the parent if we have reached the maximum depth allowed.
297    if (m_openElements.stackDepth() > maximumHTMLParserDOMTreeDepth && task.parent->parentNode())
298        task.parent = task.parent->parentNode();
299
300    ASSERT(task.parent);
301    queueTask(task);
302}
303
304void HTMLConstructionSite::executeQueuedTasks()
305{
306    // This has no affect on pendingText, and we may have pendingText
307    // remaining after executing all other queued tasks.
308    const size_t size = m_taskQueue.size();
309    if (!size)
310        return;
311
312    // Copy the task queue into a local variable in case executeTask
313    // re-enters the parser.
314    TaskQueue queue;
315    queue.swap(m_taskQueue);
316
317    for (size_t i = 0; i < size; ++i)
318        executeTask(queue[i]);
319
320    // We might be detached now.
321}
322
// Creates a construction site that builds directly into |document|: the
// document is both the owner document and the attachment root, fragment
// parsing is off, and the quirks-mode flag is seeded from the document.
HTMLConstructionSite::HTMLConstructionSite(Document* document, ParserContentPolicy parserContentPolicy)
    : m_document(document)
    , m_attachmentRoot(document)
    , m_parserContentPolicy(parserContentPolicy)
    , m_isParsingFragment(false)
    , m_redirectAttachToFosterParent(false)
    , m_inQuirksMode(document->inQuirksMode())
{
    // Only HTML and XHTML documents are expected here.
    ASSERT(m_document->isHTMLDocument() || m_document->isXHTMLDocument());
}
333
// Creates a construction site for fragment parsing: nodes are attached under
// |fragment|, while the owner document and the initial quirks-mode flag come
// from the fragment's document.
HTMLConstructionSite::HTMLConstructionSite(DocumentFragment* fragment, ParserContentPolicy parserContentPolicy)
    : m_document(&fragment->document())
    , m_attachmentRoot(fragment)
    , m_parserContentPolicy(parserContentPolicy)
    , m_isParsingFragment(true)
    , m_redirectAttachToFosterParent(false)
    , m_inQuirksMode(fragment->document().inQuirksMode())
{
    // Only HTML and XHTML documents are expected here.
    ASSERT(m_document->isHTMLDocument() || m_document->isXHTMLDocument());
}
344
// Destroys the construction site. By this point both the task queue and the
// pending text are expected to have been drained.
HTMLConstructionSite::~HTMLConstructionSite()
{
    // Depending on why we're being destroyed it might be OK
    // to forget queued tasks, but currently we don't expect to.
    ASSERT(m_taskQueue.isEmpty());
    // Currently we assume that text will never be the last token in the
    // document and that we'll always queue some additional task to cause it to flush.
    ASSERT(m_pendingText.isEmpty());
}
354
// Reports each of the members listed below to |visitor|.
void HTMLConstructionSite::trace(Visitor* visitor)
{
    visitor->trace(m_document);
    visitor->trace(m_attachmentRoot);
    visitor->trace(m_head);
    visitor->trace(m_form);
    visitor->trace(m_openElements);
    visitor->trace(m_activeFormattingElements);
    visitor->trace(m_taskQueue);
    visitor->trace(m_pendingText);
}
366
// Disconnects the construction site from its document and attachment root.
// Any text still pending is discarded rather than inserted.
void HTMLConstructionSite::detach()
{
    // FIXME: We'd like to ASSERT here that we're canceling and not just discarding
    // text that really should have made it into the DOM earlier, but there
    // doesn't seem to be a nice way to do that.
    m_pendingText.discard();
    m_document = nullptr;
    m_attachmentRoot = nullptr;
}
376
// Records |form| as the current form element. No form may already be set.
void HTMLConstructionSite::setForm(HTMLFormElement* form)
{
    // This method should only be needed for HTMLTreeBuilder in the fragment case.
    ASSERT(!m_form);
    m_form = form;
}
383
// Hands the current form element to the caller by releasing m_form.
PassRefPtrWillBeRawPtr<HTMLFormElement> HTMLConstructionSite::takeForm()
{
    return m_form.release();
}
388
389void HTMLConstructionSite::dispatchDocumentElementAvailableIfNeeded()
390{
391    ASSERT(m_document);
392    if (m_document->frame() && !m_isParsingFragment)
393        m_document->frame()->loader().dispatchDocumentElementAvailable();
394}
395
396void HTMLConstructionSite::insertHTMLHtmlStartTagBeforeHTML(AtomicHTMLToken* token)
397{
398    ASSERT(m_document);
399    RefPtrWillBeRawPtr<HTMLHtmlElement> element = HTMLHtmlElement::create(*m_document);
400    setAttributes(element.get(), token, m_parserContentPolicy);
401    attachLater(m_attachmentRoot, element);
402    m_openElements.pushHTMLHtmlElement(HTMLStackItem::create(element, token));
403
404    executeQueuedTasks();
405    element->insertedByParser();
406    dispatchDocumentElementAvailableIfNeeded();
407}
408
409void HTMLConstructionSite::mergeAttributesFromTokenIntoElement(AtomicHTMLToken* token, Element* element)
410{
411    if (token->attributes().isEmpty())
412        return;
413
414    for (unsigned i = 0; i < token->attributes().size(); ++i) {
415        const Attribute& tokenAttribute = token->attributes().at(i);
416        if (element->attributesWithoutUpdate().findIndex(tokenAttribute.name()) == kNotFound)
417            element->setAttribute(tokenAttribute.name(), tokenAttribute.value());
418    }
419}
420
421void HTMLConstructionSite::insertHTMLHtmlStartTagInBody(AtomicHTMLToken* token)
422{
423    // Fragments do not have a root HTML element, so any additional HTML elements
424    // encountered during fragment parsing should be ignored.
425    if (m_isParsingFragment)
426        return;
427
428    mergeAttributesFromTokenIntoElement(token, m_openElements.htmlElement());
429}
430
431void HTMLConstructionSite::insertHTMLBodyStartTagInBody(AtomicHTMLToken* token)
432{
433    mergeAttributesFromTokenIntoElement(token, m_openElements.bodyElement());
434}
435
436void HTMLConstructionSite::setDefaultCompatibilityMode()
437{
438    if (m_isParsingFragment)
439        return;
440    setCompatibilityMode(Document::QuirksMode);
441}
442
443void HTMLConstructionSite::setCompatibilityMode(Document::CompatibilityMode mode)
444{
445    m_inQuirksMode = (mode == Document::QuirksMode);
446    m_document->setCompatibilityMode(mode);
447}
448
449void HTMLConstructionSite::setCompatibilityModeFromDoctype(const String& name, const String& publicId, const String& systemId)
450{
451    // There are three possible compatibility modes:
452    // Quirks - quirks mode emulates WinIE and NS4. CSS parsing is also relaxed in this mode, e.g., unit types can
453    // be omitted from numbers.
454    // Limited Quirks - This mode is identical to no-quirks mode except for its treatment of line-height in the inline box model.
455    // No Quirks - no quirks apply. Web pages will obey the specifications to the letter.
456
457    // Check for Quirks Mode.
458    if (name != "html"
459        || publicId.startsWith("+//Silmaril//dtd html Pro v0r11 19970101//", false)
460        || publicId.startsWith("-//AdvaSoft Ltd//DTD HTML 3.0 asWedit + extensions//", false)
461        || publicId.startsWith("-//AS//DTD HTML 3.0 asWedit + extensions//", false)
462        || publicId.startsWith("-//IETF//DTD HTML 2.0 Level 1//", false)
463        || publicId.startsWith("-//IETF//DTD HTML 2.0 Level 2//", false)
464        || publicId.startsWith("-//IETF//DTD HTML 2.0 Strict Level 1//", false)
465        || publicId.startsWith("-//IETF//DTD HTML 2.0 Strict Level 2//", false)
466        || publicId.startsWith("-//IETF//DTD HTML 2.0 Strict//", false)
467        || publicId.startsWith("-//IETF//DTD HTML 2.0//", false)
468        || publicId.startsWith("-//IETF//DTD HTML 2.1E//", false)
469        || publicId.startsWith("-//IETF//DTD HTML 3.0//", false)
470        || publicId.startsWith("-//IETF//DTD HTML 3.2 Final//", false)
471        || publicId.startsWith("-//IETF//DTD HTML 3.2//", false)
472        || publicId.startsWith("-//IETF//DTD HTML 3//", false)
473        || publicId.startsWith("-//IETF//DTD HTML Level 0//", false)
474        || publicId.startsWith("-//IETF//DTD HTML Level 1//", false)
475        || publicId.startsWith("-//IETF//DTD HTML Level 2//", false)
476        || publicId.startsWith("-//IETF//DTD HTML Level 3//", false)
477        || publicId.startsWith("-//IETF//DTD HTML Strict Level 0//", false)
478        || publicId.startsWith("-//IETF//DTD HTML Strict Level 1//", false)
479        || publicId.startsWith("-//IETF//DTD HTML Strict Level 2//", false)
480        || publicId.startsWith("-//IETF//DTD HTML Strict Level 3//", false)
481        || publicId.startsWith("-//IETF//DTD HTML Strict//", false)
482        || publicId.startsWith("-//IETF//DTD HTML//", false)
483        || publicId.startsWith("-//Metrius//DTD Metrius Presentational//", false)
484        || publicId.startsWith("-//Microsoft//DTD Internet Explorer 2.0 HTML Strict//", false)
485        || publicId.startsWith("-//Microsoft//DTD Internet Explorer 2.0 HTML//", false)
486        || publicId.startsWith("-//Microsoft//DTD Internet Explorer 2.0 Tables//", false)
487        || publicId.startsWith("-//Microsoft//DTD Internet Explorer 3.0 HTML Strict//", false)
488        || publicId.startsWith("-//Microsoft//DTD Internet Explorer 3.0 HTML//", false)
489        || publicId.startsWith("-//Microsoft//DTD Internet Explorer 3.0 Tables//", false)
490        || publicId.startsWith("-//Netscape Comm. Corp.//DTD HTML//", false)
491        || publicId.startsWith("-//Netscape Comm. Corp.//DTD Strict HTML//", false)
492        || publicId.startsWith("-//O'Reilly and Associates//DTD HTML 2.0//", false)
493        || publicId.startsWith("-//O'Reilly and Associates//DTD HTML Extended 1.0//", false)
494        || publicId.startsWith("-//O'Reilly and Associates//DTD HTML Extended Relaxed 1.0//", false)
495        || publicId.startsWith("-//SoftQuad Software//DTD HoTMetaL PRO 6.0::19990601::extensions to HTML 4.0//", false)
496        || publicId.startsWith("-//SoftQuad//DTD HoTMetaL PRO 4.0::19971010::extensions to HTML 4.0//", false)
497        || publicId.startsWith("-//Spyglass//DTD HTML 2.0 Extended//", false)
498        || publicId.startsWith("-//SQ//DTD HTML 2.0 HoTMetaL + extensions//", false)
499        || publicId.startsWith("-//Sun Microsystems Corp.//DTD HotJava HTML//", false)
500        || publicId.startsWith("-//Sun Microsystems Corp.//DTD HotJava Strict HTML//", false)
501        || publicId.startsWith("-//W3C//DTD HTML 3 1995-03-24//", false)
502        || publicId.startsWith("-//W3C//DTD HTML 3.2 Draft//", false)
503        || publicId.startsWith("-//W3C//DTD HTML 3.2 Final//", false)
504        || publicId.startsWith("-//W3C//DTD HTML 3.2//", false)
505        || publicId.startsWith("-//W3C//DTD HTML 3.2S Draft//", false)
506        || publicId.startsWith("-//W3C//DTD HTML 4.0 Frameset//", false)
507        || publicId.startsWith("-//W3C//DTD HTML 4.0 Transitional//", false)
508        || publicId.startsWith("-//W3C//DTD HTML Experimental 19960712//", false)
509        || publicId.startsWith("-//W3C//DTD HTML Experimental 970421//", false)
510        || publicId.startsWith("-//W3C//DTD W3 HTML//", false)
511        || publicId.startsWith("-//W3O//DTD W3 HTML 3.0//", false)
512        || equalIgnoringCase(publicId, "-//W3O//DTD W3 HTML Strict 3.0//EN//")
513        || publicId.startsWith("-//WebTechs//DTD Mozilla HTML 2.0//", false)
514        || publicId.startsWith("-//WebTechs//DTD Mozilla HTML//", false)
515        || equalIgnoringCase(publicId, "-/W3C/DTD HTML 4.0 Transitional/EN")
516        || equalIgnoringCase(publicId, "HTML")
517        || equalIgnoringCase(systemId, "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd")
518        || (systemId.isEmpty() && publicId.startsWith("-//W3C//DTD HTML 4.01 Frameset//", false))
519        || (systemId.isEmpty() && publicId.startsWith("-//W3C//DTD HTML 4.01 Transitional//", false))) {
520        setCompatibilityMode(Document::QuirksMode);
521        return;
522    }
523
524    // Check for Limited Quirks Mode.
525    if (publicId.startsWith("-//W3C//DTD XHTML 1.0 Frameset//", false)
526        || publicId.startsWith("-//W3C//DTD XHTML 1.0 Transitional//", false)
527        || (!systemId.isEmpty() && publicId.startsWith("-//W3C//DTD HTML 4.01 Frameset//", false))
528        || (!systemId.isEmpty() && publicId.startsWith("-//W3C//DTD HTML 4.01 Transitional//", false))) {
529        setCompatibilityMode(Document::LimitedQuirksMode);
530        return;
531    }
532
533    // Otherwise we are No Quirks Mode.
534    setCompatibilityMode(Document::NoQuirksMode);
535}
536
// Handles the end of the input: flushes unconditionally and then pops every
// element off the stack of open elements.
void HTMLConstructionSite::processEndOfFile()
{
    ASSERT(currentNode());
    flush(FlushAlways);
    openElements()->popAll();
}
543
// Flushes whatever is still outstanding and then notifies the document that
// parsing has finished.
void HTMLConstructionSite::finishedParsing()
{
    // We shouldn't have any queued tasks but we might have pending text which we need to promote to tasks and execute.
    ASSERT(m_taskQueue.isEmpty());
    flush(FlushAlways);
    m_document->finishedParsing();
}
551
552void HTMLConstructionSite::insertDoctype(AtomicHTMLToken* token)
553{
554    ASSERT(token->type() == HTMLToken::DOCTYPE);
555
556    const String& publicId = StringImpl::create8BitIfPossible(token->publicIdentifier());
557    const String& systemId = StringImpl::create8BitIfPossible(token->systemIdentifier());
558    RefPtrWillBeRawPtr<DocumentType> doctype = DocumentType::create(m_document, token->name(), publicId, systemId);
559    attachLater(m_attachmentRoot, doctype.release());
560
561    // DOCTYPE nodes are only processed when parsing fragments w/o contextElements, which
562    // never occurs.  However, if we ever chose to support such, this code is subtly wrong,
563    // because context-less fragments can determine their own quirks mode, and thus change
564    // parsing rules (like <p> inside <table>).  For now we ASSERT that we never hit this code
565    // in a fragment, as changing the owning document's compatibility mode would be wrong.
566    ASSERT(!m_isParsingFragment);
567    if (m_isParsingFragment)
568        return;
569
570    if (token->forceQuirks())
571        setCompatibilityMode(Document::QuirksMode);
572    else {
573        setCompatibilityModeFromDoctype(token->name(), publicId, systemId);
574    }
575}
576
577void HTMLConstructionSite::insertComment(AtomicHTMLToken* token)
578{
579    ASSERT(token->type() == HTMLToken::Comment);
580    attachLater(currentNode(), Comment::create(ownerDocumentForCurrentNode(), token->comment()));
581}
582
583void HTMLConstructionSite::insertCommentOnDocument(AtomicHTMLToken* token)
584{
585    ASSERT(token->type() == HTMLToken::Comment);
586    ASSERT(m_document);
587    attachLater(m_attachmentRoot, Comment::create(*m_document, token->comment()));
588}
589
590void HTMLConstructionSite::insertCommentOnHTMLHtmlElement(AtomicHTMLToken* token)
591{
592    ASSERT(token->type() == HTMLToken::Comment);
593    ContainerNode* parent = m_openElements.rootNode();
594    attachLater(parent, Comment::create(parent->document(), token->comment()));
595}
596
// Creates the <head> element from |token|, remembers its stack item in m_head,
// schedules it for insertion under the current node and pushes it onto the
// stack of open elements. Must not be called while foster parenting applies.
void HTMLConstructionSite::insertHTMLHeadElement(AtomicHTMLToken* token)
{
    ASSERT(!shouldFosterParent());
    m_head = HTMLStackItem::create(createHTMLElement(token), token);
    attachLater(currentNode(), m_head->element());
    m_openElements.pushHTMLHeadElement(m_head);
}
604
605void HTMLConstructionSite::insertHTMLBodyElement(AtomicHTMLToken* token)
606{
607    ASSERT(!shouldFosterParent());
608    RefPtrWillBeRawPtr<HTMLElement> body = createHTMLElement(token);
609    attachLater(currentNode(), body);
610    m_openElements.pushHTMLBodyElement(HTMLStackItem::create(body.release(), token));
611    if (LocalFrame* frame = m_document->frame())
612        frame->loader().client()->dispatchWillInsertBody();
613}
614
615void HTMLConstructionSite::insertHTMLFormElement(AtomicHTMLToken* token, bool isDemoted)
616{
617    RefPtrWillBeRawPtr<HTMLElement> element = createHTMLElement(token);
618    ASSERT(isHTMLFormElement(element));
619    m_form = static_pointer_cast<HTMLFormElement>(element.release());
620    m_form->setDemoted(isDemoted);
621    attachLater(currentNode(), m_form.get());
622    m_openElements.push(HTMLStackItem::create(m_form.get(), token));
623}
624
625void HTMLConstructionSite::insertHTMLElement(AtomicHTMLToken* token)
626{
627    RefPtrWillBeRawPtr<HTMLElement> element = createHTMLElement(token);
628    attachLater(currentNode(), element);
629    m_openElements.push(HTMLStackItem::create(element.release(), token));
630}
631
632void HTMLConstructionSite::insertSelfClosingHTMLElement(AtomicHTMLToken* token)
633{
634    ASSERT(token->type() == HTMLToken::StartTag);
635    // Normally HTMLElementStack is responsible for calling finishParsingChildren,
636    // but self-closing elements are never in the element stack so the stack
637    // doesn't get a chance to tell them that we're done parsing their children.
638    attachLater(currentNode(), createHTMLElement(token), true);
639    // FIXME: Do we want to acknowledge the token's self-closing flag?
640    // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#acknowledge-self-closing-flag
641}
642
// Inserts an HTML element for |token| and appends its stack item to the list
// of active formatting elements.
void HTMLConstructionSite::insertFormattingElement(AtomicHTMLToken* token)
{
    // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#the-stack-of-open-elements
    // Possible active formatting elements include:
    // a, b, big, code, em, font, i, nobr, s, small, strike, strong, tt, and u.
    insertHTMLElement(token);
    m_activeFormattingElements.append(currentElementRecord()->stackItem());
}
651
652void HTMLConstructionSite::insertScriptElement(AtomicHTMLToken* token)
653{
654    // http://www.whatwg.org/specs/web-apps/current-work/multipage/scripting-1.html#already-started
655    // http://html5.org/specs/dom-parsing.html#dom-range-createcontextualfragment
656    // For createContextualFragment, the specifications say to mark it parser-inserted and already-started and later unmark them.
657    // However, we short circuit that logic to avoid the subtree traversal to find script elements since scripts can never see
658    // those flags or effects thereof.
659    const bool parserInserted = m_parserContentPolicy != AllowScriptingContentAndDoNotMarkAlreadyStarted;
660    const bool alreadyStarted = m_isParsingFragment && parserInserted;
661    RefPtrWillBeRawPtr<HTMLScriptElement> element = HTMLScriptElement::create(ownerDocumentForCurrentNode(), parserInserted, alreadyStarted);
662    setAttributes(element.get(), token, m_parserContentPolicy);
663    if (scriptingContentIsAllowed(m_parserContentPolicy))
664        attachLater(currentNode(), element);
665    m_openElements.push(HTMLStackItem::create(element.release(), token));
666}
667
668void HTMLConstructionSite::insertForeignElement(AtomicHTMLToken* token, const AtomicString& namespaceURI)
669{
670    ASSERT(token->type() == HTMLToken::StartTag);
671    notImplemented(); // parseError when xmlns or xmlns:xlink are wrong.
672
673    RefPtrWillBeRawPtr<Element> element = createElement(token, namespaceURI);
674    if (scriptingContentIsAllowed(m_parserContentPolicy) || !toScriptLoaderIfPossible(element.get()))
675        attachLater(currentNode(), element, token->selfClosing());
676    if (!token->selfClosing())
677        m_openElements.push(HTMLStackItem::create(element.release(), token, namespaceURI));
678}
679
680void HTMLConstructionSite::insertTextNode(const String& string, WhitespaceMode whitespaceMode)
681{
682    HTMLConstructionSiteTask dummyTask(HTMLConstructionSiteTask::Insert);
683    dummyTask.parent = currentNode();
684
685    if (shouldFosterParent())
686        findFosterSite(dummyTask);
687
688    // FIXME: This probably doesn't need to be done both here and in insert(Task).
689    if (isHTMLTemplateElement(*dummyTask.parent))
690        dummyTask.parent = toHTMLTemplateElement(dummyTask.parent.get())->content();
691
692    // Unclear when parent != case occurs. Somehow we insert text into two separate nodes while processing the same Token.
693    // The nextChild != dummy.nextChild case occurs whenever foster parenting happened and we hit a new text node "<table>a</table>b"
694    // In either case we have to flush the pending text into the task queue before making more.
695    if (!m_pendingText.isEmpty() && (m_pendingText.parent != dummyTask.parent ||  m_pendingText.nextChild != dummyTask.nextChild))
696        flushPendingText(FlushAlways);
697    m_pendingText.append(dummyTask.parent, dummyTask.nextChild, string, whitespaceMode);
698}
699
700void HTMLConstructionSite::reparent(HTMLElementStack::ElementRecord* newParent, HTMLElementStack::ElementRecord* child)
701{
702    HTMLConstructionSiteTask task(HTMLConstructionSiteTask::Reparent);
703    task.parent = newParent->node();
704    task.child = child->node();
705    queueTask(task);
706}
707
708void HTMLConstructionSite::reparent(HTMLElementStack::ElementRecord* newParent, HTMLStackItem* child)
709{
710    HTMLConstructionSiteTask task(HTMLConstructionSiteTask::Reparent);
711    task.parent = newParent->node();
712    task.child = child->node();
713    queueTask(task);
714}
715
716void HTMLConstructionSite::insertAlreadyParsedChild(HTMLStackItem* newParent, HTMLElementStack::ElementRecord* child)
717{
718    if (newParent->causesFosterParenting()) {
719        fosterParent(child->node());
720        return;
721    }
722
723    HTMLConstructionSiteTask task(HTMLConstructionSiteTask::InsertAlreadyParsedChild);
724    task.parent = newParent->node();
725    task.child = child->node();
726    queueTask(task);
727}
728
729void HTMLConstructionSite::takeAllChildren(HTMLStackItem* newParent, HTMLElementStack::ElementRecord* oldParent)
730{
731    HTMLConstructionSiteTask task(HTMLConstructionSiteTask::TakeAllChildren);
732    task.parent = newParent->node();
733    task.child = oldParent->node();
734    queueTask(task);
735}
736
737PassRefPtrWillBeRawPtr<Element> HTMLConstructionSite::createElement(AtomicHTMLToken* token, const AtomicString& namespaceURI)
738{
739    QualifiedName tagName(nullAtom, token->name(), namespaceURI);
740    RefPtrWillBeRawPtr<Element> element = ownerDocumentForCurrentNode().createElement(tagName, true);
741    setAttributes(element.get(), token, m_parserContentPolicy);
742    return element.release();
743}
744
745inline Document& HTMLConstructionSite::ownerDocumentForCurrentNode()
746{
747    if (isHTMLTemplateElement(*currentNode()))
748        return toHTMLTemplateElement(currentElement())->content()->document();
749    return currentNode()->document();
750}
751
752PassRefPtrWillBeRawPtr<HTMLElement> HTMLConstructionSite::createHTMLElement(AtomicHTMLToken* token)
753{
754    Document& document = ownerDocumentForCurrentNode();
755    // Only associate the element with the current form if we're creating the new element
756    // in a document with a browsing context (rather than in <template> contents).
757    HTMLFormElement* form = document.frame() ? m_form.get() : 0;
758    // FIXME: This can't use HTMLConstructionSite::createElement because we
759    // have to pass the current form element.  We should rework form association
760    // to occur after construction to allow better code sharing here.
761    RefPtrWillBeRawPtr<HTMLElement> element = HTMLElementFactory::createHTMLElement(token->name(), document, form, true);
762    setAttributes(element.get(), token, m_parserContentPolicy);
763    return element.release();
764}
765
766PassRefPtrWillBeRawPtr<HTMLStackItem> HTMLConstructionSite::createElementFromSavedToken(HTMLStackItem* item)
767{
768    RefPtrWillBeRawPtr<Element> element;
769    // NOTE: Moving from item -> token -> item copies the Attribute vector twice!
770    AtomicHTMLToken fakeToken(HTMLToken::StartTag, item->localName(), item->attributes());
771    if (item->namespaceURI() == HTMLNames::xhtmlNamespaceURI)
772        element = createHTMLElement(&fakeToken);
773    else
774        element = createElement(&fakeToken, item->namespaceURI());
775    return HTMLStackItem::create(element.release(), &fakeToken, item->namespaceURI());
776}
777
778bool HTMLConstructionSite::indexOfFirstUnopenFormattingElement(unsigned& firstUnopenElementIndex) const
779{
780    if (m_activeFormattingElements.isEmpty())
781        return false;
782    unsigned index = m_activeFormattingElements.size();
783    do {
784        --index;
785        const HTMLFormattingElementList::Entry& entry = m_activeFormattingElements.at(index);
786        if (entry.isMarker() || m_openElements.contains(entry.element())) {
787            firstUnopenElementIndex = index + 1;
788            return firstUnopenElementIndex < m_activeFormattingElements.size();
789        }
790    } while (index);
791    firstUnopenElementIndex = index;
792    return true;
793}
794
795void HTMLConstructionSite::reconstructTheActiveFormattingElements()
796{
797    unsigned firstUnopenElementIndex;
798    if (!indexOfFirstUnopenFormattingElement(firstUnopenElementIndex))
799        return;
800
801    unsigned unopenEntryIndex = firstUnopenElementIndex;
802    ASSERT(unopenEntryIndex < m_activeFormattingElements.size());
803    for (; unopenEntryIndex < m_activeFormattingElements.size(); ++unopenEntryIndex) {
804        HTMLFormattingElementList::Entry& unopenedEntry = m_activeFormattingElements.at(unopenEntryIndex);
805        RefPtrWillBeRawPtr<HTMLStackItem> reconstructed = createElementFromSavedToken(unopenedEntry.stackItem().get());
806        attachLater(currentNode(), reconstructed->node());
807        m_openElements.push(reconstructed);
808        unopenedEntry.replaceElement(reconstructed.release());
809    }
810}
811
812void HTMLConstructionSite::generateImpliedEndTagsWithExclusion(const AtomicString& tagName)
813{
814    while (hasImpliedEndTag(currentStackItem()) && !currentStackItem()->matchesHTMLTag(tagName))
815        m_openElements.pop();
816}
817
818void HTMLConstructionSite::generateImpliedEndTags()
819{
820    while (hasImpliedEndTag(currentStackItem()))
821        m_openElements.pop();
822}
823
824bool HTMLConstructionSite::inQuirksMode()
825{
826    return m_inQuirksMode;
827}
828
829void HTMLConstructionSite::findFosterSite(HTMLConstructionSiteTask& task)
830{
831    // When a node is to be foster parented, the last template element with no table element is below it in the stack of open elements is the foster parent element (NOT the template's parent!)
832    HTMLElementStack::ElementRecord* lastTemplateElement = m_openElements.topmost(templateTag.localName());
833    if (lastTemplateElement && !m_openElements.inTableScope(tableTag)) {
834        task.parent = lastTemplateElement->element();
835        return;
836    }
837
838    HTMLElementStack::ElementRecord* lastTableElementRecord = m_openElements.topmost(tableTag.localName());
839    if (lastTableElementRecord) {
840        Element* lastTableElement = lastTableElementRecord->element();
841        ContainerNode* parent;
842        if (lastTableElementRecord->next()->stackItem()->hasTagName(templateTag))
843            parent = lastTableElementRecord->next()->element();
844        else
845            parent = lastTableElement->parentNode();
846
847        // When parsing HTML fragments, we skip step 4.2 ("Let root be a new html element with no attributes") for efficiency,
848        // and instead use the DocumentFragment as a root node. So we must treat the root node (DocumentFragment) as if it is a html element here.
849        if (parent && (parent->isElementNode() || (m_isParsingFragment && parent == m_openElements.rootNode()))) {
850            task.parent = parent;
851            task.nextChild = lastTableElement;
852            return;
853        }
854        task.parent = lastTableElementRecord->next()->element();
855        return;
856    }
857    // Fragment case
858    task.parent = m_openElements.rootNode(); // DocumentFragment
859}
860
861bool HTMLConstructionSite::shouldFosterParent() const
862{
863    return m_redirectAttachToFosterParent
864        && currentStackItem()->isElementNode()
865        && currentStackItem()->causesFosterParenting();
866}
867
868void HTMLConstructionSite::fosterParent(PassRefPtrWillBeRawPtr<Node> node)
869{
870    HTMLConstructionSiteTask task(HTMLConstructionSiteTask::Insert);
871    findFosterSite(task);
872    task.child = node;
873    ASSERT(task.parent);
874    queueTask(task);
875}
876
877void HTMLConstructionSite::PendingText::trace(Visitor* visitor)
878{
879    visitor->trace(parent);
880    visitor->trace(nextChild);
881}
882
883
884}
885