1/*
2 * Copyright (C) 2010 Google, Inc. All Rights Reserved.
3 * Copyright (C) 2011 Apple Inc. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY
15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL GOOGLE INC. OR
18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27#include "config.h"
28#include "core/html/parser/HTMLConstructionSite.h"
29
30#include "core/HTMLElementFactory.h"
31#include "core/HTMLNames.h"
32#include "core/dom/Comment.h"
33#include "core/dom/DocumentFragment.h"
34#include "core/dom/DocumentType.h"
35#include "core/dom/Element.h"
36#include "core/dom/ScriptLoader.h"
37#include "core/dom/Text.h"
38#include "core/frame/LocalFrame.h"
39#include "core/html/HTMLFormElement.h"
40#include "core/html/HTMLHtmlElement.h"
41#include "core/html/HTMLPlugInElement.h"
42#include "core/html/HTMLScriptElement.h"
43#include "core/html/HTMLTemplateElement.h"
44#include "core/html/parser/AtomicHTMLToken.h"
45#include "core/html/parser/HTMLParserIdioms.h"
46#include "core/html/parser/HTMLStackItem.h"
47#include "core/html/parser/HTMLToken.h"
48#include "core/loader/FrameLoader.h"
49#include "core/loader/FrameLoaderClient.h"
50#include "core/svg/SVGScriptElement.h"
51#include "platform/NotImplemented.h"
52#include "platform/text/TextBreakIterator.h"
53#include <limits>
54
55namespace WebCore {
56
57using namespace HTMLNames;
58
// Depth threshold for the stack of open elements. Once the stack is deeper
// than this, attachLater() attaches new nodes to the intended parent's own
// parent (when it has one) instead of nesting them any further.
static const unsigned maximumHTMLParserDOMTreeDepth = 512;
60
61static inline void setAttributes(Element* element, AtomicHTMLToken* token, ParserContentPolicy parserContentPolicy)
62{
63    if (!scriptingContentIsAllowed(parserContentPolicy))
64        element->stripScriptingAttributes(token->attributes());
65    element->parserSetAttributes(token->attributes());
66}
67
68static bool hasImpliedEndTag(const HTMLStackItem* item)
69{
70    return item->hasTagName(ddTag)
71        || item->hasTagName(dtTag)
72        || item->hasTagName(liTag)
73        || item->hasTagName(optionTag)
74        || item->hasTagName(optgroupTag)
75        || item->hasTagName(pTag)
76        || item->hasTagName(rpTag)
77        || item->hasTagName(rtTag);
78}
79
80static bool shouldUseLengthLimit(const ContainerNode& node)
81{
82    return !isHTMLScriptElement(node)
83        && !isHTMLStyleElement(node)
84        && !isSVGScriptElement(node);
85}
86
87static unsigned textLengthLimitForContainer(const ContainerNode& node)
88{
89    return shouldUseLengthLimit(node) ? Text::defaultLengthLimit : std::numeric_limits<unsigned>::max();
90}
91
92static inline bool isAllWhitespace(const String& string)
93{
94    return string.isAllSpecialCharacters<isHTMLSpace<UChar> >();
95}
96
97static inline void insert(HTMLConstructionSiteTask& task)
98{
99    if (isHTMLTemplateElement(*task.parent))
100        task.parent = toHTMLTemplateElement(task.parent.get())->content();
101
102    if (ContainerNode* parent = task.child->parentNode())
103        parent->parserRemoveChild(*task.child);
104
105    if (task.nextChild)
106        task.parent->parserInsertBefore(task.child.get(), *task.nextChild);
107    else
108        task.parent->parserAppendChild(task.child.get());
109}
110
111static inline void executeInsertTask(HTMLConstructionSiteTask& task)
112{
113    ASSERT(task.operation == HTMLConstructionSiteTask::Insert);
114
115    insert(task);
116
117    if (task.child->isElementNode()) {
118        Element& child = toElement(*task.child);
119        child.beginParsingChildren();
120        if (task.selfClosing)
121            child.finishParsingChildren();
122    }
123}
124
125static inline void executeInsertTextTask(HTMLConstructionSiteTask& task)
126{
127    ASSERT(task.operation == HTMLConstructionSiteTask::InsertText);
128    ASSERT(task.child->isTextNode());
129
130    // Merge text nodes into previous ones if possible:
131    // http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#insert-a-character
132    Text* newText = toText(task.child.get());
133    Node* previousChild = task.nextChild ? task.nextChild->previousSibling() : task.parent->lastChild();
134    if (previousChild && previousChild->isTextNode()) {
135        Text* previousText = toText(previousChild);
136        unsigned lengthLimit = textLengthLimitForContainer(*task.parent);
137        if (previousText->length() + newText->length() < lengthLimit) {
138            previousText->parserAppendData(newText->data());
139            return;
140        }
141    }
142
143    insert(task);
144}
145
146static inline void executeReparentTask(HTMLConstructionSiteTask& task)
147{
148    ASSERT(task.operation == HTMLConstructionSiteTask::Reparent);
149
150    if (ContainerNode* parent = task.child->parentNode())
151        parent->parserRemoveChild(*task.child);
152
153    task.parent->parserAppendChild(task.child);
154}
155
// Executes an InsertAlreadyParsedChild task. Unlike executeInsertTask, this
// only performs the DOM insertion; it does not call beginParsingChildren()
// or finishParsingChildren() on the child.
static inline void executeInsertAlreadyParsedChildTask(HTMLConstructionSiteTask& task)
{
    ASSERT(task.operation == HTMLConstructionSiteTask::InsertAlreadyParsedChild);

    insert(task);
}
162
// Executes a TakeAllChildren task: task.parent takes over all children of
// the node returned by task.oldParent().
// NOTE(review): takeAllChildren() stores the old parent in task.child, so
// oldParent() presumably reads that field — confirm in HTMLConstructionSiteTask.
static inline void executeTakeAllChildrenTask(HTMLConstructionSiteTask& task)
{
    ASSERT(task.operation == HTMLConstructionSiteTask::TakeAllChildren);

    task.parent->parserTakeAllChildrenFrom(*task.oldParent());
}
169
170void HTMLConstructionSite::executeTask(HTMLConstructionSiteTask& task)
171{
172    ASSERT(m_taskQueue.isEmpty());
173    if (task.operation == HTMLConstructionSiteTask::Insert)
174        return executeInsertTask(task);
175
176    if (task.operation == HTMLConstructionSiteTask::InsertText)
177        return executeInsertTextTask(task);
178
179    // All the cases below this point are only used by the adoption agency.
180
181    if (task.operation == HTMLConstructionSiteTask::InsertAlreadyParsedChild)
182        return executeInsertAlreadyParsedChildTask(task);
183
184    if (task.operation == HTMLConstructionSiteTask::Reparent)
185        return executeReparentTask(task);
186
187    if (task.operation == HTMLConstructionSiteTask::TakeAllChildren)
188        return executeTakeAllChildrenTask(task);
189
190    ASSERT_NOT_REACHED();
191}
192
193// This is only needed for TextDocuments where we might have text nodes
194// approaching the default length limit (~64k) and we don't want to
195// break a text node in the middle of a combining character.
196static unsigned findBreakIndexBetween(const StringBuilder& string, unsigned currentPosition, unsigned proposedBreakIndex)
197{
198    ASSERT(currentPosition < proposedBreakIndex);
199    ASSERT(proposedBreakIndex <= string.length());
200    // The end of the string is always a valid break.
201    if (proposedBreakIndex == string.length())
202        return proposedBreakIndex;
203
204    // Latin-1 does not have breakable boundaries. If we ever moved to a differnet 8-bit encoding this could be wrong.
205    if (string.is8Bit())
206        return proposedBreakIndex;
207
208    const UChar* breakSearchCharacters = string.characters16() + currentPosition;
209    // We need at least two characters look-ahead to account for UTF-16 surrogates, but can't search off the end of the buffer!
210    unsigned breakSearchLength = std::min(proposedBreakIndex - currentPosition + 2, string.length() - currentPosition);
211    NonSharedCharacterBreakIterator it(breakSearchCharacters, breakSearchLength);
212
213    if (it.isBreak(proposedBreakIndex - currentPosition))
214        return proposedBreakIndex;
215
216    int adjustedBreakIndexInSubstring = it.preceding(proposedBreakIndex - currentPosition);
217    if (adjustedBreakIndexInSubstring > 0)
218        return currentPosition + adjustedBreakIndexInSubstring;
219    // We failed to find a breakable point, let the caller figure out what to do.
220    return 0;
221}
222
223static String atomizeIfAllWhitespace(const String& string, WhitespaceMode whitespaceMode)
224{
225    // Strings composed entirely of whitespace are likely to be repeated.
226    // Turn them into AtomicString so we share a single string for each.
227    if (whitespaceMode == AllWhitespace || (whitespaceMode == WhitespaceUnknown && isAllWhitespace(string)))
228        return AtomicString(string).string();
229    return string;
230}
231
232void HTMLConstructionSite::flushPendingText()
233{
234    if (m_pendingText.isEmpty())
235        return;
236
237    PendingText pendingText;
238    // Hold onto the current pending text on the stack so that queueTask doesn't recurse infinitely.
239    m_pendingText.swap(pendingText);
240    ASSERT(m_pendingText.isEmpty());
241
242    // Splitting text nodes into smaller chunks contradicts HTML5 spec, but is necessary
243    // for performance, see: https://bugs.webkit.org/show_bug.cgi?id=55898
244    unsigned lengthLimit = textLengthLimitForContainer(*pendingText.parent);
245
246    unsigned currentPosition = 0;
247    const StringBuilder& string = pendingText.stringBuilder;
248    while (currentPosition < string.length()) {
249        unsigned proposedBreakIndex = std::min(currentPosition + lengthLimit, string.length());
250        unsigned breakIndex = findBreakIndexBetween(string, currentPosition, proposedBreakIndex);
251        ASSERT(breakIndex <= string.length());
252        String substring = string.substring(currentPosition, breakIndex - currentPosition);
253        substring = atomizeIfAllWhitespace(substring, pendingText.whitespaceMode);
254
255        HTMLConstructionSiteTask task(HTMLConstructionSiteTask::InsertText);
256        task.parent = pendingText.parent;
257        task.nextChild = pendingText.nextChild;
258        task.child = Text::create(task.parent->document(), substring);
259        queueTask(task);
260
261        ASSERT(breakIndex > currentPosition);
262        ASSERT(breakIndex - currentPosition == substring.length());
263        ASSERT(toText(task.child.get())->length() == substring.length());
264        currentPosition = breakIndex;
265    }
266}
267
// Appends |task| to the task queue. Any pending text is flushed first, so
// its InsertText tasks end up in the queue ahead of |task|.
void HTMLConstructionSite::queueTask(const HTMLConstructionSiteTask& task)
{
    flushPendingText();
    ASSERT(m_pendingText.isEmpty());
    m_taskQueue.append(task);
}
274
275void HTMLConstructionSite::attachLater(ContainerNode* parent, PassRefPtrWillBeRawPtr<Node> prpChild, bool selfClosing)
276{
277    ASSERT(scriptingContentIsAllowed(m_parserContentPolicy) || !prpChild.get()->isElementNode() || !toScriptLoaderIfPossible(toElement(prpChild.get())));
278    ASSERT(pluginContentIsAllowed(m_parserContentPolicy) || !isHTMLPlugInElement(prpChild));
279
280    HTMLConstructionSiteTask task(HTMLConstructionSiteTask::Insert);
281    task.parent = parent;
282    task.child = prpChild;
283    task.selfClosing = selfClosing;
284
285    if (shouldFosterParent()) {
286        fosterParent(task.child);
287        return;
288    }
289
290    // Add as a sibling of the parent if we have reached the maximum depth allowed.
291    if (m_openElements.stackDepth() > maximumHTMLParserDOMTreeDepth && task.parent->parentNode())
292        task.parent = task.parent->parentNode();
293
294    ASSERT(task.parent);
295    queueTask(task);
296}
297
298void HTMLConstructionSite::executeQueuedTasks()
299{
300    // This has no affect on pendingText, and we may have pendingText
301    // remaining after executing all other queued tasks.
302    const size_t size = m_taskQueue.size();
303    if (!size)
304        return;
305
306    // Copy the task queue into a local variable in case executeTask
307    // re-enters the parser.
308    TaskQueue queue;
309    queue.swap(m_taskQueue);
310
311    for (size_t i = 0; i < size; ++i)
312        executeTask(queue[i]);
313
314    // We might be detached now.
315}
316
// Builds a construction site for parsing a whole document. The document
// itself is the attachment root, fragment parsing is off, and the quirks
// flag is seeded from the document's current state.
HTMLConstructionSite::HTMLConstructionSite(Document* document, ParserContentPolicy parserContentPolicy)
    : m_document(document)
    , m_attachmentRoot(document)
    , m_parserContentPolicy(parserContentPolicy)
    , m_isParsingFragment(false)
    , m_redirectAttachToFosterParent(false)
    , m_inQuirksMode(document->inQuirksMode())
{
    // Only HTML and XHTML documents are expected here.
    ASSERT(m_document->isHTMLDocument() || m_document->isXHTMLDocument());
}
327
// Builds a construction site for parsing into |fragment|. The fragment is
// the attachment root, m_document is the fragment's owning document, and
// the quirks flag is seeded from that document.
HTMLConstructionSite::HTMLConstructionSite(DocumentFragment* fragment, ParserContentPolicy parserContentPolicy)
    : m_document(&fragment->document())
    , m_attachmentRoot(fragment)
    , m_parserContentPolicy(parserContentPolicy)
    , m_isParsingFragment(true)
    , m_redirectAttachToFosterParent(false)
    , m_inQuirksMode(fragment->document().inQuirksMode())
{
    // Only HTML and XHTML owner documents are expected here.
    ASSERT(m_document->isHTMLDocument() || m_document->isXHTMLDocument());
}
338
// Destructor. Does no work of its own; it only asserts that nothing is left
// queued or buffered at destruction time.
HTMLConstructionSite::~HTMLConstructionSite()
{
    // Depending on why we're being destroyed it might be OK
    // to forget queued tasks, but currently we don't expect to.
    ASSERT(m_taskQueue.isEmpty());
    // Currently we assume that text will never be the last token in the
    // document and that we'll always queue some additional task to cause it to flush.
    ASSERT(m_pendingText.isEmpty());
}
348
// Reports the members listed below to |visitor|.
// NOTE(review): presumably part of garbage-collection tracing — confirm
// against the Visitor interface.
void HTMLConstructionSite::trace(Visitor* visitor)
{
    visitor->trace(m_document);
    visitor->trace(m_attachmentRoot);
    visitor->trace(m_head);
    visitor->trace(m_form);
    visitor->trace(m_openElements);
    visitor->trace(m_activeFormattingElements);
    visitor->trace(m_taskQueue);
    visitor->trace(m_pendingText);
}
360
// Disconnects the construction site from its document: buffered text is
// discarded (not flushed) and the document and attachment root pointers are
// cleared.
void HTMLConstructionSite::detach()
{
    // FIXME: We'd like to ASSERT here that we're canceling and not just discarding
    // text that really should have made it into the DOM earlier, but there
    // doesn't seem to be a nice way to do that.
    m_pendingText.discard();
    m_document = nullptr;
    m_attachmentRoot = nullptr;
}
370
// Sets the current form element. A form must not already be set.
void HTMLConstructionSite::setForm(HTMLFormElement* form)
{
    // This method should only be needed for HTMLTreeBuilder in the fragment case.
    ASSERT(!m_form);
    m_form = form;
}
377
// Hands the current form element to the caller by releasing m_form.
PassRefPtrWillBeRawPtr<HTMLFormElement> HTMLConstructionSite::takeForm()
{
    return m_form.release();
}
382
383void HTMLConstructionSite::dispatchDocumentElementAvailableIfNeeded()
384{
385    ASSERT(m_document);
386    if (m_document->frame() && !m_isParsingFragment)
387        m_document->frame()->loader().dispatchDocumentElementAvailable();
388}
389
390void HTMLConstructionSite::insertHTMLHtmlStartTagBeforeHTML(AtomicHTMLToken* token)
391{
392    ASSERT(m_document);
393    RefPtrWillBeRawPtr<HTMLHtmlElement> element = HTMLHtmlElement::create(*m_document);
394    setAttributes(element.get(), token, m_parserContentPolicy);
395    attachLater(m_attachmentRoot, element);
396    m_openElements.pushHTMLHtmlElement(HTMLStackItem::create(element, token));
397
398    executeQueuedTasks();
399    element->insertedByParser();
400    dispatchDocumentElementAvailableIfNeeded();
401}
402
403void HTMLConstructionSite::mergeAttributesFromTokenIntoElement(AtomicHTMLToken* token, Element* element)
404{
405    if (token->attributes().isEmpty())
406        return;
407
408    for (unsigned i = 0; i < token->attributes().size(); ++i) {
409        const Attribute& tokenAttribute = token->attributes().at(i);
410        if (!element->elementData() || !element->findAttributeByName(tokenAttribute.name()))
411            element->setAttribute(tokenAttribute.name(), tokenAttribute.value());
412    }
413}
414
415void HTMLConstructionSite::insertHTMLHtmlStartTagInBody(AtomicHTMLToken* token)
416{
417    // Fragments do not have a root HTML element, so any additional HTML elements
418    // encountered during fragment parsing should be ignored.
419    if (m_isParsingFragment)
420        return;
421
422    mergeAttributesFromTokenIntoElement(token, m_openElements.htmlElement());
423}
424
// Handles an additional <body> start tag by merging its attributes into the
// existing body element; attributes the body already has are kept.
void HTMLConstructionSite::insertHTMLBodyStartTagInBody(AtomicHTMLToken* token)
{
    mergeAttributesFromTokenIntoElement(token, m_openElements.bodyElement());
}
429
430void HTMLConstructionSite::setDefaultCompatibilityMode()
431{
432    if (m_isParsingFragment)
433        return;
434    setCompatibilityMode(Document::QuirksMode);
435}
436
437void HTMLConstructionSite::setCompatibilityMode(Document::CompatibilityMode mode)
438{
439    m_inQuirksMode = (mode == Document::QuirksMode);
440    m_document->setCompatibilityMode(mode);
441}
442
// Chooses the document's compatibility mode from the parts of a DOCTYPE
// token and applies it via setCompatibilityMode().
//
// |name| is compared exactly against "html". |publicId| and |systemId| are
// matched against the identifier lists below, either as prefixes
// (startsWith) or as whole strings (equalIgnoringCase).
// NOTE(review): the `false` argument to startsWith is presumably the
// case-sensitivity flag, making prefix matches case-insensitive — confirm
// against String::startsWith.
void HTMLConstructionSite::setCompatibilityModeFromDoctype(const String& name, const String& publicId, const String& systemId)
{
    // There are three possible compatibility modes:
    // Quirks - quirks mode emulates WinIE and NS4. CSS parsing is also relaxed in this mode, e.g., unit types can
    // be omitted from numbers.
    // Limited Quirks - This mode is identical to no-quirks mode except for its treatment of line-height in the inline box model.
    // No Quirks - no quirks apply. Web pages will obey the specifications to the letter.

    // Check for Quirks Mode.
    if (name != "html"
        || publicId.startsWith("+//Silmaril//dtd html Pro v0r11 19970101//", false)
        || publicId.startsWith("-//AdvaSoft Ltd//DTD HTML 3.0 asWedit + extensions//", false)
        || publicId.startsWith("-//AS//DTD HTML 3.0 asWedit + extensions//", false)
        || publicId.startsWith("-//IETF//DTD HTML 2.0 Level 1//", false)
        || publicId.startsWith("-//IETF//DTD HTML 2.0 Level 2//", false)
        || publicId.startsWith("-//IETF//DTD HTML 2.0 Strict Level 1//", false)
        || publicId.startsWith("-//IETF//DTD HTML 2.0 Strict Level 2//", false)
        || publicId.startsWith("-//IETF//DTD HTML 2.0 Strict//", false)
        || publicId.startsWith("-//IETF//DTD HTML 2.0//", false)
        || publicId.startsWith("-//IETF//DTD HTML 2.1E//", false)
        || publicId.startsWith("-//IETF//DTD HTML 3.0//", false)
        || publicId.startsWith("-//IETF//DTD HTML 3.2 Final//", false)
        || publicId.startsWith("-//IETF//DTD HTML 3.2//", false)
        || publicId.startsWith("-//IETF//DTD HTML 3//", false)
        || publicId.startsWith("-//IETF//DTD HTML Level 0//", false)
        || publicId.startsWith("-//IETF//DTD HTML Level 1//", false)
        || publicId.startsWith("-//IETF//DTD HTML Level 2//", false)
        || publicId.startsWith("-//IETF//DTD HTML Level 3//", false)
        || publicId.startsWith("-//IETF//DTD HTML Strict Level 0//", false)
        || publicId.startsWith("-//IETF//DTD HTML Strict Level 1//", false)
        || publicId.startsWith("-//IETF//DTD HTML Strict Level 2//", false)
        || publicId.startsWith("-//IETF//DTD HTML Strict Level 3//", false)
        || publicId.startsWith("-//IETF//DTD HTML Strict//", false)
        || publicId.startsWith("-//IETF//DTD HTML//", false)
        || publicId.startsWith("-//Metrius//DTD Metrius Presentational//", false)
        || publicId.startsWith("-//Microsoft//DTD Internet Explorer 2.0 HTML Strict//", false)
        || publicId.startsWith("-//Microsoft//DTD Internet Explorer 2.0 HTML//", false)
        || publicId.startsWith("-//Microsoft//DTD Internet Explorer 2.0 Tables//", false)
        || publicId.startsWith("-//Microsoft//DTD Internet Explorer 3.0 HTML Strict//", false)
        || publicId.startsWith("-//Microsoft//DTD Internet Explorer 3.0 HTML//", false)
        || publicId.startsWith("-//Microsoft//DTD Internet Explorer 3.0 Tables//", false)
        || publicId.startsWith("-//Netscape Comm. Corp.//DTD HTML//", false)
        || publicId.startsWith("-//Netscape Comm. Corp.//DTD Strict HTML//", false)
        || publicId.startsWith("-//O'Reilly and Associates//DTD HTML 2.0//", false)
        || publicId.startsWith("-//O'Reilly and Associates//DTD HTML Extended 1.0//", false)
        || publicId.startsWith("-//O'Reilly and Associates//DTD HTML Extended Relaxed 1.0//", false)
        || publicId.startsWith("-//SoftQuad Software//DTD HoTMetaL PRO 6.0::19990601::extensions to HTML 4.0//", false)
        || publicId.startsWith("-//SoftQuad//DTD HoTMetaL PRO 4.0::19971010::extensions to HTML 4.0//", false)
        || publicId.startsWith("-//Spyglass//DTD HTML 2.0 Extended//", false)
        || publicId.startsWith("-//SQ//DTD HTML 2.0 HoTMetaL + extensions//", false)
        || publicId.startsWith("-//Sun Microsystems Corp.//DTD HotJava HTML//", false)
        || publicId.startsWith("-//Sun Microsystems Corp.//DTD HotJava Strict HTML//", false)
        || publicId.startsWith("-//W3C//DTD HTML 3 1995-03-24//", false)
        || publicId.startsWith("-//W3C//DTD HTML 3.2 Draft//", false)
        || publicId.startsWith("-//W3C//DTD HTML 3.2 Final//", false)
        || publicId.startsWith("-//W3C//DTD HTML 3.2//", false)
        || publicId.startsWith("-//W3C//DTD HTML 3.2S Draft//", false)
        || publicId.startsWith("-//W3C//DTD HTML 4.0 Frameset//", false)
        || publicId.startsWith("-//W3C//DTD HTML 4.0 Transitional//", false)
        || publicId.startsWith("-//W3C//DTD HTML Experimental 19960712//", false)
        || publicId.startsWith("-//W3C//DTD HTML Experimental 970421//", false)
        || publicId.startsWith("-//W3C//DTD W3 HTML//", false)
        || publicId.startsWith("-//W3O//DTD W3 HTML 3.0//", false)
        || equalIgnoringCase(publicId, "-//W3O//DTD W3 HTML Strict 3.0//EN//")
        || publicId.startsWith("-//WebTechs//DTD Mozilla HTML 2.0//", false)
        || publicId.startsWith("-//WebTechs//DTD Mozilla HTML//", false)
        || equalIgnoringCase(publicId, "-/W3C/DTD HTML 4.0 Transitional/EN")
        || equalIgnoringCase(publicId, "HTML")
        || equalIgnoringCase(systemId, "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd")
        // The HTML 4.01 Frameset/Transitional identifiers trigger full quirks
        // only when no system identifier is given; with one they fall into
        // limited quirks below.
        || (systemId.isEmpty() && publicId.startsWith("-//W3C//DTD HTML 4.01 Frameset//", false))
        || (systemId.isEmpty() && publicId.startsWith("-//W3C//DTD HTML 4.01 Transitional//", false))) {
        setCompatibilityMode(Document::QuirksMode);
        return;
    }

    // Check for Limited Quirks Mode.
    if (publicId.startsWith("-//W3C//DTD XHTML 1.0 Frameset//", false)
        || publicId.startsWith("-//W3C//DTD XHTML 1.0 Transitional//", false)
        || (!systemId.isEmpty() && publicId.startsWith("-//W3C//DTD HTML 4.01 Frameset//", false))
        || (!systemId.isEmpty() && publicId.startsWith("-//W3C//DTD HTML 4.01 Transitional//", false))) {
        setCompatibilityMode(Document::LimitedQuirksMode);
        return;
    }

    // Otherwise we are No Quirks Mode.
    setCompatibilityMode(Document::NoQuirksMode);
}
530
// Handles end of input: calls flush() (defined outside this section) and
// then pops every element off the stack of open elements. A current node
// must still exist when this is called.
void HTMLConstructionSite::processEndOfFile()
{
    ASSERT(currentNode());
    flush();
    openElements()->popAll();
}
537
// Called when parsing is complete: flushes whatever is still pending and
// then tells the document that parsing has finished.
void HTMLConstructionSite::finishedParsing()
{
    // We shouldn't have any queued tasks but we might have pending text which we need to promote to tasks and execute.
    ASSERT(m_taskQueue.isEmpty());
    flush();
    m_document->finishedParsing();
}
545
546void HTMLConstructionSite::insertDoctype(AtomicHTMLToken* token)
547{
548    ASSERT(token->type() == HTMLToken::DOCTYPE);
549
550    const String& publicId = StringImpl::create8BitIfPossible(token->publicIdentifier());
551    const String& systemId = StringImpl::create8BitIfPossible(token->systemIdentifier());
552    RefPtrWillBeRawPtr<DocumentType> doctype = DocumentType::create(m_document, token->name(), publicId, systemId);
553    attachLater(m_attachmentRoot, doctype.release());
554
555    // DOCTYPE nodes are only processed when parsing fragments w/o contextElements, which
556    // never occurs.  However, if we ever chose to support such, this code is subtly wrong,
557    // because context-less fragments can determine their own quirks mode, and thus change
558    // parsing rules (like <p> inside <table>).  For now we ASSERT that we never hit this code
559    // in a fragment, as changing the owning document's compatibility mode would be wrong.
560    ASSERT(!m_isParsingFragment);
561    if (m_isParsingFragment)
562        return;
563
564    if (token->forceQuirks())
565        setCompatibilityMode(Document::QuirksMode);
566    else {
567        setCompatibilityModeFromDoctype(token->name(), publicId, systemId);
568    }
569}
570
571void HTMLConstructionSite::insertComment(AtomicHTMLToken* token)
572{
573    ASSERT(token->type() == HTMLToken::Comment);
574    attachLater(currentNode(), Comment::create(ownerDocumentForCurrentNode(), token->comment()));
575}
576
577void HTMLConstructionSite::insertCommentOnDocument(AtomicHTMLToken* token)
578{
579    ASSERT(token->type() == HTMLToken::Comment);
580    ASSERT(m_document);
581    attachLater(m_attachmentRoot, Comment::create(*m_document, token->comment()));
582}
583
584void HTMLConstructionSite::insertCommentOnHTMLHtmlElement(AtomicHTMLToken* token)
585{
586    ASSERT(token->type() == HTMLToken::Comment);
587    ContainerNode* parent = m_openElements.rootNode();
588    attachLater(parent, Comment::create(parent->document(), token->comment()));
589}
590
// Creates the <head> element from |token|, remembers its stack item in
// m_head, schedules it for insertion under the current node and pushes it
// onto the stack of open elements. Must not be called while foster
// parenting is in effect.
void HTMLConstructionSite::insertHTMLHeadElement(AtomicHTMLToken* token)
{
    ASSERT(!shouldFosterParent());
    m_head = HTMLStackItem::create(createHTMLElement(token), token);
    attachLater(currentNode(), m_head->element());
    m_openElements.pushHTMLHeadElement(m_head);
}
598
599void HTMLConstructionSite::insertHTMLBodyElement(AtomicHTMLToken* token)
600{
601    ASSERT(!shouldFosterParent());
602    RefPtrWillBeRawPtr<Element> body = createHTMLElement(token);
603    attachLater(currentNode(), body);
604    m_openElements.pushHTMLBodyElement(HTMLStackItem::create(body.release(), token));
605    if (LocalFrame* frame = m_document->frame())
606        frame->loader().client()->dispatchWillInsertBody();
607}
608
// Creates a <form> element from |token|, stores it as the current form
// (m_form), records whether it is demoted, schedules it for insertion under
// the current node and pushes it onto the stack of open elements.
void HTMLConstructionSite::insertHTMLFormElement(AtomicHTMLToken* token, bool isDemoted)
{
    RefPtrWillBeRawPtr<Element> element = createHTMLElement(token);
    ASSERT(isHTMLFormElement(element));
    // The element was just asserted to be a form, so the downcast is safe.
    m_form = static_pointer_cast<HTMLFormElement>(element.release());
    m_form->setDemoted(isDemoted);
    attachLater(currentNode(), m_form.get());
    m_openElements.push(HTMLStackItem::create(m_form.get(), token));
}
618
619void HTMLConstructionSite::insertHTMLElement(AtomicHTMLToken* token)
620{
621    RefPtrWillBeRawPtr<Element> element = createHTMLElement(token);
622    attachLater(currentNode(), element);
623    m_openElements.push(HTMLStackItem::create(element.release(), token));
624}
625
626void HTMLConstructionSite::insertSelfClosingHTMLElement(AtomicHTMLToken* token)
627{
628    ASSERT(token->type() == HTMLToken::StartTag);
629    // Normally HTMLElementStack is responsible for calling finishParsingChildren,
630    // but self-closing elements are never in the element stack so the stack
631    // doesn't get a chance to tell them that we're done parsing their children.
632    attachLater(currentNode(), createHTMLElement(token), true);
633    // FIXME: Do we want to acknowledge the token's self-closing flag?
634    // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#acknowledge-self-closing-flag
635}
636
// Inserts an HTML element for |token| and additionally appends its stack
// item to the list of active formatting elements.
void HTMLConstructionSite::insertFormattingElement(AtomicHTMLToken* token)
{
    // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#the-stack-of-open-elements
    // Possible active formatting elements include:
    // a, b, big, code, em, font, i, nobr, s, small, strike, strong, tt, and u.
    insertHTMLElement(token);
    // insertHTMLElement pushed the new element, so it is now the current record.
    m_activeFormattingElements.append(currentElementRecord()->stackItem());
}
645
646void HTMLConstructionSite::insertScriptElement(AtomicHTMLToken* token)
647{
648    // http://www.whatwg.org/specs/web-apps/current-work/multipage/scripting-1.html#already-started
649    // http://html5.org/specs/dom-parsing.html#dom-range-createcontextualfragment
650    // For createContextualFragment, the specifications say to mark it parser-inserted and already-started and later unmark them.
651    // However, we short circuit that logic to avoid the subtree traversal to find script elements since scripts can never see
652    // those flags or effects thereof.
653    const bool parserInserted = m_parserContentPolicy != AllowScriptingContentAndDoNotMarkAlreadyStarted;
654    const bool alreadyStarted = m_isParsingFragment && parserInserted;
655    RefPtrWillBeRawPtr<HTMLScriptElement> element = HTMLScriptElement::create(ownerDocumentForCurrentNode(), parserInserted, alreadyStarted);
656    setAttributes(element.get(), token, m_parserContentPolicy);
657    if (scriptingContentIsAllowed(m_parserContentPolicy))
658        attachLater(currentNode(), element);
659    m_openElements.push(HTMLStackItem::create(element.release(), token));
660}
661
662void HTMLConstructionSite::insertForeignElement(AtomicHTMLToken* token, const AtomicString& namespaceURI)
663{
664    ASSERT(token->type() == HTMLToken::StartTag);
665    notImplemented(); // parseError when xmlns or xmlns:xlink are wrong.
666
667    RefPtrWillBeRawPtr<Element> element = createElement(token, namespaceURI);
668    if (scriptingContentIsAllowed(m_parserContentPolicy) || !toScriptLoaderIfPossible(element.get()))
669        attachLater(currentNode(), element, token->selfClosing());
670    if (!token->selfClosing())
671        m_openElements.push(HTMLStackItem::create(element.release(), token, namespaceURI));
672}
673
674void HTMLConstructionSite::insertTextNode(const String& string, WhitespaceMode whitespaceMode)
675{
676    HTMLConstructionSiteTask dummyTask(HTMLConstructionSiteTask::Insert);
677    dummyTask.parent = currentNode();
678
679    if (shouldFosterParent())
680        findFosterSite(dummyTask);
681
682    // FIXME: This probably doesn't need to be done both here and in insert(Task).
683    if (isHTMLTemplateElement(*dummyTask.parent))
684        dummyTask.parent = toHTMLTemplateElement(dummyTask.parent.get())->content();
685
686    // Unclear when parent != case occurs. Somehow we insert text into two separate nodes while processing the same Token.
687    // The nextChild != dummy.nextChild case occurs whenever foster parenting happened and we hit a new text node "<table>a</table>b"
688    // In either case we have to flush the pending text into the task queue before making more.
689    if (!m_pendingText.isEmpty() && (m_pendingText.parent != dummyTask.parent ||  m_pendingText.nextChild != dummyTask.nextChild))
690        flushPendingText();
691    m_pendingText.append(dummyTask.parent, dummyTask.nextChild, string, whitespaceMode);
692}
693
694void HTMLConstructionSite::reparent(HTMLElementStack::ElementRecord* newParent, HTMLElementStack::ElementRecord* child)
695{
696    HTMLConstructionSiteTask task(HTMLConstructionSiteTask::Reparent);
697    task.parent = newParent->node();
698    task.child = child->node();
699    queueTask(task);
700}
701
702void HTMLConstructionSite::reparent(HTMLElementStack::ElementRecord* newParent, HTMLStackItem* child)
703{
704    HTMLConstructionSiteTask task(HTMLConstructionSiteTask::Reparent);
705    task.parent = newParent->node();
706    task.child = child->node();
707    queueTask(task);
708}
709
710void HTMLConstructionSite::insertAlreadyParsedChild(HTMLStackItem* newParent, HTMLElementStack::ElementRecord* child)
711{
712    if (newParent->causesFosterParenting()) {
713        fosterParent(child->node());
714        return;
715    }
716
717    HTMLConstructionSiteTask task(HTMLConstructionSiteTask::InsertAlreadyParsedChild);
718    task.parent = newParent->node();
719    task.child = child->node();
720    queueTask(task);
721}
722
723void HTMLConstructionSite::takeAllChildren(HTMLStackItem* newParent, HTMLElementStack::ElementRecord* oldParent)
724{
725    HTMLConstructionSiteTask task(HTMLConstructionSiteTask::TakeAllChildren);
726    task.parent = newParent->node();
727    task.child = oldParent->node();
728    queueTask(task);
729}
730
731PassRefPtrWillBeRawPtr<Element> HTMLConstructionSite::createElement(AtomicHTMLToken* token, const AtomicString& namespaceURI)
732{
733    QualifiedName tagName(nullAtom, token->name(), namespaceURI);
734    RefPtrWillBeRawPtr<Element> element = ownerDocumentForCurrentNode().createElement(tagName, true);
735    setAttributes(element.get(), token, m_parserContentPolicy);
736    return element.release();
737}
738
739inline Document& HTMLConstructionSite::ownerDocumentForCurrentNode()
740{
741    if (isHTMLTemplateElement(*currentNode()))
742        return toHTMLTemplateElement(currentElement())->content()->document();
743    return currentNode()->document();
744}
745
746PassRefPtrWillBeRawPtr<Element> HTMLConstructionSite::createHTMLElement(AtomicHTMLToken* token)
747{
748    Document& document = ownerDocumentForCurrentNode();
749    // Only associate the element with the current form if we're creating the new element
750    // in a document with a browsing context (rather than in <template> contents).
751    HTMLFormElement* form = document.frame() ? m_form.get() : 0;
752    // FIXME: This can't use HTMLConstructionSite::createElement because we
753    // have to pass the current form element.  We should rework form association
754    // to occur after construction to allow better code sharing here.
755    RefPtrWillBeRawPtr<Element> element = HTMLElementFactory::createHTMLElement(token->name(), document, form, true);
756    setAttributes(element.get(), token, m_parserContentPolicy);
757    ASSERT(element->isHTMLElement());
758    return element.release();
759}
760
761PassRefPtrWillBeRawPtr<HTMLStackItem> HTMLConstructionSite::createElementFromSavedToken(HTMLStackItem* item)
762{
763    RefPtrWillBeRawPtr<Element> element;
764    // NOTE: Moving from item -> token -> item copies the Attribute vector twice!
765    AtomicHTMLToken fakeToken(HTMLToken::StartTag, item->localName(), item->attributes());
766    if (item->namespaceURI() == HTMLNames::xhtmlNamespaceURI)
767        element = createHTMLElement(&fakeToken);
768    else
769        element = createElement(&fakeToken, item->namespaceURI());
770    return HTMLStackItem::create(element.release(), &fakeToken, item->namespaceURI());
771}
772
773bool HTMLConstructionSite::indexOfFirstUnopenFormattingElement(unsigned& firstUnopenElementIndex) const
774{
775    if (m_activeFormattingElements.isEmpty())
776        return false;
777    unsigned index = m_activeFormattingElements.size();
778    do {
779        --index;
780        const HTMLFormattingElementList::Entry& entry = m_activeFormattingElements.at(index);
781        if (entry.isMarker() || m_openElements.contains(entry.element())) {
782            firstUnopenElementIndex = index + 1;
783            return firstUnopenElementIndex < m_activeFormattingElements.size();
784        }
785    } while (index);
786    firstUnopenElementIndex = index;
787    return true;
788}
789
790void HTMLConstructionSite::reconstructTheActiveFormattingElements()
791{
792    unsigned firstUnopenElementIndex;
793    if (!indexOfFirstUnopenFormattingElement(firstUnopenElementIndex))
794        return;
795
796    unsigned unopenEntryIndex = firstUnopenElementIndex;
797    ASSERT(unopenEntryIndex < m_activeFormattingElements.size());
798    for (; unopenEntryIndex < m_activeFormattingElements.size(); ++unopenEntryIndex) {
799        HTMLFormattingElementList::Entry& unopenedEntry = m_activeFormattingElements.at(unopenEntryIndex);
800        RefPtrWillBeRawPtr<HTMLStackItem> reconstructed = createElementFromSavedToken(unopenedEntry.stackItem().get());
801        attachLater(currentNode(), reconstructed->node());
802        m_openElements.push(reconstructed);
803        unopenedEntry.replaceElement(reconstructed.release());
804    }
805}
806
807void HTMLConstructionSite::generateImpliedEndTagsWithExclusion(const AtomicString& tagName)
808{
809    while (hasImpliedEndTag(currentStackItem()) && !currentStackItem()->matchesHTMLTag(tagName))
810        m_openElements.pop();
811}
812
813void HTMLConstructionSite::generateImpliedEndTags()
814{
815    while (hasImpliedEndTag(currentStackItem()))
816        m_openElements.pop();
817}
818
819bool HTMLConstructionSite::inQuirksMode()
820{
821    return m_inQuirksMode;
822}
823
824void HTMLConstructionSite::findFosterSite(HTMLConstructionSiteTask& task)
825{
826    // When a node is to be foster parented, the last template element with no table element is below it in the stack of open elements is the foster parent element (NOT the template's parent!)
827    HTMLElementStack::ElementRecord* lastTemplateElement = m_openElements.topmost(templateTag.localName());
828    if (lastTemplateElement && !m_openElements.inTableScope(tableTag)) {
829        task.parent = lastTemplateElement->element();
830        return;
831    }
832
833    HTMLElementStack::ElementRecord* lastTableElementRecord = m_openElements.topmost(tableTag.localName());
834    if (lastTableElementRecord) {
835        Element* lastTableElement = lastTableElementRecord->element();
836        ContainerNode* parent;
837        if (lastTableElementRecord->next()->stackItem()->hasTagName(templateTag))
838            parent = lastTableElementRecord->next()->element();
839        else
840            parent = lastTableElement->parentNode();
841
842        // When parsing HTML fragments, we skip step 4.2 ("Let root be a new html element with no attributes") for efficiency,
843        // and instead use the DocumentFragment as a root node. So we must treat the root node (DocumentFragment) as if it is a html element here.
844        if (parent && (parent->isElementNode() || (m_isParsingFragment && parent == m_openElements.rootNode()))) {
845            task.parent = parent;
846            task.nextChild = lastTableElement;
847            return;
848        }
849        task.parent = lastTableElementRecord->next()->element();
850        return;
851    }
852    // Fragment case
853    task.parent = m_openElements.rootNode(); // DocumentFragment
854}
855
856bool HTMLConstructionSite::shouldFosterParent() const
857{
858    return m_redirectAttachToFosterParent
859        && currentStackItem()->isElementNode()
860        && currentStackItem()->causesFosterParenting();
861}
862
863void HTMLConstructionSite::fosterParent(PassRefPtrWillBeRawPtr<Node> node)
864{
865    HTMLConstructionSiteTask task(HTMLConstructionSiteTask::Insert);
866    findFosterSite(task);
867    task.child = node;
868    ASSERT(task.parent);
869    queueTask(task);
870}
871
872void HTMLConstructionSite::PendingText::trace(Visitor* visitor)
873{
874    visitor->trace(parent);
875    visitor->trace(nextChild);
876}
877
878
879}
880