1/*
2 * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2012 Apple Inc. All rights reserved.
3 * Copyright (C) 2009, 2010 Google Inc. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
15 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
16 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
17 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
18 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
19 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
20 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27#include "config.h"
28#include "core/editing/MarkupAccumulator.h"
29
30#include "core/HTMLNames.h"
31#include "core/XLinkNames.h"
32#include "core/XMLNSNames.h"
33#include "core/XMLNames.h"
34#include "core/dom/CDATASection.h"
35#include "core/dom/Comment.h"
36#include "core/dom/Document.h"
37#include "core/dom/DocumentFragment.h"
38#include "core/dom/DocumentType.h"
39#include "core/dom/ProcessingInstruction.h"
40#include "core/editing/Editor.h"
41#include "core/html/HTMLElement.h"
42#include "core/html/HTMLTemplateElement.h"
43#include "platform/weborigin/KURL.h"
44#include "wtf/unicode/CharacterNames.h"
45
46namespace blink {
47
48using namespace HTMLNames;
49
// One row of the escaping table: a character and the entity reference that
// replaces it when the caller's EntityMask selects this row.
struct EntityDescription {
    UChar entity; // Character searched for in the text being escaped.
    const CString& reference; // Replacement text. Held by reference, so the referent must outlive this entry.
    EntityMask mask; // ANDed with the caller's mask to decide whether this row applies.
};
55
56template <typename CharType>
57static inline void appendCharactersReplacingEntitiesInternal(StringBuilder& result, CharType* text, unsigned length, const EntityDescription entityMaps[], unsigned entityMapsCount, EntityMask entityMask)
58{
59    unsigned positionAfterLastEntity = 0;
60    for (unsigned i = 0; i < length; ++i) {
61        for (unsigned entityIndex = 0; entityIndex < entityMapsCount; ++entityIndex) {
62            if (text[i] == entityMaps[entityIndex].entity && entityMaps[entityIndex].mask & entityMask) {
63                result.append(text + positionAfterLastEntity, i - positionAfterLastEntity);
64                const CString& replacement = entityMaps[entityIndex].reference;
65                result.append(replacement.data(), replacement.length());
66                positionAfterLastEntity = i + 1;
67                break;
68            }
69        }
70    }
71    result.append(text + positionAfterLastEntity, length - positionAfterLastEntity);
72}
73
74void MarkupAccumulator::appendCharactersReplacingEntities(StringBuilder& result, const String& source, unsigned offset, unsigned length, EntityMask entityMask)
75{
76    DEFINE_STATIC_LOCAL(const CString, ampReference, ("&amp;"));
77    DEFINE_STATIC_LOCAL(const CString, ltReference, ("&lt;"));
78    DEFINE_STATIC_LOCAL(const CString, gtReference, ("&gt;"));
79    DEFINE_STATIC_LOCAL(const CString, quotReference, ("&quot;"));
80    DEFINE_STATIC_LOCAL(const CString, nbspReference, ("&nbsp;"));
81
82    static const EntityDescription entityMaps[] = {
83        { '&', ampReference, EntityAmp },
84        { '<', ltReference, EntityLt },
85        { '>', gtReference, EntityGt },
86        { '"', quotReference, EntityQuot },
87        { noBreakSpace, nbspReference, EntityNbsp },
88    };
89
90    if (!(offset + length))
91        return;
92
93    ASSERT(offset + length <= source.length());
94    if (source.is8Bit())
95        appendCharactersReplacingEntitiesInternal(result, source.characters8() + offset, length, entityMaps, WTF_ARRAY_LENGTH(entityMaps), entityMask);
96    else
97        appendCharactersReplacingEntitiesInternal(result, source.characters16() + offset, length, entityMaps, WTF_ARRAY_LENGTH(entityMaps), entityMask);
98}
99
// Stores the serialization settings; performs no other work.
// |nodes|: optional output list; when non-null, appendStartTag() records every node it emits.
// |resolveUrlsMethod|: policy consulted by resolveURLIfNeeded().
// |range|: optional range; when non-null, appendText() clips text in its boundary containers.
// |serializationType|: ForcedXML makes serializeAsHTMLDocument() always answer false.
MarkupAccumulator::MarkupAccumulator(WillBeHeapVector<RawPtrWillBeMember<Node> >* nodes, EAbsoluteURLs resolveUrlsMethod, const Range* range, SerializationType serializationType)
    : m_nodes(nodes)
    , m_range(range)
    , m_resolveURLsMethod(resolveUrlsMethod)
    , m_serializationType(serializationType)
{
}
107
// Nothing to release explicitly; the body is intentionally empty.
MarkupAccumulator::~MarkupAccumulator()
{
}
111
112String MarkupAccumulator::serializeNodes(Node& targetNode, EChildrenOnly childrenOnly, Vector<QualifiedName>* tagNamesToSkip)
113{
114    Namespaces* namespaces = 0;
115    Namespaces namespaceHash;
116    if (!serializeAsHTMLDocument(targetNode)) {
117        // Add pre-bound namespaces for XML fragments.
118        namespaceHash.set(xmlAtom, XMLNames::xmlNamespaceURI);
119        namespaces = &namespaceHash;
120    }
121
122    serializeNodesWithNamespaces(targetNode, childrenOnly, namespaces, tagNamesToSkip);
123    return m_markup.toString();
124}
125
126void MarkupAccumulator::serializeNodesWithNamespaces(Node& targetNode, EChildrenOnly childrenOnly, const Namespaces* namespaces, Vector<QualifiedName>* tagNamesToSkip)
127{
128    if (tagNamesToSkip && targetNode.isElementNode()) {
129        for (size_t i = 0; i < tagNamesToSkip->size(); ++i) {
130            if (toElement(targetNode).hasTagName(tagNamesToSkip->at(i)))
131                return;
132        }
133    }
134
135    Namespaces namespaceHash;
136    if (namespaces)
137        namespaceHash = *namespaces;
138
139    if (!childrenOnly)
140        appendStartTag(targetNode, &namespaceHash);
141
142    if (!(serializeAsHTMLDocument(targetNode) && elementCannotHaveEndTag(targetNode))) {
143        Node* current = isHTMLTemplateElement(targetNode) ? toHTMLTemplateElement(targetNode).content()->firstChild() : targetNode.firstChild();
144        for ( ; current; current = current->nextSibling())
145            serializeNodesWithNamespaces(*current, IncludeNode, &namespaceHash, tagNamesToSkip);
146    }
147
148    if (!childrenOnly && targetNode.isElementNode())
149        appendEndTag(toElement(targetNode));
150}
151
152String MarkupAccumulator::resolveURLIfNeeded(const Element& element, const String& urlString) const
153{
154    switch (m_resolveURLsMethod) {
155    case ResolveAllURLs:
156        return element.document().completeURL(urlString).string();
157
158    case ResolveNonLocalURLs:
159        if (!element.document().url().isLocalFile())
160            return element.document().completeURL(urlString).string();
161        break;
162
163    case DoNotResolveURLs:
164        break;
165    }
166    return urlString;
167}
168
// Appends |string| to the accumulated markup verbatim (no escaping is applied here).
void MarkupAccumulator::appendString(const String& string)
{
    m_markup.append(string);
}
173
174void MarkupAccumulator::appendStartTag(Node& node, Namespaces* namespaces)
175{
176    appendStartMarkup(m_markup, node, namespaces);
177    if (m_nodes)
178        m_nodes->append(&node);
179}
180
// Appends the end markup for |element| to m_markup; appendEndMarkup() decides
// whether an end tag is written at all.
void MarkupAccumulator::appendEndTag(const Element& element)
{
    appendEndMarkup(m_markup, element);
}
185
186size_t MarkupAccumulator::totalLength(const Vector<String>& strings)
187{
188    size_t length = 0;
189    for (size_t i = 0; i < strings.size(); ++i)
190        length += strings[i].length();
191    return length;
192}
193
// Appends everything accumulated so far in m_markup to |result|.
void MarkupAccumulator::concatenateMarkup(StringBuilder& result)
{
    result.append(m_markup);
}
198
199void MarkupAccumulator::appendAttributeValue(StringBuilder& result, const String& attribute, bool documentIsHTML)
200{
201    appendCharactersReplacingEntities(result, attribute, 0, attribute.length(),
202        documentIsHTML ? EntityMaskInHTMLAttributeValue : EntityMaskInAttributeValue);
203}
204
// Hook called by appendElement() after the element's own attributes have been
// written. This implementation deliberately appends nothing.
void MarkupAccumulator::appendCustomAttributes(StringBuilder&, const Element&, Namespaces*)
{
}
208
209void MarkupAccumulator::appendQuotedURLAttributeValue(StringBuilder& result, const Element& element, const Attribute& attribute)
210{
211    ASSERT(element.isURLAttribute(attribute));
212    const String resolvedURLString = resolveURLIfNeeded(element, attribute.value());
213    UChar quoteChar = '"';
214    String strippedURLString = resolvedURLString.stripWhiteSpace();
215    if (protocolIsJavaScript(strippedURLString)) {
216        // minimal escaping for javascript urls
217        if (strippedURLString.contains('"')) {
218            if (strippedURLString.contains('\''))
219                strippedURLString.replaceWithLiteral('"', "&quot;");
220            else
221                quoteChar = '\'';
222        }
223        result.append(quoteChar);
224        result.append(strippedURLString);
225        result.append(quoteChar);
226        return;
227    }
228
229    // FIXME: This does not fully match other browsers. Firefox percent-escapes non-ASCII characters for innerHTML.
230    result.append(quoteChar);
231    appendAttributeValue(result, resolvedURLString, false);
232    result.append(quoteChar);
233}
234
235bool MarkupAccumulator::shouldAddNamespaceElement(const Element& element, Namespaces& namespaces)
236{
237    // Don't add namespace attribute if it is already defined for this elem.
238    const AtomicString& prefix = element.prefix();
239    if (prefix.isEmpty()) {
240        if (element.hasAttribute(xmlnsAtom)) {
241            namespaces.set(emptyAtom, element.namespaceURI());
242            return false;
243        }
244        return true;
245    }
246
247    return !element.hasAttribute(WTF::xmlnsWithColon + prefix);
248}
249
250bool MarkupAccumulator::shouldAddNamespaceAttribute(const Attribute& attribute, const Element& element)
251{
252    // xmlns and xmlns:prefix attributes should be handled by another branch in appendAttribute.
253    ASSERT(attribute.namespaceURI() != XMLNSNames::xmlnsNamespaceURI);
254
255    // Attributes are in the null namespace by default.
256    if (!attribute.namespaceURI())
257        return false;
258
259    // Attributes without a prefix will need one generated for them, and an xmlns attribute for that prefix.
260    if (!attribute.prefix())
261        return true;
262
263    return !element.hasAttribute(WTF::xmlnsWithColon + attribute.prefix());
264}
265
266void MarkupAccumulator::appendNamespace(StringBuilder& result, const AtomicString& prefix, const AtomicString& namespaceURI, Namespaces& namespaces)
267{
268    if (namespaceURI.isEmpty())
269        return;
270
271    const AtomicString& lookupKey = (!prefix) ? emptyAtom : prefix;
272    AtomicString foundURI = namespaces.get(lookupKey);
273    if (foundURI != namespaceURI) {
274        namespaces.set(lookupKey, namespaceURI);
275        result.append(' ');
276        result.append(xmlnsAtom.string());
277        if (!prefix.isEmpty()) {
278            result.append(':');
279            result.append(prefix);
280        }
281
282        result.appendLiteral("=\"");
283        appendAttributeValue(result, namespaceURI, false);
284        result.append('"');
285    }
286}
287
288EntityMask MarkupAccumulator::entityMaskForText(const Text& text) const
289{
290    if (!serializeAsHTMLDocument(text))
291        return EntityMaskInPCDATA;
292
293    const QualifiedName* parentName = 0;
294    if (text.parentElement())
295        parentName = &(text.parentElement())->tagQName();
296
297    if (parentName && (*parentName == scriptTag || *parentName == styleTag || *parentName == xmpTag))
298        return EntityMaskInCDATA;
299    return EntityMaskInHTMLPCDATA;
300}
301
302void MarkupAccumulator::appendText(StringBuilder& result, Text& text)
303{
304    const String& str = text.data();
305    unsigned length = str.length();
306    unsigned start = 0;
307
308    if (m_range) {
309        if (text == m_range->endContainer())
310            length = m_range->endOffset();
311        if (text == m_range->startContainer()) {
312            start = m_range->startOffset();
313            length -= start;
314        }
315    }
316    appendCharactersReplacingEntities(result, str, start, length, entityMaskForText(text));
317}
318
// Appends |comment| wrapped in the comment delimiters. The content is written verbatim.
void MarkupAccumulator::appendComment(StringBuilder& result, const String& comment)
{
    // FIXME: Comment content is not escaped, but XMLSerializer (and possibly other callers) should raise an exception if it includes "-->".
    result.appendLiteral("<!--");
    result.append(comment);
    result.appendLiteral("-->");
}
326
327void MarkupAccumulator::appendXMLDeclaration(StringBuilder& result, const Document& document)
328{
329    if (!document.hasXMLDeclaration())
330        return;
331
332    result.appendLiteral("<?xml version=\"");
333    result.append(document.xmlVersion());
334    const String& encoding = document.xmlEncoding();
335    if (!encoding.isEmpty()) {
336        result.appendLiteral("\" encoding=\"");
337        result.append(encoding);
338    }
339    if (document.xmlStandaloneStatus() != Document::StandaloneUnspecified) {
340        result.appendLiteral("\" standalone=\"");
341        if (document.xmlStandalone())
342            result.appendLiteral("yes");
343        else
344            result.appendLiteral("no");
345    }
346
347    result.appendLiteral("\"?>");
348}
349
350void MarkupAccumulator::appendDocumentType(StringBuilder& result, const DocumentType& n)
351{
352    if (n.name().isEmpty())
353        return;
354
355    result.appendLiteral("<!DOCTYPE ");
356    result.append(n.name());
357    if (!n.publicId().isEmpty()) {
358        result.appendLiteral(" PUBLIC \"");
359        result.append(n.publicId());
360        result.append('"');
361        if (!n.systemId().isEmpty()) {
362            result.appendLiteral(" \"");
363            result.append(n.systemId());
364            result.append('"');
365        }
366    } else if (!n.systemId().isEmpty()) {
367        result.appendLiteral(" SYSTEM \"");
368        result.append(n.systemId());
369        result.append('"');
370    }
371    result.append('>');
372}
373
// Appends a processing instruction: the target, one space, then the data,
// wrapped in the PI delimiters. Both strings are written verbatim.
void MarkupAccumulator::appendProcessingInstruction(StringBuilder& result, const String& target, const String& data)
{
    // FIXME: PI data is not escaped, but XMLSerializer (and possibly other callers) should raise an exception if it includes "?>".
    result.appendLiteral("<?");
    result.append(target);
    result.append(' ');
    result.append(data);
    result.appendLiteral("?>");
}
383
384void MarkupAccumulator::appendElement(StringBuilder& result, Element& element, Namespaces* namespaces)
385{
386    appendOpenTag(result, element, namespaces);
387
388    AttributeCollection attributes = element.attributes();
389    AttributeCollection::iterator end = attributes.end();
390    for (AttributeCollection::iterator it = attributes.begin(); it != end; ++it)
391        appendAttribute(result, element, *it, namespaces);
392
393    // Give an opportunity to subclasses to add their own attributes.
394    appendCustomAttributes(result, element, namespaces);
395
396    appendCloseTag(result, element);
397}
398
399void MarkupAccumulator::appendOpenTag(StringBuilder& result, const Element& element, Namespaces* namespaces)
400{
401    result.append('<');
402    result.append(element.tagQName().toString());
403    if (!serializeAsHTMLDocument(element) && namespaces && shouldAddNamespaceElement(element, *namespaces))
404        appendNamespace(result, element.prefix(), element.namespaceURI(), *namespaces);
405}
406
407void MarkupAccumulator::appendCloseTag(StringBuilder& result, const Element& element)
408{
409    if (shouldSelfClose(element)) {
410        if (element.isHTMLElement())
411            result.append(' '); // XHTML 1.0 <-> HTML compatibility.
412        result.append('/');
413    }
414    result.append('>');
415}
416
417static inline bool attributeIsInSerializedNamespace(const Attribute& attribute)
418{
419    return attribute.namespaceURI() == XMLNames::xmlNamespaceURI
420        || attribute.namespaceURI() == XLinkNames::xlinkNamespaceURI
421        || attribute.namespaceURI() == XMLNSNames::xmlnsNamespaceURI;
422}
423
424void MarkupAccumulator::appendAttribute(StringBuilder& result, const Element& element, const Attribute& attribute, Namespaces* namespaces)
425{
426    bool documentIsHTML = serializeAsHTMLDocument(element);
427
428    QualifiedName prefixedName = attribute.name();
429    if (documentIsHTML && !attributeIsInSerializedNamespace(attribute)) {
430        result.append(' ');
431        result.append(attribute.name().localName());
432    } else {
433        if (attribute.namespaceURI() == XMLNSNames::xmlnsNamespaceURI) {
434            if (!attribute.prefix() && attribute.localName() != xmlnsAtom)
435                prefixedName.setPrefix(xmlnsAtom);
436            if (namespaces) { // Account for the namespace attribute we're about to append.
437                const AtomicString& lookupKey = (!attribute.prefix()) ? emptyAtom : attribute.localName();
438                namespaces->set(lookupKey, attribute.value());
439            }
440        } else if (attribute.namespaceURI() == XMLNames::xmlNamespaceURI) {
441            if (!attribute.prefix())
442                prefixedName.setPrefix(xmlAtom);
443        } else {
444            if (attribute.namespaceURI() == XLinkNames::xlinkNamespaceURI) {
445                if (!attribute.prefix())
446                    prefixedName.setPrefix(xlinkAtom);
447            }
448
449            if (namespaces && shouldAddNamespaceAttribute(attribute, element)) {
450                if (!prefixedName.prefix()) {
451                    // This behavior is in process of being standardized. See crbug.com/248044 and https://www.w3.org/Bugs/Public/show_bug.cgi?id=24208
452                    String prefixPrefix("ns", 2);
453                    for (unsigned i = attribute.namespaceURI().impl()->existingHash(); ; ++i) {
454                        AtomicString newPrefix(String(prefixPrefix + String::number(i)));
455                        AtomicString foundURI = namespaces->get(newPrefix);
456                        if (foundURI == attribute.namespaceURI() || foundURI == nullAtom) {
457                            // We already generated a prefix for this namespace.
458                            prefixedName.setPrefix(newPrefix);
459                            break;
460                        }
461                    }
462                }
463                ASSERT(prefixedName.prefix());
464                appendNamespace(result, prefixedName.prefix(), attribute.namespaceURI(), *namespaces);
465            }
466        }
467        result.append(' ');
468        result.append(prefixedName.toString());
469    }
470
471    result.append('=');
472
473    if (element.isURLAttribute(attribute)) {
474        appendQuotedURLAttributeValue(result, element, attribute);
475    } else {
476        result.append('"');
477        appendAttributeValue(result, attribute.value(), documentIsHTML);
478        result.append('"');
479    }
480}
481
// Appends |section| wrapped in the CDATA delimiters. The content is written verbatim.
void MarkupAccumulator::appendCDATASection(StringBuilder& result, const String& section)
{
    // FIXME: CDATA content is not escaped, but XMLSerializer (and possibly other callers) should raise an exception if it includes "]]>".
    result.appendLiteral("<![CDATA[");
    result.append(section);
    result.appendLiteral("]]>");
}
489
490void MarkupAccumulator::appendStartMarkup(StringBuilder& result, Node& node, Namespaces* namespaces)
491{
492    switch (node.nodeType()) {
493    case Node::TEXT_NODE:
494        appendText(result, toText(node));
495        break;
496    case Node::COMMENT_NODE:
497        appendComment(result, toComment(node).data());
498        break;
499    case Node::DOCUMENT_NODE:
500        appendXMLDeclaration(result, toDocument(node));
501        break;
502    case Node::DOCUMENT_FRAGMENT_NODE:
503        break;
504    case Node::DOCUMENT_TYPE_NODE:
505        appendDocumentType(result, toDocumentType(node));
506        break;
507    case Node::PROCESSING_INSTRUCTION_NODE:
508        appendProcessingInstruction(result, toProcessingInstruction(node).target(), toProcessingInstruction(node).data());
509        break;
510    case Node::ELEMENT_NODE:
511        appendElement(result, toElement(node), namespaces);
512        break;
513    case Node::CDATA_SECTION_NODE:
514        appendCDATASection(result, toCDATASection(node).data());
515        break;
516    case Node::ATTRIBUTE_NODE:
517        ASSERT_NOT_REACHED();
518        break;
519    }
520}
521
522// Rules of self-closure
523// 1. No elements in HTML documents use the self-closing syntax.
524// 2. Elements w/ children never self-close because they use a separate end tag.
525// 3. HTML elements which do not have a "forbidden" end tag will close with a separate end tag.
526// 4. Other elements self-close.
527bool MarkupAccumulator::shouldSelfClose(const Element& element)
528{
529    if (serializeAsHTMLDocument(element))
530        return false;
531    if (element.hasChildren())
532        return false;
533    if (element.isHTMLElement() && !elementCannotHaveEndTag(element))
534        return false;
535    return true;
536}
537
538bool MarkupAccumulator::elementCannotHaveEndTag(const Node& node)
539{
540    if (!node.isHTMLElement())
541        return false;
542
543    // FIXME: ieForbidsInsertHTML may not be the right function to call here
544    // ieForbidsInsertHTML is used to disallow setting innerHTML/outerHTML
545    // or createContextualFragment.  It does not necessarily align with
546    // which elements should be serialized w/o end tags.
547    return toHTMLElement(node).ieForbidsInsertHTML();
548}
549
550void MarkupAccumulator::appendEndMarkup(StringBuilder& result, const Element& element)
551{
552    if (shouldSelfClose(element) || (!element.hasChildren() && elementCannotHaveEndTag(element)))
553        return;
554
555    result.appendLiteral("</");
556    result.append(element.tagQName().toString());
557    result.append('>');
558}
559
560bool MarkupAccumulator::serializeAsHTMLDocument(const Node& node) const
561{
562    if (m_serializationType == ForcedXML)
563        return false;
564    return node.document().isHTMLDocument();
565}
566
567}
568