1/* 2 * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2012 Apple Inc. All rights reserved. 3 * Copyright (C) 2009, 2010 Google Inc. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 15 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 16 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 17 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 18 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 19 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 20 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 */ 26 27#include "config.h" 28#include "core/editing/MarkupAccumulator.h" 29 30#include "core/HTMLNames.h" 31#include "core/XLinkNames.h" 32#include "core/XMLNSNames.h" 33#include "core/XMLNames.h" 34#include "core/dom/CDATASection.h" 35#include "core/dom/Comment.h" 36#include "core/dom/Document.h" 37#include "core/dom/DocumentFragment.h" 38#include "core/dom/DocumentType.h" 39#include "core/dom/ProcessingInstruction.h" 40#include "core/editing/Editor.h" 41#include "core/html/HTMLElement.h" 42#include "core/html/HTMLTemplateElement.h" 43#include "platform/weborigin/KURL.h" 44#include "wtf/unicode/CharacterNames.h" 45 46namespace blink { 47 48using namespace HTMLNames; 49 50struct EntityDescription { 51 UChar entity; 52 const CString& reference; 53 EntityMask mask; 54}; 55 56template <typename CharType> 57static inline void appendCharactersReplacingEntitiesInternal(StringBuilder& result, CharType* text, unsigned length, const EntityDescription entityMaps[], unsigned entityMapsCount, EntityMask entityMask) 58{ 59 unsigned positionAfterLastEntity = 0; 60 for (unsigned i = 0; i < length; ++i) { 61 for (unsigned entityIndex = 0; entityIndex < entityMapsCount; ++entityIndex) { 62 if (text[i] == entityMaps[entityIndex].entity && entityMaps[entityIndex].mask & entityMask) { 63 result.append(text + positionAfterLastEntity, i - positionAfterLastEntity); 64 const CString& replacement = entityMaps[entityIndex].reference; 65 result.append(replacement.data(), replacement.length()); 66 positionAfterLastEntity = i + 1; 67 break; 68 } 69 } 70 } 71 result.append(text + positionAfterLastEntity, length - positionAfterLastEntity); 72} 73 74void MarkupAccumulator::appendCharactersReplacingEntities(StringBuilder& result, const String& source, unsigned offset, unsigned length, EntityMask entityMask) 75{ 76 DEFINE_STATIC_LOCAL(const CString, ampReference, ("&")); 77 DEFINE_STATIC_LOCAL(const CString, ltReference, ("<")); 78 DEFINE_STATIC_LOCAL(const CString, gtReference, (">")); 79 DEFINE_STATIC_LOCAL(const CString, quotReference, (""")); 80 DEFINE_STATIC_LOCAL(const CString, nbspReference, (" ")); 81 82 static const EntityDescription entityMaps[] = { 83 { '&', ampReference, EntityAmp }, 84 { '<', ltReference, EntityLt }, 85 { '>', gtReference, EntityGt }, 86 { '"', quotReference, EntityQuot }, 87 { noBreakSpace, nbspReference, EntityNbsp }, 88 }; 89 90 if (!(offset + length)) 91 return; 92 93 ASSERT(offset + length <= source.length()); 94 if (source.is8Bit()) 95 appendCharactersReplacingEntitiesInternal(result, source.characters8() + offset, length, entityMaps, WTF_ARRAY_LENGTH(entityMaps), entityMask); 96 else 97 appendCharactersReplacingEntitiesInternal(result, source.characters16() + offset, length, entityMaps, WTF_ARRAY_LENGTH(entityMaps), entityMask); 98} 99 100MarkupAccumulator::MarkupAccumulator(WillBeHeapVector<RawPtrWillBeMember<Node> >* nodes, EAbsoluteURLs resolveUrlsMethod, const Range* range, SerializationType serializationType) 101 : m_nodes(nodes) 102 , m_range(range) 103 , m_resolveURLsMethod(resolveUrlsMethod) 104 , m_serializationType(serializationType) 105{ 106} 107 108MarkupAccumulator::~MarkupAccumulator() 109{ 110} 111 112String MarkupAccumulator::serializeNodes(Node& targetNode, EChildrenOnly childrenOnly, Vector<QualifiedName>* tagNamesToSkip) 113{ 114 Namespaces* namespaces = 0; 115 Namespaces namespaceHash; 116 if (!serializeAsHTMLDocument(targetNode)) { 117 // Add pre-bound namespaces for XML fragments. 118 namespaceHash.set(xmlAtom, XMLNames::xmlNamespaceURI); 119 namespaces = &namespaceHash; 120 } 121 122 serializeNodesWithNamespaces(targetNode, childrenOnly, namespaces, tagNamesToSkip); 123 return m_markup.toString(); 124} 125 126void MarkupAccumulator::serializeNodesWithNamespaces(Node& targetNode, EChildrenOnly childrenOnly, const Namespaces* namespaces, Vector<QualifiedName>* tagNamesToSkip) 127{ 128 if (tagNamesToSkip && targetNode.isElementNode()) { 129 for (size_t i = 0; i < tagNamesToSkip->size(); ++i) { 130 if (toElement(targetNode).hasTagName(tagNamesToSkip->at(i))) 131 return; 132 } 133 } 134 135 Namespaces namespaceHash; 136 if (namespaces) 137 namespaceHash = *namespaces; 138 139 if (!childrenOnly) 140 appendStartTag(targetNode, &namespaceHash); 141 142 if (!(serializeAsHTMLDocument(targetNode) && elementCannotHaveEndTag(targetNode))) { 143 Node* current = isHTMLTemplateElement(targetNode) ? toHTMLTemplateElement(targetNode).content()->firstChild() : targetNode.firstChild(); 144 for ( ; current; current = current->nextSibling()) 145 serializeNodesWithNamespaces(*current, IncludeNode, &namespaceHash, tagNamesToSkip); 146 } 147 148 if (!childrenOnly && targetNode.isElementNode()) 149 appendEndTag(toElement(targetNode)); 150} 151 152String MarkupAccumulator::resolveURLIfNeeded(const Element& element, const String& urlString) const 153{ 154 switch (m_resolveURLsMethod) { 155 case ResolveAllURLs: 156 return element.document().completeURL(urlString).string(); 157 158 case ResolveNonLocalURLs: 159 if (!element.document().url().isLocalFile()) 160 return element.document().completeURL(urlString).string(); 161 break; 162 163 case DoNotResolveURLs: 164 break; 165 } 166 return urlString; 167} 168 169void MarkupAccumulator::appendString(const String& string) 170{ 171 m_markup.append(string); 172} 173 174void MarkupAccumulator::appendStartTag(Node& node, Namespaces* namespaces) 175{ 176 appendStartMarkup(m_markup, node, namespaces); 177 if (m_nodes) 178 m_nodes->append(&node); 179} 180 181void MarkupAccumulator::appendEndTag(const Element& element) 182{ 183 appendEndMarkup(m_markup, element); 184} 185 186size_t MarkupAccumulator::totalLength(const Vector<String>& strings) 187{ 188 size_t length = 0; 189 for (size_t i = 0; i < strings.size(); ++i) 190 length += strings[i].length(); 191 return length; 192} 193 194void MarkupAccumulator::concatenateMarkup(StringBuilder& result) 195{ 196 result.append(m_markup); 197} 198 199void MarkupAccumulator::appendAttributeValue(StringBuilder& result, const String& attribute, bool documentIsHTML) 200{ 201 appendCharactersReplacingEntities(result, attribute, 0, attribute.length(), 202 documentIsHTML ? EntityMaskInHTMLAttributeValue : EntityMaskInAttributeValue); 203} 204 205void MarkupAccumulator::appendCustomAttributes(StringBuilder&, const Element&, Namespaces*) 206{ 207} 208 209void MarkupAccumulator::appendQuotedURLAttributeValue(StringBuilder& result, const Element& element, const Attribute& attribute) 210{ 211 ASSERT(element.isURLAttribute(attribute)); 212 const String resolvedURLString = resolveURLIfNeeded(element, attribute.value()); 213 UChar quoteChar = '"'; 214 String strippedURLString = resolvedURLString.stripWhiteSpace(); 215 if (protocolIsJavaScript(strippedURLString)) { 216 // minimal escaping for javascript urls 217 if (strippedURLString.contains('"')) { 218 if (strippedURLString.contains('\'')) 219 strippedURLString.replaceWithLiteral('"', """); 220 else 221 quoteChar = '\''; 222 } 223 result.append(quoteChar); 224 result.append(strippedURLString); 225 result.append(quoteChar); 226 return; 227 } 228 229 // FIXME: This does not fully match other browsers. Firefox percent-escapes non-ASCII characters for innerHTML. 230 result.append(quoteChar); 231 appendAttributeValue(result, resolvedURLString, false); 232 result.append(quoteChar); 233} 234 235bool MarkupAccumulator::shouldAddNamespaceElement(const Element& element, Namespaces& namespaces) 236{ 237 // Don't add namespace attribute if it is already defined for this elem. 238 const AtomicString& prefix = element.prefix(); 239 if (prefix.isEmpty()) { 240 if (element.hasAttribute(xmlnsAtom)) { 241 namespaces.set(emptyAtom, element.namespaceURI()); 242 return false; 243 } 244 return true; 245 } 246 247 return !element.hasAttribute(WTF::xmlnsWithColon + prefix); 248} 249 250bool MarkupAccumulator::shouldAddNamespaceAttribute(const Attribute& attribute, const Element& element) 251{ 252 // xmlns and xmlns:prefix attributes should be handled by another branch in appendAttribute. 253 ASSERT(attribute.namespaceURI() != XMLNSNames::xmlnsNamespaceURI); 254 255 // Attributes are in the null namespace by default. 256 if (!attribute.namespaceURI()) 257 return false; 258 259 // Attributes without a prefix will need one generated for them, and an xmlns attribute for that prefix. 260 if (!attribute.prefix()) 261 return true; 262 263 return !element.hasAttribute(WTF::xmlnsWithColon + attribute.prefix()); 264} 265 266void MarkupAccumulator::appendNamespace(StringBuilder& result, const AtomicString& prefix, const AtomicString& namespaceURI, Namespaces& namespaces) 267{ 268 if (namespaceURI.isEmpty()) 269 return; 270 271 const AtomicString& lookupKey = (!prefix) ? emptyAtom : prefix; 272 AtomicString foundURI = namespaces.get(lookupKey); 273 if (foundURI != namespaceURI) { 274 namespaces.set(lookupKey, namespaceURI); 275 result.append(' '); 276 result.append(xmlnsAtom.string()); 277 if (!prefix.isEmpty()) { 278 result.append(':'); 279 result.append(prefix); 280 } 281 282 result.appendLiteral("=\""); 283 appendAttributeValue(result, namespaceURI, false); 284 result.append('"'); 285 } 286} 287 288EntityMask MarkupAccumulator::entityMaskForText(const Text& text) const 289{ 290 if (!serializeAsHTMLDocument(text)) 291 return EntityMaskInPCDATA; 292 293 const QualifiedName* parentName = 0; 294 if (text.parentElement()) 295 parentName = &(text.parentElement())->tagQName(); 296 297 if (parentName && (*parentName == scriptTag || *parentName == styleTag || *parentName == xmpTag)) 298 return EntityMaskInCDATA; 299 return EntityMaskInHTMLPCDATA; 300} 301 302void MarkupAccumulator::appendText(StringBuilder& result, Text& text) 303{ 304 const String& str = text.data(); 305 unsigned length = str.length(); 306 unsigned start = 0; 307 308 if (m_range) { 309 if (text == m_range->endContainer()) 310 length = m_range->endOffset(); 311 if (text == m_range->startContainer()) { 312 start = m_range->startOffset(); 313 length -= start; 314 } 315 } 316 appendCharactersReplacingEntities(result, str, start, length, entityMaskForText(text)); 317} 318 319void MarkupAccumulator::appendComment(StringBuilder& result, const String& comment) 320{ 321 // FIXME: Comment content is not escaped, but XMLSerializer (and possibly other callers) should raise an exception if it includes "-->". 322 result.appendLiteral("<!--"); 323 result.append(comment); 324 result.appendLiteral("-->"); 325} 326 327void MarkupAccumulator::appendXMLDeclaration(StringBuilder& result, const Document& document) 328{ 329 if (!document.hasXMLDeclaration()) 330 return; 331 332 result.appendLiteral("<?xml version=\""); 333 result.append(document.xmlVersion()); 334 const String& encoding = document.xmlEncoding(); 335 if (!encoding.isEmpty()) { 336 result.appendLiteral("\" encoding=\""); 337 result.append(encoding); 338 } 339 if (document.xmlStandaloneStatus() != Document::StandaloneUnspecified) { 340 result.appendLiteral("\" standalone=\""); 341 if (document.xmlStandalone()) 342 result.appendLiteral("yes"); 343 else 344 result.appendLiteral("no"); 345 } 346 347 result.appendLiteral("\"?>"); 348} 349 350void MarkupAccumulator::appendDocumentType(StringBuilder& result, const DocumentType& n) 351{ 352 if (n.name().isEmpty()) 353 return; 354 355 result.appendLiteral("<!DOCTYPE "); 356 result.append(n.name()); 357 if (!n.publicId().isEmpty()) { 358 result.appendLiteral(" PUBLIC \""); 359 result.append(n.publicId()); 360 result.append('"'); 361 if (!n.systemId().isEmpty()) { 362 result.appendLiteral(" \""); 363 result.append(n.systemId()); 364 result.append('"'); 365 } 366 } else if (!n.systemId().isEmpty()) { 367 result.appendLiteral(" SYSTEM \""); 368 result.append(n.systemId()); 369 result.append('"'); 370 } 371 result.append('>'); 372} 373 374void MarkupAccumulator::appendProcessingInstruction(StringBuilder& result, const String& target, const String& data) 375{ 376 // FIXME: PI data is not escaped, but XMLSerializer (and possibly other callers) this should raise an exception if it includes "?>". 377 result.appendLiteral("<?"); 378 result.append(target); 379 result.append(' '); 380 result.append(data); 381 result.appendLiteral("?>"); 382} 383 384void MarkupAccumulator::appendElement(StringBuilder& result, Element& element, Namespaces* namespaces) 385{ 386 appendOpenTag(result, element, namespaces); 387 388 AttributeCollection attributes = element.attributes(); 389 AttributeCollection::iterator end = attributes.end(); 390 for (AttributeCollection::iterator it = attributes.begin(); it != end; ++it) 391 appendAttribute(result, element, *it, namespaces); 392 393 // Give an opportunity to subclasses to add their own attributes. 394 appendCustomAttributes(result, element, namespaces); 395 396 appendCloseTag(result, element); 397} 398 399void MarkupAccumulator::appendOpenTag(StringBuilder& result, const Element& element, Namespaces* namespaces) 400{ 401 result.append('<'); 402 result.append(element.tagQName().toString()); 403 if (!serializeAsHTMLDocument(element) && namespaces && shouldAddNamespaceElement(element, *namespaces)) 404 appendNamespace(result, element.prefix(), element.namespaceURI(), *namespaces); 405} 406 407void MarkupAccumulator::appendCloseTag(StringBuilder& result, const Element& element) 408{ 409 if (shouldSelfClose(element)) { 410 if (element.isHTMLElement()) 411 result.append(' '); // XHTML 1.0 <-> HTML compatibility. 412 result.append('/'); 413 } 414 result.append('>'); 415} 416 417static inline bool attributeIsInSerializedNamespace(const Attribute& attribute) 418{ 419 return attribute.namespaceURI() == XMLNames::xmlNamespaceURI 420 || attribute.namespaceURI() == XLinkNames::xlinkNamespaceURI 421 || attribute.namespaceURI() == XMLNSNames::xmlnsNamespaceURI; 422} 423 424void MarkupAccumulator::appendAttribute(StringBuilder& result, const Element& element, const Attribute& attribute, Namespaces* namespaces) 425{ 426 bool documentIsHTML = serializeAsHTMLDocument(element); 427 428 QualifiedName prefixedName = attribute.name(); 429 if (documentIsHTML && !attributeIsInSerializedNamespace(attribute)) { 430 result.append(' '); 431 result.append(attribute.name().localName()); 432 } else { 433 if (attribute.namespaceURI() == XMLNSNames::xmlnsNamespaceURI) { 434 if (!attribute.prefix() && attribute.localName() != xmlnsAtom) 435 prefixedName.setPrefix(xmlnsAtom); 436 if (namespaces) { // Account for the namespace attribute we're about to append. 437 const AtomicString& lookupKey = (!attribute.prefix()) ? emptyAtom : attribute.localName(); 438 namespaces->set(lookupKey, attribute.value()); 439 } 440 } else if (attribute.namespaceURI() == XMLNames::xmlNamespaceURI) { 441 if (!attribute.prefix()) 442 prefixedName.setPrefix(xmlAtom); 443 } else { 444 if (attribute.namespaceURI() == XLinkNames::xlinkNamespaceURI) { 445 if (!attribute.prefix()) 446 prefixedName.setPrefix(xlinkAtom); 447 } 448 449 if (namespaces && shouldAddNamespaceAttribute(attribute, element)) { 450 if (!prefixedName.prefix()) { 451 // This behavior is in process of being standardized. See crbug.com/248044 and https://www.w3.org/Bugs/Public/show_bug.cgi?id=24208 452 String prefixPrefix("ns", 2); 453 for (unsigned i = attribute.namespaceURI().impl()->existingHash(); ; ++i) { 454 AtomicString newPrefix(String(prefixPrefix + String::number(i))); 455 AtomicString foundURI = namespaces->get(newPrefix); 456 if (foundURI == attribute.namespaceURI() || foundURI == nullAtom) { 457 // We already generated a prefix for this namespace. 458 prefixedName.setPrefix(newPrefix); 459 break; 460 } 461 } 462 } 463 ASSERT(prefixedName.prefix()); 464 appendNamespace(result, prefixedName.prefix(), attribute.namespaceURI(), *namespaces); 465 } 466 } 467 result.append(' '); 468 result.append(prefixedName.toString()); 469 } 470 471 result.append('='); 472 473 if (element.isURLAttribute(attribute)) { 474 appendQuotedURLAttributeValue(result, element, attribute); 475 } else { 476 result.append('"'); 477 appendAttributeValue(result, attribute.value(), documentIsHTML); 478 result.append('"'); 479 } 480} 481 482void MarkupAccumulator::appendCDATASection(StringBuilder& result, const String& section) 483{ 484 // FIXME: CDATA content is not escaped, but XMLSerializer (and possibly other callers) should raise an exception if it includes "]]>". 485 result.appendLiteral("<![CDATA["); 486 result.append(section); 487 result.appendLiteral("]]>"); 488} 489 490void MarkupAccumulator::appendStartMarkup(StringBuilder& result, Node& node, Namespaces* namespaces) 491{ 492 switch (node.nodeType()) { 493 case Node::TEXT_NODE: 494 appendText(result, toText(node)); 495 break; 496 case Node::COMMENT_NODE: 497 appendComment(result, toComment(node).data()); 498 break; 499 case Node::DOCUMENT_NODE: 500 appendXMLDeclaration(result, toDocument(node)); 501 break; 502 case Node::DOCUMENT_FRAGMENT_NODE: 503 break; 504 case Node::DOCUMENT_TYPE_NODE: 505 appendDocumentType(result, toDocumentType(node)); 506 break; 507 case Node::PROCESSING_INSTRUCTION_NODE: 508 appendProcessingInstruction(result, toProcessingInstruction(node).target(), toProcessingInstruction(node).data()); 509 break; 510 case Node::ELEMENT_NODE: 511 appendElement(result, toElement(node), namespaces); 512 break; 513 case Node::CDATA_SECTION_NODE: 514 appendCDATASection(result, toCDATASection(node).data()); 515 break; 516 case Node::ATTRIBUTE_NODE: 517 ASSERT_NOT_REACHED(); 518 break; 519 } 520} 521 522// Rules of self-closure 523// 1. No elements in HTML documents use the self-closing syntax. 524// 2. Elements w/ children never self-close because they use a separate end tag. 525// 3. HTML elements which do not have a "forbidden" end tag will close with a separate end tag. 526// 4. Other elements self-close. 527bool MarkupAccumulator::shouldSelfClose(const Element& element) 528{ 529 if (serializeAsHTMLDocument(element)) 530 return false; 531 if (element.hasChildren()) 532 return false; 533 if (element.isHTMLElement() && !elementCannotHaveEndTag(element)) 534 return false; 535 return true; 536} 537 538bool MarkupAccumulator::elementCannotHaveEndTag(const Node& node) 539{ 540 if (!node.isHTMLElement()) 541 return false; 542 543 // FIXME: ieForbidsInsertHTML may not be the right function to call here 544 // ieForbidsInsertHTML is used to disallow setting innerHTML/outerHTML 545 // or createContextualFragment. It does not necessarily align with 546 // which elements should be serialized w/o end tags. 547 return toHTMLElement(node).ieForbidsInsertHTML(); 548} 549 550void MarkupAccumulator::appendEndMarkup(StringBuilder& result, const Element& element) 551{ 552 if (shouldSelfClose(element) || (!element.hasChildren() && elementCannotHaveEndTag(element))) 553 return; 554 555 result.appendLiteral("</"); 556 result.append(element.tagQName().toString()); 557 result.append('>'); 558} 559 560bool MarkupAccumulator::serializeAsHTMLDocument(const Node& node) const 561{ 562 if (m_serializationType == ForcedXML) 563 return false; 564 return node.document().isHTMLDocument(); 565} 566 567} 568