1/* 2 * Copyright (C) 2008 Apple Inc. All Rights Reserved. 3 * Copyright (C) 2009 Torch Mobile, Inc. http://www.torchmobile.com/ 4 * Copyright (C) 2010 Google Inc. All Rights Reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY 16 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR 19 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 */ 27 28#include "config.h" 29#include "core/html/parser/HTMLPreloadScanner.h" 30 31#include "core/HTMLNames.h" 32#include "core/InputTypeNames.h" 33#include "core/css/MediaList.h" 34#include "core/css/MediaQueryEvaluator.h" 35#include "core/css/MediaValues.h" 36#include "core/css/parser/SizesAttributeParser.h" 37#include "core/html/LinkRelAttribute.h" 38#include "core/html/parser/HTMLParserIdioms.h" 39#include "core/html/parser/HTMLSrcsetParser.h" 40#include "core/html/parser/HTMLTokenizer.h" 41#include "platform/RuntimeEnabledFeatures.h" 42#include "platform/TraceEvent.h" 43#include "wtf/MainThread.h" 44 45namespace blink { 46 47using namespace HTMLNames; 48 49static bool match(const StringImpl* impl, const QualifiedName& qName) 50{ 51 return impl == qName.localName().impl(); 52} 53 54static bool match(const AtomicString& name, const QualifiedName& qName) 55{ 56 ASSERT(isMainThread()); 57 return qName.localName() == name; 58} 59 60static bool match(const String& name, const QualifiedName& qName) 61{ 62 return threadSafeMatch(name, qName); 63} 64 65static const StringImpl* tagImplFor(const HTMLToken::DataVector& data) 66{ 67 AtomicString tagName(data); 68 const StringImpl* result = tagName.impl(); 69 if (result->isStatic()) 70 return result; 71 return 0; 72} 73 74static const StringImpl* tagImplFor(const String& tagName) 75{ 76 const StringImpl* result = tagName.impl(); 77 if (result->isStatic()) 78 return result; 79 return 0; 80} 81 82static String initiatorFor(const StringImpl* tagImpl) 83{ 84 ASSERT(tagImpl); 85 if (match(tagImpl, imgTag)) 86 return imgTag.localName(); 87 if (match(tagImpl, inputTag)) 88 return inputTag.localName(); 89 if (match(tagImpl, linkTag)) 90 return linkTag.localName(); 91 if (match(tagImpl, scriptTag)) 92 return scriptTag.localName(); 93 ASSERT_NOT_REACHED(); 94 return emptyString(); 95} 96 97static bool mediaAttributeMatches(const MediaValues& mediaValues, const String& attributeValue) 98{ 99 RefPtrWillBeRawPtr<MediaQuerySet> mediaQueries = MediaQuerySet::createOffMainThread(attributeValue); 100 MediaQueryEvaluator mediaQueryEvaluator(mediaValues); 101 return mediaQueryEvaluator.eval(mediaQueries.get()); 102} 103 104class TokenPreloadScanner::StartTagScanner { 105public: 106 StartTagScanner(const StringImpl* tagImpl, PassRefPtr<MediaValues> mediaValues) 107 : m_tagImpl(tagImpl) 108 , m_linkIsStyleSheet(false) 109 , m_matchedMediaAttribute(true) 110 , m_inputIsImage(false) 111 , m_sourceSize(0) 112 , m_sourceSizeSet(false) 113 , m_isCORSEnabled(false) 114 , m_defer(FetchRequest::NoDefer) 115 , m_allowCredentials(DoNotAllowStoredCredentials) 116 , m_mediaValues(mediaValues) 117 { 118 if (match(m_tagImpl, imgTag) 119 || match(m_tagImpl, sourceTag)) { 120 if (RuntimeEnabledFeatures::pictureSizesEnabled()) 121 m_sourceSize = SizesAttributeParser(m_mediaValues, String()).length(); 122 return; 123 } 124 if ( !match(m_tagImpl, inputTag) 125 && !match(m_tagImpl, linkTag) 126 && !match(m_tagImpl, scriptTag)) 127 m_tagImpl = 0; 128 } 129 130 enum URLReplacement { 131 AllowURLReplacement, 132 DisallowURLReplacement 133 }; 134 135 void processAttributes(const HTMLToken::AttributeList& attributes) 136 { 137 ASSERT(isMainThread()); 138 if (!m_tagImpl) 139 return; 140 for (HTMLToken::AttributeList::const_iterator iter = attributes.begin(); iter != attributes.end(); ++iter) { 141 AtomicString attributeName(iter->name); 142 String attributeValue = StringImpl::create8BitIfPossible(iter->value); 143 processAttribute(attributeName, attributeValue); 144 } 145 } 146 147 void processAttributes(const Vector<CompactHTMLToken::Attribute>& attributes) 148 { 149 if (!m_tagImpl) 150 return; 151 for (Vector<CompactHTMLToken::Attribute>::const_iterator iter = attributes.begin(); iter != attributes.end(); ++iter) 152 processAttribute(iter->name, iter->value); 153 } 154 155 void handlePictureSourceURL(String& sourceURL) 156 { 157 if (match(m_tagImpl, sourceTag) && m_matchedMediaAttribute && sourceURL.isEmpty()) 158 sourceURL = m_srcsetImageCandidate.toString(); 159 else if (match(m_tagImpl, imgTag) && !sourceURL.isEmpty()) 160 setUrlToLoad(sourceURL, AllowURLReplacement); 161 } 162 163 PassOwnPtr<PreloadRequest> createPreloadRequest(const KURL& predictedBaseURL, const SegmentedString& source) 164 { 165 if (!shouldPreload() || !m_matchedMediaAttribute) 166 return nullptr; 167 168 TRACE_EVENT_INSTANT1("net", "PreloadRequest", "url", m_urlToLoad.ascii()); 169 TextPosition position = TextPosition(source.currentLine(), source.currentColumn()); 170 OwnPtr<PreloadRequest> request = PreloadRequest::create(initiatorFor(m_tagImpl), position, m_urlToLoad, predictedBaseURL, resourceType()); 171 if (isCORSEnabled()) 172 request->setCrossOriginEnabled(allowStoredCredentials()); 173 request->setCharset(charset()); 174 request->setDefer(m_defer); 175 return request.release(); 176 } 177 178private: 179 template<typename NameType> 180 void processScriptAttribute(const NameType& attributeName, const String& attributeValue) 181 { 182 // FIXME - Don't set crossorigin multiple times. 183 if (match(attributeName, srcAttr)) 184 setUrlToLoad(attributeValue, DisallowURLReplacement); 185 else if (match(attributeName, crossoriginAttr)) 186 setCrossOriginAllowed(attributeValue); 187 else if (match(attributeName, asyncAttr)) 188 setDefer(FetchRequest::LazyLoad); 189 else if (match(attributeName, deferAttr)) 190 setDefer(FetchRequest::LazyLoad); 191 } 192 193 template<typename NameType> 194 void processImgAttribute(const NameType& attributeName, const String& attributeValue) 195 { 196 if (match(attributeName, srcAttr) && m_imgSrcUrl.isNull()) { 197 m_imgSrcUrl = attributeValue; 198 setUrlToLoad(bestFitSourceForImageAttributes(m_mediaValues->devicePixelRatio(), m_sourceSize, attributeValue, m_srcsetImageCandidate), AllowURLReplacement); 199 } else if (match(attributeName, crossoriginAttr)) { 200 setCrossOriginAllowed(attributeValue); 201 } else if (match(attributeName, srcsetAttr) && m_srcsetImageCandidate.isEmpty()) { 202 m_srcsetAttributeValue = attributeValue; 203 m_srcsetImageCandidate = bestFitSourceForSrcsetAttribute(m_mediaValues->devicePixelRatio(), m_sourceSize, attributeValue); 204 setUrlToLoad(bestFitSourceForImageAttributes(m_mediaValues->devicePixelRatio(), m_sourceSize, m_imgSrcUrl, m_srcsetImageCandidate), AllowURLReplacement); 205 } else if (RuntimeEnabledFeatures::pictureSizesEnabled() && match(attributeName, sizesAttr) && !m_sourceSizeSet) { 206 m_sourceSize = SizesAttributeParser(m_mediaValues, attributeValue).length(); 207 m_sourceSizeSet = true; 208 if (!m_srcsetImageCandidate.isEmpty()) { 209 m_srcsetImageCandidate = bestFitSourceForSrcsetAttribute(m_mediaValues->devicePixelRatio(), m_sourceSize, m_srcsetAttributeValue); 210 setUrlToLoad(bestFitSourceForImageAttributes(m_mediaValues->devicePixelRatio(), m_sourceSize, m_imgSrcUrl, m_srcsetImageCandidate), AllowURLReplacement); 211 } 212 } 213 } 214 215 template<typename NameType> 216 void processLinkAttribute(const NameType& attributeName, const String& attributeValue) 217 { 218 // FIXME - Don't set rel/media/crossorigin multiple times. 219 if (match(attributeName, hrefAttr)) 220 setUrlToLoad(attributeValue, DisallowURLReplacement); 221 else if (match(attributeName, relAttr)) 222 m_linkIsStyleSheet = relAttributeIsStyleSheet(attributeValue); 223 else if (match(attributeName, mediaAttr)) 224 m_matchedMediaAttribute = mediaAttributeMatches(*m_mediaValues, attributeValue); 225 else if (match(attributeName, crossoriginAttr)) 226 setCrossOriginAllowed(attributeValue); 227 } 228 229 template<typename NameType> 230 void processInputAttribute(const NameType& attributeName, const String& attributeValue) 231 { 232 // FIXME - Don't set type multiple times. 233 if (match(attributeName, srcAttr)) 234 setUrlToLoad(attributeValue, DisallowURLReplacement); 235 else if (match(attributeName, typeAttr)) 236 m_inputIsImage = equalIgnoringCase(attributeValue, InputTypeNames::image); 237 } 238 239 template<typename NameType> 240 void processSourceAttribute(const NameType& attributeName, const String& attributeValue) 241 { 242 if (!RuntimeEnabledFeatures::pictureEnabled()) 243 return; 244 if (match(attributeName, srcsetAttr) && m_srcsetImageCandidate.isEmpty()) { 245 m_srcsetAttributeValue = attributeValue; 246 m_srcsetImageCandidate = bestFitSourceForSrcsetAttribute(m_mediaValues->devicePixelRatio(), m_sourceSize, attributeValue); 247 } else if (match(attributeName, sizesAttr) && !m_sourceSizeSet) { 248 m_sourceSize = SizesAttributeParser(m_mediaValues, attributeValue).length(); 249 m_sourceSizeSet = true; 250 if (!m_srcsetImageCandidate.isEmpty()) { 251 m_srcsetImageCandidate = bestFitSourceForSrcsetAttribute(m_mediaValues->devicePixelRatio(), m_sourceSize, m_srcsetAttributeValue); 252 } 253 } else if (match(attributeName, mediaAttr)) { 254 // FIXME - Don't match media multiple times. 255 m_matchedMediaAttribute = mediaAttributeMatches(*m_mediaValues, attributeValue); 256 } 257 258 } 259 260 template<typename NameType> 261 void processAttribute(const NameType& attributeName, const String& attributeValue) 262 { 263 if (match(attributeName, charsetAttr)) 264 m_charset = attributeValue; 265 266 if (match(m_tagImpl, scriptTag)) 267 processScriptAttribute(attributeName, attributeValue); 268 else if (match(m_tagImpl, imgTag)) 269 processImgAttribute(attributeName, attributeValue); 270 else if (match(m_tagImpl, linkTag)) 271 processLinkAttribute(attributeName, attributeValue); 272 else if (match(m_tagImpl, inputTag)) 273 processInputAttribute(attributeName, attributeValue); 274 else if (match(m_tagImpl, sourceTag)) 275 processSourceAttribute(attributeName, attributeValue); 276 } 277 278 static bool relAttributeIsStyleSheet(const String& attributeValue) 279 { 280 LinkRelAttribute rel(attributeValue); 281 return rel.isStyleSheet() && !rel.isAlternate() && rel.iconType() == InvalidIcon && !rel.isDNSPrefetch(); 282 } 283 284 void setUrlToLoad(const String& value, URLReplacement replacement) 285 { 286 // We only respect the first src/href, per HTML5: 287 // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#attribute-name-state 288 if (replacement == DisallowURLReplacement && !m_urlToLoad.isEmpty()) 289 return; 290 String url = stripLeadingAndTrailingHTMLSpaces(value); 291 if (url.isEmpty()) 292 return; 293 m_urlToLoad = url; 294 } 295 296 const String& charset() const 297 { 298 // FIXME: Its not clear that this if is needed, the loader probably ignores charset for image requests anyway. 299 if (match(m_tagImpl, imgTag)) 300 return emptyString(); 301 return m_charset; 302 } 303 304 Resource::Type resourceType() const 305 { 306 if (match(m_tagImpl, scriptTag)) 307 return Resource::Script; 308 if (match(m_tagImpl, imgTag) || (match(m_tagImpl, inputTag) && m_inputIsImage)) 309 return Resource::Image; 310 if (match(m_tagImpl, linkTag) && m_linkIsStyleSheet) 311 return Resource::CSSStyleSheet; 312 ASSERT_NOT_REACHED(); 313 return Resource::Raw; 314 } 315 316 bool shouldPreload() const 317 { 318 if (m_urlToLoad.isEmpty()) 319 return false; 320 if (match(m_tagImpl, linkTag) && !m_linkIsStyleSheet) 321 return false; 322 if (match(m_tagImpl, inputTag) && !m_inputIsImage) 323 return false; 324 return true; 325 } 326 327 bool isCORSEnabled() const 328 { 329 return m_isCORSEnabled; 330 } 331 332 StoredCredentials allowStoredCredentials() const 333 { 334 return m_allowCredentials; 335 } 336 337 void setCrossOriginAllowed(const String& corsSetting) 338 { 339 m_isCORSEnabled = true; 340 if (!corsSetting.isNull() && equalIgnoringCase(stripLeadingAndTrailingHTMLSpaces(corsSetting), "use-credentials")) 341 m_allowCredentials = AllowStoredCredentials; 342 else 343 m_allowCredentials = DoNotAllowStoredCredentials; 344 } 345 346 void setDefer(FetchRequest::DeferOption defer) 347 { 348 m_defer = defer; 349 } 350 351 bool defer() const 352 { 353 return m_defer; 354 } 355 356 const StringImpl* m_tagImpl; 357 String m_urlToLoad; 358 ImageCandidate m_srcsetImageCandidate; 359 String m_charset; 360 bool m_linkIsStyleSheet; 361 bool m_matchedMediaAttribute; 362 bool m_inputIsImage; 363 String m_imgSrcUrl; 364 String m_srcsetAttributeValue; 365 unsigned m_sourceSize; 366 bool m_sourceSizeSet; 367 bool m_isCORSEnabled; 368 FetchRequest::DeferOption m_defer; 369 StoredCredentials m_allowCredentials; 370 RefPtr<MediaValues> m_mediaValues; 371}; 372 373TokenPreloadScanner::TokenPreloadScanner(const KURL& documentURL, PassRefPtr<MediaValues> mediaValues) 374 : m_documentURL(documentURL) 375 , m_inStyle(false) 376 , m_inPicture(false) 377 , m_templateCount(0) 378 , m_mediaValues(mediaValues) 379{ 380} 381 382TokenPreloadScanner::~TokenPreloadScanner() 383{ 384} 385 386TokenPreloadScannerCheckpoint TokenPreloadScanner::createCheckpoint() 387{ 388 TokenPreloadScannerCheckpoint checkpoint = m_checkpoints.size(); 389 m_checkpoints.append(Checkpoint(m_predictedBaseElementURL, m_inStyle, m_templateCount)); 390 return checkpoint; 391} 392 393void TokenPreloadScanner::rewindTo(TokenPreloadScannerCheckpoint checkpointIndex) 394{ 395 ASSERT(checkpointIndex < m_checkpoints.size()); // If this ASSERT fires, checkpointIndex is invalid. 396 const Checkpoint& checkpoint = m_checkpoints[checkpointIndex]; 397 m_predictedBaseElementURL = checkpoint.predictedBaseElementURL; 398 m_inStyle = checkpoint.inStyle; 399 m_templateCount = checkpoint.templateCount; 400 m_cssScanner.reset(); 401 m_checkpoints.clear(); 402} 403 404void TokenPreloadScanner::scan(const HTMLToken& token, const SegmentedString& source, PreloadRequestStream& requests) 405{ 406 scanCommon(token, source, requests); 407} 408 409void TokenPreloadScanner::scan(const CompactHTMLToken& token, const SegmentedString& source, PreloadRequestStream& requests) 410{ 411 scanCommon(token, source, requests); 412} 413 414template<typename Token> 415void TokenPreloadScanner::scanCommon(const Token& token, const SegmentedString& source, PreloadRequestStream& requests) 416{ 417 switch (token.type()) { 418 case HTMLToken::Character: { 419 if (!m_inStyle) 420 return; 421 m_cssScanner.scan(token.data(), source, requests); 422 return; 423 } 424 case HTMLToken::EndTag: { 425 const StringImpl* tagImpl = tagImplFor(token.data()); 426 if (match(tagImpl, templateTag)) { 427 if (m_templateCount) 428 --m_templateCount; 429 return; 430 } 431 if (match(tagImpl, styleTag)) { 432 if (m_inStyle) 433 m_cssScanner.reset(); 434 m_inStyle = false; 435 return; 436 } 437 if (match(tagImpl, pictureTag)) 438 m_inPicture = false; 439 return; 440 } 441 case HTMLToken::StartTag: { 442 if (m_templateCount) 443 return; 444 const StringImpl* tagImpl = tagImplFor(token.data()); 445 if (match(tagImpl, templateTag)) { 446 ++m_templateCount; 447 return; 448 } 449 if (match(tagImpl, styleTag)) { 450 m_inStyle = true; 451 return; 452 } 453 if (match(tagImpl, baseTag)) { 454 // The first <base> element is the one that wins. 455 if (!m_predictedBaseElementURL.isEmpty()) 456 return; 457 updatePredictedBaseURL(token); 458 return; 459 } 460 if (RuntimeEnabledFeatures::pictureEnabled() && (match(tagImpl, pictureTag))) { 461 m_inPicture = true; 462 m_pictureSourceURL = String(); 463 return; 464 } 465 466 StartTagScanner scanner(tagImpl, m_mediaValues); 467 scanner.processAttributes(token.attributes()); 468 if (m_inPicture) 469 scanner.handlePictureSourceURL(m_pictureSourceURL); 470 OwnPtr<PreloadRequest> request = scanner.createPreloadRequest(m_predictedBaseElementURL, source); 471 if (request) 472 requests.append(request.release()); 473 return; 474 } 475 default: { 476 return; 477 } 478 } 479} 480 481template<typename Token> 482void TokenPreloadScanner::updatePredictedBaseURL(const Token& token) 483{ 484 ASSERT(m_predictedBaseElementURL.isEmpty()); 485 if (const typename Token::Attribute* hrefAttribute = token.getAttributeItem(hrefAttr)) 486 m_predictedBaseElementURL = KURL(m_documentURL, stripLeadingAndTrailingHTMLSpaces(hrefAttribute->value)).copy(); 487} 488 489HTMLPreloadScanner::HTMLPreloadScanner(const HTMLParserOptions& options, const KURL& documentURL, PassRefPtr<MediaValues> mediaValues) 490 : m_scanner(documentURL, mediaValues) 491 , m_tokenizer(HTMLTokenizer::create(options)) 492{ 493} 494 495HTMLPreloadScanner::~HTMLPreloadScanner() 496{ 497} 498 499void HTMLPreloadScanner::appendToEnd(const SegmentedString& source) 500{ 501 m_source.append(source); 502} 503 504void HTMLPreloadScanner::scan(HTMLResourcePreloader* preloader, const KURL& startingBaseElementURL) 505{ 506 ASSERT(isMainThread()); // HTMLTokenizer::updateStateFor only works on the main thread. 507 508 TRACE_EVENT1("blink", "HTMLPreloadScanner::scan", "source_length", m_source.length()); 509 510 // When we start scanning, our best prediction of the baseElementURL is the real one! 511 if (!startingBaseElementURL.isEmpty()) 512 m_scanner.setPredictedBaseElementURL(startingBaseElementURL); 513 514 PreloadRequestStream requests; 515 516 while (m_tokenizer->nextToken(m_source, m_token)) { 517 if (m_token.type() == HTMLToken::StartTag) 518 m_tokenizer->updateStateFor(attemptStaticStringCreation(m_token.name(), Likely8Bit)); 519 m_scanner.scan(m_token, m_source, requests); 520 m_token.clear(); 521 } 522 523 preloader->takeAndPreload(requests); 524} 525 526} 527