1/* 2 * Copyright (C) 2008 Apple Inc. All Rights Reserved. 3 * Copyright (C) 2009 Torch Mobile, Inc. http://www.torchmobile.com/ 4 * Copyright (C) 2010 Google Inc. All Rights Reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY 16 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR 19 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 */ 27 28#include "config.h" 29#include "core/html/parser/HTMLPreloadScanner.h" 30 31#include "HTMLNames.h" 32#include "InputTypeNames.h" 33#include "RuntimeEnabledFeatures.h" 34#include "core/html/LinkRelAttribute.h" 35#include "core/html/parser/HTMLParserIdioms.h" 36#include "core/html/parser/HTMLSrcsetParser.h" 37#include "core/html/parser/HTMLTokenizer.h" 38#include "platform/TraceEvent.h" 39#include "wtf/MainThread.h" 40 41namespace WebCore { 42 43using namespace HTMLNames; 44 45static bool match(const StringImpl* impl, const QualifiedName& qName) 46{ 47 return impl == qName.localName().impl(); 48} 49 50static bool match(const AtomicString& name, const QualifiedName& qName) 51{ 52 ASSERT(isMainThread()); 53 return qName.localName() == name; 54} 55 56static bool match(const String& name, const QualifiedName& qName) 57{ 58 return threadSafeMatch(name, qName); 59} 60 61static const StringImpl* tagImplFor(const HTMLToken::DataVector& data) 62{ 63 AtomicString tagName(data); 64 const StringImpl* result = tagName.impl(); 65 if (result->isStatic()) 66 return result; 67 return 0; 68} 69 70static const StringImpl* tagImplFor(const String& tagName) 71{ 72 const StringImpl* result = tagName.impl(); 73 if (result->isStatic()) 74 return result; 75 return 0; 76} 77 78static String initiatorFor(const StringImpl* tagImpl) 79{ 80 ASSERT(tagImpl); 81 if (match(tagImpl, imgTag)) 82 return imgTag.localName(); 83 if (match(tagImpl, inputTag)) 84 return inputTag.localName(); 85 if (match(tagImpl, linkTag)) 86 return linkTag.localName(); 87 if (match(tagImpl, scriptTag)) 88 return scriptTag.localName(); 89 ASSERT_NOT_REACHED(); 90 return emptyString(); 91} 92 93class TokenPreloadScanner::StartTagScanner { 94public: 95 StartTagScanner(const StringImpl* tagImpl, float deviceScaleFactor) 96 : m_tagImpl(tagImpl) 97 , m_linkIsStyleSheet(false) 98 , m_inputIsImage(false) 99 , m_deviceScaleFactor(deviceScaleFactor) 100 , m_encounteredImgSrc(false) 101 , m_isCORSEnabled(false) 102 , m_allowCredentials(DoNotAllowStoredCredentials) 103 { 104 if (!match(m_tagImpl, imgTag) 105 && !match(m_tagImpl, inputTag) 106 && !match(m_tagImpl, linkTag) 107 && !match(m_tagImpl, scriptTag)) 108 m_tagImpl = 0; 109 } 110 111 enum URLReplacement { 112 AllowURLReplacement, 113 DisallowURLReplacement 114 }; 115 116 void processAttributes(const HTMLToken::AttributeList& attributes) 117 { 118 ASSERT(isMainThread()); 119 if (!m_tagImpl) 120 return; 121 for (HTMLToken::AttributeList::const_iterator iter = attributes.begin(); iter != attributes.end(); ++iter) { 122 AtomicString attributeName(iter->name); 123 String attributeValue = StringImpl::create8BitIfPossible(iter->value); 124 processAttribute(attributeName, attributeValue); 125 } 126 } 127 128 void processAttributes(const Vector<CompactHTMLToken::Attribute>& attributes) 129 { 130 if (!m_tagImpl) 131 return; 132 for (Vector<CompactHTMLToken::Attribute>::const_iterator iter = attributes.begin(); iter != attributes.end(); ++iter) 133 processAttribute(iter->name, iter->value); 134 } 135 136 PassOwnPtr<PreloadRequest> createPreloadRequest(const KURL& predictedBaseURL, const SegmentedString& source) 137 { 138 if (!shouldPreload()) 139 return nullptr; 140 141 TRACE_EVENT_INSTANT1("net", "PreloadRequest", "url", m_urlToLoad.ascii()); 142 TextPosition position = TextPosition(source.currentLine(), source.currentColumn()); 143 OwnPtr<PreloadRequest> request = PreloadRequest::create(initiatorFor(m_tagImpl), position, m_urlToLoad, predictedBaseURL, resourceType(), m_mediaAttribute); 144 if (isCORSEnabled()) 145 request->setCrossOriginEnabled(allowCredentials()); 146 request->setCharset(charset()); 147 return request.release(); 148 } 149 150private: 151 template<typename NameType> 152 void processAttribute(const NameType& attributeName, const String& attributeValue) 153 { 154 if (match(attributeName, charsetAttr)) 155 m_charset = attributeValue; 156 157 if (match(m_tagImpl, scriptTag)) { 158 if (match(attributeName, srcAttr)) 159 setUrlToLoad(attributeValue, DisallowURLReplacement); 160 else if (match(attributeName, crossoriginAttr)) 161 setCrossOriginAllowed(attributeValue); 162 } else if (match(m_tagImpl, imgTag)) { 163 if (match(attributeName, srcAttr) && !m_encounteredImgSrc) { 164 m_encounteredImgSrc = true; 165 setUrlToLoad(bestFitSourceForImageAttributes(m_deviceScaleFactor, attributeValue, m_srcsetImageCandidate), AllowURLReplacement); 166 } else if (match(attributeName, crossoriginAttr)) { 167 setCrossOriginAllowed(attributeValue); 168 } else if (RuntimeEnabledFeatures::srcsetEnabled() 169 && match(attributeName, srcsetAttr) 170 && m_srcsetImageCandidate.isEmpty()) { 171 m_srcsetImageCandidate = bestFitSourceForSrcsetAttribute(m_deviceScaleFactor, attributeValue); 172 setUrlToLoad(bestFitSourceForImageAttributes(m_deviceScaleFactor, m_urlToLoad, m_srcsetImageCandidate), AllowURLReplacement); 173 } 174 } else if (match(m_tagImpl, linkTag)) { 175 if (match(attributeName, hrefAttr)) 176 setUrlToLoad(attributeValue, DisallowURLReplacement); 177 else if (match(attributeName, relAttr)) 178 m_linkIsStyleSheet = relAttributeIsStyleSheet(attributeValue); 179 else if (match(attributeName, mediaAttr)) 180 m_mediaAttribute = attributeValue; 181 } else if (match(m_tagImpl, inputTag)) { 182 if (match(attributeName, srcAttr)) 183 setUrlToLoad(attributeValue, DisallowURLReplacement); 184 else if (match(attributeName, typeAttr)) 185 m_inputIsImage = equalIgnoringCase(attributeValue, InputTypeNames::image); 186 } 187 } 188 189 static bool relAttributeIsStyleSheet(const String& attributeValue) 190 { 191 LinkRelAttribute rel(attributeValue); 192 return rel.isStyleSheet() && !rel.isAlternate() && rel.iconType() == InvalidIcon && !rel.isDNSPrefetch(); 193 } 194 195 void setUrlToLoad(const String& value, URLReplacement replacement) 196 { 197 // We only respect the first src/href, per HTML5: 198 // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#attribute-name-state 199 if (replacement == DisallowURLReplacement && !m_urlToLoad.isEmpty()) 200 return; 201 String url = stripLeadingAndTrailingHTMLSpaces(value); 202 if (url.isEmpty()) 203 return; 204 m_urlToLoad = url; 205 } 206 207 const String& charset() const 208 { 209 // FIXME: Its not clear that this if is needed, the loader probably ignores charset for image requests anyway. 210 if (match(m_tagImpl, imgTag)) 211 return emptyString(); 212 return m_charset; 213 } 214 215 Resource::Type resourceType() const 216 { 217 if (match(m_tagImpl, scriptTag)) 218 return Resource::Script; 219 if (match(m_tagImpl, imgTag) || (match(m_tagImpl, inputTag) && m_inputIsImage)) 220 return Resource::Image; 221 if (match(m_tagImpl, linkTag) && m_linkIsStyleSheet) 222 return Resource::CSSStyleSheet; 223 ASSERT_NOT_REACHED(); 224 return Resource::Raw; 225 } 226 227 bool shouldPreload() const 228 { 229 if (m_urlToLoad.isEmpty()) 230 return false; 231 if (match(m_tagImpl, linkTag) && !m_linkIsStyleSheet) 232 return false; 233 if (match(m_tagImpl, inputTag) && !m_inputIsImage) 234 return false; 235 return true; 236 } 237 238 bool isCORSEnabled() const 239 { 240 return m_isCORSEnabled; 241 } 242 243 StoredCredentials allowCredentials() const 244 { 245 return m_allowCredentials; 246 } 247 248 void setCrossOriginAllowed(const String& corsSetting) 249 { 250 m_isCORSEnabled = true; 251 if (!corsSetting.isNull() && equalIgnoringCase(stripLeadingAndTrailingHTMLSpaces(corsSetting), "use-credentials")) 252 m_allowCredentials = AllowStoredCredentials; 253 else 254 m_allowCredentials = DoNotAllowStoredCredentials; 255 } 256 257 const StringImpl* m_tagImpl; 258 String m_urlToLoad; 259 ImageCandidate m_srcsetImageCandidate; 260 String m_charset; 261 bool m_linkIsStyleSheet; 262 String m_mediaAttribute; 263 bool m_inputIsImage; 264 float m_deviceScaleFactor; 265 bool m_encounteredImgSrc; 266 bool m_isCORSEnabled; 267 StoredCredentials m_allowCredentials; 268}; 269 270TokenPreloadScanner::TokenPreloadScanner(const KURL& documentURL, float deviceScaleFactor) 271 : m_documentURL(documentURL) 272 , m_inStyle(false) 273 , m_deviceScaleFactor(deviceScaleFactor) 274 , m_templateCount(0) 275{ 276} 277 278TokenPreloadScanner::~TokenPreloadScanner() 279{ 280} 281 282TokenPreloadScannerCheckpoint TokenPreloadScanner::createCheckpoint() 283{ 284 TokenPreloadScannerCheckpoint checkpoint = m_checkpoints.size(); 285 m_checkpoints.append(Checkpoint(m_predictedBaseElementURL, m_inStyle, m_templateCount)); 286 return checkpoint; 287} 288 289void TokenPreloadScanner::rewindTo(TokenPreloadScannerCheckpoint checkpointIndex) 290{ 291 ASSERT(checkpointIndex < m_checkpoints.size()); // If this ASSERT fires, checkpointIndex is invalid. 292 const Checkpoint& checkpoint = m_checkpoints[checkpointIndex]; 293 m_predictedBaseElementURL = checkpoint.predictedBaseElementURL; 294 m_inStyle = checkpoint.inStyle; 295 m_templateCount = checkpoint.templateCount; 296 m_cssScanner.reset(); 297 m_checkpoints.clear(); 298} 299 300void TokenPreloadScanner::scan(const HTMLToken& token, const SegmentedString& source, PreloadRequestStream& requests) 301{ 302 scanCommon(token, source, requests); 303} 304 305void TokenPreloadScanner::scan(const CompactHTMLToken& token, const SegmentedString& source, PreloadRequestStream& requests) 306{ 307 scanCommon(token, source, requests); 308} 309 310template<typename Token> 311void TokenPreloadScanner::scanCommon(const Token& token, const SegmentedString& source, PreloadRequestStream& requests) 312{ 313 switch (token.type()) { 314 case HTMLToken::Character: { 315 if (!m_inStyle) 316 return; 317 m_cssScanner.scan(token.data(), source, requests); 318 return; 319 } 320 case HTMLToken::EndTag: { 321 const StringImpl* tagImpl = tagImplFor(token.data()); 322 if (match(tagImpl, templateTag)) { 323 if (m_templateCount) 324 --m_templateCount; 325 return; 326 } 327 if (match(tagImpl, styleTag)) { 328 if (m_inStyle) 329 m_cssScanner.reset(); 330 m_inStyle = false; 331 } 332 return; 333 } 334 case HTMLToken::StartTag: { 335 if (m_templateCount) 336 return; 337 const StringImpl* tagImpl = tagImplFor(token.data()); 338 if (match(tagImpl, templateTag)) { 339 ++m_templateCount; 340 return; 341 } 342 if (match(tagImpl, styleTag)) { 343 m_inStyle = true; 344 return; 345 } 346 if (match(tagImpl, baseTag)) { 347 // The first <base> element is the one that wins. 348 if (!m_predictedBaseElementURL.isEmpty()) 349 return; 350 updatePredictedBaseURL(token); 351 return; 352 } 353 354 StartTagScanner scanner(tagImpl, m_deviceScaleFactor); 355 scanner.processAttributes(token.attributes()); 356 OwnPtr<PreloadRequest> request = scanner.createPreloadRequest(m_predictedBaseElementURL, source); 357 if (request) 358 requests.append(request.release()); 359 return; 360 } 361 default: { 362 return; 363 } 364 } 365} 366 367template<typename Token> 368void TokenPreloadScanner::updatePredictedBaseURL(const Token& token) 369{ 370 ASSERT(m_predictedBaseElementURL.isEmpty()); 371 if (const typename Token::Attribute* hrefAttribute = token.getAttributeItem(hrefAttr)) 372 m_predictedBaseElementURL = KURL(m_documentURL, stripLeadingAndTrailingHTMLSpaces(hrefAttribute->value)).copy(); 373} 374 375HTMLPreloadScanner::HTMLPreloadScanner(const HTMLParserOptions& options, const KURL& documentURL, float deviceScaleFactor) 376 : m_scanner(documentURL, deviceScaleFactor) 377 , m_tokenizer(HTMLTokenizer::create(options)) 378{ 379} 380 381HTMLPreloadScanner::~HTMLPreloadScanner() 382{ 383} 384 385void HTMLPreloadScanner::appendToEnd(const SegmentedString& source) 386{ 387 m_source.append(source); 388} 389 390void HTMLPreloadScanner::scan(HTMLResourcePreloader* preloader, const KURL& startingBaseElementURL) 391{ 392 ASSERT(isMainThread()); // HTMLTokenizer::updateStateFor only works on the main thread. 393 394 // When we start scanning, our best prediction of the baseElementURL is the real one! 395 if (!startingBaseElementURL.isEmpty()) 396 m_scanner.setPredictedBaseElementURL(startingBaseElementURL); 397 398 PreloadRequestStream requests; 399 400 while (m_tokenizer->nextToken(m_source, m_token)) { 401 if (m_token.type() == HTMLToken::StartTag) 402 m_tokenizer->updateStateFor(AtomicString(m_token.name())); 403 m_scanner.scan(m_token, m_source, requests); 404 m_token.clear(); 405 } 406 407 preloader->takeAndPreload(requests); 408} 409 410} 411