1/*
2 * Copyright (C) 2008 Apple Inc. All Rights Reserved.
3 * Copyright (C) 2009 Torch Mobile, Inc. http://www.torchmobile.com/
4 * Copyright (C) 2010 Google Inc. All Rights Reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
16 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
19 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
23 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28#include "config.h"
29#include "core/html/parser/HTMLPreloadScanner.h"
30
31#include "core/HTMLNames.h"
32#include "core/InputTypeNames.h"
33#include "core/css/MediaList.h"
34#include "core/css/MediaQueryEvaluator.h"
35#include "core/css/MediaValues.h"
36#include "core/css/parser/SizesAttributeParser.h"
37#include "core/html/LinkRelAttribute.h"
38#include "core/html/parser/HTMLParserIdioms.h"
39#include "core/html/parser/HTMLSrcsetParser.h"
40#include "core/html/parser/HTMLTokenizer.h"
41#include "platform/RuntimeEnabledFeatures.h"
42#include "platform/TraceEvent.h"
43#include "wtf/MainThread.h"
44
45namespace blink {
46
47using namespace HTMLNames;
48
49static bool match(const StringImpl* impl, const QualifiedName& qName)
50{
51    return impl == qName.localName().impl();
52}
53
54static bool match(const AtomicString& name, const QualifiedName& qName)
55{
56    ASSERT(isMainThread());
57    return qName.localName() == name;
58}
59
60static bool match(const String& name, const QualifiedName& qName)
61{
62    return threadSafeMatch(name, qName);
63}
64
65static const StringImpl* tagImplFor(const HTMLToken::DataVector& data)
66{
67    AtomicString tagName(data);
68    const StringImpl* result = tagName.impl();
69    if (result->isStatic())
70        return result;
71    return 0;
72}
73
74static const StringImpl* tagImplFor(const String& tagName)
75{
76    const StringImpl* result = tagName.impl();
77    if (result->isStatic())
78        return result;
79    return 0;
80}
81
82static String initiatorFor(const StringImpl* tagImpl)
83{
84    ASSERT(tagImpl);
85    if (match(tagImpl, imgTag))
86        return imgTag.localName();
87    if (match(tagImpl, inputTag))
88        return inputTag.localName();
89    if (match(tagImpl, linkTag))
90        return linkTag.localName();
91    if (match(tagImpl, scriptTag))
92        return scriptTag.localName();
93    ASSERT_NOT_REACHED();
94    return emptyString();
95}
96
97static bool mediaAttributeMatches(const MediaValues& mediaValues, const String& attributeValue)
98{
99    RefPtrWillBeRawPtr<MediaQuerySet> mediaQueries = MediaQuerySet::createOffMainThread(attributeValue);
100    MediaQueryEvaluator mediaQueryEvaluator(mediaValues);
101    return mediaQueryEvaluator.eval(mediaQueries.get());
102}
103
104class TokenPreloadScanner::StartTagScanner {
105public:
106    StartTagScanner(const StringImpl* tagImpl, PassRefPtr<MediaValues> mediaValues)
107        : m_tagImpl(tagImpl)
108        , m_linkIsStyleSheet(false)
109        , m_matchedMediaAttribute(true)
110        , m_inputIsImage(false)
111        , m_sourceSize(0)
112        , m_sourceSizeSet(false)
113        , m_isCORSEnabled(false)
114        , m_defer(FetchRequest::NoDefer)
115        , m_allowCredentials(DoNotAllowStoredCredentials)
116        , m_mediaValues(mediaValues)
117    {
118        if (match(m_tagImpl, imgTag)
119            || match(m_tagImpl, sourceTag)) {
120            if (RuntimeEnabledFeatures::pictureSizesEnabled())
121                m_sourceSize = SizesAttributeParser(m_mediaValues, String()).length();
122            return;
123        }
124        if ( !match(m_tagImpl, inputTag)
125            && !match(m_tagImpl, linkTag)
126            && !match(m_tagImpl, scriptTag))
127            m_tagImpl = 0;
128    }
129
130    enum URLReplacement {
131        AllowURLReplacement,
132        DisallowURLReplacement
133    };
134
135    void processAttributes(const HTMLToken::AttributeList& attributes)
136    {
137        ASSERT(isMainThread());
138        if (!m_tagImpl)
139            return;
140        for (HTMLToken::AttributeList::const_iterator iter = attributes.begin(); iter != attributes.end(); ++iter) {
141            AtomicString attributeName(iter->name);
142            String attributeValue = StringImpl::create8BitIfPossible(iter->value);
143            processAttribute(attributeName, attributeValue);
144        }
145    }
146
147    void processAttributes(const Vector<CompactHTMLToken::Attribute>& attributes)
148    {
149        if (!m_tagImpl)
150            return;
151        for (Vector<CompactHTMLToken::Attribute>::const_iterator iter = attributes.begin(); iter != attributes.end(); ++iter)
152            processAttribute(iter->name, iter->value);
153    }
154
155    void handlePictureSourceURL(String& sourceURL)
156    {
157        if (match(m_tagImpl, sourceTag) && m_matchedMediaAttribute && sourceURL.isEmpty())
158            sourceURL = m_srcsetImageCandidate.toString();
159        else if (match(m_tagImpl, imgTag) && !sourceURL.isEmpty())
160            setUrlToLoad(sourceURL, AllowURLReplacement);
161    }
162
163    PassOwnPtr<PreloadRequest> createPreloadRequest(const KURL& predictedBaseURL, const SegmentedString& source)
164    {
165        if (!shouldPreload() || !m_matchedMediaAttribute)
166            return nullptr;
167
168        TRACE_EVENT_INSTANT1("net", "PreloadRequest", "url", m_urlToLoad.ascii());
169        TextPosition position = TextPosition(source.currentLine(), source.currentColumn());
170        OwnPtr<PreloadRequest> request = PreloadRequest::create(initiatorFor(m_tagImpl), position, m_urlToLoad, predictedBaseURL, resourceType());
171        if (isCORSEnabled())
172            request->setCrossOriginEnabled(allowStoredCredentials());
173        request->setCharset(charset());
174        request->setDefer(m_defer);
175        return request.release();
176    }
177
178private:
179    template<typename NameType>
180    void processScriptAttribute(const NameType& attributeName, const String& attributeValue)
181    {
182        // FIXME - Don't set crossorigin multiple times.
183        if (match(attributeName, srcAttr))
184            setUrlToLoad(attributeValue, DisallowURLReplacement);
185        else if (match(attributeName, crossoriginAttr))
186            setCrossOriginAllowed(attributeValue);
187        else if (match(attributeName, asyncAttr))
188            setDefer(FetchRequest::LazyLoad);
189        else if (match(attributeName, deferAttr))
190            setDefer(FetchRequest::LazyLoad);
191    }
192
193    template<typename NameType>
194    void processImgAttribute(const NameType& attributeName, const String& attributeValue)
195    {
196        if (match(attributeName, srcAttr) && m_imgSrcUrl.isNull()) {
197            m_imgSrcUrl = attributeValue;
198            setUrlToLoad(bestFitSourceForImageAttributes(m_mediaValues->devicePixelRatio(), m_sourceSize, attributeValue, m_srcsetImageCandidate), AllowURLReplacement);
199        } else if (match(attributeName, crossoriginAttr)) {
200            setCrossOriginAllowed(attributeValue);
201        } else if (match(attributeName, srcsetAttr) && m_srcsetImageCandidate.isEmpty()) {
202            m_srcsetAttributeValue = attributeValue;
203            m_srcsetImageCandidate = bestFitSourceForSrcsetAttribute(m_mediaValues->devicePixelRatio(), m_sourceSize, attributeValue);
204            setUrlToLoad(bestFitSourceForImageAttributes(m_mediaValues->devicePixelRatio(), m_sourceSize, m_imgSrcUrl, m_srcsetImageCandidate), AllowURLReplacement);
205        } else if (RuntimeEnabledFeatures::pictureSizesEnabled() && match(attributeName, sizesAttr) && !m_sourceSizeSet) {
206            m_sourceSize = SizesAttributeParser(m_mediaValues, attributeValue).length();
207            m_sourceSizeSet = true;
208            if (!m_srcsetImageCandidate.isEmpty()) {
209                m_srcsetImageCandidate = bestFitSourceForSrcsetAttribute(m_mediaValues->devicePixelRatio(), m_sourceSize, m_srcsetAttributeValue);
210                setUrlToLoad(bestFitSourceForImageAttributes(m_mediaValues->devicePixelRatio(), m_sourceSize, m_imgSrcUrl, m_srcsetImageCandidate), AllowURLReplacement);
211            }
212        }
213    }
214
215    template<typename NameType>
216    void processLinkAttribute(const NameType& attributeName, const String& attributeValue)
217    {
218        // FIXME - Don't set rel/media/crossorigin multiple times.
219        if (match(attributeName, hrefAttr))
220            setUrlToLoad(attributeValue, DisallowURLReplacement);
221        else if (match(attributeName, relAttr))
222            m_linkIsStyleSheet = relAttributeIsStyleSheet(attributeValue);
223        else if (match(attributeName, mediaAttr))
224            m_matchedMediaAttribute = mediaAttributeMatches(*m_mediaValues, attributeValue);
225        else if (match(attributeName, crossoriginAttr))
226            setCrossOriginAllowed(attributeValue);
227    }
228
229    template<typename NameType>
230    void processInputAttribute(const NameType& attributeName, const String& attributeValue)
231    {
232        // FIXME - Don't set type multiple times.
233        if (match(attributeName, srcAttr))
234            setUrlToLoad(attributeValue, DisallowURLReplacement);
235        else if (match(attributeName, typeAttr))
236            m_inputIsImage = equalIgnoringCase(attributeValue, InputTypeNames::image);
237    }
238
239    template<typename NameType>
240    void processSourceAttribute(const NameType& attributeName, const String& attributeValue)
241    {
242        if (!RuntimeEnabledFeatures::pictureEnabled())
243            return;
244        if (match(attributeName, srcsetAttr) && m_srcsetImageCandidate.isEmpty()) {
245            m_srcsetAttributeValue = attributeValue;
246            m_srcsetImageCandidate = bestFitSourceForSrcsetAttribute(m_mediaValues->devicePixelRatio(), m_sourceSize, attributeValue);
247        } else if (match(attributeName, sizesAttr) && !m_sourceSizeSet) {
248            m_sourceSize = SizesAttributeParser(m_mediaValues, attributeValue).length();
249            m_sourceSizeSet = true;
250            if (!m_srcsetImageCandidate.isEmpty()) {
251                m_srcsetImageCandidate = bestFitSourceForSrcsetAttribute(m_mediaValues->devicePixelRatio(), m_sourceSize, m_srcsetAttributeValue);
252            }
253        } else if (match(attributeName, mediaAttr)) {
254            // FIXME - Don't match media multiple times.
255            m_matchedMediaAttribute = mediaAttributeMatches(*m_mediaValues, attributeValue);
256        }
257
258    }
259
260    template<typename NameType>
261    void processAttribute(const NameType& attributeName, const String& attributeValue)
262    {
263        if (match(attributeName, charsetAttr))
264            m_charset = attributeValue;
265
266        if (match(m_tagImpl, scriptTag))
267            processScriptAttribute(attributeName, attributeValue);
268        else if (match(m_tagImpl, imgTag))
269            processImgAttribute(attributeName, attributeValue);
270        else if (match(m_tagImpl, linkTag))
271            processLinkAttribute(attributeName, attributeValue);
272        else if (match(m_tagImpl, inputTag))
273            processInputAttribute(attributeName, attributeValue);
274        else if (match(m_tagImpl, sourceTag))
275            processSourceAttribute(attributeName, attributeValue);
276    }
277
278    static bool relAttributeIsStyleSheet(const String& attributeValue)
279    {
280        LinkRelAttribute rel(attributeValue);
281        return rel.isStyleSheet() && !rel.isAlternate() && rel.iconType() == InvalidIcon && !rel.isDNSPrefetch();
282    }
283
284    void setUrlToLoad(const String& value, URLReplacement replacement)
285    {
286        // We only respect the first src/href, per HTML5:
287        // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#attribute-name-state
288        if (replacement == DisallowURLReplacement && !m_urlToLoad.isEmpty())
289            return;
290        String url = stripLeadingAndTrailingHTMLSpaces(value);
291        if (url.isEmpty())
292            return;
293        m_urlToLoad = url;
294    }
295
296    const String& charset() const
297    {
298        // FIXME: Its not clear that this if is needed, the loader probably ignores charset for image requests anyway.
299        if (match(m_tagImpl, imgTag))
300            return emptyString();
301        return m_charset;
302    }
303
304    Resource::Type resourceType() const
305    {
306        if (match(m_tagImpl, scriptTag))
307            return Resource::Script;
308        if (match(m_tagImpl, imgTag) || (match(m_tagImpl, inputTag) && m_inputIsImage))
309            return Resource::Image;
310        if (match(m_tagImpl, linkTag) && m_linkIsStyleSheet)
311            return Resource::CSSStyleSheet;
312        ASSERT_NOT_REACHED();
313        return Resource::Raw;
314    }
315
316    bool shouldPreload() const
317    {
318        if (m_urlToLoad.isEmpty())
319            return false;
320        if (match(m_tagImpl, linkTag) && !m_linkIsStyleSheet)
321            return false;
322        if (match(m_tagImpl, inputTag) && !m_inputIsImage)
323            return false;
324        return true;
325    }
326
327    bool isCORSEnabled() const
328    {
329        return m_isCORSEnabled;
330    }
331
332    StoredCredentials allowStoredCredentials() const
333    {
334        return m_allowCredentials;
335    }
336
337    void setCrossOriginAllowed(const String& corsSetting)
338    {
339        m_isCORSEnabled = true;
340        if (!corsSetting.isNull() && equalIgnoringCase(stripLeadingAndTrailingHTMLSpaces(corsSetting), "use-credentials"))
341            m_allowCredentials = AllowStoredCredentials;
342        else
343            m_allowCredentials = DoNotAllowStoredCredentials;
344    }
345
346    void setDefer(FetchRequest::DeferOption defer)
347    {
348        m_defer = defer;
349    }
350
351    bool defer() const
352    {
353        return m_defer;
354    }
355
356    const StringImpl* m_tagImpl;
357    String m_urlToLoad;
358    ImageCandidate m_srcsetImageCandidate;
359    String m_charset;
360    bool m_linkIsStyleSheet;
361    bool m_matchedMediaAttribute;
362    bool m_inputIsImage;
363    String m_imgSrcUrl;
364    String m_srcsetAttributeValue;
365    unsigned m_sourceSize;
366    bool m_sourceSizeSet;
367    bool m_isCORSEnabled;
368    FetchRequest::DeferOption m_defer;
369    StoredCredentials m_allowCredentials;
370    RefPtr<MediaValues> m_mediaValues;
371};
372
// Creates a scanner for the document at |documentURL|. Scanning starts outside
// of any <style> or <picture> element and with a template depth of zero.
// |mediaValues| is retained and passed to every StartTagScanner created while
// scanning start tags.
TokenPreloadScanner::TokenPreloadScanner(const KURL& documentURL, PassRefPtr<MediaValues> mediaValues)
    : m_documentURL(documentURL)
    , m_inStyle(false)
    , m_inPicture(false)
    , m_templateCount(0)
    , m_mediaValues(mediaValues)
{
}
381
// No explicit teardown is performed here.
TokenPreloadScanner::~TokenPreloadScanner()
{
}
385
386TokenPreloadScannerCheckpoint TokenPreloadScanner::createCheckpoint()
387{
388    TokenPreloadScannerCheckpoint checkpoint = m_checkpoints.size();
389    m_checkpoints.append(Checkpoint(m_predictedBaseElementURL, m_inStyle, m_templateCount));
390    return checkpoint;
391}
392
393void TokenPreloadScanner::rewindTo(TokenPreloadScannerCheckpoint checkpointIndex)
394{
395    ASSERT(checkpointIndex < m_checkpoints.size()); // If this ASSERT fires, checkpointIndex is invalid.
396    const Checkpoint& checkpoint = m_checkpoints[checkpointIndex];
397    m_predictedBaseElementURL = checkpoint.predictedBaseElementURL;
398    m_inStyle = checkpoint.inStyle;
399    m_templateCount = checkpoint.templateCount;
400    m_cssScanner.reset();
401    m_checkpoints.clear();
402}
403
404void TokenPreloadScanner::scan(const HTMLToken& token, const SegmentedString& source, PreloadRequestStream& requests)
405{
406    scanCommon(token, source, requests);
407}
408
409void TokenPreloadScanner::scan(const CompactHTMLToken& token, const SegmentedString& source, PreloadRequestStream& requests)
410{
411    scanCommon(token, source, requests);
412}
413
414template<typename Token>
415void TokenPreloadScanner::scanCommon(const Token& token, const SegmentedString& source, PreloadRequestStream& requests)
416{
417    switch (token.type()) {
418    case HTMLToken::Character: {
419        if (!m_inStyle)
420            return;
421        m_cssScanner.scan(token.data(), source, requests);
422        return;
423    }
424    case HTMLToken::EndTag: {
425        const StringImpl* tagImpl = tagImplFor(token.data());
426        if (match(tagImpl, templateTag)) {
427            if (m_templateCount)
428                --m_templateCount;
429            return;
430        }
431        if (match(tagImpl, styleTag)) {
432            if (m_inStyle)
433                m_cssScanner.reset();
434            m_inStyle = false;
435            return;
436        }
437        if (match(tagImpl, pictureTag))
438            m_inPicture = false;
439        return;
440    }
441    case HTMLToken::StartTag: {
442        if (m_templateCount)
443            return;
444        const StringImpl* tagImpl = tagImplFor(token.data());
445        if (match(tagImpl, templateTag)) {
446            ++m_templateCount;
447            return;
448        }
449        if (match(tagImpl, styleTag)) {
450            m_inStyle = true;
451            return;
452        }
453        if (match(tagImpl, baseTag)) {
454            // The first <base> element is the one that wins.
455            if (!m_predictedBaseElementURL.isEmpty())
456                return;
457            updatePredictedBaseURL(token);
458            return;
459        }
460        if (RuntimeEnabledFeatures::pictureEnabled() && (match(tagImpl, pictureTag))) {
461            m_inPicture = true;
462            m_pictureSourceURL = String();
463            return;
464        }
465
466        StartTagScanner scanner(tagImpl, m_mediaValues);
467        scanner.processAttributes(token.attributes());
468        if (m_inPicture)
469            scanner.handlePictureSourceURL(m_pictureSourceURL);
470        OwnPtr<PreloadRequest> request = scanner.createPreloadRequest(m_predictedBaseElementURL, source);
471        if (request)
472            requests.append(request.release());
473        return;
474    }
475    default: {
476        return;
477    }
478    }
479}
480
481template<typename Token>
482void TokenPreloadScanner::updatePredictedBaseURL(const Token& token)
483{
484    ASSERT(m_predictedBaseElementURL.isEmpty());
485    if (const typename Token::Attribute* hrefAttribute = token.getAttributeItem(hrefAttr))
486        m_predictedBaseElementURL = KURL(m_documentURL, stripLeadingAndTrailingHTMLSpaces(hrefAttribute->value)).copy();
487}
488
// Sets up the token scanner for |documentURL| and |mediaValues|, and creates
// the tokenizer from |options|.
HTMLPreloadScanner::HTMLPreloadScanner(const HTMLParserOptions& options, const KURL& documentURL, PassRefPtr<MediaValues> mediaValues)
    : m_scanner(documentURL, mediaValues)
    , m_tokenizer(HTMLTokenizer::create(options))
{
}
494
// No explicit teardown is performed here.
HTMLPreloadScanner::~HTMLPreloadScanner()
{
}
498
// Appends |source| to the buffered input (m_source) that scan() tokenizes.
void HTMLPreloadScanner::appendToEnd(const SegmentedString& source)
{
    m_source.append(source);
}
503
504void HTMLPreloadScanner::scan(HTMLResourcePreloader* preloader, const KURL& startingBaseElementURL)
505{
506    ASSERT(isMainThread()); // HTMLTokenizer::updateStateFor only works on the main thread.
507
508    TRACE_EVENT1("blink", "HTMLPreloadScanner::scan", "source_length", m_source.length());
509
510    // When we start scanning, our best prediction of the baseElementURL is the real one!
511    if (!startingBaseElementURL.isEmpty())
512        m_scanner.setPredictedBaseElementURL(startingBaseElementURL);
513
514    PreloadRequestStream requests;
515
516    while (m_tokenizer->nextToken(m_source, m_token)) {
517        if (m_token.type() == HTMLToken::StartTag)
518            m_tokenizer->updateStateFor(attemptStaticStringCreation(m_token.name(), Likely8Bit));
519        m_scanner.scan(m_token, m_source, requests);
520        m_token.clear();
521    }
522
523    preloader->takeAndPreload(requests);
524}
525
526}
527