1/*
2 * Copyright (C) 2008 Apple Inc. All Rights Reserved.
3 * Copyright (C) 2009 Torch Mobile, Inc. http://www.torchmobile.com/
4 * Copyright (C) 2010 Google Inc. All Rights Reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
16 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
19 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
23 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28#include "config.h"
29#include "core/html/parser/HTMLPreloadScanner.h"
30
31#include "HTMLNames.h"
32#include "InputTypeNames.h"
33#include "RuntimeEnabledFeatures.h"
34#include "core/html/LinkRelAttribute.h"
35#include "core/html/parser/HTMLParserIdioms.h"
36#include "core/html/parser/HTMLSrcsetParser.h"
37#include "core/html/parser/HTMLTokenizer.h"
38#include "platform/TraceEvent.h"
39#include "wtf/MainThread.h"
40
41namespace WebCore {
42
43using namespace HTMLNames;
44
// Returns true when |impl| is the very same StringImpl object that backs
// |qName|'s local name. This is a pointer comparison only; |impl| is never
// dereferenced, so passing 0 is safe.
static bool match(const StringImpl* impl, const QualifiedName& qName)
{
    return impl == qName.localName().impl();
}
49
// AtomicString overload: compares |name| against |qName|'s local name.
// Asserts that it is running on the main thread.
static bool match(const AtomicString& name, const QualifiedName& qName)
{
    ASSERT(isMainThread());
    return qName.localName() == name;
}
55
// String overload: delegates the comparison to threadSafeMatch(). Unlike the
// AtomicString overload, this one carries no main-thread assertion.
static bool match(const String& name, const QualifiedName& qName)
{
    return threadSafeMatch(name, qName);
}
60
61static const StringImpl* tagImplFor(const HTMLToken::DataVector& data)
62{
63    AtomicString tagName(data);
64    const StringImpl* result = tagName.impl();
65    if (result->isStatic())
66        return result;
67    return 0;
68}
69
70static const StringImpl* tagImplFor(const String& tagName)
71{
72    const StringImpl* result = tagName.impl();
73    if (result->isStatic())
74        return result;
75    return 0;
76}
77
78static String initiatorFor(const StringImpl* tagImpl)
79{
80    ASSERT(tagImpl);
81    if (match(tagImpl, imgTag))
82        return imgTag.localName();
83    if (match(tagImpl, inputTag))
84        return inputTag.localName();
85    if (match(tagImpl, linkTag))
86        return linkTag.localName();
87    if (match(tagImpl, scriptTag))
88        return scriptTag.localName();
89    ASSERT_NOT_REACHED();
90    return emptyString();
91}
92
93class TokenPreloadScanner::StartTagScanner {
94public:
95    StartTagScanner(const StringImpl* tagImpl, float deviceScaleFactor)
96        : m_tagImpl(tagImpl)
97        , m_linkIsStyleSheet(false)
98        , m_inputIsImage(false)
99        , m_deviceScaleFactor(deviceScaleFactor)
100        , m_encounteredImgSrc(false)
101        , m_isCORSEnabled(false)
102        , m_allowCredentials(DoNotAllowStoredCredentials)
103    {
104        if (!match(m_tagImpl, imgTag)
105            && !match(m_tagImpl, inputTag)
106            && !match(m_tagImpl, linkTag)
107            && !match(m_tagImpl, scriptTag))
108            m_tagImpl = 0;
109    }
110
111    enum URLReplacement {
112        AllowURLReplacement,
113        DisallowURLReplacement
114    };
115
116    void processAttributes(const HTMLToken::AttributeList& attributes)
117    {
118        ASSERT(isMainThread());
119        if (!m_tagImpl)
120            return;
121        for (HTMLToken::AttributeList::const_iterator iter = attributes.begin(); iter != attributes.end(); ++iter) {
122            AtomicString attributeName(iter->name);
123            String attributeValue = StringImpl::create8BitIfPossible(iter->value);
124            processAttribute(attributeName, attributeValue);
125        }
126    }
127
128    void processAttributes(const Vector<CompactHTMLToken::Attribute>& attributes)
129    {
130        if (!m_tagImpl)
131            return;
132        for (Vector<CompactHTMLToken::Attribute>::const_iterator iter = attributes.begin(); iter != attributes.end(); ++iter)
133            processAttribute(iter->name, iter->value);
134    }
135
136    PassOwnPtr<PreloadRequest> createPreloadRequest(const KURL& predictedBaseURL, const SegmentedString& source)
137    {
138        if (!shouldPreload())
139            return nullptr;
140
141        TRACE_EVENT_INSTANT1("net", "PreloadRequest", "url", m_urlToLoad.ascii());
142        TextPosition position = TextPosition(source.currentLine(), source.currentColumn());
143        OwnPtr<PreloadRequest> request = PreloadRequest::create(initiatorFor(m_tagImpl), position, m_urlToLoad, predictedBaseURL, resourceType(), m_mediaAttribute);
144        if (isCORSEnabled())
145            request->setCrossOriginEnabled(allowCredentials());
146        request->setCharset(charset());
147        return request.release();
148    }
149
150private:
151    template<typename NameType>
152    void processAttribute(const NameType& attributeName, const String& attributeValue)
153    {
154        if (match(attributeName, charsetAttr))
155            m_charset = attributeValue;
156
157        if (match(m_tagImpl, scriptTag)) {
158            if (match(attributeName, srcAttr))
159                setUrlToLoad(attributeValue, DisallowURLReplacement);
160            else if (match(attributeName, crossoriginAttr))
161                setCrossOriginAllowed(attributeValue);
162        } else if (match(m_tagImpl, imgTag)) {
163            if (match(attributeName, srcAttr) && !m_encounteredImgSrc) {
164                m_encounteredImgSrc = true;
165                setUrlToLoad(bestFitSourceForImageAttributes(m_deviceScaleFactor, attributeValue, m_srcsetImageCandidate), AllowURLReplacement);
166            } else if (match(attributeName, crossoriginAttr)) {
167                setCrossOriginAllowed(attributeValue);
168            } else if (RuntimeEnabledFeatures::srcsetEnabled()
169                && match(attributeName, srcsetAttr)
170                && m_srcsetImageCandidate.isEmpty()) {
171                m_srcsetImageCandidate = bestFitSourceForSrcsetAttribute(m_deviceScaleFactor, attributeValue);
172                setUrlToLoad(bestFitSourceForImageAttributes(m_deviceScaleFactor, m_urlToLoad, m_srcsetImageCandidate), AllowURLReplacement);
173            }
174        } else if (match(m_tagImpl, linkTag)) {
175            if (match(attributeName, hrefAttr))
176                setUrlToLoad(attributeValue, DisallowURLReplacement);
177            else if (match(attributeName, relAttr))
178                m_linkIsStyleSheet = relAttributeIsStyleSheet(attributeValue);
179            else if (match(attributeName, mediaAttr))
180                m_mediaAttribute = attributeValue;
181        } else if (match(m_tagImpl, inputTag)) {
182            if (match(attributeName, srcAttr))
183                setUrlToLoad(attributeValue, DisallowURLReplacement);
184            else if (match(attributeName, typeAttr))
185                m_inputIsImage = equalIgnoringCase(attributeValue, InputTypeNames::image);
186        }
187    }
188
189    static bool relAttributeIsStyleSheet(const String& attributeValue)
190    {
191        LinkRelAttribute rel(attributeValue);
192        return rel.isStyleSheet() && !rel.isAlternate() && rel.iconType() == InvalidIcon && !rel.isDNSPrefetch();
193    }
194
195    void setUrlToLoad(const String& value, URLReplacement replacement)
196    {
197        // We only respect the first src/href, per HTML5:
198        // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#attribute-name-state
199        if (replacement == DisallowURLReplacement && !m_urlToLoad.isEmpty())
200            return;
201        String url = stripLeadingAndTrailingHTMLSpaces(value);
202        if (url.isEmpty())
203            return;
204        m_urlToLoad = url;
205    }
206
207    const String& charset() const
208    {
209        // FIXME: Its not clear that this if is needed, the loader probably ignores charset for image requests anyway.
210        if (match(m_tagImpl, imgTag))
211            return emptyString();
212        return m_charset;
213    }
214
215    Resource::Type resourceType() const
216    {
217        if (match(m_tagImpl, scriptTag))
218            return Resource::Script;
219        if (match(m_tagImpl, imgTag) || (match(m_tagImpl, inputTag) && m_inputIsImage))
220            return Resource::Image;
221        if (match(m_tagImpl, linkTag) && m_linkIsStyleSheet)
222            return Resource::CSSStyleSheet;
223        ASSERT_NOT_REACHED();
224        return Resource::Raw;
225    }
226
227    bool shouldPreload() const
228    {
229        if (m_urlToLoad.isEmpty())
230            return false;
231        if (match(m_tagImpl, linkTag) && !m_linkIsStyleSheet)
232            return false;
233        if (match(m_tagImpl, inputTag) && !m_inputIsImage)
234            return false;
235        return true;
236    }
237
238    bool isCORSEnabled() const
239    {
240        return m_isCORSEnabled;
241    }
242
243    StoredCredentials allowCredentials() const
244    {
245        return m_allowCredentials;
246    }
247
248    void setCrossOriginAllowed(const String& corsSetting)
249    {
250        m_isCORSEnabled = true;
251        if (!corsSetting.isNull() && equalIgnoringCase(stripLeadingAndTrailingHTMLSpaces(corsSetting), "use-credentials"))
252            m_allowCredentials = AllowStoredCredentials;
253        else
254            m_allowCredentials = DoNotAllowStoredCredentials;
255    }
256
257    const StringImpl* m_tagImpl;
258    String m_urlToLoad;
259    ImageCandidate m_srcsetImageCandidate;
260    String m_charset;
261    bool m_linkIsStyleSheet;
262    String m_mediaAttribute;
263    bool m_inputIsImage;
264    float m_deviceScaleFactor;
265    bool m_encounteredImgSrc;
266    bool m_isCORSEnabled;
267    StoredCredentials m_allowCredentials;
268};
269
// Creates a scanner for the document at |documentURL|. Scanning starts
// outside any <style> element and with a template nesting count of zero.
// |deviceScaleFactor| is stored and later handed to each StartTagScanner.
TokenPreloadScanner::TokenPreloadScanner(const KURL& documentURL, float deviceScaleFactor)
    : m_documentURL(documentURL)
    , m_inStyle(false)
    , m_deviceScaleFactor(deviceScaleFactor)
    , m_templateCount(0)
{
}
277
// Nothing to release explicitly; members clean up after themselves.
TokenPreloadScanner::~TokenPreloadScanner()
{
}
281
282TokenPreloadScannerCheckpoint TokenPreloadScanner::createCheckpoint()
283{
284    TokenPreloadScannerCheckpoint checkpoint = m_checkpoints.size();
285    m_checkpoints.append(Checkpoint(m_predictedBaseElementURL, m_inStyle, m_templateCount));
286    return checkpoint;
287}
288
289void TokenPreloadScanner::rewindTo(TokenPreloadScannerCheckpoint checkpointIndex)
290{
291    ASSERT(checkpointIndex < m_checkpoints.size()); // If this ASSERT fires, checkpointIndex is invalid.
292    const Checkpoint& checkpoint = m_checkpoints[checkpointIndex];
293    m_predictedBaseElementURL = checkpoint.predictedBaseElementURL;
294    m_inStyle = checkpoint.inStyle;
295    m_templateCount = checkpoint.templateCount;
296    m_cssScanner.reset();
297    m_checkpoints.clear();
298}
299
// Scans one HTMLToken, appending any resulting preload requests to
// |requests|. Forwards to the shared scanCommon() template.
void TokenPreloadScanner::scan(const HTMLToken& token, const SegmentedString& source, PreloadRequestStream& requests)
{
    scanCommon(token, source, requests);
}
304
// Scans one CompactHTMLToken, appending any resulting preload requests to
// |requests|. Forwards to the shared scanCommon() template.
void TokenPreloadScanner::scan(const CompactHTMLToken& token, const SegmentedString& source, PreloadRequestStream& requests)
{
    scanCommon(token, source, requests);
}
309
310template<typename Token>
311void TokenPreloadScanner::scanCommon(const Token& token, const SegmentedString& source, PreloadRequestStream& requests)
312{
313    switch (token.type()) {
314    case HTMLToken::Character: {
315        if (!m_inStyle)
316            return;
317        m_cssScanner.scan(token.data(), source, requests);
318        return;
319    }
320    case HTMLToken::EndTag: {
321        const StringImpl* tagImpl = tagImplFor(token.data());
322        if (match(tagImpl, templateTag)) {
323            if (m_templateCount)
324                --m_templateCount;
325            return;
326        }
327        if (match(tagImpl, styleTag)) {
328            if (m_inStyle)
329                m_cssScanner.reset();
330            m_inStyle = false;
331        }
332        return;
333    }
334    case HTMLToken::StartTag: {
335        if (m_templateCount)
336            return;
337        const StringImpl* tagImpl = tagImplFor(token.data());
338        if (match(tagImpl, templateTag)) {
339            ++m_templateCount;
340            return;
341        }
342        if (match(tagImpl, styleTag)) {
343            m_inStyle = true;
344            return;
345        }
346        if (match(tagImpl, baseTag)) {
347            // The first <base> element is the one that wins.
348            if (!m_predictedBaseElementURL.isEmpty())
349                return;
350            updatePredictedBaseURL(token);
351            return;
352        }
353
354        StartTagScanner scanner(tagImpl, m_deviceScaleFactor);
355        scanner.processAttributes(token.attributes());
356        OwnPtr<PreloadRequest> request = scanner.createPreloadRequest(m_predictedBaseElementURL, source);
357        if (request)
358            requests.append(request.release());
359        return;
360    }
361    default: {
362        return;
363    }
364    }
365}
366
367template<typename Token>
368void TokenPreloadScanner::updatePredictedBaseURL(const Token& token)
369{
370    ASSERT(m_predictedBaseElementURL.isEmpty());
371    if (const typename Token::Attribute* hrefAttribute = token.getAttributeItem(hrefAttr))
372        m_predictedBaseElementURL = KURL(m_documentURL, stripLeadingAndTrailingHTMLSpaces(hrefAttribute->value)).copy();
373}
374
// Builds the token-level scanner from |documentURL| and |deviceScaleFactor|,
// and creates a private HTMLTokenizer configured with |options|.
HTMLPreloadScanner::HTMLPreloadScanner(const HTMLParserOptions& options, const KURL& documentURL, float deviceScaleFactor)
    : m_scanner(documentURL, deviceScaleFactor)
    , m_tokenizer(HTMLTokenizer::create(options))
{
}
380
// Nothing to release explicitly; members clean up after themselves.
HTMLPreloadScanner::~HTMLPreloadScanner()
{
}
384
// Queues more markup for scanning by appending |source| to the pending
// input. Nothing is tokenized here; that happens in scan().
void HTMLPreloadScanner::appendToEnd(const SegmentedString& source)
{
    m_source.append(source);
}
389
390void HTMLPreloadScanner::scan(HTMLResourcePreloader* preloader, const KURL& startingBaseElementURL)
391{
392    ASSERT(isMainThread()); // HTMLTokenizer::updateStateFor only works on the main thread.
393
394    // When we start scanning, our best prediction of the baseElementURL is the real one!
395    if (!startingBaseElementURL.isEmpty())
396        m_scanner.setPredictedBaseElementURL(startingBaseElementURL);
397
398    PreloadRequestStream requests;
399
400    while (m_tokenizer->nextToken(m_source, m_token)) {
401        if (m_token.type() == HTMLToken::StartTag)
402            m_tokenizer->updateStateFor(AtomicString(m_token.name()));
403        m_scanner.scan(m_token, m_source, requests);
404        m_token.clear();
405    }
406
407    preloader->takeAndPreload(requests);
408}
409
410}
411