1926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)/*
2926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) * Copyright (C) 2008 Apple Inc. All Rights Reserved.
3926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) * Copyright (C) 2009 Torch Mobile, Inc. http://www.torchmobile.com/
4926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) * Copyright (C) 2013 Google, Inc. All Rights Reserved.
5926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) *
6926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) * Redistribution and use in source and binary forms, with or without
7926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) * modification, are permitted provided that the following conditions
8926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) * are met:
9926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) * 1. Redistributions of source code must retain the above copyright
10926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) *    notice, this list of conditions and the following disclaimer.
11926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) * 2. Redistributions in binary form must reproduce the above copyright
12926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) *    notice, this list of conditions and the following disclaimer in the
13926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) *    documentation and/or other materials provided with the distribution.
14926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) *
15926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
16926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
19926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
23926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) */
27926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)
28926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)#ifndef InputStreamPreprocessor_h
29926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)#define InputStreamPreprocessor_h
30926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)
311e202183a5dc46166763171984b285173f8585e5Torne (Richard Coles)#include "platform/text/SegmentedString.h"
32e69819bd8e388ea4ad1636a19aa6b2eed4952191Ben Murdoch#include "wtf/Noncopyable.h"
33926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)
34c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles)namespace blink {
35926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)
3693ac45cfc74041c8ae536ce58a9534d46db2024eTorne (Richard Coles)const LChar kEndOfFileMarker = 0;
37926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)
38926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)// http://www.whatwg.org/specs/web-apps/current-work/#preprocessing-the-input-stream
39926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)template <typename Tokenizer>
40926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)class InputStreamPreprocessor {
41926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    WTF_MAKE_NONCOPYABLE(InputStreamPreprocessor);
42926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)public:
43926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    InputStreamPreprocessor(Tokenizer* tokenizer)
44926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)        : m_tokenizer(tokenizer)
45926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    {
46926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)        reset();
47926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    }
48926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)
49926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    ALWAYS_INLINE UChar nextInputCharacter() const { return m_nextInputCharacter; }
50926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)
51926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    // Returns whether we succeeded in peeking at the next character.
52926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    // The only way we can fail to peek is if there are no more
53926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    // characters in |source| (after collapsing \r\n, etc).
54926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    ALWAYS_INLINE bool peek(SegmentedString& source)
55926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    {
56926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)        m_nextInputCharacter = source.currentChar();
57926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)
58926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)        // Every branch in this function is expensive, so we have a
59926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)        // fast-reject branch for characters that don't require special
60926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)        // handling. Please run the parser benchmark whenever you touch
61926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)        // this function. It's very hot.
62926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)        static const UChar specialCharacterMask = '\n' | '\r' | '\0';
63926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)        if (m_nextInputCharacter & ~specialCharacterMask) {
64926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)            m_skipNextNewLine = false;
65926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)            return true;
66926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)        }
67e52495584422c5edb5b2944981473a2e208da323Torne (Richard Coles)        return processNextInputCharacter(source);
68e52495584422c5edb5b2944981473a2e208da323Torne (Richard Coles)    }
69e52495584422c5edb5b2944981473a2e208da323Torne (Richard Coles)
70e52495584422c5edb5b2944981473a2e208da323Torne (Richard Coles)    // Returns whether there are more characters in |source| after advancing.
71e52495584422c5edb5b2944981473a2e208da323Torne (Richard Coles)    ALWAYS_INLINE bool advance(SegmentedString& source)
72e52495584422c5edb5b2944981473a2e208da323Torne (Richard Coles)    {
73e52495584422c5edb5b2944981473a2e208da323Torne (Richard Coles)        source.advanceAndUpdateLineNumber();
74e52495584422c5edb5b2944981473a2e208da323Torne (Richard Coles)        if (source.isEmpty())
75e52495584422c5edb5b2944981473a2e208da323Torne (Richard Coles)            return false;
76e52495584422c5edb5b2944981473a2e208da323Torne (Richard Coles)        return peek(source);
77e52495584422c5edb5b2944981473a2e208da323Torne (Richard Coles)    }
78e52495584422c5edb5b2944981473a2e208da323Torne (Richard Coles)
79e52495584422c5edb5b2944981473a2e208da323Torne (Richard Coles)    bool skipNextNewLine() const { return m_skipNextNewLine; }
80e52495584422c5edb5b2944981473a2e208da323Torne (Richard Coles)
81e52495584422c5edb5b2944981473a2e208da323Torne (Richard Coles)    void reset(bool skipNextNewLine = false)
82e52495584422c5edb5b2944981473a2e208da323Torne (Richard Coles)    {
83e52495584422c5edb5b2944981473a2e208da323Torne (Richard Coles)        m_nextInputCharacter = '\0';
84e52495584422c5edb5b2944981473a2e208da323Torne (Richard Coles)        m_skipNextNewLine = skipNextNewLine;
85e52495584422c5edb5b2944981473a2e208da323Torne (Richard Coles)    }
86e52495584422c5edb5b2944981473a2e208da323Torne (Richard Coles)
87e52495584422c5edb5b2944981473a2e208da323Torne (Richard Coles)private:
88e52495584422c5edb5b2944981473a2e208da323Torne (Richard Coles)    bool processNextInputCharacter(SegmentedString& source)
89e52495584422c5edb5b2944981473a2e208da323Torne (Richard Coles)    {
90e52495584422c5edb5b2944981473a2e208da323Torne (Richard Coles)    ProcessAgain:
91e52495584422c5edb5b2944981473a2e208da323Torne (Richard Coles)        ASSERT(m_nextInputCharacter == source.currentChar());
92926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)
93926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)        if (m_nextInputCharacter == '\n' && m_skipNextNewLine) {
94926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)            m_skipNextNewLine = false;
95926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)            source.advancePastNewlineAndUpdateLineNumber();
96926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)            if (source.isEmpty())
97926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)                return false;
98926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)            m_nextInputCharacter = source.currentChar();
99926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)        }
100926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)        if (m_nextInputCharacter == '\r') {
101926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)            m_nextInputCharacter = '\n';
102926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)            m_skipNextNewLine = true;
103926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)        } else {
104926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)            m_skipNextNewLine = false;
105926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)            // FIXME: The spec indicates that the surrogate pair range as well as
106926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)            // a number of specific character values are parse errors and should be replaced
107926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)            // by the replacement character. We suspect this is a problem with the spec as doing
108926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)            // that filtering breaks surrogate pair handling and causes us not to match Minefield.
109926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)            if (m_nextInputCharacter == '\0' && !shouldTreatNullAsEndOfFileMarker(source)) {
110926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)                if (m_tokenizer->shouldSkipNullCharacters()) {
111926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)                    source.advancePastNonNewline();
112926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)                    if (source.isEmpty())
113926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)                        return false;
114e52495584422c5edb5b2944981473a2e208da323Torne (Richard Coles)                    m_nextInputCharacter = source.currentChar();
115e52495584422c5edb5b2944981473a2e208da323Torne (Richard Coles)                    goto ProcessAgain;
116926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)                }
117926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)                m_nextInputCharacter = 0xFFFD;
118926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)            }
119926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)        }
120926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)        return true;
121926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    }
122926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)
123926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    bool shouldTreatNullAsEndOfFileMarker(SegmentedString& source) const
124926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    {
125926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)        return source.isClosed() && source.length() == 1;
126926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    }
127926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)
128926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    Tokenizer* m_tokenizer;
129926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)
130926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    // http://www.whatwg.org/specs/web-apps/current-work/#next-input-character
131926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    UChar m_nextInputCharacter;
132926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    bool m_skipNextNewLine;
133926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)};
134926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)
135926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)}
136926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)
137926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)#endif // InputStreamPreprocessor_h
138926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)
139