1926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)/* 2926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) * Copyright (C) 2008 Apple Inc. All Rights Reserved. 3926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) * Copyright (C) 2009 Torch Mobile, Inc. http://www.torchmobile.com/ 4926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) * Copyright (C) 2013 Google, Inc. All Rights Reserved. 5926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) * 6926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) * Redistribution and use in source and binary forms, with or without 7926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) * modification, are permitted provided that the following conditions 8926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) * are met: 9926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) * 1. Redistributions of source code must retain the above copyright 10926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) * notice, this list of conditions and the following disclaimer. 11926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) * 2. Redistributions in binary form must reproduce the above copyright 12926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) * notice, this list of conditions and the following disclaimer in the 13926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) * documentation and/or other materials provided with the distribution. 14926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) * 15926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY 16926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR 19926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) */ 27926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) 28926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)#ifndef InputStreamPreprocessor_h 29926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)#define InputStreamPreprocessor_h 30926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) 311e202183a5dc46166763171984b285173f8585e5Torne (Richard Coles)#include "platform/text/SegmentedString.h" 32e69819bd8e388ea4ad1636a19aa6b2eed4952191Ben Murdoch#include "wtf/Noncopyable.h" 33926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) 34c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles)namespace blink { 35926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) 3693ac45cfc74041c8ae536ce58a9534d46db2024eTorne (Richard Coles)const LChar kEndOfFileMarker = 0; 37926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) 38926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)// http://www.whatwg.org/specs/web-apps/current-work/#preprocessing-the-input-stream 39926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)template <typename Tokenizer> 40926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)class InputStreamPreprocessor { 41926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) WTF_MAKE_NONCOPYABLE(InputStreamPreprocessor); 42926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)public: 43926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) InputStreamPreprocessor(Tokenizer* tokenizer) 44926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) : m_tokenizer(tokenizer) 45926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) { 46926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) reset(); 47926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) } 48926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) 49926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) ALWAYS_INLINE UChar nextInputCharacter() const { return m_nextInputCharacter; } 50926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) 51926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) // Returns whether we succeeded in peeking at the next character. 52926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) // The only way we can fail to peek is if there are no more 53926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) // characters in |source| (after collapsing \r\n, etc). 54926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) ALWAYS_INLINE bool peek(SegmentedString& source) 55926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) { 56926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) m_nextInputCharacter = source.currentChar(); 57926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) 58926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) // Every branch in this function is expensive, so we have a 59926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) // fast-reject branch for characters that don't require special 60926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) // handling. Please run the parser benchmark whenever you touch 61926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) // this function. It's very hot. 62926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) static const UChar specialCharacterMask = '\n' | '\r' | '\0'; 63926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) if (m_nextInputCharacter & ~specialCharacterMask) { 64926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) m_skipNextNewLine = false; 65926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) return true; 66926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) } 67e52495584422c5edb5b2944981473a2e208da323Torne (Richard Coles) return processNextInputCharacter(source); 68e52495584422c5edb5b2944981473a2e208da323Torne (Richard Coles) } 69e52495584422c5edb5b2944981473a2e208da323Torne (Richard Coles) 70e52495584422c5edb5b2944981473a2e208da323Torne (Richard Coles) // Returns whether there are more characters in |source| after advancing. 71e52495584422c5edb5b2944981473a2e208da323Torne (Richard Coles) ALWAYS_INLINE bool advance(SegmentedString& source) 72e52495584422c5edb5b2944981473a2e208da323Torne (Richard Coles) { 73e52495584422c5edb5b2944981473a2e208da323Torne (Richard Coles) source.advanceAndUpdateLineNumber(); 74e52495584422c5edb5b2944981473a2e208da323Torne (Richard Coles) if (source.isEmpty()) 75e52495584422c5edb5b2944981473a2e208da323Torne (Richard Coles) return false; 76e52495584422c5edb5b2944981473a2e208da323Torne (Richard Coles) return peek(source); 77e52495584422c5edb5b2944981473a2e208da323Torne (Richard Coles) } 78e52495584422c5edb5b2944981473a2e208da323Torne (Richard Coles) 79e52495584422c5edb5b2944981473a2e208da323Torne (Richard Coles) bool skipNextNewLine() const { return m_skipNextNewLine; } 80e52495584422c5edb5b2944981473a2e208da323Torne (Richard Coles) 81e52495584422c5edb5b2944981473a2e208da323Torne (Richard Coles) void reset(bool skipNextNewLine = false) 82e52495584422c5edb5b2944981473a2e208da323Torne (Richard Coles) { 83e52495584422c5edb5b2944981473a2e208da323Torne (Richard Coles) m_nextInputCharacter = '\0'; 84e52495584422c5edb5b2944981473a2e208da323Torne (Richard Coles) m_skipNextNewLine = skipNextNewLine; 85e52495584422c5edb5b2944981473a2e208da323Torne (Richard Coles) } 86e52495584422c5edb5b2944981473a2e208da323Torne (Richard Coles) 87e52495584422c5edb5b2944981473a2e208da323Torne (Richard Coles)private: 88e52495584422c5edb5b2944981473a2e208da323Torne (Richard Coles) bool processNextInputCharacter(SegmentedString& source) 89e52495584422c5edb5b2944981473a2e208da323Torne (Richard Coles) { 90e52495584422c5edb5b2944981473a2e208da323Torne (Richard Coles) ProcessAgain: 91e52495584422c5edb5b2944981473a2e208da323Torne (Richard Coles) ASSERT(m_nextInputCharacter == source.currentChar()); 92926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) 93926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) if (m_nextInputCharacter == '\n' && m_skipNextNewLine) { 94926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) m_skipNextNewLine = false; 95926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) source.advancePastNewlineAndUpdateLineNumber(); 96926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) if (source.isEmpty()) 97926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) return false; 98926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) m_nextInputCharacter = source.currentChar(); 99926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) } 100926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) if (m_nextInputCharacter == '\r') { 101926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) m_nextInputCharacter = '\n'; 102926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) m_skipNextNewLine = true; 103926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) } else { 104926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) m_skipNextNewLine = false; 105926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) // FIXME: The spec indicates that the surrogate pair range as well as 106926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) // a number of specific character values are parse errors and should be replaced 107926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) // by the replacement character. We suspect this is a problem with the spec as doing 108926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) // that filtering breaks surrogate pair handling and causes us not to match Minefield. 109926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) if (m_nextInputCharacter == '\0' && !shouldTreatNullAsEndOfFileMarker(source)) { 110926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) if (m_tokenizer->shouldSkipNullCharacters()) { 111926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) source.advancePastNonNewline(); 112926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) if (source.isEmpty()) 113926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) return false; 114e52495584422c5edb5b2944981473a2e208da323Torne (Richard Coles) m_nextInputCharacter = source.currentChar(); 115e52495584422c5edb5b2944981473a2e208da323Torne (Richard Coles) goto ProcessAgain; 116926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) } 117926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) m_nextInputCharacter = 0xFFFD; 118926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) } 119926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) } 120926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) return true; 121926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) } 122926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) 123926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) bool shouldTreatNullAsEndOfFileMarker(SegmentedString& source) const 124926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) { 125926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) return source.isClosed() && source.length() == 1; 126926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) } 127926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) 128926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) Tokenizer* m_tokenizer; 129926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) 130926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) // http://www.whatwg.org/specs/web-apps/current-work/#next-input-character 131926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) UChar m_nextInputCharacter; 132926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) bool m_skipNextNewLine; 133926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)}; 134926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) 135926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)} 136926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) 137926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)#endif // InputStreamPreprocessor_h 138926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) 139