1/*
2 * Copyright (C) 2003 Lars Knoll (knoll@kde.org)
3 * Copyright (C) 2005 Allan Sandfeld Jensen (kde@carewolf.com)
4 * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 Apple Inc. All rights reserved.
5 * Copyright (C) 2007 Nicholas Shanks <webkit@nickshanks.com>
6 * Copyright (C) 2008 Eric Seidel <eric@webkit.org>
7 * Copyright (C) 2009 Torch Mobile Inc. All rights reserved. (http://www.torchmobile.com/)
8 * Copyright (C) 2012 Adobe Systems Incorporated. All rights reserved.
9 * Copyright (C) 2012 Intel Corporation. All rights reserved.
10 *
11 * This library is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Library General Public
13 * License as published by the Free Software Foundation; either
14 * version 2 of the License, or (at your option) any later version.
15 *
16 * This library is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19 * Library General Public License for more details.
20 *
21 * You should have received a copy of the GNU Library General Public License
22 * along with this library; see the file COPYING.LIB.  If not, write to
23 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
24 * Boston, MA 02110-1301, USA.
25 */
26
27#include "config.h"
28#include "core/css/CSSTokenizer.h"
29
30#include "core/css/CSSKeyframeRule.h"
31#include "core/css/CSSParser.h"
32#include "core/css/CSSParserValues.h"
33#include "core/css/MediaQuery.h"
34#include "core/css/StyleRule.h"
35#include "core/html/parser/HTMLParserIdioms.h"
36#include "core/svg/SVGParserUtilities.h"
37
38namespace WebCore {
39
40#include "CSSGrammar.h"
41
// Classification of a single character for the tokenizer's main switch.
// The enumerator order is significant: see the identifier group below.
enum CharacterType {
    // Characters allowed inside an identifier. These four must remain the
    // first, contiguous enumerators because isCSSLetter() recognizes them
    // with a single "type <= CharacterDash" comparison.
    CharacterCaselessU = 0, // 'u' and 'U'
    CharacterIdentifierStart, // remaining ASCII letters and '_'
    CharacterNumber, // '0' - '9'
    CharacterDash, // '-'

    // Everything below is not an identifier character.
    CharacterOther,
    CharacterNull, // code 0
    CharacterWhiteSpace, // tab, line feed, form feed, carriage return, space
    CharacterEndMediaQueryOrSupports, // ';' and '{'
    CharacterEndNthChild, // ')'
    CharacterQuote, // '"' and '\''
    CharacterExclamationMark, // '!'
    CharacterHashmark, // '#'
    CharacterDollar, // '$'
    CharacterAsterisk, // '*'
    CharacterPlus, // '+'
    CharacterDot, // '.'
    CharacterSlash, // '/'
    CharacterLess, // '<'
    CharacterAt, // '@'
    CharacterBackSlash, // '\\'
    CharacterXor, // '^'
    CharacterVerticalBar, // '|'
    CharacterTilde, // '~'
};
72
// Lookup table mapping each of the 128 ASCII codes to its CharacterType.
// The array index is the character code itself, so the table must only be
// indexed with values below 128 (isCSSLetter() rules out larger codes before
// indexing). Note that vertical tab (11) is classified as CharacterOther,
// not as white space, and that 'U'/'u' have their own CharacterCaselessU type.
static const CharacterType typesOfASCIICharacters[128] = {
/*   0 - Null               */ CharacterNull,
/*   1 - Start of Heading   */ CharacterOther,
/*   2 - Start of Text      */ CharacterOther,
/*   3 - End of Text        */ CharacterOther,
/*   4 - End of Transm.     */ CharacterOther,
/*   5 - Enquiry            */ CharacterOther,
/*   6 - Acknowledgment     */ CharacterOther,
/*   7 - Bell               */ CharacterOther,
/*   8 - Back Space         */ CharacterOther,
/*   9 - Horizontal Tab     */ CharacterWhiteSpace,
/*  10 - Line Feed          */ CharacterWhiteSpace,
/*  11 - Vertical Tab       */ CharacterOther,
/*  12 - Form Feed          */ CharacterWhiteSpace,
/*  13 - Carriage Return    */ CharacterWhiteSpace,
/*  14 - Shift Out          */ CharacterOther,
/*  15 - Shift In           */ CharacterOther,
/*  16 - Data Line Escape   */ CharacterOther,
/*  17 - Device Control 1   */ CharacterOther,
/*  18 - Device Control 2   */ CharacterOther,
/*  19 - Device Control 3   */ CharacterOther,
/*  20 - Device Control 4   */ CharacterOther,
/*  21 - Negative Ack.      */ CharacterOther,
/*  22 - Synchronous Idle   */ CharacterOther,
/*  23 - End of Transmit    */ CharacterOther,
/*  24 - Cancel             */ CharacterOther,
/*  25 - End of Medium      */ CharacterOther,
/*  26 - Substitute         */ CharacterOther,
/*  27 - Escape             */ CharacterOther,
/*  28 - File Separator     */ CharacterOther,
/*  29 - Group Separator    */ CharacterOther,
/*  30 - Record Separator   */ CharacterOther,
/*  31 - Unit Separator     */ CharacterOther,
/*  32 - Space              */ CharacterWhiteSpace,
/*  33 - !                  */ CharacterExclamationMark,
/*  34 - "                  */ CharacterQuote,
/*  35 - #                  */ CharacterHashmark,
/*  36 - $                  */ CharacterDollar,
/*  37 - %                  */ CharacterOther,
/*  38 - &                  */ CharacterOther,
/*  39 - '                  */ CharacterQuote,
/*  40 - (                  */ CharacterOther,
/*  41 - )                  */ CharacterEndNthChild,
/*  42 - *                  */ CharacterAsterisk,
/*  43 - +                  */ CharacterPlus,
/*  44 - ,                  */ CharacterOther,
/*  45 - -                  */ CharacterDash,
/*  46 - .                  */ CharacterDot,
/*  47 - /                  */ CharacterSlash,
/*  48 - 0                  */ CharacterNumber,
/*  49 - 1                  */ CharacterNumber,
/*  50 - 2                  */ CharacterNumber,
/*  51 - 3                  */ CharacterNumber,
/*  52 - 4                  */ CharacterNumber,
/*  53 - 5                  */ CharacterNumber,
/*  54 - 6                  */ CharacterNumber,
/*  55 - 7                  */ CharacterNumber,
/*  56 - 8                  */ CharacterNumber,
/*  57 - 9                  */ CharacterNumber,
/*  58 - :                  */ CharacterOther,
/*  59 - ;                  */ CharacterEndMediaQueryOrSupports,
/*  60 - <                  */ CharacterLess,
/*  61 - =                  */ CharacterOther,
/*  62 - >                  */ CharacterOther,
/*  63 - ?                  */ CharacterOther,
/*  64 - @                  */ CharacterAt,
/*  65 - A                  */ CharacterIdentifierStart,
/*  66 - B                  */ CharacterIdentifierStart,
/*  67 - C                  */ CharacterIdentifierStart,
/*  68 - D                  */ CharacterIdentifierStart,
/*  69 - E                  */ CharacterIdentifierStart,
/*  70 - F                  */ CharacterIdentifierStart,
/*  71 - G                  */ CharacterIdentifierStart,
/*  72 - H                  */ CharacterIdentifierStart,
/*  73 - I                  */ CharacterIdentifierStart,
/*  74 - J                  */ CharacterIdentifierStart,
/*  75 - K                  */ CharacterIdentifierStart,
/*  76 - L                  */ CharacterIdentifierStart,
/*  77 - M                  */ CharacterIdentifierStart,
/*  78 - N                  */ CharacterIdentifierStart,
/*  79 - O                  */ CharacterIdentifierStart,
/*  80 - P                  */ CharacterIdentifierStart,
/*  81 - Q                  */ CharacterIdentifierStart,
/*  82 - R                  */ CharacterIdentifierStart,
/*  83 - S                  */ CharacterIdentifierStart,
/*  84 - T                  */ CharacterIdentifierStart,
/*  85 - U                  */ CharacterCaselessU,
/*  86 - V                  */ CharacterIdentifierStart,
/*  87 - W                  */ CharacterIdentifierStart,
/*  88 - X                  */ CharacterIdentifierStart,
/*  89 - Y                  */ CharacterIdentifierStart,
/*  90 - Z                  */ CharacterIdentifierStart,
/*  91 - [                  */ CharacterOther,
/*  92 - \                  */ CharacterBackSlash,
/*  93 - ]                  */ CharacterOther,
/*  94 - ^                  */ CharacterXor,
/*  95 - _                  */ CharacterIdentifierStart,
/*  96 - `                  */ CharacterOther,
/*  97 - a                  */ CharacterIdentifierStart,
/*  98 - b                  */ CharacterIdentifierStart,
/*  99 - c                  */ CharacterIdentifierStart,
/* 100 - d                  */ CharacterIdentifierStart,
/* 101 - e                  */ CharacterIdentifierStart,
/* 102 - f                  */ CharacterIdentifierStart,
/* 103 - g                  */ CharacterIdentifierStart,
/* 104 - h                  */ CharacterIdentifierStart,
/* 105 - i                  */ CharacterIdentifierStart,
/* 106 - j                  */ CharacterIdentifierStart,
/* 107 - k                  */ CharacterIdentifierStart,
/* 108 - l                  */ CharacterIdentifierStart,
/* 109 - m                  */ CharacterIdentifierStart,
/* 110 - n                  */ CharacterIdentifierStart,
/* 111 - o                  */ CharacterIdentifierStart,
/* 112 - p                  */ CharacterIdentifierStart,
/* 113 - q                  */ CharacterIdentifierStart,
/* 114 - r                  */ CharacterIdentifierStart,
/* 115 - s                  */ CharacterIdentifierStart,
/* 116 - t                  */ CharacterIdentifierStart,
/* 117 - u                  */ CharacterCaselessU,
/* 118 - v                  */ CharacterIdentifierStart,
/* 119 - w                  */ CharacterIdentifierStart,
/* 120 - x                  */ CharacterIdentifierStart,
/* 121 - y                  */ CharacterIdentifierStart,
/* 122 - z                  */ CharacterIdentifierStart,
/* 123 - {                  */ CharacterEndMediaQueryOrSupports,
/* 124 - |                  */ CharacterVerticalBar,
/* 125 - }                  */ CharacterOther,
/* 126 - ~                  */ CharacterTilde,
/* 127 - Delete             */ CharacterOther,
};
204
205// Utility functions for the CSS tokenizer.
206
207template <typename CharacterType>
208static inline bool isCSSLetter(CharacterType character)
209{
210    return character >= 128 || typesOfASCIICharacters[character] <= CharacterDash;
211}
212
// Returns true if |character| may directly follow a backslash to form an
// escape: anything from space upwards except DEL (127).
template <typename CharacterType>
static inline bool isCSSEscape(CharacterType character)
{
    const bool isControlOrDelete = character < ' ' || character == 127;
    return !isControlOrDelete;
}
218
// Returns true if |character| may appear in an unquoted url(...) body:
// '!', '#' through '&', or anything from '*' upwards except DEL (127).
// The accepted range includes the backslash; callers treat it as the start
// of an escape sequence.
template <typename CharacterType>
static inline bool isURILetter(CharacterType character)
{
    if (character == '!')
        return true;
    if (character >= '#' && character <= '&')
        return true;
    return character >= '*' && character != 127;
}
224
// Returns true if the text at |currentCharacter| can begin an identifier,
// assuming any leading dash has already been skipped by the caller: an ASCII
// letter, an underscore, a non-ASCII code, or a backslash followed by a
// character that forms a valid escape.
template <typename CharacterType>
static inline bool isIdentifierStartAfterDash(CharacterType* currentCharacter)
{
    const CharacterType first = currentCharacter[0];

    // A backslash only counts when it introduces a valid escape. The second
    // character is inspected in this case only.
    if (first == '\\')
        return isCSSEscape(currentCharacter[1]);

    return isASCIIAlpha(first) || first == '_' || first >= 128;
}
231
232template <typename CharacterType>
233static inline bool isEqualToCSSIdentifier(CharacterType* cssString, const char* constantString)
234{
235    // Compare an character memory data with a zero terminated string.
236    do {
237        // The input must be part of an identifier if constantChar or constString
238        // contains '-'. Otherwise toASCIILowerUnchecked('\r') would be equal to '-'.
239        ASSERT((*constantString >= 'a' && *constantString <= 'z') || *constantString == '-');
240        ASSERT(*constantString != '-' || isCSSLetter(*cssString));
241        if (toASCIILowerUnchecked(*cssString++) != (*constantString++))
242            return false;
243    } while (*constantString);
244    return true;
245}
246
247template <typename CharacterType>
248static inline bool isEqualToCSSCaseSensitiveIdentifier(CharacterType* string, const char* constantString)
249{
250    ASSERT(*constantString);
251
252    do {
253        if (*string++ != *constantString++)
254            return false;
255    } while (*constantString);
256    return true;
257}
258
259template <typename CharacterType>
260static CharacterType* checkAndSkipEscape(CharacterType* currentCharacter)
261{
262    // Returns with 0, if escape check is failed. Otherwise
263    // it returns with the following character.
264    ASSERT(*currentCharacter == '\\');
265
266    ++currentCharacter;
267    if (!isCSSEscape(*currentCharacter))
268        return 0;
269
270    if (isASCIIHexDigit(*currentCharacter)) {
271        int length = 6;
272
273        do {
274            ++currentCharacter;
275        } while (isASCIIHexDigit(*currentCharacter) && --length);
276
277        // Optional space after the escape sequence.
278        if (isHTMLSpace<CharacterType>(*currentCharacter))
279            ++currentCharacter;
280        return currentCharacter;
281    }
282    return currentCharacter + 1;
283}
284
285template <typename CharacterType>
286static inline CharacterType* skipWhiteSpace(CharacterType* currentCharacter)
287{
288    while (isHTMLSpace<CharacterType>(*currentCharacter))
289        ++currentCharacter;
290    return currentCharacter;
291}
292
293// Main CSS tokenizer functions.
294
// 8-bit specialization: returns the LChar cursor member by reference, so
// callers can both read the current position and move it by assigning
// through the returned reference.
template <>
inline LChar*& CSSTokenizer::currentCharacter<LChar>()
{
    return m_currentCharacter8;
}
300
// 16-bit specialization: returns the UChar cursor member by reference, so
// callers can both read the current position and move it by assigning
// through the returned reference. Unlike currentCharacter16(), this accessor
// does not allocate the 16-bit buffer when the cursor is still null.
template <>
inline UChar*& CSSTokenizer::currentCharacter<UChar>()
{
    return m_currentCharacter16;
}
306
307UChar*& CSSTokenizer::currentCharacter16()
308{
309    if (!m_currentCharacter16) {
310        m_dataStart16 = adoptArrayPtr(new UChar[m_length]);
311        m_currentCharacter16 = m_dataStart16.get();
312    }
313
314    return m_currentCharacter16;
315}
316
// 8-bit specialization: returns the raw pointer held by m_dataStart8.
// tokenLocation() subtracts this pointer from the token start to compute the
// token's offset.
template <>
inline LChar* CSSTokenizer::dataStart<LChar>()
{
    return m_dataStart8.get();
}
322
// 16-bit specialization: returns the raw pointer held by m_dataStart16.
// tokenLocation() subtracts this pointer from the token start to compute the
// token's offset.
template <>
inline UChar* CSSTokenizer::dataStart<UChar>()
{
    return m_dataStart16.get();
}
328
329template <typename CharacterType>
330inline CSSParserLocation CSSTokenizer::tokenLocation()
331{
332    CSSParserLocation location;
333    location.token.init(tokenStart<CharacterType>(), currentCharacter<CharacterType>() - tokenStart<CharacterType>());
334    location.lineNumber = m_tokenStartLineNumber;
335    location.offset = tokenStart<CharacterType>() - dataStart<CharacterType>();
336    return location;
337}
338
339CSSParserLocation CSSTokenizer::currentLocation()
340{
341    if (is8BitSource())
342        return tokenLocation<LChar>();
343    return tokenLocation<UChar>();
344}
345
346template <typename CharacterType>
347inline bool CSSTokenizer::isIdentifierStart()
348{
349    // Check whether an identifier is started.
350    return isIdentifierStartAfterDash((*currentCharacter<CharacterType>() != '-') ? currentCharacter<CharacterType>() : currentCharacter<CharacterType>() + 1);
351}
352
353template <typename CharacterType>
354static inline CharacterType* checkAndSkipString(CharacterType* currentCharacter, int quote)
355{
356    // Returns with 0, if string check is failed. Otherwise
357    // it returns with the following character. This is necessary
358    // since we cannot revert escape sequences, thus strings
359    // must be validated before parsing.
360    while (true) {
361        if (UNLIKELY(*currentCharacter == quote)) {
362            // String parsing is successful.
363            return currentCharacter + 1;
364        }
365        if (UNLIKELY(!*currentCharacter)) {
366            // String parsing is successful up to end of input.
367            return currentCharacter;
368        }
369        if (UNLIKELY(*currentCharacter <= '\r' && (*currentCharacter == '\n' || (*currentCharacter | 0x1) == '\r'))) {
370            // String parsing is failed for character '\n', '\f' or '\r'.
371            return 0;
372        }
373
374        if (LIKELY(currentCharacter[0] != '\\')) {
375            ++currentCharacter;
376        } else if (currentCharacter[1] == '\n' || currentCharacter[1] == '\f') {
377            currentCharacter += 2;
378        } else if (currentCharacter[1] == '\r') {
379            currentCharacter += currentCharacter[2] == '\n' ? 3 : 2;
380        } else {
381            currentCharacter = checkAndSkipEscape(currentCharacter);
382            if (!currentCharacter)
383                return 0;
384        }
385    }
386}
387
388template <typename CharacterType>
389unsigned CSSTokenizer::parseEscape(CharacterType*& src)
390{
391    ASSERT(*src == '\\' && isCSSEscape(src[1]));
392
393    unsigned unicode = 0;
394
395    ++src;
396    if (isASCIIHexDigit(*src)) {
397
398        int length = 6;
399
400        do {
401            unicode = (unicode << 4) + toASCIIHexValue(*src++);
402        } while (--length && isASCIIHexDigit(*src));
403
404        // Characters above 0x10ffff are not handled.
405        if (unicode > 0x10ffff)
406            unicode = 0xfffd;
407
408        // Optional space after the escape sequence.
409        if (isHTMLSpace<CharacterType>(*src))
410            ++src;
411
412        return unicode;
413    }
414
415    return *currentCharacter<CharacterType>()++;
416}
417
418template <>
419inline void CSSTokenizer::UnicodeToChars<LChar>(LChar*& result, unsigned unicode)
420{
421    ASSERT(unicode <= 0xff);
422    *result = unicode;
423
424    ++result;
425}
426
427template <>
428inline void CSSTokenizer::UnicodeToChars<UChar>(UChar*& result, unsigned unicode)
429{
430    // Replace unicode with a surrogate pairs when it is bigger than 0xffff
431    if (U16_LENGTH(unicode) == 2) {
432        *result++ = U16_LEAD(unicode);
433        *result = U16_TRAIL(unicode);
434    } else {
435        *result = unicode;
436    }
437
438    ++result;
439}
440
441template <typename SrcCharacterType, typename DestCharacterType>
442inline bool CSSTokenizer::parseIdentifierInternal(SrcCharacterType*& src, DestCharacterType*& result, bool& hasEscape)
443{
444    hasEscape = false;
445    do {
446        if (LIKELY(*src != '\\')) {
447            *result++ = *src++;
448        } else {
449            hasEscape = true;
450            SrcCharacterType* savedEscapeStart = src;
451            unsigned unicode = parseEscape<SrcCharacterType>(src);
452            if (unicode > 0xff && sizeof(DestCharacterType) == 1) {
453                src = savedEscapeStart;
454                return false;
455            }
456            UnicodeToChars(result, unicode);
457        }
458    } while (isCSSLetter(src[0]) || (src[0] == '\\' && isCSSEscape(src[1])));
459
460    return true;
461}
462
463template <typename CharacterType>
464inline void CSSTokenizer::parseIdentifier(CharacterType*& result, CSSParserString& resultString, bool& hasEscape)
465{
466    // If a valid identifier start is found, we can safely
467    // parse the identifier until the next invalid character.
468    ASSERT(isIdentifierStart<CharacterType>());
469
470    CharacterType* start = currentCharacter<CharacterType>();
471    if (UNLIKELY(!parseIdentifierInternal(currentCharacter<CharacterType>(), result, hasEscape))) {
472        // Found an escape we couldn't handle with 8 bits, copy what has been recognized and continue
473        ASSERT(is8BitSource());
474        UChar*& result16 = currentCharacter16();
475        UChar* start16 = result16;
476        int i = 0;
477        for (; i < result - start; i++)
478            result16[i] = start[i];
479
480        result16 += i;
481
482        parseIdentifierInternal(currentCharacter<CharacterType>(), result16, hasEscape);
483
484        resultString.init(start16, result16 - start16);
485
486        return;
487    }
488
489    resultString.init(start, result - start);
490}
491
492template <typename SrcCharacterType, typename DestCharacterType>
493inline bool CSSTokenizer::parseStringInternal(SrcCharacterType*& src, DestCharacterType*& result, UChar quote)
494{
495    while (true) {
496        if (UNLIKELY(*src == quote)) {
497            // String parsing is done.
498            ++src;
499            return true;
500        }
501        if (UNLIKELY(!*src)) {
502            // String parsing is done, but don't advance pointer if at the end of input.
503            return true;
504        }
505        ASSERT(*src > '\r' || (*src < '\n' && *src) || *src == '\v');
506
507        if (LIKELY(src[0] != '\\')) {
508            *result++ = *src++;
509        } else if (src[1] == '\n' || src[1] == '\f') {
510            src += 2;
511        } else if (src[1] == '\r') {
512            src += src[2] == '\n' ? 3 : 2;
513        } else {
514            SrcCharacterType* savedEscapeStart = src;
515            unsigned unicode = parseEscape<SrcCharacterType>(src);
516            if (unicode > 0xff && sizeof(DestCharacterType) == 1) {
517                src = savedEscapeStart;
518                return false;
519            }
520            UnicodeToChars(result, unicode);
521        }
522    }
523
524    return true;
525}
526
527template <typename CharacterType>
528inline void CSSTokenizer::parseString(CharacterType*& result, CSSParserString& resultString, UChar quote)
529{
530    CharacterType* start = currentCharacter<CharacterType>();
531
532    if (UNLIKELY(!parseStringInternal(currentCharacter<CharacterType>(), result, quote))) {
533        // Found an escape we couldn't handle with 8 bits, copy what has been recognized and continue
534        ASSERT(is8BitSource());
535        UChar*& result16 = currentCharacter16();
536        UChar* start16 = result16;
537        int i = 0;
538        for (; i < result - start; i++)
539            result16[i] = start[i];
540
541        result16 += i;
542
543        parseStringInternal(currentCharacter<CharacterType>(), result16, quote);
544
545        resultString.init(start16, result16 - start16);
546        return;
547    }
548
549    resultString.init(start, result - start);
550}
551
552template <typename CharacterType>
553inline bool CSSTokenizer::findURI(CharacterType*& start, CharacterType*& end, UChar& quote)
554{
555    start = skipWhiteSpace(currentCharacter<CharacterType>());
556
557    if (*start == '"' || *start == '\'') {
558        quote = *start++;
559        end = checkAndSkipString(start, quote);
560        if (!end)
561            return false;
562    } else {
563        quote = 0;
564        end = start;
565        while (isURILetter(*end)) {
566            if (LIKELY(*end != '\\')) {
567                ++end;
568            } else {
569                end = checkAndSkipEscape(end);
570                if (!end)
571                    return false;
572            }
573        }
574    }
575
576    end = skipWhiteSpace(end);
577    if (*end != ')')
578        return false;
579
580    return true;
581}
582
583template <typename SrcCharacterType, typename DestCharacterType>
584inline bool CSSTokenizer::parseURIInternal(SrcCharacterType*& src, DestCharacterType*& dest, UChar quote)
585{
586    if (quote) {
587        ASSERT(quote == '"' || quote == '\'');
588        return parseStringInternal(src, dest, quote);
589    }
590
591    while (isURILetter(*src)) {
592        if (LIKELY(*src != '\\')) {
593            *dest++ = *src++;
594        } else {
595            unsigned unicode = parseEscape<SrcCharacterType>(src);
596            if (unicode > 0xff && sizeof(SrcCharacterType) == 1)
597                return false;
598            UnicodeToChars(dest, unicode);
599        }
600    }
601
602    return true;
603}
604
605template <typename CharacterType>
606inline void CSSTokenizer::parseURI(CSSParserString& string)
607{
608    CharacterType* uriStart;
609    CharacterType* uriEnd;
610    UChar quote;
611    if (!findURI(uriStart, uriEnd, quote))
612        return;
613
614    CharacterType* dest = currentCharacter<CharacterType>() = uriStart;
615    if (LIKELY(parseURIInternal(currentCharacter<CharacterType>(), dest, quote))) {
616        string.init(uriStart, dest - uriStart);
617    } else {
618        // An escape sequence was encountered that can't be stored in 8 bits.
619        // Reset the current character to the start of the URI and re-parse with
620        // a 16-bit destination.
621        ASSERT(is8BitSource());
622        UChar* uriStart16 = currentCharacter16();
623        currentCharacter<CharacterType>() = uriStart;
624        bool result = parseURIInternal(currentCharacter<CharacterType>(), currentCharacter16(), quote);
625        ASSERT_UNUSED(result, result);
626        string.init(uriStart16, currentCharacter16() - uriStart16);
627    }
628
629    currentCharacter<CharacterType>() = uriEnd + 1;
630    m_token = URI;
631}
632
633template <typename CharacterType>
634inline bool CSSTokenizer::parseUnicodeRange()
635{
636    CharacterType* character = currentCharacter<CharacterType>() + 1;
637    int length = 6;
638    ASSERT(*currentCharacter<CharacterType>() == '+');
639
640    while (isASCIIHexDigit(*character) && length) {
641        ++character;
642        --length;
643    }
644
645    if (length && *character == '?') {
646        // At most 5 hex digit followed by a question mark.
647        do {
648            ++character;
649            --length;
650        } while (*character == '?' && length);
651        currentCharacter<CharacterType>() = character;
652        return true;
653    }
654
655    if (length < 6) {
656        // At least one hex digit.
657        if (character[0] == '-' && isASCIIHexDigit(character[1])) {
658            // Followed by a dash and a hex digit.
659            ++character;
660            length = 6;
661            do {
662                ++character;
663            } while (--length && isASCIIHexDigit(*character));
664        }
665        currentCharacter<CharacterType>() = character;
666        return true;
667    }
668    return false;
669}
670
671template <typename CharacterType>
672bool CSSTokenizer::parseNthChild()
673{
674    CharacterType* character = currentCharacter<CharacterType>();
675
676    while (isASCIIDigit(*character))
677        ++character;
678    if (isASCIIAlphaCaselessEqual(*character, 'n')) {
679        currentCharacter<CharacterType>() = character + 1;
680        return true;
681    }
682    return false;
683}
684
685template <typename CharacterType>
686bool CSSTokenizer::parseNthChildExtra()
687{
688    CharacterType* character = skipWhiteSpace(currentCharacter<CharacterType>());
689    if (*character != '+' && *character != '-')
690        return false;
691
692    character = skipWhiteSpace(character + 1);
693    if (!isASCIIDigit(*character))
694        return false;
695
696    do {
697        ++character;
698    } while (isASCIIDigit(*character));
699
700    currentCharacter<CharacterType>() = character;
701    return true;
702}
703
// Checks whether the |length| characters at the start of the current token
// name a function that needs special treatment. Returns true if the name was
// recognized, false otherwise.
//
// For most names m_token is set to the matching function token. The nth-*
// names leave m_token alone and switch m_parsingMode to NthChildMode instead.
// "var" is only recognized while the CSS variables runtime feature is enabled.
//
// NOTE(review): SWITCH/CASE are not defined in the visible part of this file;
// presumably they form a string matcher that is expanded before compilation.
// Confirm how (and how case-sensitively) names are compared before changing
// the shape of this block.
template <typename CharacterType>
inline bool CSSTokenizer::detectFunctionTypeToken(int length)
{
    ASSERT(length > 0);
    CharacterType* name = tokenStart<CharacterType>();
    SWITCH(name, length) {
        CASE("not") {
            m_token = NOTFUNCTION;
            return true;
        }
        CASE("url") {
            m_token = URI;
            return true;
        }
        CASE("cue") {
            m_token = CUEFUNCTION;
            return true;
        }
        CASE("var") {
            if (!RuntimeEnabledFeatures::cssVariablesEnabled())
                return false;
            m_token = VARFUNCTION;
            return true;
        }
        CASE("calc") {
            m_token = CALCFUNCTION;
            return true;
        }
        CASE("host") {
            m_token = HOSTFUNCTION;
            return true;
        }
        CASE("nth-child") {
            m_parsingMode = NthChildMode;
            return true;
        }
        CASE("nth-of-type") {
            m_parsingMode = NthChildMode;
            return true;
        }
        CASE("nth-last-child") {
            m_parsingMode = NthChildMode;
            return true;
        }
        CASE("nth-last-of-type") {
            m_parsingMode = NthChildMode;
            return true;
        }
    }
    return false;
}
755
// Checks whether the |length| characters at the start of the current token
// are one of the media query keywords ("and", "not", "only", "or") and, if
// so, sets m_token to the matching MEDIA_* token. m_token is left unchanged
// for any other name. Must only be called in MediaQueryMode.
//
// NOTE(review): SWITCH/CASE are not defined in the visible part of this file;
// presumably they form a string matcher that is expanded before compilation.
// Confirm before changing the shape of this block.
template <typename CharacterType>
inline void CSSTokenizer::detectMediaQueryToken(int length)
{
    ASSERT(m_parsingMode == MediaQueryMode);
    CharacterType* name = tokenStart<CharacterType>();

    SWITCH(name, length) {
        CASE("and") {
            m_token = MEDIA_AND;
        }
        CASE("not") {
            m_token = MEDIA_NOT;
        }
        CASE("only") {
            m_token = MEDIA_ONLY;
        }
        CASE("or") {
            m_token = MEDIA_OR;
        }
    }
}
777
// Checks whether the |length| characters at |type| (the text that follows a
// number) name a known unit and, if so, sets m_token to the matching unit
// token. m_token is left unchanged for any other text. |length| must be
// positive.
//
// NOTE(review): SWITCH/CASE are not defined in the visible part of this file;
// presumably they form a string matcher that is expanded before compilation.
// Confirm before changing the shape of this block.
template <typename CharacterType>
inline void CSSTokenizer::detectNumberToken(CharacterType* type, int length)
{
    ASSERT(length > 0);

    SWITCH(type, length) {
        CASE("cm") {
            m_token = CMS;
        }
        CASE("ch") {
            m_token = CHS;
        }
        CASE("deg") {
            m_token = DEGS;
        }
        CASE("dppx") {
            // There is a discussion about the name of this unit on www-style:
            // http://lists.w3.org/Archives/Public/www-style/2012May/0915.html
            // NOTE(review): an earlier version of this comment asked to keep a
            // compile time guard in place until that is resolved, but no such
            // guard is present here; the unit is recognized unconditionally.
            m_token = DPPX;
        }
        CASE("dpcm") {
            m_token = DPCM;
        }
        CASE("dpi") {
            m_token = DPI;
        }
        CASE("em") {
            m_token = EMS;
        }
        CASE("ex") {
            m_token = EXS;
        }
        CASE("fr") {
            m_token = FR;
        }
        CASE("grad") {
            m_token = GRADS;
        }
        CASE("hz") {
            m_token = HERTZ;
        }
        CASE("in") {
            m_token = INS;
        }
        CASE("khz") {
            m_token = KHERTZ;
        }
        CASE("mm") {
            m_token = MMS;
        }
        CASE("ms") {
            m_token = MSECS;
        }
        CASE("px") {
            m_token = PXS;
        }
        CASE("pt") {
            m_token = PTS;
        }
        CASE("pc") {
            m_token = PCS;
        }
        CASE("rad") {
            m_token = RADS;
        }
        CASE("rem") {
            m_token = REMS;
        }
        CASE("s") {
            m_token = SECS;
        }
        CASE("turn") {
            m_token = TURNS;
        }
        CASE("vw") {
            m_token = VW;
        }
        CASE("vh") {
            m_token = VH;
        }
        CASE("vmin") {
            m_token = VMIN;
        }
        CASE("vmax") {
            m_token = VMAX;
        }
        CASE("__qem") {
            m_token = QEMS;
        }
    }
}
870
// Checks whether the current token, which starts with a dash and is |length|
// characters long including that dash, names one of the prefixed functions
// below and, if so, sets m_token to the matching function token. m_token is
// left unchanged for any other name.
//
// NOTE(review): SWITCH/CASE are not defined in the visible part of this file;
// presumably they form a string matcher that is expanded before compilation.
// Confirm before changing the shape of this block.
template <typename CharacterType>
inline void CSSTokenizer::detectDashToken(int length)
{
    CharacterType* name = tokenStart<CharacterType>();

    // Ignore leading dash.
    ++name;
    --length;

    SWITCH(name, length) {
        CASE("webkit-any") {
            m_token = ANYFUNCTION;
        }
        CASE("webkit-min") {
            m_token = MINFUNCTION;
        }
        CASE("webkit-max") {
            m_token = MAXFUNCTION;
        }
        CASE("webkit-calc") {
            m_token = CALCFUNCTION;
        }
        CASE("webkit-distributed") {
            m_token = DISTRIBUTEDFUNCTION;
        }
    }
}
898
// Refines m_token for an at-keyword. The current token must start with '@'
// and be at least two characters long (asserted); |length| includes the '@'.
// |hasEscape| tells whether the identifier contained an escape sequence.
// This function assigns m_token only inside the CASE bodies; realLex sets
// ATKEYWORD before calling it. Some rules also switch m_parsingMode.
// NOTE(review): SWITCH/CASE are defined outside this chunk; confirm their
// exact matching rules (e.g. case sensitivity) at the definition.
899template <typename CharacterType>
900inline void CSSTokenizer::detectAtToken(int length, bool hasEscape)
901{
902    CharacterType* name = tokenStart<CharacterType>();
903    ASSERT(name[0] == '@' && length >= 2);
904
905    // Ignore leading @.
906    ++name;
907    --length;
908
909    // charset, font-face, import, keyframes, media, namespace, page, supports,
910    // viewport, -webkit-keyframes, -internal-medialist, -internal-keyframe-key-list
    // and -internal-supports-condition are not affected by hasEscape. All other
    // rules are only recognized when the name contained no escape. The
    // -internal-* rules additionally require m_internal to be set.
911    SWITCH(name, length) {
912        CASE("bottom-left") {
913            if (LIKELY(!hasEscape))
914                m_token = BOTTOMLEFT_SYM;
915        }
916        CASE("bottom-right") {
917            if (LIKELY(!hasEscape))
918                m_token = BOTTOMRIGHT_SYM;
919        }
920        CASE("bottom-center") {
921            if (LIKELY(!hasEscape))
922                m_token = BOTTOMCENTER_SYM;
923        }
924        CASE("bottom-left-corner") {
925            if (LIKELY(!hasEscape))
926                m_token = BOTTOMLEFTCORNER_SYM;
927        }
928        CASE("bottom-right-corner") {
929            if (LIKELY(!hasEscape))
930                m_token = BOTTOMRIGHTCORNER_SYM;
931        }
932        CASE("charset") {
933            if (name - 1 == dataStart<CharacterType>())
934                m_token = CHARSET_SYM;
935        }
936        CASE("font-face") {
937            m_token = FONT_FACE_SYM;
938        }
939        CASE("import") {
940            m_parsingMode = MediaQueryMode;
941            m_token = IMPORT_SYM;
942        }
943        CASE("keyframes") {
944            if (RuntimeEnabledFeatures::cssAnimationUnprefixedEnabled())
945                m_token = KEYFRAMES_SYM;
946        }
947        CASE("left-top") {
948            if (LIKELY(!hasEscape))
949                m_token = LEFTTOP_SYM;
950        }
951        CASE("left-middle") {
952            if (LIKELY(!hasEscape))
953                m_token = LEFTMIDDLE_SYM;
954        }
955        CASE("left-bottom") {
956            if (LIKELY(!hasEscape))
957                m_token = LEFTBOTTOM_SYM;
958        }
959        CASE("media") {
960            m_parsingMode = MediaQueryMode;
961            m_token = MEDIA_SYM;
962        }
963        CASE("namespace") {
964            m_token = NAMESPACE_SYM;
965        }
966        CASE("page") {
967            m_token = PAGE_SYM;
968        }
969        CASE("right-top") {
970            if (LIKELY(!hasEscape))
971                m_token = RIGHTTOP_SYM;
972        }
973        CASE("right-middle") {
974            if (LIKELY(!hasEscape))
975                m_token = RIGHTMIDDLE_SYM;
976        }
977        CASE("right-bottom") {
978            if (LIKELY(!hasEscape))
979                m_token = RIGHTBOTTOM_SYM;
980        }
981        CASE("supports") {
982            m_parsingMode = SupportsMode;
983            m_token = SUPPORTS_SYM;
984        }
985        CASE("top-left") {
986            if (LIKELY(!hasEscape))
987                m_token = TOPLEFT_SYM;
988        }
989        CASE("top-right") {
990            if (LIKELY(!hasEscape))
991                m_token = TOPRIGHT_SYM;
992        }
993        CASE("top-center") {
994            if (LIKELY(!hasEscape))
995                m_token = TOPCENTER_SYM;
996        }
997        CASE("top-left-corner") {
998            if (LIKELY(!hasEscape))
999                m_token = TOPLEFTCORNER_SYM;
1000        }
1001        CASE("top-right-corner") {
1002            if (LIKELY(!hasEscape))
1003                m_token = TOPRIGHTCORNER_SYM;
1004        }
1005        CASE("viewport") {
1006            m_token = VIEWPORT_RULE_SYM;
1007        }
1008        CASE("-internal-rule") {
1009            if (LIKELY(!hasEscape && m_internal))
1010                m_token = INTERNAL_RULE_SYM;
1011        }
1012        CASE("-webkit-region") {
1013            if (LIKELY(!hasEscape))
1014                m_token = WEBKIT_REGION_RULE_SYM;
1015        }
1016        CASE("-webkit-filter") {
1017            if (LIKELY(!hasEscape))
1018                m_token = WEBKIT_FILTER_RULE_SYM;
1019        }
1020        CASE("-internal-decls") {
1021            if (LIKELY(!hasEscape && m_internal))
1022                m_token = INTERNAL_DECLS_SYM;
1023        }
1024        CASE("-internal-value") {
1025            if (LIKELY(!hasEscape && m_internal))
1026                m_token = INTERNAL_VALUE_SYM;
1027        }
1028        CASE("-webkit-keyframes") {
1029            m_token = WEBKIT_KEYFRAMES_SYM;
1030        }
1031        CASE("-internal-selector") {
1032            if (LIKELY(!hasEscape && m_internal))
1033                m_token = INTERNAL_SELECTOR_SYM;
1034        }
1035        CASE("-internal-medialist") {
1036            if (!m_internal)
1037                return;
1038            m_parsingMode = MediaQueryMode;
1039            m_token = INTERNAL_MEDIALIST_SYM;
1040        }
1041        CASE("-internal-keyframe-rule") {
1042            if (LIKELY(!hasEscape && m_internal))
1043                m_token = INTERNAL_KEYFRAME_RULE_SYM;
1044        }
1045        CASE("-internal-keyframe-key-list") {
1046            if (!m_internal)
1047                return;
1048            m_token = INTERNAL_KEYFRAME_KEY_LIST_SYM;
1049        }
1050        CASE("-internal-supports-condition") {
1051            if (!m_internal)
1052                return;
1053            m_parsingMode = SupportsMode;
1054            m_token = INTERNAL_SUPPORTS_CONDITION_SYM;
1055        }
1056    }
1057}
1058
// Maps the keywords "or", "and" and "not" at the start of the current token
// to SUPPORTS_OR, SUPPORTS_AND and SUPPORTS_NOT. |length| is the token length.
// Must only be called while m_parsingMode is SupportsMode (asserted). This
// function assigns m_token only inside the CASE bodies.
// NOTE(review): SWITCH/CASE are defined outside this chunk; confirm their
// exact matching rules (e.g. case sensitivity) at the definition.
1059template <typename CharacterType>
1060inline void CSSTokenizer::detectSupportsToken(int length)
1061{
1062    ASSERT(m_parsingMode == SupportsMode);
1063    CharacterType* name = tokenStart<CharacterType>();
1064
1065    SWITCH(name, length) {
1066        CASE("or") {
1067            m_token = SUPPORTS_OR;
1068        }
1069        CASE("and") {
1070            m_token = SUPPORTS_AND;
1071        }
1072        CASE("not") {
1073            m_token = SUPPORTS_NOT;
1074        }
1075    }
1076}
1077
1078template <typename CharacterType>
1079inline void CSSTokenizer::detectCSSVariableDefinitionToken(int length)
1080{
1081    static const int prefixLength = static_cast<int>(sizeof("var-") - 1);
1082    if (length <= prefixLength)
1083        return;
1084    CharacterType* name = tokenStart<CharacterType>();
1085    COMPILE_ASSERT(prefixLength > 0, CSS_variable_prefix_must_be_nonempty);
1086    if (name[prefixLength - 1] == '-' && isIdentifierStartAfterDash(name + prefixLength) && isEqualToCSSCaseSensitiveIdentifier(name, "var"))
1087        m_token = VAR_DEFINITION;
1088}
1089
// Lexes the next token from the input buffer and returns its token id, which
// is also stored in m_token. |yylvalWithoutType| is cast to YYSTYPE*; for some
// tokens its |string| or |number| member is filled in.
//
// The first character of the token is read and the current-character pointer
// is advanced past it before the switch, so every case starts out looking at
// the second character of the token. The character class taken from
// typesOfASCIICharacters selects the case; when no case refines it, m_token
// stays equal to the character itself. Comments are skipped and lexing
// restarts at the character following the comment.
1090template <typename SrcCharacterType>
1091int CSSTokenizer::realLex(void* yylvalWithoutType)
1092{
1093    YYSTYPE* yylval = static_cast<YYSTYPE*>(yylvalWithoutType);
1094    // Write pointer for the next character.
1095    SrcCharacterType* result;
1096    CSSParserString resultString;
1097    bool hasEscape;
1098
1099    // The input buffer is terminated by a \0 character, so
1100    // it is safe to read one character ahead of a known non-null.
1101#ifndef NDEBUG
1102    // In debug we check with an ASSERT that the length is > 0 for string types.
1103    yylval->string.clear();
1104#endif
1105
1106restartAfterComment:
1107    result = currentCharacter<SrcCharacterType>();
1108    setTokenStart(result);
1109    m_tokenStartLineNumber = m_lineNumber;
1110    m_token = *currentCharacter<SrcCharacterType>();
1111    ++currentCharacter<SrcCharacterType>();
1112
    // Characters above 127 are not looked up in the table; they are always
    // handled as identifier start characters.
1113    switch ((m_token <= 127) ? typesOfASCIICharacters[m_token] : CharacterIdentifierStart) {
1114    case CharacterCaselessU:
1115        if (UNLIKELY(*currentCharacter<SrcCharacterType>() == '+')) {
1116            if (parseUnicodeRange<SrcCharacterType>()) {
1117                m_token = UNICODERANGE;
1118                yylval->string.init(tokenStart<SrcCharacterType>(), currentCharacter<SrcCharacterType>() - tokenStart<SrcCharacterType>());
1119                break;
1120            }
1121        }
1122        // Fall through to CharacterIdentifierStart.
1123
1124    case CharacterIdentifierStart:
        // Undo the advance made before the switch so that the current
        // character is the first character of the token again.
1125        --currentCharacter<SrcCharacterType>();
1126        parseIdentifier(result, yylval->string, hasEscape);
1127        m_token = IDENT;
1128
1129        if (UNLIKELY(*currentCharacter<SrcCharacterType>() == '(')) {
1130            if (m_parsingMode == SupportsMode && !hasEscape) {
                // A supports keyword directly followed by '(' stays a
                // keyword; the parenthesis is left for the next token.
1131                detectSupportsToken<SrcCharacterType>(result - tokenStart<SrcCharacterType>());
1132                if (m_token != IDENT)
1133                    break;
1134            }
1135
1136            m_token = FUNCTION;
1137            if (!hasEscape)
1138                detectFunctionTypeToken<SrcCharacterType>(result - tokenStart<SrcCharacterType>());
1139
1140            // Skip parenthesis
1141            ++currentCharacter<SrcCharacterType>();
1142            ++result;
1143            ++yylval->string.m_length;
1144
1145            if (m_token == URI) {
                // Reset to FUNCTION first. NOTE(review): parseURI is defined
                // outside this chunk; presumably it sets URI again on success.
1146                m_token = FUNCTION;
1147                // Check whether it is really an URI.
1148                if (yylval->string.is8Bit())
1149                    parseURI<LChar>(yylval->string);
1150                else
1151                    parseURI<UChar>(yylval->string);
1152            }
1153        } else if (UNLIKELY(m_parsingMode != NormalMode) && !hasEscape) {
1154            if (m_parsingMode == MediaQueryMode) {
1155                detectMediaQueryToken<SrcCharacterType>(result - tokenStart<SrcCharacterType>());
1156            } else if (m_parsingMode == SupportsMode) {
1157                detectSupportsToken<SrcCharacterType>(result - tokenStart<SrcCharacterType>());
1158            } else if (m_parsingMode == NthChildMode && isASCIIAlphaCaselessEqual(tokenStart<SrcCharacterType>()[0], 'n')) {
1159                if (result - tokenStart<SrcCharacterType>() == 1) {
1160                    // String "n" is IDENT but "n+1" is NTH.
1161                    if (parseNthChildExtra<SrcCharacterType>()) {
1162                        m_token = NTH;
1163                        yylval->string.m_length = currentCharacter<SrcCharacterType>() - tokenStart<SrcCharacterType>();
1164                    }
1165                } else if (result - tokenStart<SrcCharacterType>() >= 2 && tokenStart<SrcCharacterType>()[1] == '-') {
1166                    // String "n-" is IDENT but "n-1" is NTH.
1167                    // Set currentCharacter to '-' to continue parsing.
1168                    SrcCharacterType* nextCharacter = result;
1169                    currentCharacter<SrcCharacterType>() = tokenStart<SrcCharacterType>() + 1;
1170                    if (parseNthChildExtra<SrcCharacterType>()) {
1171                        m_token = NTH;
1172                        yylval->string.setLength(currentCharacter<SrcCharacterType>() - tokenStart<SrcCharacterType>());
1173                    } else {
1174                        // Revert the change to currentCharacter if unsuccessful.
1175                        currentCharacter<SrcCharacterType>() = nextCharacter;
1176                    }
1177                }
1178            }
1179        } else if (UNLIKELY(RuntimeEnabledFeatures::cssVariablesEnabled())) {
1180            detectCSSVariableDefinitionToken<SrcCharacterType>(result - tokenStart<SrcCharacterType>());
1181        }
1182        break;
1183
1184    case CharacterDot:
        // A dot that is not followed by a digit is returned as a plain '.'.
1185        if (!isASCIIDigit(currentCharacter<SrcCharacterType>()[0]))
1186            break;
1187        // Fall through to CharacterNumber.
1188
1189    case CharacterNumber: {
1190        bool dotSeen = (m_token == '.');
1191
1192        while (true) {
1193            if (!isASCIIDigit(currentCharacter<SrcCharacterType>()[0])) {
1194                // Only one dot is allowed for a number,
1195                // and it must be followed by a digit.
1196                if (currentCharacter<SrcCharacterType>()[0] != '.' || dotSeen || !isASCIIDigit(currentCharacter<SrcCharacterType>()[1]))
1197                    break;
1198                dotSeen = true;
1199            }
1200            ++currentCharacter<SrcCharacterType>();
1201        }
1202
1203        if (UNLIKELY(m_parsingMode == NthChildMode) && !dotSeen && isASCIIAlphaCaselessEqual(*currentCharacter<SrcCharacterType>(), 'n')) {
1204            // "[0-9]+n" is always an NthChild.
1205            ++currentCharacter<SrcCharacterType>();
1206            parseNthChildExtra<SrcCharacterType>();
1207            m_token = NTH;
1208            yylval->string.init(tokenStart<SrcCharacterType>(), currentCharacter<SrcCharacterType>() - tokenStart<SrcCharacterType>());
1209            break;
1210        }
1211
1212        // Use SVG parser for numbers on SVG presentation attributes.
1213        if (isSVGNumberParsingEnabledForMode(m_parser.m_context.mode())) {
1214            // We need to take care of units like 'em' or 'ex'.
1215            SrcCharacterType* character = currentCharacter<SrcCharacterType>();
1216            if (isASCIIAlphaCaselessEqual(*character, 'e')) {
1217                ASSERT(character - tokenStart<SrcCharacterType>() > 0);
1218                ++character;
                // Only an 'e' followed by a sign or a digit is consumed as an
                // exponent; otherwise the current character stays on the 'e'.
1219                if (*character == '-' || *character == '+' || isASCIIDigit(*character)) {
1220                    ++character;
1221                    while (isASCIIDigit(*character))
1222                        ++character;
1223                    // Use FLOATTOKEN if the string contains exponents.
1224                    dotSeen = true;
1225                    currentCharacter<SrcCharacterType>() = character;
1226                }
1227            }
1228            if (!parseSVGNumber(tokenStart<SrcCharacterType>(), character - tokenStart<SrcCharacterType>(), yylval->number))
1229                break;
1230        } else {
1231            yylval->number = charactersToDouble(tokenStart<SrcCharacterType>(), currentCharacter<SrcCharacterType>() - tokenStart<SrcCharacterType>());
1232        }
1233
1234        // Unit identifier (e.g. "vw") following the number, if any.
1235        if (isIdentifierStart<SrcCharacterType>()) {
1236            SrcCharacterType* type = currentCharacter<SrcCharacterType>();
1237            result = currentCharacter<SrcCharacterType>();
1238
1239            parseIdentifier(result, resultString, hasEscape);
1240
            // DIMEN is kept unless detectNumberToken assigns another token.
1241            m_token = DIMEN;
1242            if (!hasEscape)
1243                detectNumberToken(type, currentCharacter<SrcCharacterType>() - type);
1244
1245            if (m_token == DIMEN) {
1246                // The decoded number is overwritten, but this is intentional.
1247                yylval->string.init(tokenStart<SrcCharacterType>(), currentCharacter<SrcCharacterType>() - tokenStart<SrcCharacterType>());
1248            }
1249        } else if (*currentCharacter<SrcCharacterType>() == '%') {
1250            // Although the CSS grammar says {num}% we follow
1251            // webkit at the moment which uses {num}%+.
1252            do {
1253                ++currentCharacter<SrcCharacterType>();
1254            } while (*currentCharacter<SrcCharacterType>() == '%');
1255            m_token = PERCENTAGE;
1256        } else {
1257            m_token = dotSeen ? FLOATTOKEN : INTEGER;
1258        }
1259        break;
1260    }
1261
1262    case CharacterDash:
1263        if (isIdentifierStartAfterDash(currentCharacter<SrcCharacterType>())) {
            // Undo the advance made before the switch so that the identifier
            // includes the leading dash.
1264            --currentCharacter<SrcCharacterType>();
1265            parseIdentifier(result, resultString, hasEscape);
1266            m_token = IDENT;
1267
1268            if (*currentCharacter<SrcCharacterType>() == '(') {
1269                m_token = FUNCTION;
1270                if (!hasEscape)
1271                    detectDashToken<SrcCharacterType>(result - tokenStart<SrcCharacterType>());
                // Include the parenthesis in the token.
1272                ++currentCharacter<SrcCharacterType>();
1273                ++result;
1274            } else if (UNLIKELY(m_parsingMode == NthChildMode) && !hasEscape && isASCIIAlphaCaselessEqual(tokenStart<SrcCharacterType>()[1], 'n')) {
1275                if (result - tokenStart<SrcCharacterType>() == 2) {
1276                    // String "-n" is IDENT but "-n+1" is NTH.
1277                    if (parseNthChildExtra<SrcCharacterType>()) {
1278                        m_token = NTH;
1279                        result = currentCharacter<SrcCharacterType>();
1280                    }
1281                } else if (result - tokenStart<SrcCharacterType>() >= 3 && tokenStart<SrcCharacterType>()[2] == '-') {
1282                    // String "-n-" is IDENT but "-n-1" is NTH.
1283                    // Set currentCharacter to second '-' of '-n-' to continue parsing.
1284                    SrcCharacterType* nextCharacter = result;
1285                    currentCharacter<SrcCharacterType>() = tokenStart<SrcCharacterType>() + 2;
1286                    if (parseNthChildExtra<SrcCharacterType>()) {
1287                        m_token = NTH;
1288                        result = currentCharacter<SrcCharacterType>();
1289                    } else {
1290                        // Revert the change to currentCharacter if unsuccessful.
1291                        currentCharacter<SrcCharacterType>() = nextCharacter;
1292                    }
1293                }
1294            }
1295            resultString.setLength(result - tokenStart<SrcCharacterType>());
1296            yylval->string = resultString;
1297        } else if (currentCharacter<SrcCharacterType>()[0] == '-' && currentCharacter<SrcCharacterType>()[1] == '>') {
            // The token is "-->".
1298            currentCharacter<SrcCharacterType>() += 2;
1299            m_token = SGML_CD;
1300        } else if (UNLIKELY(m_parsingMode == NthChildMode)) {
1301            // "-[0-9]+n" is always an NthChild.
1302            if (parseNthChild<SrcCharacterType>()) {
1303                parseNthChildExtra<SrcCharacterType>();
1304                m_token = NTH;
1305                yylval->string.init(tokenStart<SrcCharacterType>(), currentCharacter<SrcCharacterType>() - tokenStart<SrcCharacterType>());
1306            }
1307        }
1308        break;
1309
1310    case CharacterOther:
1311        // m_token is simply the current character.
1312        break;
1313
1314    case CharacterNull:
1315        // Do not advance pointer at the end of input.
1316        --currentCharacter<SrcCharacterType>();
1317        break;
1318
1319    case CharacterWhiteSpace:
1320        m_token = WHITESPACE;
1321        // Might start with a '\n'.
1322        --currentCharacter<SrcCharacterType>();
        // Consume the whole run of whitespace, counting line breaks on the way.
1323        do {
1324            if (*currentCharacter<SrcCharacterType>() == '\n')
1325                ++m_lineNumber;
1326            ++currentCharacter<SrcCharacterType>();
1327        } while (*currentCharacter<SrcCharacterType>() <= ' ' && (typesOfASCIICharacters[*currentCharacter<SrcCharacterType>()] == CharacterWhiteSpace));
1328        break;
1329
1330    case CharacterEndMediaQueryOrSupports:
1331        if (m_parsingMode == MediaQueryMode || m_parsingMode == SupportsMode)
1332            m_parsingMode = NormalMode;
1333        break;
1334
1335    case CharacterEndNthChild:
1336        if (m_parsingMode == NthChildMode)
1337            m_parsingMode = NormalMode;
1338        break;
1339
1340    case CharacterQuote:
        // m_token still holds the quote character here and is passed to both
        // helpers. If the check fails, the quote is returned as a plain
        // character token.
1341        if (checkAndSkipString(currentCharacter<SrcCharacterType>(), m_token)) {
1342            ++result;
1343            parseString<SrcCharacterType>(result, yylval->string, m_token);
1344            m_token = STRING;
1345        }
1346        break;
1347
1348    case CharacterExclamationMark: {
1349        SrcCharacterType* start = skipWhiteSpace(currentCharacter<SrcCharacterType>());
1350        if (isEqualToCSSIdentifier(start, "important")) {
1351            m_token = IMPORTANT_SYM;
            // 9 is the length of "important".
1352            currentCharacter<SrcCharacterType>() = start + 9;
1353        }
1354        break;
1355    }
1356
1357    case CharacterHashmark: {
1358        SrcCharacterType* start = currentCharacter<SrcCharacterType>();
1359        result = currentCharacter<SrcCharacterType>();
1360
1361        if (isASCIIDigit(*currentCharacter<SrcCharacterType>())) {
1362            // This must be a valid hex number token.
1363            do {
1364                ++currentCharacter<SrcCharacterType>();
1365            } while (isASCIIHexDigit(*currentCharacter<SrcCharacterType>()));
1366            m_token = HEX;
1367            yylval->string.init(start, currentCharacter<SrcCharacterType>() - start);
1368        } else if (isIdentifierStart<SrcCharacterType>()) {
1369            m_token = IDSEL;
1370            parseIdentifier(result, yylval->string, hasEscape);
1371            if (!hasEscape) {
1372                // Check whether the identifier is also a valid hex number.
1373                SrcCharacterType* current = start;
1374                m_token = HEX;
1375                do {
1376                    if (!isASCIIHexDigit(*current)) {
1377                        m_token = IDSEL;
1378                        break;
1379                    }
1380                    ++current;
1381                } while (current < result);
1382            }
1383        }
1384        break;
1385    }
1386
1387    case CharacterSlash:
1388        // Ignore comments. They are not even considered as white spaces.
1389        if (*currentCharacter<SrcCharacterType>() == '*') {
1390            const CSSParserLocation startLocation = currentLocation();
1391            if (m_parser.m_sourceDataHandler) {
1392                unsigned startOffset = currentCharacter<SrcCharacterType>() - dataStart<SrcCharacterType>() - 1; // Start with a slash.
1393                m_parser.m_sourceDataHandler->startComment(startOffset - m_parsedTextPrefixLength);
1394            }
1395            ++currentCharacter<SrcCharacterType>();
1396            while (currentCharacter<SrcCharacterType>()[0] != '*' || currentCharacter<SrcCharacterType>()[1] != '/') {
1397                if (*currentCharacter<SrcCharacterType>() == '\n')
1398                    ++m_lineNumber;
1399                if (*currentCharacter<SrcCharacterType>() == '\0') {
1400                    // Unterminated comments are simply ignored.
                    // Stepping back by two cancels the "+= 2" after the loop,
                    // leaving the current character on the terminating null.
1401                    currentCharacter<SrcCharacterType>() -= 2;
1402                    m_parser.reportError(startLocation, CSSParser::UnterminatedCommentError);
1403                    break;
1404                }
1405                ++currentCharacter<SrcCharacterType>();
1406            }
            // Skip the closing "*/".
1407            currentCharacter<SrcCharacterType>() += 2;
1408            if (m_parser.m_sourceDataHandler) {
1409                unsigned endOffset = currentCharacter<SrcCharacterType>() - dataStart<SrcCharacterType>();
1410                unsigned userTextEndOffset = static_cast<unsigned>(m_length - 1 - m_parsedTextSuffixLength);
1411                m_parser.m_sourceDataHandler->endComment(std::min(endOffset, userTextEndOffset) - m_parsedTextPrefixLength);
1412            }
1413            goto restartAfterComment;
1414        }
1415        break;
1416
1417    case CharacterDollar:
1418        if (*currentCharacter<SrcCharacterType>() == '=') {
1419            ++currentCharacter<SrcCharacterType>();
1420            m_token = ENDSWITH;
1421        }
1422        break;
1423
1424    case CharacterAsterisk:
1425        if (*currentCharacter<SrcCharacterType>() == '=') {
1426            ++currentCharacter<SrcCharacterType>();
1427            m_token = CONTAINS;
1428        }
1429        break;
1430
1431    case CharacterPlus:
1432        if (UNLIKELY(m_parsingMode == NthChildMode)) {
1433            // Simplest case. "+[0-9]*n" is always NthChild.
1434            if (parseNthChild<SrcCharacterType>()) {
1435                parseNthChildExtra<SrcCharacterType>();
1436                m_token = NTH;
1437                yylval->string.init(tokenStart<SrcCharacterType>(), currentCharacter<SrcCharacterType>() - tokenStart<SrcCharacterType>());
1438            }
1439        }
1440        break;
1441
1442    case CharacterLess:
        // The token is "<!--".
1443        if (currentCharacter<SrcCharacterType>()[0] == '!' && currentCharacter<SrcCharacterType>()[1] == '-' && currentCharacter<SrcCharacterType>()[2] == '-') {
1444            currentCharacter<SrcCharacterType>() += 3;
1445            m_token = SGML_CD;
1446        }
1447        break;
1448
1449    case CharacterAt:
1450        if (isIdentifierStart<SrcCharacterType>()) {
            // ATKEYWORD is kept unless detectAtToken assigns a specific
            // at-rule token.
1451            m_token = ATKEYWORD;
1452            ++result;
1453            parseIdentifier(result, resultString, hasEscape);
1454            detectAtToken<SrcCharacterType>(result - tokenStart<SrcCharacterType>(), hasEscape);
1455        }
1456        break;
1457
1458    case CharacterBackSlash:
        // A backslash that starts a valid escape begins an identifier.
1459        if (isCSSEscape(*currentCharacter<SrcCharacterType>())) {
1460            --currentCharacter<SrcCharacterType>();
1461            parseIdentifier(result, yylval->string, hasEscape);
1462            m_token = IDENT;
1463        }
1464        break;
1465
1466    case CharacterXor:
1467        if (*currentCharacter<SrcCharacterType>() == '=') {
1468            ++currentCharacter<SrcCharacterType>();
1469            m_token = BEGINSWITH;
1470        }
1471        break;
1472
1473    case CharacterVerticalBar:
1474        if (*currentCharacter<SrcCharacterType>() == '=') {
1475            ++currentCharacter<SrcCharacterType>();
1476            m_token = DASHMATCH;
1477        }
1478        break;
1479
1480    case CharacterTilde:
1481        if (*currentCharacter<SrcCharacterType>() == '=') {
1482            ++currentCharacter<SrcCharacterType>();
1483            m_token = INCLUDES;
1484        }
1485        break;
1486
1487    default:
1488        ASSERT_NOT_REACHED();
1489        break;
1490    }
1491
1492    return m_token;
1493}
1494
// Records |tokenStart| as the start of the current token for 8-bit (LChar)
// input by storing it in the ptr8 member of m_tokenStart.
1495template <>
1496inline void CSSTokenizer::setTokenStart<LChar>(LChar* tokenStart)
1497{
1498    m_tokenStart.ptr8 = tokenStart;
1499}
1500
// Records |tokenStart| as the start of the current token for 16-bit (UChar)
// input by storing it in the ptr16 member of m_tokenStart.
1501template <>
1502inline void CSSTokenizer::setTokenStart<UChar>(UChar* tokenStart)
1503{
1504    m_tokenStart.ptr16 = tokenStart;
1505}
1506
1507void CSSTokenizer::setupTokenizer(const char* prefix, unsigned prefixLength, const String& string, const char* suffix, unsigned suffixLength)
1508{
1509    m_parsedTextPrefixLength = prefixLength;
1510    m_parsedTextSuffixLength = suffixLength;
1511    unsigned stringLength = string.length();
1512    unsigned length = stringLength + m_parsedTextPrefixLength + m_parsedTextSuffixLength + 1;
1513    m_length = length;
1514
1515    if (!stringLength || string.is8Bit()) {
1516        m_dataStart8 = adoptArrayPtr(new LChar[length]);
1517        for (unsigned i = 0; i < m_parsedTextPrefixLength; i++)
1518            m_dataStart8[i] = prefix[i];
1519
1520        if (stringLength)
1521            memcpy(m_dataStart8.get() + m_parsedTextPrefixLength, string.characters8(), stringLength * sizeof(LChar));
1522
1523        unsigned start = m_parsedTextPrefixLength + stringLength;
1524        unsigned end = start + suffixLength;
1525        for (unsigned i = start; i < end; i++)
1526            m_dataStart8[i] = suffix[i - start];
1527
1528        m_dataStart8[length - 1] = 0;
1529
1530        m_is8BitSource = true;
1531        m_currentCharacter8 = m_dataStart8.get();
1532        m_currentCharacter16 = 0;
1533        setTokenStart<LChar>(m_currentCharacter8);
1534        m_lexFunc = &CSSTokenizer::realLex<LChar>;
1535        return;
1536    }
1537
1538    m_dataStart16 = adoptArrayPtr(new UChar[length]);
1539    for (unsigned i = 0; i < m_parsedTextPrefixLength; i++)
1540        m_dataStart16[i] = prefix[i];
1541
1542    ASSERT(stringLength);
1543    memcpy(m_dataStart16.get() + m_parsedTextPrefixLength, string.characters16(), stringLength * sizeof(UChar));
1544
1545    unsigned start = m_parsedTextPrefixLength + stringLength;
1546    unsigned end = start + suffixLength;
1547    for (unsigned i = start; i < end; i++)
1548        m_dataStart16[i] = suffix[i - start];
1549
1550    m_dataStart16[length - 1] = 0;
1551
1552    m_is8BitSource = false;
1553    m_currentCharacter8 = 0;
1554    m_currentCharacter16 = m_dataStart16.get();
1555    setTokenStart<UChar>(m_currentCharacter16);
1556    m_lexFunc = &CSSTokenizer::realLex<UChar>;
1557}
1558
1559} // namespace WebCore
1560