1// Copyright (c) 2011, Mike Samuel
2// All rights reserved.
3//
4// Redistribution and use in source and binary forms, with or without
5// modification, are permitted provided that the following conditions
6// are met:
7//
8// Redistributions of source code must retain the above copyright
9// notice, this list of conditions and the following disclaimer.
10// Redistributions in binary form must reproduce the above copyright
11// notice, this list of conditions and the following disclaimer in the
12// documentation and/or other materials provided with the distribution.
13// Neither the name of the OWASP nor the names of its contributors may
14// be used to endorse or promote products derived from this software
15// without specific prior written permission.
16// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
19// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
20// COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
21// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
22// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
24// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
26// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27// POSSIBILITY OF SUCH DAMAGE.
28
29package org.owasp.html;
30
31final class CssGrammar {
32
33  private static void errorRecoveryUntilSemiOrCloseBracket(
34      CssTokens.TokenIterator it) {
35    int bracketDepth = 0;
36    for (; it.hasNext(); it.advance()) {
37      switch (it.type()) {
38        case SEMICOLON:
39          it.advance();
40          return;
41        case LEFT_CURLY:
42        case LEFT_PAREN:
43        case LEFT_SQUARE:
44          ++bracketDepth;
45          break;
46        case RIGHT_CURLY:
47        case RIGHT_PAREN:
48        case RIGHT_SQUARE:
49          --bracketDepth;
50          if (bracketDepth <= 0) {
51            if (bracketDepth != 0) { it.advance(); }
52            return;
53          }
54          break;
55        default:
56          break;
57      }
58    }
59  }
60
61  static void parsePropertyGroup(String css, PropertyHandler handler) {
62    // Split tokens by semicolons/curly-braces, then by first colon,
63    // dropping spaces and comments to identify property names and token runs
64    // that form the value.
65
66    CssTokens tokens = CssTokens.lex(css);
67    CssTokens.TokenIterator it = tokens.iterator();
68    propertyNameLoop:
69    while (it.hasTokenAfterSpace()) {
70      // Check that we have an identifier that might be a property name.
71      if (it.type() != CssTokens.TokenType.IDENT) {
72        errorRecoveryUntilSemiOrCloseBracket(it);
73        continue;
74      }
75
76      String name = it.next();
77
78      // Look for a colon.
79      if (!(it.hasTokenAfterSpace() && ":".equals(it.token()))) {
80        errorRecoveryUntilSemiOrCloseBracket(it);
81        continue propertyNameLoop;
82      }
83      it.advance();
84
85      handler.startProperty(Strings.toLowerCase(name));
86      parsePropertyValue(it, handler);
87      handler.endProperty();
88    }
89  }
90
91  private static void parsePropertyValue(
92      CssTokens.TokenIterator it, PropertyHandler handler) {
93    propertyValueLoop:
94    while (it.hasNext()) {
95      CssTokens.TokenType type = it.type();
96      String token = it.token();
97      switch (type) {
98        case SEMICOLON:
99          it.advance();
100          break propertyValueLoop;
101        case FUNCTION:
102          CssTokens.TokenIterator actuals = it.spliceToEnd();
103          handler.startFunction(token);
104          parsePropertyValue(actuals, handler);
105          handler.endFunction(token);
106          continue;  // Skip the advance over token.
107        case IDENT:
108          handler.identifier(token);
109          break;
110        case HASH_UNRESTRICTED:
111          if (token.length() == 4 || token.length() == 7) {
112            handler.hash(token);
113          }
114          break;
115        case STRING:
116          handler.quotedString(token);
117          break;
118        case URL:
119          handler.url(token);
120          break;
121        case DIMENSION:
122        case NUMBER:
123        case PERCENTAGE:
124          handler.quantity(token);
125          break;
126        case AT:
127        case BAD_DIMENSION:
128        case COLUMN:
129        case DOT_IDENT:
130        case HASH_ID:
131        case MATCH:
132        case UNICODE_RANGE:
133        case WHITESPACE:
134          break;
135        case LEFT_CURLY:
136        case LEFT_PAREN:
137        case LEFT_SQUARE:
138        case RIGHT_CURLY:
139        case RIGHT_PAREN:
140        case RIGHT_SQUARE:
141        case COMMA:
142        case COLON:
143        case DELIM:
144          handler.punctuation(token);
145          break;
146      }
147      it.advance();
148    }
149  }
150
151  /**
152   * Decodes any escape sequences and strips any quotes from the input.
153   */
154  static String cssContent(String token) {
155    int n = token.length();
156    int pos = 0;
157    StringBuilder sb = null;
158    if (n >= 2) {
159      char ch0 = token.charAt(0);
160      if (ch0 == '"' || ch0 == '\'') {
161        if (ch0 == token.charAt(n - 1)) {
162          pos = 1;
163          --n;
164          sb = new StringBuilder(n);
165        }
166      }
167    }
168    for (int esc; (esc = token.indexOf('\\', pos)) >= 0;) {
169      int end = esc + 2;
170      if (esc > n) { break; }
171      if (sb == null) { sb = new StringBuilder(n); }
172      sb.append(token, pos, esc);
173      int codepoint = token.charAt(end - 1);
174      if (isHex(codepoint)) {
175        // Parse \hhhhh<opt-break> where hhhhh is one or more hex digits
176        // and <opt-break> is an optional space or tab character that can be
177        // used to separate an escape sequence from a following literal hex
178        // digit.
179        while (end < n && isHex(token.charAt(end))) { ++end; }
180        try {
181          codepoint = Integer.parseInt(token.substring(esc + 1, end), 16);
182        } catch (RuntimeException ex) {
183          codepoint = 0xfffd;  // Unknown codepoint.
184        }
185        if (end < n) {
186          char ch = token.charAt(end);
187          if (ch == ' ' || ch == '\t') {  // Ignorable hex follower.
188            ++end;
189          }
190        }
191      }
192      sb.appendCodePoint(codepoint);
193      pos = end;
194    }
195    if (sb == null) { return token; }
196    return sb.append(token, pos, n).toString();
197  }
198
199  private static boolean isHex(int codepoint) {
200    return ('0' <= codepoint && codepoint <= '9')
201        || ('A' <= codepoint && codepoint <= 'F')
202        || ('a' <= codepoint && codepoint <= 'f');
203  }
204
205  interface PropertyHandler {
206    void startProperty(String propertyName);
207    void quantity(String token);
208    void identifier(String token);
209    void hash(String token);
210    void quotedString(String token);
211    void url(String token);
212    void punctuation(String token);
213    void startFunction(String token);
214    void endFunction(String token);
215    void endProperty();
216  }
217
218}
219