1// Copyright (c) 2013, Mike Samuel
2// All rights reserved.
3//
4// Redistribution and use in source and binary forms, with or without
5// modification, are permitted provided that the following conditions
6// are met:
7//
8// Redistributions of source code must retain the above copyright
9// notice, this list of conditions and the following disclaimer.
10// Redistributions in binary form must reproduce the above copyright
11// notice, this list of conditions and the following disclaimer in the
12// documentation and/or other materials provided with the distribution.
13// Neither the name of the OWASP nor the names of its contributors may
14// be used to endorse or promote products derived from this software
15// without specific prior written permission.
16// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
19// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
20// COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
21// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
22// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
24// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
26// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27// POSSIBILITY OF SUCH DAMAGE.
28
29package org.owasp.html;
30
31import java.util.Arrays;
32import java.util.EnumMap;
33import java.util.Random;
34import java.util.regex.Pattern;
35
36import org.junit.Test;
37import org.owasp.html.CssTokens.TokenType;
38
39import com.google.common.collect.Maps;
40
41public class CssFuzzerTest extends FuzzyTestCase {
42
43  private static final String[] TOKEN_PARTS = new String[] {
44    "'", "\"", "<!--", "-->", "/*", "*/", "***", "//", "\r", "\n",
45    "<", ">", "/", ",", ";", ":", "(", "url", "Url", ")", "[", "]", "{", "}",
46    "\\", "\\a", "\\d", "\\0", " ", "\t", "42", ".", "ex", "auto", "foo", "BAr",
47    "important", "!", "\ufeff", "\u0000", "\u00a0", "\ufffd", "\ud801\udc02",
48    "\u007f", "\u000c", "CDATA", "style"
49  };
50
51  private static final String[] FREQUENT_TOKEN_PARTS = new String[] {
52    "*/", " ", "\t", "\r", "\n",
53  };
54
55  private static final String[] DISALLOWED_IN_OUTPUT = {
56    "</style", "<![CDATA[", "]]>", "\r", "\n",
57  };
58
59  final class Watcher implements Runnable {
60    String input;
61    long started;
62
63    public void run() {
64      synchronized (this) {
65        try {
66          while (true) {
67            this.wait(1000 /* ms = 1s */);
68            String input = this.input;
69            if (input == null) { break; }  // Done
70            long started = this.started;
71            long now = System.nanoTime();
72            if (now - started >= 1000000000L /* ns = 1s */) {
73              System.err.println(
74                  "`" + input + "` is slow. seed=" + CssFuzzerTest.this.seed);
75            }
76          }
77        } catch (InterruptedException ex) {
78          // Done
79        }
80      }
81    }
82  }
83
84  @Test
85  public final void testUnderStress() {
86    Random r = this.rnd;
87    Watcher watcher = new Watcher();
88    Thread watcherThread = null;
89    for (int run = 0, nRuns = (1 << 16); run < nRuns; ++run) {
90      // Compose a random string from token parts.
91      StringBuilder sb = new StringBuilder();
92      int nParts = r.nextInt(64) + 16;
93      for (int j = nParts; --j >= 0;) {
94        int die = r.nextInt(32);
95        switch (die) {
96        case 0: sb.append((char) rnd.nextInt(0x80)); break;
97        case 1: sb.append((char) rnd.nextInt(0x1800)); break;
98        default:
99          String[] arr = (die & 1) != 0 ? TOKEN_PARTS : FREQUENT_TOKEN_PARTS;
100          sb.append(arr[rnd.nextInt(arr.length)]);
101          break;
102        }
103      }
104      String randomCss = sb.toString();
105
106      synchronized (watcher) {
107        watcher.input = randomCss;
108        watcher.started = System.nanoTime();
109      }
110      if (watcherThread == null) {
111        watcherThread = new Thread(watcher);
112        watcherThread.setDaemon(true);
113        watcherThread.start();
114      }
115
116      String msg = "seed=" + this.seed + ", css=`" + randomCss + "`";
117      CssTokens tokens = CssTokens.lex(randomCss);
118
119      // Test idempotent
120      String renormalized = CssTokens.lex(tokens.normalizedCss).normalizedCss;
121      if (!renormalized.equals(tokens.normalizedCss)) {
122        if (!renormalized.equals(fixDigitSpaceUnit(tokens))) {
123          for (CssTokens.TokenIterator it = tokens.iterator(); it.hasNext();
124               it.advance()) {
125            System.err.println(it.token() + ":" + it.type());
126          }
127          assertEquals(
128              "not idempotent, " + msg,
129              tokens.normalizedCss,
130              renormalized);
131        }
132      }
133
134      // Test normalized CSS does not contain HTML/XML breaking tokens.
135      for (String disallowed : DISALLOWED_IN_OUTPUT) {
136        assertFalse(
137            "contains " + disallowed + ", " + msg,
138            tokens.normalizedCss.contains(disallowed));
139      }
140
141      // Test that tokens are roughly well-formed.
142      int nTokens = 0;
143      for (CssTokens.TokenIterator it = tokens.iterator(); it.hasNext();) {
144        CssTokens.TokenType type = it.type();
145        String token = it.next();
146        Pattern filter = TOKEN_TYPE_FILTERS.get(type);
147        if (filter != null && !filter.matcher(token).matches()) {
148          fail(type + " `" + token + "`, " + msg);
149        }
150        ++nTokens;
151      }
152
153      // Test that walking the bracket list works.
154      int[] reverse = new int[nTokens];
155      Arrays.fill(reverse, -1);
156      for (int j = 0; j < nTokens; ++j) {
157        int partner = tokens.brackets.partner(j);
158        if (partner != -1) {
159          reverse[partner] = j;
160        }
161      }
162      for (int j = 0; j < nTokens; ++j) {
163        if (reverse[j] != -1) {
164          assertEquals(msg, reverse[reverse[j]], j);
165        }
166      }
167    }
168    synchronized (watcher) {
169      watcher.input = null;
170      watcher.notifyAll();
171    }
172  }
173
174  private static final EnumMap<CssTokens.TokenType, Pattern> TOKEN_TYPE_FILTERS
175    = Maps.newEnumMap(CssTokens.TokenType.class);
176  static {
177    String NUMBER = "-?(?:0|[1-9][0-9]*)(?:\\.[0-9]*[1-9])?(?:e-?[1-9][0-9]*)?";
178    String IDENT_START = "[a-zA-Z_\\u0080-\udbff\udfff\\-]";
179    String IDENT_PART = "(?:" + IDENT_START + "|[0-9])";
180    String IDENT = IDENT_START + IDENT_PART + "*";
181    TOKEN_TYPE_FILTERS.put(
182        CssTokens.TokenType.AT, Pattern.compile("@" + IDENT));
183    TOKEN_TYPE_FILTERS.put(
184        CssTokens.TokenType.COLON, Pattern.compile(":"));
185    TOKEN_TYPE_FILTERS.put(
186        CssTokens.TokenType.COLUMN, Pattern.compile("\\|\\|"));
187    TOKEN_TYPE_FILTERS.put(
188        CssTokens.TokenType.COMMA, Pattern.compile(","));
189    TOKEN_TYPE_FILTERS.put(
190        CssTokens.TokenType.DELIM,
191        Pattern.compile("[^\\w\u0000- \u0080-\uffff\\-]"));
192    TOKEN_TYPE_FILTERS.put(
193        CssTokens.TokenType.DIMENSION, Pattern.compile(NUMBER + "[a-z]+"));
194    TOKEN_TYPE_FILTERS.put(
195        CssTokens.TokenType.DOT_IDENT, Pattern.compile("\\." + IDENT));
196    TOKEN_TYPE_FILTERS.put(
197        CssTokens.TokenType.FUNCTION, Pattern.compile(IDENT + "[(]"));
198    TOKEN_TYPE_FILTERS.put(
199        CssTokens.TokenType.HASH_ID, Pattern.compile("#" + IDENT_PART + "+"));
200    TOKEN_TYPE_FILTERS.put(
201        CssTokens.TokenType.HASH_UNRESTRICTED,
202        Pattern.compile("#[a-fA-F0-9]+"));
203    TOKEN_TYPE_FILTERS.put(
204        CssTokens.TokenType.IDENT,
205        Pattern.compile(IDENT));
206    TOKEN_TYPE_FILTERS.put(
207        CssTokens.TokenType.LEFT_CURLY,
208        Pattern.compile("[{]"));
209    TOKEN_TYPE_FILTERS.put(
210        CssTokens.TokenType.LEFT_PAREN,
211        Pattern.compile("[(]"));
212    TOKEN_TYPE_FILTERS.put(
213        CssTokens.TokenType.LEFT_SQUARE,
214        Pattern.compile("[\\[]"));
215    TOKEN_TYPE_FILTERS.put(
216        CssTokens.TokenType.MATCH,
217        Pattern.compile("[~^$|*]="));
218    TOKEN_TYPE_FILTERS.put(
219        CssTokens.TokenType.NUMBER,
220        Pattern.compile(NUMBER));
221    TOKEN_TYPE_FILTERS.put(
222        CssTokens.TokenType.PERCENTAGE,
223        Pattern.compile(NUMBER + "%"));
224    TOKEN_TYPE_FILTERS.put(
225        CssTokens.TokenType.RIGHT_CURLY,
226        Pattern.compile("[}]"));
227    TOKEN_TYPE_FILTERS.put(
228        CssTokens.TokenType.RIGHT_PAREN,
229        Pattern.compile("[)]"));
230    TOKEN_TYPE_FILTERS.put(
231        CssTokens.TokenType.RIGHT_SQUARE,
232        Pattern.compile("[\\]]"));
233    TOKEN_TYPE_FILTERS.put(
234        CssTokens.TokenType.SEMICOLON,
235        Pattern.compile(";"));
236    TOKEN_TYPE_FILTERS.put(
237        CssTokens.TokenType.STRING,
238        Pattern.compile("'(?:[^'\r\n\\\\]|\\\\[^\r\n])*'"));
239    TOKEN_TYPE_FILTERS.put(
240        CssTokens.TokenType.UNICODE_RANGE,
241        Pattern.compile("U\\+[0-9a-f]{1,6}(?:-[0-9a-f]{1,6}|\\?{0,5})?"));
242    TOKEN_TYPE_FILTERS.put(
243        CssTokens.TokenType.URL,
244        Pattern.compile("url\\('[0-9A-Za-z\\-_.~:/?#\\[\\]@!$&+,;=%]*'\\)"));
245    TOKEN_TYPE_FILTERS.put(
246        CssTokens.TokenType.WHITESPACE,
247        Pattern.compile(" "));
248  }
249
250  /**
251   * "1:NUMBER ex:IDENT" -> "1ex:DIMENSION" is a common source source of
252   * a-idempotency, but not one that causes problems in practice.
253   * This hack helps ignore it.
254   */
255  static String fixDigitSpaceUnit(CssTokens tokens) {
256    StringBuilder sb = new StringBuilder();
257    for (CssTokens.TokenIterator it = tokens.iterator(); it.hasNext();) {
258      if (it.type() != TokenType.NUMBER) {
259        sb.append(it.next());
260      } else {
261        do {
262          sb.append(it.next());
263        } while (it.hasNext() && it.type() == TokenType.NUMBER);
264        if (it.hasNext() && it.type() == TokenType.WHITESPACE) {
265          it.advance();
266          String numberFollower = null;
267          if (it.hasNext()) {
268            String token = it.token();
269            switch (it.type()) {
270              case IDENT:
271                if (CssTokens.isWellKnownUnit(token)) {
272                  numberFollower = token;
273                  it.advance();
274                  if (it.hasNext() && it.token().startsWith(".")) {
275                    numberFollower += " ";
276                  }
277                  it.backup();
278                }
279                break;
280              case FUNCTION:
281                String name = token.substring(0, token.length() - 1);
282                if (CssTokens.isWellKnownUnit(name)) {
283                  numberFollower = token;
284                }
285                break;
286              case DELIM:
287                if ("%".equals(token)) {
288                  numberFollower = token;
289                }
290                break;
291              default: break;
292            }
293          }
294          if (numberFollower == null) {
295            sb.append(' ');
296          } else {
297            // Drop the space and append a lower-case version of the unit.
298            sb.append(Strings.toLowerCase(numberFollower));
299            it.advance();
300          }
301        }
302      }
303    }
304    return sb.toString();
305  }
306}
307