15d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel// Copyright (c) 2013, Mike Samuel
25d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel// All rights reserved.
35d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel//
45d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel// Redistribution and use in source and binary forms, with or without
55d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel// modification, are permitted provided that the following conditions
65d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel// are met:
75d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel//
85d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel// Redistributions of source code must retain the above copyright
95d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel// notice, this list of conditions and the following disclaimer.
105d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel// Redistributions in binary form must reproduce the above copyright
115d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel// notice, this list of conditions and the following disclaimer in the
125d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel// documentation and/or other materials provided with the distribution.
135d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel// Neither the name of the OWASP nor the names of its contributors may
145d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel// be used to endorse or promote products derived from this software
155d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel// without specific prior written permission.
165d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
175d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
185d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
195d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
205d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel// COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
215d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
225d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
235d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
245d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
255d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
265d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
275d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel// POSSIBILITY OF SUCH DAMAGE.
285d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel
295d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuelpackage org.owasp.html;
305d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel
315d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuelimport java.util.Arrays;
325d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuelimport java.util.EnumMap;
335d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuelimport java.util.Random;
345d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuelimport java.util.regex.Pattern;
355d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel
365d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuelimport org.junit.Test;
375d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuelimport org.owasp.html.CssTokens.TokenType;
385d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel
395d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuelimport com.google.common.collect.Maps;
405d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel
415d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuelpublic class CssFuzzerTest extends FuzzyTestCase {
425d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel
435d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel  private static final String[] TOKEN_PARTS = new String[] {
445d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel    "'", "\"", "<!--", "-->", "/*", "*/", "***", "//", "\r", "\n",
455d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel    "<", ">", "/", ",", ";", ":", "(", "url", "Url", ")", "[", "]", "{", "}",
465d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel    "\\", "\\a", "\\d", "\\0", " ", "\t", "42", ".", "ex", "auto", "foo", "BAr",
475d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel    "important", "!", "\ufeff", "\u0000", "\u00a0", "\ufffd", "\ud801\udc02",
485d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel    "\u007f", "\u000c", "CDATA", "style"
495d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel  };
505d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel
515d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel  private static final String[] FREQUENT_TOKEN_PARTS = new String[] {
525d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel    "*/", " ", "\t", "\r", "\n",
535d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel  };
545d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel
555d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel  private static final String[] DISALLOWED_IN_OUTPUT = {
565d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel    "</style", "<![CDATA[", "]]>", "\r", "\n",
575d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel  };
585d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel
595d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel  final class Watcher implements Runnable {
605d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel    String input;
615d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel    long started;
625d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel
635d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel    public void run() {
645d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel      synchronized (this) {
6544d782b87eb0aa32d8cf12a3d39be9de05b3c544mikesamuel        try {
6644d782b87eb0aa32d8cf12a3d39be9de05b3c544mikesamuel          while (true) {
6744d782b87eb0aa32d8cf12a3d39be9de05b3c544mikesamuel            this.wait(1000 /* ms = 1s */);
6844d782b87eb0aa32d8cf12a3d39be9de05b3c544mikesamuel            String input = this.input;
6944d782b87eb0aa32d8cf12a3d39be9de05b3c544mikesamuel            if (input == null) { break; }  // Done
7044d782b87eb0aa32d8cf12a3d39be9de05b3c544mikesamuel            long started = this.started;
7144d782b87eb0aa32d8cf12a3d39be9de05b3c544mikesamuel            long now = System.nanoTime();
7244d782b87eb0aa32d8cf12a3d39be9de05b3c544mikesamuel            if (now - started >= 1000000000L /* ns = 1s */) {
7344d782b87eb0aa32d8cf12a3d39be9de05b3c544mikesamuel              System.err.println(
7444d782b87eb0aa32d8cf12a3d39be9de05b3c544mikesamuel                  "`" + input + "` is slow. seed=" + CssFuzzerTest.this.seed);
7544d782b87eb0aa32d8cf12a3d39be9de05b3c544mikesamuel            }
765d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel          }
7744d782b87eb0aa32d8cf12a3d39be9de05b3c544mikesamuel        } catch (InterruptedException ex) {
7844d782b87eb0aa32d8cf12a3d39be9de05b3c544mikesamuel          // Done
795d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel        }
805d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel      }
815d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel    }
825d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel  }
835d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel
845d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel  @Test
855d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel  public final void testUnderStress() {
865d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel    Random r = this.rnd;
875d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel    Watcher watcher = new Watcher();
885d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel    Thread watcherThread = null;
895d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel    for (int run = 0, nRuns = (1 << 16); run < nRuns; ++run) {
905d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel      // Compose a random string from token parts.
915d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel      StringBuilder sb = new StringBuilder();
925d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel      int nParts = r.nextInt(64) + 16;
935d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel      for (int j = nParts; --j >= 0;) {
945d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel        int die = r.nextInt(32);
955d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel        switch (die) {
965d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel        case 0: sb.append((char) rnd.nextInt(0x80)); break;
975d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel        case 1: sb.append((char) rnd.nextInt(0x1800)); break;
985d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel        default:
995d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel          String[] arr = (die & 1) != 0 ? TOKEN_PARTS : FREQUENT_TOKEN_PARTS;
1005d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel          sb.append(arr[rnd.nextInt(arr.length)]);
1015d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel          break;
1025d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel        }
1035d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel      }
1045d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel      String randomCss = sb.toString();
1055d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel
1065d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel      synchronized (watcher) {
1075d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel        watcher.input = randomCss;
1085d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel        watcher.started = System.nanoTime();
1095d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel      }
1105d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel      if (watcherThread == null) {
1115d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel        watcherThread = new Thread(watcher);
1125d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel        watcherThread.setDaemon(true);
1135d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel        watcherThread.start();
1145d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel      }
1155d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel
1165d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel      String msg = "seed=" + this.seed + ", css=`" + randomCss + "`";
1175d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel      CssTokens tokens = CssTokens.lex(randomCss);
1185d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel
1195d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel      // Test idempotent
1205d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel      String renormalized = CssTokens.lex(tokens.normalizedCss).normalizedCss;
1215d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel      if (!renormalized.equals(tokens.normalizedCss)) {
1225d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel        if (!renormalized.equals(fixDigitSpaceUnit(tokens))) {
1235d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel          for (CssTokens.TokenIterator it = tokens.iterator(); it.hasNext();
1245d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel               it.advance()) {
1255d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel            System.err.println(it.token() + ":" + it.type());
1265d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel          }
1275d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel          assertEquals(
1285d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel              "not idempotent, " + msg,
1295d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel              tokens.normalizedCss,
1305d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel              renormalized);
1315d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel        }
1325d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel      }
1335d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel
1345d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel      // Test normalized CSS does not contain HTML/XML breaking tokens.
1355d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel      for (String disallowed : DISALLOWED_IN_OUTPUT) {
1365d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel        assertFalse(
1375d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel            "contains " + disallowed + ", " + msg,
1385d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel            tokens.normalizedCss.contains(disallowed));
1395d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel      }
1405d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel
1415d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel      // Test that tokens are roughly well-formed.
1425d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel      int nTokens = 0;
1435d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel      for (CssTokens.TokenIterator it = tokens.iterator(); it.hasNext();) {
1445d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel        CssTokens.TokenType type = it.type();
1455d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel        String token = it.next();
1465d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel        Pattern filter = TOKEN_TYPE_FILTERS.get(type);
1475d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel        if (filter != null && !filter.matcher(token).matches()) {
1485d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel          fail(type + " `" + token + "`, " + msg);
1495d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel        }
1505d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel        ++nTokens;
1515d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel      }
1525d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel
1535d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel      // Test that walking the bracket list works.
1545d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel      int[] reverse = new int[nTokens];
1555d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel      Arrays.fill(reverse, -1);
1565d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel      for (int j = 0; j < nTokens; ++j) {
1575d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel        int partner = tokens.brackets.partner(j);
1585d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel        if (partner != -1) {
1595d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel          reverse[partner] = j;
1605d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel        }
1615d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel      }
1625d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel      for (int j = 0; j < nTokens; ++j) {
1635d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel        if (reverse[j] != -1) {
1645d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel          assertEquals(msg, reverse[reverse[j]], j);
1655d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel        }
1665d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel      }
1675d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel    }
1685d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel    synchronized (watcher) {
1695d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel      watcher.input = null;
1705d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel      watcher.notifyAll();
1715d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel    }
1725d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel  }
1735d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel
1745d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel  private static final EnumMap<CssTokens.TokenType, Pattern> TOKEN_TYPE_FILTERS
1755d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel    = Maps.newEnumMap(CssTokens.TokenType.class);
1765d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel  static {
1775d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel    String NUMBER = "-?(?:0|[1-9][0-9]*)(?:\\.[0-9]*[1-9])?(?:e-?[1-9][0-9]*)?";
1785d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel    String IDENT_START = "[a-zA-Z_\\u0080-\udbff\udfff\\-]";
1795d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel    String IDENT_PART = "(?:" + IDENT_START + "|[0-9])";
1805d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel    String IDENT = IDENT_START + IDENT_PART + "*";
1815d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel    TOKEN_TYPE_FILTERS.put(
1825d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel        CssTokens.TokenType.AT, Pattern.compile("@" + IDENT));
1835d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel    TOKEN_TYPE_FILTERS.put(
1845d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel        CssTokens.TokenType.COLON, Pattern.compile(":"));
1855d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel    TOKEN_TYPE_FILTERS.put(
1865d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel        CssTokens.TokenType.COLUMN, Pattern.compile("\\|\\|"));
1875d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel    TOKEN_TYPE_FILTERS.put(
1885d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel        CssTokens.TokenType.COMMA, Pattern.compile(","));
1895d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel    TOKEN_TYPE_FILTERS.put(
1905d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel        CssTokens.TokenType.DELIM,
1915d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel        Pattern.compile("[^\\w\u0000- \u0080-\uffff\\-]"));
1925d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel    TOKEN_TYPE_FILTERS.put(
1935d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel        CssTokens.TokenType.DIMENSION, Pattern.compile(NUMBER + "[a-z]+"));
1945d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel    TOKEN_TYPE_FILTERS.put(
1955d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel        CssTokens.TokenType.DOT_IDENT, Pattern.compile("\\." + IDENT));
1965d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel    TOKEN_TYPE_FILTERS.put(
1975d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel        CssTokens.TokenType.FUNCTION, Pattern.compile(IDENT + "[(]"));
1985d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel    TOKEN_TYPE_FILTERS.put(
1995d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel        CssTokens.TokenType.HASH_ID, Pattern.compile("#" + IDENT_PART + "+"));
2005d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel    TOKEN_TYPE_FILTERS.put(
2015d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel        CssTokens.TokenType.HASH_UNRESTRICTED,
2025d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel        Pattern.compile("#[a-fA-F0-9]+"));
2035d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel    TOKEN_TYPE_FILTERS.put(
2045d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel        CssTokens.TokenType.IDENT,
2055d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel        Pattern.compile(IDENT));
2065d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel    TOKEN_TYPE_FILTERS.put(
2075d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel        CssTokens.TokenType.LEFT_CURLY,
2085d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel        Pattern.compile("[{]"));
2095d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel    TOKEN_TYPE_FILTERS.put(
2105d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel        CssTokens.TokenType.LEFT_PAREN,
2115d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel        Pattern.compile("[(]"));
2125d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel    TOKEN_TYPE_FILTERS.put(
2135d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel        CssTokens.TokenType.LEFT_SQUARE,
2145d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel        Pattern.compile("[\\[]"));
2155d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel    TOKEN_TYPE_FILTERS.put(
2165d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel        CssTokens.TokenType.MATCH,
2175d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel        Pattern.compile("[~^$|*]="));
2185d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel    TOKEN_TYPE_FILTERS.put(
2195d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel        CssTokens.TokenType.NUMBER,
2205d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel        Pattern.compile(NUMBER));
2215d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel    TOKEN_TYPE_FILTERS.put(
2225d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel        CssTokens.TokenType.PERCENTAGE,
2235d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel        Pattern.compile(NUMBER + "%"));
2245d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel    TOKEN_TYPE_FILTERS.put(
2255d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel        CssTokens.TokenType.RIGHT_CURLY,
2265d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel        Pattern.compile("[}]"));
2275d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel    TOKEN_TYPE_FILTERS.put(
2285d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel        CssTokens.TokenType.RIGHT_PAREN,
2295d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel        Pattern.compile("[)]"));
2305d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel    TOKEN_TYPE_FILTERS.put(
2315d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel        CssTokens.TokenType.RIGHT_SQUARE,
2325d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel        Pattern.compile("[\\]]"));
2335d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel    TOKEN_TYPE_FILTERS.put(
2345d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel        CssTokens.TokenType.SEMICOLON,
2355d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel        Pattern.compile(";"));
2365d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel    TOKEN_TYPE_FILTERS.put(
2375d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel        CssTokens.TokenType.STRING,
2385d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel        Pattern.compile("'(?:[^'\r\n\\\\]|\\\\[^\r\n])*'"));
2395d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel    TOKEN_TYPE_FILTERS.put(
2405d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel        CssTokens.TokenType.UNICODE_RANGE,
2415d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel        Pattern.compile("U\\+[0-9a-f]{1,6}(?:-[0-9a-f]{1,6}|\\?{0,5})?"));
2425d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel    TOKEN_TYPE_FILTERS.put(
2435d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel        CssTokens.TokenType.URL,
2445d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel        Pattern.compile("url\\('[0-9A-Za-z\\-_.~:/?#\\[\\]@!$&+,;=%]*'\\)"));
2455d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel    TOKEN_TYPE_FILTERS.put(
2465d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel        CssTokens.TokenType.WHITESPACE,
2475d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel        Pattern.compile(" "));
2485d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel  }
2495d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel
2505d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel  /**
2515d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel   * "1:NUMBER ex:IDENT" -> "1ex:DIMENSION" is a common source source of
2525d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel   * a-idempotency, but not one that causes problems in practice.
2535d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel   * This hack helps ignore it.
2545d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel   */
2555d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel  static String fixDigitSpaceUnit(CssTokens tokens) {
2565d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel    StringBuilder sb = new StringBuilder();
2575d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel    for (CssTokens.TokenIterator it = tokens.iterator(); it.hasNext();) {
2585d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel      if (it.type() != TokenType.NUMBER) {
2595d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel        sb.append(it.next());
2605d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel      } else {
2615d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel        do {
2625d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel          sb.append(it.next());
2635d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel        } while (it.hasNext() && it.type() == TokenType.NUMBER);
2645d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel        if (it.hasNext() && it.type() == TokenType.WHITESPACE) {
2655d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel          it.advance();
2665d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel          String numberFollower = null;
2675d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel          if (it.hasNext()) {
2685d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel            String token = it.token();
2695d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel            switch (it.type()) {
2705d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel              case IDENT:
2715d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel                if (CssTokens.isWellKnownUnit(token)) {
2725d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel                  numberFollower = token;
2735d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel                  it.advance();
2745d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel                  if (it.hasNext() && it.token().startsWith(".")) {
2755d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel                    numberFollower += " ";
2765d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel                  }
2775d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel                  it.backup();
2785d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel                }
2795d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel                break;
2805d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel              case FUNCTION:
2815d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel                String name = token.substring(0, token.length() - 1);
2825d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel                if (CssTokens.isWellKnownUnit(name)) {
2835d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel                  numberFollower = token;
2845d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel                }
2855d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel                break;
2865d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel              case DELIM:
2875d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel                if ("%".equals(token)) {
2885d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel                  numberFollower = token;
2895d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel                }
2905d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel                break;
2915d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel              default: break;
2925d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel            }
2935d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel          }
2945d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel          if (numberFollower == null) {
2955d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel            sb.append(' ');
2965d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel          } else {
2975d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel            // Drop the space and append a lower-case version of the unit.
2985d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel            sb.append(Strings.toLowerCase(numberFollower));
2995d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel            it.advance();
3005d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel          }
3015d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel        }
3025d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel      }
3035d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel    }
3045d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel    return sb.toString();
3055d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel  }
3065d249f1bf7938bbba10d2cbfdeb159220a6ea16cmikesamuel}
307