// Copyright (c) 2011, Mike Samuel // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // // Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // Neither the name of the OWASP nor the names of its contributors may // be used to endorse or promote products derived from this software // without specific prior written permission. // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS // FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE // COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, // BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; // LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN // ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE // POSSIBILITY OF SUCH DAMAGE. package org.owasp.html; final class CssGrammar { private static void errorRecoveryUntilSemiOrCloseBracket( CssTokens.TokenIterator it) { int bracketDepth = 0; for (; it.hasNext(); it.advance()) { switch (it.type()) { case SEMICOLON: it.advance(); return; case LEFT_CURLY: case LEFT_PAREN: case LEFT_SQUARE: ++bracketDepth; break; case RIGHT_CURLY: case RIGHT_PAREN: case RIGHT_SQUARE: --bracketDepth; if (bracketDepth <= 0) { if (bracketDepth != 0) { it.advance(); } return; } break; default: break; } } } static void parsePropertyGroup(String css, PropertyHandler handler) { // Split tokens by semicolons/curly-braces, then by first colon, // dropping spaces and comments to identify property names and token runs // that form the value. CssTokens tokens = CssTokens.lex(css); CssTokens.TokenIterator it = tokens.iterator(); propertyNameLoop: while (it.hasTokenAfterSpace()) { // Check that we have an identifier that might be a property name. if (it.type() != CssTokens.TokenType.IDENT) { errorRecoveryUntilSemiOrCloseBracket(it); continue; } String name = it.next(); // Look for a colon. if (!(it.hasTokenAfterSpace() && ":".equals(it.token()))) { errorRecoveryUntilSemiOrCloseBracket(it); continue propertyNameLoop; } it.advance(); handler.startProperty(Strings.toLowerCase(name)); parsePropertyValue(it, handler); handler.endProperty(); } } private static void parsePropertyValue( CssTokens.TokenIterator it, PropertyHandler handler) { propertyValueLoop: while (it.hasNext()) { CssTokens.TokenType type = it.type(); String token = it.token(); switch (type) { case SEMICOLON: it.advance(); break propertyValueLoop; case FUNCTION: CssTokens.TokenIterator actuals = it.spliceToEnd(); handler.startFunction(token); parsePropertyValue(actuals, handler); handler.endFunction(token); continue; // Skip the advance over token. case IDENT: handler.identifier(token); break; case HASH_UNRESTRICTED: if (token.length() == 4 || token.length() == 7) { handler.hash(token); } break; case STRING: handler.quotedString(token); break; case URL: handler.url(token); break; case DIMENSION: case NUMBER: case PERCENTAGE: handler.quantity(token); break; case AT: case BAD_DIMENSION: case COLUMN: case DOT_IDENT: case HASH_ID: case MATCH: case UNICODE_RANGE: case WHITESPACE: break; case LEFT_CURLY: case LEFT_PAREN: case LEFT_SQUARE: case RIGHT_CURLY: case RIGHT_PAREN: case RIGHT_SQUARE: case COMMA: case COLON: case DELIM: handler.punctuation(token); break; } it.advance(); } } /** * Decodes any escape sequences and strips any quotes from the input. */ static String cssContent(String token) { int n = token.length(); int pos = 0; StringBuilder sb = null; if (n >= 2) { char ch0 = token.charAt(0); if (ch0 == '"' || ch0 == '\'') { if (ch0 == token.charAt(n - 1)) { pos = 1; --n; sb = new StringBuilder(n); } } } for (int esc; (esc = token.indexOf('\\', pos)) >= 0;) { int end = esc + 2; if (esc > n) { break; } if (sb == null) { sb = new StringBuilder(n); } sb.append(token, pos, esc); int codepoint = token.charAt(end - 1); if (isHex(codepoint)) { // Parse \hhhhh where hhhhh is one or more hex digits // and is an optional space or tab character that can be // used to separate an escape sequence from a following literal hex // digit. while (end < n && isHex(token.charAt(end))) { ++end; } try { codepoint = Integer.parseInt(token.substring(esc + 1, end), 16); } catch (RuntimeException ex) { codepoint = 0xfffd; // Unknown codepoint. } if (end < n) { char ch = token.charAt(end); if (ch == ' ' || ch == '\t') { // Ignorable hex follower. ++end; } } } sb.appendCodePoint(codepoint); pos = end; } if (sb == null) { return token; } return sb.append(token, pos, n).toString(); } private static boolean isHex(int codepoint) { return ('0' <= codepoint && codepoint <= '9') || ('A' <= codepoint && codepoint <= 'F') || ('a' <= codepoint && codepoint <= 'f'); } interface PropertyHandler { void startProperty(String propertyName); void quantity(String token); void identifier(String token); void hash(String token); void quotedString(String token); void url(String token); void punctuation(String token); void startFunction(String token); void endFunction(String token); void endProperty(); } }