/* * Copyright (C) 2010 Google Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.google.clearsilver.jsilver.autoescape; import static com.google.clearsilver.jsilver.autoescape.EscapeMode.ESCAPE_AUTO_ATTR; import static com.google.clearsilver.jsilver.autoescape.EscapeMode.ESCAPE_AUTO_ATTR_CSS; import static com.google.clearsilver.jsilver.autoescape.EscapeMode.ESCAPE_AUTO_ATTR_JS; import static com.google.clearsilver.jsilver.autoescape.EscapeMode.ESCAPE_AUTO_ATTR_UNQUOTED_JS; import static com.google.clearsilver.jsilver.autoescape.EscapeMode.ESCAPE_AUTO_ATTR_URI; import static com.google.clearsilver.jsilver.autoescape.EscapeMode.ESCAPE_AUTO_ATTR_URI_START; import static com.google.clearsilver.jsilver.autoescape.EscapeMode.ESCAPE_AUTO_HTML; import static com.google.clearsilver.jsilver.autoescape.EscapeMode.ESCAPE_AUTO_JS; import static com.google.clearsilver.jsilver.autoescape.EscapeMode.ESCAPE_AUTO_JS_UNQUOTED; import static com.google.clearsilver.jsilver.autoescape.EscapeMode.ESCAPE_AUTO_STYLE; import static com.google.clearsilver.jsilver.autoescape.EscapeMode.ESCAPE_AUTO_UNQUOTED_ATTR; import static com.google.clearsilver.jsilver.autoescape.EscapeMode.ESCAPE_AUTO_UNQUOTED_ATTR_CSS; import static com.google.clearsilver.jsilver.autoescape.EscapeMode.ESCAPE_AUTO_UNQUOTED_ATTR_JS; import static com.google.clearsilver.jsilver.autoescape.EscapeMode.ESCAPE_AUTO_UNQUOTED_ATTR_UNQUOTED_JS; import static 
com.google.clearsilver.jsilver.autoescape.EscapeMode.ESCAPE_AUTO_UNQUOTED_ATTR_URI; import static com.google.clearsilver.jsilver.autoescape.EscapeMode.ESCAPE_AUTO_UNQUOTED_ATTR_URI_START; import com.google.clearsilver.jsilver.exceptions.JSilverAutoEscapingException; import com.google.streamhtmlparser.ExternalState; import com.google.streamhtmlparser.HtmlParser; import com.google.streamhtmlparser.HtmlParserFactory; import com.google.streamhtmlparser.ParseException; import java.util.HashMap; import java.util.HashSet; import java.util.Map; /** * Encapsulates auto escaping logic. */ public class AutoEscapeContext { /** * Map of content-type to corresponding {@code HtmlParser.Mode}, used by {@code setContentType} to * specify the content type of provided input. Valid values and the corresponding mode are:
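   * <table>
   * <tr><td>text/html</td><td>HtmlParser.Mode.HTML</td></tr>
   * <tr><td>text/plain</td><td>HtmlParser.Mode.HTML</td></tr>
   * <tr><td>application/javascript</td><td>HtmlParser.Mode.JS</td></tr>
   * <tr><td>application/json</td><td>HtmlParser.Mode.JS</td></tr>
   * <tr><td>text/javascript</td><td>HtmlParser.Mode.JS</td></tr>
   * <tr><td>text/css</td><td>HtmlParser.Mode.CSS</td></tr>
   * </table>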
* * @see #setContentType */ public static final Map CONTENT_TYPE_LIST; // These options are used to provide extra information to HtmlParserFactory.createParserInMode or // HtmlParserFactory.createParserInAttribute, which is required for certain modes. private static final HashSet quotedJsAttributeOption; private static final HashSet partialUrlAttributeOption; private static final HashSet jsModeOption; private HtmlParser htmlParser; static { quotedJsAttributeOption = new HashSet(); quotedJsAttributeOption.add(HtmlParserFactory.AttributeOptions.JS_QUOTED); partialUrlAttributeOption = new HashSet(); partialUrlAttributeOption.add(HtmlParserFactory.AttributeOptions.URL_PARTIAL); jsModeOption = new HashSet(); jsModeOption.add(HtmlParserFactory.ModeOptions.JS_QUOTED); CONTENT_TYPE_LIST = new HashMap(); CONTENT_TYPE_LIST.put("text/html", HtmlParser.Mode.HTML); CONTENT_TYPE_LIST.put("text/plain", HtmlParser.Mode.HTML); CONTENT_TYPE_LIST.put("application/javascript", HtmlParser.Mode.JS); CONTENT_TYPE_LIST.put("application/json", HtmlParser.Mode.JS); CONTENT_TYPE_LIST.put("text/javascript", HtmlParser.Mode.JS); CONTENT_TYPE_LIST.put("text/css", HtmlParser.Mode.CSS); } /** * Name of resource being auto escaped. Will be used in error and display messages. */ private String resourceName; public AutoEscapeContext() { this(EscapeMode.ESCAPE_AUTO, null); } /** * Create a new context in the state represented by mode. * * @param mode EscapeMode object. */ public AutoEscapeContext(EscapeMode mode) { this(mode, null); } /** * Create a new context in the state represented by mode. If a non-null resourceName is provided, * it will be used in displaying error messages. * * @param mode The initial EscapeMode for this context * @param resourceName Name of the resource being auto escaped. 
*/ public AutoEscapeContext(EscapeMode mode, String resourceName) { this.resourceName = resourceName; htmlParser = createHtmlParser(mode); } /** * Create a new context that is a copy of the current state of this context. * * @return New {@code AutoEscapeContext} that is a snapshot of the current state of this context. */ public AutoEscapeContext cloneCurrentEscapeContext() { AutoEscapeContext autoEscapeContext = new AutoEscapeContext(); autoEscapeContext.resourceName = resourceName; autoEscapeContext.htmlParser = HtmlParserFactory.createParser(htmlParser); return autoEscapeContext; } /** * Sets the current position in the resource being auto escaped. Useful for generating detailed * error messages. * * @param line line number. * @param column column number within line. */ public void setCurrentPosition(int line, int column) { htmlParser.setLineNumber(line); htmlParser.setColumnNumber(column); } /** * Returns the name of the resource currently being auto escaped. */ public String getResourceName() { return resourceName; } /** * Returns the current line number within the resource being auto escaped. */ public int getLineNumber() { return htmlParser.getLineNumber(); } /** * Returns the current column number within the resource being auto escaped. */ public int getColumnNumber() { return htmlParser.getColumnNumber(); } private HtmlParser createHtmlParser(EscapeMode mode) { switch (mode) { case ESCAPE_AUTO: case ESCAPE_AUTO_HTML: return HtmlParserFactory.createParser(); case ESCAPE_AUTO_JS_UNQUOTED: // return AutoEscapeState.JS; } else { // // No quotes around the variable, hence it can inject arbitrary javascript. // So severely restrict the values it may contain. return AutoEscapeState.JS_UNQUOTED; } } // Inside an HTML tag or attribute name if (state.equals(HtmlParser.STATE_ATTR) || state.equals(HtmlParser.STATE_TAG)) { return AutoEscapeState.ATTR; // TODO: Need a strict validation function for tag and attribute names. 
} else if (state.equals(HtmlParser.STATE_VALUE)) { // Inside an HTML attribute value return getCurrentAttributeState(); } else if (state.equals(HtmlParser.STATE_COMMENT) || state.equals(HtmlParser.STATE_TEXT)) { // Default is assumed to be HTML body // Hello : return AutoEscapeState.HTML; } throw new JSilverAutoEscapingException("Invalid state received from HtmlParser: " + state.toString(), resourceName, htmlParser.getLineNumber(), htmlParser.getColumnNumber()); } private AutoEscapeState getCurrentAttributeState() { HtmlParser.ATTR_TYPE type = htmlParser.getAttributeType(); boolean attrQuoted = htmlParser.isAttributeQuoted(); switch (type) { case REGULAR: // : if (attrQuoted) { return AutoEscapeState.ATTR; } else { return AutoEscapeState.UNQUOTED_ATTR; } case URI: if (htmlParser.isUrlStart()) { // if (attrQuoted) { return AutoEscapeState.ATTR_URI_START; } else { return AutoEscapeState.UNQUOTED_ATTR_URI_START; } } else { // if (attrQuoted) { // TODO: Html escaping because that is what Clearsilver does right now. // May change this to url escaping soon. return AutoEscapeState.ATTR_URI; } else { return AutoEscapeState.UNQUOTED_ATTR_URI; } } case JS: if (htmlParser.isJavascriptQuoted()) { /* * Note: js_escape() hex encodes all html metacharacters. Therefore it is safe to not do * an HTML escape around this. */ if (attrQuoted) { // return AutoEscapeState.ATTR_JS; } else { // ');> return AutoEscapeState.UNQUOTED_ATTR_JS; } } else { if (attrQuoted) { /* */ return AutoEscapeState.ATTR_UNQUOTED_JS; } else { /* );> */ return AutoEscapeState.UNQUOTED_ATTR_UNQUOTED_JS; } } case STYLE: // : if (attrQuoted) { return AutoEscapeState.ATTR_CSS; } else { return AutoEscapeState.UNQUOTED_ATTR_CSS; } default: throw new JSilverAutoEscapingException("Invalid attribute type in HtmlParser: " + type, resourceName, htmlParser.getLineNumber(), htmlParser.getColumnNumber()); } } /** * Resets the state of the underlying html parser to a state consistent with the {@code * contentType} provided. 
This method should be used when the starting auto escaping context of a * resource cannot be determined from its contents - for example, a CSS stylesheet or a javascript * source file. * * @param contentType MIME type header representing the content being parsed. * @see #CONTENT_TYPE_LIST */ public void setContentType(String contentType) { HtmlParser.Mode mode = CONTENT_TYPE_LIST.get(contentType); if (mode == null) { throw new JSilverAutoEscapingException("Invalid content type specified: " + contentType, resourceName, htmlParser.getLineNumber(), htmlParser.getColumnNumber()); } htmlParser.resetMode(mode); } /** * Enum representing states of the data being parsed. * * This enumeration lists all the states in which autoescaping would have some effect. * */ public static enum AutoEscapeState { HTML("html", ESCAPE_AUTO_HTML), JS("js", ESCAPE_AUTO_JS), STYLE("css", ESCAPE_AUTO_STYLE), JS_UNQUOTED( "js_check_number", ESCAPE_AUTO_JS_UNQUOTED), ATTR("html", ESCAPE_AUTO_ATTR), UNQUOTED_ATTR( "html_unquoted", ESCAPE_AUTO_UNQUOTED_ATTR), ATTR_URI("html", ESCAPE_AUTO_ATTR_URI), UNQUOTED_ATTR_URI( "html_unquoted", ESCAPE_AUTO_UNQUOTED_ATTR_URI), ATTR_URI_START("url_validate", ESCAPE_AUTO_ATTR_URI_START), UNQUOTED_ATTR_URI_START("url_validate_unquoted", ESCAPE_AUTO_UNQUOTED_ATTR_URI_START), ATTR_JS("js", ESCAPE_AUTO_ATTR_JS), ATTR_UNQUOTED_JS( "js_check_number", ESCAPE_AUTO_ATTR_UNQUOTED_JS), UNQUOTED_ATTR_JS("js_attr_unquoted", ESCAPE_AUTO_UNQUOTED_ATTR_JS), UNQUOTED_ATTR_UNQUOTED_JS("js_check_number", ESCAPE_AUTO_UNQUOTED_ATTR_UNQUOTED_JS), ATTR_CSS("css", ESCAPE_AUTO_ATTR_CSS), UNQUOTED_ATTR_CSS( "css_unquoted", ESCAPE_AUTO_UNQUOTED_ATTR_CSS); private final String functionName; private final EscapeMode escapeMode; private AutoEscapeState(String functionName, EscapeMode mode) { this.functionName = functionName; this.escapeMode = mode; } public String getFunctionName() { return functionName; } public EscapeMode getEscapeMode() { return escapeMode; } } }