/* * Copyright (C) 2010 Google Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.google.clearsilver.jsilver.data; import com.google.clearsilver.jsilver.resourceloader.ResourceLoader; import java.io.IOException; import java.io.LineNumberReader; import java.io.Reader; import java.util.ArrayList; import java.util.Iterator; import java.util.Stack; /** * Parser for HDF based on the following grammar by Brandon Long. * * COMMAND := (INCLUDE | COMMENT | HDF_SET | HDF_DESCEND | HDF_ASCEND ) INCLUDE := #include * "FILENAME" EOL COMMENT := # .* EOL HDF_DESCEND := HDF_NAME_ATTRS { EOL HDF_ASCEND := } EOL * HDF_SET := (HDF_ASSIGN | HDF_MULTILINE_ASSIGN | HDF_COPY | HDF_LINK) HDF_ASSIGN := HDF_NAME_ATTRS * = .* EOL HDF_MULTILINE_ASSIGN := HDF_NAME_ATTRS << EOM_MARKER EOL (.* EOL)* EOM_MARKER EOL * HDF_COPY := HDF_NAME_ATTRS := HDF_NAME EOL HDF_LINK := HDF_NAME_ATTRS : HDF_NAME EOL * HDF_NAME_ATTRS := (HDF_NAME | HDF_NAME [HDF_ATTRS]) HDF_ATTRS := (HDF_ATTR | HDF_ATTR, HDF_ATTRS) * HDF_ATTR := (HDF_ATTR_KEY | HDF_ATTR_KEY = [^\s,\]]+ | HDF_ATTR_KEY = DQUOTED_STRING) * HDF_ATTR_KEY := [0-9a-zA-Z]+ DQUOTED_STRING := "([^\\"]|\\[ntr]|\\.)*" HDF_NAME := (HDF_SUB_NAME * | HDF_SUB_NAME\.HDF_NAME) HDF_SUB_NAME := [0-9a-zA-Z_]+ EOM_MARKER := \S.*\S EOL := \n */ public class NewHdfParser implements Parser { private final StringInternStrategy internStrategy; /** * Special exception used to detect when we unexpectedly run out of characters on the line. */ private static class OutOfCharsException extends Exception {} /** * Object used to hold the name and attributes of an HDF node before we are ready to commit it to * the Data object. */ private static class HdfNameAttrs { String name; ArrayList attrs = null; int endOfSequence; void reset(String newname) { // TODO: think about moving interning here instead of parser code this.name = newname; if (attrs != null) { attrs.clear(); } endOfSequence = 0; } void addAttribute(String key, String value) { if (attrs == null) { attrs = new ArrayList(10); } attrs.ensureCapacity(attrs.size() + 2); // TODO: think about moving interning here instead of parser code attrs.add(key); attrs.add(value); } Data toData(Data data) { Data child = data.createChild(name); if (attrs != null) { Iterator it = attrs.iterator(); while (it.hasNext()) { String key = it.next(); String value = it.next(); child.setAttribute(key, value); } } return child; } } static final String UNNAMED_INPUT = "[UNNAMED_INPUT]"; /** * State information that we pass through the parse methods. Allows parser to be reentrant as all * the state is passed through method calls. */ static class ParseState { final Stack context = new Stack(); final Data output; final LineNumberReader lineReader; final ErrorHandler errorHandler; final ResourceLoader resourceLoader; final NewHdfParser hdfParser; final boolean ignoreAttributes; final HdfNameAttrs hdfNameAttrs; final UniqueStack includeStack; final String parsedFileName; String line; Data currentNode; private ParseState(Data output, LineNumberReader lineReader, ErrorHandler errorHandler, ResourceLoader resourceLoader, NewHdfParser hdfParser, String parsedFileName, boolean ignoreAttributes, HdfNameAttrs hdfNameAttrs, UniqueStack includeStack) { this.lineReader = lineReader; this.errorHandler = errorHandler; this.output = output; currentNode = output; this.resourceLoader = resourceLoader; this.hdfParser = hdfParser; this.parsedFileName = parsedFileName; this.ignoreAttributes = ignoreAttributes; this.hdfNameAttrs = hdfNameAttrs; this.includeStack = includeStack; } public static ParseState createNewParseState(Data output, Reader reader, ErrorHandler errorHandler, ResourceLoader resourceLoader, NewHdfParser hdfParser, String parsedFileName, boolean ignoreAttributes) { if (parsedFileName == null) { parsedFileName = UNNAMED_INPUT; } UniqueStack includeStack = new UniqueStack(); includeStack.push(parsedFileName); return new ParseState(output, new LineNumberReader(reader), errorHandler, resourceLoader, hdfParser, parsedFileName, ignoreAttributes, new HdfNameAttrs(), includeStack); } public static ParseState createParseStateForIncludedFile(ParseState originalState, String includeFileName, Reader includeFileReader) { return new ParseState(originalState.output, new LineNumberReader(includeFileReader), originalState.errorHandler, originalState.resourceLoader, originalState.hdfParser, originalState.parsedFileName, originalState.ignoreAttributes, new HdfNameAttrs(), originalState.includeStack); } } /** * Constructor for {@link NewHdfParser}. * * @param internPool - {@link StringInternStrategy} instance used to optimize the HDF parsing. */ public NewHdfParser(StringInternStrategy internPool) { this.internStrategy = internPool; } private static class NewHdfParserFactory implements ParserFactory { private final StringInternStrategy stringInternStrategy; public NewHdfParserFactory(StringInternStrategy stringInternStrategy) { this.stringInternStrategy = stringInternStrategy; } @Override public Parser newInstance() { return new NewHdfParser(stringInternStrategy); } } /** * Creates a {@link ParserFactory} instance. * *

* Provided {@code stringInternStrategy} instance will be used by shared all {@link Parser} * objects created by the factory and used to optimize the HDF parsing process by reusing the * String for keys and values. * * @param stringInternStrategy - {@link StringInternStrategy} instance used to optimize the HDF * parsing. * @return an instance of {@link ParserFactory} implementation. */ public static ParserFactory newFactory(StringInternStrategy stringInternStrategy) { return new NewHdfParserFactory(stringInternStrategy); } public void parse(Reader reader, Data output, Parser.ErrorHandler errorHandler, ResourceLoader resourceLoader, String dataFileName, boolean ignoreAttributes) throws IOException { parse(ParseState.createNewParseState(output, reader, errorHandler, resourceLoader, this, dataFileName, ignoreAttributes)); } private void parse(ParseState state) throws IOException { while ((state.line = state.lineReader.readLine()) != null) { String seq = stripWhitespace(state.line); try { parseCommand(seq, state); } catch (OutOfCharsException e) { reportError(state, "End of line was prematurely reached. Parse error."); } } } private static final String INCLUDE_WS = "#include "; private void parseCommand(String seq, ParseState state) throws IOException, OutOfCharsException { if (seq.length() == 0) { // Empty line. return; } if (charAt(seq, 0) == '#') { // If there isn't a match on include then this is a comment and we do nothing. if (matches(seq, 0, INCLUDE_WS)) { // This is an include command int start = skipLeadingWhitespace(seq, INCLUDE_WS.length()); parseInclude(seq, start, state); } return; } else if (charAt(seq, 0) == '}') { if (skipLeadingWhitespace(seq, 1) != seq.length()) { reportError(state, "Extra chars after '}'"); return; } handleAscend(state); } else { parseHdfElement(seq, state); } } private void parseInclude(String seq, int start, ParseState state) throws IOException, OutOfCharsException { int end = seq.length(); if (charAt(seq, start) == '"') { if (charAt(seq, end - 1) == '"') { start++; end--; } else { reportError(state, "Missing '\"' at end of include"); return; } } handleInclude(seq.substring(start, end), state); } private static final int NO_MATCH = -1; private void parseHdfElement(String seq, ParseState state) throws IOException, OutOfCharsException { // Re-use a single element to avoid repeated allocations/trashing (serious // performance impact, 5% of real service performance) HdfNameAttrs element = state.hdfNameAttrs; if (!parseHdfNameAttrs(element, seq, 0, state)) { return; } int index = skipLeadingWhitespace(seq, element.endOfSequence); switch (charAt(seq, index)) { case '{': // Descend if (index + 1 != seq.length()) { reportError(state, "No characters expected after '{'"); return; } handleDescend(state, element); return; case '=': // Assignment index = skipLeadingWhitespace(seq, index + 1); String value = internStrategy.intern(seq.substring(index, seq.length())); handleAssign(state, element, value); return; case ':': if (charAt(seq, index + 1) == '=') { // Copy index = skipLeadingWhitespace(seq, index + 2); String src = parseHdfName(seq, index); if (src == null) { reportError(state, "Invalid HDF name"); return; } if (index + src.length() != seq.length()) { reportError(state, "No characters expected after '{'"); return; } handleCopy(state, element, src); } else { // Link index = skipLeadingWhitespace(seq, index + 1); String src = parseHdfName(seq, index); if (src == null) { reportError(state, "Invalid HDF name"); return; } if (index + src.length() != seq.length()) { reportError(state, "No characters expected after '{'"); return; } handleLink(state, element, src); } return; case '<': if (charAt(seq, index + 1) != '<') { reportError(state, "Expected '<<'"); } index = skipLeadingWhitespace(seq, index + 2); String eomMarker = seq.substring(index, seq.length()); // TODO: think about moving interning to handleAssign() String multilineValue = internStrategy.intern(parseMultilineValue(state, eomMarker)); if (multilineValue == null) { return; } handleAssign(state, element, multilineValue); return; default: reportError(state, "No valid operator"); return; } } /** * This method parses out an HDF element name and any optional attributes into a caller-supplied * HdfNameAttrs object. It returns a {@code boolean} with whether it succeeded to parse. */ private boolean parseHdfNameAttrs(HdfNameAttrs destination, String seq, int index, ParseState state) throws OutOfCharsException { String hdfName = parseHdfName(seq, index); if (hdfName == null) { reportError(state, "Invalid HDF name"); return false; } destination.reset(hdfName); index = skipLeadingWhitespace(seq, index + hdfName.length()); int end = parseAttributes(seq, index, state, destination); if (end == NO_MATCH) { // Error already reported below. return false; } else { destination.endOfSequence = end; return true; } } /** * Parses a valid hdf path name. */ private String parseHdfName(String seq, int index) throws OutOfCharsException { int end = index; while (end < seq.length() && isHdfNameChar(charAt(seq, end))) { end++; } if (end == index) { return null; } return internStrategy.intern(seq.substring(index, end)); } /** * Looks for optional attributes and adds them to the HdfNameAttrs object passed into the method. */ private int parseAttributes(String seq, int index, ParseState state, HdfNameAttrs element) throws OutOfCharsException { if (charAt(seq, index) != '[') { // No attributes to parse return index; } index = skipLeadingWhitespace(seq, index + 1); // If we don't care about attributes, just skip over them. if (state.ignoreAttributes) { while (charAt(seq, index) != ']') { index++; } return index + 1; } boolean first = true; do { if (first) { first = false; } else if (charAt(seq, index) == ',') { index = skipLeadingWhitespace(seq, index + 1); } else { reportError(state, "Error parsing attribute list"); } index = parseAttribute(seq, index, state, element); if (index == NO_MATCH) { // reportError called by parseAttribute already. return NO_MATCH; } index = skipLeadingWhitespace(seq, index); } while (charAt(seq, index) != ']'); return index + 1; } private static final String DEFAULT_ATTR_VALUE = "1"; /** * Parse out a single HDF attribute. If there is no explicit value, use default value of "1" like * in C clearsilver. Returns NO_MATCH if it fails to parse an attribute. */ private int parseAttribute(String seq, int index, ParseState state, HdfNameAttrs element) throws OutOfCharsException { int end = parseAttributeKey(seq, index); if (index == end) { reportError(state, "No valid attribute key"); return NO_MATCH; } String attrKey = internStrategy.intern(seq.substring(index, end)); index = skipLeadingWhitespace(seq, end); if (charAt(seq, index) != '=') { // No value for this attribute key. Use default value of "1" element.addAttribute(attrKey, DEFAULT_ATTR_VALUE); return index; } // We need to parse out the attribute value. index = skipLeadingWhitespace(seq, index + 1); if (charAt(seq, index) == '"') { index++; StringBuilder sb = new StringBuilder(); end = parseQuotedAttributeValue(seq, index, sb); if (end == NO_MATCH) { reportError(state, "Unable to parse quoted attribute value"); return NO_MATCH; } String attrValue = internStrategy.intern(sb.toString()); element.addAttribute(attrKey, attrValue); end++; } else { // Simple attribute that has no whitespace. String attrValue = parseAttributeValue(seq, index, state); if (attrValue == null || attrValue.length() == 0) { reportError(state, "No attribute for key " + attrKey); return NO_MATCH; } attrValue = internStrategy.intern(attrValue); element.addAttribute(attrKey, attrValue); end = index + attrValue.length(); } return end; } /** * Returns the range in the sequence starting at start that corresponds to a valid attribute key. */ private int parseAttributeKey(String seq, int index) throws OutOfCharsException { while (isAlphaNumericChar(charAt(seq, index))) { index++; } return index; } /** * Parses a quoted attribute value. Unescapes octal characters and \n, \r, \t, \", etc. */ private int parseQuotedAttributeValue(String seq, int index, StringBuilder sb) throws OutOfCharsException { char c; while ((c = charAt(seq, index)) != '"') { if (c == '\\') { // Escaped character. Look for 1 to 3 digits in a row as octal or n,t,r. index++; char next = charAt(seq, index); if (isNumericChar(next)) { // Parse the next 1 to 3 characters if they are digits. Treat it as an octal code. int val = next - '0'; if (isNumericChar(charAt(seq, index + 1))) { index++; val = val * 8 + (charAt(seq, index) - '0'); if (isNumericChar(charAt(seq, index + 1))) { index++; val = val * 8 + (charAt(seq, index) - '0'); } } c = (char) val; } else if (next == 'n') { c = '\n'; } else if (next == 't') { c = '\t'; } else if (next == 'r') { c = '\r'; } else { // Regular escaped char like " or / c = next; } } sb.append(c); index++; } return index; } /** * Parses a simple attribute value that cannot have any whitespace or specific punctuation * reserved by the HDF grammar. */ private String parseAttributeValue(String seq, int index, ParseState state) throws OutOfCharsException { int end = index; char c = charAt(seq, end); while (c != ',' && c != ']' && c != '"' && !Character.isWhitespace(c)) { end++; c = charAt(seq, end); } return seq.substring(index, end); } private String parseMultilineValue(ParseState state, String eomMarker) throws IOException { StringBuilder sb = new StringBuilder(256); String line; while ((line = state.lineReader.readLine()) != null) { if (line.startsWith(eomMarker) && skipLeadingWhitespace(line, eomMarker.length()) == line.length()) { return sb.toString(); } else { sb.append(line).append('\n'); } } reportError(state, "EOM " + eomMarker + " never found"); return null; } // ////////////////////////////////////////////////////////////////////////// // // Handlers private void handleDescend(ParseState state, HdfNameAttrs element) { Data child = handleNodeCreation(state.currentNode, element); state.context.push(state.currentNode); state.currentNode = child; } private Data handleNodeCreation(Data node, HdfNameAttrs element) { return element.toData(node); } private void handleAssign(ParseState state, HdfNameAttrs element, String value) { // TODO: think about moving interning here Data child = handleNodeCreation(state.currentNode, element); child.setValue(value); } private void handleCopy(ParseState state, HdfNameAttrs element, String srcName) { Data child = handleNodeCreation(state.currentNode, element); Data src = state.output.getChild(srcName); if (src != null) { child.setValue(src.getValue()); } else { child.setValue(""); } } private void handleLink(ParseState state, HdfNameAttrs element, String srcName) { Data child = handleNodeCreation(state.currentNode, element); child.setSymlink(state.output.createChild(srcName)); } private void handleAscend(ParseState state) { if (state.context.isEmpty()) { reportError(state, "Too many '}'"); return; } state.currentNode = state.context.pop(); } private void handleInclude(String seq, ParseState state) throws IOException { String includeFileName = internStrategy.intern(seq); // Load the file Reader reader = state.resourceLoader.open(includeFileName); if (reader == null) { reportError(state, "Unable to find file " + includeFileName); return; } // Check whether we are in include loop if (!state.includeStack.push(includeFileName)) { reportError(state, createIncludeStackTraceMessage(state.includeStack, includeFileName)); return; } // Parse the file state.hdfParser.parse(ParseState .createParseStateForIncludedFile(state, includeFileName, reader)); if (!includeFileName.equals(state.includeStack.pop())) { // Include stack trace is corrupted throw new IllegalStateException("Unable to find on include stack: " + includeFileName); } } private String createIncludeStackTraceMessage(UniqueStack includeStack, String includeFileName) { StringBuilder message = new StringBuilder(); message.append("File included twice: "); message.append(includeFileName); message.append(" Include stack: "); for (String fileName : includeStack) { message.append(fileName); message.append(" -> "); } message.append(includeFileName); return message.toString(); } // ///////////////////////////////////////////////////////////////////////// // // Character values private static boolean isNumericChar(char c) { if ('0' <= c && c <= '9') { return true; } else { return false; } } private static boolean isAlphaNumericChar(char c) { if (('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || ('0' <= c && c <= '9')) { return true; } else { return false; } } private static boolean isHdfNameChar(char c) { if (isAlphaNumericChar(c) || c == '_' || c == '.') { return true; } else { return false; } } private static String stripWhitespace(String seq) { int start = skipLeadingWhitespace(seq, 0); int end = seq.length() - 1; while (end > start && Character.isWhitespace(seq.charAt(end))) { --end; } if (start == 0 && end == seq.length() - 1) { return seq; } else { return seq.substring(start, end + 1); } } private static int skipLeadingWhitespace(String seq, int index) { while (index < seq.length() && Character.isWhitespace(seq.charAt(index))) { index++; } return index; } /** * Determines if a character sequence appears in the given sequence starting at a specified index. * * @param seq the sequence that we want to see if it contains the string match. * @param start the index into seq where we want to check for match * @param match the String we want to look for in the sequence. * @return {@code true} if the string match appears in seq starting at the index start, {@code * false} otherwise. */ private static boolean matches(String seq, int start, String match) { if (seq.length() - start < match.length()) { return false; } for (int i = 0; i < match.length(); i++) { if (match.charAt(i) != seq.charAt(start + i)) { return false; } } return true; } /** * Reads the character at the specified index in the given String. Throws an exception to be * caught above if the index is out of range. */ private static char charAt(String seq, int index) throws OutOfCharsException { if (0 <= index && index < seq.length()) { return seq.charAt(index); } else { throw new OutOfCharsException(); } } private static void reportError(ParseState state, String errorMessage) { if (state.errorHandler != null) { state.errorHandler.error(state.lineReader.getLineNumber(), state.line, state.parsedFileName, errorMessage); } else { throw new RuntimeException("Parse Error on line " + state.lineReader.getLineNumber() + ": " + errorMessage + " : " + state.line); } } }