1/* 2 * Copyright (C) 2010 Google Inc. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17package com.google.streamhtmlparser.util; 18 19import com.google.common.base.Preconditions; 20 21import java.util.Arrays; 22 23/** 24 * Implements a circular (ring) buffer of characters with specialized 25 * application logic in order to determine the context of some 26 * Javascript content that is being parsed. 27 * 28 * This is a specialized class - of no use to external code - 29 * which aims to be 100% compatible with the corresponding logic 30 * in the C-version of the HtmlParser, specifically 31 * <code>jsparser.c</code>. In particular: 32 * <ul> 33 * <li> The API is odd, using negative indexes to access content in 34 * the buffer. Changing the API would mean changing the test 35 * cases and have more difficulty determining whether we are 36 * remaining compatible with the C-version. It is left as an 37 * exercise for once the code is very stable and proven. 38 * <li> Repeated whitespace is folded into just one character to 39 * use the space available efficiently. 40 * <li> The buffer size is fixed. There is currently no need to 41 * make it variable so we avoid the need for constructors. 42 * </ul> 43 */ 44public class JavascriptTokenBuffer { 45 46 /** 47 * Size of the ring buffer used to lookup the last token in the javascript 48 * stream. The size is somewhat arbitrary but must be larger than 49 * the biggest token we want to lookup plus three: Two delimiters plus 50 * an empty ring buffer slot. 51 */ 52 private static final int BUFFER_SIZE = 18; 53 54 /** Storage implementing the circular buffer. */ 55 private final char[] buffer; 56 57 /** Index of the first item in our circular buffer. */ 58 private int startIndex; 59 60 /** Index of the last item in our circular buffer. */ 61 private int endIndex; 62 63 /** 64 * Constructs an empty javascript token buffer. The size is fixed, 65 * see {@link #BUFFER_SIZE}. 66 */ 67 public JavascriptTokenBuffer() { 68 buffer = new char[BUFFER_SIZE]; 69 startIndex = 0; 70 endIndex = 0; 71 } 72 73 /** 74 * Constructs a javascript token buffer that is identical to 75 * the one given. In particular, it has the same size and contents. 76 * 77 * @param aJavascriptTokenBuffer the {@code JavascriptTokenBuffer} to copy 78 */ 79 public JavascriptTokenBuffer(JavascriptTokenBuffer aJavascriptTokenBuffer) { 80 buffer = Arrays.copyOf(aJavascriptTokenBuffer.buffer, 81 aJavascriptTokenBuffer.buffer.length); 82 startIndex = aJavascriptTokenBuffer.startIndex; 83 endIndex = aJavascriptTokenBuffer.endIndex; 84 } 85 86 /** 87 * A simple wrapper over <code>appendChar</code>, it appends a string 88 * to the buffer. Sequences of whitespace and newlines 89 * are folded into one character to save space. Null strings are 90 * not allowed. 91 * 92 * @param input the {@code String} to append, cannot be {@code null} 93 */ 94 // TODO: Move to testing since not used in code. 95 public void appendString(String input) { 96 if (input == null) { 97 throw new NullPointerException("input == null is not allowed"); 98 } 99 for (int i = 0; i < input.length(); i++) { 100 appendChar(input.charAt(i)); 101 } 102 } 103 104 /** 105 * Appends a character to the buffer. We fold sequences of whitespace and 106 * newlines into one to save space. 107 * 108 * @param input the {@code char} to append 109 */ 110 public void appendChar(char input) { 111 if (HtmlUtils.isJavascriptWhitespace(input) && 112 HtmlUtils.isJavascriptWhitespace(getChar(-1))) { 113 return; 114 } 115 buffer[endIndex] = input; 116 endIndex = (endIndex + 1) % buffer.length; 117 if (endIndex == startIndex) { 118 startIndex = (endIndex + 1) % buffer.length; 119 } 120 } 121 122 /** 123 * Returns the last character in the buffer and removes it from the buffer 124 * or the NUL character '\0' if the buffer is empty. 125 * 126 * @return last character in the buffer or '\0' if the buffer is empty 127 */ 128 public char popChar() { 129 if (startIndex == endIndex) { 130 return '\0'; 131 } 132 endIndex--; 133 if (endIndex < 0) { 134 endIndex += buffer.length; 135 } 136 return buffer[endIndex]; 137 } 138 139 /** 140 * Returns the character at a given index in the buffer or nul ('\0') 141 * if the index is outside the range of the buffer. Such could happen 142 * if the buffer is not filled enough or the index is larger than the 143 * size of the buffer. 144 * 145 * <p>Position must be negative where -1 is the index of the last 146 * character in the buffer. 147 * 148 * @param position The index into the buffer 149 * 150 * @return character at the requested index 151 */ 152 public char getChar(int position) { 153 assert(position < 0); // Developer error if it triggers. 154 155 int absolutePosition = getAbsolutePosition(position); 156 if (absolutePosition < 0) { 157 return '\0'; 158 } 159 160 return buffer[absolutePosition]; 161 } 162 163 /** 164 * Sets the given {@code input} at the given {@code position} of the buffer. 165 * Returns {@code true} if we succeeded or {@code false} if we 166 * failed (i.e. the write was beyond the buffer boundary). 167 * 168 * <p>Index positions are negative where -1 is the index of the 169 * last character in the buffer. 170 * 171 * @param position The index at which to set the character 172 * @param input The character to set in the buffer 173 * @return {@code true} if we succeeded, {@code false} otherwise 174 */ 175 public boolean setChar(int position, char input) { 176 assert(position < 0); // Developer error if it triggers. 177 178 int absolutePosition = getAbsolutePosition(position); 179 if (absolutePosition < 0) { 180 return false; 181 } 182 183 buffer[absolutePosition] = input; 184 return true; 185 } 186 187 188 /** 189 * Returns the last javascript identifier/keyword in the buffer. 190 * 191 * @return the last identifier or {@code null} if none was found 192 */ 193 public String getLastIdentifier() { 194 int end = -1; 195 196 if (HtmlUtils.isJavascriptWhitespace(getChar(-1))) { 197 end--; 198 } 199 int position; 200 for (position = end; HtmlUtils.isJavascriptIdentifier(getChar(position)); 201 position--) { 202 } 203 if ((position + 1) >= end) { 204 return null; 205 } 206 return slice(position + 1, end); 207 } 208 209 /** 210 * Returns a slice of the buffer delimited by the given indices. 211 * 212 * The start and end indexes represent the start and end of the 213 * slice to copy. If the start argument extends beyond the beginning 214 * of the buffer, the slice will only contain characters 215 * starting from the beginning of the buffer. 216 * 217 * @param start The index of the first character the copy 218 * @param end the index of the last character to copy 219 * 220 * @return {@code String} between the given indices 221 */ 222 public String slice(int start, int end) { 223 // Developer error if any of the asserts below fail. 224 Preconditions.checkArgument(start <= end); 225 Preconditions.checkArgument(start < 0); 226 Preconditions.checkArgument(end < 0); 227 228 StringBuffer output = new StringBuffer(); 229 for (int position = start; position <= end; position++) { 230 char c = getChar(position); 231 if (c != '\0') { 232 output.append(c); 233 } 234 } 235 return new String(output); 236 } 237 238 /** 239 * Returns the position relative to the start of the buffer or -1 240 * if the position is past the size of the buffer. 241 * 242 * @param position the index to be translated 243 * @return the position relative to the start of the buffer 244 */ 245 private int getAbsolutePosition(int position) { 246 assert (position < 0); // Developer error if it triggers. 247 if (position <= -buffer.length) { 248 return -1; 249 } 250 int len = endIndex - startIndex; 251 if (len < 0) { 252 len += buffer.length; 253 } 254 if (position < -len) { 255 return -1; 256 } 257 int absolutePosition = (position + endIndex) % buffer.length; 258 if (absolutePosition < 0) { 259 absolutePosition += buffer.length; 260 } 261 return absolutePosition; 262 } 263} 264