1/*
2 * Copyright (C) 2010 Google Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package com.google.streamhtmlparser.util;
18
19import com.google.common.base.Preconditions;
20
21import java.util.Arrays;
22
23/**
24 * Implements a circular (ring) buffer of characters with specialized
25 * application logic in order to determine the context of some
26 * Javascript content that is being parsed.
27 *
28 * This is a specialized class - of no use to external code -
29 * which aims to be 100% compatible with the corresponding logic
30 * in the C-version of the HtmlParser, specifically
31 * <code>jsparser.c</code>. In particular:
32 * <ul>
33 *   <li> The API is odd, using negative indexes to access content in
34 *        the buffer. Changing the API would mean changing the test
35 *        cases and have more difficulty determining whether we are
36 *        remaining compatible with the C-version. It is left as an
37 *        exercise for once the code is very stable and proven.
38 *   <li> Repeated whitespace is folded into just one character to
39 *        use the space available efficiently.
40 *   <li> The buffer size is fixed. There is currently no need to
41 *        make it variable so we avoid the need for constructors.
42 * </ul>
43 */
44public class JavascriptTokenBuffer {
45
46  /**
47   * Size of the ring buffer used to lookup the last token in the javascript
48   * stream. The size is somewhat arbitrary but must be larger than
49   * the biggest token we want to lookup plus three: Two delimiters plus
50   * an empty ring buffer slot.
51   */
52  private static final int BUFFER_SIZE = 18;
53
54  /** Storage implementing the circular buffer. */
55  private final char[] buffer;
56
57  /** Index of the first item in our circular buffer. */
58  private int startIndex;
59
60  /** Index of the last item in our circular buffer. */
61  private int endIndex;
62
63  /**
64   * Constructs an empty javascript token buffer. The size is fixed,
65   * see {@link #BUFFER_SIZE}.
66   */
67  public JavascriptTokenBuffer() {
68    buffer = new char[BUFFER_SIZE];
69    startIndex = 0;
70    endIndex = 0;
71  }
72
73  /**
74   * Constructs a javascript token buffer that is identical to
75   * the one given. In particular, it has the same size and contents.
76   *
77   * @param aJavascriptTokenBuffer the {@code JavascriptTokenBuffer} to copy
78   */
79  public JavascriptTokenBuffer(JavascriptTokenBuffer aJavascriptTokenBuffer) {
80    buffer = Arrays.copyOf(aJavascriptTokenBuffer.buffer,
81                           aJavascriptTokenBuffer.buffer.length);
82    startIndex = aJavascriptTokenBuffer.startIndex;
83    endIndex = aJavascriptTokenBuffer.endIndex;
84  }
85
86  /**
87   * A simple wrapper over <code>appendChar</code>, it appends a string
88   * to the buffer. Sequences of whitespace and newlines
89   * are folded into one character to save space. Null strings are
90   * not allowed.
91   *
92   * @param input the {@code String} to append, cannot be {@code null}
93   */
94  // TODO: Move to testing since not used in code.
95  public void appendString(String input) {
96    if (input == null) {
97      throw new NullPointerException("input == null is not allowed");
98    }
99    for (int i = 0; i < input.length(); i++) {
100      appendChar(input.charAt(i));
101    }
102  }
103
104  /**
105   * Appends a character to the buffer. We fold sequences of whitespace and
106   * newlines into one to save space.
107   *
108   * @param input the {@code char} to append
109   */
110  public void appendChar(char input) {
111    if (HtmlUtils.isJavascriptWhitespace(input) &&
112        HtmlUtils.isJavascriptWhitespace(getChar(-1))) {
113      return;
114    }
115    buffer[endIndex] = input;
116    endIndex = (endIndex + 1) % buffer.length;
117    if (endIndex == startIndex) {
118      startIndex = (endIndex + 1) % buffer.length;
119    }
120  }
121
122  /**
123   * Returns the last character in the buffer and removes it from the buffer
124   * or the NUL character '\0' if the buffer is empty.
125   *
126   * @return last character in the buffer or '\0' if the buffer is empty
127   */
128  public char popChar() {
129    if (startIndex == endIndex) {
130      return '\0';
131    }
132    endIndex--;
133    if (endIndex < 0) {
134      endIndex += buffer.length;
135    }
136    return buffer[endIndex];
137  }
138
139  /**
140   * Returns the character at a given index in the buffer or nul ('\0')
141   * if the index is outside the range of the buffer. Such could happen
142   * if the buffer is not filled enough or the index is larger than the
143   * size of the buffer.
144   *
145   * <p>Position must be negative where -1 is the index of the last
146   * character in the buffer.
147   *
148   * @param position The index into the buffer
149   *
150   * @return character at the requested index
151   */
152  public char getChar(int position) {
153    assert(position < 0);   // Developer error if it triggers.
154
155    int absolutePosition = getAbsolutePosition(position);
156    if (absolutePosition < 0) {
157      return '\0';
158    }
159
160    return buffer[absolutePosition];
161  }
162
163  /**
164   * Sets the given {@code input} at the given {@code position} of the buffer.
165   * Returns {@code true} if we succeeded or {@code false} if we
166   * failed (i.e. the write was beyond the buffer boundary).
167   *
168   * <p>Index positions are negative where -1 is the index of the
169   * last character in the buffer.
170   *
171   * @param position The index at which to set the character
172   * @param input The character to set in the buffer
173   * @return {@code true} if we succeeded, {@code false} otherwise
174   */
175  public boolean setChar(int position, char input) {
176    assert(position < 0);   // Developer error if it triggers.
177
178    int absolutePosition = getAbsolutePosition(position);
179    if (absolutePosition < 0) {
180      return false;
181    }
182
183    buffer[absolutePosition] = input;
184    return true;
185  }
186
187
188  /**
189   * Returns the last javascript identifier/keyword in the buffer.
190   *
191   * @return the last identifier or {@code null} if none was found
192   */
193  public String getLastIdentifier() {
194    int end = -1;
195
196    if (HtmlUtils.isJavascriptWhitespace(getChar(-1))) {
197      end--;
198    }
199    int position;
200    for (position = end; HtmlUtils.isJavascriptIdentifier(getChar(position));
201         position--) {
202    }
203    if ((position + 1) >= end) {
204      return null;
205    }
206    return slice(position + 1, end);
207  }
208
209  /**
210   * Returns a slice of the buffer delimited by the given indices.
211   *
212   * The start and end indexes represent the start and end of the
213   * slice to copy. If the start argument extends beyond the beginning
214   * of the buffer, the slice will only contain characters
215   * starting from the beginning of the buffer.
216   *
217   * @param start The index of the first character the copy
218   * @param end the index of the last character to copy
219   *
220   * @return {@code String} between the given indices
221   */
222  public String slice(int start, int end) {
223    // Developer error if any of the asserts below fail.
224    Preconditions.checkArgument(start <= end);
225    Preconditions.checkArgument(start < 0);
226    Preconditions.checkArgument(end < 0);
227
228    StringBuffer output = new StringBuffer();
229    for (int position = start; position <= end; position++) {
230      char c = getChar(position);
231      if (c != '\0') {
232        output.append(c);
233      }
234    }
235    return new String(output);
236  }
237
238  /**
239   * Returns the position relative to the start of the buffer or -1
240   * if the position is past the size of the buffer.
241   *
242   * @param position the index to be translated
243   * @return the position relative to the start of the buffer
244   */
245  private int getAbsolutePosition(int position) {
246    assert (position < 0);   // Developer error if it triggers.
247    if (position <= -buffer.length) {
248      return -1;
249    }
250    int len = endIndex - startIndex;
251    if (len < 0) {
252      len += buffer.length;
253    }
254    if (position < -len) {
255      return -1;
256    }
257    int absolutePosition = (position + endIndex) % buffer.length;
258    if (absolutePosition < 0) {
259      absolutePosition += buffer.length;
260    }
261    return absolutePosition;
262  }
263}
264