156ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson/* 256ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * Copyright (C) 2010 Google Inc. 356ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * 456ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * Licensed under the Apache License, Version 2.0 (the "License"); 556ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * you may not use this file except in compliance with the License. 656ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * You may obtain a copy of the License at 756ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * 856ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * http://www.apache.org/licenses/LICENSE-2.0 956ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * 1056ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * Unless required by applicable law or agreed to in writing, software 1156ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * distributed under the License is distributed on an "AS IS" BASIS, 1256ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 1356ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * See the License for the specific language governing permissions and 1456ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * limitations under the License. 1556ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson */ 1656ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson 1756ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodsonpackage com.google.streamhtmlparser.util; 1856ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson 1956ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson/** 2056ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * Records (stores) characters supplied one at a time conditional on 2156ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * whether recording is currently enabled. 2256ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * 2356ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * <p>When {@link #maybeRecord(char)} is called, it will add the 2456ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * supplied character to the recording buffer but only if 2556ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * recording is in progress. This is useful in our 2656ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * {@link com.google.security.streamhtmlparser.HtmlParser} 2756ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * as the caller logic to enable/disable recording is decoupled from the logic 2856ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * of recording. 2956ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * 3056ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * <p>This is a specialized class - of no use to external code - 3156ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * which aims to be 100% compatible with the corresponding logic 3256ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * in the C-version of the HtmlParser, specifically in 3356ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * <code>statemachine.c</code>. In particular: 3456ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * <ul> 3556ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * <li>The {@code startRecording()} and {@code stopRecording()} methods 3656ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * may be called repeatedly without interleaving since the C version is 3756ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * not guaranteed to interleave them. 3856ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * <li>There is a size limit to the recording buffer as set in 3956ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * {@link #RECORDING_BUFFER_SIZE}. Once the size is 4056ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * reached, no further characters are recorded regardless of whether 4156ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * recording is currently enabled. 4256ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * </ul> 4356ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson */ 4456ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodsonpublic class CharacterRecorder { 4556ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson 4656ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson /** 4756ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * How many characters can be recorded before stopping to accept new 4856ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * ones. Set to one less than in the C-version as we do not need 4956ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * to reserve a character for the terminating null. 5056ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson */ 5156ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson public static final int RECORDING_BUFFER_SIZE = 255; 5256ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson 5356ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson /** 5456ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * This is where characters provided for recording are stored. Given 5556ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * that the <code>CharacterRecorder</code> object is re-used, might as well 5656ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * allocate the full size from the get-go. 5756ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson */ 5856ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson private final StringBuilder sb; 5956ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson 6056ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson /** Holds whether we are currently recording characters or not. */ 6156ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson private boolean recording; 6256ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson 6356ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson /** 6456ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * Constructs an empty character recorder of fixed size currently 6556ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * not recording. See {@link #RECORDING_BUFFER_SIZE} for the size. 6656ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson */ 6756ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson public CharacterRecorder() { 6856ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson sb = new StringBuilder(RECORDING_BUFFER_SIZE); 6956ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson recording = false; 7056ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson } 7156ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson 7256ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson /** 7356ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * Constructs a character recorder of fixed size that is a copy 7456ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * of the one provided. In particular it has the same recording 7556ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * setting and the same contents. 7656ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * 7756ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * @param aCharacterRecorder the {@code CharacterRecorder} to copy 7856ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson */ 7956ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson public CharacterRecorder(CharacterRecorder aCharacterRecorder) { 8056ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson recording = aCharacterRecorder.recording; 8156ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson sb = new StringBuilder(RECORDING_BUFFER_SIZE); 8256ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson sb.append(aCharacterRecorder.getContent()); 8356ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson } 8456ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson 8556ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson /** 8656ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * Enables recording for incoming characters. The recording buffer is cleared 8756ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * of content it may have contained. 8856ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson */ 8956ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson public void startRecording() { 9056ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson // This is very fast, no memory (re-) allocation will take place. 9156ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson sb.setLength(0); 9256ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson recording = true; 9356ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson } 9456ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson 9556ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson /** 9656ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * Disables recording further characters. 9756ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson */ 9856ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson public void stopRecording() { 9956ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson recording = false; 10056ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson } 10156ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson 10256ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson /** 10356ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * Records the {@code input} if recording is currently on and we 10456ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * have space available in the buffer. If recording is not 10556ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * currently on, this method will not perform any action. 10656ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * 10756ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * @param input the character to record 10856ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson */ 10956ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson public void maybeRecord(char input) { 11056ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson if (recording && (sb.length() < RECORDING_BUFFER_SIZE)) { 11156ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson sb.append(input); 11256ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson } 11356ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson } 11456ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson 11556ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson /** 11656ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * Empties the underlying storage but does not change the recording 11756ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * state [i.e whether we are recording or not incoming characters]. 11856ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson */ 11956ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson public void clear() { 12056ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson sb.setLength(0); 12156ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson } 12256ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson 12356ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson /** 12456ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * Empties the underlying storage and resets the recording indicator 12556ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * to indicate we are not recording currently. 12656ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson */ 12756ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson public void reset() { 12856ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson clear(); 12956ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson recording = false; 13056ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson } 13156ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson 13256ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson /** 13356ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * Returns the characters recorded in a {@code String} form. This 13456ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * method has no side-effects, the characters remain stored as is. 13556ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * 13656ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * @return the contents in a {@code String} form 13756ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson */ 13856ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson public String getContent() { 13956ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson return sb.toString(); 14056ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson } 14156ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson 14256ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson /** 14356ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * Returns whether or not we are currently recording incoming characters. 14456ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * 14556ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * @return {@code true} if we are recording, {@code false} otherwise 14656ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson */ 14756ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson public boolean isRecording() { 14856ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson return recording; 14956ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson } 15056ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson 15156ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson /** 15256ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * Returns the full state of the object in a human readable form. The 15356ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * format of the returned {@code String} is not specified and is 15456ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * subject to change. 15556ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * 15656ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * @return the full state of this object 15756ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson */ 15856ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson @Override 15956ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson public String toString() { 16056ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson return String.format("In recording: %s; Value: %s", isRecording(), 16156ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson sb.toString()); 16256ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson } 16356ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson} 164