156ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson/*
256ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * Copyright (C) 2010 Google Inc.
356ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson *
456ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * Licensed under the Apache License, Version 2.0 (the "License");
556ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * you may not use this file except in compliance with the License.
656ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * You may obtain a copy of the License at
756ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson *
856ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * http://www.apache.org/licenses/LICENSE-2.0
956ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson *
1056ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * Unless required by applicable law or agreed to in writing, software
1156ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * distributed under the License is distributed on an "AS IS" BASIS,
1256ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1356ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * See the License for the specific language governing permissions and
1456ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * limitations under the License.
1556ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson */
1656ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson
1756ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodsonpackage com.google.streamhtmlparser.util;
1856ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson
/**
 * Records (stores) characters supplied one at a time, conditional on
 * whether recording is currently enabled.
 *
 * <p>When {@link #maybeRecord(char)} is called, it adds the supplied
 * character to the recording buffer, but only if recording is in
 * progress. This is useful in our
 * {@link com.google.streamhtmlparser.HtmlParser}
 * as the caller logic to enable/disable recording is decoupled from the logic
 * of recording.
 *
 * <p>This is a specialized class - of no use to external code -
 * which aims to be 100% compatible with the corresponding logic
 * in the C-version of the HtmlParser, specifically in
 * <code>statemachine.c</code>. In particular:
 * <ul>
 *   <li>The {@code startRecording()} and {@code stopRecording()} methods
 *       may be called repeatedly without interleaving since the C version is
 *       not guaranteed to interleave them.</li>
 *   <li>There is a size limit to the recording buffer as set in
 *       {@link #RECORDING_BUFFER_SIZE}. Once the size is
 *       reached, no further characters are recorded regardless of whether
 *       recording is currently enabled.</li>
 * </ul>
 */
public class CharacterRecorder {

  /**
   * How many characters can be recorded before stopping to accept new
   * ones. Set to one less than in the C-version as we do not need
   * to reserve a character for the terminating null.
   */
  public static final int RECORDING_BUFFER_SIZE = 255;

  /**
   * This is where characters provided for recording are stored. Given
   * that the <code>CharacterRecorder</code> object is re-used, might as well
   * allocate the full size from the get-go.
   */
  private final StringBuilder sb;

  /** Holds whether we are currently recording characters or not. */
  private boolean recording;

  /**
   * Constructs an empty character recorder of fixed size, currently
   * not recording. See {@link #RECORDING_BUFFER_SIZE} for the size.
   */
  public CharacterRecorder() {
    sb = new StringBuilder(RECORDING_BUFFER_SIZE);
    recording = false;
  }

  /**
   * Constructs a character recorder of fixed size that is a copy
   * of the one provided. In particular it has the same recording
   * setting and the same contents. The copy owns its own buffer, so
   * later changes to either recorder do not affect the other.
   *
   * @param aCharacterRecorder the {@code CharacterRecorder} to copy
   * @throws NullPointerException if {@code aCharacterRecorder} is
   *         {@code null}
   */
  public CharacterRecorder(CharacterRecorder aCharacterRecorder) {
    // Fail fast with a message naming the offending argument instead of
    // relying on an incidental dereference further down.
    if (aCharacterRecorder == null) {
      throw new NullPointerException("aCharacterRecorder must not be null");
    }
    recording = aCharacterRecorder.recording;
    sb = new StringBuilder(RECORDING_BUFFER_SIZE);
    sb.append(aCharacterRecorder.getContent());
  }

  /**
   * Enables recording for incoming characters. The recording buffer is cleared
   * of content it may have contained, on every call, including when recording
   * was already enabled.
   */
  public void startRecording() {
    // This is very fast, no memory (re-) allocation will take place.
    sb.setLength(0);
    recording = true;
  }

  /**
   * Disables recording further characters. The characters recorded so far
   * are left in the buffer.
   */
  public void stopRecording() {
    recording = false;
  }

  /**
   * Records the {@code input} if recording is currently on and we
   * have space available in the buffer. If recording is not
   * currently on, or the buffer already holds
   * {@link #RECORDING_BUFFER_SIZE} characters, this method will not
   * perform any action.
   *
   * @param input the character to record
   */
  public void maybeRecord(char input) {
    if (recording && (sb.length() < RECORDING_BUFFER_SIZE)) {
      sb.append(input);
    }
  }

  /**
   * Empties the underlying storage but does not change the recording
   * state [i.e. whether or not we are recording incoming characters].
   */
  public void clear() {
    sb.setLength(0);
  }

  /**
   * Empties the underlying storage and resets the recording indicator
   * to indicate we are not recording currently.
   */
  public void reset() {
    clear();
    recording = false;
  }

  /**
   * Returns the characters recorded in a {@code String} form. This
   * method has no side-effects, the characters remain stored as is.
   *
   * @return the contents in a {@code String} form
   */
  public String getContent() {
    return sb.toString();
  }

  /**
   * Returns whether or not we are currently recording incoming characters.
   *
   * @return {@code true} if we are recording, {@code false} otherwise
   */
  public boolean isRecording() {
    return recording;
  }

  /**
   * Returns the full state of the object in a human readable form. The
   * format of the returned {@code String} is not specified and is
   * subject to change.
   *
   * @return the full state of this object
   */
  @Override
  public String toString() {
    return String.format("In recording: %s; Value: %s", isRecording(),
                         sb.toString());
  }
}
164