1// Copyright (c) 2011, Mike Samuel
2// All rights reserved.
3//
4// Redistribution and use in source and binary forms, with or without
5// modification, are permitted provided that the following conditions
6// are met:
7//
8// Redistributions of source code must retain the above copyright
9// notice, this list of conditions and the following disclaimer.
10// Redistributions in binary form must reproduce the above copyright
11// notice, this list of conditions and the following disclaimer in the
12// documentation and/or other materials provided with the distribution.
13// Neither the name of the OWASP nor the names of its contributors may
14// be used to endorse or promote products derived from this software
15// without specific prior written permission.
16// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
19// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
20// COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
21// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
22// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
24// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
26// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27// POSSIBILITY OF SUCH DAMAGE.
28
29package org.owasp.html;
30
31import java.util.ArrayList;
32import java.util.Arrays;
33import java.util.List;
34import java.util.Map;
35import java.util.TreeMap;
36
37/**
38 * A trie used to separate punctuation tokens in a run of non-whitespace
39 * characters by preferring the longest punctuation string possible in a
40 * greedy left-to-right scan.
41 *
42 * @author Mike Samuel <mikesamuel@gmail.com>
43 */
44final class Trie {
45  private final char[] childMap;
46  private final Trie[] children;
47  private final boolean terminal;
48  private final int value;
49
50  /**
51   * @param elements not empty, non null.
52   */
53  public Trie(Map<String, Integer> elements) {
54    this(sortedUniqEntries(elements), 0);
55  }
56
57  private Trie(List<Map.Entry<String, Integer>> elements, int depth) {
58    this(elements, depth, 0, elements.size());
59  }
60
61  /**
62   * @param elements not empty, non null.  Not modified.
63   * @param depth the depth in the tree.
64   * @param start an index into punctuationStrings of the first string in this
65   *   subtree.
66   * @param end an index into punctuationStrings past the last string in this
67   *   subtree.
68   */
69  private Trie(
70      List<Map.Entry<String, Integer>> elements, int depth,
71      int start, int end) {
72    this.terminal = depth == elements.get(start).getKey().length();
73    if (this.terminal) {
74      this.value = elements.get(start).getValue();
75      if (start + 1 == end) {  // base case
76        this.childMap = ZERO_CHARS;
77        this.children = ZERO_TRIES;
78        return;
79      } else {
80        ++start;
81      }
82    } else {
83      this.value = Integer.MAX_VALUE;
84    }
85    int childCount = 0;
86    {
87      int last = -1;
88      for (int i = start; i < end; ++i) {
89        char ch = elements.get(i).getKey().charAt(depth);
90        if (ch != last) {
91          ++childCount;
92          last = ch;
93        }
94      }
95    }
96    this.childMap = new char[childCount];
97    this.children = new Trie[childCount];
98    int childStart = start;
99    int childIndex = 0;
100    char lastCh = elements.get(start).getKey().charAt(depth);
101    for (int i = start + 1; i < end; ++i) {
102      char ch = elements.get(i).getKey().charAt(depth);
103      if (ch != lastCh) {
104        childMap[childIndex] = lastCh;
105        children[childIndex++] = new Trie(
106          elements, depth + 1, childStart, i);
107        childStart = i;
108        lastCh = ch;
109      }
110    }
111    childMap[childIndex] = lastCh;
112    children[childIndex++] = new Trie(elements, depth + 1, childStart, end);
113  }
114
115  /** Does this node correspond to a complete string in the input set. */
116  public boolean isTerminal() { return terminal; }
117
118  public int getValue() { return value; }
119
120  /**
121   * The child corresponding to the given character.
122   * @return null if no such trie.
123   */
124  public Trie lookup(char ch) {
125    int i = Arrays.binarySearch(childMap, ch);
126    return i >= 0 ? children[i] : null;
127  }
128
129  /**
130   * The descendant of this trie corresponding to the string for this trie
131   * appended with s.
132   * @param s non null.
133   * @return null if no such trie.
134   */
135  public Trie lookup(CharSequence s) {
136    Trie t = this;
137    for (int i = 0, n = s.length(); i < n; ++i) {
138      t = t.lookup(s.charAt(i));
139      if (null == t) { break; }
140    }
141    return t;
142  }
143
144  public boolean contains(char ch) {
145    return Arrays.binarySearch(childMap, ch) >= 0;
146  }
147
148  private static <T> List<Map.Entry<String, T>> sortedUniqEntries(
149      Map<String, T> m) {
150    return new ArrayList<Map.Entry<String, T>>(
151        new TreeMap<String, T>(m).entrySet());
152  }
153
154  private static final char[] ZERO_CHARS = new char[0];
155  private static final Trie[] ZERO_TRIES = new Trie[0];
156
157  /**
158   * Append all strings s such that {@code this.lookup(s).isTerminal()} to the
159   * given list in lexical order.
160   */
161  public void toStringList(List<String> strings) {
162    toStringList("", strings);
163  }
164
165  private void toStringList(String prefix, List<String> strings) {
166    if (terminal) { strings.add(prefix); }
167    for (int i = 0, n = childMap.length; i < n; ++i) {
168      children[i].toStringList(prefix + childMap[i], strings);
169    }
170  }
171
172  @Override
173  public String toString() {
174    StringBuilder sb = new StringBuilder();
175    toStringBuilder(0, sb);
176    return sb.toString();
177  }
178
179  private void toStringBuilder(int depth, StringBuilder sb) {
180    sb.append(terminal ? "terminal" : "nonterminal");
181    ++depth;
182    for (int i = 0; i < childMap.length; ++i) {
183      sb.append('\n');
184      for (int d = 0; d < depth; ++d) {
185        sb.append('\t');
186      }
187      sb.append('\'').append(childMap[i]).append("' ");
188      children[i].toStringBuilder(depth, sb);
189    }
190  }
191}
192