1/*
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the  "License");
7 * you may not use this file except in compliance with the License.
8 * You may obtain a copy of the License at
9 *
10 *     http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
17 */
18/*
19 * $Id: DTMStringPool.java 468653 2006-10-28 07:07:05Z minchau $
20 */
21
22package org.apache.xml.dtm.ref;
23
24import java.util.Vector;
25
26import org.apache.xml.utils.IntVector;
27
28/** <p>DTMStringPool is an "interning" mechanism for strings. It will
29 * create a stable 1:1 mapping between a set of string values and a set of
30 * integer index values, so the integers can be used to reliably and
31 * uniquely identify (and when necessary retrieve) the strings.</p>
32 *
33 * <p>Design Priorities:
34 * <ul>
35 * <li>String-to-index lookup speed is critical.</li>
36 * <li>Index-to-String lookup speed is slightly less so.</li>
37 * <li>Threadsafety is not guaranteed at this level.
38 * Enforce that in the application if needed.</li>
39 * <li>Storage efficiency is an issue but not a huge one.
40 * It is expected that string pools won't exceed about 2000 entries.</li>
41 * </ul>
42 * </p>
43 *
44 * <p>Implementation detail: A standard Hashtable is relatively
45 * inefficient when looking up primitive int values, especially when
46 * we're already maintaining an int-to-string vector.  So I'm
47 * maintaining a simple hash chain within this class.</p>
48 *
49 * <p>NOTE: There is nothing in the code that has a real dependency upon
50 * String. It would work with any object type that implements reliable
51 * .hashCode() and .equals() operations. The API enforces Strings because
52 * it's safer that way, but this could trivially be turned into a general
53 * ObjectPool if one was needed.</p>
54 *
55 * <p>Status: Passed basic test in main().</p>
56 * */
57public class DTMStringPool
58{
59  Vector m_intToString;
60  static final int HASHPRIME=101;
61  int[] m_hashStart=new int[HASHPRIME];
62  IntVector m_hashChain;
63  public static final int NULL=-1;
64
65  /**
66   * Create a DTMStringPool using the given chain size
67   *
68   * @param chainSize The size of the hash chain vector
69   */
70  public DTMStringPool(int chainSize)
71    {
72      m_intToString=new Vector();
73      m_hashChain=new IntVector(chainSize);
74      removeAllElements();
75
76      // -sb Add this to force empty strings to be index 0.
77      stringToIndex("");
78    }
79
80  public DTMStringPool()
81    {
82      this(512);
83    }
84
85  public void removeAllElements()
86    {
87      m_intToString.removeAllElements();
88      for(int i=0;i<HASHPRIME;++i)
89        m_hashStart[i]=NULL;
90      m_hashChain.removeAllElements();
91    }
92
93  /** @return string whose value is uniquely identified by this integer index.
94   * @throws java.lang.ArrayIndexOutOfBoundsException
95   *  if index doesn't map to a string.
96   * */
97  public String indexToString(int i)
98    throws java.lang.ArrayIndexOutOfBoundsException
99    {
100      if(i==NULL) return null;
101      return (String) m_intToString.elementAt(i);
102    }
103
104  /** @return integer index uniquely identifying the value of this string. */
105  public int stringToIndex(String s)
106    {
107      if(s==null) return NULL;
108
109      int hashslot=s.hashCode()%HASHPRIME;
110      if(hashslot<0) hashslot=-hashslot;
111
112      // Is it one we already know?
113      int hashlast=m_hashStart[hashslot];
114      int hashcandidate=hashlast;
115      while(hashcandidate!=NULL)
116        {
117          if(m_intToString.elementAt(hashcandidate).equals(s))
118            return hashcandidate;
119
120          hashlast=hashcandidate;
121          hashcandidate=m_hashChain.elementAt(hashcandidate);
122        }
123
124      // New value. Add to tables.
125      int newIndex=m_intToString.size();
126      m_intToString.addElement(s);
127
128      m_hashChain.addElement(NULL);	// Initialize to no-following-same-hash
129      if(hashlast==NULL)  // First for this hash
130        m_hashStart[hashslot]=newIndex;
131      else // Link from previous with same hash
132        m_hashChain.setElementAt(newIndex,hashlast);
133
134      return newIndex;
135    }
136
137  /** Command-line unit test driver. This test relies on the fact that
138   * this version of the pool assigns indices consecutively, starting
139   * from zero, as new unique strings are encountered.
140   */
141  public static void main(String[] args)
142  {
143    String[] word={
144      "Zero","One","Two","Three","Four","Five",
145      "Six","Seven","Eight","Nine","Ten",
146      "Eleven","Twelve","Thirteen","Fourteen","Fifteen",
147      "Sixteen","Seventeen","Eighteen","Nineteen","Twenty",
148      "Twenty-One","Twenty-Two","Twenty-Three","Twenty-Four",
149      "Twenty-Five","Twenty-Six","Twenty-Seven","Twenty-Eight",
150      "Twenty-Nine","Thirty","Thirty-One","Thirty-Two",
151      "Thirty-Three","Thirty-Four","Thirty-Five","Thirty-Six",
152      "Thirty-Seven","Thirty-Eight","Thirty-Nine"};
153
154    DTMStringPool pool=new DTMStringPool();
155
156    System.out.println("If no complaints are printed below, we passed initial test.");
157
158    for(int pass=0;pass<=1;++pass)
159      {
160        int i;
161
162        for(i=0;i<word.length;++i)
163          {
164            int j=pool.stringToIndex(word[i]);
165            if(j!=i)
166              System.out.println("\tMismatch populating pool: assigned "+
167                                 j+" for create "+i);
168          }
169
170        for(i=0;i<word.length;++i)
171          {
172            int j=pool.stringToIndex(word[i]);
173            if(j!=i)
174              System.out.println("\tMismatch in stringToIndex: returned "+
175                                 j+" for lookup "+i);
176          }
177
178        for(i=0;i<word.length;++i)
179          {
180            String w=pool.indexToString(i);
181            if(!word[i].equals(w))
182              System.out.println("\tMismatch in indexToString: returned"+
183                                 w+" for lookup "+i);
184          }
185
186        pool.removeAllElements();
187
188        System.out.println("\nPass "+pass+" complete\n");
189      } // end pass loop
190  }
191}
192