001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements. See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership. The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the "License");
007 * you may not use this file except in compliance with the License.
008 * You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018 /*
019 * $Id: DTMStringPool.java 468653 2006-10-28 07:07:05Z minchau $
020 */
021
022 package org.apache.xml.dtm.ref;
023
024 import java.util.Vector;
025
026 import org.apache.xml.utils.IntVector;
027
028 /** <p>DTMStringPool is an "interning" mechanism for strings. It will
029 * create a stable 1:1 mapping between a set of string values and a set of
030 * integer index values, so the integers can be used to reliably and
031 * uniquely identify (and when necessary retrieve) the strings.</p>
032 *
033 * <p>Design Priorities:
034 * <ul>
035 * <li>String-to-index lookup speed is critical.</li>
036 * <li>Index-to-String lookup speed is slightly less so.</li>
037 * <li>Threadsafety is not guaranteed at this level.
038 * Enforce that in the application if needed.</li>
039 * <li>Storage efficiency is an issue but not a huge one.
040 * It is expected that string pools won't exceed about 2000 entries.</li>
041 * </ul>
042 * </p>
043 *
044 * <p>Implementation detail: A standard Hashtable is relatively
045 * inefficient when looking up primitive int values, especially when
046 * we're already maintaining an int-to-string vector. So I'm
047 * maintaining a simple hash chain within this class.</p>
048 *
049 * <p>NOTE: There is nothing in the code that has a real dependency upon
050 * String. It would work with any object type that implements reliable
051 * .hashCode() and .equals() operations. The API enforces Strings because
052 * it's safer that way, but this could trivially be turned into a general
053 * ObjectPool if one was needed.</p>
054 *
055 * <p>Status: Passed basic test in main().</p>
056 * */
057 public class DTMStringPool
058 {
059 Vector m_intToString;
060 static final int HASHPRIME=101;
061 int[] m_hashStart=new int[HASHPRIME];
062 IntVector m_hashChain;
063 public static final int NULL=-1;
064
065 /**
066 * Create a DTMStringPool using the given chain size
067 *
068 * @param chainSize The size of the hash chain vector
069 */
070 public DTMStringPool(int chainSize)
071 {
072 m_intToString=new Vector();
073 m_hashChain=new IntVector(chainSize);
074 removeAllElements();
075
076 // -sb Add this to force empty strings to be index 0.
077 stringToIndex("");
078 }
079
080 public DTMStringPool()
081 {
082 this(512);
083 }
084
085 public void removeAllElements()
086 {
087 m_intToString.removeAllElements();
088 for(int i=0;i<HASHPRIME;++i)
089 m_hashStart[i]=NULL;
090 m_hashChain.removeAllElements();
091 }
092
093 /** @return string whose value is uniquely identified by this integer index.
094 * @throws java.lang.ArrayIndexOutOfBoundsException
095 * if index doesn't map to a string.
096 * */
097 public String indexToString(int i)
098 throws java.lang.ArrayIndexOutOfBoundsException
099 {
100 if(i==NULL) return null;
101 return (String) m_intToString.elementAt(i);
102 }
103
104 /** @return integer index uniquely identifying the value of this string. */
105 public int stringToIndex(String s)
106 {
107 if(s==null) return NULL;
108
109 int hashslot=s.hashCode()%HASHPRIME;
110 if(hashslot<0) hashslot=-hashslot;
111
112 // Is it one we already know?
113 int hashlast=m_hashStart[hashslot];
114 int hashcandidate=hashlast;
115 while(hashcandidate!=NULL)
116 {
117 if(m_intToString.elementAt(hashcandidate).equals(s))
118 return hashcandidate;
119
120 hashlast=hashcandidate;
121 hashcandidate=m_hashChain.elementAt(hashcandidate);
122 }
123
124 // New value. Add to tables.
125 int newIndex=m_intToString.size();
126 m_intToString.addElement(s);
127
128 m_hashChain.addElement(NULL); // Initialize to no-following-same-hash
129 if(hashlast==NULL) // First for this hash
130 m_hashStart[hashslot]=newIndex;
131 else // Link from previous with same hash
132 m_hashChain.setElementAt(newIndex,hashlast);
133
134 return newIndex;
135 }
136
137 /** Command-line unit test driver. This test relies on the fact that
138 * this version of the pool assigns indices consecutively, starting
139 * from zero, as new unique strings are encountered.
140 */
141 public static void main(String[] args)
142 {
143 String[] word={
144 "Zero","One","Two","Three","Four","Five",
145 "Six","Seven","Eight","Nine","Ten",
146 "Eleven","Twelve","Thirteen","Fourteen","Fifteen",
147 "Sixteen","Seventeen","Eighteen","Nineteen","Twenty",
148 "Twenty-One","Twenty-Two","Twenty-Three","Twenty-Four",
149 "Twenty-Five","Twenty-Six","Twenty-Seven","Twenty-Eight",
150 "Twenty-Nine","Thirty","Thirty-One","Thirty-Two",
151 "Thirty-Three","Thirty-Four","Thirty-Five","Thirty-Six",
152 "Thirty-Seven","Thirty-Eight","Thirty-Nine"};
153
154 DTMStringPool pool=new DTMStringPool();
155
156 System.out.println("If no complaints are printed below, we passed initial test.");
157
158 for(int pass=0;pass<=1;++pass)
159 {
160 int i;
161
162 for(i=0;i<word.length;++i)
163 {
164 int j=pool.stringToIndex(word[i]);
165 if(j!=i)
166 System.out.println("\tMismatch populating pool: assigned "+
167 j+" for create "+i);
168 }
169
170 for(i=0;i<word.length;++i)
171 {
172 int j=pool.stringToIndex(word[i]);
173 if(j!=i)
174 System.out.println("\tMismatch in stringToIndex: returned "+
175 j+" for lookup "+i);
176 }
177
178 for(i=0;i<word.length;++i)
179 {
180 String w=pool.indexToString(i);
181 if(!word[i].equals(w))
182 System.out.println("\tMismatch in indexToString: returned"+
183 w+" for lookup "+i);
184 }
185
186 pool.removeAllElements();
187
188 System.out.println("\nPass "+pass+" complete\n");
189 } // end pass loop
190 }
191 }