001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one
003     * or more contributor license agreements. See the NOTICE file
004     * distributed with this work for additional information
005     * regarding copyright ownership. The ASF licenses this file
006     * to you under the Apache License, Version 2.0 (the  "License");
007     * you may not use this file except in compliance with the License.
008     * You may obtain a copy of the License at
009     *
010     *     http://www.apache.org/licenses/LICENSE-2.0
011     *
012     * Unless required by applicable law or agreed to in writing, software
013     * distributed under the License is distributed on an "AS IS" BASIS,
014     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015     * See the License for the specific language governing permissions and
016     * limitations under the License.
017     */
018    /*
019     * $Id: DTMStringPool.java 468653 2006-10-28 07:07:05Z minchau $
020     */
021    
022    package org.apache.xml.dtm.ref;
023    
024    import java.util.Vector;
025    
026    import org.apache.xml.utils.IntVector;
027    
028    /** <p>DTMStringPool is an "interning" mechanism for strings. It will
029     * create a stable 1:1 mapping between a set of string values and a set of
030     * integer index values, so the integers can be used to reliably and
031     * uniquely identify (and when necessary retrieve) the strings.</p>
032     *
033     * <p>Design Priorities:
034     * <ul>
035     * <li>String-to-index lookup speed is critical.</li>
036     * <li>Index-to-String lookup speed is slightly less so.</li>
037     * <li>Threadsafety is not guaranteed at this level.
038     * Enforce that in the application if needed.</li>
039     * <li>Storage efficiency is an issue but not a huge one.
040     * It is expected that string pools won't exceed about 2000 entries.</li>
041     * </ul>
042     * </p>
043     *
044     * <p>Implementation detail: A standard Hashtable is relatively
045     * inefficient when looking up primitive int values, especially when
046     * we're already maintaining an int-to-string vector.  So I'm
047     * maintaining a simple hash chain within this class.</p>
048     *
049     * <p>NOTE: There is nothing in the code that has a real dependency upon
050     * String. It would work with any object type that implements reliable
051     * .hashCode() and .equals() operations. The API enforces Strings because
052     * it's safer that way, but this could trivially be turned into a general
053     * ObjectPool if one was needed.</p>
054     *
055     * <p>Status: Passed basic test in main().</p>
056     * */
057    public class DTMStringPool
058    {
059      Vector m_intToString;
060      static final int HASHPRIME=101;
061      int[] m_hashStart=new int[HASHPRIME];
062      IntVector m_hashChain;
063      public static final int NULL=-1;
064    
065      /**
066       * Create a DTMStringPool using the given chain size
067       * 
068       * @param chainSize The size of the hash chain vector
069       */
070      public DTMStringPool(int chainSize)
071        {
072          m_intToString=new Vector();
073          m_hashChain=new IntVector(chainSize);
074          removeAllElements();
075          
076          // -sb Add this to force empty strings to be index 0.
077          stringToIndex("");
078        }
079      
080      public DTMStringPool()
081        {
082          this(512);        
083        }
084        
085      public void removeAllElements()
086        {
087          m_intToString.removeAllElements();
088          for(int i=0;i<HASHPRIME;++i)
089            m_hashStart[i]=NULL;
090          m_hashChain.removeAllElements();
091        }
092    
093      /** @return string whose value is uniquely identified by this integer index.
094       * @throws java.lang.ArrayIndexOutOfBoundsException
095       *  if index doesn't map to a string.
096       * */ 
097      public String indexToString(int i)
098        throws java.lang.ArrayIndexOutOfBoundsException
099        {
100          if(i==NULL) return null;
101          return (String) m_intToString.elementAt(i);
102        }
103    
104      /** @return integer index uniquely identifying the value of this string. */ 
105      public int stringToIndex(String s)
106        {
107          if(s==null) return NULL;
108          
109          int hashslot=s.hashCode()%HASHPRIME;
110          if(hashslot<0) hashslot=-hashslot;
111    
112          // Is it one we already know?
113          int hashlast=m_hashStart[hashslot];
114          int hashcandidate=hashlast;
115          while(hashcandidate!=NULL)
116            {
117              if(m_intToString.elementAt(hashcandidate).equals(s))
118                return hashcandidate;
119    
120              hashlast=hashcandidate;
121              hashcandidate=m_hashChain.elementAt(hashcandidate);
122            }
123          
124          // New value. Add to tables.
125          int newIndex=m_intToString.size();
126          m_intToString.addElement(s);
127    
128          m_hashChain.addElement(NULL);     // Initialize to no-following-same-hash
129          if(hashlast==NULL)  // First for this hash
130            m_hashStart[hashslot]=newIndex;
131          else // Link from previous with same hash
132            m_hashChain.setElementAt(newIndex,hashlast);
133    
134          return newIndex;
135        }
136    
137      /** Command-line unit test driver. This test relies on the fact that
138       * this version of the pool assigns indices consecutively, starting
139       * from zero, as new unique strings are encountered.
140       */
141      public static void main(String[] args)
142      {
143        String[] word={
144          "Zero","One","Two","Three","Four","Five",
145          "Six","Seven","Eight","Nine","Ten",
146          "Eleven","Twelve","Thirteen","Fourteen","Fifteen",
147          "Sixteen","Seventeen","Eighteen","Nineteen","Twenty",
148          "Twenty-One","Twenty-Two","Twenty-Three","Twenty-Four",
149          "Twenty-Five","Twenty-Six","Twenty-Seven","Twenty-Eight",
150          "Twenty-Nine","Thirty","Thirty-One","Thirty-Two",
151          "Thirty-Three","Thirty-Four","Thirty-Five","Thirty-Six",
152          "Thirty-Seven","Thirty-Eight","Thirty-Nine"};
153    
154        DTMStringPool pool=new DTMStringPool();
155    
156        System.out.println("If no complaints are printed below, we passed initial test.");
157    
158        for(int pass=0;pass<=1;++pass)
159          {
160            int i;
161    
162            for(i=0;i<word.length;++i)
163              {
164                int j=pool.stringToIndex(word[i]);
165                if(j!=i)
166                  System.out.println("\tMismatch populating pool: assigned "+
167                                     j+" for create "+i);
168              }
169    
170            for(i=0;i<word.length;++i)
171              {
172                int j=pool.stringToIndex(word[i]);
173                if(j!=i)
174                  System.out.println("\tMismatch in stringToIndex: returned "+
175                                     j+" for lookup "+i);
176              }
177    
178            for(i=0;i<word.length;++i)
179              {
180                String w=pool.indexToString(i);
181                if(!word[i].equals(w))
182                  System.out.println("\tMismatch in indexToString: returned"+
183                                     w+" for lookup "+i);
184              }
185            
186            pool.removeAllElements();
187            
188            System.out.println("\nPass "+pass+" complete\n");
189          } // end pass loop
190      }
191    }