001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one
003     * or more contributor license agreements. See the NOTICE file
004     * distributed with this work for additional information
005     * regarding copyright ownership. The ASF licenses this file
006     * to you under the Apache License, Version 2.0 (the  "License");
007     * you may not use this file except in compliance with the License.
008     * You may obtain a copy of the License at
009     *
010     *     http://www.apache.org/licenses/LICENSE-2.0
011     *
012     * Unless required by applicable law or agreed to in writing, software
013     * distributed under the License is distributed on an "AS IS" BASIS,
014     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015     * See the License for the specific language governing permissions and
016     * limitations under the License.
017     */
018    /*
019     * $Id: ExsltStrings.java 1225758 2011-12-30 05:44:27Z mrglavas $
020     */
021    package org.apache.xalan.lib;
022    
023    import java.util.StringTokenizer;
024    
025    import javax.xml.parsers.DocumentBuilderFactory;
026    import javax.xml.parsers.ParserConfigurationException;
027    
028    import org.apache.xpath.NodeSet;
029    import org.w3c.dom.Document;
030    import org.w3c.dom.Element;
031    import org.w3c.dom.Node;
032    import org.w3c.dom.NodeList;
033    import org.w3c.dom.Text;
034    
035    /**
036     * This class contains EXSLT strings extension functions.
037     *
038     * It is accessed by specifying a namespace URI as follows:
039     * <pre>
040     *    xmlns:str="http://exslt.org/strings"
041     * </pre>
042     * The documentation for each function has been copied from the relevant
043     * EXSLT Implementer page.
044     * 
045     * @see <a href="http://www.exslt.org/">EXSLT</a>
046    
047     * @xsl.usage general
048     */
049    public class ExsltStrings extends ExsltBase
050    {
051      /**
052       * The str:align function aligns a string within another string. 
053       * <p>
054       * The first argument gives the target string to be aligned. The second argument gives 
055       * the padding string within which it is to be aligned. 
056       * <p>
057       * If the target string is shorter than the padding string then a range of characters 
058       * in the padding string are repaced with those in the target string. Which characters 
059       * are replaced depends on the value of the third argument, which gives the type of 
060       * alignment. It can be one of 'left', 'right' or 'center'. If no third argument is 
061       * given or if it is not one of these values, then it defaults to left alignment. 
062       * <p>
063       * With left alignment, the range of characters replaced by the target string begins 
064       * with the first character in the padding string. With right alignment, the range of 
065       * characters replaced by the target string ends with the last character in the padding 
066       * string. With center alignment, the range of characters replaced by the target string 
067       * is in the middle of the padding string, such that either the number of unreplaced 
068       * characters on either side of the range is the same or there is one less on the left 
069       * than there is on the right. 
070       * <p>
071       * If the target string is longer than the padding string, then it is truncated to be 
072       * the same length as the padding string and returned.
073       *
074       * @param targetStr The target string
075       * @param paddingStr The padding string
076       * @param type The type of alignment
077       * 
078       * @return The string after alignment
079       */
080      public static String align(String targetStr, String paddingStr, String type)
081      {
082        if (targetStr.length() >= paddingStr.length())
083          return targetStr.substring(0, paddingStr.length());
084        
085        if (type.equals("right"))
086        {
087          return paddingStr.substring(0, paddingStr.length() - targetStr.length()) + targetStr;
088        }
089        else if (type.equals("center"))
090        {
091          int startIndex = (paddingStr.length() - targetStr.length()) / 2;
092          return paddingStr.substring(0, startIndex) + targetStr + paddingStr.substring(startIndex + targetStr.length());
093        }
094        // Default is left
095        else
096        {
097          return targetStr + paddingStr.substring(targetStr.length());
098        }    
099      }
100    
101      /**
102       * See above
103       */
104      public static String align(String targetStr, String paddingStr)
105      {
106        return align(targetStr, paddingStr, "left");
107      }
108      
109      /**
110       * The str:concat function takes a node set and returns the concatenation of the 
111       * string values of the nodes in that node set. If the node set is empty, it returns 
112       * an empty string.
113       *
114       * @param nl A node set
115       * @return The concatenation of the string values of the nodes in that node set
116       */
117      public static String concat(NodeList nl)
118      {
119        StringBuffer sb = new StringBuffer();
120        for (int i = 0; i < nl.getLength(); i++)
121        {
122          Node node = nl.item(i);
123          String value = toString(node);
124          
125          if (value != null && value.length() > 0)
126            sb.append(value);
127        }
128        
129        return sb.toString();
130      }
131        
132      /**
133       * The str:padding function creates a padding string of a certain length. 
134       * The first argument gives the length of the padding string to be created. 
135       * The second argument gives a string to be used to create the padding. This 
136       * string is repeated as many times as is necessary to create a string of the 
137       * length specified by the first argument; if the string is more than a character 
138       * long, it may have to be truncated to produce the required length. If no second 
139       * argument is specified, it defaults to a space (' '). If the second argument is 
140       * an empty string, str:padding returns an empty string.
141       *
142       * @param length The length of the padding string to be created
143       * @param pattern The string to be used as pattern
144       *
145       * @return A padding string of the given length
146       */
147      public static String padding(double length, String pattern)
148      {
149        if (pattern == null || pattern.length() == 0)
150          return "";
151        
152        StringBuffer sb = new StringBuffer();
153        int len = (int)length;
154        int numAdded = 0;
155        int index = 0;
156        while (numAdded < len)
157        {
158          if (index == pattern.length())
159            index = 0;
160            
161          sb.append(pattern.charAt(index));
162          index++;
163          numAdded++;
164        }
165      
166        return sb.toString();
167      }
168    
169      /**
170       * See above
171       */
172      public static String padding(double length)
173      {
174        return padding(length, " ");
175      }
176        
177      /**
178       * The str:split function splits up a string and returns a node set of token 
179       * elements, each containing one token from the string. 
180       * <p>
181       * The first argument is the string to be split. The second argument is a pattern 
182       * string. The string given by the first argument is split at any occurrence of 
183       * this pattern. For example: 
184       * <pre>
185       * str:split('a, simple, list', ', ') gives the node set consisting of: 
186       *
187       * <token>a</token>
188       * <token>simple</token>
189       * <token>list</token>
190       * </pre>
191       * If the second argument is omitted, the default is the string '&#x20;' (i.e. a space).
192       *
193       * @param str The string to be split
194       * @param pattern The pattern
195       *
196       * @return A node set of split tokens
197       */
198      public static NodeList split(String str, String pattern)
199      {
200    
201        
202        NodeSet resultSet = new NodeSet();
203        resultSet.setShouldCacheNodes(true);
204        
205        boolean done = false;
206        int fromIndex = 0;
207        int matchIndex = 0;
208        String token = null;
209        
210        while (!done && fromIndex < str.length())
211        {
212          matchIndex = str.indexOf(pattern, fromIndex);
213          if (matchIndex >= 0)
214          {
215            token = str.substring(fromIndex, matchIndex);
216            fromIndex = matchIndex + pattern.length();
217          }
218          else
219          {
220            done = true;
221            token = str.substring(fromIndex);
222          }
223    
224          Document doc = DocumentHolder.m_doc;
225          synchronized (doc)
226          {
227            Element element = doc.createElement("token");
228            Text text = doc.createTextNode(token);
229            element.appendChild(text);
230            resultSet.addNode(element);      
231          }
232        }
233        
234        return resultSet;
235      }
236      
237      /**
238       * See above
239       */
240      public static NodeList split(String str)
241      {
242        return split(str, " ");
243      }
244    
245      /**
246       * The str:tokenize function splits up a string and returns a node set of token 
247       * elements, each containing one token from the string. 
248       * <p>
249       * The first argument is the string to be tokenized. The second argument is a 
250       * string consisting of a number of characters. Each character in this string is 
251       * taken as a delimiting character. The string given by the first argument is split 
252       * at any occurrence of any of these characters. For example: 
253       * <pre>
254       * str:tokenize('2001-06-03T11:40:23', '-T:') gives the node set consisting of: 
255       *
256       * <token>2001</token>
257       * <token>06</token>
258       * <token>03</token>
259       * <token>11</token>
260       * <token>40</token>
261       * <token>23</token>
262       * </pre>
263       * If the second argument is omitted, the default is the string '&#x9;&#xA;&#xD;&#x20;' 
264       * (i.e. whitespace characters). 
265       * <p>
266       * If the second argument is an empty string, the function returns a set of token 
267       * elements, each of which holds a single character.
268       * <p>
269       * Note: This one is different from the tokenize extension function in the Xalan
270       * namespace. The one in Xalan returns a set of Text nodes, while this one wraps
271       * the Text nodes inside the token Element nodes.
272       *
273       * @param toTokenize The string to be tokenized
274       * @param delims The delimiter string
275       *
276       * @return A node set of split token elements
277       */
278      public static NodeList tokenize(String toTokenize, String delims)
279      {
280    
281    
282        NodeSet resultSet = new NodeSet();
283        
284        if (delims != null && delims.length() > 0)
285        {
286          StringTokenizer lTokenizer = new StringTokenizer(toTokenize, delims);
287    
288          Document doc = DocumentHolder.m_doc;
289          synchronized (doc)
290          {
291            while (lTokenizer.hasMoreTokens())
292            {
293              Element element = doc.createElement("token");
294              element.appendChild(doc.createTextNode(lTokenizer.nextToken()));
295              resultSet.addNode(element);      
296            }
297          }
298        }
299        // If the delimiter is an empty string, create one token Element for 
300        // every single character.
301        else
302        {
303    
304          Document doc = DocumentHolder.m_doc;
305          synchronized (doc)
306          {
307            for (int i = 0; i < toTokenize.length(); i++)
308            {
309              Element element = doc.createElement("token");
310              element.appendChild(doc.createTextNode(toTokenize.substring(i, i+1)));
311              resultSet.addNode(element);              
312            }
313          }
314        }
315    
316        return resultSet;
317      }
318    
319      /**
320       * See above
321       */
322      public static NodeList tokenize(String toTokenize)
323      {
324        return tokenize(toTokenize, " \t\n\r");
325      }
326        /**
327         * This class is not loaded until first referenced (see Java Language
328         * Specification by Gosling/Joy/Steele, section 12.4.1)
329         *
330         * The static members are created when this class is first referenced, as a
331         * lazy initialization not needing checking against null or any
332         * synchronization.
333         *
334         */
335        private static class DocumentHolder 
336        {
337            // Reuse the Document object to reduce memory usage.
338            private static final Document m_doc;
339            static {
340                try
341                {
342                    m_doc =DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument();
343                }
344               
345                catch(ParserConfigurationException pce)
346                {
347                      throw new org.apache.xml.utils.WrappedRuntimeException(pce);
348                }
349    
350            }
351        }
352      
353    }