001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements. See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership. The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the "License");
007 * you may not use this file except in compliance with the License.
008 * You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018 /*
019 * $Id: ExsltStrings.java 1225758 2011-12-30 05:44:27Z mrglavas $
020 */
021 package org.apache.xalan.lib;
022
023 import java.util.StringTokenizer;
024
025 import javax.xml.parsers.DocumentBuilderFactory;
026 import javax.xml.parsers.ParserConfigurationException;
027
028 import org.apache.xpath.NodeSet;
029 import org.w3c.dom.Document;
030 import org.w3c.dom.Element;
031 import org.w3c.dom.Node;
032 import org.w3c.dom.NodeList;
033 import org.w3c.dom.Text;
034
035 /**
036 * This class contains EXSLT strings extension functions.
037 *
038 * It is accessed by specifying a namespace URI as follows:
039 * <pre>
040 * xmlns:str="http://exslt.org/strings"
041 * </pre>
042 * The documentation for each function has been copied from the relevant
043 * EXSLT Implementer page.
044 *
 * @see <a href="http://www.exslt.org/">EXSLT</a>
 *
 * @xsl.usage general
048 */
049 public class ExsltStrings extends ExsltBase
050 {
051 /**
052 * The str:align function aligns a string within another string.
053 * <p>
054 * The first argument gives the target string to be aligned. The second argument gives
055 * the padding string within which it is to be aligned.
056 * <p>
057 * If the target string is shorter than the padding string then a range of characters
058 * in the padding string are repaced with those in the target string. Which characters
059 * are replaced depends on the value of the third argument, which gives the type of
060 * alignment. It can be one of 'left', 'right' or 'center'. If no third argument is
061 * given or if it is not one of these values, then it defaults to left alignment.
062 * <p>
063 * With left alignment, the range of characters replaced by the target string begins
064 * with the first character in the padding string. With right alignment, the range of
065 * characters replaced by the target string ends with the last character in the padding
066 * string. With center alignment, the range of characters replaced by the target string
067 * is in the middle of the padding string, such that either the number of unreplaced
068 * characters on either side of the range is the same or there is one less on the left
069 * than there is on the right.
070 * <p>
071 * If the target string is longer than the padding string, then it is truncated to be
072 * the same length as the padding string and returned.
073 *
074 * @param targetStr The target string
075 * @param paddingStr The padding string
076 * @param type The type of alignment
077 *
078 * @return The string after alignment
079 */
080 public static String align(String targetStr, String paddingStr, String type)
081 {
082 if (targetStr.length() >= paddingStr.length())
083 return targetStr.substring(0, paddingStr.length());
084
085 if (type.equals("right"))
086 {
087 return paddingStr.substring(0, paddingStr.length() - targetStr.length()) + targetStr;
088 }
089 else if (type.equals("center"))
090 {
091 int startIndex = (paddingStr.length() - targetStr.length()) / 2;
092 return paddingStr.substring(0, startIndex) + targetStr + paddingStr.substring(startIndex + targetStr.length());
093 }
094 // Default is left
095 else
096 {
097 return targetStr + paddingStr.substring(targetStr.length());
098 }
099 }
100
101 /**
102 * See above
103 */
104 public static String align(String targetStr, String paddingStr)
105 {
106 return align(targetStr, paddingStr, "left");
107 }
108
109 /**
110 * The str:concat function takes a node set and returns the concatenation of the
111 * string values of the nodes in that node set. If the node set is empty, it returns
112 * an empty string.
113 *
114 * @param nl A node set
115 * @return The concatenation of the string values of the nodes in that node set
116 */
117 public static String concat(NodeList nl)
118 {
119 StringBuffer sb = new StringBuffer();
120 for (int i = 0; i < nl.getLength(); i++)
121 {
122 Node node = nl.item(i);
123 String value = toString(node);
124
125 if (value != null && value.length() > 0)
126 sb.append(value);
127 }
128
129 return sb.toString();
130 }
131
132 /**
133 * The str:padding function creates a padding string of a certain length.
134 * The first argument gives the length of the padding string to be created.
135 * The second argument gives a string to be used to create the padding. This
136 * string is repeated as many times as is necessary to create a string of the
137 * length specified by the first argument; if the string is more than a character
138 * long, it may have to be truncated to produce the required length. If no second
139 * argument is specified, it defaults to a space (' '). If the second argument is
140 * an empty string, str:padding returns an empty string.
141 *
142 * @param length The length of the padding string to be created
143 * @param pattern The string to be used as pattern
144 *
145 * @return A padding string of the given length
146 */
147 public static String padding(double length, String pattern)
148 {
149 if (pattern == null || pattern.length() == 0)
150 return "";
151
152 StringBuffer sb = new StringBuffer();
153 int len = (int)length;
154 int numAdded = 0;
155 int index = 0;
156 while (numAdded < len)
157 {
158 if (index == pattern.length())
159 index = 0;
160
161 sb.append(pattern.charAt(index));
162 index++;
163 numAdded++;
164 }
165
166 return sb.toString();
167 }
168
169 /**
170 * See above
171 */
172 public static String padding(double length)
173 {
174 return padding(length, " ");
175 }
176
177 /**
178 * The str:split function splits up a string and returns a node set of token
179 * elements, each containing one token from the string.
180 * <p>
181 * The first argument is the string to be split. The second argument is a pattern
182 * string. The string given by the first argument is split at any occurrence of
183 * this pattern. For example:
184 * <pre>
185 * str:split('a, simple, list', ', ') gives the node set consisting of:
186 *
187 * <token>a</token>
188 * <token>simple</token>
189 * <token>list</token>
190 * </pre>
191 * If the second argument is omitted, the default is the string ' ' (i.e. a space).
192 *
193 * @param str The string to be split
194 * @param pattern The pattern
195 *
196 * @return A node set of split tokens
197 */
198 public static NodeList split(String str, String pattern)
199 {
200
201
202 NodeSet resultSet = new NodeSet();
203 resultSet.setShouldCacheNodes(true);
204
205 boolean done = false;
206 int fromIndex = 0;
207 int matchIndex = 0;
208 String token = null;
209
210 while (!done && fromIndex < str.length())
211 {
212 matchIndex = str.indexOf(pattern, fromIndex);
213 if (matchIndex >= 0)
214 {
215 token = str.substring(fromIndex, matchIndex);
216 fromIndex = matchIndex + pattern.length();
217 }
218 else
219 {
220 done = true;
221 token = str.substring(fromIndex);
222 }
223
224 Document doc = DocumentHolder.m_doc;
225 synchronized (doc)
226 {
227 Element element = doc.createElement("token");
228 Text text = doc.createTextNode(token);
229 element.appendChild(text);
230 resultSet.addNode(element);
231 }
232 }
233
234 return resultSet;
235 }
236
237 /**
238 * See above
239 */
240 public static NodeList split(String str)
241 {
242 return split(str, " ");
243 }
244
245 /**
246 * The str:tokenize function splits up a string and returns a node set of token
247 * elements, each containing one token from the string.
248 * <p>
249 * The first argument is the string to be tokenized. The second argument is a
250 * string consisting of a number of characters. Each character in this string is
251 * taken as a delimiting character. The string given by the first argument is split
252 * at any occurrence of any of these characters. For example:
253 * <pre>
254 * str:tokenize('2001-06-03T11:40:23', '-T:') gives the node set consisting of:
255 *
256 * <token>2001</token>
257 * <token>06</token>
258 * <token>03</token>
259 * <token>11</token>
260 * <token>40</token>
261 * <token>23</token>
262 * </pre>
263 * If the second argument is omitted, the default is the string '	

 '
264 * (i.e. whitespace characters).
265 * <p>
266 * If the second argument is an empty string, the function returns a set of token
267 * elements, each of which holds a single character.
268 * <p>
269 * Note: This one is different from the tokenize extension function in the Xalan
270 * namespace. The one in Xalan returns a set of Text nodes, while this one wraps
271 * the Text nodes inside the token Element nodes.
272 *
273 * @param toTokenize The string to be tokenized
274 * @param delims The delimiter string
275 *
276 * @return A node set of split token elements
277 */
278 public static NodeList tokenize(String toTokenize, String delims)
279 {
280
281
282 NodeSet resultSet = new NodeSet();
283
284 if (delims != null && delims.length() > 0)
285 {
286 StringTokenizer lTokenizer = new StringTokenizer(toTokenize, delims);
287
288 Document doc = DocumentHolder.m_doc;
289 synchronized (doc)
290 {
291 while (lTokenizer.hasMoreTokens())
292 {
293 Element element = doc.createElement("token");
294 element.appendChild(doc.createTextNode(lTokenizer.nextToken()));
295 resultSet.addNode(element);
296 }
297 }
298 }
299 // If the delimiter is an empty string, create one token Element for
300 // every single character.
301 else
302 {
303
304 Document doc = DocumentHolder.m_doc;
305 synchronized (doc)
306 {
307 for (int i = 0; i < toTokenize.length(); i++)
308 {
309 Element element = doc.createElement("token");
310 element.appendChild(doc.createTextNode(toTokenize.substring(i, i+1)));
311 resultSet.addNode(element);
312 }
313 }
314 }
315
316 return resultSet;
317 }
318
319 /**
320 * See above
321 */
322 public static NodeList tokenize(String toTokenize)
323 {
324 return tokenize(toTokenize, " \t\n\r");
325 }
326 /**
327 * This class is not loaded until first referenced (see Java Language
328 * Specification by Gosling/Joy/Steele, section 12.4.1)
329 *
330 * The static members are created when this class is first referenced, as a
331 * lazy initialization not needing checking against null or any
332 * synchronization.
333 *
334 */
335 private static class DocumentHolder
336 {
337 // Reuse the Document object to reduce memory usage.
338 private static final Document m_doc;
339 static {
340 try
341 {
342 m_doc =DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument();
343 }
344
345 catch(ParserConfigurationException pce)
346 {
347 throw new org.apache.xml.utils.WrappedRuntimeException(pce);
348 }
349
350 }
351 }
352
353 }