001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements. See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership. The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the "License");
007 * you may not use this file except in compliance with the License.
008 * You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018 /*
019 * $Id: XMLChar.java 1225426 2011-12-29 04:13:08Z mrglavas $
020 */
021
022 package org.apache.xml.utils;
023
/**
 * This class defines the basic XML character properties. The data
 * in this class can be used to verify that a character is a valid
 * XML character or if the character is a space, name start, or name
 * character.
 * <p>
 * A series of convenience methods are supplied to ease the burden
 * of the developer. The character property table itself is private
 * to this class; each convenience method looks the character up in
 * that table and applies the appropriate mask flag (e.g.
 * <code>MASK_VALID</code>). The mask constants are public.
 * <p>
 * The table only covers the range 0x0000-0xFFFF. The methods
 * <code>isValid</code> and <code>isContent</code> additionally accept
 * the supplemental range 0x10000-0x10FFFF; every other character
 * test returns false for values outside 0x0000-0xFFFF.
 *
 * @author Glenn Marcy, IBM
 * @author Andy Clark, IBM
 * @author Eric Ye, IBM
 * @author Arnaud Le Hors, IBM
 * @author Rahul Srivastava, Sun Microsystems Inc.
 *
 * @version $Id: XMLChar.java 1225426 2011-12-29 04:13:08Z mrglavas $
 */
public class XMLChar {

    //
    // Constants
    //

    /**
     * Character flags, indexed by character value (0x0000-0xFFFF).
     * Each entry is a bitwise OR of the <code>MASK_*</code> flags.
     */
    private static final byte[] CHARS = new byte[1 << 16];

    /** Valid character mask. */
    public static final int MASK_VALID = 0x01;

    /** Space character mask. */
    public static final int MASK_SPACE = 0x02;

    /** Name start character mask. */
    public static final int MASK_NAME_START = 0x04;

    /** Name character mask. */
    public static final int MASK_NAME = 0x08;

    /** Pubid character mask. */
    public static final int MASK_PUBID = 0x10;

    /**
     * Content character mask. Special characters are those that can
     * be considered the start of markup, such as '&lt;' and '&amp;'.
     * The newline characters '\n' and '\r' and the character ']' are
     * considered special as well.
     * All other valid XML characters can be considered content.
     * <p>
     * This is an optimization for the inner loop of character scanning.
     */
    public static final int MASK_CONTENT = 0x20;

    /** NCName start character mask. */
    public static final int MASK_NCNAME_START = 0x40;

    /** NCName character mask. */
    public static final int MASK_NCNAME = 0x80;

    //
    // Static initialization
    //

    static {

        //
        // [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] |
        //              [#xE000-#xFFFD] | [#x10000-#x10FFFF]
        //
        // Only the part below 0x10000 fits in the table; the
        // supplemental range is handled arithmetically by the methods.
        //

        final int[] charRange = {
            0x0009, 0x000A, 0x000D, 0x000D, 0x0020, 0xD7FF, 0xE000, 0xFFFD,
        };

        //
        // [3] S ::= (#x20 | #x9 | #xD | #xA)+
        //

        final int[] spaceChar = {
            0x0020, 0x0009, 0x000D, 0x000A,
        };

        //
        // [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
        //                  CombiningChar | Extender
        //

        final int[] nameChar = {
            0x002D, 0x002E, // '-' and '.'
        };

        //
        // [5] Name ::= (Letter | '_' | ':') (NameChar)*
        //

        final int[] nameStartChar = {
            0x003A, 0x005F, // ':' and '_'
        };

        //
        // [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
        //

        final int[] pubidChar = {
            0x000A, 0x000D, 0x0020, 0x0021, 0x0023, 0x0024, 0x0025, 0x003D,
            0x005F
        };

        final int[] pubidRange = {
            0x0027, 0x003B, 0x003F, 0x005A, 0x0061, 0x007A
        };

        //
        // [84] Letter ::= BaseChar | Ideographic
        //

        final int[] letterRange = {
            // BaseChar
            0x0041, 0x005A, 0x0061, 0x007A, 0x00C0, 0x00D6, 0x00D8, 0x00F6,
            0x00F8, 0x0131, 0x0134, 0x013E, 0x0141, 0x0148, 0x014A, 0x017E,
            0x0180, 0x01C3, 0x01CD, 0x01F0, 0x01F4, 0x01F5, 0x01FA, 0x0217,
            0x0250, 0x02A8, 0x02BB, 0x02C1, 0x0388, 0x038A, 0x038E, 0x03A1,
            0x03A3, 0x03CE, 0x03D0, 0x03D6, 0x03E2, 0x03F3, 0x0401, 0x040C,
            0x040E, 0x044F, 0x0451, 0x045C, 0x045E, 0x0481, 0x0490, 0x04C4,
            0x04C7, 0x04C8, 0x04CB, 0x04CC, 0x04D0, 0x04EB, 0x04EE, 0x04F5,
            0x04F8, 0x04F9, 0x0531, 0x0556, 0x0561, 0x0586, 0x05D0, 0x05EA,
            0x05F0, 0x05F2, 0x0621, 0x063A, 0x0641, 0x064A, 0x0671, 0x06B7,
            0x06BA, 0x06BE, 0x06C0, 0x06CE, 0x06D0, 0x06D3, 0x06E5, 0x06E6,
            0x0905, 0x0939, 0x0958, 0x0961, 0x0985, 0x098C, 0x098F, 0x0990,
            0x0993, 0x09A8, 0x09AA, 0x09B0, 0x09B6, 0x09B9, 0x09DC, 0x09DD,
            0x09DF, 0x09E1, 0x09F0, 0x09F1, 0x0A05, 0x0A0A, 0x0A0F, 0x0A10,
            0x0A13, 0x0A28, 0x0A2A, 0x0A30, 0x0A32, 0x0A33, 0x0A35, 0x0A36,
            0x0A38, 0x0A39, 0x0A59, 0x0A5C, 0x0A72, 0x0A74, 0x0A85, 0x0A8B,
            0x0A8F, 0x0A91, 0x0A93, 0x0AA8, 0x0AAA, 0x0AB0, 0x0AB2, 0x0AB3,
            0x0AB5, 0x0AB9, 0x0B05, 0x0B0C, 0x0B0F, 0x0B10, 0x0B13, 0x0B28,
            0x0B2A, 0x0B30, 0x0B32, 0x0B33, 0x0B36, 0x0B39, 0x0B5C, 0x0B5D,
            0x0B5F, 0x0B61, 0x0B85, 0x0B8A, 0x0B8E, 0x0B90, 0x0B92, 0x0B95,
            0x0B99, 0x0B9A, 0x0B9E, 0x0B9F, 0x0BA3, 0x0BA4, 0x0BA8, 0x0BAA,
            0x0BAE, 0x0BB5, 0x0BB7, 0x0BB9, 0x0C05, 0x0C0C, 0x0C0E, 0x0C10,
            0x0C12, 0x0C28, 0x0C2A, 0x0C33, 0x0C35, 0x0C39, 0x0C60, 0x0C61,
            0x0C85, 0x0C8C, 0x0C8E, 0x0C90, 0x0C92, 0x0CA8, 0x0CAA, 0x0CB3,
            0x0CB5, 0x0CB9, 0x0CE0, 0x0CE1, 0x0D05, 0x0D0C, 0x0D0E, 0x0D10,
            0x0D12, 0x0D28, 0x0D2A, 0x0D39, 0x0D60, 0x0D61, 0x0E01, 0x0E2E,
            0x0E32, 0x0E33, 0x0E40, 0x0E45, 0x0E81, 0x0E82, 0x0E87, 0x0E88,
            0x0E94, 0x0E97, 0x0E99, 0x0E9F, 0x0EA1, 0x0EA3, 0x0EAA, 0x0EAB,
            0x0EAD, 0x0EAE, 0x0EB2, 0x0EB3, 0x0EC0, 0x0EC4, 0x0F40, 0x0F47,
            0x0F49, 0x0F69, 0x10A0, 0x10C5, 0x10D0, 0x10F6, 0x1102, 0x1103,
            0x1105, 0x1107, 0x110B, 0x110C, 0x110E, 0x1112, 0x1154, 0x1155,
            0x115F, 0x1161, 0x116D, 0x116E, 0x1172, 0x1173, 0x11AE, 0x11AF,
            0x11B7, 0x11B8, 0x11BC, 0x11C2, 0x1E00, 0x1E9B, 0x1EA0, 0x1EF9,
            0x1F00, 0x1F15, 0x1F18, 0x1F1D, 0x1F20, 0x1F45, 0x1F48, 0x1F4D,
            0x1F50, 0x1F57, 0x1F5F, 0x1F7D, 0x1F80, 0x1FB4, 0x1FB6, 0x1FBC,
            0x1FC2, 0x1FC4, 0x1FC6, 0x1FCC, 0x1FD0, 0x1FD3, 0x1FD6, 0x1FDB,
            0x1FE0, 0x1FEC, 0x1FF2, 0x1FF4, 0x1FF6, 0x1FFC, 0x212A, 0x212B,
            0x2180, 0x2182, 0x3041, 0x3094, 0x30A1, 0x30FA, 0x3105, 0x312C,
            0xAC00, 0xD7A3,
            // Ideographic
            0x3021, 0x3029, 0x4E00, 0x9FA5,
        };
        final int[] letterChar = {
            // BaseChar
            0x0386, 0x038C, 0x03DA, 0x03DC, 0x03DE, 0x03E0, 0x0559, 0x06D5,
            0x093D, 0x09B2, 0x0A5E, 0x0A8D, 0x0ABD, 0x0AE0, 0x0B3D, 0x0B9C,
            0x0CDE, 0x0E30, 0x0E84, 0x0E8A, 0x0E8D, 0x0EA5, 0x0EA7, 0x0EB0,
            0x0EBD, 0x1100, 0x1109, 0x113C, 0x113E, 0x1140, 0x114C, 0x114E,
            0x1150, 0x1159, 0x1163, 0x1165, 0x1167, 0x1169, 0x1175, 0x119E,
            0x11A8, 0x11AB, 0x11BA, 0x11EB, 0x11F0, 0x11F9, 0x1F59, 0x1F5B,
            0x1F5D, 0x1FBE, 0x2126, 0x212E,
            // Ideographic
            0x3007,
        };

        //
        // [87] CombiningChar ::= ...
        //

        final int[] combiningCharRange = {
            0x0300, 0x0345, 0x0360, 0x0361, 0x0483, 0x0486, 0x0591, 0x05A1,
            0x05A3, 0x05B9, 0x05BB, 0x05BD, 0x05C1, 0x05C2, 0x064B, 0x0652,
            0x06D6, 0x06DC, 0x06DD, 0x06DF, 0x06E0, 0x06E4, 0x06E7, 0x06E8,
            0x06EA, 0x06ED, 0x0901, 0x0903, 0x093E, 0x094C, 0x0951, 0x0954,
            0x0962, 0x0963, 0x0981, 0x0983, 0x09C0, 0x09C4, 0x09C7, 0x09C8,
            0x09CB, 0x09CD, 0x09E2, 0x09E3, 0x0A40, 0x0A42, 0x0A47, 0x0A48,
            0x0A4B, 0x0A4D, 0x0A70, 0x0A71, 0x0A81, 0x0A83, 0x0ABE, 0x0AC5,
            0x0AC7, 0x0AC9, 0x0ACB, 0x0ACD, 0x0B01, 0x0B03, 0x0B3E, 0x0B43,
            0x0B47, 0x0B48, 0x0B4B, 0x0B4D, 0x0B56, 0x0B57, 0x0B82, 0x0B83,
            0x0BBE, 0x0BC2, 0x0BC6, 0x0BC8, 0x0BCA, 0x0BCD, 0x0C01, 0x0C03,
            0x0C3E, 0x0C44, 0x0C46, 0x0C48, 0x0C4A, 0x0C4D, 0x0C55, 0x0C56,
            0x0C82, 0x0C83, 0x0CBE, 0x0CC4, 0x0CC6, 0x0CC8, 0x0CCA, 0x0CCD,
            0x0CD5, 0x0CD6, 0x0D02, 0x0D03, 0x0D3E, 0x0D43, 0x0D46, 0x0D48,
            0x0D4A, 0x0D4D, 0x0E34, 0x0E3A, 0x0E47, 0x0E4E, 0x0EB4, 0x0EB9,
            0x0EBB, 0x0EBC, 0x0EC8, 0x0ECD, 0x0F18, 0x0F19, 0x0F71, 0x0F84,
            0x0F86, 0x0F8B, 0x0F90, 0x0F95, 0x0F99, 0x0FAD, 0x0FB1, 0x0FB7,
            0x20D0, 0x20DC, 0x302A, 0x302F,
        };

        final int[] combiningCharChar = {
            0x05BF, 0x05C4, 0x0670, 0x093C, 0x094D, 0x09BC, 0x09BE, 0x09BF,
            0x09D7, 0x0A02, 0x0A3C, 0x0A3E, 0x0A3F, 0x0ABC, 0x0B3C, 0x0BD7,
            0x0D57, 0x0E31, 0x0EB1, 0x0F35, 0x0F37, 0x0F39, 0x0F3E, 0x0F3F,
            0x0F97, 0x0FB9, 0x20E1, 0x3099, 0x309A,
        };

        //
        // [88] Digit ::= ...
        //

        final int[] digitRange = {
            0x0030, 0x0039, 0x0660, 0x0669, 0x06F0, 0x06F9, 0x0966, 0x096F,
            0x09E6, 0x09EF, 0x0A66, 0x0A6F, 0x0AE6, 0x0AEF, 0x0B66, 0x0B6F,
            0x0BE7, 0x0BEF, 0x0C66, 0x0C6F, 0x0CE6, 0x0CEF, 0x0D66, 0x0D6F,
            0x0E50, 0x0E59, 0x0ED0, 0x0ED9, 0x0F20, 0x0F29,
        };

        //
        // [89] Extender ::= ...
        //

        final int[] extenderRange = {
            0x3031, 0x3035, 0x309D, 0x309E, 0x30FC, 0x30FE,
        };

        final int[] extenderChar = {
            0x00B7, 0x02D0, 0x02D1, 0x0387, 0x0640, 0x0E46, 0x0EC6, 0x3005,
        };

        //
        // SpecialChar ::= '<', '&', '\n', '\r', ']'
        //

        final int[] specialChar = {
            '<', '&', '\n', '\r', ']',
        };

        //
        // Initialize
        //

        final int nameStartFlags =
            MASK_NAME_START | MASK_NAME | MASK_NCNAME_START | MASK_NCNAME;
        final int nameFlags = MASK_NAME | MASK_NCNAME;

        // set valid characters; every valid character starts out as content
        setRanges(charRange, MASK_VALID | MASK_CONTENT);

        // remove special characters (must follow the step above, which set the flag)
        for (int i = 0; i < specialChar.length; i++) {
            CHARS[specialChar[i]] &= ~MASK_CONTENT;
        }

        // set space characters
        setChars(spaceChar, MASK_SPACE);

        // set name start characters
        setChars(nameStartChar, nameStartFlags);
        setRanges(letterRange, nameStartFlags);
        setChars(letterChar, nameStartFlags);

        // set name characters
        setChars(nameChar, nameFlags);
        setRanges(digitRange, nameFlags);
        setRanges(combiningCharRange, nameFlags);
        setChars(combiningCharChar, nameFlags);
        setRanges(extenderRange, nameFlags);
        setChars(extenderChar, nameFlags);

        // remove ':' from allowable MASK_NCNAME_START and MASK_NCNAME chars
        // (must follow the name start step, which flagged ':' above)
        CHARS[':'] &= ~(MASK_NCNAME_START | MASK_NCNAME);

        // set Pubid characters
        setChars(pubidChar, MASK_PUBID);
        setRanges(pubidRange, MASK_PUBID);

    } // <clinit>()

    //
    // Private static methods
    //

    /** ORs <code>mask</code> into the table entry of every character listed in <code>chars</code>. */
    private static void setChars(int[] chars, int mask) {
        for (int i = 0; i < chars.length; i++) {
            CHARS[chars[i]] |= mask;
        }
    } // setChars(int[],int)

    /**
     * ORs <code>mask</code> into the table entry of every character in the
     * inclusive ranges given as consecutive (first, last) pairs.
     */
    private static void setRanges(int[] ranges, int mask) {
        for (int i = 0; i < ranges.length; i += 2) {
            final int last = ranges[i + 1];
            for (int c = ranges[i]; c <= last; c++) {
                CHARS[c] |= mask;
            }
        }
    } // setRanges(int[],int)

    /**
     * Returns true if <code>c</code> lies inside the table (0x0000-0xFFFF)
     * and its entry has at least one bit of <code>mask</code> set. Values
     * outside the table, including negative ones, yield false rather
     * than an out-of-bounds array access.
     */
    private static boolean hasFlag(int c, int mask) {
        return c >= 0 && c < 0x10000 && (CHARS[c] & mask) != 0;
    } // hasFlag(int,int):boolean

    /** Returns true if <code>c</code> is an ASCII letter. */
    private static boolean isAsciiLetter(char c) {
        return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
    } // isAsciiLetter(char):boolean

    /**
     * Returns true if <code>c</code> is one of the characters accepted
     * inside an encoding name: an ASCII letter or digit, '.', '_' or '-'.
     */
    private static boolean isEncodingNameChar(char c) {
        return isAsciiLetter(c) || (c >= '0' && c <= '9')
            || c == '.' || c == '_' || c == '-';
    } // isEncodingNameChar(char):boolean

    //
    // Public static methods
    //

    /**
     * Returns true if the specified character is a supplemental character.
     *
     * @param c The character to check.
     */
    public static boolean isSupplemental(int c) {
        return (c >= 0x10000 && c <= 0x10FFFF);
    }

    /**
     * Returns the supplemental character corresponding to the given
     * surrogates. The arguments are not checked to be surrogates.
     *
     * @param h The high surrogate.
     * @param l The low surrogate.
     */
    public static int supplemental(char h, char l) {
        return (h - 0xD800) * 0x400 + (l - 0xDC00) + 0x10000;
    }

    /**
     * Returns the high surrogate of a supplemental character
     *
     * @param c The supplemental character to "split".
     */
    public static char highSurrogate(int c) {
        return (char) (((c - 0x00010000) >> 10) + 0xD800);
    }

    /**
     * Returns the low surrogate of a supplemental character
     *
     * @param c The supplemental character to "split".
     */
    public static char lowSurrogate(int c) {
        return (char) (((c - 0x00010000) & 0x3FF) + 0xDC00);
    }

    /**
     * Returns whether the given character is a high surrogate
     *
     * @param c The character to check.
     */
    public static boolean isHighSurrogate(int c) {
        return (0xD800 <= c && c <= 0xDBFF);
    }

    /**
     * Returns whether the given character is a low surrogate
     *
     * @param c The character to check.
     */
    public static boolean isLowSurrogate(int c) {
        return (0xDC00 <= c && c <= 0xDFFF);
    }


    /**
     * Returns true if the specified character is valid. This method
     * also accepts the supplemental character range from 0x10000 to
     * 0x10FFFF, which is not covered by the character table. Any other
     * value outside 0x0000-0xFFFF, including a negative one, is invalid.
     *
     * @param c The character to check.
     */
    public static boolean isValid(int c) {
        return hasFlag(c, MASK_VALID) || isSupplemental(c);
    } // isValid(int):boolean

    /**
     * Returns true if the specified character is invalid.
     *
     * @param c The character to check.
     */
    public static boolean isInvalid(int c) {
        return !isValid(c);
    } // isInvalid(int):boolean

    /**
     * Returns true if the specified character can be considered content.
     * Supplemental characters (0x10000-0x10FFFF) are always content.
     *
     * @param c The character to check.
     */
    public static boolean isContent(int c) {
        return hasFlag(c, MASK_CONTENT) || isSupplemental(c);
    } // isContent(int):boolean

    /**
     * Returns true if the specified character can be considered markup.
     * Markup characters include '&lt;', '&amp;', and '%'.
     *
     * @param c The character to check.
     */
    public static boolean isMarkup(int c) {
        return c == '<' || c == '&' || c == '%';
    } // isMarkup(int):boolean

    /**
     * Returns true if the specified character is a space character
     * as defined by production [3] in the XML 1.0 specification.
     *
     * @param c The character to check.
     */
    public static boolean isSpace(int c) {
        return hasFlag(c, MASK_SPACE);
    } // isSpace(int):boolean

    /**
     * Returns true if the specified character is a valid name start
     * character as defined by production [5] in the XML 1.0
     * specification.
     *
     * @param c The character to check.
     */
    public static boolean isNameStart(int c) {
        return hasFlag(c, MASK_NAME_START);
    } // isNameStart(int):boolean

    /**
     * Returns true if the specified character is a valid name
     * character as defined by production [4] in the XML 1.0
     * specification.
     *
     * @param c The character to check.
     */
    public static boolean isName(int c) {
        return hasFlag(c, MASK_NAME);
    } // isName(int):boolean

    /**
     * Returns true if the specified character is a valid NCName start
     * character as defined by production [4] in Namespaces in XML
     * recommendation.
     *
     * @param c The character to check.
     */
    public static boolean isNCNameStart(int c) {
        return hasFlag(c, MASK_NCNAME_START);
    } // isNCNameStart(int):boolean

    /**
     * Returns true if the specified character is a valid NCName
     * character as defined by production [5] in Namespaces in XML
     * recommendation.
     *
     * @param c The character to check.
     */
    public static boolean isNCName(int c) {
        return hasFlag(c, MASK_NCNAME);
    } // isNCName(int):boolean

    /**
     * Returns true if the specified character is a valid Pubid
     * character as defined by production [13] in the XML 1.0
     * specification.
     *
     * @param c The character to check.
     */
    public static boolean isPubid(int c) {
        return hasFlag(c, MASK_PUBID);
    } // isPubid(int):boolean

    /*
     * [5] Name ::= (Letter | '_' | ':') (NameChar)*
     */
    /**
     * Check to see if a string is a valid Name according to [5]
     * in the XML 1.0 Recommendation
     *
     * @param name string to check
     * @return true if name is a valid Name; false otherwise, including
     *         when name is null or empty
     */
    public static boolean isValidName(String name) {
        if (name == null || name.length() == 0) {
            return false;
        }
        if (!isNameStart(name.charAt(0))) {
            return false;
        }
        for (int i = 1; i < name.length(); i++) {
            if (!isName(name.charAt(i))) {
                return false;
            }
        }
        return true;
    } // isValidName(String):boolean


    /*
     * from the namespace rec
     * [4] NCName ::= (Letter | '_') (NCNameChar)*
     */
    /**
     * Check to see if a string is a valid NCName according to [4]
     * from the XML Namespaces 1.0 Recommendation
     *
     * @param ncName string to check
     * @return true if ncName is a valid NCName; false otherwise,
     *         including when ncName is null or empty
     */
    public static boolean isValidNCName(String ncName) {
        if (ncName == null || ncName.length() == 0) {
            return false;
        }
        if (!isNCNameStart(ncName.charAt(0))) {
            return false;
        }
        for (int i = 1; i < ncName.length(); i++) {
            if (!isNCName(ncName.charAt(i))) {
                return false;
            }
        }
        return true;
    } // isValidNCName(String):boolean

    /*
     * [7] Nmtoken ::= (NameChar)+
     */
    /**
     * Check to see if a string is a valid Nmtoken according to [7]
     * in the XML 1.0 Recommendation
     *
     * @param nmtoken string to check
     * @return true if nmtoken is a valid Nmtoken; false otherwise,
     *         including when nmtoken is null or empty
     */
    public static boolean isValidNmtoken(String nmtoken) {
        if (nmtoken == null || nmtoken.length() == 0) {
            return false;
        }
        for (int i = 0; i < nmtoken.length(); i++) {
            if (!isName(nmtoken.charAt(i))) {
                return false;
            }
        }
        return true;
    } // isValidNmtoken(String):boolean

    // encodings

    /**
     * Returns true if the encoding name is a valid IANA encoding.
     * This method does not verify that there is a decoder available
     * for this encoding, only that the characters are valid for an
     * IANA encoding name: an ASCII letter followed by any number of
     * ASCII letters, digits, '.', '_' or '-'.
     *
     * @param ianaEncoding The IANA encoding name.
     * @return true if the name is well formed; false otherwise,
     *         including when it is null or empty
     */
    public static boolean isValidIANAEncoding(String ianaEncoding) {
        if (ianaEncoding == null || ianaEncoding.length() == 0) {
            return false;
        }
        if (!isAsciiLetter(ianaEncoding.charAt(0))) {
            return false;
        }
        final int length = ianaEncoding.length();
        for (int i = 1; i < length; i++) {
            if (!isEncodingNameChar(ianaEncoding.charAt(i))) {
                return false;
            }
        }
        return true;
    } // isValidIANAEncoding(String):boolean

    /**
     * Returns true if the encoding name is a valid Java encoding.
     * This method does not verify that there is a decoder available
     * for this encoding, only that the characters are valid for a
     * Java encoding name: one or more ASCII letters, digits, '.', '_'
     * or '-'. Unlike an IANA name, the first character does not have
     * to be a letter.
     *
     * @param javaEncoding The Java encoding name.
     * @return true if the name is well formed; false otherwise,
     *         including when it is null or empty
     */
    public static boolean isValidJavaEncoding(String javaEncoding) {
        if (javaEncoding == null || javaEncoding.length() == 0) {
            return false;
        }
        final int length = javaEncoding.length();
        // Start at index 0: the first character has no special rule,
        // but it must still belong to the allowed character set.
        for (int i = 0; i < length; i++) {
            if (!isEncodingNameChar(javaEncoding.charAt(i))) {
                return false;
            }
        }
        return true;
    } // isValidJavaEncoding(String):boolean

    /**
     * Simple check to determine if qname is legal. If it returns false
     * then <code>str</code> is illegal; if it returns true then
     * <code>str</code> is legal.
     * <p>
     * The string is split at its first ':'. With a colon, both the
     * prefix and the local part must be valid NCNames; without one,
     * the whole string must be a valid NCName.
     *
     * @param str string to check
     * @return true if str is a legal QName; false otherwise, including
     *         when str is null or empty
     */
    public static boolean isValidQName(String str) {
        if (str == null) {
            return false;
        }

        final int colon = str.indexOf(':');

        // A leading or trailing colon leaves an empty prefix or local part.
        if (colon == 0 || colon == str.length() - 1) {
            return false;
        }

        if (colon < 0) {
            return isValidNCName(str);
        }

        final String prefix = str.substring(0, colon);
        final String localPart = str.substring(colon + 1);
        return isValidNCName(prefix) && isValidNCName(localPart);
    } // isValidQName(String):boolean

} // class XMLChar