001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements. See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership. The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the "License");
007 * you may not use this file except in compliance with the License.
008 * You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018 /*
019 * $Id: URI.java 468654 2006-10-28 07:09:23Z minchau $
020 */
021 package org.apache.xml.serializer.utils;
022
023 import java.io.IOException;
024 import java.io.Serializable;
025
026
027 /**
028 * A class to represent a Uniform Resource Identifier (URI). This class
029 * is designed to handle the parsing of URIs and provide access to
030 * the various components (scheme, host, port, userinfo, path, query
031 * string and fragment) that may constitute a URI.
032 * <p>
033 * Parsing of a URI specification is done according to the URI
034 * syntax described in RFC 2396
035 * <http://www.ietf.org/rfc/rfc2396.txt?number=2396>. Every URI consists
036 * of a scheme, followed by a colon (':'), followed by a scheme-specific
037 * part. For URIs that follow the "generic URI" syntax, the scheme-
038 * specific part begins with two slashes ("//") and may be followed
039 * by an authority segment (comprised of user information, host, and
040 * port), path segment, query segment and fragment. Note that RFC 2396
041 * no longer specifies the use of the parameters segment and excludes
042 * the "user:password" syntax as part of the authority segment. If
043 * "user:password" appears in a URI, the entire user/password string
044 * is stored as userinfo.
045 * <p>
046 * For URIs that do not follow the "generic URI" syntax (e.g. mailto),
047 * the entire scheme-specific part is treated as the "path" portion
048 * of the URI.
049 * <p>
050 * Note that, unlike the java.net.URL class, this class does not provide
051 * any built-in network access functionality nor does it provide any
052 * scheme-specific functionality (for example, it does not know a
053 * default port for a specific scheme). Rather, it only knows the
054 * grammar and basic set of operations that can be applied to a URI.
055 *
056 * This class is a copy of the one in org.apache.xml.utils.
 * It exists to cut the serializer's dependency on that package.
058 *
059 * A minor change from the original is that this class no longer implements
060 * Serializable, and the serialVersionUID magic field is dropped, and
061 * the class is no longer "public".
062 *
063 * @xsl.usage internal
064 */
065 final class URI
066 {
067 /**
068 * MalformedURIExceptions are thrown in the process of building a URI
069 * or setting fields on a URI when an operation would result in an
070 * invalid URI specification.
071 *
072 */
073 public static class MalformedURIException extends IOException
074 {
075
076 /**
077 * Constructs a <code>MalformedURIException</code> with no specified
078 * detail message.
079 */
080 public MalformedURIException()
081 {
082 super();
083 }
084
085 /**
086 * Constructs a <code>MalformedURIException</code> with the
087 * specified detail message.
088 *
089 * @param p_msg the detail message.
090 */
091 public MalformedURIException(String p_msg)
092 {
093 super(p_msg);
094 }
095 }
096
097 /** reserved characters */
098 private static final String RESERVED_CHARACTERS = ";/?:@&=+$,";
099
100 /**
101 * URI punctuation mark characters - these, combined with
102 * alphanumerics, constitute the "unreserved" characters
103 */
104 private static final String MARK_CHARACTERS = "-_.!~*'() ";
105
106 /** scheme can be composed of alphanumerics and these characters */
107 private static final String SCHEME_CHARACTERS = "+-.";
108
109 /**
110 * userinfo can be composed of unreserved, escaped and these
111 * characters
112 */
113 private static final String USERINFO_CHARACTERS = ";:&=+$,";
114
115 /** Stores the scheme (usually the protocol) for this URI.
116 * @serial */
117 private String m_scheme = null;
118
119 /** If specified, stores the userinfo for this URI; otherwise null.
120 * @serial */
121 private String m_userinfo = null;
122
123 /** If specified, stores the host for this URI; otherwise null.
124 * @serial */
125 private String m_host = null;
126
127 /** If specified, stores the port for this URI; otherwise -1.
128 * @serial */
129 private int m_port = -1;
130
131 /** If specified, stores the path for this URI; otherwise null.
132 * @serial */
133 private String m_path = null;
134
135 /**
136 * If specified, stores the query string for this URI; otherwise
137 * null.
138 * @serial
139 */
140 private String m_queryString = null;
141
142 /** If specified, stores the fragment for this URI; otherwise null.
143 * @serial */
144 private String m_fragment = null;
145
146 /** Indicate whether in DEBUG mode */
147 private static boolean DEBUG = false;
148
149 /**
150 * Construct a new and uninitialized URI.
151 */
152 public URI(){}
153
/**
 * Copy constructor: every field of the new URI takes the value of the
 * corresponding field of <code>p_other</code>.
 *
 * @param p_other the URI to copy (cannot be null)
 */
public URI(URI p_other)
{
  this.initialize(p_other);
}
164
/**
 * Parses a URI specification string without a base URI. This simply
 * delegates to the (base, spec) constructor with a null base, so the
 * specification has to carry its own scheme.
 *
 * @param p_uriSpec the URI specification string (cannot be null or
 * empty)
 *
 * @throws MalformedURIException if p_uriSpec violates any syntax
 * rules
 */
public URI(String p_uriSpec) throws MalformedURIException
{
  // the cast selects the (URI, String) overload rather than (String, String)
  this((URI) null, p_uriSpec);
}
184
/**
 * Builds a URI from a specification string, resolving it against a
 * base URI when one is supplied. The specification may therefore be
 * a relative URI.
 *
 * @param p_base the base URI (cannot be null if p_uriSpec is null or
 * empty)
 * @param p_uriSpec the URI specification string (cannot be null or
 * empty if p_base is null)
 *
 * @throws MalformedURIException if p_uriSpec violates any syntax
 * rules
 */
public URI(URI p_base, String p_uriSpec) throws MalformedURIException
{
  this.initialize(p_base, p_uriSpec);
}
201
202 /**
203 * Construct a new URI that does not follow the generic URI syntax.
204 * Only the scheme and scheme-specific part (stored as the path) are
205 * initialized.
206 *
207 * @param p_scheme the URI scheme (cannot be null or empty)
208 * @param p_schemeSpecificPart the scheme-specific part (cannot be
209 * null or empty)
210 *
211 * @throws MalformedURIException if p_scheme violates any
212 * syntax rules
213 */
214 public URI(String p_scheme, String p_schemeSpecificPart)
215 throws MalformedURIException
216 {
217
218 if (p_scheme == null || p_scheme.trim().length() == 0)
219 {
220 throw new MalformedURIException(
221 "Cannot construct URI with null/empty scheme!");
222 }
223
224 if (p_schemeSpecificPart == null
225 || p_schemeSpecificPart.trim().length() == 0)
226 {
227 throw new MalformedURIException(
228 "Cannot construct URI with null/empty scheme-specific part!");
229 }
230
231 setScheme(p_scheme);
232 setPath(p_schemeSpecificPart);
233 }
234
/**
 * Builds a generic-syntax URI from scheme, host, path, query string
 * and fragment. Delegates to the seven-argument constructor with no
 * userinfo (null) and no port (-1); see that constructor and the
 * individual setters for the checks that are applied.
 *
 * @param p_scheme the URI scheme (cannot be null or empty)
 * @param p_host the hostname or IPv4 address for the URI
 * @param p_path the URI path - a '?' or '#' inside the path sets the
 * query string and/or fragment from the path; giving
 * the same component both in the path and as a
 * separate argument raises an exception
 * @param p_queryString the URI query string (cannot be specified
 * if path is null)
 * @param p_fragment the URI fragment (cannot be specified if path
 * is null)
 *
 * @throws MalformedURIException if any of the parameters violates
 * syntax rules or semantic rules
 */
public URI(String p_scheme, String p_host, String p_path, String p_queryString, String p_fragment)
        throws MalformedURIException
{
  this(p_scheme, null, p_host, -1, p_path, p_queryString, p_fragment);
}
261
262 /**
263 * Construct a new URI that follows the generic URI syntax from its
264 * component parts. Each component is validated for syntax and some
265 * basic semantic checks are performed as well. See the individual
266 * setter methods for specifics.
267 *
268 * @param p_scheme the URI scheme (cannot be null or empty)
269 * @param p_userinfo the URI userinfo (cannot be specified if host
270 * is null)
271 * @param p_host the hostname or IPv4 address for the URI
272 * @param p_port the URI port (may be -1 for "unspecified"; cannot
273 * be specified if host is null)
274 * @param p_path the URI path - if the path contains '?' or '#',
275 * then the query string and/or fragment will be
276 * set from the path; however, if the query and
277 * fragment are specified both in the path and as
278 * separate parameters, an exception is thrown
279 * @param p_queryString the URI query string (cannot be specified
280 * if path is null)
281 * @param p_fragment the URI fragment (cannot be specified if path
282 * is null)
283 *
284 * @throws MalformedURIException if any of the parameters violates
285 * syntax rules or semantic rules
286 */
287 public URI(String p_scheme, String p_userinfo, String p_host, int p_port, String p_path, String p_queryString, String p_fragment)
288 throws MalformedURIException
289 {
290
291 if (p_scheme == null || p_scheme.trim().length() == 0)
292 {
293 throw new MalformedURIException(Utils.messages.createMessage(MsgKey.ER_SCHEME_REQUIRED, null)); //"Scheme is required!");
294 }
295
296 if (p_host == null)
297 {
298 if (p_userinfo != null)
299 {
300 throw new MalformedURIException(
301 Utils.messages.createMessage(MsgKey.ER_NO_USERINFO_IF_NO_HOST, null)); //"Userinfo may not be specified if host is not specified!");
302 }
303
304 if (p_port != -1)
305 {
306 throw new MalformedURIException(
307 Utils.messages.createMessage(MsgKey.ER_NO_PORT_IF_NO_HOST, null)); //"Port may not be specified if host is not specified!");
308 }
309 }
310
311 if (p_path != null)
312 {
313 if (p_path.indexOf('?') != -1 && p_queryString != null)
314 {
315 throw new MalformedURIException(
316 Utils.messages.createMessage(MsgKey.ER_NO_QUERY_STRING_IN_PATH, null)); //"Query string cannot be specified in path and query string!");
317 }
318
319 if (p_path.indexOf('#') != -1 && p_fragment != null)
320 {
321 throw new MalformedURIException(
322 Utils.messages.createMessage(MsgKey.ER_NO_FRAGMENT_STRING_IN_PATH, null)); //"Fragment cannot be specified in both the path and fragment!");
323 }
324 }
325
326 setScheme(p_scheme);
327 setHost(p_host);
328 setPort(p_port);
329 setUserinfo(p_userinfo);
330 setPath(p_path);
331 setQueryString(p_queryString);
332 setFragment(p_fragment);
333 }
334
335 /**
336 * Initialize all fields of this URI from another URI.
337 *
338 * @param p_other the URI to copy (cannot be null)
339 */
340 private void initialize(URI p_other)
341 {
342
343 m_scheme = p_other.getScheme();
344 m_userinfo = p_other.getUserinfo();
345 m_host = p_other.getHost();
346 m_port = p_other.getPort();
347 m_path = p_other.getPath();
348 m_queryString = p_other.getQueryString();
349 m_fragment = p_other.getFragment();
350 }
351
352 /**
353 * Initializes this URI from a base URI and a URI specification string.
354 * See RFC 2396 Section 4 and Appendix B for specifications on parsing
355 * the URI and Section 5 for specifications on resolving relative URIs
356 * and relative paths.
357 *
358 * @param p_base the base URI (may be null if p_uriSpec is an absolute
359 * URI)
360 * @param p_uriSpec the URI spec string which may be an absolute or
361 * relative URI (can only be null/empty if p_base
362 * is not null)
363 *
364 * @throws MalformedURIException if p_base is null and p_uriSpec
365 * is not an absolute URI or if
366 * p_uriSpec violates syntax rules
367 */
368 private void initialize(URI p_base, String p_uriSpec)
369 throws MalformedURIException
370 {
371
372 if (p_base == null
373 && (p_uriSpec == null || p_uriSpec.trim().length() == 0))
374 {
375 throw new MalformedURIException(
376 Utils.messages.createMessage(MsgKey.ER_CANNOT_INIT_URI_EMPTY_PARMS, null)); //"Cannot initialize URI with empty parameters.");
377 }
378
379 // just make a copy of the base if spec is empty
380 if (p_uriSpec == null || p_uriSpec.trim().length() == 0)
381 {
382 initialize(p_base);
383
384 return;
385 }
386
387 String uriSpec = p_uriSpec.trim();
388 int uriSpecLen = uriSpec.length();
389 int index = 0;
390
391 // check for scheme
392 int colonIndex = uriSpec.indexOf(':');
393 if (colonIndex < 0)
394 {
395 if (p_base == null)
396 {
397 throw new MalformedURIException(Utils.messages.createMessage(MsgKey.ER_NO_SCHEME_IN_URI, new Object[]{uriSpec})); //"No scheme found in URI: "+uriSpec);
398 }
399 }
400 else
401 {
402 initializeScheme(uriSpec);
403 uriSpec = uriSpec.substring(colonIndex+1);
404 uriSpecLen = uriSpec.length();
405 }
406
407 // two slashes means generic URI syntax, so we get the authority
408 if (uriSpec.startsWith("//"))
409 {
410 index += 2;
411
412 int startPos = index;
413
414 // get authority - everything up to path, query or fragment
415 char testChar = '\0';
416
417 while (index < uriSpecLen)
418 {
419 testChar = uriSpec.charAt(index);
420
421 if (testChar == '/' || testChar == '?' || testChar == '#')
422 {
423 break;
424 }
425
426 index++;
427 }
428
429 // if we found authority, parse it out, otherwise we set the
430 // host to empty string
431 if (index > startPos)
432 {
433 initializeAuthority(uriSpec.substring(startPos, index));
434 }
435 else
436 {
437 m_host = "";
438 }
439 }
440
441 initializePath(uriSpec.substring(index));
442
443 // Resolve relative URI to base URI - see RFC 2396 Section 5.2
444 // In some cases, it might make more sense to throw an exception
445 // (when scheme is specified is the string spec and the base URI
446 // is also specified, for example), but we're just following the
447 // RFC specifications
448 if (p_base != null)
449 {
450
451 // check to see if this is the current doc - RFC 2396 5.2 #2
452 // note that this is slightly different from the RFC spec in that
453 // we don't include the check for query string being null
454 // - this handles cases where the urispec is just a query
455 // string or a fragment (e.g. "?y" or "#s") -
456 // see <http://www.ics.uci.edu/~fielding/url/test1.html> which
457 // identified this as a bug in the RFC
458 if (m_path.length() == 0 && m_scheme == null && m_host == null)
459 {
460 m_scheme = p_base.getScheme();
461 m_userinfo = p_base.getUserinfo();
462 m_host = p_base.getHost();
463 m_port = p_base.getPort();
464 m_path = p_base.getPath();
465
466 if (m_queryString == null)
467 {
468 m_queryString = p_base.getQueryString();
469 }
470
471 return;
472 }
473
474 // check for scheme - RFC 2396 5.2 #3
475 // if we found a scheme, it means absolute URI, so we're done
476 if (m_scheme == null)
477 {
478 m_scheme = p_base.getScheme();
479 }
480
481 // check for authority - RFC 2396 5.2 #4
482 // if we found a host, then we've got a network path, so we're done
483 if (m_host == null)
484 {
485 m_userinfo = p_base.getUserinfo();
486 m_host = p_base.getHost();
487 m_port = p_base.getPort();
488 }
489 else
490 {
491 return;
492 }
493
494 // check for absolute path - RFC 2396 5.2 #5
495 if (m_path.length() > 0 && m_path.startsWith("/"))
496 {
497 return;
498 }
499
500 // if we get to this point, we need to resolve relative path
501 // RFC 2396 5.2 #6
502 String path = new String();
503 String basePath = p_base.getPath();
504
505 // 6a - get all but the last segment of the base URI path
506 if (basePath != null)
507 {
508 int lastSlash = basePath.lastIndexOf('/');
509
510 if (lastSlash != -1)
511 {
512 path = basePath.substring(0, lastSlash + 1);
513 }
514 }
515
516 // 6b - append the relative URI path
517 path = path.concat(m_path);
518
519 // 6c - remove all "./" where "." is a complete path segment
520 index = -1;
521
522 while ((index = path.indexOf("/./")) != -1)
523 {
524 path = path.substring(0, index + 1).concat(path.substring(index + 3));
525 }
526
527 // 6d - remove "." if path ends with "." as a complete path segment
528 if (path.endsWith("/."))
529 {
530 path = path.substring(0, path.length() - 1);
531 }
532
533 // 6e - remove all "<segment>/../" where "<segment>" is a complete
534 // path segment not equal to ".."
535 index = -1;
536
537 int segIndex = -1;
538 String tempString = null;
539
540 while ((index = path.indexOf("/../")) > 0)
541 {
542 tempString = path.substring(0, path.indexOf("/../"));
543 segIndex = tempString.lastIndexOf('/');
544
545 if (segIndex != -1)
546 {
547 if (!tempString.substring(segIndex++).equals(".."))
548 {
549 path = path.substring(0, segIndex).concat(path.substring(index
550 + 4));
551 }
552 }
553 }
554
555 // 6f - remove ending "<segment>/.." where "<segment>" is a
556 // complete path segment
557 if (path.endsWith("/.."))
558 {
559 tempString = path.substring(0, path.length() - 3);
560 segIndex = tempString.lastIndexOf('/');
561
562 if (segIndex != -1)
563 {
564 path = path.substring(0, segIndex + 1);
565 }
566 }
567
568 m_path = path;
569 }
570 }
571
572 /**
573 * Initialize the scheme for this URI from a URI string spec.
574 *
575 * @param p_uriSpec the URI specification (cannot be null)
576 *
577 * @throws MalformedURIException if URI does not have a conformant
578 * scheme
579 */
580 private void initializeScheme(String p_uriSpec) throws MalformedURIException
581 {
582
583 int uriSpecLen = p_uriSpec.length();
584 int index = 0;
585 String scheme = null;
586 char testChar = '\0';
587
588 while (index < uriSpecLen)
589 {
590 testChar = p_uriSpec.charAt(index);
591
592 if (testChar == ':' || testChar == '/' || testChar == '?'
593 || testChar == '#')
594 {
595 break;
596 }
597
598 index++;
599 }
600
601 scheme = p_uriSpec.substring(0, index);
602
603 if (scheme.length() == 0)
604 {
605 throw new MalformedURIException(Utils.messages.createMessage(MsgKey.ER_NO_SCHEME_INURI, null)); //"No scheme found in URI.");
606 }
607 else
608 {
609 setScheme(scheme);
610 }
611 }
612
613 /**
614 * Initialize the authority (userinfo, host and port) for this
615 * URI from a URI string spec.
616 *
617 * @param p_uriSpec the URI specification (cannot be null)
618 *
619 * @throws MalformedURIException if p_uriSpec violates syntax rules
620 */
621 private void initializeAuthority(String p_uriSpec)
622 throws MalformedURIException
623 {
624
625 int index = 0;
626 int start = 0;
627 int end = p_uriSpec.length();
628 char testChar = '\0';
629 String userinfo = null;
630
631 // userinfo is everything up @
632 if (p_uriSpec.indexOf('@', start) != -1)
633 {
634 while (index < end)
635 {
636 testChar = p_uriSpec.charAt(index);
637
638 if (testChar == '@')
639 {
640 break;
641 }
642
643 index++;
644 }
645
646 userinfo = p_uriSpec.substring(start, index);
647
648 index++;
649 }
650
651 // host is everything up to ':'
652 String host = null;
653
654 start = index;
655
656 while (index < end)
657 {
658 testChar = p_uriSpec.charAt(index);
659
660 if (testChar == ':')
661 {
662 break;
663 }
664
665 index++;
666 }
667
668 host = p_uriSpec.substring(start, index);
669
670 int port = -1;
671
672 if (host.length() > 0)
673 {
674
675 // port
676 if (testChar == ':')
677 {
678 index++;
679
680 start = index;
681
682 while (index < end)
683 {
684 index++;
685 }
686
687 String portStr = p_uriSpec.substring(start, index);
688
689 if (portStr.length() > 0)
690 {
691 for (int i = 0; i < portStr.length(); i++)
692 {
693 if (!isDigit(portStr.charAt(i)))
694 {
695 throw new MalformedURIException(
696 portStr + " is invalid. Port should only contain digits!");
697 }
698 }
699
700 try
701 {
702 port = Integer.parseInt(portStr);
703 }
704 catch (NumberFormatException nfe)
705 {
706
707 // can't happen
708 }
709 }
710 }
711 }
712
713 setHost(host);
714 setPort(port);
715 setUserinfo(userinfo);
716 }
717
718 /**
719 * Initialize the path for this URI from a URI string spec.
720 *
721 * @param p_uriSpec the URI specification (cannot be null)
722 *
723 * @throws MalformedURIException if p_uriSpec violates syntax rules
724 */
725 private void initializePath(String p_uriSpec) throws MalformedURIException
726 {
727
728 if (p_uriSpec == null)
729 {
730 throw new MalformedURIException(
731 "Cannot initialize path from null string!");
732 }
733
734 int index = 0;
735 int start = 0;
736 int end = p_uriSpec.length();
737 char testChar = '\0';
738
739 // path - everything up to query string or fragment
740 while (index < end)
741 {
742 testChar = p_uriSpec.charAt(index);
743
744 if (testChar == '?' || testChar == '#')
745 {
746 break;
747 }
748
749 // check for valid escape sequence
750 if (testChar == '%')
751 {
752 if (index + 2 >= end ||!isHex(p_uriSpec.charAt(index + 1))
753 ||!isHex(p_uriSpec.charAt(index + 2)))
754 {
755 throw new MalformedURIException(
756 Utils.messages.createMessage(MsgKey.ER_PATH_CONTAINS_INVALID_ESCAPE_SEQUENCE, null)); //"Path contains invalid escape sequence!");
757 }
758 }
759 else if (!isReservedCharacter(testChar)
760 &&!isUnreservedCharacter(testChar))
761 {
762 if ('\\' != testChar)
763 throw new MalformedURIException(Utils.messages.createMessage(MsgKey.ER_PATH_INVALID_CHAR, new Object[]{String.valueOf(testChar)})); //"Path contains invalid character: "
764 //+ testChar);
765 }
766
767 index++;
768 }
769
770 m_path = p_uriSpec.substring(start, index);
771
772 // query - starts with ? and up to fragment or end
773 if (testChar == '?')
774 {
775 index++;
776
777 start = index;
778
779 while (index < end)
780 {
781 testChar = p_uriSpec.charAt(index);
782
783 if (testChar == '#')
784 {
785 break;
786 }
787
788 if (testChar == '%')
789 {
790 if (index + 2 >= end ||!isHex(p_uriSpec.charAt(index + 1))
791 ||!isHex(p_uriSpec.charAt(index + 2)))
792 {
793 throw new MalformedURIException(
794 "Query string contains invalid escape sequence!");
795 }
796 }
797 else if (!isReservedCharacter(testChar)
798 &&!isUnreservedCharacter(testChar))
799 {
800 throw new MalformedURIException(
801 "Query string contains invalid character:" + testChar);
802 }
803
804 index++;
805 }
806
807 m_queryString = p_uriSpec.substring(start, index);
808 }
809
810 // fragment - starts with #
811 if (testChar == '#')
812 {
813 index++;
814
815 start = index;
816
817 while (index < end)
818 {
819 testChar = p_uriSpec.charAt(index);
820
821 if (testChar == '%')
822 {
823 if (index + 2 >= end ||!isHex(p_uriSpec.charAt(index + 1))
824 ||!isHex(p_uriSpec.charAt(index + 2)))
825 {
826 throw new MalformedURIException(
827 "Fragment contains invalid escape sequence!");
828 }
829 }
830 else if (!isReservedCharacter(testChar)
831 &&!isUnreservedCharacter(testChar))
832 {
833 throw new MalformedURIException(
834 "Fragment contains invalid character:" + testChar);
835 }
836
837 index++;
838 }
839
840 m_fragment = p_uriSpec.substring(start, index);
841 }
842 }
843
844 /**
845 * Get the scheme for this URI.
846 *
847 * @return the scheme for this URI
848 */
849 public String getScheme()
850 {
851 return m_scheme;
852 }
853
854 /**
855 * Get the scheme-specific part for this URI (everything following the
856 * scheme and the first colon). See RFC 2396 Section 5.2 for spec.
857 *
858 * @return the scheme-specific part for this URI
859 */
860 public String getSchemeSpecificPart()
861 {
862
863 StringBuffer schemespec = new StringBuffer();
864
865 if (m_userinfo != null || m_host != null || m_port != -1)
866 {
867 schemespec.append("//");
868 }
869
870 if (m_userinfo != null)
871 {
872 schemespec.append(m_userinfo);
873 schemespec.append('@');
874 }
875
876 if (m_host != null)
877 {
878 schemespec.append(m_host);
879 }
880
881 if (m_port != -1)
882 {
883 schemespec.append(':');
884 schemespec.append(m_port);
885 }
886
887 if (m_path != null)
888 {
889 schemespec.append((m_path));
890 }
891
892 if (m_queryString != null)
893 {
894 schemespec.append('?');
895 schemespec.append(m_queryString);
896 }
897
898 if (m_fragment != null)
899 {
900 schemespec.append('#');
901 schemespec.append(m_fragment);
902 }
903
904 return schemespec.toString();
905 }
906
907 /**
908 * Get the userinfo for this URI.
909 *
910 * @return the userinfo for this URI (null if not specified).
911 */
912 public String getUserinfo()
913 {
914 return m_userinfo;
915 }
916
917 /**
918 * Get the host for this URI.
919 *
920 * @return the host for this URI (null if not specified).
921 */
922 public String getHost()
923 {
924 return m_host;
925 }
926
927 /**
928 * Get the port for this URI.
929 *
930 * @return the port for this URI (-1 if not specified).
931 */
932 public int getPort()
933 {
934 return m_port;
935 }
936
937 /**
938 * Get the path for this URI (optionally with the query string and
939 * fragment).
940 *
941 * @param p_includeQueryString if true (and query string is not null),
942 * then a "?" followed by the query string
943 * will be appended
944 * @param p_includeFragment if true (and fragment is not null),
945 * then a "#" followed by the fragment
946 * will be appended
947 *
948 * @return the path for this URI possibly including the query string
949 * and fragment
950 */
951 public String getPath(boolean p_includeQueryString,
952 boolean p_includeFragment)
953 {
954
955 StringBuffer pathString = new StringBuffer(m_path);
956
957 if (p_includeQueryString && m_queryString != null)
958 {
959 pathString.append('?');
960 pathString.append(m_queryString);
961 }
962
963 if (p_includeFragment && m_fragment != null)
964 {
965 pathString.append('#');
966 pathString.append(m_fragment);
967 }
968
969 return pathString.toString();
970 }
971
972 /**
973 * Get the path for this URI. Note that the value returned is the path
974 * only and does not include the query string or fragment.
975 *
976 * @return the path for this URI.
977 */
978 public String getPath()
979 {
980 return m_path;
981 }
982
983 /**
984 * Get the query string for this URI.
985 *
986 * @return the query string for this URI. Null is returned if there
987 * was no "?" in the URI spec, empty string if there was a
988 * "?" but no query string following it.
989 */
990 public String getQueryString()
991 {
992 return m_queryString;
993 }
994
995 /**
996 * Get the fragment for this URI.
997 *
998 * @return the fragment for this URI. Null is returned if there
999 * was no "#" in the URI spec, empty string if there was a
1000 * "#" but no fragment following it.
1001 */
1002 public String getFragment()
1003 {
1004 return m_fragment;
1005 }
1006
1007 /**
1008 * Set the scheme for this URI. The scheme is converted to lowercase
1009 * before it is set.
1010 *
1011 * @param p_scheme the scheme for this URI (cannot be null)
1012 *
1013 * @throws MalformedURIException if p_scheme is not a conformant
1014 * scheme name
1015 */
1016 public void setScheme(String p_scheme) throws MalformedURIException
1017 {
1018
1019 if (p_scheme == null)
1020 {
1021 throw new MalformedURIException(Utils.messages.createMessage(MsgKey.ER_SCHEME_FROM_NULL_STRING, null)); //"Cannot set scheme from null string!");
1022 }
1023
1024 if (!isConformantSchemeName(p_scheme))
1025 {
1026 throw new MalformedURIException(Utils.messages.createMessage(MsgKey.ER_SCHEME_NOT_CONFORMANT, null)); //"The scheme is not conformant.");
1027 }
1028
1029 m_scheme = p_scheme.toLowerCase();
1030 }
1031
1032 /**
1033 * Set the userinfo for this URI. If a non-null value is passed in and
1034 * the host value is null, then an exception is thrown.
1035 *
1036 * @param p_userinfo the userinfo for this URI
1037 *
1038 * @throws MalformedURIException if p_userinfo contains invalid
1039 * characters
1040 */
1041 public void setUserinfo(String p_userinfo) throws MalformedURIException
1042 {
1043
1044 if (p_userinfo == null)
1045 {
1046 m_userinfo = null;
1047 }
1048 else
1049 {
1050 if (m_host == null)
1051 {
1052 throw new MalformedURIException(
1053 "Userinfo cannot be set when host is null!");
1054 }
1055
1056 // userinfo can contain alphanumerics, mark characters, escaped
1057 // and ';',':','&','=','+','$',','
1058 int index = 0;
1059 int end = p_userinfo.length();
1060 char testChar = '\0';
1061
1062 while (index < end)
1063 {
1064 testChar = p_userinfo.charAt(index);
1065
1066 if (testChar == '%')
1067 {
1068 if (index + 2 >= end ||!isHex(p_userinfo.charAt(index + 1))
1069 ||!isHex(p_userinfo.charAt(index + 2)))
1070 {
1071 throw new MalformedURIException(
1072 "Userinfo contains invalid escape sequence!");
1073 }
1074 }
1075 else if (!isUnreservedCharacter(testChar)
1076 && USERINFO_CHARACTERS.indexOf(testChar) == -1)
1077 {
1078 throw new MalformedURIException(
1079 "Userinfo contains invalid character:" + testChar);
1080 }
1081
1082 index++;
1083 }
1084 }
1085
1086 m_userinfo = p_userinfo;
1087 }
1088
1089 /**
1090 * Set the host for this URI. If null is passed in, the userinfo
1091 * field is also set to null and the port is set to -1.
1092 *
1093 * @param p_host the host for this URI
1094 *
1095 * @throws MalformedURIException if p_host is not a valid IP
1096 * address or DNS hostname.
1097 */
1098 public void setHost(String p_host) throws MalformedURIException
1099 {
1100
1101 if (p_host == null || p_host.trim().length() == 0)
1102 {
1103 m_host = p_host;
1104 m_userinfo = null;
1105 m_port = -1;
1106 }
1107 else if (!isWellFormedAddress(p_host))
1108 {
1109 throw new MalformedURIException(Utils.messages.createMessage(MsgKey.ER_HOST_ADDRESS_NOT_WELLFORMED, null)); //"Host is not a well formed address!");
1110 }
1111
1112 m_host = p_host;
1113 }
1114
1115 /**
1116 * Set the port for this URI. -1 is used to indicate that the port is
1117 * not specified, otherwise valid port numbers are between 0 and 65535.
1118 * If a valid port number is passed in and the host field is null,
1119 * an exception is thrown.
1120 *
1121 * @param p_port the port number for this URI
1122 *
1123 * @throws MalformedURIException if p_port is not -1 and not a
1124 * valid port number
1125 */
1126 public void setPort(int p_port) throws MalformedURIException
1127 {
1128
1129 if (p_port >= 0 && p_port <= 65535)
1130 {
1131 if (m_host == null)
1132 {
1133 throw new MalformedURIException(
1134 Utils.messages.createMessage(MsgKey.ER_PORT_WHEN_HOST_NULL, null)); //"Port cannot be set when host is null!");
1135 }
1136 }
1137 else if (p_port != -1)
1138 {
1139 throw new MalformedURIException(Utils.messages.createMessage(MsgKey.ER_INVALID_PORT, null)); //"Invalid port number!");
1140 }
1141
1142 m_port = p_port;
1143 }
1144
1145 /**
1146 * Set the path for this URI. If the supplied path is null, then the
1147 * query string and fragment are set to null as well. If the supplied
1148 * path includes a query string and/or fragment, these fields will be
1149 * parsed and set as well. Note that, for URIs following the "generic
1150 * URI" syntax, the path specified should start with a slash.
1151 * For URIs that do not follow the generic URI syntax, this method
1152 * sets the scheme-specific part.
1153 *
1154 * @param p_path the path for this URI (may be null)
1155 *
1156 * @throws MalformedURIException if p_path contains invalid
1157 * characters
1158 */
1159 public void setPath(String p_path) throws MalformedURIException
1160 {
1161
1162 if (p_path == null)
1163 {
1164 m_path = null;
1165 m_queryString = null;
1166 m_fragment = null;
1167 }
1168 else
1169 {
1170 initializePath(p_path);
1171 }
1172 }
1173
1174 /**
1175 * Append to the end of the path of this URI. If the current path does
1176 * not end in a slash and the path to be appended does not begin with
1177 * a slash, a slash will be appended to the current path before the
1178 * new segment is added. Also, if the current path ends in a slash
1179 * and the new segment begins with a slash, the extra slash will be
1180 * removed before the new segment is appended.
1181 *
1182 * @param p_addToPath the new segment to be added to the current path
1183 *
1184 * @throws MalformedURIException if p_addToPath contains syntax
1185 * errors
1186 */
1187 public void appendPath(String p_addToPath) throws MalformedURIException
1188 {
1189
1190 if (p_addToPath == null || p_addToPath.trim().length() == 0)
1191 {
1192 return;
1193 }
1194
1195 if (!isURIString(p_addToPath))
1196 {
1197 throw new MalformedURIException(Utils.messages.createMessage(MsgKey.ER_PATH_INVALID_CHAR, new Object[]{p_addToPath})); //"Path contains invalid character!");
1198 }
1199
1200 if (m_path == null || m_path.trim().length() == 0)
1201 {
1202 if (p_addToPath.startsWith("/"))
1203 {
1204 m_path = p_addToPath;
1205 }
1206 else
1207 {
1208 m_path = "/" + p_addToPath;
1209 }
1210 }
1211 else if (m_path.endsWith("/"))
1212 {
1213 if (p_addToPath.startsWith("/"))
1214 {
1215 m_path = m_path.concat(p_addToPath.substring(1));
1216 }
1217 else
1218 {
1219 m_path = m_path.concat(p_addToPath);
1220 }
1221 }
1222 else
1223 {
1224 if (p_addToPath.startsWith("/"))
1225 {
1226 m_path = m_path.concat(p_addToPath);
1227 }
1228 else
1229 {
1230 m_path = m_path.concat("/" + p_addToPath);
1231 }
1232 }
1233 }
1234
1235 /**
1236 * Set the query string for this URI. A non-null value is valid only
1237 * if this is a URI conforming to the generic URI syntax and
1238 * the path value is not null.
1239 *
1240 * @param p_queryString the query string for this URI
1241 *
1242 * @throws MalformedURIException if p_queryString is not null and this
1243 * URI does not conform to the generic
1244 * URI syntax or if the path is null
1245 */
1246 public void setQueryString(String p_queryString)
1247 throws MalformedURIException
1248 {
1249
1250 if (p_queryString == null)
1251 {
1252 m_queryString = null;
1253 }
1254 else if (!isGenericURI())
1255 {
1256 throw new MalformedURIException(
1257 "Query string can only be set for a generic URI!");
1258 }
1259 else if (getPath() == null)
1260 {
1261 throw new MalformedURIException(
1262 "Query string cannot be set when path is null!");
1263 }
1264 else if (!isURIString(p_queryString))
1265 {
1266 throw new MalformedURIException(
1267 "Query string contains invalid character!");
1268 }
1269 else
1270 {
1271 m_queryString = p_queryString;
1272 }
1273 }
1274
1275 /**
1276 * Set the fragment for this URI. A non-null value is valid only
1277 * if this is a URI conforming to the generic URI syntax and
1278 * the path value is not null.
1279 *
1280 * @param p_fragment the fragment for this URI
1281 *
1282 * @throws MalformedURIException if p_fragment is not null and this
1283 * URI does not conform to the generic
1284 * URI syntax or if the path is null
1285 */
1286 public void setFragment(String p_fragment) throws MalformedURIException
1287 {
1288
1289 if (p_fragment == null)
1290 {
1291 m_fragment = null;
1292 }
1293 else if (!isGenericURI())
1294 {
1295 throw new MalformedURIException(
1296 Utils.messages.createMessage(MsgKey.ER_FRAG_FOR_GENERIC_URI, null)); //"Fragment can only be set for a generic URI!");
1297 }
1298 else if (getPath() == null)
1299 {
1300 throw new MalformedURIException(
1301 Utils.messages.createMessage(MsgKey.ER_FRAG_WHEN_PATH_NULL, null)); //"Fragment cannot be set when path is null!");
1302 }
1303 else if (!isURIString(p_fragment))
1304 {
1305 throw new MalformedURIException(Utils.messages.createMessage(MsgKey.ER_FRAG_INVALID_CHAR, null)); //"Fragment contains invalid character!");
1306 }
1307 else
1308 {
1309 m_fragment = p_fragment;
1310 }
1311 }
1312
1313 /**
1314 * Determines if the passed-in Object is equivalent to this URI.
1315 *
1316 * @param p_test the Object to test for equality.
1317 *
1318 * @return true if p_test is a URI with all values equal to this
1319 * URI, false otherwise
1320 */
1321 public boolean equals(Object p_test)
1322 {
1323
1324 if (p_test instanceof URI)
1325 {
1326 URI testURI = (URI) p_test;
1327
1328 if (((m_scheme == null && testURI.m_scheme == null) || (m_scheme != null && testURI.m_scheme != null && m_scheme.equals(
1329 testURI.m_scheme))) && ((m_userinfo == null && testURI.m_userinfo == null) || (m_userinfo != null && testURI.m_userinfo != null && m_userinfo.equals(
1330 testURI.m_userinfo))) && ((m_host == null && testURI.m_host == null) || (m_host != null && testURI.m_host != null && m_host.equals(
1331 testURI.m_host))) && m_port == testURI.m_port && ((m_path == null && testURI.m_path == null) || (m_path != null && testURI.m_path != null && m_path.equals(
1332 testURI.m_path))) && ((m_queryString == null && testURI.m_queryString == null) || (m_queryString != null && testURI.m_queryString != null && m_queryString.equals(
1333 testURI.m_queryString))) && ((m_fragment == null && testURI.m_fragment == null) || (m_fragment != null && testURI.m_fragment != null && m_fragment.equals(
1334 testURI.m_fragment))))
1335 {
1336 return true;
1337 }
1338 }
1339
1340 return false;
1341 }
1342
1343 /**
1344 * Get the URI as a string specification. See RFC 2396 Section 5.2.
1345 *
1346 * @return the URI string specification
1347 */
1348 public String toString()
1349 {
1350
1351 StringBuffer uriSpecString = new StringBuffer();
1352
1353 if (m_scheme != null)
1354 {
1355 uriSpecString.append(m_scheme);
1356 uriSpecString.append(':');
1357 }
1358
1359 uriSpecString.append(getSchemeSpecificPart());
1360
1361 return uriSpecString.toString();
1362 }
1363
1364 /**
1365 * Get the indicator as to whether this URI uses the "generic URI"
1366 * syntax.
1367 *
1368 * @return true if this URI uses the "generic URI" syntax, false
1369 * otherwise
1370 */
1371 public boolean isGenericURI()
1372 {
1373
1374 // presence of the host (whether valid or empty) means
1375 // double-slashes which means generic uri
1376 return (m_host != null);
1377 }
1378
1379 /**
1380 * Determine whether a scheme conforms to the rules for a scheme name.
1381 * A scheme is conformant if it starts with an alphabetic character, and
1382 * contains only alphanumerics, '+','-' and '.'.
1383 *
1384 *
1385 * @param p_scheme The scheme name to check
1386 * @return true if the scheme is conformant, false otherwise
1387 */
1388 public static boolean isConformantSchemeName(String p_scheme)
1389 {
1390
1391 if (p_scheme == null || p_scheme.trim().length() == 0)
1392 {
1393 return false;
1394 }
1395
1396 if (!isAlpha(p_scheme.charAt(0)))
1397 {
1398 return false;
1399 }
1400
1401 char testChar;
1402
1403 for (int i = 1; i < p_scheme.length(); i++)
1404 {
1405 testChar = p_scheme.charAt(i);
1406
1407 if (!isAlphanum(testChar) && SCHEME_CHARACTERS.indexOf(testChar) == -1)
1408 {
1409 return false;
1410 }
1411 }
1412
1413 return true;
1414 }
1415
1416 /**
1417 * Determine whether a string is syntactically capable of representing
1418 * a valid IPv4 address or the domain name of a network host. A valid
1419 * IPv4 address consists of four decimal digit groups separated by a
1420 * '.'. A hostname consists of domain labels (each of which must
1421 * begin and end with an alphanumeric but may contain '-') separated
1422 * by a '.'. See RFC 2396 Section 3.2.2.
1423 *
1424 *
1425 * @param p_address The address string to check
1426 * @return true if the string is a syntactically valid IPv4 address
1427 * or hostname
1428 */
1429 public static boolean isWellFormedAddress(String p_address)
1430 {
1431
1432 if (p_address == null)
1433 {
1434 return false;
1435 }
1436
1437 String address = p_address.trim();
1438 int addrLength = address.length();
1439
1440 if (addrLength == 0 || addrLength > 255)
1441 {
1442 return false;
1443 }
1444
1445 if (address.startsWith(".") || address.startsWith("-"))
1446 {
1447 return false;
1448 }
1449
1450 // rightmost domain label starting with digit indicates IP address
1451 // since top level domain label can only start with an alpha
1452 // see RFC 2396 Section 3.2.2
1453 int index = address.lastIndexOf('.');
1454
1455 if (address.endsWith("."))
1456 {
1457 index = address.substring(0, index).lastIndexOf('.');
1458 }
1459
1460 if (index + 1 < addrLength && isDigit(p_address.charAt(index + 1)))
1461 {
1462 char testChar;
1463 int numDots = 0;
1464
1465 // make sure that 1) we see only digits and dot separators, 2) that
1466 // any dot separator is preceded and followed by a digit and
1467 // 3) that we find 3 dots
1468 for (int i = 0; i < addrLength; i++)
1469 {
1470 testChar = address.charAt(i);
1471
1472 if (testChar == '.')
1473 {
1474 if (!isDigit(address.charAt(i - 1))
1475 || (i + 1 < addrLength &&!isDigit(address.charAt(i + 1))))
1476 {
1477 return false;
1478 }
1479
1480 numDots++;
1481 }
1482 else if (!isDigit(testChar))
1483 {
1484 return false;
1485 }
1486 }
1487
1488 if (numDots != 3)
1489 {
1490 return false;
1491 }
1492 }
1493 else
1494 {
1495
1496 // domain labels can contain alphanumerics and '-"
1497 // but must start and end with an alphanumeric
1498 char testChar;
1499
1500 for (int i = 0; i < addrLength; i++)
1501 {
1502 testChar = address.charAt(i);
1503
1504 if (testChar == '.')
1505 {
1506 if (!isAlphanum(address.charAt(i - 1)))
1507 {
1508 return false;
1509 }
1510
1511 if (i + 1 < addrLength &&!isAlphanum(address.charAt(i + 1)))
1512 {
1513 return false;
1514 }
1515 }
1516 else if (!isAlphanum(testChar) && testChar != '-')
1517 {
1518 return false;
1519 }
1520 }
1521 }
1522
1523 return true;
1524 }
1525
1526 /**
1527 * Determine whether a char is a digit.
1528 *
1529 *
1530 * @param p_char the character to check
1531 * @return true if the char is between '0' and '9', false otherwise
1532 */
1533 private static boolean isDigit(char p_char)
1534 {
1535 return p_char >= '0' && p_char <= '9';
1536 }
1537
1538 /**
1539 * Determine whether a character is a hexadecimal character.
1540 *
1541 *
1542 * @param p_char the character to check
1543 * @return true if the char is between '0' and '9', 'a' and 'f'
1544 * or 'A' and 'F', false otherwise
1545 */
1546 private static boolean isHex(char p_char)
1547 {
1548 return (isDigit(p_char) || (p_char >= 'a' && p_char <= 'f')
1549 || (p_char >= 'A' && p_char <= 'F'));
1550 }
1551
1552 /**
1553 * Determine whether a char is an alphabetic character: a-z or A-Z
1554 *
1555 *
1556 * @param p_char the character to check
1557 * @return true if the char is alphabetic, false otherwise
1558 */
1559 private static boolean isAlpha(char p_char)
1560 {
1561 return ((p_char >= 'a' && p_char <= 'z')
1562 || (p_char >= 'A' && p_char <= 'Z'));
1563 }
1564
1565 /**
1566 * Determine whether a char is an alphanumeric: 0-9, a-z or A-Z
1567 *
1568 *
1569 * @param p_char the character to check
1570 * @return true if the char is alphanumeric, false otherwise
1571 */
1572 private static boolean isAlphanum(char p_char)
1573 {
1574 return (isAlpha(p_char) || isDigit(p_char));
1575 }
1576
1577 /**
1578 * Determine whether a character is a reserved character:
1579 * ';', '/', '?', ':', '@', '&', '=', '+', '$' or ','
1580 *
1581 *
1582 * @param p_char the character to check
1583 * @return true if the char is a reserved character, false otherwise
1584 */
1585 private static boolean isReservedCharacter(char p_char)
1586 {
1587 return RESERVED_CHARACTERS.indexOf(p_char) != -1;
1588 }
1589
1590 /**
1591 * Determine whether a char is an unreserved character.
1592 *
1593 *
1594 * @param p_char the character to check
1595 * @return true if the char is unreserved, false otherwise
1596 */
1597 private static boolean isUnreservedCharacter(char p_char)
1598 {
1599 return (isAlphanum(p_char) || MARK_CHARACTERS.indexOf(p_char) != -1);
1600 }
1601
1602 /**
1603 * Determine whether a given string contains only URI characters (also
1604 * called "uric" in RFC 2396). uric consist of all reserved
1605 * characters, unreserved characters and escaped characters.
1606 *
1607 *
1608 * @param p_uric URI string
1609 * @return true if the string is comprised of uric, false otherwise
1610 */
1611 private static boolean isURIString(String p_uric)
1612 {
1613
1614 if (p_uric == null)
1615 {
1616 return false;
1617 }
1618
1619 int end = p_uric.length();
1620 char testChar = '\0';
1621
1622 for (int i = 0; i < end; i++)
1623 {
1624 testChar = p_uric.charAt(i);
1625
1626 if (testChar == '%')
1627 {
1628 if (i + 2 >= end ||!isHex(p_uric.charAt(i + 1))
1629 ||!isHex(p_uric.charAt(i + 2)))
1630 {
1631 return false;
1632 }
1633 else
1634 {
1635 i += 2;
1636
1637 continue;
1638 }
1639 }
1640
1641 if (isReservedCharacter(testChar) || isUnreservedCharacter(testChar))
1642 {
1643 continue;
1644 }
1645 else
1646 {
1647 return false;
1648 }
1649 }
1650
1651 return true;
1652 }
1653 }