001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one
003     * or more contributor license agreements. See the NOTICE file
004     * distributed with this work for additional information
005     * regarding copyright ownership. The ASF licenses this file
006     * to you under the Apache License, Version 2.0 (the  "License");
007     * you may not use this file except in compliance with the License.
008     * You may obtain a copy of the License at
009     *
010     *     http://www.apache.org/licenses/LICENSE-2.0
011     *
012     * Unless required by applicable law or agreed to in writing, software
013     * distributed under the License is distributed on an "AS IS" BASIS,
014     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015     * See the License for the specific language governing permissions and
016     * limitations under the License.
017     */
018    /*
019     * $Id: URI.java 468654 2006-10-28 07:09:23Z minchau $
020     */
021    package org.apache.xml.serializer.utils;
022    
023    import java.io.IOException;
024    import java.io.Serializable;
025    
026    
027    /**
028     * A class to represent a Uniform Resource Identifier (URI). This class
029     * is designed to handle the parsing of URIs and provide access to
030     * the various components (scheme, host, port, userinfo, path, query
031     * string and fragment) that may constitute a URI.
032     * <p>
033     * Parsing of a URI specification is done according to the URI
034     * syntax described in RFC 2396
035     * <http://www.ietf.org/rfc/rfc2396.txt?number=2396>. Every URI consists
036     * of a scheme, followed by a colon (':'), followed by a scheme-specific
037     * part. For URIs that follow the "generic URI" syntax, the scheme-
038     * specific part begins with two slashes ("//") and may be followed
039     * by an authority segment (comprised of user information, host, and
040     * port), path segment, query segment and fragment. Note that RFC 2396
041     * no longer specifies the use of the parameters segment and excludes
042     * the "user:password" syntax as part of the authority segment. If
043     * "user:password" appears in a URI, the entire user/password string
044     * is stored as userinfo.
045     * <p>
046     * For URIs that do not follow the "generic URI" syntax (e.g. mailto),
047     * the entire scheme-specific part is treated as the "path" portion
048     * of the URI.
049     * <p>
050     * Note that, unlike the java.net.URL class, this class does not provide
051     * any built-in network access functionality nor does it provide any
052     * scheme-specific functionality (for example, it does not know a
053     * default port for a specific scheme). Rather, it only knows the
054     * grammar and basic set of operations that can be applied to a URI.
055     *
056     * This class is a copy of the one in org.apache.xml.utils.
057     * It exists to cut the serializer's dependency on that package.
058     * 
059     * A minor change from the original is that this class no longer implements
060     * Serializable, and the serialVersionUID magic field is dropped, and
061     * the class is no longer "public".
062     *  
063     * @xsl.usage internal
064     */
065    final class URI
066    {
067      /**
068       * MalformedURIExceptions are thrown in the process of building a URI
069       * or setting fields on a URI when an operation would result in an
070       * invalid URI specification.
071       *
072       */
073      public static class MalformedURIException extends IOException
074      {
075    
076        /**
077         * Constructs a <code>MalformedURIException</code> with no specified
078         * detail message.
079         */
080        public MalformedURIException()
081        {
082          super();
083        }
084    
085        /**
086         * Constructs a <code>MalformedURIException</code> with the
087         * specified detail message.
088         *
089         * @param p_msg the detail message.
090         */
091        public MalformedURIException(String p_msg)
092        {
093          super(p_msg);
094        }
095      }
096    
097      /** reserved characters */
098      private static final String RESERVED_CHARACTERS = ";/?:@&=+$,";
099    
100      /**
101       * URI punctuation mark characters - these, combined with
102       *   alphanumerics, constitute the "unreserved" characters 
103       */
104      private static final String MARK_CHARACTERS = "-_.!~*'() ";
105    
106      /** scheme can be composed of alphanumerics and these characters */
107      private static final String SCHEME_CHARACTERS = "+-.";
108    
109      /**
110       * userinfo can be composed of unreserved, escaped and these
111       *   characters 
112       */
113      private static final String USERINFO_CHARACTERS = ";:&=+$,";
114    
115      /** Stores the scheme (usually the protocol) for this URI.
116       *  @serial */
117      private String m_scheme = null;
118    
119      /** If specified, stores the userinfo for this URI; otherwise null.
120       *  @serial */
121      private String m_userinfo = null;
122    
123      /** If specified, stores the host for this URI; otherwise null.
124       *  @serial */
125      private String m_host = null;
126    
127      /** If specified, stores the port for this URI; otherwise -1.
128       *  @serial */
129      private int m_port = -1;
130    
131      /** If specified, stores the path for this URI; otherwise null.
132       *  @serial */
133      private String m_path = null;
134    
135      /**
136       * If specified, stores the query string for this URI; otherwise
137       *   null. 
138       * @serial 
139       */
140      private String m_queryString = null;
141    
142      /** If specified, stores the fragment for this URI; otherwise null.
143       *  @serial */
144      private String m_fragment = null;
145    
146      /** Indicate whether in DEBUG mode          */
147      private static boolean DEBUG = false;
148    
149      /**
150       * Construct a new and uninitialized URI.
151       */
152      public URI(){}
153    
154      /**
155       * Construct a new URI from another URI. All fields for this URI are
156       * set equal to the fields of the URI passed in.
157       *
158       * @param p_other the URI to copy (cannot be null)
159       */
160      public URI(URI p_other)
161      {
162        initialize(p_other);
163      }
164    
165      /**
166       * Construct a new URI from a URI specification string. If the
167       * specification follows the "generic URI" syntax, (two slashes
168       * following the first colon), the specification will be parsed
169       * accordingly - setting the scheme, userinfo, host,port, path, query
170       * string and fragment fields as necessary. If the specification does
171       * not follow the "generic URI" syntax, the specification is parsed
172       * into a scheme and scheme-specific part (stored as the path) only.
173       *
174       * @param p_uriSpec the URI specification string (cannot be null or
175       *                  empty)
176       *
177       * @throws MalformedURIException if p_uriSpec violates any syntax
178       *                                   rules
179       */
180      public URI(String p_uriSpec) throws MalformedURIException
181      {
182        this((URI) null, p_uriSpec);
183      }
184    
185      /**
186       * Construct a new URI from a base URI and a URI specification string.
187       * The URI specification string may be a relative URI.
188       *
189       * @param p_base the base URI (cannot be null if p_uriSpec is null or
190       *               empty)
191       * @param p_uriSpec the URI specification string (cannot be null or
192       *                  empty if p_base is null)
193       *
194       * @throws MalformedURIException if p_uriSpec violates any syntax
195       *                                  rules
196       */
197      public URI(URI p_base, String p_uriSpec) throws MalformedURIException
198      {
199        initialize(p_base, p_uriSpec);
200      }
201    
202      /**
203       * Construct a new URI that does not follow the generic URI syntax.
204       * Only the scheme and scheme-specific part (stored as the path) are
205       * initialized.
206       *
207       * @param p_scheme the URI scheme (cannot be null or empty)
208       * @param p_schemeSpecificPart the scheme-specific part (cannot be
209       *                             null or empty)
210       *
211       * @throws MalformedURIException if p_scheme violates any
212       *                                  syntax rules
213       */
214      public URI(String p_scheme, String p_schemeSpecificPart)
215              throws MalformedURIException
216      {
217    
218        if (p_scheme == null || p_scheme.trim().length() == 0)
219        {
220          throw new MalformedURIException(
221            "Cannot construct URI with null/empty scheme!");
222        }
223    
224        if (p_schemeSpecificPart == null
225                || p_schemeSpecificPart.trim().length() == 0)
226        {
227          throw new MalformedURIException(
228            "Cannot construct URI with null/empty scheme-specific part!");
229        }
230    
231        setScheme(p_scheme);
232        setPath(p_schemeSpecificPart);
233      }
234    
235      /**
236       * Construct a new URI that follows the generic URI syntax from its
237       * component parts. Each component is validated for syntax and some
238       * basic semantic checks are performed as well.  See the individual
239       * setter methods for specifics.
240       *
241       * @param p_scheme the URI scheme (cannot be null or empty)
242       * @param p_host the hostname or IPv4 address for the URI
243       * @param p_path the URI path - if the path contains '?' or '#',
244       *               then the query string and/or fragment will be
245       *               set from the path; however, if the query and
246       *               fragment are specified both in the path and as
247       *               separate parameters, an exception is thrown
248       * @param p_queryString the URI query string (cannot be specified
249       *                      if path is null)
250       * @param p_fragment the URI fragment (cannot be specified if path
251       *                   is null)
252       *
253       * @throws MalformedURIException if any of the parameters violates
254       *                                  syntax rules or semantic rules
255       */
256      public URI(String p_scheme, String p_host, String p_path, String p_queryString, String p_fragment)
257              throws MalformedURIException
258      {
259        this(p_scheme, null, p_host, -1, p_path, p_queryString, p_fragment);
260      }
261    
262      /**
263       * Construct a new URI that follows the generic URI syntax from its
264       * component parts. Each component is validated for syntax and some
265       * basic semantic checks are performed as well.  See the individual
266       * setter methods for specifics.
267       *
268       * @param p_scheme the URI scheme (cannot be null or empty)
269       * @param p_userinfo the URI userinfo (cannot be specified if host
270       *                   is null)
271       * @param p_host the hostname or IPv4 address for the URI
272       * @param p_port the URI port (may be -1 for "unspecified"; cannot
273       *               be specified if host is null)
274       * @param p_path the URI path - if the path contains '?' or '#',
275       *               then the query string and/or fragment will be
276       *               set from the path; however, if the query and
277       *               fragment are specified both in the path and as
278       *               separate parameters, an exception is thrown
279       * @param p_queryString the URI query string (cannot be specified
280       *                      if path is null)
281       * @param p_fragment the URI fragment (cannot be specified if path
282       *                   is null)
283       *
284       * @throws MalformedURIException if any of the parameters violates
285       *                                  syntax rules or semantic rules
286       */
287      public URI(String p_scheme, String p_userinfo, String p_host, int p_port, String p_path, String p_queryString, String p_fragment)
288              throws MalformedURIException
289      {
290    
291        if (p_scheme == null || p_scheme.trim().length() == 0)
292        {
293          throw new MalformedURIException(Utils.messages.createMessage(MsgKey.ER_SCHEME_REQUIRED, null)); //"Scheme is required!");
294        }
295    
296        if (p_host == null)
297        {
298          if (p_userinfo != null)
299          {
300            throw new MalformedURIException(
301              Utils.messages.createMessage(MsgKey.ER_NO_USERINFO_IF_NO_HOST, null)); //"Userinfo may not be specified if host is not specified!");
302          }
303    
304          if (p_port != -1)
305          {
306            throw new MalformedURIException(
307              Utils.messages.createMessage(MsgKey.ER_NO_PORT_IF_NO_HOST, null)); //"Port may not be specified if host is not specified!");
308          }
309        }
310    
311        if (p_path != null)
312        {
313          if (p_path.indexOf('?') != -1 && p_queryString != null)
314          {
315            throw new MalformedURIException(
316              Utils.messages.createMessage(MsgKey.ER_NO_QUERY_STRING_IN_PATH, null)); //"Query string cannot be specified in path and query string!");
317          }
318    
319          if (p_path.indexOf('#') != -1 && p_fragment != null)
320          {
321            throw new MalformedURIException(
322              Utils.messages.createMessage(MsgKey.ER_NO_FRAGMENT_STRING_IN_PATH, null)); //"Fragment cannot be specified in both the path and fragment!");
323          }
324        }
325    
326        setScheme(p_scheme);
327        setHost(p_host);
328        setPort(p_port);
329        setUserinfo(p_userinfo);
330        setPath(p_path);
331        setQueryString(p_queryString);
332        setFragment(p_fragment);
333      }
334    
335      /**
336       * Initialize all fields of this URI from another URI.
337       *
338       * @param p_other the URI to copy (cannot be null)
339       */
340      private void initialize(URI p_other)
341      {
342    
343        m_scheme = p_other.getScheme();
344        m_userinfo = p_other.getUserinfo();
345        m_host = p_other.getHost();
346        m_port = p_other.getPort();
347        m_path = p_other.getPath();
348        m_queryString = p_other.getQueryString();
349        m_fragment = p_other.getFragment();
350      }
351    
352      /**
353       * Initializes this URI from a base URI and a URI specification string.
354       * See RFC 2396 Section 4 and Appendix B for specifications on parsing
355       * the URI and Section 5 for specifications on resolving relative URIs
356       * and relative paths.
357       *
358       * @param p_base the base URI (may be null if p_uriSpec is an absolute
359       *               URI)
360       * @param p_uriSpec the URI spec string which may be an absolute or
361       *                  relative URI (can only be null/empty if p_base
362       *                  is not null)
363       *
364       * @throws MalformedURIException if p_base is null and p_uriSpec
365       *                                  is not an absolute URI or if
366       *                                  p_uriSpec violates syntax rules
367       */
368      private void initialize(URI p_base, String p_uriSpec)
369              throws MalformedURIException
370      {
371    
372        if (p_base == null
373                && (p_uriSpec == null || p_uriSpec.trim().length() == 0))
374        {
375          throw new MalformedURIException(
376            Utils.messages.createMessage(MsgKey.ER_CANNOT_INIT_URI_EMPTY_PARMS, null)); //"Cannot initialize URI with empty parameters.");
377        }
378    
379        // just make a copy of the base if spec is empty
380        if (p_uriSpec == null || p_uriSpec.trim().length() == 0)
381        {
382          initialize(p_base);
383    
384          return;
385        }
386    
387        String uriSpec = p_uriSpec.trim();
388        int uriSpecLen = uriSpec.length();
389        int index = 0;
390    
391        // check for scheme
392        int colonIndex = uriSpec.indexOf(':');
393        if (colonIndex < 0)
394        {
395          if (p_base == null)
396          {
397            throw new MalformedURIException(Utils.messages.createMessage(MsgKey.ER_NO_SCHEME_IN_URI, new Object[]{uriSpec})); //"No scheme found in URI: "+uriSpec);
398          }
399        }
400        else
401        {
402          initializeScheme(uriSpec);
403          uriSpec = uriSpec.substring(colonIndex+1);
404          uriSpecLen = uriSpec.length();
405        }
406    
407        // two slashes means generic URI syntax, so we get the authority
408        if (uriSpec.startsWith("//"))
409        {
410          index += 2;
411    
412          int startPos = index;
413    
414          // get authority - everything up to path, query or fragment
415          char testChar = '\0';
416    
417          while (index < uriSpecLen)
418          {
419            testChar = uriSpec.charAt(index);
420    
421            if (testChar == '/' || testChar == '?' || testChar == '#')
422            {
423              break;
424            }
425    
426            index++;
427          }
428    
429          // if we found authority, parse it out, otherwise we set the
430          // host to empty string
431          if (index > startPos)
432          {
433            initializeAuthority(uriSpec.substring(startPos, index));
434          }
435          else
436          {
437            m_host = "";
438          }
439        }
440    
441        initializePath(uriSpec.substring(index));
442    
443        // Resolve relative URI to base URI - see RFC 2396 Section 5.2
444        // In some cases, it might make more sense to throw an exception
445        // (when scheme is specified is the string spec and the base URI
446        // is also specified, for example), but we're just following the
447        // RFC specifications 
448        if (p_base != null)
449        {
450    
451          // check to see if this is the current doc - RFC 2396 5.2 #2
452          // note that this is slightly different from the RFC spec in that
453          // we don't include the check for query string being null
454          // - this handles cases where the urispec is just a query
455          // string or a fragment (e.g. "?y" or "#s") - 
456          // see <http://www.ics.uci.edu/~fielding/url/test1.html> which
457          // identified this as a bug in the RFC
458          if (m_path.length() == 0 && m_scheme == null && m_host == null)
459          {
460            m_scheme = p_base.getScheme();
461            m_userinfo = p_base.getUserinfo();
462            m_host = p_base.getHost();
463            m_port = p_base.getPort();
464            m_path = p_base.getPath();
465    
466            if (m_queryString == null)
467            {
468              m_queryString = p_base.getQueryString();
469            }
470    
471            return;
472          }
473    
474          // check for scheme - RFC 2396 5.2 #3
475          // if we found a scheme, it means absolute URI, so we're done
476          if (m_scheme == null)
477          {
478            m_scheme = p_base.getScheme();
479          }
480    
481          // check for authority - RFC 2396 5.2 #4
482          // if we found a host, then we've got a network path, so we're done
483          if (m_host == null)
484          {
485            m_userinfo = p_base.getUserinfo();
486            m_host = p_base.getHost();
487            m_port = p_base.getPort();
488          }
489          else
490          {
491            return;
492          }
493    
494          // check for absolute path - RFC 2396 5.2 #5
495          if (m_path.length() > 0 && m_path.startsWith("/"))
496          {
497            return;
498          }
499    
500          // if we get to this point, we need to resolve relative path
501          // RFC 2396 5.2 #6
502          String path = new String();
503          String basePath = p_base.getPath();
504    
505          // 6a - get all but the last segment of the base URI path
506          if (basePath != null)
507          {
508            int lastSlash = basePath.lastIndexOf('/');
509    
510            if (lastSlash != -1)
511            {
512              path = basePath.substring(0, lastSlash + 1);
513            }
514          }
515    
516          // 6b - append the relative URI path
517          path = path.concat(m_path);
518    
519          // 6c - remove all "./" where "." is a complete path segment
520          index = -1;
521    
522          while ((index = path.indexOf("/./")) != -1)
523          {
524            path = path.substring(0, index + 1).concat(path.substring(index + 3));
525          }
526    
527          // 6d - remove "." if path ends with "." as a complete path segment
528          if (path.endsWith("/."))
529          {
530            path = path.substring(0, path.length() - 1);
531          }
532    
533          // 6e - remove all "<segment>/../" where "<segment>" is a complete 
534          // path segment not equal to ".."
535          index = -1;
536    
537          int segIndex = -1;
538          String tempString = null;
539    
540          while ((index = path.indexOf("/../")) > 0)
541          {
542            tempString = path.substring(0, path.indexOf("/../"));
543            segIndex = tempString.lastIndexOf('/');
544    
545            if (segIndex != -1)
546            {
547              if (!tempString.substring(segIndex++).equals(".."))
548              {
549                path = path.substring(0, segIndex).concat(path.substring(index
550                        + 4));
551              }
552            }
553          }
554    
555          // 6f - remove ending "<segment>/.." where "<segment>" is a 
556          // complete path segment
557          if (path.endsWith("/.."))
558          {
559            tempString = path.substring(0, path.length() - 3);
560            segIndex = tempString.lastIndexOf('/');
561    
562            if (segIndex != -1)
563            {
564              path = path.substring(0, segIndex + 1);
565            }
566          }
567    
568          m_path = path;
569        }
570      }
571    
572      /**
573       * Initialize the scheme for this URI from a URI string spec.
574       *
575       * @param p_uriSpec the URI specification (cannot be null)
576       *
577       * @throws MalformedURIException if URI does not have a conformant
578       *                                  scheme
579       */
580      private void initializeScheme(String p_uriSpec) throws MalformedURIException
581      {
582    
583        int uriSpecLen = p_uriSpec.length();
584        int index = 0;
585        String scheme = null;
586        char testChar = '\0';
587    
588        while (index < uriSpecLen)
589        {
590          testChar = p_uriSpec.charAt(index);
591    
592          if (testChar == ':' || testChar == '/' || testChar == '?'
593                  || testChar == '#')
594          {
595            break;
596          }
597    
598          index++;
599        }
600    
601        scheme = p_uriSpec.substring(0, index);
602    
603        if (scheme.length() == 0)
604        {
605          throw new MalformedURIException(Utils.messages.createMessage(MsgKey.ER_NO_SCHEME_INURI, null)); //"No scheme found in URI.");
606        }
607        else
608        {
609          setScheme(scheme);
610        }
611      }
612    
613      /**
614       * Initialize the authority (userinfo, host and port) for this
615       * URI from a URI string spec.
616       *
617       * @param p_uriSpec the URI specification (cannot be null)
618       *
619       * @throws MalformedURIException if p_uriSpec violates syntax rules
620       */
621      private void initializeAuthority(String p_uriSpec)
622              throws MalformedURIException
623      {
624    
625        int index = 0;
626        int start = 0;
627        int end = p_uriSpec.length();
628        char testChar = '\0';
629        String userinfo = null;
630    
631        // userinfo is everything up @
632        if (p_uriSpec.indexOf('@', start) != -1)
633        {
634          while (index < end)
635          {
636            testChar = p_uriSpec.charAt(index);
637    
638            if (testChar == '@')
639            {
640              break;
641            }
642    
643            index++;
644          }
645    
646          userinfo = p_uriSpec.substring(start, index);
647    
648          index++;
649        }
650    
651        // host is everything up to ':'
652        String host = null;
653    
654        start = index;
655    
656        while (index < end)
657        {
658          testChar = p_uriSpec.charAt(index);
659    
660          if (testChar == ':')
661          {
662            break;
663          }
664    
665          index++;
666        }
667    
668        host = p_uriSpec.substring(start, index);
669    
670        int port = -1;
671    
672        if (host.length() > 0)
673        {
674    
675          // port
676          if (testChar == ':')
677          {
678            index++;
679    
680            start = index;
681    
682            while (index < end)
683            {
684              index++;
685            }
686    
687            String portStr = p_uriSpec.substring(start, index);
688    
689            if (portStr.length() > 0)
690            {
691              for (int i = 0; i < portStr.length(); i++)
692              {
693                if (!isDigit(portStr.charAt(i)))
694                {
695                  throw new MalformedURIException(
696                    portStr + " is invalid. Port should only contain digits!");
697                }
698              }
699    
700              try
701              {
702                port = Integer.parseInt(portStr);
703              }
704              catch (NumberFormatException nfe)
705              {
706    
707                // can't happen
708              }
709            }
710          }
711        }
712    
713        setHost(host);
714        setPort(port);
715        setUserinfo(userinfo);
716      }
717    
718      /**
719       * Initialize the path for this URI from a URI string spec.
720       *
721       * @param p_uriSpec the URI specification (cannot be null)
722       *
723       * @throws MalformedURIException if p_uriSpec violates syntax rules
724       */
725      private void initializePath(String p_uriSpec) throws MalformedURIException
726      {
727    
728        if (p_uriSpec == null)
729        {
730          throw new MalformedURIException(
731            "Cannot initialize path from null string!");
732        }
733    
734        int index = 0;
735        int start = 0;
736        int end = p_uriSpec.length();
737        char testChar = '\0';
738    
739        // path - everything up to query string or fragment
740        while (index < end)
741        {
742          testChar = p_uriSpec.charAt(index);
743    
744          if (testChar == '?' || testChar == '#')
745          {
746            break;
747          }
748    
749          // check for valid escape sequence
750          if (testChar == '%')
751          {
752            if (index + 2 >= end ||!isHex(p_uriSpec.charAt(index + 1))
753                    ||!isHex(p_uriSpec.charAt(index + 2)))
754            {
755              throw new MalformedURIException(
756                Utils.messages.createMessage(MsgKey.ER_PATH_CONTAINS_INVALID_ESCAPE_SEQUENCE, null)); //"Path contains invalid escape sequence!");
757            }
758          }
759          else if (!isReservedCharacter(testChar)
760                   &&!isUnreservedCharacter(testChar))
761          {
762            if ('\\' != testChar)
763              throw new MalformedURIException(Utils.messages.createMessage(MsgKey.ER_PATH_INVALID_CHAR, new Object[]{String.valueOf(testChar)})); //"Path contains invalid character: "
764                                              //+ testChar);
765          }
766    
767          index++;
768        }
769    
770        m_path = p_uriSpec.substring(start, index);
771    
772        // query - starts with ? and up to fragment or end
773        if (testChar == '?')
774        {
775          index++;
776    
777          start = index;
778    
779          while (index < end)
780          {
781            testChar = p_uriSpec.charAt(index);
782    
783            if (testChar == '#')
784            {
785              break;
786            }
787    
788            if (testChar == '%')
789            {
790              if (index + 2 >= end ||!isHex(p_uriSpec.charAt(index + 1))
791                      ||!isHex(p_uriSpec.charAt(index + 2)))
792              {
793                throw new MalformedURIException(
794                  "Query string contains invalid escape sequence!");
795              }
796            }
797            else if (!isReservedCharacter(testChar)
798                     &&!isUnreservedCharacter(testChar))
799            {
800              throw new MalformedURIException(
801                "Query string contains invalid character:" + testChar);
802            }
803    
804            index++;
805          }
806    
807          m_queryString = p_uriSpec.substring(start, index);
808        }
809    
810        // fragment - starts with #
811        if (testChar == '#')
812        {
813          index++;
814    
815          start = index;
816    
817          while (index < end)
818          {
819            testChar = p_uriSpec.charAt(index);
820    
821            if (testChar == '%')
822            {
823              if (index + 2 >= end ||!isHex(p_uriSpec.charAt(index + 1))
824                      ||!isHex(p_uriSpec.charAt(index + 2)))
825              {
826                throw new MalformedURIException(
827                  "Fragment contains invalid escape sequence!");
828              }
829            }
830            else if (!isReservedCharacter(testChar)
831                     &&!isUnreservedCharacter(testChar))
832            {
833              throw new MalformedURIException(
834                "Fragment contains invalid character:" + testChar);
835            }
836    
837            index++;
838          }
839    
840          m_fragment = p_uriSpec.substring(start, index);
841        }
842      }
843    
844      /**
845       * Get the scheme for this URI.
846       *
847       * @return the scheme for this URI
848       */
849      public String getScheme()
850      {
851        return m_scheme;
852      }
853    
854      /**
855       * Get the scheme-specific part for this URI (everything following the
856       * scheme and the first colon). See RFC 2396 Section 5.2 for spec.
857       *
858       * @return the scheme-specific part for this URI
859       */
860      public String getSchemeSpecificPart()
861      {
862    
863        StringBuffer schemespec = new StringBuffer();
864    
865        if (m_userinfo != null || m_host != null || m_port != -1)
866        {
867          schemespec.append("//");
868        }
869    
870        if (m_userinfo != null)
871        {
872          schemespec.append(m_userinfo);
873          schemespec.append('@');
874        }
875    
876        if (m_host != null)
877        {
878          schemespec.append(m_host);
879        }
880    
881        if (m_port != -1)
882        {
883          schemespec.append(':');
884          schemespec.append(m_port);
885        }
886    
887        if (m_path != null)
888        {
889          schemespec.append((m_path));
890        }
891    
892        if (m_queryString != null)
893        {
894          schemespec.append('?');
895          schemespec.append(m_queryString);
896        }
897    
898        if (m_fragment != null)
899        {
900          schemespec.append('#');
901          schemespec.append(m_fragment);
902        }
903    
904        return schemespec.toString();
905      }
906    
907      /**
908       * Get the userinfo for this URI.
909       *
910       * @return the userinfo for this URI (null if not specified).
911       */
912      public String getUserinfo()
913      {
914        return m_userinfo;
915      }
916    
917      /**
918       * Get the host for this URI.
919       *
920       * @return the host for this URI (null if not specified).
921       */
922      public String getHost()
923      {
924        return m_host;
925      }
926    
927      /**
928       * Get the port for this URI.
929       *
930       * @return the port for this URI (-1 if not specified).
931       */
932      public int getPort()
933      {
934        return m_port;
935      }
936    
937      /**
938       * Get the path for this URI (optionally with the query string and
939       * fragment).
940       *
941       * @param p_includeQueryString if true (and query string is not null),
942       *                             then a "?" followed by the query string
943       *                             will be appended
944       * @param p_includeFragment if true (and fragment is not null),
945       *                             then a "#" followed by the fragment
946       *                             will be appended
947       *
948       * @return the path for this URI possibly including the query string
949       *         and fragment
950       */
951      public String getPath(boolean p_includeQueryString,
952                            boolean p_includeFragment)
953      {
954    
955        StringBuffer pathString = new StringBuffer(m_path);
956    
957        if (p_includeQueryString && m_queryString != null)
958        {
959          pathString.append('?');
960          pathString.append(m_queryString);
961        }
962    
963        if (p_includeFragment && m_fragment != null)
964        {
965          pathString.append('#');
966          pathString.append(m_fragment);
967        }
968    
969        return pathString.toString();
970      }
971    
972      /**
973       * Get the path for this URI. Note that the value returned is the path
974       * only and does not include the query string or fragment.
975       *
976       * @return the path for this URI.
977       */
978      public String getPath()
979      {
980        return m_path;
981      }
982    
983      /**
984       * Get the query string for this URI.
985       *
986       * @return the query string for this URI. Null is returned if there
987       *         was no "?" in the URI spec, empty string if there was a
988       *         "?" but no query string following it.
989       */
990      public String getQueryString()
991      {
992        return m_queryString;
993      }
994    
995      /**
996       * Get the fragment for this URI.
997       *
998       * @return the fragment for this URI. Null is returned if there
999       *         was no "#" in the URI spec, empty string if there was a
1000       *         "#" but no fragment following it.
1001       */
1002      public String getFragment()
1003      {
1004        return m_fragment;
1005      }
1006    
1007      /**
1008       * Set the scheme for this URI. The scheme is converted to lowercase
1009       * before it is set.
1010       *
1011       * @param p_scheme the scheme for this URI (cannot be null)
1012       *
1013       * @throws MalformedURIException if p_scheme is not a conformant
1014       *                                  scheme name
1015       */
1016      public void setScheme(String p_scheme) throws MalformedURIException
1017      {
1018    
1019        if (p_scheme == null)
1020        {
1021          throw new MalformedURIException(Utils.messages.createMessage(MsgKey.ER_SCHEME_FROM_NULL_STRING, null)); //"Cannot set scheme from null string!");
1022        }
1023    
1024        if (!isConformantSchemeName(p_scheme))
1025        {
1026          throw new MalformedURIException(Utils.messages.createMessage(MsgKey.ER_SCHEME_NOT_CONFORMANT, null)); //"The scheme is not conformant.");
1027        }
1028    
1029        m_scheme = p_scheme.toLowerCase();
1030      }
1031    
1032      /**
1033       * Set the userinfo for this URI. If a non-null value is passed in and
1034       * the host value is null, then an exception is thrown.
1035       *
1036       * @param p_userinfo the userinfo for this URI
1037       *
1038       * @throws MalformedURIException if p_userinfo contains invalid
1039       *                                  characters
1040       */
1041      public void setUserinfo(String p_userinfo) throws MalformedURIException
1042      {
1043    
1044        if (p_userinfo == null)
1045        {
1046          m_userinfo = null;
1047        }
1048        else
1049        {
1050          if (m_host == null)
1051          {
1052            throw new MalformedURIException(
1053              "Userinfo cannot be set when host is null!");
1054          }
1055    
1056          // userinfo can contain alphanumerics, mark characters, escaped
1057          // and ';',':','&','=','+','$',','
1058          int index = 0;
1059          int end = p_userinfo.length();
1060          char testChar = '\0';
1061    
1062          while (index < end)
1063          {
1064            testChar = p_userinfo.charAt(index);
1065    
1066            if (testChar == '%')
1067            {
1068              if (index + 2 >= end ||!isHex(p_userinfo.charAt(index + 1))
1069                      ||!isHex(p_userinfo.charAt(index + 2)))
1070              {
1071                throw new MalformedURIException(
1072                  "Userinfo contains invalid escape sequence!");
1073              }
1074            }
1075            else if (!isUnreservedCharacter(testChar)
1076                     && USERINFO_CHARACTERS.indexOf(testChar) == -1)
1077            {
1078              throw new MalformedURIException(
1079                "Userinfo contains invalid character:" + testChar);
1080            }
1081    
1082            index++;
1083          }
1084        }
1085    
1086        m_userinfo = p_userinfo;
1087      }
1088    
1089      /**
1090       * Set the host for this URI. If null is passed in, the userinfo
1091       * field is also set to null and the port is set to -1.
1092       *
1093       * @param p_host the host for this URI
1094       *
1095       * @throws MalformedURIException if p_host is not a valid IP
1096       *                                  address or DNS hostname.
1097       */
1098      public void setHost(String p_host) throws MalformedURIException
1099      {
1100    
1101        if (p_host == null || p_host.trim().length() == 0)
1102        {
1103          m_host = p_host;
1104          m_userinfo = null;
1105          m_port = -1;
1106        }
1107        else if (!isWellFormedAddress(p_host))
1108        {
1109          throw new MalformedURIException(Utils.messages.createMessage(MsgKey.ER_HOST_ADDRESS_NOT_WELLFORMED, null)); //"Host is not a well formed address!");
1110        }
1111    
1112        m_host = p_host;
1113      }
1114    
1115      /**
1116       * Set the port for this URI. -1 is used to indicate that the port is
1117       * not specified, otherwise valid port numbers are  between 0 and 65535.
1118       * If a valid port number is passed in and the host field is null,
1119       * an exception is thrown.
1120       *
1121       * @param p_port the port number for this URI
1122       *
1123       * @throws MalformedURIException if p_port is not -1 and not a
1124       *                                  valid port number
1125       */
1126      public void setPort(int p_port) throws MalformedURIException
1127      {
1128    
1129        if (p_port >= 0 && p_port <= 65535)
1130        {
1131          if (m_host == null)
1132          {
1133            throw new MalformedURIException(
1134              Utils.messages.createMessage(MsgKey.ER_PORT_WHEN_HOST_NULL, null)); //"Port cannot be set when host is null!");
1135          }
1136        }
1137        else if (p_port != -1)
1138        {
1139          throw new MalformedURIException(Utils.messages.createMessage(MsgKey.ER_INVALID_PORT, null)); //"Invalid port number!");
1140        }
1141    
1142        m_port = p_port;
1143      }
1144    
1145      /**
1146       * Set the path for this URI. If the supplied path is null, then the
1147       * query string and fragment are set to null as well. If the supplied
1148       * path includes a query string and/or fragment, these fields will be
1149       * parsed and set as well. Note that, for URIs following the "generic
1150       * URI" syntax, the path specified should start with a slash.
1151       * For URIs that do not follow the generic URI syntax, this method
1152       * sets the scheme-specific part.
1153       *
1154       * @param p_path the path for this URI (may be null)
1155       *
1156       * @throws MalformedURIException if p_path contains invalid
1157       *                                  characters
1158       */
1159      public void setPath(String p_path) throws MalformedURIException
1160      {
1161    
1162        if (p_path == null)
1163        {
1164          m_path = null;
1165          m_queryString = null;
1166          m_fragment = null;
1167        }
1168        else
1169        {
1170          initializePath(p_path);
1171        }
1172      }
1173    
1174      /**
1175       * Append to the end of the path of this URI. If the current path does
1176       * not end in a slash and the path to be appended does not begin with
1177       * a slash, a slash will be appended to the current path before the
1178       * new segment is added. Also, if the current path ends in a slash
1179       * and the new segment begins with a slash, the extra slash will be
1180       * removed before the new segment is appended.
1181       *
1182       * @param p_addToPath the new segment to be added to the current path
1183       *
1184       * @throws MalformedURIException if p_addToPath contains syntax
1185       *                                  errors
1186       */
1187      public void appendPath(String p_addToPath) throws MalformedURIException
1188      {
1189    
1190        if (p_addToPath == null || p_addToPath.trim().length() == 0)
1191        {
1192          return;
1193        }
1194    
1195        if (!isURIString(p_addToPath))
1196        {
1197          throw new MalformedURIException(Utils.messages.createMessage(MsgKey.ER_PATH_INVALID_CHAR, new Object[]{p_addToPath})); //"Path contains invalid character!");
1198        }
1199    
1200        if (m_path == null || m_path.trim().length() == 0)
1201        {
1202          if (p_addToPath.startsWith("/"))
1203          {
1204            m_path = p_addToPath;
1205          }
1206          else
1207          {
1208            m_path = "/" + p_addToPath;
1209          }
1210        }
1211        else if (m_path.endsWith("/"))
1212        {
1213          if (p_addToPath.startsWith("/"))
1214          {
1215            m_path = m_path.concat(p_addToPath.substring(1));
1216          }
1217          else
1218          {
1219            m_path = m_path.concat(p_addToPath);
1220          }
1221        }
1222        else
1223        {
1224          if (p_addToPath.startsWith("/"))
1225          {
1226            m_path = m_path.concat(p_addToPath);
1227          }
1228          else
1229          {
1230            m_path = m_path.concat("/" + p_addToPath);
1231          }
1232        }
1233      }
1234    
1235      /**
1236       * Set the query string for this URI. A non-null value is valid only
1237       * if this is an URI conforming to the generic URI syntax and
1238       * the path value is not null.
1239       *
1240       * @param p_queryString the query string for this URI
1241       *
1242       * @throws MalformedURIException if p_queryString is not null and this
1243       *                                  URI does not conform to the generic
1244       *                                  URI syntax or if the path is null
1245       */
1246      public void setQueryString(String p_queryString)
1247              throws MalformedURIException
1248      {
1249    
1250        if (p_queryString == null)
1251        {
1252          m_queryString = null;
1253        }
1254        else if (!isGenericURI())
1255        {
1256          throw new MalformedURIException(
1257            "Query string can only be set for a generic URI!");
1258        }
1259        else if (getPath() == null)
1260        {
1261          throw new MalformedURIException(
1262            "Query string cannot be set when path is null!");
1263        }
1264        else if (!isURIString(p_queryString))
1265        {
1266          throw new MalformedURIException(
1267            "Query string contains invalid character!");
1268        }
1269        else
1270        {
1271          m_queryString = p_queryString;
1272        }
1273      }
1274    
1275      /**
1276       * Set the fragment for this URI. A non-null value is valid only
1277       * if this is a URI conforming to the generic URI syntax and
1278       * the path value is not null.
1279       *
1280       * @param p_fragment the fragment for this URI
1281       *
1282       * @throws MalformedURIException if p_fragment is not null and this
1283       *                                  URI does not conform to the generic
1284       *                                  URI syntax or if the path is null
1285       */
1286      public void setFragment(String p_fragment) throws MalformedURIException
1287      {
1288    
1289        if (p_fragment == null)
1290        {
1291          m_fragment = null;
1292        }
1293        else if (!isGenericURI())
1294        {
1295          throw new MalformedURIException(
1296            Utils.messages.createMessage(MsgKey.ER_FRAG_FOR_GENERIC_URI, null)); //"Fragment can only be set for a generic URI!");
1297        }
1298        else if (getPath() == null)
1299        {
1300          throw new MalformedURIException(
1301            Utils.messages.createMessage(MsgKey.ER_FRAG_WHEN_PATH_NULL, null)); //"Fragment cannot be set when path is null!");
1302        }
1303        else if (!isURIString(p_fragment))
1304        {
1305          throw new MalformedURIException(Utils.messages.createMessage(MsgKey.ER_FRAG_INVALID_CHAR, null)); //"Fragment contains invalid character!");
1306        }
1307        else
1308        {
1309          m_fragment = p_fragment;
1310        }
1311      }
1312    
1313      /**
1314       * Determines if the passed-in Object is equivalent to this URI.
1315       *
1316       * @param p_test the Object to test for equality.
1317       *
1318       * @return true if p_test is a URI with all values equal to this
1319       *         URI, false otherwise
1320       */
1321      public boolean equals(Object p_test)
1322      {
1323    
1324        if (p_test instanceof URI)
1325        {
1326          URI testURI = (URI) p_test;
1327    
1328          if (((m_scheme == null && testURI.m_scheme == null) || (m_scheme != null && testURI.m_scheme != null && m_scheme.equals(
1329                  testURI.m_scheme))) && ((m_userinfo == null && testURI.m_userinfo == null) || (m_userinfo != null && testURI.m_userinfo != null && m_userinfo.equals(
1330                  testURI.m_userinfo))) && ((m_host == null && testURI.m_host == null) || (m_host != null && testURI.m_host != null && m_host.equals(
1331                  testURI.m_host))) && m_port == testURI.m_port && ((m_path == null && testURI.m_path == null) || (m_path != null && testURI.m_path != null && m_path.equals(
1332                  testURI.m_path))) && ((m_queryString == null && testURI.m_queryString == null) || (m_queryString != null && testURI.m_queryString != null && m_queryString.equals(
1333                  testURI.m_queryString))) && ((m_fragment == null && testURI.m_fragment == null) || (m_fragment != null && testURI.m_fragment != null && m_fragment.equals(
1334                  testURI.m_fragment))))
1335          {
1336            return true;
1337          }
1338        }
1339    
1340        return false;
1341      }
1342    
1343      /**
1344       * Get the URI as a string specification. See RFC 2396 Section 5.2.
1345       *
1346       * @return the URI string specification
1347       */
1348      public String toString()
1349      {
1350    
1351        StringBuffer uriSpecString = new StringBuffer();
1352    
1353        if (m_scheme != null)
1354        {
1355          uriSpecString.append(m_scheme);
1356          uriSpecString.append(':');
1357        }
1358    
1359        uriSpecString.append(getSchemeSpecificPart());
1360    
1361        return uriSpecString.toString();
1362      }
1363    
1364      /**
1365       * Get the indicator as to whether this URI uses the "generic URI"
1366       * syntax.
1367       *
1368       * @return true if this URI uses the "generic URI" syntax, false
1369       *         otherwise
1370       */
1371      public boolean isGenericURI()
1372      {
1373    
1374        // presence of the host (whether valid or empty) means 
1375        // double-slashes which means generic uri
1376        return (m_host != null);
1377      }
1378    
1379      /**
1380       * Determine whether a scheme conforms to the rules for a scheme name.
1381       * A scheme is conformant if it starts with an alphanumeric, and
1382       * contains only alphanumerics, '+','-' and '.'.
1383       *
1384       *
1385       * @param p_scheme The sheme name to check
1386       * @return true if the scheme is conformant, false otherwise
1387       */
1388      public static boolean isConformantSchemeName(String p_scheme)
1389      {
1390    
1391        if (p_scheme == null || p_scheme.trim().length() == 0)
1392        {
1393          return false;
1394        }
1395    
1396        if (!isAlpha(p_scheme.charAt(0)))
1397        {
1398          return false;
1399        }
1400    
1401        char testChar;
1402    
1403        for (int i = 1; i < p_scheme.length(); i++)
1404        {
1405          testChar = p_scheme.charAt(i);
1406    
1407          if (!isAlphanum(testChar) && SCHEME_CHARACTERS.indexOf(testChar) == -1)
1408          {
1409            return false;
1410          }
1411        }
1412    
1413        return true;
1414      }
1415    
1416      /**
1417       * Determine whether a string is syntactically capable of representing
1418       * a valid IPv4 address or the domain name of a network host. A valid
1419       * IPv4 address consists of four decimal digit groups separated by a
1420       * '.'. A hostname consists of domain labels (each of which must
1421       * begin and end with an alphanumeric but may contain '-') separated
1422       * & by a '.'. See RFC 2396 Section 3.2.2.
1423       *
1424       *
1425       * @param p_address The address string to check
1426       * @return true if the string is a syntactically valid IPv4 address
1427       *              or hostname
1428       */
1429      public static boolean isWellFormedAddress(String p_address)
1430      {
1431    
1432        if (p_address == null)
1433        {
1434          return false;
1435        }
1436    
1437        String address = p_address.trim();
1438        int addrLength = address.length();
1439    
1440        if (addrLength == 0 || addrLength > 255)
1441        {
1442          return false;
1443        }
1444    
1445        if (address.startsWith(".") || address.startsWith("-"))
1446        {
1447          return false;
1448        }
1449    
1450        // rightmost domain label starting with digit indicates IP address
1451        // since top level domain label can only start with an alpha
1452        // see RFC 2396 Section 3.2.2
1453        int index = address.lastIndexOf('.');
1454    
1455        if (address.endsWith("."))
1456        {
1457          index = address.substring(0, index).lastIndexOf('.');
1458        }
1459    
1460        if (index + 1 < addrLength && isDigit(p_address.charAt(index + 1)))
1461        {
1462          char testChar;
1463          int numDots = 0;
1464    
1465          // make sure that 1) we see only digits and dot separators, 2) that
1466          // any dot separator is preceded and followed by a digit and 
1467          // 3) that we find 3 dots
1468          for (int i = 0; i < addrLength; i++)
1469          {
1470            testChar = address.charAt(i);
1471    
1472            if (testChar == '.')
1473            {
1474              if (!isDigit(address.charAt(i - 1))
1475                      || (i + 1 < addrLength &&!isDigit(address.charAt(i + 1))))
1476              {
1477                return false;
1478              }
1479    
1480              numDots++;
1481            }
1482            else if (!isDigit(testChar))
1483            {
1484              return false;
1485            }
1486          }
1487    
1488          if (numDots != 3)
1489          {
1490            return false;
1491          }
1492        }
1493        else
1494        {
1495    
1496          // domain labels can contain alphanumerics and '-"
1497          // but must start and end with an alphanumeric
1498          char testChar;
1499    
1500          for (int i = 0; i < addrLength; i++)
1501          {
1502            testChar = address.charAt(i);
1503    
1504            if (testChar == '.')
1505            {
1506              if (!isAlphanum(address.charAt(i - 1)))
1507              {
1508                return false;
1509              }
1510    
1511              if (i + 1 < addrLength &&!isAlphanum(address.charAt(i + 1)))
1512              {
1513                return false;
1514              }
1515            }
1516            else if (!isAlphanum(testChar) && testChar != '-')
1517            {
1518              return false;
1519            }
1520          }
1521        }
1522    
1523        return true;
1524      }
1525    
1526      /**
1527       * Determine whether a char is a digit.
1528       *
1529       *
1530       * @param p_char the character to check
1531       * @return true if the char is betweeen '0' and '9', false otherwise
1532       */
1533      private static boolean isDigit(char p_char)
1534      {
1535        return p_char >= '0' && p_char <= '9';
1536      }
1537    
1538      /**
1539       * Determine whether a character is a hexadecimal character.
1540       *
1541       *
1542       * @param p_char the character to check
1543       * @return true if the char is betweeen '0' and '9', 'a' and 'f'
1544       *         or 'A' and 'F', false otherwise
1545       */
1546      private static boolean isHex(char p_char)
1547      {
1548        return (isDigit(p_char) || (p_char >= 'a' && p_char <= 'f')
1549                || (p_char >= 'A' && p_char <= 'F'));
1550      }
1551    
1552      /**
1553       * Determine whether a char is an alphabetic character: a-z or A-Z
1554       *
1555       *
1556       * @param p_char the character to check
1557       * @return true if the char is alphabetic, false otherwise
1558       */
1559      private static boolean isAlpha(char p_char)
1560      {
1561        return ((p_char >= 'a' && p_char <= 'z')
1562                || (p_char >= 'A' && p_char <= 'Z'));
1563      }
1564    
1565      /**
1566       * Determine whether a char is an alphanumeric: 0-9, a-z or A-Z
1567       *
1568       *
1569       * @param p_char the character to check
1570       * @return true if the char is alphanumeric, false otherwise
1571       */
1572      private static boolean isAlphanum(char p_char)
1573      {
1574        return (isAlpha(p_char) || isDigit(p_char));
1575      }
1576    
1577      /**
1578       * Determine whether a character is a reserved character:
1579       * ';', '/', '?', ':', '@', '&', '=', '+', '$' or ','
1580       *
1581       *
1582       * @param p_char the character to check
1583       * @return true if the string contains any reserved characters
1584       */
1585      private static boolean isReservedCharacter(char p_char)
1586      {
1587        return RESERVED_CHARACTERS.indexOf(p_char) != -1;
1588      }
1589    
1590      /**
1591       * Determine whether a char is an unreserved character.
1592       *
1593       *
1594       * @param p_char the character to check
1595       * @return true if the char is unreserved, false otherwise
1596       */
1597      private static boolean isUnreservedCharacter(char p_char)
1598      {
1599        return (isAlphanum(p_char) || MARK_CHARACTERS.indexOf(p_char) != -1);
1600      }
1601    
1602      /**
1603       * Determine whether a given string contains only URI characters (also
1604       * called "uric" in RFC 2396). uric consist of all reserved
1605       * characters, unreserved characters and escaped characters.
1606       *
1607       *
1608       * @param p_uric URI string
1609       * @return true if the string is comprised of uric, false otherwise
1610       */
1611      private static boolean isURIString(String p_uric)
1612      {
1613    
1614        if (p_uric == null)
1615        {
1616          return false;
1617        }
1618    
1619        int end = p_uric.length();
1620        char testChar = '\0';
1621    
1622        for (int i = 0; i < end; i++)
1623        {
1624          testChar = p_uric.charAt(i);
1625    
1626          if (testChar == '%')
1627          {
1628            if (i + 2 >= end ||!isHex(p_uric.charAt(i + 1))
1629                    ||!isHex(p_uric.charAt(i + 2)))
1630            {
1631              return false;
1632            }
1633            else
1634            {
1635              i += 2;
1636    
1637              continue;
1638            }
1639          }
1640    
1641          if (isReservedCharacter(testChar) || isUnreservedCharacter(testChar))
1642          {
1643            continue;
1644          }
1645          else
1646          {
1647            return false;
1648          }
1649        }
1650    
1651        return true;
1652      }
1653    }