151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski/*
22c87ad3a45cecf9e344487cad1abfdebe79f2c7cNarayan Kamath * Copyright (C) 2014 The Android Open Source Project
37f4b1b8935a58d3f44351083cf5ef19045761de3Yi Kong * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved.
451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * This code is free software; you can redistribute it and/or modify it
751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * under the terms of the GNU General Public License version 2 only, as
851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * published by the Free Software Foundation.  Oracle designates this
951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * particular file as subject to the "Classpath" exception as provided
1051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * by Oracle in the LICENSE file that accompanied this code.
1151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
1251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * This code is distributed in the hope that it will be useful, but WITHOUT
1351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
1451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
1551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * version 2 for more details (a copy is included in the LICENSE file that
1651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * accompanied this code).
1751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
1851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * You should have received a copy of the GNU General Public License version
1951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * 2 along with this work; if not, write to the Free Software Foundation,
2051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
2151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
2251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
2351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * or visit www.oracle.com if you need additional information or have any
2451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * questions.
2551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski */
2651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
2751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebskipackage java.net;
2851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
2951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebskiimport java.io.IOException;
3051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebskiimport java.io.InvalidObjectException;
3151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebskiimport java.io.ObjectInputStream;
3251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebskiimport java.io.ObjectOutputStream;
3351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebskiimport java.io.Serializable;
3451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebskiimport java.nio.ByteBuffer;
3551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebskiimport java.nio.CharBuffer;
3651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebskiimport java.nio.charset.CharsetDecoder;
3751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebskiimport java.nio.charset.CoderResult;
3851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebskiimport java.nio.charset.CodingErrorAction;
3951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebskiimport java.nio.charset.CharacterCodingException;
4051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebskiimport java.text.Normalizer;
4151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebskiimport sun.nio.cs.ThreadLocalCoders;
4251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
4351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebskiimport java.lang.Character;             // for javadoc
4451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebskiimport java.lang.NullPointerException;  // for javadoc
4551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
4651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
4751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski/**
4851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * Represents a Uniform Resource Identifier (URI) reference.
4951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
5051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <p> Aside from some minor deviations noted below, an instance of this
5151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * class represents a URI reference as defined by
5251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <a href="http://www.ietf.org/rfc/rfc2396.txt"><i>RFC&nbsp;2396: Uniform
5351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * Resource Identifiers (URI): Generic Syntax</i></a>, amended by <a
5451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * href="http://www.ietf.org/rfc/rfc2732.txt"><i>RFC&nbsp;2732: Format for
5551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * Literal IPv6 Addresses in URLs</i></a>. The Literal IPv6 address format
5651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * also supports scope_ids. The syntax and usage of scope_ids is described
5751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <a href="Inet6Address.html#scoped">here</a>.
5851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * This class provides constructors for creating URI instances from
5951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * their components or by parsing their string forms, methods for accessing the
6051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * various components of an instance, and methods for normalizing, resolving,
6151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * and relativizing URI instances.  Instances of this class are immutable.
6251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
6351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
643a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong * <h3> URI syntax and components </h3>
6551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
6651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * At the highest level a URI reference (hereinafter simply "URI") in string
6751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * form has the syntax
6851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
6951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <blockquote>
703a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong * [<i>scheme</i><b>{@code :}</b>]<i>scheme-specific-part</i>[<b>{@code #}</b><i>fragment</i>]
7151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * </blockquote>
7251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
7351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * where square brackets [...] delineate optional components and the characters
743a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong * <b>{@code :}</b> and <b>{@code #}</b> stand for themselves.
7551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
7651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <p> An <i>absolute</i> URI specifies a scheme; a URI that is not absolute is
7751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * said to be <i>relative</i>.  URIs are also classified according to whether
7851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * they are <i>opaque</i> or <i>hierarchical</i>.
7951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
8051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <p> An <i>opaque</i> URI is an absolute URI whose scheme-specific part does
813a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong * not begin with a slash character ({@code '/'}).  Opaque URIs are not
8251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * subject to further parsing.  Some examples of opaque URIs are:
8351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
8451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <blockquote><table cellpadding=0 cellspacing=0 summary="layout">
 * <tr><td>{@code mailto:java-net@java.sun.com}</td></tr>
 * <tr><td>{@code news:comp.lang.java}</td></tr>
873a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong * <tr><td>{@code urn:isbn:096139210x}</td></tr>
8851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * </table></blockquote>
8951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
9051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <p> A <i>hierarchical</i> URI is either an absolute URI whose
9151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * scheme-specific part begins with a slash character, or a relative URI, that
9251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * is, a URI that does not specify a scheme.  Some examples of hierarchical
9351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * URIs are:
9451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
9551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <blockquote>
963a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong * {@code http://java.sun.com/j2se/1.3/}<br>
973a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong * {@code docs/guide/collections/designfaq.html#28}<br>
983a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong * {@code ../../../demo/jfc/SwingSet2/src/SwingSet2.java}<br>
993a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong * {@code file:///~/calendar}
10051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * </blockquote>
10151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
10251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <p> A hierarchical URI is subject to further parsing according to the syntax
10351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
10451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <blockquote>
1053a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong * [<i>scheme</i><b>{@code :}</b>][<b>{@code //}</b><i>authority</i>][<i>path</i>][<b>{@code ?}</b><i>query</i>][<b>{@code #}</b><i>fragment</i>]
10651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * </blockquote>
10751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
1083a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong * where the characters <b>{@code :}</b>, <b>{@code /}</b>,
1093a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong * <b>{@code ?}</b>, and <b>{@code #}</b> stand for themselves.  The
11051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * scheme-specific part of a hierarchical URI consists of the characters
11151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * between the scheme and fragment components.
11251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
11351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <p> The authority component of a hierarchical URI is, if specified, either
11451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <i>server-based</i> or <i>registry-based</i>.  A server-based authority
11551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * parses according to the familiar syntax
11651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
11751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <blockquote>
1183a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong * [<i>user-info</i><b>{@code @}</b>]<i>host</i>[<b>{@code :}</b><i>port</i>]
11951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * </blockquote>
12051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
1213a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong * where the characters <b>{@code @}</b> and <b>{@code :}</b> stand for
12251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * themselves.  Nearly all URI schemes currently in use are server-based.  An
12351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * authority component that does not parse in this way is considered to be
12451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * registry-based.
12551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
12651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <p> The path component of a hierarchical URI is itself said to be absolute
1273a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong * if it begins with a slash character ({@code '/'}); otherwise it is
12851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * relative.  The path of a hierarchical URI that is either absolute or
12951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * specifies an authority is always absolute.
13051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
13151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <p> All told, then, a URI instance has the following nine components:
13251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
13351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <blockquote><table summary="Describes the components of a URI:scheme,scheme-specific-part,authority,user-info,host,port,path,query,fragment">
13451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <tr><th><i>Component</i></th><th><i>Type</i></th></tr>
1353a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong * <tr><td>scheme</td><td>{@code String}</td></tr>
1363a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong * <tr><td>scheme-specific-part&nbsp;&nbsp;&nbsp;&nbsp;</td><td>{@code String}</td></tr>
1373a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong * <tr><td>authority</td><td>{@code String}</td></tr>
1383a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong * <tr><td>user-info</td><td>{@code String}</td></tr>
1393a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong * <tr><td>host</td><td>{@code String}</td></tr>
1403a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong * <tr><td>port</td><td>{@code int}</td></tr>
1413a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong * <tr><td>path</td><td>{@code String}</td></tr>
1423a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong * <tr><td>query</td><td>{@code String}</td></tr>
1433a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong * <tr><td>fragment</td><td>{@code String}</td></tr>
14451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * </table></blockquote>
14551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
14651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * In a given instance any particular component is either <i>undefined</i> or
14751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <i>defined</i> with a distinct value.  Undefined string components are
1483a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong * represented by {@code null}, while undefined integer components are
1493a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong * represented by {@code -1}.  A string component may be defined to have the
15051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * empty string as its value; this is not equivalent to that component being
15151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * undefined.
15251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
15351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <p> Whether a particular component is or is not defined in an instance
15451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * depends upon the type of the URI being represented.  An absolute URI has a
15551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * scheme component.  An opaque URI has a scheme, a scheme-specific part, and
15651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * possibly a fragment, but has no other components.  A hierarchical URI always
15751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * has a path (though it may be empty) and a scheme-specific-part (which at
15851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * least contains the path), and may have any of the other components.  If the
15951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * authority component is present and is server-based then the host component
16051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * will be defined and the user-information and port components may be defined.
16151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
16251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
16351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <h4> Operations on URI instances </h4>
16451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
16551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * The key operations supported by this class are those of
16651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <i>normalization</i>, <i>resolution</i>, and <i>relativization</i>.
16751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
1683a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong * <p> <i>Normalization</i> is the process of removing unnecessary {@code "."}
1693a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong * and {@code ".."} segments from the path component of a hierarchical URI.
1703a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong * Each {@code "."} segment is simply removed.  A {@code ".."} segment is
1713a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong * removed only if it is preceded by a non-{@code ".."} segment.
17251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * Normalization has no effect upon opaque URIs.
17351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
17451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <p> <i>Resolution</i> is the process of resolving one URI against another,
17551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <i>base</i> URI.  The resulting URI is constructed from components of both
17651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * URIs in the manner specified by RFC&nbsp;2396, taking components from the
17751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * base URI for those not specified in the original.  For hierarchical URIs,
17851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * the path of the original is resolved against the path of the base and then
17951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * normalized.  The result, for example, of resolving
18051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
18151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <blockquote>
1823a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong * {@code docs/guide/collections/designfaq.html#28}
1833a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong * &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
1843a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong * &nbsp;&nbsp;&nbsp;&nbsp;(1)
18551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * </blockquote>
18651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
1873a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong * against the base URI {@code http://java.sun.com/j2se/1.3/} is the result
18851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * URI
18951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
19051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <blockquote>
1913a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong * {@code http://java.sun.com/j2se/1.3/docs/guide/collections/designfaq.html#28}
19251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * </blockquote>
19351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
19451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * Resolving the relative URI
19551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
19651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <blockquote>
 * {@code ../../../demo/jfc/SwingSet2/src/SwingSet2.java}&nbsp;&nbsp;&nbsp;&nbsp;(2)
19851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * </blockquote>
19951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
20051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * against this result yields, in turn,
20151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
20251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <blockquote>
2033a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong * {@code http://java.sun.com/j2se/1.3/demo/jfc/SwingSet2/src/SwingSet2.java}
20451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * </blockquote>
20551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
20651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * Resolution of both absolute and relative URIs, and of both absolute and
20751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * relative paths in the case of hierarchical URIs, is supported.  Resolving
 * the URI {@code file:///~/calendar} against any other URI simply yields the
20951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * original URI, since it is absolute.  Resolving the relative URI (2) above
21051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * against the relative base URI (1) yields the normalized, but still relative,
21151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * URI
21251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
21351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <blockquote>
2143a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong * {@code demo/jfc/SwingSet2/src/SwingSet2.java}
21551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * </blockquote>
21651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
21751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <p> <i>Relativization</i>, finally, is the inverse of resolution: For any
21851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * two normalized URIs <i>u</i> and&nbsp;<i>v</i>,
21951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
22051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <blockquote>
2213a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong *   <i>u</i>{@code .relativize(}<i>u</i>{@code .resolve(}<i>v</i>{@code )).equals(}<i>v</i>{@code )}&nbsp;&nbsp;and<br>
2223a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong *   <i>u</i>{@code .resolve(}<i>u</i>{@code .relativize(}<i>v</i>{@code )).equals(}<i>v</i>{@code )}&nbsp;&nbsp;.<br>
22351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * </blockquote>
22451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
22551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * This operation is often useful when constructing a document containing URIs
22651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * that must be made relative to the base URI of the document wherever
22751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * possible.  For example, relativizing the URI
22851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
22951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <blockquote>
2303a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong * {@code http://java.sun.com/j2se/1.3/docs/guide/index.html}
23151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * </blockquote>
23251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
23351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * against the base URI
23451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
23551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <blockquote>
2363a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong * {@code http://java.sun.com/j2se/1.3}
23751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * </blockquote>
23851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
2393a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong * yields the relative URI {@code docs/guide/index.html}.
24051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
24151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
24251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <h4> Character categories </h4>
24351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
24451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * RFC&nbsp;2396 specifies precisely which characters are permitted in the
24551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * various components of a URI reference.  The following categories, most of
24651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * which are taken from that specification, are used below to describe these
24751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * constraints:
24851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
24951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <blockquote><table cellspacing=2 summary="Describes categories alpha,digit,alphanum,unreserved,punct,reserved,escaped,and other">
25051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   <tr><th valign=top><i>alpha</i></th>
25151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *       <td>The US-ASCII alphabetic characters,
2523a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong *        {@code 'A'}&nbsp;through&nbsp;{@code 'Z'}
2533a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong *        and {@code 'a'}&nbsp;through&nbsp;{@code 'z'}</td></tr>
25451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   <tr><th valign=top><i>digit</i></th>
25551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *       <td>The US-ASCII decimal digit characters,
2563a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong *       {@code '0'}&nbsp;through&nbsp;{@code '9'}</td></tr>
25751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   <tr><th valign=top><i>alphanum</i></th>
25851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *       <td>All <i>alpha</i> and <i>digit</i> characters</td></tr>
25951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   <tr><th valign=top><i>unreserved</i>&nbsp;&nbsp;&nbsp;&nbsp;</th>
26051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *       <td>All <i>alphanum</i> characters together with those in the string
2613a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong *        {@code "_-!.~'()*"}</td></tr>
26251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   <tr><th valign=top><i>punct</i></th>
2633a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong *       <td>The characters in the string {@code ",;:$&+="}</td></tr>
26451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   <tr><th valign=top><i>reserved</i></th>
26551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *       <td>All <i>punct</i> characters together with those in the string
2663a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong *        {@code "?/[]@"}</td></tr>
26751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   <tr><th valign=top><i>escaped</i></th>
26851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *       <td>Escaped octets, that is, triplets consisting of the percent
2693a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong *           character ({@code '%'}) followed by two hexadecimal digits
2703a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong *           ({@code '0'}-{@code '9'}, {@code 'A'}-{@code 'F'}, and
2713a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong *           {@code 'a'}-{@code 'f'})</td></tr>
27251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   <tr><th valign=top><i>other</i></th>
27351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *       <td>The Unicode characters that are not in the US-ASCII character set,
27451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *           are not control characters (according to the {@link
27551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *           java.lang.Character#isISOControl(char) Character.isISOControl}
27651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *           method), and are not space characters (according to the {@link
27751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *           java.lang.Character#isSpaceChar(char) Character.isSpaceChar}
27851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *           method)&nbsp;&nbsp;<i>(<b>Deviation from RFC 2396</b>, which is
27951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *           limited to US-ASCII)</i></td></tr>
28051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * </table></blockquote>
28151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
28251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <p><a name="legal-chars"></a> The set of all legal URI characters consists of
28351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * the <i>unreserved</i>, <i>reserved</i>, <i>escaped</i>, and <i>other</i>
28451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * characters.
28551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
28651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
28751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <h4> Escaped octets, quotation, encoding, and decoding </h4>
28851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
28951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * RFC 2396 allows escaped octets to appear in the user-info, path, query, and
29051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * fragment components.  Escaping serves two purposes in URIs:
29151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
29251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <ul>
29351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
29451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   <li><p> To <i>encode</i> non-US-ASCII characters when a URI is required to
29551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   conform strictly to RFC&nbsp;2396 by not containing any <i>other</i>
29651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   characters.  </p></li>
29751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
29851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   <li><p> To <i>quote</i> characters that are otherwise illegal in a
29951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   component.  The user-info, path, query, and fragment components differ
30051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   slightly in terms of which characters are considered legal and illegal.
30151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   </p></li>
30251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
30351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * </ul>
30451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
30551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * These purposes are served in this class by three related operations:
30651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
30751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <ul>
30851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
30951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   <li><p><a name="encode"></a> A character is <i>encoded</i> by replacing it
31051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   with the sequence of escaped octets that represent that character in the
 *   UTF-8 character set.  The Euro currency symbol (<code>'&#92;u20AC'</code>),
3123a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong *   for example, is encoded as {@code "%E2%82%AC"}.  <i>(<b>Deviation from
31351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   RFC&nbsp;2396</b>, which does not specify any particular character
31451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   set.)</i> </p></li>
31551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
31651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   <li><p><a name="quote"></a> An illegal character is <i>quoted</i> simply by
31751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   encoding it.  The space character, for example, is quoted by replacing it
3183a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong *   with {@code "%20"}.  UTF-8 contains US-ASCII, hence for US-ASCII
31951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   characters this transformation has exactly the effect required by
32051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   RFC&nbsp;2396. </p></li>
32151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
32251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   <li><p><a name="decode"></a>
32351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   A sequence of escaped octets is <i>decoded</i> by
32451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   replacing it with the sequence of characters that it represents in the
32551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   UTF-8 character set.  UTF-8 contains US-ASCII, hence decoding has the
32651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   effect of de-quoting any quoted US-ASCII characters as well as that of
32751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   decoding any encoded non-US-ASCII characters.  If a <a
32851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   href="../nio/charset/CharsetDecoder.html#ce">decoding error</a> occurs
32951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   when decoding the escaped octets then the erroneous octets are replaced by
3303a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong *   {@code '&#92;uFFFD'}, the Unicode replacement character.  </p></li>
33151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
33251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * </ul>
33351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
33451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * These operations are exposed in the constructors and methods of this class
33551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * as follows:
33651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
33751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <ul>
33851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
3393a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong *   <li><p> The {@linkplain #URI(java.lang.String) single-argument
3403a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong *   constructor} requires any illegal characters in its argument to be
34151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   quoted and preserves any escaped octets and <i>other</i> characters that
34251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   are present.  </p></li>
34351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
3443a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong *   <li><p> The {@linkplain
34551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   #URI(java.lang.String,java.lang.String,java.lang.String,int,java.lang.String,java.lang.String,java.lang.String)
3463a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong *   multi-argument constructors} quote illegal characters as
34751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   required by the components in which they appear.  The percent character
3483a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong *   ({@code '%'}) is always quoted by these constructors.  Any <i>other</i>
34951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   characters are preserved.  </p></li>
35051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
35151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   <li><p> The {@link #getRawUserInfo() getRawUserInfo}, {@link #getRawPath()
35251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   getRawPath}, {@link #getRawQuery() getRawQuery}, {@link #getRawFragment()
35351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   getRawFragment}, {@link #getRawAuthority() getRawAuthority}, and {@link
35451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   #getRawSchemeSpecificPart() getRawSchemeSpecificPart} methods return the
35551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   values of their corresponding components in raw form, without interpreting
35651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   any escaped octets.  The strings returned by these methods may contain
35751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   both escaped octets and <i>other</i> characters, and will not contain any
35851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   illegal characters.  </p></li>
35951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
36051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   <li><p> The {@link #getUserInfo() getUserInfo}, {@link #getPath()
36151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   getPath}, {@link #getQuery() getQuery}, {@link #getFragment()
36251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   getFragment}, {@link #getAuthority() getAuthority}, and {@link
36351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   #getSchemeSpecificPart() getSchemeSpecificPart} methods decode any escaped
36451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   octets in their corresponding components.  The strings returned by these
36551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   methods may contain both <i>other</i> characters and illegal characters,
36651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   and will not contain any escaped octets.  </p></li>
36751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
36851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   <li><p> The {@link #toString() toString} method returns a URI string with
36951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   all necessary quotation but which may contain <i>other</i> characters.
37051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   </p></li>
37151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
37251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   <li><p> The {@link #toASCIIString() toASCIIString} method returns a fully
37351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   quoted and encoded URI string that does not contain any <i>other</i>
37451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   characters.  </p></li>
37551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
37651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * </ul>
37751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
37851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
37951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <h4> Identities </h4>
38051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
38151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * For any URI <i>u</i>, it is always the case that
38251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
38351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <blockquote>
3843a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong * {@code new URI(}<i>u</i>{@code .toString()).equals(}<i>u</i>{@code )}&nbsp;.
38551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * </blockquote>
38651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
38751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * For any URI <i>u</i> that does not contain redundant syntax such as two
3883a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong * slashes before an empty authority (as in {@code file:///tmp/}&nbsp;) or a
38951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * colon following a host name but no port (as in
3903a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong * {@code http://java.sun.com:}&nbsp;), and that does not encode characters
39151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * except those that must be quoted, the following identities also hold:
3923a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong * <pre>
3933a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong *     new URI(<i>u</i>.getScheme(),
3943a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong *             <i>u</i>.getSchemeSpecificPart(),
3953a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong *             <i>u</i>.getFragment())
3963a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong *     .equals(<i>u</i>)</pre>
39751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * in all cases,
3983a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong * <pre>
3993a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong *     new URI(<i>u</i>.getScheme(),
4003a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong *             <i>u</i>.getAuthority(),
4013a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong *             <i>u</i>.getPath(), <i>u</i>.getQuery(),
4023a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong *             <i>u</i>.getFragment())
4033a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong *     .equals(<i>u</i>)</pre>
40451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * if <i>u</i> is hierarchical, and
4053a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong * <pre>
4063a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong *     new URI(<i>u</i>.getScheme(),
4073a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong *             <i>u</i>.getUserInfo(), <i>u</i>.getHost(), <i>u</i>.getPort(),
4083a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong *             <i>u</i>.getPath(), <i>u</i>.getQuery(),
4093a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong *             <i>u</i>.getFragment())
4103a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong *     .equals(<i>u</i>)</pre>
41151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * if <i>u</i> is hierarchical and has either no authority or a server-based
41251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * authority.
41351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
41451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
41551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <h4> URIs, URLs, and URNs </h4>
41651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
41751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * A URI is a uniform resource <i>identifier</i> while a URL is a uniform
41851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * resource <i>locator</i>.  Hence every URL is a URI, abstractly speaking, but
41951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * not every URI is a URL.  This is because there is another subcategory of
42051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * URIs, uniform resource <i>names</i> (URNs), which name resources but do not
4213a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong * specify how to locate them.  The {@code mailto}, {@code news}, and
4223a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong * {@code isbn} URIs shown above are examples of URNs.
42351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
42451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <p> The conceptual distinction between URIs and URLs is reflected in the
42551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * differences between this class and the {@link URL} class.
42651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
42751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <p> An instance of this class represents a URI reference in the syntactic
42851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * sense defined by RFC&nbsp;2396.  A URI may be either absolute or relative.
42951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * A URI string is parsed according to the generic syntax without regard to the
43051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * scheme, if any, that it specifies.  No lookup of the host, if any, is
43151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * performed, and no scheme-dependent stream handler is constructed.  Equality,
43251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * hashing, and comparison are defined strictly in terms of the character
43351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * content of the instance.  In other words, a URI instance is little more than
43451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * a structured string that supports the syntactic, scheme-independent
43551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * operations of comparison, normalization, resolution, and relativization.
43651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
43751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <p> An instance of the {@link URL} class, by contrast, represents the
43851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * syntactic components of a URL together with some of the information required
43951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * to access the resource that it describes.  A URL must be absolute, that is,
44051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * it must always specify a scheme.  A URL string is parsed according to its
44151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * scheme.  A stream handler is always established for a URL, and in fact it is
44251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * impossible to create a URL instance for a scheme for which no handler is
44351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * available.  Equality and hashing depend upon both the scheme and the
44451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * Internet address of the host, if any; comparison is not defined.  In other
44551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * words, a URL is a structured string that supports the syntactic operation of
44651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * resolution as well as the network I/O operations of looking up the host and
44751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * opening a connection to the specified resource.
44851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
44951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
45051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * @author Mark Reinhold
45151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * @since 1.4
45251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
4539e78cee3f3edf84254174717f475605d712aad1cNarayan Kamath * @see <a href="http://www.ietf.org/rfc/rfc2279.txt">RFC&nbsp;2279: UTF-8, a transformation format of ISO 10646</a>
4549e78cee3f3edf84254174717f475605d712aad1cNarayan Kamath * @see <a href="http://www.ietf.org/rfc/rfc2373.txt">RFC&nbsp;2373: IPv6 Addressing Architecture</a>
4559e78cee3f3edf84254174717f475605d712aad1cNarayan Kamath * @see <a href="http://www.ietf.org/rfc/rfc2396.txt">RFC&nbsp;2396: Uniform Resource Identifiers (URI): Generic Syntax</a>
4569e78cee3f3edf84254174717f475605d712aad1cNarayan Kamath * @see <a href="http://www.ietf.org/rfc/rfc2732.txt">RFC&nbsp;2732: Format for Literal IPv6 Addresses in URLs</a>
45751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski */
4586975f84c2ed72e1e26d20190b6f318718c849008Tobias Thierer// Android-changed: Reformat @see links.
45951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebskipublic final class URI
46051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    implements Comparable<URI>, Serializable
46151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski{
4623a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong
46351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // Note: Comments containing the word "ASSERT" indicate places where a
46451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // throw of an InternalError should be replaced by an appropriate assertion
46551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // statement once asserts are enabled in the build.
46651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
46751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    static final long serialVersionUID = -6052424284110960213L;  // explicit version id for the Serializable form
46851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
46951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
47051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // -- Properties and components of this instance --
47151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
47251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // Components of all URIs: [<scheme>:]<scheme-specific-part>[#<fragment>]
47351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private transient String scheme;            // null ==> relative URI
47451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private transient String fragment;
47551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
47651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // Hierarchical URI components: [//<authority>]<path>[?<query>]
47751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private transient String authority;         // Registry or server
47851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
47951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // Server-based authority: [<userInfo>@]<host>[:<port>]
48051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private transient String userInfo;
48151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private transient String host;              // null ==> registry-based
48251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private transient int port = -1;            // -1 ==> undefined
48351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
48451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // Remaining components of hierarchical URIs
48551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private transient String path;              // null ==> opaque
48651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private transient String query;
48751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
48851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // The remaining fields may be computed on demand
48951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
49051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private volatile transient String schemeSpecificPart;
49151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private volatile transient int hash;        // Zero ==> undefined
49251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
    // Decoded counterparts of the components above; each one starts out null.
    // NOTE(review): presumably filled in lazily by the matching non-raw getter
    // (getUserInfo(), getPath(), ...) -- confirm against the accessors.
49351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private volatile transient String decodedUserInfo = null;
49451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private volatile transient String decodedAuthority = null;
49551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private volatile transient String decodedPath = null;
49651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private volatile transient String decodedQuery = null;
49751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private volatile transient String decodedFragment = null;
49851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private volatile transient String decodedSchemeSpecificPart = null;
49951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
    // Every other instance field in this group is declared transient, which
    // leaves the string form below as the only non-transient instance state.
50051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    /**
50151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * The string form of this URI.
50251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
50351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @serial
50451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     */
50551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private volatile String string;             // The only serializable field
50651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
50751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
50851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
50951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // -- Constructors and factories --
51051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
51151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private URI() { }                           // Used internally; empty body leaves every field at its declared default (port = -1, references null)
51251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
51351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    /**
51451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * Constructs a URI by parsing the given string.
51551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
51651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <p> This constructor parses the given string exactly as specified by the
51751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * grammar in <a
51851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * href="http://www.ietf.org/rfc/rfc2396.txt">RFC&nbsp;2396</a>,
51951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * Appendix&nbsp;A, <b><i>except for the following deviations:</i></b> </p>
52051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
5213a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong     * <ul>
52251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
52351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   <li><p> An empty authority component is permitted as long as it is
52451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   followed by a non-empty path, a query component, or a fragment
52551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   component.  This allows the parsing of URIs such as
5263a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong     *   {@code "file:///foo/bar"}, which seems to be the intent of
52751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   RFC&nbsp;2396 although the grammar does not permit it.  If the
52851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   authority component is empty then the user-information, host, and port
52951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   components are undefined. </p></li>
53051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
53151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   <li><p> Empty relative paths are permitted; this seems to be the
53251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   intent of RFC&nbsp;2396 although the grammar does not permit it.  The
53351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   primary consequence of this deviation is that a standalone fragment
5343a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong     *   such as {@code "#foo"} parses as a relative URI with an empty path
53551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   and the given fragment, and can be usefully <a
53651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   href="#resolve-frag">resolved</a> against a base URI. </p></li>
53751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
53851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   <li><p> IPv4 addresses in host components are parsed rigorously, as
53951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   specified by <a
54051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   href="http://www.ietf.org/rfc/rfc2732.txt">RFC&nbsp;2732</a>: Each
54151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   element of a dotted-quad address must contain no more than three
54251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   decimal digits.  Each element is further constrained to have a value
54351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   no greater than 255. </p></li>
54451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
54551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   <li> <p> Hostnames in host components that comprise only a single
54651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   domain label are permitted to start with an <i>alphanum</i>
54751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   character. This seems to be the intent of <a
54851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   href="http://www.ietf.org/rfc/rfc2396.txt">RFC&nbsp;2396</a>
54951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   section&nbsp;3.2.2 although the grammar does not permit it. The
55051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   consequence of this deviation is that the authority component of a
5513a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong     *   hierarchical URI such as {@code s://123} will parse as a server-based
55251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   authority. </p></li>
55351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
55451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   <li><p> IPv6 addresses are permitted for the host component.  An IPv6
5553a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong     *   address must be enclosed in square brackets ({@code '['} and
5563a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong     *   {@code ']'}) as specified by <a
55751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   href="http://www.ietf.org/rfc/rfc2732.txt">RFC&nbsp;2732</a>.  The
55851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   IPv6 address itself must parse according to <a
55951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   href="http://www.ietf.org/rfc/rfc2373.txt">RFC&nbsp;2373</a>.  IPv6
56051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   addresses are further constrained to describe no more than sixteen
56151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   bytes of address information, a constraint implicit in RFC&nbsp;2373
56251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   but not expressible in the grammar. </p></li>
56351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
56451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   <li><p> Characters in the <i>other</i> category are permitted wherever
56551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   RFC&nbsp;2396 permits <i>escaped</i> octets, that is, in the
56651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   user-information, path, query, and fragment components, as well as in
56751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   the authority component if the authority is registry-based.  This
56851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   allows URIs to contain Unicode characters beyond those in the US-ASCII
56951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   character set. </p></li>
57051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
57151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * </ul>
57251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
57351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @param  str   The string to be parsed into a URI
57451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
57551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @throws  NullPointerException
5763a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong     *          If {@code str} is {@code null}
57751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
57851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @throws  URISyntaxException
57951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *          If the given string violates RFC&nbsp;2396, as augmented
58051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *          by the above deviations
58151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     */
58251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    public URI(String str) throws URISyntaxException {
        // All parsing is delegated to a Parser built over str; the Parser
        // instance itself is not retained once parse() returns.
        // NOTE(review): presumably parse() fills in this URI's component
        // fields as a side effect, and the boolean argument selects a parsing
        // mode -- neither is visible from this constructor; confirm against
        // Parser.parse.
58351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        new Parser(str).parse(false);
58451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
58551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
58651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    /**
58751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * Constructs a hierarchical URI from the given components.
58851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
58951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <p> If a scheme is given then the path, if also given, must either be
5903a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong     * empty or begin with a slash character ({@code '/'}).  Otherwise a
5913a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong     * component of the new URI may be left undefined by passing {@code null}
5923a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong     * for the corresponding parameter or, in the case of the {@code port}
5933a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong     * parameter, by passing {@code -1}.
59451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
59551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <p> This constructor first builds a URI string from the given components
59651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * according to the rules specified in <a
59751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * href="http://www.ietf.org/rfc/rfc2396.txt">RFC&nbsp;2396</a>,
59851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * section&nbsp;5.2, step&nbsp;7: </p>
59951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
60051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <ol>
60151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
60251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   <li><p> Initially, the result string is empty. </p></li>
60351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
60451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   <li><p> If a scheme is given then it is appended to the result,
6053a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong     *   followed by a colon character ({@code ':'}).  </p></li>
60651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
60751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   <li><p> If user information, a host, or a port are given then the
6083a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong     *   string {@code "//"} is appended.  </p></li>
60951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
61051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   <li><p> If user information is given then it is appended, followed by
6113a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong     *   a commercial-at character ({@code '@'}).  Any character not in the
61251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   <i>unreserved</i>, <i>punct</i>, <i>escaped</i>, or <i>other</i>
61351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   categories is <a href="#quote">quoted</a>.  </p></li>
61451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
61551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   <li><p> If a host is given then it is appended.  If the host is a
61651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   literal IPv6 address but is not enclosed in square brackets
6173a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong     *   ({@code '['} and {@code ']'}) then the square brackets are added.
61851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   </p></li>
61951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
62051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   <li><p> If a port number is given then a colon character
6213a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong     *   ({@code ':'}) is appended, followed by the port number in decimal.
62251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   </p></li>
62351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
62451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   <li><p> If a path is given then it is appended.  Any character not in
62551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   the <i>unreserved</i>, <i>punct</i>, <i>escaped</i>, or <i>other</i>
6263a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong     *   categories, and not equal to the slash character ({@code '/'}) or the
6273a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong     *   commercial-at character ({@code '@'}), is quoted.  </p></li>
62851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
62951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   <li><p> If a query is given then a question-mark character
6303a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong     *   ({@code '?'}) is appended, followed by the query.  Any character that
63151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   is not a <a href="#legal-chars">legal URI character</a> is quoted.
63251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   </p></li>
63351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
63451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   <li><p> Finally, if a fragment is given then a hash character
6353a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong     *   ({@code '#'}) is appended, followed by the fragment.  Any character
63651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   that is not a legal URI character is quoted.  </p></li>
63751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
63851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * </ol>
63951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
64051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <p> The resulting URI string is then parsed as if by invoking the {@link
64151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * #URI(String)} constructor and then invoking the {@link
64251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * #parseServerAuthority()} method upon the result; this may cause a {@link
64351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * URISyntaxException} to be thrown.  </p>
64451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
64551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @param   scheme    Scheme name
64651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @param   userInfo  User name and authorization information
64751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @param   host      Host name
64851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @param   port      Port number
64951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @param   path      Path
65051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @param   query     Query
65151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @param   fragment  Fragment
65251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
65351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @throws URISyntaxException
65451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *         If both a scheme and a path are given but the path is relative,
65551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *         if the URI string constructed from the given components violates
65651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *         RFC&nbsp;2396, or if the authority component of the string is
65751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *         present but cannot be parsed as a server-based authority
65851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     */
65951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    public URI(String scheme,
66051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski               String userInfo, String host, int port,
66151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski               String path, String query, String fragment)
66251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        throws URISyntaxException
66351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    {
66451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        String s = toString(scheme, null,
66551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                            null, userInfo, host, port,
66651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                            path, query, fragment);
66751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        checkPath(s, scheme, path);
66851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        new Parser(s).parse(true);
66951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
67051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
67151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    /**
67251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * Constructs a hierarchical URI from the given components.
67351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
67451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <p> If a scheme is given then the path, if also given, must either be
6753a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong     * empty or begin with a slash character ({@code '/'}).  Otherwise a
6763a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong     * component of the new URI may be left undefined by passing {@code null}
67751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * for the corresponding parameter.
67851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
67951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <p> This constructor first builds a URI string from the given components
68051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * according to the rules specified in <a
68151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * href="http://www.ietf.org/rfc/rfc2396.txt">RFC&nbsp;2396</a>,
68251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * section&nbsp;5.2, step&nbsp;7: </p>
68351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
68451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <ol>
68551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
68651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   <li><p> Initially, the result string is empty.  </p></li>
68751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
68851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   <li><p> If a scheme is given then it is appended to the result,
6893a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong     *   followed by a colon character ({@code ':'}).  </p></li>
69051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
6913a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong     *   <li><p> If an authority is given then the string {@code "//"} is
69251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   appended, followed by the authority.  If the authority contains a
69351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   literal IPv6 address then the address must be enclosed in square
6943a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong     *   brackets ({@code '['} and {@code ']'}).  Any character not in the
69551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   <i>unreserved</i>, <i>punct</i>, <i>escaped</i>, or <i>other</i>
69651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   categories, and not equal to the commercial-at character
6973a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong     *   ({@code '@'}), is <a href="#quote">quoted</a>.  </p></li>
69851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
69951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   <li><p> If a path is given then it is appended.  Any character not in
70051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   the <i>unreserved</i>, <i>punct</i>, <i>escaped</i>, or <i>other</i>
7013a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong     *   categories, and not equal to the slash character ({@code '/'}) or the
7023a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong     *   commercial-at character ({@code '@'}), is quoted.  </p></li>
70351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
70451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   <li><p> If a query is given then a question-mark character
7053a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong     *   ({@code '?'}) is appended, followed by the query.  Any character that
70651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   is not a <a href="#legal-chars">legal URI character</a> is quoted.
70751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   </p></li>
70851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
70951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   <li><p> Finally, if a fragment is given then a hash character
7103a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong     *   ({@code '#'}) is appended, followed by the fragment.  Any character
71151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   that is not a legal URI character is quoted.  </p></li>
71251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
71351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * </ol>
71451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
71551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <p> The resulting URI string is then parsed as if by invoking the {@link
71651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * #URI(String)} constructor and then invoking the {@link
71751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * #parseServerAuthority()} method upon the result; this may cause a {@link
71851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * URISyntaxException} to be thrown.  </p>
71951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
72051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @param   scheme     Scheme name
72151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @param   authority  Authority
72251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @param   path       Path
72351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @param   query      Query
72451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @param   fragment   Fragment
72551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
72651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @throws URISyntaxException
72751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *         If both a scheme and a path are given but the path is relative,
72851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *         if the URI string constructed from the given components violates
72951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *         RFC&nbsp;2396, or if the authority component of the string is
73051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *         present but cannot be parsed as a server-based authority
73151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     */
73251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    public URI(String scheme,
73351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski               String authority,
73451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski               String path, String query, String fragment)
73551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        throws URISyntaxException
73651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    {
73751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        String s = toString(scheme, null,
73851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                            authority, null, null, -1,
73951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                            path, query, fragment);
74051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        checkPath(s, scheme, path);
74151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        new Parser(s).parse(false);
74251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
74351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
74451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    /**
74551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * Constructs a hierarchical URI from the given components.
74651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
7473a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong     * <p> A component may be left undefined by passing {@code null}.
74851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
74951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <p> This convenience constructor works as if by invoking the
75051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * seven-argument constructor as follows:
75151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
7523a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong     * <blockquote>
7533a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong     * {@code new} {@link #URI(String, String, String, int, String, String, String)
7543a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong     * URI}{@code (scheme, null, host, -1, path, null, fragment);}
7553a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong     * </blockquote>
75651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
75751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @param   scheme    Scheme name
75851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @param   host      Host name
75951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @param   path      Path
76051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @param   fragment  Fragment
76151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
76251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @throws  URISyntaxException
76351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *          If the URI string constructed from the given components
76451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *          violates RFC&nbsp;2396
76551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     */
public URI(String scheme,
           String host,
           String path,
           String fragment) throws URISyntaxException
{
    // Pure delegation to the seven-argument constructor with the components
    // this overload does not accept left undefined.
    this(scheme,
         null,      // userInfo: undefined
         host,
         -1,        // port: undefined
         path,
         null,      // query: undefined
         fragment);
}
77151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
77251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    /**
77351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * Constructs a URI from the given components.
77451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
7753a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong     * <p> A component may be left undefined by passing {@code null}.
77651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
77751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <p> This constructor first builds a URI in string form using the given
77851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * components as follows:  </p>
77951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
78051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <ol>
78151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
78251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   <li><p> Initially, the result string is empty.  </p></li>
78351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
78451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   <li><p> If a scheme is given then it is appended to the result,
7853a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong     *   followed by a colon character ({@code ':'}).  </p></li>
78651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
78751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   <li><p> If a scheme-specific part is given then it is appended.  Any
78851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   character that is not a <a href="#legal-chars">legal URI character</a>
78951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   is <a href="#quote">quoted</a>.  </p></li>
79051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
79151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   <li><p> Finally, if a fragment is given then a hash character
7923a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong     *   ({@code '#'}) is appended to the string, followed by the fragment.
79351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   Any character that is not a legal URI character is quoted.  </p></li>
79451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
79551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * </ol>
79651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
79751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <p> The resulting URI string is then parsed in order to create the new
79851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * URI instance as if by invoking the {@link #URI(String)} constructor;
79951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * this may cause a {@link URISyntaxException} to be thrown.  </p>
80051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
80151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @param   scheme    Scheme name
80251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @param   ssp       Scheme-specific part
80351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @param   fragment  Fragment
80451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
80551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @throws  URISyntaxException
80651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *          If the URI string constructed from the given components
80751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *          violates RFC&nbsp;2396
80851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     */
80951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    public URI(String scheme, String ssp, String fragment)
81051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        throws URISyntaxException
81151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    {
81251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        new Parser(toString(scheme, ssp,
81351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                            null, null, null, -1,
81451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                            null, null, fragment))
81551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            .parse(false);
81651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
81751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
81851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    /**
81951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * Creates a URI by parsing the given string.
82051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
82151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <p> This convenience factory method works as if by invoking the {@link
82251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * #URI(String)} constructor; any {@link URISyntaxException} thrown by the
82351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * constructor is caught and wrapped in a new {@link
82451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * IllegalArgumentException} object, which is then thrown.
82551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
82651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <p> This method is provided for use in situations where it is known that
82751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * the given string is a legal URI, for example for URI constants declared
82851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * within a program, and so it would be considered a programming error
82951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * for the string not to parse as such.  The constructors, which throw
83051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * {@link URISyntaxException} directly, should be used in situations where a
83151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * URI is being constructed from user input or from some other source that
83251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * may be prone to errors.  </p>
83351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
83451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @param  str   The string to be parsed into a URI
83551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @return The new URI
83651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
83751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @throws  NullPointerException
8383a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong     *          If {@code str} is {@code null}
83951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
84051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @throws  IllegalArgumentException
84151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *          If the given string violates RFC&nbsp;2396
84251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     */
84351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    public static URI create(String str) {
84451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        try {
84551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            return new URI(str);
84651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        } catch (URISyntaxException x) {
84751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            throw new IllegalArgumentException(x.getMessage(), x);
84851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
84951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
85051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
85151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
85251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // -- Operations --
85351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
85451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    /**
85551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * Attempts to parse this URI's authority component, if defined, into
85651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * user-information, host, and port components.
85751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
85851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <p> If this URI's authority component has already been recognized as
85951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * being server-based then it will already have been parsed into
86051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * user-information, host, and port components.  In this case, or if this
86151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * URI has no authority component, this method simply returns this URI.
86251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
86351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <p> Otherwise this method attempts once more to parse the authority
86451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * component into user-information, host, and port components, and throws
86551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * an exception describing why the authority component could not be parsed
86651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * in that way.
86751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
86851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <p> This method is provided because the generic URI syntax specified in
86951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <a href="http://www.ietf.org/rfc/rfc2396.txt">RFC&nbsp;2396</a>
87051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * cannot always distinguish a malformed server-based authority from a
87151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * legitimate registry-based authority.  It must therefore treat some
87251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * instances of the former as instances of the latter.  The authority
8733a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong     * component in the URI string {@code "//foo:bar"}, for example, is not a
87451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * legal server-based authority but it is legal as a registry-based
87551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * authority.
87651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
87751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <p> In many common situations, for example when working with URIs that are
87851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * known to be either URNs or URLs, the hierarchical URIs being used will
87951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * always be server-based.  They therefore must either be parsed as such or
88051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * treated as an error.  In these cases a statement such as
88151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
88251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <blockquote>
8833a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong     * {@code URI }<i>u</i>{@code  = new URI(str).parseServerAuthority();}
88451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * </blockquote>
88551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
88651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <p> can be used to ensure that <i>u</i> always refers to a URI that, if
88751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * it has an authority component, has a server-based authority with proper
88851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * user-information, host, and port components.  Invoking this method also
88951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * ensures that if the authority could not be parsed in that way then an
89051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * appropriate diagnostic message can be issued based upon the exception
89151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * that is thrown. </p>
89251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
89351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @return  A URI whose authority field has been parsed
89451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *          as a server-based authority
89551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
89651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @throws  URISyntaxException
89751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *          If the authority component of this URI is defined
89851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *          but cannot be parsed as a server-based authority
89951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *          according to RFC&nbsp;2396
90051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     */
90151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    public URI parseServerAuthority()
90251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        throws URISyntaxException
90351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    {
90451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // We could be clever and cache the error message and index from the
90551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // exception thrown during the original parse, but that would require
90651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // either more fields or a more-obscure representation.
90751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if ((host != null) || (authority == null))
90851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            return this;
90951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        defineString();
91051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        new Parser(string).parse(true);
91151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        return this;
91251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
91351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
91451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    /**
91551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * Normalizes this URI's path.
91651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
91751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <p> If this URI is opaque, or if its path is already in normal form,
91851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * then this URI is returned.  Otherwise a new URI is constructed that is
91951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * identical to this URI except that its path is computed by normalizing
92051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * this URI's path in a manner consistent with <a
92151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * href="http://www.ietf.org/rfc/rfc2396.txt">RFC&nbsp;2396</a>,
92251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * section&nbsp;5.2, step&nbsp;6, sub-steps&nbsp;c through&nbsp;f; that is:
92351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * </p>
92451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
92551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <ol>
92651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
9273a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong     *   <li><p> All {@code "."} segments are removed. </p></li>
92851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
9293a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong     *   <li><p> If a {@code ".."} segment is preceded by a non-{@code ".."}
93051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   segment then both of these segments are removed.  This step is
93151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   repeated until it is no longer applicable. </p></li>
93251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
93351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   <li><p> If the path is relative, and if its first segment contains a
9343a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong     *   colon character ({@code ':'}), then a {@code "."} segment is
93551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   prepended.  This prevents a relative URI with a path such as
9363a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong     *   {@code "a:b/c/d"} from later being re-parsed as an opaque URI with a
9373a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong     *   scheme of {@code "a"} and a scheme-specific part of {@code "b/c/d"}.
93851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   <b><i>(Deviation from RFC&nbsp;2396)</i></b> </p></li>
93951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
94051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * </ol>
94151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
9423a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong     * <p> A normalized path will begin with one or more {@code ".."} segments
9433a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong     * if there were insufficient non-{@code ".."} segments preceding them to
9443a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong     * allow their removal.  A normalized path will begin with a {@code "."}
94551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * segment if one was inserted by step 3 above.  Otherwise, a normalized
9463a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong     * path will not contain any {@code "."} or {@code ".."} segments. </p>
94751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
94851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @return  A URI equivalent to this URI,
94951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *          but whose path is in normal form
95051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     */
95151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    public URI normalize() {
95251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        return normalize(this);
95351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
95451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
95551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    /**
95651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * Resolves the given URI against this URI.
95751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
95851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <p> If the given URI is already absolute, or if this URI is opaque, then
95951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * the given URI is returned.
96051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
96151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <p><a name="resolve-frag"></a> If the given URI's fragment component is
96251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * defined, its path component is empty, and its scheme, authority, and
96351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * query components are undefined, then a URI with the given fragment but
96451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * with all other components equal to those of this URI is returned.  This
96551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * allows a URI representing a standalone fragment reference, such as
9663a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong     * {@code "#foo"}, to be usefully resolved against a base URI.
96751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
96851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <p> Otherwise this method constructs a new hierarchical URI in a manner
96951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * consistent with <a
97051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * href="http://www.ietf.org/rfc/rfc2396.txt">RFC&nbsp;2396</a>,
97151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * section&nbsp;5.2; that is: </p>
97251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
97351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <ol>
97451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
97551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   <li><p> A new URI is constructed with this URI's scheme and the given
97651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   URI's query and fragment components. </p></li>
97751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
97851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   <li><p> If the given URI has an authority component then the new URI's
97951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   authority and path are taken from the given URI. </p></li>
98051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
98151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   <li><p> Otherwise the new URI's authority component is copied from
98251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   this URI, and its path is computed as follows: </p>
98351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
9843a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong     *   <ol>
98551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
98651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *     <li><p> If the given URI's path is absolute then the new URI's path
98751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *     is taken from the given URI. </p></li>
98851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
98951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *     <li><p> Otherwise the given URI's path is relative, and so the new
99051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *     URI's path is computed by resolving the path of the given URI
99151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *     against the path of this URI.  This is done by concatenating all but
99251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *     the last segment of this URI's path, if any, with the given URI's
99351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *     path and then normalizing the result as if by invoking the {@link
99451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *     #normalize() normalize} method. </p></li>
99551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
99651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   </ol></li>
99751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
99851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * </ol>
99951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
100051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <p> The result of this method is absolute if, and only if, either this
100151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * URI is absolute or the given URI is absolute.  </p>
100251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
100351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @param  uri  The URI to be resolved against this URI
100451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @return The resulting URI
100551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
100651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @throws  NullPointerException
10073a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong     *          If {@code uri} is {@code null}
100851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     */
100951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    public URI resolve(URI uri) {
101051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        return resolve(this, uri);
101151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
101251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
101351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    /**
101451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * Constructs a new URI by parsing the given string and then resolving it
101551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * against this URI.
101651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
101751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <p> This convenience method works as if invoking it were equivalent to
10183a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong     * evaluating the expression {@link #resolve(java.net.URI)
10193a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong     * resolve}{@code (URI.}{@link #create(String) create}{@code (str))}. </p>
102051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
102151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @param  str   The string to be parsed into a URI
102251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @return The resulting URI
102351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
102451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @throws  NullPointerException
10253a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong     *          If {@code str} is {@code null}
102651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
102751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @throws  IllegalArgumentException
102851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *          If the given string violates RFC&nbsp;2396
102951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     */
103051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    public URI resolve(String str) {
103151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        return resolve(URI.create(str));
103251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
103351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
103451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    /**
103551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * Relativizes the given URI against this URI.
103651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
103751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <p> The relativization of the given URI against this URI is computed as
103851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * follows: </p>
103951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
104051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <ol>
104151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
104251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   <li><p> If either this URI or the given URI are opaque, or if the
104351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   scheme and authority components of the two URIs are not identical, or
104451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   if the path of this URI is not a prefix of the path of the given URI,
104551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   then the given URI is returned. </p></li>
104651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
104751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   <li><p> Otherwise a new relative hierarchical URI is constructed with
104851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   query and fragment components taken from the given URI and with a path
104951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   component computed by removing this URI's path from the beginning of
105051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   the given URI's path. </p></li>
105151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
105251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * </ol>
105351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
105451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @param  uri  The URI to be relativized against this URI
105551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @return The resulting URI
105651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
105751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @throws  NullPointerException
10583a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong     *          If {@code uri} is {@code null}
105951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     */
106051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    public URI relativize(URI uri) {
106151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        return relativize(this, uri);
106251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
106351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
106451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    /**
106551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * Constructs a URL from this URI.
106651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
106751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <p> This convenience method works as if invoking it were equivalent to
10683a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong     * evaluating the expression {@code new&nbsp;URL(this.toString())} after
106951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * first checking that this URI is absolute. </p>
107051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
107151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @return  A URL constructed from this URI
107251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
107351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @throws  IllegalArgumentException
     *          If this URI is not absolute
107551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
107651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @throws  MalformedURLException
107751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *          If a protocol handler for the URL could not be found,
107851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *          or if some other error occurred while constructing the URL
107951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     */
108051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    public URL toURL()
108151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        throws MalformedURLException {
108251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if (!isAbsolute())
108351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            throw new IllegalArgumentException("URI is not absolute");
108451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        return new URL(toString());
108551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
108651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
108751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // -- Component access methods --
108851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
108951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    /**
109051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * Returns the scheme component of this URI.
109151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
109251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <p> The scheme component of a URI, if defined, only contains characters
10933a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong     * in the <i>alphanum</i> category and in the string {@code "-.+"}.  A
     * scheme always starts with an <i>alpha</i> character. </p>
     *
     * <p> The scheme component of a URI cannot contain escaped octets, hence this
     * method does not perform any decoding. </p>
109851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
109951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @return  The scheme component of this URI,
11003a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong     *          or {@code null} if the scheme is undefined
110151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     */
110251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    public String getScheme() {
110351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        return scheme;
110451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
110551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
110651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    /**
110751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * Tells whether or not this URI is absolute.
110851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
110951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <p> A URI is absolute if, and only if, it has a scheme component. </p>
111051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
11113a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong     * @return  {@code true} if, and only if, this URI is absolute
111251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     */
111351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    public boolean isAbsolute() {
111451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        return scheme != null;
111551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
111651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
111751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    /**
111851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * Tells whether or not this URI is opaque.
111951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
112051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <p> A URI is opaque if, and only if, it is absolute and its
112151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * scheme-specific part does not begin with a slash character ('/').
112251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * An opaque URI has a scheme, a scheme-specific part, and possibly
112351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * a fragment; all other components are undefined. </p>
112451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
11253a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong     * @return  {@code true} if, and only if, this URI is opaque
112651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     */
112751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    public boolean isOpaque() {
112851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        return path == null;
112951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
113051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
113151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    /**
113251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * Returns the raw scheme-specific part of this URI.  The scheme-specific
113351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * part is never undefined, though it may be empty.
113451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
113551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <p> The scheme-specific part of a URI only contains legal URI
113651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * characters. </p>
113751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
113851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @return  The raw scheme-specific part of this URI
11393a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong     *          (never {@code null})
114051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     */
114151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    public String getRawSchemeSpecificPart() {
114251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        defineSchemeSpecificPart();
114351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        return schemeSpecificPart;
114451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
114551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
114651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    /**
114751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * Returns the decoded scheme-specific part of this URI.
114851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
114951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <p> The string returned by this method is equal to that returned by the
115051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * {@link #getRawSchemeSpecificPart() getRawSchemeSpecificPart} method
115151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * except that all sequences of escaped octets are <a
115251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * href="#decode">decoded</a>.  </p>
115351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
115451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @return  The decoded scheme-specific part of this URI
11553a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong     *          (never {@code null})
115651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     */
115751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    public String getSchemeSpecificPart() {
115851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if (decodedSchemeSpecificPart == null)
115951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            decodedSchemeSpecificPart = decode(getRawSchemeSpecificPart());
116051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        return decodedSchemeSpecificPart;
116151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
116251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
116351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    /**
116451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * Returns the raw authority component of this URI.
116551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
116651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <p> The authority component of a URI, if defined, only contains the
11673a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong     * commercial-at character ({@code '@'}) and characters in the
116851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <i>unreserved</i>, <i>punct</i>, <i>escaped</i>, and <i>other</i>
116951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * categories.  If the authority is server-based then it is further
117051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * constrained to have valid user-information, host, and port
117151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * components. </p>
117251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
117351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @return  The raw authority component of this URI,
11743a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong     *          or {@code null} if the authority is undefined
117551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     */
117651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    public String getRawAuthority() {
117751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        return authority;
117851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
117951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
118051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    /**
118151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * Returns the decoded authority component of this URI.
118251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
118351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <p> The string returned by this method is equal to that returned by the
118451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * {@link #getRawAuthority() getRawAuthority} method except that all
118551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * sequences of escaped octets are <a href="#decode">decoded</a>.  </p>
118651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
118751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @return  The decoded authority component of this URI,
11883a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong     *          or {@code null} if the authority is undefined
118951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     */
119051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    public String getAuthority() {
119151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if (decodedAuthority == null)
119251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            decodedAuthority = decode(authority);
119351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        return decodedAuthority;
119451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
119551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
119651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    /**
119751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * Returns the raw user-information component of this URI.
119851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
119951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <p> The user-information component of a URI, if defined, only contains
120051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * characters in the <i>unreserved</i>, <i>punct</i>, <i>escaped</i>, and
120151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <i>other</i> categories. </p>
120251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
120351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @return  The raw user-information component of this URI,
12043a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong     *          or {@code null} if the user information is undefined
120551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     */
120651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    public String getRawUserInfo() {
120751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        return userInfo;
120851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
120951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
121051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    /**
121151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * Returns the decoded user-information component of this URI.
121251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
121351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <p> The string returned by this method is equal to that returned by the
121451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * {@link #getRawUserInfo() getRawUserInfo} method except that all
121551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * sequences of escaped octets are <a href="#decode">decoded</a>.  </p>
121651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
121751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @return  The decoded user-information component of this URI,
12183a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong     *          or {@code null} if the user information is undefined
121951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     */
122051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    public String getUserInfo() {
122151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if ((decodedUserInfo == null) && (userInfo != null))
122251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            decodedUserInfo = decode(userInfo);
122351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        return decodedUserInfo;
122451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
122551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
122651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    /**
122751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * Returns the host component of this URI.
122851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
122951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <p> The host component of a URI, if defined, will have one of the
123051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * following forms: </p>
123151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
12323a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong     * <ul>
123351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
123451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   <li><p> A domain name consisting of one or more <i>labels</i>
12353a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong     *   separated by period characters ({@code '.'}), optionally followed by
123651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   a period character.  Each label consists of <i>alphanum</i> characters
12373a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong     *   as well as hyphen characters ({@code '-'}), though hyphens never
123851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   occur as the first or last characters in a label. The rightmost
123951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   label of a domain name consisting of two or more labels, begins
     *   with an <i>alpha</i> character. </p></li>
124151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
124251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   <li><p> A dotted-quad IPv4 address of the form
12433a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong     *   <i>digit</i>{@code +.}<i>digit</i>{@code +.}<i>digit</i>{@code +.}<i>digit</i>{@code +},
124451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   where no <i>digit</i> sequence is longer than three characters and no
124551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   sequence has a value larger than 255. </p></li>
124651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
12473a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong     *   <li><p> An IPv6 address enclosed in square brackets ({@code '['} and
12483a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong     *   {@code ']'}) and consisting of hexadecimal digits, colon characters
12493a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong     *   ({@code ':'}), and possibly an embedded IPv4 address.  The full
125051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   syntax of IPv6 addresses is specified in <a
125151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   href="http://www.ietf.org/rfc/rfc2373.txt"><i>RFC&nbsp;2373: IPv6
125251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   Addressing Architecture</i></a>.  </p></li>
125351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
125451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * </ul>
125551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
125651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * The host component of a URI cannot contain escaped octets, hence this
125751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * method does not perform any decoding.
125851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
125951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @return  The host component of this URI,
12603a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong     *          or {@code null} if the host is undefined
126151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     */
126251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    public String getHost() {
126351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        return host;
126451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
126551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
126651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    /**
126751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * Returns the port number of this URI.
126851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
126951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <p> The port component of a URI, if defined, is a non-negative
127051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * integer. </p>
127151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
127251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @return  The port component of this URI,
12733a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong     *          or {@code -1} if the port is undefined
127451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     */
127551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    public int getPort() {
127651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        return port;
127751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
127851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
127951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    /**
128051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * Returns the raw path component of this URI.
128151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
128251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <p> The path component of a URI, if defined, only contains the slash
12833a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong     * character ({@code '/'}), the commercial-at character ({@code '@'}),
128451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * and characters in the <i>unreserved</i>, <i>punct</i>, <i>escaped</i>,
128551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * and <i>other</i> categories. </p>
128651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
128751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @return  The path component of this URI,
12883a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong     *          or {@code null} if the path is undefined
128951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     */
129051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    public String getRawPath() {
129151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        return path;
129251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
129351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
129451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    /**
129551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * Returns the decoded path component of this URI.
129651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
129751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <p> The string returned by this method is equal to that returned by the
129851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * {@link #getRawPath() getRawPath} method except that all sequences of
129951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * escaped octets are <a href="#decode">decoded</a>.  </p>
130051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
130151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @return  The decoded path component of this URI,
13023a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong     *          or {@code null} if the path is undefined
130351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     */
130451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    public String getPath() {
130551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if ((decodedPath == null) && (path != null))
130651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            decodedPath = decode(path);
130751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        return decodedPath;
130851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
130951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
131051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    /**
131151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * Returns the raw query component of this URI.
131251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
131351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <p> The query component of a URI, if defined, only contains legal URI
131451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * characters. </p>
131551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
131651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @return  The raw query component of this URI,
13173a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong     *          or {@code null} if the query is undefined
131851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     */
131951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    public String getRawQuery() {
132051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        return query;
132151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
132251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
132351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    /**
132451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * Returns the decoded query component of this URI.
132551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
132651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <p> The string returned by this method is equal to that returned by the
132751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * {@link #getRawQuery() getRawQuery} method except that all sequences of
132851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * escaped octets are <a href="#decode">decoded</a>.  </p>
132951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
133051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @return  The decoded query component of this URI,
13313a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong     *          or {@code null} if the query is undefined
133251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     */
133351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    public String getQuery() {
133451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if ((decodedQuery == null) && (query != null))
133551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            decodedQuery = decode(query);
133651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        return decodedQuery;
133751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
133851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
133951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    /**
134051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * Returns the raw fragment component of this URI.
134151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
134251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <p> The fragment component of a URI, if defined, only contains legal URI
134351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * characters. </p>
134451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
134551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @return  The raw fragment component of this URI,
13463a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong     *          or {@code null} if the fragment is undefined
134751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     */
134851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    public String getRawFragment() {
134951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        return fragment;
135051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
135151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
135251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    /**
135351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * Returns the decoded fragment component of this URI.
135451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
135551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <p> The string returned by this method is equal to that returned by the
135651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * {@link #getRawFragment() getRawFragment} method except that all
135751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * sequences of escaped octets are <a href="#decode">decoded</a>.  </p>
135851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
135951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @return  The decoded fragment component of this URI,
13603a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong     *          or {@code null} if the fragment is undefined
136151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     */
136251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    public String getFragment() {
136351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if ((decodedFragment == null) && (fragment != null))
136451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            decodedFragment = decode(fragment);
136551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        return decodedFragment;
136651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
136751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
136851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
136951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // -- Equality, comparison, hash code, toString, and serialization --
137051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
137151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    /**
137251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * Tests this URI for equality with another object.
137351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
137451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <p> If the given object is not a URI then this method immediately
13753a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong     * returns {@code false}.
137651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
137751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <p> For two URIs to be considered equal requires that either both are
137851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * opaque or both are hierarchical.  Their schemes must either both be
137951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * undefined or else be equal without regard to case. Their fragments
138051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * must either both be undefined or else be equal.
138151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
138251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <p> For two opaque URIs to be considered equal, their scheme-specific
138351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * parts must be equal.
138451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
138551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <p> For two hierarchical URIs to be considered equal, their paths must
138651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * be equal and their queries must either both be undefined or else be
138751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * equal.  Their authorities must either both be undefined, or both be
138851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * registry-based, or both be server-based.  If their authorities are
138951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * defined and are registry-based, then they must be equal.  If their
139051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * authorities are defined and are server-based, then their hosts must be
139151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * equal without regard to case, their port numbers must be equal, and
139251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * their user-information components must be equal.
139351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
     * <p> When testing the user-information, path, query, fragment, authority,
     * or scheme-specific parts of two URIs for equality, the raw forms rather
     * than the decoded forms of these components are compared and the
     * hexadecimal digits of escaped octets are compared without regard to
     * case.
139951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
140051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <p> This method satisfies the general contract of the {@link
140151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * java.lang.Object#equals(Object) Object.equals} method. </p>
140251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
140351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @param   ob   The object to which this object is to be compared
140451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
14053a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong     * @return  {@code true} if, and only if, the given object is a URI that
140651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *          is identical to this URI
140751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     */
140851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    public boolean equals(Object ob) {
140951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if (ob == this)
141051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            return true;
141151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if (!(ob instanceof URI))
141251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            return false;
141351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        URI that = (URI)ob;
141451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if (this.isOpaque() != that.isOpaque()) return false;
141551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if (!equalIgnoringCase(this.scheme, that.scheme)) return false;
141651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if (!equal(this.fragment, that.fragment)) return false;
141751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
141851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // Opaque
141951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if (this.isOpaque())
142051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            return equal(this.schemeSpecificPart, that.schemeSpecificPart);
142151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
142251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // Hierarchical
142351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if (!equal(this.path, that.path)) return false;
142451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if (!equal(this.query, that.query)) return false;
142551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
142651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // Authorities
142751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if (this.authority == that.authority) return true;
142851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if (this.host != null) {
142951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            // Server-based
143051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (!equal(this.userInfo, that.userInfo)) return false;
143151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (!equalIgnoringCase(this.host, that.host)) return false;
143251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (this.port != that.port) return false;
143351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        } else if (this.authority != null) {
143451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            // Registry-based
143551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (!equal(this.authority, that.authority)) return false;
143651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        } else if (this.authority != that.authority) {
143751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            return false;
143851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
143951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
144051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        return true;
144151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
144251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
144351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    /**
144451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * Returns a hash-code value for this URI.  The hash code is based upon all
144551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * of the URI's components, and satisfies the general contract of the
144651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * {@link java.lang.Object#hashCode() Object.hashCode} method.
144751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
144851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @return  A hash-code value for this URI
144951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     */
145051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    public int hashCode() {
145151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if (hash != 0)
145251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            return hash;
145351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        int h = hashIgnoringCase(0, scheme);
145451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        h = hash(h, fragment);
145551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if (isOpaque()) {
145651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            h = hash(h, schemeSpecificPart);
145751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        } else {
145851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            h = hash(h, path);
145951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            h = hash(h, query);
146051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (host != null) {
146151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                h = hash(h, userInfo);
146251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                h = hashIgnoringCase(h, host);
146351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                h += 1949 * port;
146451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            } else {
146551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                h = hash(h, authority);
146651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            }
146751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
146851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        hash = h;
146951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        return h;
147051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
147151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
147251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    /**
147351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * Compares this URI to another object, which must be a URI.
147451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
147551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <p> When comparing corresponding components of two URIs, if one
147651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * component is undefined but the other is defined then the first is
147751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * considered to be less than the second.  Unless otherwise noted, string
147851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * components are ordered according to their natural, case-sensitive
147951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * ordering as defined by the {@link java.lang.String#compareTo(Object)
     * String.compareTo} method.  String components that are subject to
     * encoding are compared by comparing their raw forms rather than their
     * decoded forms.
148351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
148451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <p> The ordering of URIs is defined as follows: </p>
148551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
148651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <ul type=disc>
148751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
     *   <li><p> Two URIs with different schemes are ordered according to the
     *   ordering of their schemes, without regard to case. </p></li>
149051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
149151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   <li><p> A hierarchical URI is considered to be less than an opaque URI
149251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   with an identical scheme. </p></li>
149351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
149451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   <li><p> Two opaque URIs with identical schemes are ordered according
149551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   to the ordering of their scheme-specific parts. </p></li>
149651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
149751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   <li><p> Two opaque URIs with identical schemes and scheme-specific
149851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   parts are ordered according to the ordering of their
149951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   fragments. </p></li>
150051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
150151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   <li><p> Two hierarchical URIs with identical schemes are ordered
150251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   according to the ordering of their authority components: </p>
150351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
150451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   <ul type=disc>
150551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
150651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *     <li><p> If both authority components are server-based then the URIs
150751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *     are ordered according to their user-information components; if these
150851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *     components are identical then the URIs are ordered according to the
150951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *     ordering of their hosts, without regard to case; if the hosts are
151051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *     identical then the URIs are ordered according to the ordering of
151151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *     their ports. </p></li>
151251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
151351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *     <li><p> If one or both authority components are registry-based then
151451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *     the URIs are ordered according to the ordering of their authority
151551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *     components. </p></li>
151651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
151751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   </ul></li>
151851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
151951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   <li><p> Finally, two hierarchical URIs with identical schemes and
152051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   authority components are ordered according to the ordering of their
152151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   paths; if their paths are identical then they are ordered according to
152251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   the ordering of their queries; if the queries are identical then they
152351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   are ordered according to the order of their fragments. </p></li>
152451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
152551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * </ul>
152651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
152751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <p> This method satisfies the general contract of the {@link
152851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * java.lang.Comparable#compareTo(Object) Comparable.compareTo}
152951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * method. </p>
153051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
153151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @param   that
153251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *          The object to which this URI is to be compared
153351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
153451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @return  A negative integer, zero, or a positive integer as this URI is
153551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *          less than, equal to, or greater than the given URI
153651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
153751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @throws  ClassCastException
153851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *          If the given object is not a URI
153951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     */
154051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    public int compareTo(URI that) {
154151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        int c;
154251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
154351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if ((c = compareIgnoringCase(this.scheme, that.scheme)) != 0)
154451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            return c;
154551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
154651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if (this.isOpaque()) {
154751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (that.isOpaque()) {
154851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                // Both opaque
154951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                if ((c = compare(this.schemeSpecificPart,
155051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                                 that.schemeSpecificPart)) != 0)
155151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    return c;
155251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                return compare(this.fragment, that.fragment);
155351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            }
155451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            return +1;                  // Opaque > hierarchical
155551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        } else if (that.isOpaque()) {
155651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            return -1;                  // Hierarchical < opaque
155751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
155851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
155951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // Hierarchical
156051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if ((this.host != null) && (that.host != null)) {
156151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            // Both server-based
156251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if ((c = compare(this.userInfo, that.userInfo)) != 0)
156351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                return c;
156451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if ((c = compareIgnoringCase(this.host, that.host)) != 0)
156551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                return c;
156651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if ((c = this.port - that.port) != 0)
156751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                return c;
156851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        } else {
156951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            // If one or both authorities are registry-based then we simply
157051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            // compare them in the usual, case-sensitive way.  If one is
157151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            // registry-based and one is server-based then the strings are
157251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            // guaranteed to be unequal, hence the comparison will never return
157351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            // zero and the compareTo and equals methods will remain
157451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            // consistent.
157551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if ((c = compare(this.authority, that.authority)) != 0) return c;
157651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
157751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
157851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if ((c = compare(this.path, that.path)) != 0) return c;
157951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if ((c = compare(this.query, that.query)) != 0) return c;
158051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        return compare(this.fragment, that.fragment);
158151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
158251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
158351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    /**
158451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * Returns the content of this URI as a string.
158551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
158651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <p> If this URI was created by invoking one of the constructors in this
158751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * class then a string equivalent to the original input string, or to the
158851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * string computed from the originally-given components, as appropriate, is
158951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * returned.  Otherwise this URI was created by normalization, resolution,
159051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * or relativization, and so a string is constructed from this URI's
159151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * components according to the rules specified in <a
159251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * href="http://www.ietf.org/rfc/rfc2396.txt">RFC&nbsp;2396</a>,
159351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * section&nbsp;5.2, step&nbsp;7. </p>
159451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
159551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @return  The string form of this URI
159651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     */
159751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    public String toString() {
159851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        defineString();
159951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        return string;
160051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
160151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
160251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    /**
160351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * Returns the content of this URI as a US-ASCII string.
160451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
160551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <p> If this URI does not contain any characters in the <i>other</i>
160651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * category then an invocation of this method will return the same value as
160751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * an invocation of the {@link #toString() toString} method.  Otherwise
160851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * this method works as if by invoking that method and then <a
160951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * href="#encode">encoding</a> the result.  </p>
161051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
161151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @return  The string form of this URI, encoded as needed
161251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *          so that it only contains characters in the US-ASCII
161351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *          charset
161451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     */
161551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    public String toASCIIString() {
161651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        defineString();
161751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        return encode(string);
161851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
161951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
162051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
162151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // -- Serialization support --
162251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
162351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    /**
162451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * Saves the content of this URI to the given serial stream.
162551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
16263a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong     * <p> The only serializable field of a URI instance is its {@code string}
162751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * field.  That field is given a value, if it does not have one already,
162851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * and then the {@link java.io.ObjectOutputStream#defaultWriteObject()}
162951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * method of the given object-output stream is invoked. </p>
163051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
163151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @param  os  The object-output stream to which this object
163251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *             is to be written
163351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     */
163451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private void writeObject(ObjectOutputStream os)
163551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        throws IOException
163651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    {
163751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        defineString();
163851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        os.defaultWriteObject();        // Writes the string field only
163951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
164051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
164151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    /**
164251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * Reconstitutes a URI from the given serial stream.
164351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
164451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <p> The {@link java.io.ObjectInputStream#defaultReadObject()} method is
16453a6411ec91b24e73f36301d0075bc7b052894ae9Yi Kong     * invoked to read the value of the {@code string} field.  The result is
164651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * then parsed in the usual way.
164751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
164851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @param  is  The object-input stream from which this object
164951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *             is being read
165051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     */
165151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private void readObject(ObjectInputStream is)
165251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        throws ClassNotFoundException, IOException
165351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    {
165451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        port = -1;                      // Argh
165551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        is.defaultReadObject();
165651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        try {
165751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            new Parser(string).parse(false);
165851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        } catch (URISyntaxException x) {
165951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            IOException y = new InvalidObjectException("Invalid URI");
166051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            y.initCause(x);
166151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            throw y;
166251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
166351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
166451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
166551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
166651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // -- End of public methods --
166751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
166851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
166951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // -- Utility methods for string-field comparison and hashing --
167051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
167151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // These methods return appropriate values for null string arguments,
167251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // thereby simplifying the equals, hashCode, and compareTo methods.
167351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    //
167451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // The case-ignoring methods should only be applied to strings whose
167551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // characters are all known to be US-ASCII.  Because of this restriction,
167651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // these methods are faster than the similar methods in the String class.
167751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
167851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // US-ASCII only
167951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static int toLower(char c) {
168051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if ((c >= 'A') && (c <= 'Z'))
168151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            return c + ('a' - 'A');
168251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        return c;
168351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
168451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
1685e0bf2a3f0b8c80d303185e4e6daa9e3f3431c622Yi Kong    // US-ASCII only
1686e0bf2a3f0b8c80d303185e4e6daa9e3f3431c622Yi Kong    private static int toUpper(char c) {
1687e0bf2a3f0b8c80d303185e4e6daa9e3f3431c622Yi Kong        if ((c >= 'a') && (c <= 'z'))
1688e0bf2a3f0b8c80d303185e4e6daa9e3f3431c622Yi Kong            return c - ('a' - 'A');
1689e0bf2a3f0b8c80d303185e4e6daa9e3f3431c622Yi Kong        return c;
1690e0bf2a3f0b8c80d303185e4e6daa9e3f3431c622Yi Kong    }
1691e0bf2a3f0b8c80d303185e4e6daa9e3f3431c622Yi Kong
169251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static boolean equal(String s, String t) {
169351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if (s == t) return true;
169451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if ((s != null) && (t != null)) {
169551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (s.length() != t.length())
169651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                return false;
169751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (s.indexOf('%') < 0)
169851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                return s.equals(t);
169951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            int n = s.length();
170051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            for (int i = 0; i < n;) {
170151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                char c = s.charAt(i);
170251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                char d = t.charAt(i);
170351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                if (c != '%') {
170451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    if (c != d)
170551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                        return false;
170651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    i++;
170751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    continue;
170851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                }
170951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                if (d != '%')
171051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    return false;
171151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                i++;
171251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                if (toLower(s.charAt(i)) != toLower(t.charAt(i)))
171351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    return false;
171451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                i++;
171551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                if (toLower(s.charAt(i)) != toLower(t.charAt(i)))
171651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    return false;
171751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                i++;
171851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            }
171951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            return true;
172051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
172151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        return false;
172251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
172351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
172451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // US-ASCII only
172551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static boolean equalIgnoringCase(String s, String t) {
172651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if (s == t) return true;
172751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if ((s != null) && (t != null)) {
172851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            int n = s.length();
172951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (t.length() != n)
173051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                return false;
173151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            for (int i = 0; i < n; i++) {
173251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                if (toLower(s.charAt(i)) != toLower(t.charAt(i)))
173351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    return false;
173451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            }
173551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            return true;
173651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
173751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        return false;
173851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
173951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
174051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static int hash(int hash, String s) {
174151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if (s == null) return hash;
1742e0bf2a3f0b8c80d303185e4e6daa9e3f3431c622Yi Kong        return s.indexOf('%') < 0 ? hash * 127 + s.hashCode()
1743e0bf2a3f0b8c80d303185e4e6daa9e3f3431c622Yi Kong                                  : normalizedHash(hash, s);
1744e0bf2a3f0b8c80d303185e4e6daa9e3f3431c622Yi Kong    }
1745e0bf2a3f0b8c80d303185e4e6daa9e3f3431c622Yi Kong
1746e0bf2a3f0b8c80d303185e4e6daa9e3f3431c622Yi Kong
1747e0bf2a3f0b8c80d303185e4e6daa9e3f3431c622Yi Kong    private static int normalizedHash(int hash, String s) {
1748e0bf2a3f0b8c80d303185e4e6daa9e3f3431c622Yi Kong        int h = 0;
1749e0bf2a3f0b8c80d303185e4e6daa9e3f3431c622Yi Kong        for (int index = 0; index < s.length(); index++) {
1750e0bf2a3f0b8c80d303185e4e6daa9e3f3431c622Yi Kong            char ch = s.charAt(index);
1751e0bf2a3f0b8c80d303185e4e6daa9e3f3431c622Yi Kong            h = 31 * h + ch;
1752e0bf2a3f0b8c80d303185e4e6daa9e3f3431c622Yi Kong            if (ch == '%') {
1753e0bf2a3f0b8c80d303185e4e6daa9e3f3431c622Yi Kong                /*
1754e0bf2a3f0b8c80d303185e4e6daa9e3f3431c622Yi Kong                 * Process the next two encoded characters
1755e0bf2a3f0b8c80d303185e4e6daa9e3f3431c622Yi Kong                 */
1756e0bf2a3f0b8c80d303185e4e6daa9e3f3431c622Yi Kong                for (int i = index + 1; i < index + 3; i++)
1757e0bf2a3f0b8c80d303185e4e6daa9e3f3431c622Yi Kong                    h = 31 * h + toUpper(s.charAt(i));
1758e0bf2a3f0b8c80d303185e4e6daa9e3f3431c622Yi Kong                index += 2;
1759e0bf2a3f0b8c80d303185e4e6daa9e3f3431c622Yi Kong            }
1760e0bf2a3f0b8c80d303185e4e6daa9e3f3431c622Yi Kong        }
1761e0bf2a3f0b8c80d303185e4e6daa9e3f3431c622Yi Kong        return hash * 127 + h;
176251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
176351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
176451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // US-ASCII only
176551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static int hashIgnoringCase(int hash, String s) {
176651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if (s == null) return hash;
176751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        int h = hash;
176851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        int n = s.length();
176951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        for (int i = 0; i < n; i++)
177051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            h = 31 * h + toLower(s.charAt(i));
177151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        return h;
177251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
177351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
177451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static int compare(String s, String t) {
177551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if (s == t) return 0;
177651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if (s != null) {
177751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (t != null)
177851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                return s.compareTo(t);
177951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            else
178051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                return +1;
178151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        } else {
178251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            return -1;
178351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
178451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
178551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
178651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // US-ASCII only
178751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static int compareIgnoringCase(String s, String t) {
178851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if (s == t) return 0;
178951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if (s != null) {
179051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (t != null) {
179151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                int sn = s.length();
179251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                int tn = t.length();
179351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                int n = sn < tn ? sn : tn;
179451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                for (int i = 0; i < n; i++) {
179551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    int c = toLower(s.charAt(i)) - toLower(t.charAt(i));
179651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    if (c != 0)
179751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                        return c;
179851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                }
179951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                return sn - tn;
180051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            }
180151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            return +1;
180251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        } else {
180351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            return -1;
180451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
180551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
180651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
180751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
180851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // -- String construction --
180951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
181051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // If a scheme is given then the path, if given, must be absolute
181151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    //
181251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static void checkPath(String s, String scheme, String path)
181351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        throws URISyntaxException
181451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    {
181551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if (scheme != null) {
181651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if ((path != null)
181751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                && ((path.length() > 0) && (path.charAt(0) != '/')))
181851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                throw new URISyntaxException(s,
181951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                                             "Relative path in absolute URI");
182051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
182151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
182251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
182351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private void appendAuthority(StringBuffer sb,
182451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                                 String authority,
182551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                                 String userInfo,
182651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                                 String host,
182751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                                 int port)
182851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    {
182951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if (host != null) {
183051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            sb.append("//");
183151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (userInfo != null) {
183251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                sb.append(quote(userInfo, L_USERINFO, H_USERINFO));
183351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                sb.append('@');
183451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            }
183551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            boolean needBrackets = ((host.indexOf(':') >= 0)
183651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                                    && !host.startsWith("[")
183751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                                    && !host.endsWith("]"));
183851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (needBrackets) sb.append('[');
183951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            sb.append(host);
184051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (needBrackets) sb.append(']');
184151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (port != -1) {
184251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                sb.append(':');
184351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                sb.append(port);
184451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            }
184551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        } else if (authority != null) {
184651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            sb.append("//");
184751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (authority.startsWith("[")) {
184851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                // authority should (but may not) contain an embedded IPv6 address
184951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                int end = authority.indexOf("]");
185051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                String doquote = authority, dontquote = "";
185151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                if (end != -1 && authority.indexOf(":") != -1) {
185251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    // the authority contains an IPv6 address
185351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    if (end == authority.length()) {
185451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                        dontquote = authority;
185551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                        doquote = "";
185651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    } else {
185751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                        dontquote = authority.substring(0 , end + 1);
185851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                        doquote = authority.substring(end + 1);
185951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    }
186051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                }
186151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                sb.append(dontquote);
186251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                sb.append(quote(doquote,
186351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                            L_REG_NAME | L_SERVER,
186451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                            H_REG_NAME | H_SERVER));
186551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            } else {
186651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                sb.append(quote(authority,
186751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                            L_REG_NAME | L_SERVER,
186851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                            H_REG_NAME | H_SERVER));
186951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            }
187051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
187151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
187251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
187351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private void appendSchemeSpecificPart(StringBuffer sb,
187451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                                          String opaquePart,
187551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                                          String authority,
187651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                                          String userInfo,
187751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                                          String host,
187851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                                          int port,
187951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                                          String path,
188051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                                          String query)
188151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    {
188251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if (opaquePart != null) {
188351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            /* check if SSP begins with an IPv6 address
188451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski             * because we must not quote a literal IPv6 address
188551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski             */
188651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (opaquePart.startsWith("//[")) {
188751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                int end =  opaquePart.indexOf("]");
188851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                if (end != -1 && opaquePart.indexOf(":")!=-1) {
188951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    String doquote, dontquote;
189051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    if (end == opaquePart.length()) {
189151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                        dontquote = opaquePart;
189251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                        doquote = "";
189351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    } else {
189451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                        dontquote = opaquePart.substring(0,end+1);
189551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                        doquote = opaquePart.substring(end+1);
189651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    }
189751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    sb.append (dontquote);
189851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    sb.append(quote(doquote, L_URIC, H_URIC));
189951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                }
190051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            } else {
190151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                sb.append(quote(opaquePart, L_URIC, H_URIC));
190251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            }
190351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        } else {
190451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            appendAuthority(sb, authority, userInfo, host, port);
190551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (path != null)
190651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                sb.append(quote(path, L_PATH, H_PATH));
190751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (query != null) {
190851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                sb.append('?');
190951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                sb.append(quote(query, L_URIC, H_URIC));
191051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            }
191151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
191251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
191351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
191451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private void appendFragment(StringBuffer sb, String fragment) {
191551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if (fragment != null) {
191651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            sb.append('#');
191751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            sb.append(quote(fragment, L_URIC, H_URIC));
191851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
191951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
192051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
192151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private String toString(String scheme,
192251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                            String opaquePart,
192351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                            String authority,
192451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                            String userInfo,
192551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                            String host,
192651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                            int port,
192751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                            String path,
192851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                            String query,
192951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                            String fragment)
193051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    {
193151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        StringBuffer sb = new StringBuffer();
193251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if (scheme != null) {
193351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            sb.append(scheme);
193451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            sb.append(':');
193551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
193651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        appendSchemeSpecificPart(sb, opaquePart,
193751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                                 authority, userInfo, host, port,
193851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                                 path, query);
193951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        appendFragment(sb, fragment);
194051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        return sb.toString();
194151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
194251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
194351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private void defineSchemeSpecificPart() {
194451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if (schemeSpecificPart != null) return;
194551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        StringBuffer sb = new StringBuffer();
194651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        appendSchemeSpecificPart(sb, null, getAuthority(), getUserInfo(),
194751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                                 host, port, getPath(), getQuery());
194851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if (sb.length() == 0) return;
194951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        schemeSpecificPart = sb.toString();
195051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
195151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
195251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private void defineString() {
195351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if (string != null) return;
195451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
195551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        StringBuffer sb = new StringBuffer();
195651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if (scheme != null) {
195751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            sb.append(scheme);
195851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            sb.append(':');
195951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
196051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if (isOpaque()) {
196151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            sb.append(schemeSpecificPart);
196251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        } else {
196351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (host != null) {
196451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                sb.append("//");
196551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                if (userInfo != null) {
196651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    sb.append(userInfo);
196751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    sb.append('@');
196851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                }
196951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                boolean needBrackets = ((host.indexOf(':') >= 0)
197051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                                    && !host.startsWith("[")
197151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                                    && !host.endsWith("]"));
197251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                if (needBrackets) sb.append('[');
197351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                sb.append(host);
197451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                if (needBrackets) sb.append(']');
197551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                if (port != -1) {
197651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    sb.append(':');
197751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    sb.append(port);
197851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                }
197951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            } else if (authority != null) {
198051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                sb.append("//");
198151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                sb.append(authority);
198251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            }
198351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (path != null)
198451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                sb.append(path);
198551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (query != null) {
198651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                sb.append('?');
198751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                sb.append(query);
198851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            }
198951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
199051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if (fragment != null) {
199151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            sb.append('#');
199251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            sb.append(fragment);
199351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
199451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        string = sb.toString();
199551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
199651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
199751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
199851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // -- Normalization, resolution, and relativization --
199951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
200051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // RFC2396 5.2 (6)
200151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static String resolvePath(String base, String child,
200251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                                      boolean absolute)
200351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    {
200451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        int i = base.lastIndexOf('/');
200551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        int cn = child.length();
200651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        String path = "";
200751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
200851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if (cn == 0) {
200951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            // 5.2 (6a)
201051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (i >= 0)
201151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                path = base.substring(0, i + 1);
201251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        } else {
201351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            StringBuffer sb = new StringBuffer(base.length() + cn);
201451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            // 5.2 (6a)
201551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (i >= 0)
201651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                sb.append(base.substring(0, i + 1));
201751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            // 5.2 (6b)
201851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            sb.append(child);
201951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            path = sb.toString();
202051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
202151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
202251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // 5.2 (6c-f)
20239af8755ec458552ad55adb2d106fd0915fd860e9Yi Kong        String np = normalize(path, true);
202451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
202551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // 5.2 (6g): If the result is absolute but the path begins with "../",
202651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // then we simply leave the path as-is
202751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
202851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        return np;
202951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
203051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
203151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // RFC2396 5.2
203251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static URI resolve(URI base, URI child) {
203351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // check if child if opaque first so that NPE is thrown
203451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // if child is null.
203551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if (child.isOpaque() || base.isOpaque())
203651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            return child;
203751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
203851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // 5.2 (2): Reference to current document (lone fragment)
203951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if ((child.scheme == null) && (child.authority == null)
204051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            && child.path.equals("") && (child.fragment != null)
204151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            && (child.query == null)) {
204251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if ((base.fragment != null)
204351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                && child.fragment.equals(base.fragment)) {
204451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                return base;
204551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            }
204651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            URI ru = new URI();
204751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            ru.scheme = base.scheme;
204851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            ru.authority = base.authority;
204951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            ru.userInfo = base.userInfo;
205051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            ru.host = base.host;
205151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            ru.port = base.port;
205251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            ru.path = base.path;
205351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            ru.fragment = child.fragment;
205451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            ru.query = base.query;
205551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            return ru;
205651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
205751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
205851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // 5.2 (3): Child is absolute
205951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if (child.scheme != null)
206051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            return child;
206151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
206251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        URI ru = new URI();             // Resolved URI
206351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        ru.scheme = base.scheme;
206451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        ru.query = child.query;
206551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        ru.fragment = child.fragment;
206651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
206751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // 5.2 (4): Authority
206851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if (child.authority == null) {
206951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            ru.authority = base.authority;
207051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            ru.host = base.host;
207151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            ru.userInfo = base.userInfo;
207251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            ru.port = base.port;
207351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
2074bd3fc1206b075a1eada8f6229d68cdb114a5799aYi Kong            if (child.path == null || child.path.isEmpty()) {
2075bd3fc1206b075a1eada8f6229d68cdb114a5799aYi Kong                // This is an addtional path from RFC 3986 RI, which fixes following RFC 2396
2076bd3fc1206b075a1eada8f6229d68cdb114a5799aYi Kong                // "normal" examples:
2077bd3fc1206b075a1eada8f6229d68cdb114a5799aYi Kong                // Base: http://a/b/c/d;p?q
2078bd3fc1206b075a1eada8f6229d68cdb114a5799aYi Kong                //   "?y" = "http://a/b/c/d;p?y"
2079bd3fc1206b075a1eada8f6229d68cdb114a5799aYi Kong                //   ""   = "http://a/b/c/d;p?q"
2080bd3fc1206b075a1eada8f6229d68cdb114a5799aYi Kong                // http://b/25897693
2081bd3fc1206b075a1eada8f6229d68cdb114a5799aYi Kong                ru.path = base.path;
2082bd3fc1206b075a1eada8f6229d68cdb114a5799aYi Kong                ru.query = child.query != null ? child.query : base.query;
2083bd3fc1206b075a1eada8f6229d68cdb114a5799aYi Kong            } else if ((child.path.length() > 0) && (child.path.charAt(0) == '/')) {
208451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                // 5.2 (5): Child path is absolute
2085bd3fc1206b075a1eada8f6229d68cdb114a5799aYi Kong                //
2086bd3fc1206b075a1eada8f6229d68cdb114a5799aYi Kong                // There is an additional step from RFC 3986 RI, requiring to remove dots for
2087bd3fc1206b075a1eada8f6229d68cdb114a5799aYi Kong                // absolute path as well.
2088bd3fc1206b075a1eada8f6229d68cdb114a5799aYi Kong                // http://b/25897693
20899af8755ec458552ad55adb2d106fd0915fd860e9Yi Kong                ru.path = normalize(child.path, true);
209051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            } else {
209151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                // 5.2 (6): Resolve relative path
2092bd3fc1206b075a1eada8f6229d68cdb114a5799aYi Kong                ru.path = resolvePath(base.path, child.path, base.isAbsolute());
209351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            }
209451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        } else {
209551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            ru.authority = child.authority;
209651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            ru.host = child.host;
209751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            ru.userInfo = child.userInfo;
209851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            ru.host = child.host;
209951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            ru.port = child.port;
210051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            ru.path = child.path;
210151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
210251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
210351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // 5.2 (7): Recombine (nothing to do here)
210451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        return ru;
210551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
210651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
210751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // If the given URI's path is normal then return the URI;
210851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // o.w., return a new URI containing the normalized path.
210951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    //
211051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static URI normalize(URI u) {
211151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if (u.isOpaque() || (u.path == null) || (u.path.length() == 0))
211251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            return u;
211351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
211451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        String np = normalize(u.path);
211551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if (np == u.path)
211651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            return u;
211751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
211851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        URI v = new URI();
211951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        v.scheme = u.scheme;
212051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        v.fragment = u.fragment;
212151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        v.authority = u.authority;
212251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        v.userInfo = u.userInfo;
212351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        v.host = u.host;
212451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        v.port = u.port;
212551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        v.path = np;
212651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        v.query = u.query;
212751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        return v;
212851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
212951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
213051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // If both URIs are hierarchical, their scheme and authority components are
213151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // identical, and the base path is a prefix of the child's path, then
213251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // return a relative URI that, when resolved against the base, yields the
213351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // child; otherwise, return the child.
213451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    //
213551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static URI relativize(URI base, URI child) {
213651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // check if child if opaque first so that NPE is thrown
213751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // if child is null.
213851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if (child.isOpaque() || base.isOpaque())
213951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            return child;
214051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if (!equalIgnoringCase(base.scheme, child.scheme)
214151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            || !equal(base.authority, child.authority))
214251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            return child;
214351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
214451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        String bp = normalize(base.path);
214551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        String cp = normalize(child.path);
214651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if (!bp.equals(cp)) {
214731651e7a39b8c79cacebda5439d25715a02aa7cfNarayan Kamath            // Android-changed: The original OpenJdk implementation would append a trailing slash
214831651e7a39b8c79cacebda5439d25715a02aa7cfNarayan Kamath            // to paths like "/a/b" before relativizing them. This would relativize /a/b/c to
214931651e7a39b8c79cacebda5439d25715a02aa7cfNarayan Kamath            // "/c" against "/a/b" the android implementation did not do this. It would assume that
215031651e7a39b8c79cacebda5439d25715a02aa7cfNarayan Kamath            // "b" wasn't a directory and relativize the path to "/b/c". The spec is pretty vague
215131651e7a39b8c79cacebda5439d25715a02aa7cfNarayan Kamath            // about this but this change is being made because we have several tests that expect
215231651e7a39b8c79cacebda5439d25715a02aa7cfNarayan Kamath            // this behaviour.
215331651e7a39b8c79cacebda5439d25715a02aa7cfNarayan Kamath            if (bp.indexOf('/') != -1) {
215431651e7a39b8c79cacebda5439d25715a02aa7cfNarayan Kamath                bp = bp.substring(0, bp.lastIndexOf('/') + 1);
215531651e7a39b8c79cacebda5439d25715a02aa7cfNarayan Kamath            }
215631651e7a39b8c79cacebda5439d25715a02aa7cfNarayan Kamath
215751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (!cp.startsWith(bp))
215851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                return child;
215951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
216051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
216151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        URI v = new URI();
216251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        v.path = cp.substring(bp.length());
216351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        v.query = child.query;
216451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        v.fragment = child.fragment;
216551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        return v;
216651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
216751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
216851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
216951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
217051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // -- Path normalization --
217151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
217251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // The following algorithm for path normalization avoids the creation of a
217351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // string object for each segment, as well as the use of a string buffer to
217451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // compute the final result, by using a single char array and editing it in
217551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // place.  The array is first split into segments, replacing each slash
217651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // with '\0' and creating a segment-index array, each element of which is
217751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // the index of the first char in the corresponding segment.  We then walk
217851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // through both arrays, removing ".", "..", and other segments as necessary
217951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // by setting their entries in the index array to -1.  Finally, the two
218051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // arrays are used to rejoin the segments and compute the final result.
218151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    //
218251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // This code is based upon src/solaris/native/java/io/canonicalize_md.c
218351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
218451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
218551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // Check the given path to see if it might need normalization.  A path
218651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // might need normalization if it contains duplicate slashes, a "."
218751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // segment, or a ".." segment.  Return -1 if no further normalization is
218851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // possible, otherwise return the number of segments found.
218951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    //
219051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // This method takes a string argument rather than a char array so that
219151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // this test can be performed without invoking path.toCharArray().
219251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    //
219351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    static private int needsNormalization(String path) {
219451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        boolean normal = true;
219551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        int ns = 0;                     // Number of segments
219651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        int end = path.length() - 1;    // Index of last char in path
219751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        int p = 0;                      // Index of next char in path
219851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
219951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // Skip initial slashes
220051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        while (p <= end) {
220151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (path.charAt(p) != '/') break;
220251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            p++;
220351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
220451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if (p > 1) normal = false;
220551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
220651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // Scan segments
220751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        while (p <= end) {
220851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
220951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            // Looking at "." or ".." ?
221051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if ((path.charAt(p) == '.')
221151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                && ((p == end)
221251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    || ((path.charAt(p + 1) == '/')
221351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                        || ((path.charAt(p + 1) == '.')
221451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                            && ((p + 1 == end)
221551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                                || (path.charAt(p + 2) == '/')))))) {
221651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                normal = false;
221751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            }
221851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            ns++;
221951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
222051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            // Find beginning of next segment
222151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            while (p <= end) {
222251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                if (path.charAt(p++) != '/')
222351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    continue;
222451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
222551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                // Skip redundant slashes
222651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                while (p <= end) {
222751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    if (path.charAt(p) != '/') break;
222851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    normal = false;
222951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    p++;
223051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                }
223151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
223251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                break;
223351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            }
223451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
223551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
223651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        return normal ? -1 : ns;
223751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
223851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
223951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
224051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // Split the given path into segments, replacing slashes with nulls and
224151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // filling in the given segment-index array.
224251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    //
224351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // Preconditions:
224451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    //   segs.length == Number of segments in path
224551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    //
224651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // Postconditions:
224751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    //   All slashes in path replaced by '\0'
224851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    //   segs[i] == Index of first char in segment i (0 <= i < segs.length)
224951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    //
225051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    static private void split(char[] path, int[] segs) {
225151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        int end = path.length - 1;      // Index of last char in path
225251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        int p = 0;                      // Index of next char in path
225351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        int i = 0;                      // Index of current segment
225451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
225551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // Skip initial slashes
225651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        while (p <= end) {
225751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (path[p] != '/') break;
225851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            path[p] = '\0';
225951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            p++;
226051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
226151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
226251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        while (p <= end) {
226351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
226451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            // Note start of segment
226551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            segs[i++] = p++;
226651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
226751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            // Find beginning of next segment
226851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            while (p <= end) {
226951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                if (path[p++] != '/')
227051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    continue;
227151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                path[p - 1] = '\0';
227251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
227351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                // Skip redundant slashes
227451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                while (p <= end) {
227551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    if (path[p] != '/') break;
227651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    path[p++] = '\0';
227751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                }
227851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                break;
227951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            }
228051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
228151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
228251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if (i != segs.length)
228351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            throw new InternalError();  // ASSERT
228451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
228551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
228651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
228751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // Join the segments in the given path according to the given segment-index
228851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // array, ignoring those segments whose index entries have been set to -1,
228951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // and inserting slashes as needed.  Return the length of the resulting
229051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // path.
229151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    //
229251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // Preconditions:
229351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    //   segs[i] == -1 implies segment i is to be ignored
229451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    //   path computed by split, as above, with '\0' having replaced '/'
229551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    //
229651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // Postconditions:
229751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    //   path[0] .. path[return value - 1] == Resulting path
229851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    //
229951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    static private int join(char[] path, int[] segs) {
230051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        int ns = segs.length;           // Number of segments
230151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        int end = path.length - 1;      // Index of last char in path
230251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        int p = 0;                      // Index of next path char to write
230351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
230451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if (path[p] == '\0') {
230551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            // Restore initial slash for absolute paths
230651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            path[p++] = '/';
230751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
230851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
230951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        for (int i = 0; i < ns; i++) {
231051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            int q = segs[i];            // Current segment
231151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (q == -1)
231251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                // Ignore this segment
231351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                continue;
231451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
231551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (p == q) {
231651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                // We're already at this segment, so just skip to its end
231751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                while ((p <= end) && (path[p] != '\0'))
231851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    p++;
231951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                if (p <= end) {
232051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    // Preserve trailing slash
232151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    path[p++] = '/';
232251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                }
232351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            } else if (p < q) {
232451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                // Copy q down to p
232551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                while ((q <= end) && (path[q] != '\0'))
232651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    path[p++] = path[q++];
232751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                if (q <= end) {
232851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    // Preserve trailing slash
232951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    path[p++] = '/';
233051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                }
233151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            } else
233251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                throw new InternalError(); // ASSERT false
233351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
233451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
233551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        return p;
233651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
233751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
233851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
233951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // Remove "." segments from the given path, and remove segment pairs
234051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // consisting of a non-".." segment followed by a ".." segment.
234151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    //
23429af8755ec458552ad55adb2d106fd0915fd860e9Yi Kong    private static void removeDots(char[] path, int[] segs, boolean removeLeading) {
234351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        int ns = segs.length;
234451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        int end = path.length - 1;
234551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
234651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        for (int i = 0; i < ns; i++) {
234751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            int dots = 0;               // Number of dots found (0, 1, or 2)
234851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
234951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            // Find next occurrence of "." or ".."
235051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            do {
235151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                int p = segs[i];
235251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                if (path[p] == '.') {
235351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    if (p == end) {
235451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                        dots = 1;
235551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                        break;
235651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    } else if (path[p + 1] == '\0') {
235751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                        dots = 1;
235851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                        break;
235951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    } else if ((path[p + 1] == '.')
236051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                               && ((p + 1 == end)
236151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                                   || (path[p + 2] == '\0'))) {
236251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                        dots = 2;
236351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                        break;
236451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    }
236551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                }
236651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                i++;
236751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            } while (i < ns);
236851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if ((i > ns) || (dots == 0))
236951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                break;
237051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
237151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (dots == 1) {
237251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                // Remove this occurrence of "."
237351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                segs[i] = -1;
237451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            } else {
237551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                // If there is a preceding non-".." segment, remove both that
2376bd3fc1206b075a1eada8f6229d68cdb114a5799aYi Kong                // segment and this occurrence of ".."
237751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                int j;
237851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                for (j = i - 1; j >= 0; j--) {
237951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    if (segs[j] != -1) break;
238051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                }
238151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                if (j >= 0) {
238251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    int q = segs[j];
238351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    if (!((path[q] == '.')
238451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                          && (path[q + 1] == '.')
238551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                          && (path[q + 2] == '\0'))) {
238651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                        segs[i] = -1;
238751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                        segs[j] = -1;
238851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    }
23899af8755ec458552ad55adb2d106fd0915fd860e9Yi Kong                } else if (removeLeading) {
2390bd3fc1206b075a1eada8f6229d68cdb114a5799aYi Kong                    // This is a leading ".." segment. Per RFC 3986 RI, this should be removed as
2391bd3fc1206b075a1eada8f6229d68cdb114a5799aYi Kong                    // well. This fixes RFC 2396 "abnormal" examples.
2392bd3fc1206b075a1eada8f6229d68cdb114a5799aYi Kong                    // http://b/25897693
2393bd3fc1206b075a1eada8f6229d68cdb114a5799aYi Kong                    segs[i] = -1;
239451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                }
239551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            }
239651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
239751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
239851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
239951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
240051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // DEVIATION: If the normalized path is relative, and if the first
240151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // segment could be parsed as a scheme name, then prepend a "." segment
240251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    //
240351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static void maybeAddLeadingDot(char[] path, int[] segs) {
240451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
240551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if (path[0] == '\0')
240651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            // The path is absolute
240751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            return;
240851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
240951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        int ns = segs.length;
241051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        int f = 0;                      // Index of first segment
241151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        while (f < ns) {
241251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (segs[f] >= 0)
241351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                break;
241451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            f++;
241551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
241651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if ((f >= ns) || (f == 0))
241751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            // The path is empty, or else the original first segment survived,
241851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            // in which case we already know that no leading "." is needed
241951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            return;
242051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
242151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        int p = segs[f];
242251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        while ((p < path.length) && (path[p] != ':') && (path[p] != '\0')) p++;
242351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if (p >= path.length || path[p] == '\0')
242451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            // No colon in first segment, so no "." needed
242551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            return;
242651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
242751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // At this point we know that the first segment is unused,
242851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // hence we can insert a "." segment at that position
242951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        path[0] = '.';
243051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        path[1] = '\0';
243151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        segs[0] = 0;
243251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
243351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
243451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
243551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // Normalize the given path string.  A normal path string has no empty
243651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // segments (i.e., occurrences of "//"), no segments equal to ".", and no
243751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // segments equal to ".." that are preceded by a segment not equal to "..".
243851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // In contrast to Unix-style pathname normalization, for URI paths we
243951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // always retain trailing slashes.
244051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    //
244151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static String normalize(String ps) {
24429af8755ec458552ad55adb2d106fd0915fd860e9Yi Kong        return normalize(ps, false);
24439af8755ec458552ad55adb2d106fd0915fd860e9Yi Kong    }
24449af8755ec458552ad55adb2d106fd0915fd860e9Yi Kong
24459af8755ec458552ad55adb2d106fd0915fd860e9Yi Kong    private static String normalize(String ps, boolean removeLeading) {
244651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
244751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // Does this path need normalization?
244851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        int ns = needsNormalization(ps);        // Number of segments
244951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if (ns < 0)
245051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            // Nope -- just return it
245151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            return ps;
245251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
245351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        char[] path = ps.toCharArray();         // Path in char-array form
245451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
245551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // Split path into segments
245651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        int[] segs = new int[ns];               // Segment-index array
245751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        split(path, segs);
245851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
245951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // Remove dots
24609af8755ec458552ad55adb2d106fd0915fd860e9Yi Kong        removeDots(path, segs, removeLeading);
246151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
246251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // Prevent scheme-name confusion
246351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        maybeAddLeadingDot(path, segs);
246451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
246551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // Join the remaining segments and return the result
246651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        String s = new String(path, 0, join(path, segs));
246751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if (s.equals(ps)) {
246851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            // string was already normalized
246951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            return ps;
247051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
247151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        return s;
247251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
247351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
247451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
247551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
247651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // -- Character classes for parsing --
247751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
247851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // RFC2396 precisely specifies which characters in the US-ASCII charset are
247951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // permissible in the various components of a URI reference.  We here
248051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // define a set of mask pairs to aid in enforcing these restrictions.  Each
248151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // mask pair consists of two longs, a low mask and a high mask.  Taken
248251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // together they represent a 128-bit mask, where bit i is set iff the
248351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // character with value i is permitted.
248451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    //
248551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // This approach is more efficient than sequentially searching arrays of
248651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // permitted characters.  It could be made still more efficient by
248751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // precompiling the mask information so that a character's presence in a
248851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // given mask could be determined by a single table lookup.
248951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
249051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // Compute the low-order mask for the characters in the given string
249151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static long lowMask(String chars) {
249251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        int n = chars.length();
249351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        long m = 0;
249451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        for (int i = 0; i < n; i++) {
249551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            char c = chars.charAt(i);
249651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (c < 64)
249751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                m |= (1L << c);
249851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
249951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        return m;
250051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
250151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
250251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // Compute the high-order mask for the characters in the given string
250351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static long highMask(String chars) {
250451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        int n = chars.length();
250551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        long m = 0;
250651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        for (int i = 0; i < n; i++) {
250751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            char c = chars.charAt(i);
250851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if ((c >= 64) && (c < 128))
250951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                m |= (1L << (c - 64));
251051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
251151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        return m;
251251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
251351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
251451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // Compute a low-order mask for the characters
251551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // between first and last, inclusive
251651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static long lowMask(char first, char last) {
251751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        long m = 0;
251851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        int f = Math.max(Math.min(first, 63), 0);
251951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        int l = Math.max(Math.min(last, 63), 0);
252051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        for (int i = f; i <= l; i++)
252151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            m |= 1L << i;
252251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        return m;
252351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
252451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
252551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // Compute a high-order mask for the characters
252651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // between first and last, inclusive
252751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static long highMask(char first, char last) {
252851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        long m = 0;
252951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        int f = Math.max(Math.min(first, 127), 64) - 64;
253051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        int l = Math.max(Math.min(last, 127), 64) - 64;
253151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        for (int i = f; i <= l; i++)
253251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            m |= 1L << i;
253351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        return m;
253451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
253551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
253651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // Tell whether the given character is permitted by the given mask pair
253751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static boolean match(char c, long lowMask, long highMask) {
253851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if (c == 0) // 0 doesn't have a slot in the mask. So, it never matches.
253951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            return false;
254051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if (c < 64)
254151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            return ((1L << c) & lowMask) != 0;
254251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if (c < 128)
254351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            return ((1L << (c - 64)) & highMask) != 0;
254451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        return false;
254551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
254651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
254751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // Character-class masks, in reverse order from RFC2396 because
254851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // initializers for static fields cannot make forward references.
254951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
255051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // digit    = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" |
255151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    //            "8" | "9"
255251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static final long L_DIGIT = lowMask('0', '9');
255351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static final long H_DIGIT = 0L;
255451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
255551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // upalpha  = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" |
255651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    //            "J" | "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" |
255751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    //            "S" | "T" | "U" | "V" | "W" | "X" | "Y" | "Z"
255851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static final long L_UPALPHA = 0L;
255951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static final long H_UPALPHA = highMask('A', 'Z');
256051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
256151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" |
256251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    //            "j" | "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" |
256351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    //            "s" | "t" | "u" | "v" | "w" | "x" | "y" | "z"
256451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static final long L_LOWALPHA = 0L;
256551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static final long H_LOWALPHA = highMask('a', 'z');
256651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
256751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // alpha         = lowalpha | upalpha
256851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static final long L_ALPHA = L_LOWALPHA | L_UPALPHA;
256951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static final long H_ALPHA = H_LOWALPHA | H_UPALPHA;
257051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
257151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // alphanum      = alpha | digit
257251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static final long L_ALPHANUM = L_DIGIT | L_ALPHA;
257351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static final long H_ALPHANUM = H_DIGIT | H_ALPHA;
257451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
257551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // hex           = digit | "A" | "B" | "C" | "D" | "E" | "F" |
257651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    //                         "a" | "b" | "c" | "d" | "e" | "f"
257751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static final long L_HEX = L_DIGIT;
257851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static final long H_HEX = highMask('A', 'F') | highMask('a', 'f');
257951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
258051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // mark          = "-" | "_" | "." | "!" | "~" | "*" | "'" |
258151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    //                 "(" | ")"
258251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static final long L_MARK = lowMask("-_.!~*'()");
258351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static final long H_MARK = highMask("-_.!~*'()");
258451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
258551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // unreserved    = alphanum | mark
258651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static final long L_UNRESERVED = L_ALPHANUM | L_MARK;
258751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static final long H_UNRESERVED = H_ALPHANUM | H_MARK;
258851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
258951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // reserved      = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
259051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    //                 "$" | "," | "[" | "]"
259151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // Added per RFC2732: "[", "]"
259251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static final long L_RESERVED = lowMask(";/?:@&=+$,[]");
259351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static final long H_RESERVED = highMask(";/?:@&=+$,[]");
259451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
259551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // The zero'th bit is used to indicate that escape pairs and non-US-ASCII
259651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // characters are allowed; this is handled by the scanEscape method below.
259751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static final long L_ESCAPED = 1L;
259851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static final long H_ESCAPED = 0L;
259951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
260051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // uric          = reserved | unreserved | escaped
260151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static final long L_URIC = L_RESERVED | L_UNRESERVED | L_ESCAPED;
260251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static final long H_URIC = H_RESERVED | H_UNRESERVED | H_ESCAPED;
260351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
260451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // pchar         = unreserved | escaped |
260551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    //                 ":" | "@" | "&" | "=" | "+" | "$" | ","
260651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static final long L_PCHAR
260751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        = L_UNRESERVED | L_ESCAPED | lowMask(":@&=+$,");
260851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static final long H_PCHAR
260951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        = H_UNRESERVED | H_ESCAPED | highMask(":@&=+$,");
261051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
261151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // All valid path characters
261251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static final long L_PATH = L_PCHAR | lowMask(";/");
261351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static final long H_PATH = H_PCHAR | highMask(";/");
261451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
261551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // Dash, for use in domainlabel and toplabel
261651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static final long L_DASH = lowMask("-");
261751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static final long H_DASH = highMask("-");
261851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
26199265d1f1d5b5e8404b79aab2a5c48b1ab3fbb4daYi Kong    // UNDERSCORE, for use in domainlabel and toplabel
26209265d1f1d5b5e8404b79aab2a5c48b1ab3fbb4daYi Kong    private static final long L_UNDERSCORE = lowMask("_");
26219265d1f1d5b5e8404b79aab2a5c48b1ab3fbb4daYi Kong    private static final long H_UNDERSCORE = highMask("_");
26229265d1f1d5b5e8404b79aab2a5c48b1ab3fbb4daYi Kong
262351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // Dot, for use in hostnames
262451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static final long L_DOT = lowMask(".");
262551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static final long H_DOT = highMask(".");
262651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
262751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // userinfo      = *( unreserved | escaped |
262851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    //                    ";" | ":" | "&" | "=" | "+" | "$" | "," )
262951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static final long L_USERINFO
263051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        = L_UNRESERVED | L_ESCAPED | lowMask(";:&=+$,");
263151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static final long H_USERINFO
263251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        = H_UNRESERVED | H_ESCAPED | highMask(";:&=+$,");
263351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
263451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // reg_name      = 1*( unreserved | escaped | "$" | "," |
263551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    //                     ";" | ":" | "@" | "&" | "=" | "+" )
263651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static final long L_REG_NAME
263751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        = L_UNRESERVED | L_ESCAPED | lowMask("$,;:@&=+");
263851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static final long H_REG_NAME
263951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        = H_UNRESERVED | H_ESCAPED | highMask("$,;:@&=+");
264051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
264151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // All valid characters for server-based authorities
264251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static final long L_SERVER
264351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        = L_USERINFO | L_ALPHANUM | L_DASH | lowMask(".:@[]");
264451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static final long H_SERVER
264551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        = H_USERINFO | H_ALPHANUM | H_DASH | highMask(".:@[]");
264651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
264751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // Special case of server authority that represents an IPv6 address
264851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // In this case, a % does not signify an escape sequence
264951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static final long L_SERVER_PERCENT
265051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        = L_SERVER | lowMask("%");
265151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static final long H_SERVER_PERCENT
265251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        = H_SERVER | highMask("%");
265351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static final long L_LEFT_BRACKET = lowMask("[");
265451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static final long H_LEFT_BRACKET = highMask("[");
265551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
265651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // scheme        = alpha *( alpha | digit | "+" | "-" | "." )
265751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static final long L_SCHEME = L_ALPHA | L_DIGIT | lowMask("+-.");
265851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static final long H_SCHEME = H_ALPHA | H_DIGIT | highMask("+-.");
265951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
266051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // uric_no_slash = unreserved | escaped | ";" | "?" | ":" | "@" |
266151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    //                 "&" | "=" | "+" | "$" | ","
266251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static final long L_URIC_NO_SLASH
266351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        = L_UNRESERVED | L_ESCAPED | lowMask(";?:@&=+$,");
266451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static final long H_URIC_NO_SLASH
266551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        = H_UNRESERVED | H_ESCAPED | highMask(";?:@&=+$,");
266651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
266751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
266851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // -- Escaping and encoding --
266951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
267051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private final static char[] hexDigits = {
267151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        '0', '1', '2', '3', '4', '5', '6', '7',
267251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'
267351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    };
267451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
267551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static void appendEscape(StringBuffer sb, byte b) {
267651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        sb.append('%');
267751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        sb.append(hexDigits[(b >> 4) & 0x0f]);
267851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        sb.append(hexDigits[(b >> 0) & 0x0f]);
267951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
268051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
268151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static void appendEncoded(StringBuffer sb, char c) {
268251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        ByteBuffer bb = null;
268351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        try {
268451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            bb = ThreadLocalCoders.encoderFor("UTF-8")
268551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                .encode(CharBuffer.wrap("" + c));
268651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        } catch (CharacterCodingException x) {
268751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            assert false;
268851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
268951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        while (bb.hasRemaining()) {
269051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            int b = bb.get() & 0xff;
269151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (b >= 0x80)
269251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                appendEscape(sb, (byte)b);
269351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            else
269451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                sb.append((char)b);
269551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
269651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
269751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
269851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // Quote any characters in s that are not permitted
269951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // by the given mask pair
270051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    //
270151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static String quote(String s, long lowMask, long highMask) {
270251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        int n = s.length();
270351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        StringBuffer sb = null;
270451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        boolean allowNonASCII = ((lowMask & L_ESCAPED) != 0);
270551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        for (int i = 0; i < s.length(); i++) {
270651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            char c = s.charAt(i);
270751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (c < '\u0080') {
270851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                if (!match(c, lowMask, highMask)) {
270951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    if (sb == null) {
271051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                        sb = new StringBuffer();
271151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                        sb.append(s.substring(0, i));
271251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    }
271351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    appendEscape(sb, (byte)c);
271451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                } else {
271551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    if (sb != null)
271651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                        sb.append(c);
271751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                }
271851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            } else if (allowNonASCII
271951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                       && (Character.isSpaceChar(c)
272051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                           || Character.isISOControl(c))) {
272151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                if (sb == null) {
272251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    sb = new StringBuffer();
272351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    sb.append(s.substring(0, i));
272451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                }
272551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                appendEncoded(sb, c);
272651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            } else {
272751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                if (sb != null)
272851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    sb.append(c);
272951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            }
273051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
273151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        return (sb == null) ? s : sb.toString();
273251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
273351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
273451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // Encodes all characters >= \u0080 into escaped, normalized UTF-8 octets,
273551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // assuming that s is otherwise legal
273651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    //
273751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static String encode(String s) {
273851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        int n = s.length();
273951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if (n == 0)
274051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            return s;
274151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
274251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // First check whether we actually need to encode
274351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        for (int i = 0;;) {
274451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (s.charAt(i) >= '\u0080')
274551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                break;
274651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (++i >= n)
274751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                return s;
274851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
274951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
275051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        String ns = Normalizer.normalize(s, Normalizer.Form.NFC);
275151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        ByteBuffer bb = null;
275251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        try {
275351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            bb = ThreadLocalCoders.encoderFor("UTF-8")
275451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                .encode(CharBuffer.wrap(ns));
275551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        } catch (CharacterCodingException x) {
275651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            assert false;
275751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
275851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
275951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        StringBuffer sb = new StringBuffer();
276051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        while (bb.hasRemaining()) {
276151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            int b = bb.get() & 0xff;
276251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (b >= 0x80)
276351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                appendEscape(sb, (byte)b);
276451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            else
276551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                sb.append((char)b);
276651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
276751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        return sb.toString();
276851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
276951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
277051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static int decode(char c) {
277151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if ((c >= '0') && (c <= '9'))
277251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            return c - '0';
277351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if ((c >= 'a') && (c <= 'f'))
277451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            return c - 'a' + 10;
277551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if ((c >= 'A') && (c <= 'F'))
277651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            return c - 'A' + 10;
277751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        assert false;
277851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        return -1;
277951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
278051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
278151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static byte decode(char c1, char c2) {
278251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        return (byte)(  ((decode(c1) & 0xf) << 4)
278351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                      | ((decode(c2) & 0xf) << 0));
278451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
278551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
278651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // Evaluates all escapes in s, applying UTF-8 decoding if needed.  Assumes
278751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // that escapes are well-formed syntactically, i.e., of the form %XX.  If a
278851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // sequence of escaped octets is not valid UTF-8 then the erroneous octets
278951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // are replaced with '\uFFFD'.
279051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // Exception: any "%" found between "[]" is left alone. It is an IPv6 literal
279151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    //            with a scope_id
279251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    //
279351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static String decode(String s) {
279451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if (s == null)
279551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            return s;
279651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        int n = s.length();
279751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if (n == 0)
279851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            return s;
279951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if (s.indexOf('%') < 0)
280051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            return s;
280151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
280251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        StringBuffer sb = new StringBuffer(n);
280351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        ByteBuffer bb = ByteBuffer.allocate(n);
280451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        CharBuffer cb = CharBuffer.allocate(n);
280551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        CharsetDecoder dec = ThreadLocalCoders.decoderFor("UTF-8")
280651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            .onMalformedInput(CodingErrorAction.REPLACE)
280751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            .onUnmappableCharacter(CodingErrorAction.REPLACE);
280851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
280951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // This is not horribly efficient, but it will do for now
281051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        char c = s.charAt(0);
281151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        boolean betweenBrackets = false;
281251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
281351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        for (int i = 0; i < n;) {
281451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            assert c == s.charAt(i);    // Loop invariant
281551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (c == '[') {
281651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                betweenBrackets = true;
281751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            } else if (betweenBrackets && c == ']') {
281851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                betweenBrackets = false;
281951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            }
282051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (c != '%' || betweenBrackets) {
282151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                sb.append(c);
282251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                if (++i >= n)
282351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    break;
282451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                c = s.charAt(i);
282551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                continue;
282651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            }
282751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            bb.clear();
282851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            int ui = i;
282951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            for (;;) {
283051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                assert (n - i >= 2);
283151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                bb.put(decode(s.charAt(++i), s.charAt(++i)));
283251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                if (++i >= n)
283351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    break;
283451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                c = s.charAt(i);
283551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                if (c != '%')
283651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    break;
283751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            }
283851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            bb.flip();
283951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            cb.clear();
284051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            dec.reset();
284151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            CoderResult cr = dec.decode(bb, cb, true);
284251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            assert cr.isUnderflow();
284351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            cr = dec.flush(cb);
284451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            assert cr.isUnderflow();
284551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            sb.append(cb.flip().toString());
284651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
284751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
284851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        return sb.toString();
284951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
285051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
285151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
285251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // -- Parsing --
285351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
285451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // For convenience we wrap the input URI string in a new instance of the
285551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // following internal class.  This saves always having to pass the input
285651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // string as an argument to each internal scan/parse method.
285751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
285851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private class Parser {
285951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
286051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        private String input;           // URI input string
286151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        private boolean requireServerAuthority = false;
286251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
286351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        Parser(String s) {
286451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            input = s;
286551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            string = s;
286651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
286751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
286851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // -- Methods for throwing URISyntaxException in various ways --
286951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
287051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        private void fail(String reason) throws URISyntaxException {
287151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            throw new URISyntaxException(input, reason);
287251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
287351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
287451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        private void fail(String reason, int p) throws URISyntaxException {
287551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            throw new URISyntaxException(input, reason, p);
287651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
287751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
287851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        private void failExpecting(String expected, int p)
287951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            throws URISyntaxException
288051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        {
288151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            fail("Expected " + expected, p);
288251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
288351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
288451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        private void failExpecting(String expected, String prior, int p)
288551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            throws URISyntaxException
288651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        {
288751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            fail("Expected " + expected + " following " + prior, p);
288851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
288951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
289051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
289151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // -- Simple access to the input string --
289251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
289351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // Return a substring of the input string
289451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //
289551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        private String substring(int start, int end) {
289651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            return input.substring(start, end);
289751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
289851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
289951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // Return the char at position p,
290051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // assuming that p < input.length()
290151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //
290251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        private char charAt(int p) {
290351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            return input.charAt(p);
290451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
290551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
290651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // Tells whether start < end and, if so, whether charAt(start) == c
290751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //
290851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        private boolean at(int start, int end, char c) {
290951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            return (start < end) && (charAt(start) == c);
291051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
291151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
// Tells whether start + s.length() <= end and, if so,
291351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // whether the chars at the start position match s exactly
291451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //
291551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        private boolean at(int start, int end, String s) {
291651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            int p = start;
291751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            int sn = s.length();
291851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (sn > end - p)
291951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                return false;
292051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            int i = 0;
292151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            while (i < sn) {
292251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                if (charAt(p++) != s.charAt(i)) {
292351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    break;
292451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                }
292551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                i++;
292651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            }
292751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            return (i == sn);
292851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
292951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
293051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
293151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // -- Scanning --
293251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
293351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // The various scan and parse methods that follow use a uniform
293451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // convention of taking the current start position and end index as
293551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // their first two arguments.  The start is inclusive while the end is
293651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // exclusive, just as in the String class, i.e., a start/end pair
293751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // denotes the half-open interval [start, end) of the input string.
293851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //
293951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // These methods never proceed past the end position.  They may return
294051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // -1 to indicate outright failure, but more often they simply return
294151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // the position of the first char after the last char scanned.  Thus
294251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // a typical idiom is
294351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //
294451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //     int p = start;
294551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //     int q = scan(p, end, ...);
294651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //     if (q > p)
294751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //         // We scanned something
294851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //         ...;
294951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //     else if (q == p)
295051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //         // We scanned nothing
295151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //         ...;
295251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //     else if (q == -1)
295351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //         // Something went wrong
295451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //         ...;
295551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
295651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
295751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // Scan a specific char: If the char at the given start position is
295851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // equal to c, return the index of the next char; otherwise, return the
295951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // start position.
296051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //
296151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        private int scan(int start, int end, char c) {
296251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if ((start < end) && (charAt(start) == c))
296351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                return start + 1;
296451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            return start;
296551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
296651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
296751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // Scan forward from the given start position.  Stop at the first char
296851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // in the err string (in which case -1 is returned), or the first char
296951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // in the stop string (in which case the index of that char is
297051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // returned), or the end position (in which case the end position is
297151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // returned).  May return the start position if the first char is in
297251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // the stop string or the range is empty.
297351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //
297451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        private int scan(int start, int end, String err, String stop) {
297551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            int p = start;
297651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            while (p < end) {
297751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                char c = charAt(p);
297851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                if (err.indexOf(c) >= 0)
297951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    return -1;
298051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                if (stop.indexOf(c) >= 0)
298151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    break;
298251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                p++;
298351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            }
298451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            return p;
298551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
298651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
298751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // Scan a potential escape sequence, starting at the given position,
298851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // with the given first char (i.e., charAt(start) == first).
298951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //
299051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // This method assumes that if escapes are allowed then visible
299151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // non-US-ASCII chars are also allowed.
299251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //
299351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        private int scanEscape(int start, int n, char first)
299451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            throws URISyntaxException
299551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        {
299651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            int p = start;
299751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            char c = first;
299851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (c == '%') {
299951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                // Process escape pair
300051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                if ((p + 3 <= n)
300151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    && match(charAt(p + 1), L_HEX, H_HEX)
300251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    && match(charAt(p + 2), L_HEX, H_HEX)) {
300351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    return p + 3;
300451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                }
300551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                fail("Malformed escape pair", p);
300651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            } else if ((c > 128)
300751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                       && !Character.isSpaceChar(c)
300851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                       && !Character.isISOControl(c)) {
300951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                // Allow unescaped but visible non-US-ASCII chars
301051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                return p + 1;
301151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            }
301251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            return p;
301351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
301451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
301551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // Scan chars that match the given mask pair
301651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //
301751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        private int scan(int start, int n, long lowMask, long highMask)
301851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            throws URISyntaxException
301951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        {
302051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            int p = start;
302151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            while (p < n) {
302251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                char c = charAt(p);
302351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                if (match(c, lowMask, highMask)) {
302451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    p++;
302551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    continue;
302651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                }
302751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                if ((lowMask & L_ESCAPED) != 0) {
302851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    int q = scanEscape(p, n, c);
302951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    if (q > p) {
303051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                        p = q;
303151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                        continue;
303251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    }
303351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                }
303451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                break;
303551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            }
303651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            return p;
303751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
303851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
303951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // Check that each of the chars in [start, end) matches the given mask
304051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //
304151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        private void checkChars(int start, int end,
304251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                                long lowMask, long highMask,
304351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                                String what)
304451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            throws URISyntaxException
304551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        {
304651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            int p = scan(start, end, lowMask, highMask);
304751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (p < end)
304851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                fail("Illegal character in " + what, p);
304951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
305051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
305151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // Check that the char at position p matches the given mask
305251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //
305351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        private void checkChar(int p,
305451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                               long lowMask, long highMask,
305551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                               String what)
305651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            throws URISyntaxException
305751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        {
305851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            checkChars(p, p + 1, lowMask, highMask, what);
305951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
306051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
306151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
306251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // -- Parsing --
306351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
306451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // [<scheme>:]<scheme-specific-part>[#<fragment>]
306551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //
306651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        void parse(boolean rsa) throws URISyntaxException {
306751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            requireServerAuthority = rsa;
306851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            int ssp;                    // Start of scheme-specific part
306951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            int n = input.length();
307051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            int p = scan(0, n, "/?#", ":");
307151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if ((p >= 0) && at(p, n, ':')) {
307251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                if (p == 0)
307351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    failExpecting("scheme name", 0);
307451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                checkChar(0, L_ALPHA, H_ALPHA, "scheme name");
307551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                checkChars(1, p, L_SCHEME, H_SCHEME, "scheme name");
307651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                scheme = substring(0, p);
307751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                p++;                    // Skip ':'
307851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                ssp = p;
307951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                if (at(p, n, '/')) {
308051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    p = parseHierarchical(p, n);
308151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                } else {
308251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    int q = scan(p, n, "", "#");
308351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    if (q <= p)
308451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                        failExpecting("scheme-specific part", p);
308551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    checkChars(p, q, L_URIC, H_URIC, "opaque part");
308651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    p = q;
308751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                }
308851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            } else {
308951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                ssp = 0;
309051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                p = parseHierarchical(0, n);
309151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            }
309251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            schemeSpecificPart = substring(ssp, p);
309351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (at(p, n, '#')) {
309451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                checkChars(p + 1, n, L_URIC, H_URIC, "fragment");
309551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                fragment = substring(p + 1, n);
309651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                p = n;
309751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            }
309851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (p < n)
309951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                fail("end of URI", p);
310051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
310151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
310251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // [//authority]<path>[?<query>]
310351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //
310451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // DEVIATION from RFC2396: We allow an empty authority component as
310551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // long as it's followed by a non-empty path, query component, or
310651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // fragment component.  This is so that URIs such as "file:///foo/bar"
310751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // will parse.  This seems to be the intent of RFC2396, though the
310851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // grammar does not permit it.  If the authority is empty then the
310951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // userInfo, host, and port components are undefined.
311051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //
311151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // DEVIATION from RFC2396: We allow empty relative paths.  This seems
311251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // to be the intent of RFC2396, but the grammar does not permit it.
311351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // The primary consequence of this deviation is that "#f" parses as a
311451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // relative URI with an empty path.
311551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //
311651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        private int parseHierarchical(int start, int n)
311751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            throws URISyntaxException
311851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        {
311951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            int p = start;
312051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (at(p, n, '/') && at(p + 1, n, '/')) {
312151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                p += 2;
312251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                int q = scan(p, n, "", "/?#");
312351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                if (q > p) {
312451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    p = parseAuthority(p, q);
312551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                } else if (q < n) {
312651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    // DEVIATION: Allow empty authority prior to non-empty
312751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    // path, query component or fragment identifier
312851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                } else
312951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    failExpecting("authority", p);
313051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            }
313151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            int q = scan(p, n, "", "?#"); // DEVIATION: May be empty
313251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            checkChars(p, q, L_PATH, H_PATH, "path");
313351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            path = substring(p, q);
313451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            p = q;
313551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (at(p, n, '?')) {
313651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                p++;
313751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                q = scan(p, n, "", "#");
313851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                checkChars(p, q, L_URIC, H_URIC, "query");
313951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                query = substring(p, q);
314051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                p = q;
314151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            }
314251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            return p;
314351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
314451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
314551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // authority     = server | reg_name
314651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //
314751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // Ambiguity: An authority that is a registry name rather than a server
314851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // might have a prefix that parses as a server.  We use the fact that
314951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // the authority component is always followed by '/' or the end of the
315051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // input string to resolve this: If the complete authority did not
315151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // parse as a server then we try to parse it as a registry name.
315251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //
315351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        private int parseAuthority(int start, int n)
315451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            throws URISyntaxException
315551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        {
315651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            int p = start;
315751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            int q = p;
315851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            URISyntaxException ex = null;
315951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
316051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            boolean serverChars;
316151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            boolean regChars;
316251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
316351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (scan(p, n, "", "]") > p) {
316451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                // contains a literal IPv6 address, therefore % is allowed
316551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                serverChars = (scan(p, n, L_SERVER_PERCENT, H_SERVER_PERCENT) == n);
316651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            } else {
316751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                serverChars = (scan(p, n, L_SERVER, H_SERVER) == n);
316851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            }
316951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            regChars = (scan(p, n, L_REG_NAME, H_REG_NAME) == n);
317051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
317151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (regChars && !serverChars) {
317251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                // Must be a registry-based authority
317351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                authority = substring(p, n);
317451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                return n;
317551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            }
317651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
317751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (serverChars) {
317851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                // Might be (probably is) a server-based authority, so attempt
317951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                // to parse it as such.  If the attempt fails, try to treat it
318051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                // as a registry-based authority.
318151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                try {
318251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    q = parseServer(p, n);
318351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    if (q < n)
318451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                        failExpecting("end of authority", q);
318551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    authority = substring(p, n);
318651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                } catch (URISyntaxException x) {
318751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    // Undo results of failed parse
318851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    userInfo = null;
318951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    host = null;
319051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    port = -1;
319151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    if (requireServerAuthority) {
319251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                        // If we're insisting upon a server-based authority,
319351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                        // then just re-throw the exception
319451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                        throw x;
319551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    } else {
319651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                        // Save the exception in case it doesn't parse as a
319751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                        // registry either
319851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                        ex = x;
319951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                        q = p;
320051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    }
320151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                }
320251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            }
320351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
320451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (q < n) {
320551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                if (regChars) {
320651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    // Registry-based authority
320751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    authority = substring(p, n);
320851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                } else if (ex != null) {
320951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    // Re-throw exception; it was probably due to
321051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    // a malformed IPv6 address
321151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    throw ex;
321251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                } else {
321351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    fail("Illegal character in authority", q);
321451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                }
321551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            }
321651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
321751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            return n;
321851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
321951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
322051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
322151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // [<userinfo>@]<host>[:<port>]
322251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //
322351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        private int parseServer(int start, int n)
322451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            throws URISyntaxException
322551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        {
322651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            int p = start;
322751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            int q;
322851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
322951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            // userinfo
323051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            q = scan(p, n, "/?#", "@");
323151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if ((q >= p) && at(q, n, '@')) {
323251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                checkChars(p, q, L_USERINFO, H_USERINFO, "user info");
323351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                userInfo = substring(p, q);
323451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                p = q + 1;              // Skip '@'
323551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            }
323651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
323751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            // hostname, IPv4 address, or IPv6 address
323851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (at(p, n, '[')) {
323951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                // DEVIATION from RFC2396: Support IPv6 addresses, per RFC2732
324051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                p++;
324151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                q = scan(p, n, "/?#", "]");
324251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                if ((q > p) && at(q, n, ']')) {
324351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    // look for a "%" scope id
324451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    int r = scan (p, q, "", "%");
324551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    if (r > p) {
324651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                        parseIPv6Reference(p, r);
324751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                        if (r+1 == q) {
324851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                            fail ("scope id expected");
324951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                        }
325051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                        checkChars (r+1, q, L_ALPHANUM, H_ALPHANUM,
325151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                                                "scope id");
325251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    } else {
325351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                        parseIPv6Reference(p, q);
325451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    }
325551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    host = substring(p-1, q+1);
325651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    p = q + 1;
325751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                } else {
325851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    failExpecting("closing bracket for IPv6 address", q);
325951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                }
326051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            } else {
326151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                q = parseIPv4Address(p, n);
326251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                if (q <= p)
326351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    q = parseHostname(p, n);
326451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                p = q;
326551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            }
326651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
326751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            // port
326851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (at(p, n, ':')) {
326951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                p++;
327051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                q = scan(p, n, "", "/");
327151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                if (q > p) {
327251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    checkChars(p, q, L_DIGIT, H_DIGIT, "port number");
327351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    try {
327451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                        port = Integer.parseInt(substring(p, q));
327551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    } catch (NumberFormatException x) {
327651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                        fail("Malformed port number", p);
327751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    }
327851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    p = q;
327951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                }
328051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            }
328151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (p < n)
328251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                failExpecting("port number", p);
328351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
328451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            return p;
328551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
328651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
328751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // Scan a string of decimal digits whose value fits in a byte
328851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //
328951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        private int scanByte(int start, int n)
329051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            throws URISyntaxException
329151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        {
329251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            int p = start;
329351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            int q = scan(p, n, L_DIGIT, H_DIGIT);
329451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (q <= p) return q;
329551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (Integer.parseInt(substring(p, q)) > 255) return p;
329651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            return q;
329751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
329851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
329951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // Scan an IPv4 address.
330051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //
330151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // If the strict argument is true then we require that the given
330251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // interval contain nothing besides an IPv4 address; if it is false
330351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // then we only require that it start with an IPv4 address.
330451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //
330551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // If the interval does not contain or start with (depending upon the
330651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // strict argument) a legal IPv4 address characters then we return -1
330751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // immediately; otherwise we insist that these characters parse as a
330851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // legal IPv4 address and throw an exception on failure.
330951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //
331051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // We assume that any string of decimal digits and dots must be an IPv4
331151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // address.  It won't parse as a hostname anyway, so making that
331251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // assumption here allows more meaningful exceptions to be thrown.
331351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //
331451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        private int scanIPv4Address(int start, int n, boolean strict)
331551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            throws URISyntaxException
331651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        {
331751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            int p = start;
331851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            int q;
331951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            int m = scan(p, n, L_DIGIT | L_DOT, H_DIGIT | H_DOT);
332051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if ((m <= p) || (strict && (m != n)))
332151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                return -1;
332251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            for (;;) {
332351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                // Per RFC2732: At most three digits per byte
332451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                // Further constraint: Each element fits in a byte
332551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                if ((q = scanByte(p, m)) <= p) break;   p = q;
332651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                if ((q = scan(p, m, '.')) <= p) break;  p = q;
332751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                if ((q = scanByte(p, m)) <= p) break;   p = q;
332851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                if ((q = scan(p, m, '.')) <= p) break;  p = q;
332951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                if ((q = scanByte(p, m)) <= p) break;   p = q;
333051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                if ((q = scan(p, m, '.')) <= p) break;  p = q;
333151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                if ((q = scanByte(p, m)) <= p) break;   p = q;
333251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                if (q < m) break;
333351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                return q;
333451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            }
333551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            fail("Malformed IPv4 address", q);
333651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            return -1;
333751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
333851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
333951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // Take an IPv4 address: Throw an exception if the given interval
334051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // contains anything except an IPv4 address
334151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //
334251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        private int takeIPv4Address(int start, int n, String expected)
334351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            throws URISyntaxException
334451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        {
334551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            int p = scanIPv4Address(start, n, true);
334651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (p <= start)
334751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                failExpecting(expected, start);
334851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            return p;
334951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
335051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
335151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // Attempt to parse an IPv4 address, returning -1 on failure but
335251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // allowing the given interval to contain [:<characters>] after
335351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // the IPv4 address.
335451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //
335551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        private int parseIPv4Address(int start, int n) {
335651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            int p;
335751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
335851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            try {
335951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                p = scanIPv4Address(start, n, false);
336051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            } catch (URISyntaxException x) {
336151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                return -1;
336251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            } catch (NumberFormatException nfe) {
336351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                return -1;
336451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            }
336551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
336651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (p > start && p < n) {
336751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                // IPv4 address is followed by something - check that
336851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                // it's a ":" as this is the only valid character to
336951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                // follow an address.
337051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                if (charAt(p) != ':') {
337151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    p = -1;
337251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                }
337351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            }
337451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
337551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (p > start)
337651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                host = substring(start, p);
337751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
337851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            return p;
337951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
338051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
338151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // hostname      = domainlabel [ "." ] | 1*( domainlabel "." ) toplabel [ "." ]
33829265d1f1d5b5e8404b79aab2a5c48b1ab3fbb4daYi Kong        // domainlabel   = alphanum | alphanum *( alphanum | "-" | "_" ) alphanum
33839265d1f1d5b5e8404b79aab2a5c48b1ab3fbb4daYi Kong        // toplabel      = alpha | alpha *( alphanum | "-" | "_" ) alphanum
338451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //
338551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        private int parseHostname(int start, int n)
338651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            throws URISyntaxException
338751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        {
338851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            int p = start;
338951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            int q;
339051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            int l = -1;                 // Start of last parsed label
339151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
339251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            do {
33939265d1f1d5b5e8404b79aab2a5c48b1ab3fbb4daYi Kong                // domainlabel = alphanum [ *( alphanum | "-" | "_" ) alphanum ]
3394bb540b832e94f1d23009524726bf07f9952b2ed6Yi Kong                //
33959265d1f1d5b5e8404b79aab2a5c48b1ab3fbb4daYi Kong                // The RFCs don't permit underscores in hostnames, but URI has to because a certain
33969265d1f1d5b5e8404b79aab2a5c48b1ab3fbb4daYi Kong                // large website doesn't seem to care about standards and specs.
33979265d1f1d5b5e8404b79aab2a5c48b1ab3fbb4daYi Kong                // http://code.google.com/p/android/issues/detail?id=37577
33989265d1f1d5b5e8404b79aab2a5c48b1ab3fbb4daYi Kong                // http://b/17579865
33999265d1f1d5b5e8404b79aab2a5c48b1ab3fbb4daYi Kong                // http://b/18016625
34009265d1f1d5b5e8404b79aab2a5c48b1ab3fbb4daYi Kong                // http://b/18023709
340151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                q = scan(p, n, L_ALPHANUM, H_ALPHANUM);
340251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                if (q <= p)
340351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    break;
340451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                l = p;
340551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                if (q > p) {
340651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    p = q;
34079265d1f1d5b5e8404b79aab2a5c48b1ab3fbb4daYi Kong                    q = scan(p, n, L_ALPHANUM | L_DASH | L_UNDERSCORE, H_ALPHANUM | H_DASH | H_UNDERSCORE);
340851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    if (q > p) {
340951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                        if (charAt(q - 1) == '-')
341051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                            fail("Illegal character in hostname", q - 1);
341151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                        p = q;
341251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    }
341351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                }
341451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                q = scan(p, n, '.');
341551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                if (q <= p)
341651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    break;
341751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                p = q;
341851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            } while (p < n);
341951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
342051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if ((p < n) && !at(p, n, ':'))
342151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                fail("Illegal character in hostname", p);
342251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
342351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (l < 0)
342451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                failExpecting("hostname", start);
342551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
342651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            // for a fully qualified hostname check that the rightmost
342751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            // label starts with an alpha character.
342851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (l > start && !match(charAt(l), L_ALPHA, H_ALPHA)) {
342951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                fail("Illegal character in hostname", l);
343051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            }
343151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
343251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            host = substring(start, p);
343351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            return p;
343451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
343551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
343651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
343751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // IPv6 address parsing, from RFC2373: IPv6 Addressing Architecture
343851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //
343951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // Bug: The grammar in RFC2373 Appendix B does not allow addresses of
344051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // the form ::12.34.56.78, which are clearly shown in the examples
344151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // earlier in the document.  Here is the original grammar:
344251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //
344351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //   IPv6address = hexpart [ ":" IPv4address ]
344451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //   hexpart     = hexseq | hexseq "::" [ hexseq ] | "::" [ hexseq ]
344551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //   hexseq      = hex4 *( ":" hex4)
344651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //   hex4        = 1*4HEXDIG
344751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //
344851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // We therefore use the following revised grammar:
344951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //
345051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //   IPv6address = hexseq [ ":" IPv4address ]
345151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //                 | hexseq [ "::" [ hexpost ] ]
345251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //                 | "::" [ hexpost ]
345351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //   hexpost     = hexseq | hexseq ":" IPv4address | IPv4address
345451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //   hexseq      = hex4 *( ":" hex4)
345551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //   hex4        = 1*4HEXDIG
345651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //
345751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // This covers all and only the following cases:
345851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //
345951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //   hexseq
346051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //   hexseq : IPv4address
346151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //   hexseq ::
346251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //   hexseq :: hexseq
346351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //   hexseq :: hexseq : IPv4address
346451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //   hexseq :: IPv4address
346551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //   :: hexseq
346651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //   :: hexseq : IPv4address
346751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //   :: IPv4address
346851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //   ::
346951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //
347051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // Additionally we constrain the IPv6 address as follows :-
347151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //
347251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //  i.  IPv6 addresses without compressed zeros should contain
347351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //      exactly 16 bytes.
347451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //
347551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //  ii. IPv6 addresses with compressed zeros should contain
347651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //      less than 16 bytes.
347751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
347851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        private int ipv6byteCount = 0;
347951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
348051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        private int parseIPv6Reference(int start, int n)
348151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            throws URISyntaxException
348251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        {
348351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            int p = start;
348451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            int q;
348551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            boolean compressedZeros = false;
348651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
348751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            q = scanHexSeq(p, n);
348851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
348951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (q > p) {
349051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                p = q;
349151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                if (at(p, n, "::")) {
349251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    compressedZeros = true;
349351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    p = scanHexPost(p + 2, n);
349451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                } else if (at(p, n, ':')) {
349551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    p = takeIPv4Address(p + 1,  n, "IPv4 address");
349651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    ipv6byteCount += 4;
349751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                }
349851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            } else if (at(p, n, "::")) {
349951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                compressedZeros = true;
350051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                p = scanHexPost(p + 2, n);
350151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            }
350251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (p < n)
350351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                fail("Malformed IPv6 address", start);
350451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (ipv6byteCount > 16)
350551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                fail("IPv6 address too long", start);
350651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (!compressedZeros && ipv6byteCount < 16)
350751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                fail("IPv6 address too short", start);
350851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (compressedZeros && ipv6byteCount == 16)
350951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                fail("Malformed IPv6 address", start);
351051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
351151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            return p;
351251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
351351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
351451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        private int scanHexPost(int start, int n)
351551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            throws URISyntaxException
351651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        {
351751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            int p = start;
351851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            int q;
351951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
352051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (p == n)
352151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                return p;
352251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
352351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            q = scanHexSeq(p, n);
352451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (q > p) {
352551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                p = q;
352651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                if (at(p, n, ':')) {
352751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    p++;
352851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    p = takeIPv4Address(p, n, "hex digits or IPv4 address");
352951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    ipv6byteCount += 4;
353051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                }
353151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            } else {
353251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                p = takeIPv4Address(p, n, "hex digits or IPv4 address");
353351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                ipv6byteCount += 4;
353451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            }
353551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            return p;
353651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
353751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
353851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // Scan a hex sequence; return -1 if one could not be scanned
353951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //
354051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        private int scanHexSeq(int start, int n)
354151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            throws URISyntaxException
354251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        {
354351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            int p = start;
354451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            int q;
354551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
354651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            q = scan(p, n, L_HEX, H_HEX);
354751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (q <= p)
354851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                return -1;
354951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (at(q, n, '.'))          // Beginning of IPv4 address
355051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                return -1;
355151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (q > p + 4)
355251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                fail("IPv6 hexadecimal digit sequence too long", p);
355351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            ipv6byteCount += 2;
355451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            p = q;
355551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            while (p < n) {
355651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                if (!at(p, n, ':'))
355751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    break;
355851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                if (at(p + 1, n, ':'))
355951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    break;              // "::"
356051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                p++;
356151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                q = scan(p, n, L_HEX, H_HEX);
356251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                if (q <= p)
356351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    failExpecting("digits for an IPv6 address", p);
356451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                if (at(q, n, '.')) {    // Beginning of IPv4 address
356551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    p--;
356651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    break;
356751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                }
356851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                if (q > p + 4)
356951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    fail("IPv6 hexadecimal digit sequence too long", p);
357051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                ipv6byteCount += 2;
357151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                p = q;
357251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            }
357351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
357451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            return p;
357551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
357651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
357751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
357851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
357951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski}
3580