151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski/*
22c87ad3a45cecf9e344487cad1abfdebe79f2c7cNarayan Kamath * Copyright (C) 2014 The Android Open Source Project
351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * This code is free software; you can redistribute it and/or modify it
751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * under the terms of the GNU General Public License version 2 only, as
851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * published by the Free Software Foundation.  Oracle designates this
951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * particular file as subject to the "Classpath" exception as provided
1051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * by Oracle in the LICENSE file that accompanied this code.
1151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
1251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * This code is distributed in the hope that it will be useful, but WITHOUT
1351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
1451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
1551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * version 2 for more details (a copy is included in the LICENSE file that
1651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * accompanied this code).
1751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
1851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * You should have received a copy of the GNU General Public License version
1951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * 2 along with this work; if not, write to the Free Software Foundation,
2051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
2151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
2251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
2351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * or visit www.oracle.com if you need additional information or have any
2451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * questions.
2551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski */
2651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
2751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebskipackage java.net;
2851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
2951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebskiimport java.io.IOException;
3051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebskiimport java.io.InvalidObjectException;
3151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebskiimport java.io.ObjectInputStream;
3251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebskiimport java.io.ObjectOutputStream;
3351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebskiimport java.io.Serializable;
3451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebskiimport java.nio.ByteBuffer;
3551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebskiimport java.nio.CharBuffer;
3651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebskiimport java.nio.charset.CharsetDecoder;
3751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebskiimport java.nio.charset.CoderResult;
3851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebskiimport java.nio.charset.CodingErrorAction;
3951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebskiimport java.nio.charset.CharacterCodingException;
4051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebskiimport java.text.Normalizer;
4151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebskiimport sun.nio.cs.ThreadLocalCoders;
4251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
4351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebskiimport java.lang.Character;             // for javadoc
4451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebskiimport java.lang.NullPointerException;  // for javadoc
4551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
4651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
4751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski/**
4851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * Represents a Uniform Resource Identifier (URI) reference.
4951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
5051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <p> Aside from some minor deviations noted below, an instance of this
5151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * class represents a URI reference as defined by
5251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <a href="http://www.ietf.org/rfc/rfc2396.txt"><i>RFC&nbsp;2396: Uniform
5351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * Resource Identifiers (URI): Generic Syntax</i></a>, amended by <a
5451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * href="http://www.ietf.org/rfc/rfc2732.txt"><i>RFC&nbsp;2732: Format for
5551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * Literal IPv6 Addresses in URLs</i></a>. The Literal IPv6 address format
5651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * also supports scope_ids. The syntax and usage of scope_ids are described
5751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <a href="Inet6Address.html#scoped">here</a>.
5851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * This class provides constructors for creating URI instances from
5951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * their components or by parsing their string forms, methods for accessing the
6051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * various components of an instance, and methods for normalizing, resolving,
6151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * and relativizing URI instances.  Instances of this class are immutable.
6251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
6351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
6451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <h4> URI syntax and components </h4>
6551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
6651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * At the highest level a URI reference (hereinafter simply "URI") in string
6751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * form has the syntax
6851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
6951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <blockquote>
7051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * [<i>scheme</i><tt><b>:</b></tt>]<i>scheme-specific-part</i>[<tt><b>#</b></tt><i>fragment</i>]
7151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * </blockquote>
7251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
7351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * where square brackets [...] delineate optional components and the characters
7451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <tt><b>:</b></tt> and <tt><b>#</b></tt> stand for themselves.
7551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
7651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <p> An <i>absolute</i> URI specifies a scheme; a URI that is not absolute is
7751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * said to be <i>relative</i>.  URIs are also classified according to whether
7851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * they are <i>opaque</i> or <i>hierarchical</i>.
7951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
8051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <p> An <i>opaque</i> URI is an absolute URI whose scheme-specific part does
8151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * not begin with a slash character (<tt>'/'</tt>).  Opaque URIs are not
8251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * subject to further parsing.  Some examples of opaque URIs are:
8351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
8451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <blockquote><table cellpadding=0 cellspacing=0 summary="layout">
8551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <tr><td><tt>mailto:java-net@java.sun.com</tt></td></tr>
8651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <tr><td><tt>news:comp.lang.java</tt></td></tr>
8751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <tr><td><tt>urn:isbn:096139210x</tt></td></tr>
8851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * </table></blockquote>
8951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
9051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <p> A <i>hierarchical</i> URI is either an absolute URI whose
9151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * scheme-specific part begins with a slash character, or a relative URI, that
9251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * is, a URI that does not specify a scheme.  Some examples of hierarchical
9351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * URIs are:
9451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
9551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <blockquote>
9651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <tt>http://java.sun.com/j2se/1.3/</tt><br>
9751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <tt>docs/guide/collections/designfaq.html#28</tt><br>
9851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <tt>../../../demo/jfc/SwingSet2/src/SwingSet2.java</tt><br>
9951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <tt>file:///~/calendar</tt>
10051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * </blockquote>
10151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
10251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <p> A hierarchical URI is subject to further parsing according to the syntax
10351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
10451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <blockquote>
10551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * [<i>scheme</i><tt><b>:</b></tt>][<tt><b>//</b></tt><i>authority</i>][<i>path</i>][<tt><b>?</b></tt><i>query</i>][<tt><b>#</b></tt><i>fragment</i>]
10651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * </blockquote>
10751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
10851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * where the characters <tt><b>:</b></tt>, <tt><b>/</b></tt>,
10951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <tt><b>?</b></tt>, and <tt><b>#</b></tt> stand for themselves.  The
11051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * scheme-specific part of a hierarchical URI consists of the characters
11151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * between the scheme and fragment components.
11251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
11351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <p> The authority component of a hierarchical URI is, if specified, either
11451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <i>server-based</i> or <i>registry-based</i>.  A server-based authority
11551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * parses according to the familiar syntax
11651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
11751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <blockquote>
11851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * [<i>user-info</i><tt><b>@</b></tt>]<i>host</i>[<tt><b>:</b></tt><i>port</i>]
11951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * </blockquote>
12051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
12151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * where the characters <tt><b>@</b></tt> and <tt><b>:</b></tt> stand for
12251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * themselves.  Nearly all URI schemes currently in use are server-based.  An
12351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * authority component that does not parse in this way is considered to be
12451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * registry-based.
12551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
12651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <p> The path component of a hierarchical URI is itself said to be absolute
12751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * if it begins with a slash character (<tt>'/'</tt>); otherwise it is
12851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * relative.  The path of a hierarchical URI that is either absolute or
12951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * specifies an authority is always absolute.
13051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
13151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <p> All told, then, a URI instance has the following nine components:
13251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
13351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <blockquote><table summary="Describes the components of a URI:scheme,scheme-specific-part,authority,user-info,host,port,path,query,fragment">
13451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <tr><th><i>Component</i></th><th><i>Type</i></th></tr>
13551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <tr><td>scheme</td><td><tt>String</tt></td></tr>
13651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <tr><td>scheme-specific-part&nbsp;&nbsp;&nbsp;&nbsp;</td><td><tt>String</tt></td></tr>
13751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <tr><td>authority</td><td><tt>String</tt></td></tr>
13851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <tr><td>user-info</td><td><tt>String</tt></td></tr>
13951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <tr><td>host</td><td><tt>String</tt></td></tr>
14051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <tr><td>port</td><td><tt>int</tt></td></tr>
14151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <tr><td>path</td><td><tt>String</tt></td></tr>
14251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <tr><td>query</td><td><tt>String</tt></td></tr>
14351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <tr><td>fragment</td><td><tt>String</tt></td></tr>
14451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * </table></blockquote>
14551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
14651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * In a given instance any particular component is either <i>undefined</i> or
14751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <i>defined</i> with a distinct value.  Undefined string components are
14851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * represented by <tt>null</tt>, while undefined integer components are
14951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * represented by <tt>-1</tt>.  A string component may be defined to have the
15051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * empty string as its value; this is not equivalent to that component being
15151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * undefined.
15251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
15351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <p> Whether a particular component is or is not defined in an instance
15451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * depends upon the type of the URI being represented.  An absolute URI has a
15551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * scheme component.  An opaque URI has a scheme, a scheme-specific part, and
15651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * possibly a fragment, but has no other components.  A hierarchical URI always
15751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * has a path (though it may be empty) and a scheme-specific-part (which at
15851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * least contains the path), and may have any of the other components.  If the
15951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * authority component is present and is server-based then the host component
16051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * will be defined and the user-information and port components may be defined.
16151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
16251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
16351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <h4> Operations on URI instances </h4>
16451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
16551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * The key operations supported by this class are those of
16651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <i>normalization</i>, <i>resolution</i>, and <i>relativization</i>.
16751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
16851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <p> <i>Normalization</i> is the process of removing unnecessary <tt>"."</tt>
16951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * and <tt>".."</tt> segments from the path component of a hierarchical URI.
17051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * Each <tt>"."</tt> segment is simply removed.  A <tt>".."</tt> segment is
17151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * removed only if it is preceded by a non-<tt>".."</tt> segment.
17251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * Normalization has no effect upon opaque URIs.
17351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
17451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <p> <i>Resolution</i> is the process of resolving one URI against another,
17551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <i>base</i> URI.  The resulting URI is constructed from components of both
17651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * URIs in the manner specified by RFC&nbsp;2396, taking components from the
17751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * base URI for those not specified in the original.  For hierarchical URIs,
17851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * the path of the original is resolved against the path of the base and then
17951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * normalized.  The result, for example, of resolving
18051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
18151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <blockquote>
18251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <tt>docs/guide/collections/designfaq.html#28&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</tt>(1)
18351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * </blockquote>
18451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
18551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * against the base URI <tt>http://java.sun.com/j2se/1.3/</tt> is the result
18651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * URI
18751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
18851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <blockquote>
18951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <tt>http://java.sun.com/j2se/1.3/docs/guide/collections/designfaq.html#28</tt>
19051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * </blockquote>
19151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
19251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * Resolving the relative URI
19351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
19451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <blockquote>
19551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <tt>../../../demo/jfc/SwingSet2/src/SwingSet2.java&nbsp;&nbsp;&nbsp;&nbsp;</tt>(2)
19651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * </blockquote>
19751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
19851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * against this result yields, in turn,
19951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
20051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <blockquote>
20151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <tt>http://java.sun.com/j2se/1.3/demo/jfc/SwingSet2/src/SwingSet2.java</tt>
20251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * </blockquote>
20351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
20451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * Resolution of both absolute and relative URIs, and of both absolute and
20551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * relative paths in the case of hierarchical URIs, is supported.  Resolving
20651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * the URI <tt>file:///~/calendar</tt> against any other URI simply yields the
20751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * original URI, since it is absolute.  Resolving the relative URI (2) above
20851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * against the relative base URI (1) yields the normalized, but still relative,
20951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * URI
21051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
21151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <blockquote>
21251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <tt>demo/jfc/SwingSet2/src/SwingSet2.java</tt>
21351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * </blockquote>
21451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
21551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <p> <i>Relativization</i>, finally, is the inverse of resolution: For any
21651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * two normalized URIs <i>u</i> and&nbsp;<i>v</i>,
21751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
21851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <blockquote>
21951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   <i>u</i><tt>.relativize(</tt><i>u</i><tt>.resolve(</tt><i>v</i><tt>)).equals(</tt><i>v</i><tt>)</tt>&nbsp;&nbsp;and<br>
22051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   <i>u</i><tt>.resolve(</tt><i>u</i><tt>.relativize(</tt><i>v</i><tt>)).equals(</tt><i>v</i><tt>)</tt>&nbsp;&nbsp;.<br>
22151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * </blockquote>
22251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
22351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * This operation is often useful when constructing a document containing URIs
22451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * that must be made relative to the base URI of the document wherever
22551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * possible.  For example, relativizing the URI
22651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
22751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <blockquote>
22851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <tt>http://java.sun.com/j2se/1.3/docs/guide/index.html</tt>
22951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * </blockquote>
23051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
23151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * against the base URI
23251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
23351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <blockquote>
23451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <tt>http://java.sun.com/j2se/1.3</tt>
23551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * </blockquote>
23651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
23751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * yields the relative URI <tt>docs/guide/index.html</tt>.
23851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
23951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
24051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <h4> Character categories </h4>
24151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
24251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * RFC&nbsp;2396 specifies precisely which characters are permitted in the
24351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * various components of a URI reference.  The following categories, most of
24451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * which are taken from that specification, are used below to describe these
24551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * constraints:
24651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
24751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <blockquote><table cellspacing=2 summary="Describes categories alpha,digit,alphanum,unreserved,punct,reserved,escaped,and other">
24851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   <tr><th valign=top><i>alpha</i></th>
24951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *       <td>The US-ASCII alphabetic characters,
25051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *        <tt>'A'</tt>&nbsp;through&nbsp;<tt>'Z'</tt>
25151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *        and <tt>'a'</tt>&nbsp;through&nbsp;<tt>'z'</tt></td></tr>
25251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   <tr><th valign=top><i>digit</i></th>
25351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *       <td>The US-ASCII decimal digit characters,
25451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *       <tt>'0'</tt>&nbsp;through&nbsp;<tt>'9'</tt></td></tr>
25551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   <tr><th valign=top><i>alphanum</i></th>
25651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *       <td>All <i>alpha</i> and <i>digit</i> characters</td></tr>
25751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   <tr><th valign=top><i>unreserved</i>&nbsp;&nbsp;&nbsp;&nbsp;</th>
25851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *       <td>All <i>alphanum</i> characters together with those in the string
25951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *        <tt>"_-!.~'()*"</tt></td></tr>
26051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   <tr><th valign=top><i>punct</i></th>
26151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *       <td>The characters in the string <tt>",;:$&amp;+="</tt></td></tr>
26251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   <tr><th valign=top><i>reserved</i></th>
26351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *       <td>All <i>punct</i> characters together with those in the string
26451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *        <tt>"?/[]@"</tt></td></tr>
26551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   <tr><th valign=top><i>escaped</i></th>
26651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *       <td>Escaped octets, that is, triplets consisting of the percent
26751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *           character (<tt>'%'</tt>) followed by two hexadecimal digits
26851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *           (<tt>'0'</tt>-<tt>'9'</tt>, <tt>'A'</tt>-<tt>'F'</tt>, and
26951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *           <tt>'a'</tt>-<tt>'f'</tt>)</td></tr>
27051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   <tr><th valign=top><i>other</i></th>
27151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *       <td>The Unicode characters that are not in the US-ASCII character set,
27251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *           are not control characters (according to the {@link
27351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *           java.lang.Character#isISOControl(char) Character.isISOControl}
27451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *           method), and are not space characters (according to the {@link
27551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *           java.lang.Character#isSpaceChar(char) Character.isSpaceChar}
27651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *           method)&nbsp;&nbsp;<i>(<b>Deviation from RFC 2396</b>, which is
27751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *           limited to US-ASCII)</i></td></tr>
27851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * </table></blockquote>
27951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
28051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <p><a name="legal-chars"></a> The set of all legal URI characters consists of
28151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * the <i>unreserved</i>, <i>reserved</i>, <i>escaped</i>, and <i>other</i>
28251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * characters.
28351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
28451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
28551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <h4> Escaped octets, quotation, encoding, and decoding </h4>
28651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
28751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * RFC 2396 allows escaped octets to appear in the user-info, path, query, and
28851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * fragment components.  Escaping serves two purposes in URIs:
28951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
29051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <ul>
29151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
29251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   <li><p> To <i>encode</i> non-US-ASCII characters when a URI is required to
29351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   conform strictly to RFC&nbsp;2396 by not containing any <i>other</i>
29451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   characters.  </p></li>
29551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
29651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   <li><p> To <i>quote</i> characters that are otherwise illegal in a
29751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   component.  The user-info, path, query, and fragment components differ
29851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   slightly in terms of which characters are considered legal and illegal.
29951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   </p></li>
30051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
30151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * </ul>
30251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
30351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * These purposes are served in this class by three related operations:
30451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
30551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <ul>
30651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
30751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   <li><p><a name="encode"></a> A character is <i>encoded</i> by replacing it
30851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   with the sequence of escaped octets that represent that character in the
30951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   UTF-8 character set.  The Euro currency symbol (<tt>'&#92;u20AC'</tt>),
31051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   for example, is encoded as <tt>"%E2%82%AC"</tt>.  <i>(<b>Deviation from
31151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   RFC&nbsp;2396</b>, which does not specify any particular character
31251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   set.)</i> </p></li>
31351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
31451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   <li><p><a name="quote"></a> An illegal character is <i>quoted</i> simply by
31551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   encoding it.  The space character, for example, is quoted by replacing it
31651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   with <tt>"%20"</tt>.  UTF-8 contains US-ASCII, hence for US-ASCII
31751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   characters this transformation has exactly the effect required by
31851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   RFC&nbsp;2396. </p></li>
31951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
32051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   <li><p><a name="decode"></a>
32151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   A sequence of escaped octets is <i>decoded</i> by
32251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   replacing it with the sequence of characters that it represents in the
32351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   UTF-8 character set.  UTF-8 contains US-ASCII, hence decoding has the
32451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   effect of de-quoting any quoted US-ASCII characters as well as that of
32551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   decoding any encoded non-US-ASCII characters.  If a <a
32651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   href="../nio/charset/CharsetDecoder.html#ce">decoding error</a> occurs
32751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   when decoding the escaped octets then the erroneous octets are replaced by
32851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   <tt>'&#92;uFFFD'</tt>, the Unicode replacement character.  </p></li>
32951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
33051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * </ul>
33151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
33251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * These operations are exposed in the constructors and methods of this class
33351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * as follows:
33451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
33551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <ul>
33651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
33751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   <li><p> The {@link #URI(java.lang.String) <code>single-argument
33851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   constructor</code>} requires any illegal characters in its argument to be
33951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   quoted and preserves any escaped octets and <i>other</i> characters that
34051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   are present.  </p></li>
34151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
34251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   <li><p> The {@link
34351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   #URI(java.lang.String,java.lang.String,java.lang.String,int,java.lang.String,java.lang.String,java.lang.String)
34451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   <code>multi-argument constructors</code>} quote illegal characters as
34551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   required by the components in which they appear.  The percent character
34651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   (<tt>'%'</tt>) is always quoted by these constructors.  Any <i>other</i>
34751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   characters are preserved.  </p></li>
34851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
34951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   <li><p> The {@link #getRawUserInfo() getRawUserInfo}, {@link #getRawPath()
35051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   getRawPath}, {@link #getRawQuery() getRawQuery}, {@link #getRawFragment()
35151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   getRawFragment}, {@link #getRawAuthority() getRawAuthority}, and {@link
35251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   #getRawSchemeSpecificPart() getRawSchemeSpecificPart} methods return the
35351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   values of their corresponding components in raw form, without interpreting
35451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   any escaped octets.  The strings returned by these methods may contain
35551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   both escaped octets and <i>other</i> characters, and will not contain any
35651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   illegal characters.  </p></li>
35751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
35851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   <li><p> The {@link #getUserInfo() getUserInfo}, {@link #getPath()
35951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   getPath}, {@link #getQuery() getQuery}, {@link #getFragment()
36051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   getFragment}, {@link #getAuthority() getAuthority}, and {@link
36151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   #getSchemeSpecificPart() getSchemeSpecificPart} methods decode any escaped
36251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   octets in their corresponding components.  The strings returned by these
36351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   methods may contain both <i>other</i> characters and illegal characters,
36451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   and will not contain any escaped octets.  </p></li>
36551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
36651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   <li><p> The {@link #toString() toString} method returns a URI string with
36751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   all necessary quotation but which may contain <i>other</i> characters.
36851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   </p></li>
36951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
37051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   <li><p> The {@link #toASCIIString() toASCIIString} method returns a fully
37151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   quoted and encoded URI string that does not contain any <i>other</i>
37251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *   characters.  </p></li>
37351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
37451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * </ul>
37551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
37651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
37751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <h4> Identities </h4>
37851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
37951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * For any URI <i>u</i>, it is always the case that
38051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
38151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <blockquote>
38251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <tt>new URI(</tt><i>u</i><tt>.toString()).equals(</tt><i>u</i><tt>)</tt>&nbsp;.
38351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * </blockquote>
38451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
38551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * For any URI <i>u</i> that does not contain redundant syntax such as two
38651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * slashes before an empty authority (as in <tt>file:///tmp/</tt>&nbsp;) or a
38751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * colon following a host name but no port (as in
38851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <tt>http://java.sun.com:</tt>&nbsp;), and that does not encode characters
38951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * except those that must be quoted, the following identities also hold:
39051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
39151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <blockquote>
39251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <tt>new URI(</tt><i>u</i><tt>.getScheme(),<br>
39351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</tt><i>u</i><tt>.getSchemeSpecificPart(),<br>
39451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</tt><i>u</i><tt>.getFragment())<br>
39551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * .equals(</tt><i>u</i><tt>)</tt>
39651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * </blockquote>
39751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
39851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * in all cases,
39951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
40051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <blockquote>
40151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <tt>new URI(</tt><i>u</i><tt>.getScheme(),<br>
40251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</tt><i>u</i><tt>.getAuthority(),<br>
40351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</tt><i>u</i><tt>.getPath(),&nbsp;</tt><i>u</i><tt>.getQuery(),<br>
40451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</tt><i>u</i><tt>.getFragment())<br>
40551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * .equals(</tt><i>u</i><tt>)</tt>
40651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * </blockquote>
40751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
40851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * if <i>u</i> is hierarchical, and
40951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
41051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <blockquote>
41151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <tt>new URI(</tt><i>u</i><tt>.getScheme(),<br>
41251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</tt><i>u</i><tt>.getUserInfo(),&nbsp;</tt><i>u</i><tt>.getHost(),&nbsp;</tt><i>u</i><tt>.getPort(),<br>
41351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</tt><i>u</i><tt>.getPath(),&nbsp;</tt><i>u</i><tt>.getQuery(),<br>
41451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</tt><i>u</i><tt>.getFragment())<br>
41551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * .equals(</tt><i>u</i><tt>)</tt>
41651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * </blockquote>
41751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
41851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * if <i>u</i> is hierarchical and has either no authority or a server-based
41951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * authority.
42051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
42151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
42251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <h4> URIs, URLs, and URNs </h4>
42351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
42451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * A URI is a uniform resource <i>identifier</i> while a URL is a uniform
42551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * resource <i>locator</i>.  Hence every URL is a URI, abstractly speaking, but
42651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * not every URI is a URL.  This is because there is another subcategory of
42751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * URIs, uniform resource <i>names</i> (URNs), which name resources but do not
42851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * specify how to locate them.  The <tt>mailto</tt>, <tt>news</tt>, and
42951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <tt>isbn</tt> URIs shown above are examples of URNs.
43051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
43151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <p> The conceptual distinction between URIs and URLs is reflected in the
43251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * differences between this class and the {@link URL} class.
43351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
43451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <p> An instance of this class represents a URI reference in the syntactic
43551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * sense defined by RFC&nbsp;2396.  A URI may be either absolute or relative.
43651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * A URI string is parsed according to the generic syntax without regard to the
43751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * scheme, if any, that it specifies.  No lookup of the host, if any, is
43851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * performed, and no scheme-dependent stream handler is constructed.  Equality,
43951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * hashing, and comparison are defined strictly in terms of the character
44051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * content of the instance.  In other words, a URI instance is little more than
44151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * a structured string that supports the syntactic, scheme-independent
44251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * operations of comparison, normalization, resolution, and relativization.
44351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
44451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * <p> An instance of the {@link URL} class, by contrast, represents the
44551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * syntactic components of a URL together with some of the information required
44651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * to access the resource that it describes.  A URL must be absolute, that is,
44751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * it must always specify a scheme.  A URL string is parsed according to its
44851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * scheme.  A stream handler is always established for a URL, and in fact it is
44951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * impossible to create a URL instance for a scheme for which no handler is
45051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * available.  Equality and hashing depend upon both the scheme and the
45151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * Internet address of the host, if any; comparison is not defined.  In other
45251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * words, a URL is a structured string that supports the syntactic operation of
45351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * resolution as well as the network I/O operations of looking up the host and
45451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * opening a connection to the specified resource.
45551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
45651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
45751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * @author Mark Reinhold
45851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski * @since 1.4
45951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski *
4609e78cee3f3edf84254174717f475605d712aad1cNarayan Kamath * @see <a href="http://www.ietf.org/rfc/rfc2279.txt">RFC&nbsp;2279: UTF-8, a transformation format of ISO 10646</a>
4619e78cee3f3edf84254174717f475605d712aad1cNarayan Kamath * @see <a href="http://www.ietf.org/rfc/rfc2373.txt">RFC&nbsp;2373: IPv6 Addressing Architecture</a>
4629e78cee3f3edf84254174717f475605d712aad1cNarayan Kamath * @see <a href="http://www.ietf.org/rfc/rfc2396.txt">RFC&nbsp;2396: Uniform Resource Identifiers (URI): Generic Syntax</a>
4639e78cee3f3edf84254174717f475605d712aad1cNarayan Kamath * @see <a href="http://www.ietf.org/rfc/rfc2732.txt">RFC&nbsp;2732: Format for Literal IPv6 Addresses in URLs</a>
46451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski */
4659e78cee3f3edf84254174717f475605d712aad1cNarayan Kamath// Android changed: Reformat @see links.
46651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebskipublic final class URI
46751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    implements Comparable<URI>, Serializable
46851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski{
46951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // Note: Comments containing the word "ASSERT" indicate places where a
47051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // throw of an InternalError should be replaced by an appropriate assertion
47151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // statement once asserts are enabled in the build.
47251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
47351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    static final long serialVersionUID = -6052424284110960213L; // explicit stream version id; the class implements Serializable
47451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
47551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
47651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // -- Properties and components of this instance --
47751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
47851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // Components of all URIs: [<scheme>:]<scheme-specific-part>[#<fragment>]
47951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private transient String scheme;            // null ==> relative URI
48051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private transient String fragment;
48151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
48251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // Hierarchical URI components: [//<authority>]<path>[?<query>]
48351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private transient String authority;         // Registry or server
48451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
48551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // Server-based authority: [<userInfo>@]<host>[:<port>]
48651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private transient String userInfo;
48751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private transient String host;              // null ==> registry-based
48851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private transient int port = -1;            // -1 ==> undefined
48951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
49051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // Remaining components of hierarchical URIs
49151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private transient String path;              // null ==> opaque
49251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private transient String query;
49351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
49451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // The remaining fields may be computed on demand
49551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
49651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private volatile transient String schemeSpecificPart;
49751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private volatile transient int hash;        // Zero ==> undefined
49851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
49951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private volatile transient String decodedUserInfo = null; // NOTE(review): the decoded* fields are presumably on-demand caches of the decoded components; confirm in the getters
50051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private volatile transient String decodedAuthority = null;
50151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private volatile transient String decodedPath = null;
50251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private volatile transient String decodedQuery = null;
50351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private volatile transient String decodedFragment = null;
50451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private volatile transient String decodedSchemeSpecificPart = null;
50551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
50651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    /**
50751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * The string form of this URI.
50851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
50951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @serial
51051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     */
51151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private volatile String string;             // The only serializable field
51251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
51351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
51451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
51551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // -- Constructors and factories --
51651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
51751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private URI() { }                           // Used internally; empty body, so every field keeps its declared default
51851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
51951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    /**
52051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * Constructs a URI by parsing the given string.
52151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
52251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <p> This constructor parses the given string exactly as specified by the
52351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * grammar in <a
52451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * href="http://www.ietf.org/rfc/rfc2396.txt">RFC&nbsp;2396</a>,
52551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * Appendix&nbsp;A, <b><i>except for the following deviations:</i></b> </p>
52651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
52751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <ul type=disc>
52851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
52951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   <li><p> An empty authority component is permitted as long as it is
53051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   followed by a non-empty path, a query component, or a fragment
53151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   component.  This allows the parsing of URIs such as
53251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   <tt>"file:///foo/bar"</tt>, which seems to be the intent of
53351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   RFC&nbsp;2396 although the grammar does not permit it.  If the
53451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   authority component is empty then the user-information, host, and port
53551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   components are undefined. </p></li>
53651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
53751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   <li><p> Empty relative paths are permitted; this seems to be the
53851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   intent of RFC&nbsp;2396 although the grammar does not permit it.  The
53951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   primary consequence of this deviation is that a standalone fragment
54051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   such as <tt>"#foo"</tt> parses as a relative URI with an empty path
54151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   and the given fragment, and can be usefully <a
54251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   href="#resolve-frag">resolved</a> against a base URI. </p></li>
54351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
54451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   <li><p> IPv4 addresses in host components are parsed rigorously, as
54551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   specified by <a
54651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   href="http://www.ietf.org/rfc/rfc2732.txt">RFC&nbsp;2732</a>: Each
54751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   element of a dotted-quad address must contain no more than three
54851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   decimal digits.  Each element is further constrained to have a value
54951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   no greater than 255. </p></li>
55051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
55151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   <li> <p> Hostnames in host components that comprise only a single
55251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   domain label are permitted to start with an <i>alphanum</i>
55351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   character. This seems to be the intent of <a
55451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   href="http://www.ietf.org/rfc/rfc2396.txt">RFC&nbsp;2396</a>
55551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   section&nbsp;3.2.2 although the grammar does not permit it. The
55651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   consequence of this deviation is that the authority component of a
55751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   hierarchical URI such as <tt>s://123</tt>, will parse as a server-based
55851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   authority. </p></li>
55951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
56051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   <li><p> IPv6 addresses are permitted for the host component.  An IPv6
56151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   address must be enclosed in square brackets (<tt>'['</tt> and
56251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   <tt>']'</tt>) as specified by <a
56351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   href="http://www.ietf.org/rfc/rfc2732.txt">RFC&nbsp;2732</a>.  The
56451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   IPv6 address itself must parse according to <a
56551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   href="http://www.ietf.org/rfc/rfc2373.txt">RFC&nbsp;2373</a>.  IPv6
56651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   addresses are further constrained to describe no more than sixteen
56751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   bytes of address information, a constraint implicit in RFC&nbsp;2373
56851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   but not expressible in the grammar. </p></li>
56951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
57051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   <li><p> Characters in the <i>other</i> category are permitted wherever
57151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   RFC&nbsp;2396 permits <i>escaped</i> octets, that is, in the
57251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   user-information, path, query, and fragment components, as well as in
57351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   the authority component if the authority is registry-based.  This
57451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   allows URIs to contain Unicode characters beyond those in the US-ASCII
57551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   character set. </p></li>
57651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
57751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * </ul>
57851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
57951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @param  str   The string to be parsed into a URI
58051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
58151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @throws  NullPointerException
58251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *          If <tt>str</tt> is <tt>null</tt>
58351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
58451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @throws  URISyntaxException
58551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *          If the given string violates RFC&nbsp;2396, as augmented
58651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *          by the above deviations
58751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     */
58851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    public URI(String str) throws URISyntaxException {
58951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        new Parser(str).parse(false); // result discarded; Parser is declared outside this excerpt and presumably fills in this URI's fields. NOTE(review): meaning of the 'false' flag is not visible here; confirm against Parser.parse
59051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
59151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
59251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    /**
59351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * Constructs a hierarchical URI from the given components.
59451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
59551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <p> If a scheme is given then the path, if also given, must either be
59651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * empty or begin with a slash character (<tt>'/'</tt>).  Otherwise a
59751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * component of the new URI may be left undefined by passing <tt>null</tt>
59851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * for the corresponding parameter or, in the case of the <tt>port</tt>
59951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * parameter, by passing <tt>-1</tt>.
60051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
60151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <p> This constructor first builds a URI string from the given components
60251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * according to the rules specified in <a
60351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * href="http://www.ietf.org/rfc/rfc2396.txt">RFC&nbsp;2396</a>,
60451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * section&nbsp;5.2, step&nbsp;7: </p>
60551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
60651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <ol>
60751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
60851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   <li><p> Initially, the result string is empty. </p></li>
60951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
61051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   <li><p> If a scheme is given then it is appended to the result,
61151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   followed by a colon character (<tt>':'</tt>).  </p></li>
61251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
61351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   <li><p> If user information, a host, or a port are given then the
61451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   string <tt>"//"</tt> is appended.  </p></li>
61551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
61651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   <li><p> If user information is given then it is appended, followed by
61751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   a commercial-at character (<tt>'@'</tt>).  Any character not in the
61851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   <i>unreserved</i>, <i>punct</i>, <i>escaped</i>, or <i>other</i>
61951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   categories is <a href="#quote">quoted</a>.  </p></li>
62051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
62151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   <li><p> If a host is given then it is appended.  If the host is a
62251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   literal IPv6 address but is not enclosed in square brackets
62351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   (<tt>'['</tt> and <tt>']'</tt>) then the square brackets are added.
62451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   </p></li>
62551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
62651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   <li><p> If a port number is given then a colon character
62751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   (<tt>':'</tt>) is appended, followed by the port number in decimal.
62851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   </p></li>
62951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
63051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   <li><p> If a path is given then it is appended.  Any character not in
63151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   the <i>unreserved</i>, <i>punct</i>, <i>escaped</i>, or <i>other</i>
63251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   categories, and not equal to the slash character (<tt>'/'</tt>) or the
63351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   commercial-at character (<tt>'@'</tt>), is quoted.  </p></li>
63451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
63551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   <li><p> If a query is given then a question-mark character
63651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   (<tt>'?'</tt>) is appended, followed by the query.  Any character that
63751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   is not a <a href="#legal-chars">legal URI character</a> is quoted.
63851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   </p></li>
63951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
64051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   <li><p> Finally, if a fragment is given then a hash character
64151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   (<tt>'#'</tt>) is appended, followed by the fragment.  Any character
64251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   that is not a legal URI character is quoted.  </p></li>
64351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
64451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * </ol>
64551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
64651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <p> The resulting URI string is then parsed as if by invoking the {@link
64751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * #URI(String)} constructor and then invoking the {@link
64851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * #parseServerAuthority()} method upon the result; this may cause a {@link
64951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * URISyntaxException} to be thrown.  </p>
65051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
65151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @param   scheme    Scheme name
65251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @param   userInfo  User name and authorization information
65351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @param   host      Host name
65451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @param   port      Port number
65551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @param   path      Path
65651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @param   query     Query
65751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @param   fragment  Fragment
65851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
65951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @throws URISyntaxException
66051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *         If both a scheme and a path are given but the path is relative,
66151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *         if the URI string constructed from the given components violates
66251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *         RFC&nbsp;2396, or if the authority component of the string is
66351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *         present but cannot be parsed as a server-based authority
66451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     */
66551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    public URI(String scheme,
66651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski               String userInfo, String host, int port,
66751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski               String path, String query, String fragment)
66851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        throws URISyntaxException
66951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    {
        // Step 1: assemble the URI text from the individual components.
        // The two null arguments occupy the second and third toString slots,
        // which the sibling constructors fill with a scheme-specific part and
        // a pre-built authority respectively; here the authority is supplied
        // piecewise as userInfo/host/port instead.
67051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        String s = toString(scheme, null,
67151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                            null, userInfo, host, port,
67251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                            path, query, fragment);
        // Step 2: validate the scheme/path combination before parsing.
        // Presumably this enforces the documented rule that a path given
        // together with a scheme must be empty or start with '/' -- verify
        // against checkPath.
67351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        checkPath(s, scheme, path);
        // Step 3: parse the assembled string.  'true' is the same flag value
        // parseServerAuthority() uses, matching the Javadoc's statement that
        // the result is parsed as a server-based authority.
67451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        new Parser(s).parse(true);
67551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
67651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
67751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    /**
67851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * Constructs a hierarchical URI from the given components.
67951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
68051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <p> If a scheme is given then the path, if also given, must either be
68151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * empty or begin with a slash character (<tt>'/'</tt>).  Otherwise a
68251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * component of the new URI may be left undefined by passing <tt>null</tt>
68351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * for the corresponding parameter.
68451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
68551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <p> This constructor first builds a URI string from the given components
68651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * according to the rules specified in <a
68751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * href="http://www.ietf.org/rfc/rfc2396.txt">RFC&nbsp;2396</a>,
68851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * section&nbsp;5.2, step&nbsp;7: </p>
68951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
69051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <ol>
69151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
69251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   <li><p> Initially, the result string is empty.  </p></li>
69351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
69451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   <li><p> If a scheme is given then it is appended to the result,
69551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   followed by a colon character (<tt>':'</tt>).  </p></li>
69651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
69751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   <li><p> If an authority is given then the string <tt>"//"</tt> is
69851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   appended, followed by the authority.  If the authority contains a
69951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   literal IPv6 address then the address must be enclosed in square
70051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   brackets (<tt>'['</tt> and <tt>']'</tt>).  Any character not in the
70151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   <i>unreserved</i>, <i>punct</i>, <i>escaped</i>, or <i>other</i>
70251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   categories, and not equal to the commercial-at character
70351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   (<tt>'@'</tt>), is <a href="#quote">quoted</a>.  </p></li>
70451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
70551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   <li><p> If a path is given then it is appended.  Any character not in
70651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   the <i>unreserved</i>, <i>punct</i>, <i>escaped</i>, or <i>other</i>
70751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   categories, and not equal to the slash character (<tt>'/'</tt>) or the
70851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   commercial-at character (<tt>'@'</tt>), is quoted.  </p></li>
70951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
71051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   <li><p> If a query is given then a question-mark character
71151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   (<tt>'?'</tt>) is appended, followed by the query.  Any character that
71251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   is not a <a href="#legal-chars">legal URI character</a> is quoted.
71351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   </p></li>
71451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
71551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   <li><p> Finally, if a fragment is given then a hash character
71651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   (<tt>'#'</tt>) is appended, followed by the fragment.  Any character
71751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   that is not a legal URI character is quoted.  </p></li>
71851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
71951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * </ol>
72051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
72151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <p> The resulting URI string is then parsed as if by invoking the {@link
72251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * #URI(String)} constructor and then invoking the {@link
72351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * #parseServerAuthority()} method upon the result; this may cause a {@link
72451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * URISyntaxException} to be thrown.  </p>
72551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
72651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @param   scheme     Scheme name
72751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @param   authority  Authority
72851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @param   path       Path
72951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @param   query      Query
73051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @param   fragment   Fragment
73151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
73251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @throws URISyntaxException
73351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *         If both a scheme and a path are given but the path is relative,
73451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *         if the URI string constructed from the given components violates
73551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *         RFC&nbsp;2396, or if the authority component of the string is
73651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *         present but cannot be parsed as a server-based authority
73751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     */
73851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    public URI(String scheme,
73951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski               String authority,
74051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski               String path, String query, String fragment)
74151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        throws URISyntaxException
74251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    {
        // Assemble the URI text.  The authority is passed as one pre-built
        // string, so the userInfo and host slots are null and the port slot
        // is -1 (the "undefined" port value used throughout these
        // constructors).
74351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        String s = toString(scheme, null,
74451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                            authority, null, null, -1,
74551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                            path, query, fragment);
        // Validate the scheme/path combination before parsing (presumably the
        // "path must be empty or begin with '/'" rule -- verify against
        // checkPath).
74651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        checkPath(s, scheme, path);
        // NOTE(review): this parses with 'false', the same flag value as
        // URI(String), whereas the seven-argument constructor and
        // parseServerAuthority() use 'true'.  The Javadoc for this
        // constructor nevertheless describes a parseServerAuthority() step
        // and a failure when the authority is not server-based; the code and
        // the documentation appear to disagree -- confirm which is intended
        // before relying on either.
74751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        new Parser(s).parse(false);
74851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
74951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
75051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    /**
75151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * Constructs a hierarchical URI from the given components.
75251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
75351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <p> A component may be left undefined by passing <tt>null</tt>.
75451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
75551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <p> This convenience constructor works as if by invoking the
75651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * seven-argument constructor as follows:
75751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
75851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <blockquote><tt>
75951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * new&nbsp;{@link #URI(String, String, String, int, String, String, String)
76051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * URI}(scheme,&nbsp;null,&nbsp;host,&nbsp;-1,&nbsp;path,&nbsp;null,&nbsp;fragment);
76151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * </tt></blockquote>
76251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
76351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @param   scheme    Scheme name
76451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @param   host      Host name
76551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @param   path      Path
76651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @param   fragment  Fragment
76751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
76851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @throws  URISyntaxException
76951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *          If the URI string constructed from the given components
77051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *          violates RFC&nbsp;2396
77151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     */
77251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    public URI(String scheme, String host, String path, String fragment)
77351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        throws URISyntaxException
77451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    {
        // Pure delegation to the seven-argument constructor with userInfo and
        // query left undefined (null) and the port undefined (-1).  Every
        // check performed by that constructor therefore applies here too.
77551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        this(scheme, null, host, -1, path, null, fragment);
77651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
77751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
77851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    /**
77951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * Constructs a URI from the given components.
78051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
78151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <p> A component may be left undefined by passing <tt>null</tt>.
78251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
78351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <p> This constructor first builds a URI in string form using the given
78451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * components as follows:  </p>
78551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
78651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <ol>
78751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
78851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   <li><p> Initially, the result string is empty.  </p></li>
78951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
79051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   <li><p> If a scheme is given then it is appended to the result,
79151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   followed by a colon character (<tt>':'</tt>).  </p></li>
79251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
79351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   <li><p> If a scheme-specific part is given then it is appended.  Any
79451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   character that is not a <a href="#legal-chars">legal URI character</a>
79551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   is <a href="#quote">quoted</a>.  </p></li>
79651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
79751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   <li><p> Finally, if a fragment is given then a hash character
79851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   (<tt>'#'</tt>) is appended to the string, followed by the fragment.
79951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   Any character that is not a legal URI character is quoted.  </p></li>
80051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
80151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * </ol>
80251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
80351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <p> The resulting URI string is then parsed in order to create the new
80451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * URI instance as if by invoking the {@link #URI(String)} constructor;
80551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * this may cause a {@link URISyntaxException} to be thrown.  </p>
80651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
80751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @param   scheme    Scheme name
80851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @param   ssp       Scheme-specific part
80951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @param   fragment  Fragment
81051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
81151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @throws  URISyntaxException
81251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *          If the URI string constructed from the given components
81351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *          violates RFC&nbsp;2396
81451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     */
81551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    public URI(String scheme, String ssp, String fragment)
81651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        throws URISyntaxException
81751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    {
        // Build the URI text from scheme, scheme-specific part and fragment
        // only: every hierarchical slot (authority, userInfo, host, path,
        // query) is null and the port is -1.  The resulting string is parsed
        // with the same 'false' flag as URI(String).  Unlike the hierarchical
        // constructors, no checkPath call is made here.
81851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        new Parser(toString(scheme, ssp,
81951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                            null, null, null, -1,
82051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                            null, null, fragment))
82151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            .parse(false);
82251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
82351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
82451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    /**
82551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * Creates a URI by parsing the given string.
82651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
82751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <p> This convenience factory method works as if by invoking the {@link
82851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * #URI(String)} constructor; any {@link URISyntaxException} thrown by the
82951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * constructor is caught and wrapped in a new {@link
83051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * IllegalArgumentException} object, which is then thrown.
83151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
83251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <p> This method is provided for use in situations where it is known that
83351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * the given string is a legal URI, for example for URI constants declared
83451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * within a program, and so it would be considered a programming error
83551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * for the string not to parse as such.  The constructors, which throw
83651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * {@link URISyntaxException} directly, should be used in situations where a
83751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * URI is being constructed from user input or from some other source that
83851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * may be prone to errors.  </p>
83951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
84051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @param  str   The string to be parsed into a URI
84151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @return The new URI
84251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
84351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @throws  NullPointerException
84451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *          If <tt>str</tt> is <tt>null</tt>
84551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
84651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @throws  IllegalArgumentException
84751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *          If the given string violates RFC&nbsp;2396
84851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     */
84951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    public static URI create(String str) {
        // Delegate to the single-string constructor and convert its checked
        // URISyntaxException into an unchecked IllegalArgumentException.
85051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        try {
85151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            return new URI(str);
85251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        } catch (URISyntaxException x) {
            // Reuse the original message and keep the syntax exception as the
            // cause so the parse diagnostics are not lost.
85351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            throw new IllegalArgumentException(x.getMessage(), x);
85451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
85551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
85651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
85751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
85851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // -- Operations --
85951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
86051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    /**
86151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * Attempts to parse this URI's authority component, if defined, into
86251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * user-information, host, and port components.
86351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
86451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <p> If this URI's authority component has already been recognized as
86551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * being server-based then it will already have been parsed into
86651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * user-information, host, and port components.  In this case, or if this
86751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * URI has no authority component, this method simply returns this URI.
86851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
86951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <p> Otherwise this method attempts once more to parse the authority
87051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * component into user-information, host, and port components, and throws
87151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * an exception describing why the authority component could not be parsed
87251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * in that way.
87351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
87451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <p> This method is provided because the generic URI syntax specified in
87551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <a href="http://www.ietf.org/rfc/rfc2396.txt">RFC&nbsp;2396</a>
87651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * cannot always distinguish a malformed server-based authority from a
87751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * legitimate registry-based authority.  It must therefore treat some
87851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * instances of the former as instances of the latter.  The authority
87951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * component in the URI string <tt>"//foo:bar"</tt>, for example, is not a
88051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * legal server-based authority but it is legal as a registry-based
88151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * authority.
88251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
88351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <p> In many common situations, for example when working with URIs that are
88451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * known to be either URNs or URLs, the hierarchical URIs being used will
88551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * always be server-based.  They therefore must either be parsed as such or
88651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * treated as an error.  In these cases a statement such as
88751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
88851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <blockquote>
88951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <tt>URI </tt><i>u</i><tt> = new URI(str).parseServerAuthority();</tt>
89051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * </blockquote>
89151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
89251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <p> can be used to ensure that <i>u</i> always refers to a URI that, if
89351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * it has an authority component, has a server-based authority with proper
89451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * user-information, host, and port components.  Invoking this method also
89551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * ensures that if the authority could not be parsed in that way then an
89651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * appropriate diagnostic message can be issued based upon the exception
89751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * that is thrown. </p>
89851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
89951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @return  A URI whose authority field has been parsed
90051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *          as a server-based authority
90151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
90251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @throws  URISyntaxException
90351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *          If the authority component of this URI is defined
90451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *          but cannot be parsed as a server-based authority
90551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *          according to RFC&nbsp;2396
90651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     */
90751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    public URI parseServerAuthority()
90851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        throws URISyntaxException
90951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    {
91051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // We could be clever and cache the error message and index from the
91151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // exception thrown during the original parse, but that would require
91251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // either more fields or a more-obscure representation.
        // Nothing to do when a host is already set (the authority was already
        // parsed as server-based) or when there is no authority at all.
91351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if ((host != null) || (authority == null))
91451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            return this;
        // Presumably ensures the 'string' field holds this URI's text before
        // it is re-parsed -- verify against defineString.
91551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        defineString();
        // Re-parse with 'true'; a URISyntaxException thrown here propagates
        // to the caller and carries the reason the authority could not be
        // treated as server-based.
91651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        new Parser(string).parse(true);
        // The same instance is returned, not a copy.
91751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        return this;
91851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
91951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
92051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    /**
92151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * Normalizes this URI's path.
92251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
92351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <p> If this URI is opaque, or if its path is already in normal form,
92451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * then this URI is returned.  Otherwise a new URI is constructed that is
92551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * identical to this URI except that its path is computed by normalizing
92651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * this URI's path in a manner consistent with <a
92751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * href="http://www.ietf.org/rfc/rfc2396.txt">RFC&nbsp;2396</a>,
92851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * section&nbsp;5.2, step&nbsp;6, sub-steps&nbsp;c through&nbsp;f; that is:
92951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * </p>
93051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
93151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <ol>
93251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
93351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   <li><p> All <tt>"."</tt> segments are removed. </p></li>
93451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
93551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   <li><p> If a <tt>".."</tt> segment is preceded by a non-<tt>".."</tt>
93651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   segment then both of these segments are removed.  This step is
93751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   repeated until it is no longer applicable. </p></li>
93851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
93951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   <li><p> If the path is relative, and if its first segment contains a
94051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   colon character (<tt>':'</tt>), then a <tt>"."</tt> segment is
94151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   prepended.  This prevents a relative URI with a path such as
94251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   <tt>"a:b/c/d"</tt> from later being re-parsed as an opaque URI with a
94351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   scheme of <tt>"a"</tt> and a scheme-specific part of <tt>"b/c/d"</tt>.
94451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   <b><i>(Deviation from RFC&nbsp;2396)</i></b> </p></li>
94551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
94651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * </ol>
94751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
94851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <p> A normalized path will begin with one or more <tt>".."</tt> segments
94951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * if there were insufficient non-<tt>".."</tt> segments preceding them to
95051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * allow their removal.  A normalized path will begin with a <tt>"."</tt>
95151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * segment if one was inserted by step 3 above.  Otherwise, a normalized
95251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * path will not contain any <tt>"."</tt> or <tt>".."</tt> segments. </p>
95351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
95451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @return  A URI equivalent to this URI,
95551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *          but whose path is in normal form
95651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     */
95751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    public URI normalize() {
95851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        return normalize(this);
95951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
96051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
96151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    /**
96251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * Resolves the given URI against this URI.
96351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
96451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <p> If the given URI is already absolute, or if this URI is opaque, then
96551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * the given URI is returned.
96651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
96751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <p><a name="resolve-frag"></a> If the given URI's fragment component is
96851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * defined, its path component is empty, and its scheme, authority, and
96951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * query components are undefined, then a URI with the given fragment but
97051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * with all other components equal to those of this URI is returned.  This
97151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * allows a URI representing a standalone fragment reference, such as
97251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <tt>"#foo"</tt>, to be usefully resolved against a base URI.
97351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
97451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <p> Otherwise this method constructs a new hierarchical URI in a manner
97551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * consistent with <a
97651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * href="http://www.ietf.org/rfc/rfc2396.txt">RFC&nbsp;2396</a>,
97751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * section&nbsp;5.2; that is: </p>
97851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
97951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <ol>
98051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
98151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   <li><p> A new URI is constructed with this URI's scheme and the given
98251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   URI's query and fragment components. </p></li>
98351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
98451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   <li><p> If the given URI has an authority component then the new URI's
98551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   authority and path are taken from the given URI. </p></li>
98651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
98751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   <li><p> Otherwise the new URI's authority component is copied from
98851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   this URI, and its path is computed as follows: </p>
98951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
99051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   <ol type=a>
99151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
99251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *     <li><p> If the given URI's path is absolute then the new URI's path
99351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *     is taken from the given URI. </p></li>
99451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
99551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *     <li><p> Otherwise the given URI's path is relative, and so the new
99651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *     URI's path is computed by resolving the path of the given URI
99751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *     against the path of this URI.  This is done by concatenating all but
99851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *     the last segment of this URI's path, if any, with the given URI's
99951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *     path and then normalizing the result as if by invoking the {@link
100051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *     #normalize() normalize} method. </p></li>
100151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
100251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   </ol></li>
100351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
100451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * </ol>
100551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
100651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <p> The result of this method is absolute if, and only if, either this
100751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * URI is absolute or the given URI is absolute.  </p>
100851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
100951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @param  uri  The URI to be resolved against this URI
101051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @return The resulting URI
101151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
101251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @throws  NullPointerException
101351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *          If <tt>uri</tt> is <tt>null</tt>
101451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     */
101551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    public URI resolve(URI uri) {
101651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        return resolve(this, uri);
101751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
101851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
101951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    /**
102051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * Constructs a new URI by parsing the given string and then resolving it
102151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * against this URI.
102251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
102351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <p> This convenience method works as if invoking it were equivalent to
102451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * evaluating the expression <tt>{@link #resolve(java.net.URI)
102551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * resolve}(URI.{@link #create(String) create}(str))</tt>. </p>
102651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
102751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @param  str   The string to be parsed into a URI
102851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @return The resulting URI
102951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
103051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @throws  NullPointerException
103151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *          If <tt>str</tt> is <tt>null</tt>
103251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
103351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @throws  IllegalArgumentException
103451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *          If the given string violates RFC&nbsp;2396
103551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     */
103651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    public URI resolve(String str) {
103751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        return resolve(URI.create(str));
103851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
103951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
104051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    /**
104151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * Relativizes the given URI against this URI.
104251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
104351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <p> The relativization of the given URI against this URI is computed as
104451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * follows: </p>
104551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
104651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <ol>
104751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
104851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   <li><p> If either this URI or the given URI are opaque, or if the
104951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   scheme and authority components of the two URIs are not identical, or
105051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   if the path of this URI is not a prefix of the path of the given URI,
105151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   then the given URI is returned. </p></li>
105251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
105351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   <li><p> Otherwise a new relative hierarchical URI is constructed with
105451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   query and fragment components taken from the given URI and with a path
105551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   component computed by removing this URI's path from the beginning of
105651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   the given URI's path. </p></li>
105751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
105851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * </ol>
105951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
106051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @param  uri  The URI to be relativized against this URI
106151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @return The resulting URI
106251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
106351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @throws  NullPointerException
106451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *          If <tt>uri</tt> is <tt>null</tt>
106551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     */
106651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    public URI relativize(URI uri) {
106751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        return relativize(this, uri);
106851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
106951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
107051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    /**
107151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * Constructs a URL from this URI.
107251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
107351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <p> This convenience method works as if invoking it were equivalent to
107451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * evaluating the expression <tt>new&nbsp;URL(this.toString())</tt> after
107551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * first checking that this URI is absolute. </p>
107651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
107751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @return  A URL constructed from this URI
107851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
107951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @throws  IllegalArgumentException
108051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *          If this URL is not absolute
108151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
108251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @throws  MalformedURLException
108351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *          If a protocol handler for the URL could not be found,
108451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *          or if some other error occurred while constructing the URL
108551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     */
108651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    public URL toURL()
108751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        throws MalformedURLException {
108851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if (!isAbsolute())
108951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            throw new IllegalArgumentException("URI is not absolute");
109051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        return new URL(toString());
109151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
109251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
109351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // -- Component access methods --
109451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
109551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    /**
109651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * Returns the scheme component of this URI.
109751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
109851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <p> The scheme component of a URI, if defined, only contains characters
109951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * in the <i>alphanum</i> category and in the string <tt>"-.+"</tt>.  A
110051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * scheme always starts with an <i>alpha</i> character. <p>
110151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
110251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * The scheme component of a URI cannot contain escaped octets, hence this
110351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * method does not perform any decoding.
110451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
110551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @return  The scheme component of this URI,
110651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *          or <tt>null</tt> if the scheme is undefined
110751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     */
110851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    public String getScheme() {
110951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        return scheme;
111051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
111151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
111251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    /**
111351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * Tells whether or not this URI is absolute.
111451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
111551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <p> A URI is absolute if, and only if, it has a scheme component. </p>
111651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
111751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @return  <tt>true</tt> if, and only if, this URI is absolute
111851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     */
111951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    public boolean isAbsolute() {
112051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        return scheme != null;
112151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
112251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
112351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    /**
112451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * Tells whether or not this URI is opaque.
112551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
112651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <p> A URI is opaque if, and only if, it is absolute and its
112751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * scheme-specific part does not begin with a slash character ('/').
112851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * An opaque URI has a scheme, a scheme-specific part, and possibly
112951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * a fragment; all other components are undefined. </p>
113051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
113151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @return  <tt>true</tt> if, and only if, this URI is opaque
113251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     */
113351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    public boolean isOpaque() {
113451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        return path == null;
113551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
113651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
113751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    /**
113851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * Returns the raw scheme-specific part of this URI.  The scheme-specific
113951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * part is never undefined, though it may be empty.
114051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
114151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <p> The scheme-specific part of a URI only contains legal URI
114251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * characters. </p>
114351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
114451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @return  The raw scheme-specific part of this URI
114551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *          (never <tt>null</tt>)
114651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     */
114751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    public String getRawSchemeSpecificPart() {
114851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        defineSchemeSpecificPart();
114951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        return schemeSpecificPart;
115051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
115151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
115251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    /**
115351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * Returns the decoded scheme-specific part of this URI.
115451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
115551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <p> The string returned by this method is equal to that returned by the
115651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * {@link #getRawSchemeSpecificPart() getRawSchemeSpecificPart} method
115751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * except that all sequences of escaped octets are <a
115851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * href="#decode">decoded</a>.  </p>
115951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
116051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @return  The decoded scheme-specific part of this URI
116151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *          (never <tt>null</tt>)
116251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     */
116351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    public String getSchemeSpecificPart() {
116451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if (decodedSchemeSpecificPart == null)
116551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            decodedSchemeSpecificPart = decode(getRawSchemeSpecificPart());
116651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        return decodedSchemeSpecificPart;
116751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
116851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
116951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    /**
117051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * Returns the raw authority component of this URI.
117151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
117251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <p> The authority component of a URI, if defined, only contains the
117351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * commercial-at character (<tt>'@'</tt>) and characters in the
117451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <i>unreserved</i>, <i>punct</i>, <i>escaped</i>, and <i>other</i>
117551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * categories.  If the authority is server-based then it is further
117651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * constrained to have valid user-information, host, and port
117751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * components. </p>
117851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
117951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @return  The raw authority component of this URI,
118051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *          or <tt>null</tt> if the authority is undefined
118151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     */
118251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    public String getRawAuthority() {
118351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        return authority;
118451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
118551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
118651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    /**
118751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * Returns the decoded authority component of this URI.
118851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
118951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <p> The string returned by this method is equal to that returned by the
119051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * {@link #getRawAuthority() getRawAuthority} method except that all
119151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * sequences of escaped octets are <a href="#decode">decoded</a>.  </p>
119251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
119351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @return  The decoded authority component of this URI,
119451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *          or <tt>null</tt> if the authority is undefined
119551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     */
119651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    public String getAuthority() {
119751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if (decodedAuthority == null)
119851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            decodedAuthority = decode(authority);
119951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        return decodedAuthority;
120051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
120151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
120251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    /**
120351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * Returns the raw user-information component of this URI.
120451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
120551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <p> The user-information component of a URI, if defined, only contains
120651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * characters in the <i>unreserved</i>, <i>punct</i>, <i>escaped</i>, and
120751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <i>other</i> categories. </p>
120851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
120951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @return  The raw user-information component of this URI,
121051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *          or <tt>null</tt> if the user information is undefined
121151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     */
121251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    public String getRawUserInfo() {
121351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        return userInfo;
121451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
121551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
121651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    /**
121751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * Returns the decoded user-information component of this URI.
121851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
121951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <p> The string returned by this method is equal to that returned by the
122051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * {@link #getRawUserInfo() getRawUserInfo} method except that all
122151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * sequences of escaped octets are <a href="#decode">decoded</a>.  </p>
122251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
122351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @return  The decoded user-information component of this URI,
122451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *          or <tt>null</tt> if the user information is undefined
122551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     */
122651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    public String getUserInfo() {
122751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if ((decodedUserInfo == null) && (userInfo != null))
122851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            decodedUserInfo = decode(userInfo);
122951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        return decodedUserInfo;
123051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
123151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
123251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    /**
123351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * Returns the host component of this URI.
123451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
123551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <p> The host component of a URI, if defined, will have one of the
123651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * following forms: </p>
123751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
123851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <ul type=disc>
123951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
124051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   <li><p> A domain name consisting of one or more <i>labels</i>
124151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   separated by period characters (<tt>'.'</tt>), optionally followed by
124251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   a period character.  Each label consists of <i>alphanum</i> characters
124351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   as well as hyphen characters (<tt>'-'</tt>), though hyphens never
124451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   occur as the first or last characters in a label. The rightmost
124551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   label of a domain name consisting of two or more labels, begins
124651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   with an <i>alpha</i> character. </li>
124751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
124851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   <li><p> A dotted-quad IPv4 address of the form
124951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   <i>digit</i><tt>+.</tt><i>digit</i><tt>+.</tt><i>digit</i><tt>+.</tt><i>digit</i><tt>+</tt>,
125051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   where no <i>digit</i> sequence is longer than three characters and no
125151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   sequence has a value larger than 255. </p></li>
125251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
125351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   <li><p> An IPv6 address enclosed in square brackets (<tt>'['</tt> and
125451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   <tt>']'</tt>) and consisting of hexadecimal digits, colon characters
125551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   (<tt>':'</tt>), and possibly an embedded IPv4 address.  The full
125651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   syntax of IPv6 addresses is specified in <a
125751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   href="http://www.ietf.org/rfc/rfc2373.txt"><i>RFC&nbsp;2373: IPv6
125851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   Addressing Architecture</i></a>.  </p></li>
125951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
126051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * </ul>
126151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
126251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * The host component of a URI cannot contain escaped octets, hence this
126351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * method does not perform any decoding.
126451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
126551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @return  The host component of this URI,
126651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *          or <tt>null</tt> if the host is undefined
126751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     */
126851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    public String getHost() {
126951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        return host;
127051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
127151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
127251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    /**
127351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * Returns the port number of this URI.
127451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
127551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <p> The port component of a URI, if defined, is a non-negative
127651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * integer. </p>
127751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
127851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @return  The port component of this URI,
127951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *          or <tt>-1</tt> if the port is undefined
128051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     */
128151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    public int getPort() {
128251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        return port;
128351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
128451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
128551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    /**
128651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * Returns the raw path component of this URI.
128751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
128851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <p> The path component of a URI, if defined, only contains the slash
128951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * character (<tt>'/'</tt>), the commercial-at character (<tt>'@'</tt>),
129051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * and characters in the <i>unreserved</i>, <i>punct</i>, <i>escaped</i>,
129151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * and <i>other</i> categories. </p>
129251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
129351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @return  The path component of this URI,
129451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *          or <tt>null</tt> if the path is undefined
129551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     */
129651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    public String getRawPath() {
129751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        return path;
129851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
129951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
130051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    /**
130151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * Returns the decoded path component of this URI.
130251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
130351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <p> The string returned by this method is equal to that returned by the
130451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * {@link #getRawPath() getRawPath} method except that all sequences of
130551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * escaped octets are <a href="#decode">decoded</a>.  </p>
130651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
130751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @return  The decoded path component of this URI,
130851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *          or <tt>null</tt> if the path is undefined
130951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     */
131051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    public String getPath() {
131151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if ((decodedPath == null) && (path != null))
131251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            decodedPath = decode(path);
131351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        return decodedPath;
131451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
131551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
131651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    /**
131751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * Returns the raw query component of this URI.
131851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
131951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <p> The query component of a URI, if defined, only contains legal URI
132051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * characters. </p>
132151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
132251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @return  The raw query component of this URI,
132351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *          or <tt>null</tt> if the query is undefined
132451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     */
132551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    public String getRawQuery() {
132651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        return query;
132751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
132851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
132951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    /**
133051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * Returns the decoded query component of this URI.
133151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
133251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <p> The string returned by this method is equal to that returned by the
133351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * {@link #getRawQuery() getRawQuery} method except that all sequences of
133451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * escaped octets are <a href="#decode">decoded</a>.  </p>
133551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
133651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @return  The decoded query component of this URI,
133751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *          or <tt>null</tt> if the query is undefined
133851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     */
133951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    public String getQuery() {
134051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if ((decodedQuery == null) && (query != null))
134151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            decodedQuery = decode(query);
134251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        return decodedQuery;
134351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
134451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
134551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    /**
134651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * Returns the raw fragment component of this URI.
134751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
134851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <p> The fragment component of a URI, if defined, only contains legal URI
134951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * characters. </p>
135051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
135151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @return  The raw fragment component of this URI,
135251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *          or <tt>null</tt> if the fragment is undefined
135351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     */
135451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    public String getRawFragment() {
135551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        return fragment;
135651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
135751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
135851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    /**
135951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * Returns the decoded fragment component of this URI.
136051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
136151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <p> The string returned by this method is equal to that returned by the
136251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * {@link #getRawFragment() getRawFragment} method except that all
136351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * sequences of escaped octets are <a href="#decode">decoded</a>.  </p>
136451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
136551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @return  The decoded fragment component of this URI,
136651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *          or <tt>null</tt> if the fragment is undefined
136751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     */
136851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    public String getFragment() {
136951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if ((decodedFragment == null) && (fragment != null))
137051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            decodedFragment = decode(fragment);
137151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        return decodedFragment;
137251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
137351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
137451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
137551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // -- Equality, comparison, hash code, toString, and serialization --
137651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
137751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    /**
137851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * Tests this URI for equality with another object.
137951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
138051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <p> If the given object is not a URI then this method immediately
138151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * returns <tt>false</tt>.
138251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
138351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <p> For two URIs to be considered equal requires that either both are
138451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * opaque or both are hierarchical.  Their schemes must either both be
138551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * undefined or else be equal without regard to case. Their fragments
138651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * must either both be undefined or else be equal.
138751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
138851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <p> For two opaque URIs to be considered equal, their scheme-specific
138951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * parts must be equal.
139051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
139151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <p> For two hierarchical URIs to be considered equal, their paths must
139251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * be equal and their queries must either both be undefined or else be
139351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * equal.  Their authorities must either both be undefined, or both be
139451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * registry-based, or both be server-based.  If their authorities are
139551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * defined and are registry-based, then they must be equal.  If their
139651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * authorities are defined and are server-based, then their hosts must be
139751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * equal without regard to case, their port numbers must be equal, and
139851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * their user-information components must be equal.
139951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
140051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <p> When testing the user-information, path, query, fragment, authority,
140151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * or scheme-specific parts of two URIs for equality, the raw forms rather
140251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * than the encoded forms of these components are compared and the
140351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * hexadecimal digits of escaped octets are compared without regard to
140451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * case.
140551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
140651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <p> This method satisfies the general contract of the {@link
140751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * java.lang.Object#equals(Object) Object.equals} method. </p>
140851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
140951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @param   ob   The object to which this object is to be compared
141051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
141151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @return  <tt>true</tt> if, and only if, the given object is a URI that
141251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *          is identical to this URI
141351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     */
141451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    public boolean equals(Object ob) {
141551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if (ob == this)
141651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            return true;
141751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if (!(ob instanceof URI))
141851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            return false;
141951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        URI that = (URI)ob;
142051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if (this.isOpaque() != that.isOpaque()) return false;
142151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if (!equalIgnoringCase(this.scheme, that.scheme)) return false;
142251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if (!equal(this.fragment, that.fragment)) return false;
142351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
142451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // Opaque
142551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if (this.isOpaque())
142651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            return equal(this.schemeSpecificPart, that.schemeSpecificPart);
142751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
142851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // Hierarchical
142951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if (!equal(this.path, that.path)) return false;
143051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if (!equal(this.query, that.query)) return false;
143151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
143251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // Authorities
143351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if (this.authority == that.authority) return true;
143451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if (this.host != null) {
143551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            // Server-based
143651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (!equal(this.userInfo, that.userInfo)) return false;
143751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (!equalIgnoringCase(this.host, that.host)) return false;
143851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (this.port != that.port) return false;
143951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        } else if (this.authority != null) {
144051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            // Registry-based
144151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (!equal(this.authority, that.authority)) return false;
144251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        } else if (this.authority != that.authority) {
144351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            return false;
144451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
144551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
144651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        return true;
144751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
144851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
144951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    /**
145051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * Returns a hash-code value for this URI.  The hash code is based upon all
145151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * of the URI's components, and satisfies the general contract of the
145251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * {@link java.lang.Object#hashCode() Object.hashCode} method.
145351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
145451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @return  A hash-code value for this URI
145551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     */
145651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    public int hashCode() {
145751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if (hash != 0)
145851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            return hash;
145951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        int h = hashIgnoringCase(0, scheme);
146051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        h = hash(h, fragment);
146151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if (isOpaque()) {
146251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            h = hash(h, schemeSpecificPart);
146351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        } else {
146451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            h = hash(h, path);
146551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            h = hash(h, query);
146651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (host != null) {
146751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                h = hash(h, userInfo);
146851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                h = hashIgnoringCase(h, host);
146951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                h += 1949 * port;
147051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            } else {
147151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                h = hash(h, authority);
147251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            }
147351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
147451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        hash = h;
147551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        return h;
147651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
147751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
147851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    /**
147951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * Compares this URI to another object, which must be a URI.
148051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
148151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <p> When comparing corresponding components of two URIs, if one
148251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * component is undefined but the other is defined then the first is
148351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * considered to be less than the second.  Unless otherwise noted, string
148451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * components are ordered according to their natural, case-sensitive
148551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * ordering as defined by the {@link java.lang.String#compareTo(Object)
148651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * String.compareTo} method.  String components that are subject to
148751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * encoding are compared by comparing their raw forms rather than their
148851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * encoded forms.
148951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
149051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <p> The ordering of URIs is defined as follows: </p>
149151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
149251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <ul type=disc>
149351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
149451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   <li><p> Two URIs with different schemes are ordered according to the
149551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   ordering of their schemes, without regard to case. </p></li>
149651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
149751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   <li><p> A hierarchical URI is considered to be less than an opaque URI
149851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   with an identical scheme. </p></li>
149951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
150051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   <li><p> Two opaque URIs with identical schemes are ordered according
150151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   to the ordering of their scheme-specific parts. </p></li>
150251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
150351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   <li><p> Two opaque URIs with identical schemes and scheme-specific
150451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   parts are ordered according to the ordering of their
150551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   fragments. </p></li>
150651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
150751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   <li><p> Two hierarchical URIs with identical schemes are ordered
150851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   according to the ordering of their authority components: </p>
150951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
151051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   <ul type=disc>
151151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
151251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *     <li><p> If both authority components are server-based then the URIs
151351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *     are ordered according to their user-information components; if these
151451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *     components are identical then the URIs are ordered according to the
151551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *     ordering of their hosts, without regard to case; if the hosts are
151651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *     identical then the URIs are ordered according to the ordering of
151751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *     their ports. </p></li>
151851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
151951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *     <li><p> If one or both authority components are registry-based then
152051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *     the URIs are ordered according to the ordering of their authority
152151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *     components. </p></li>
152251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
152351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   </ul></li>
152451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
152551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   <li><p> Finally, two hierarchical URIs with identical schemes and
152651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   authority components are ordered according to the ordering of their
152751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   paths; if their paths are identical then they are ordered according to
152851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   the ordering of their queries; if the queries are identical then they
152951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *   are ordered according to the order of their fragments. </p></li>
153051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
153151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * </ul>
153251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
153351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <p> This method satisfies the general contract of the {@link
153451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * java.lang.Comparable#compareTo(Object) Comparable.compareTo}
153551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * method. </p>
153651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
153751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @param   that
153851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *          The object to which this URI is to be compared
153951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
154051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @return  A negative integer, zero, or a positive integer as this URI is
154151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *          less than, equal to, or greater than the given URI
154251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
154351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @throws  ClassCastException
154451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *          If the given object is not a URI
154551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     */
154651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    public int compareTo(URI that) {
154751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        int c;
154851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
154951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if ((c = compareIgnoringCase(this.scheme, that.scheme)) != 0)
155051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            return c;
155151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
155251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if (this.isOpaque()) {
155351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (that.isOpaque()) {
155451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                // Both opaque
155551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                if ((c = compare(this.schemeSpecificPart,
155651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                                 that.schemeSpecificPart)) != 0)
155751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    return c;
155851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                return compare(this.fragment, that.fragment);
155951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            }
156051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            return +1;                  // Opaque > hierarchical
156151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        } else if (that.isOpaque()) {
156251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            return -1;                  // Hierarchical < opaque
156351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
156451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
156551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // Hierarchical
156651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if ((this.host != null) && (that.host != null)) {
156751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            // Both server-based
156851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if ((c = compare(this.userInfo, that.userInfo)) != 0)
156951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                return c;
157051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if ((c = compareIgnoringCase(this.host, that.host)) != 0)
157151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                return c;
157251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if ((c = this.port - that.port) != 0)
157351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                return c;
157451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        } else {
157551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            // If one or both authorities are registry-based then we simply
157651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            // compare them in the usual, case-sensitive way.  If one is
157751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            // registry-based and one is server-based then the strings are
157851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            // guaranteed to be unequal, hence the comparison will never return
157951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            // zero and the compareTo and equals methods will remain
158051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            // consistent.
158151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if ((c = compare(this.authority, that.authority)) != 0) return c;
158251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
158351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
158451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if ((c = compare(this.path, that.path)) != 0) return c;
158551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if ((c = compare(this.query, that.query)) != 0) return c;
158651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        return compare(this.fragment, that.fragment);
158751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
158851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
158951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    /**
159051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * Returns the content of this URI as a string.
159151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
159251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <p> If this URI was created by invoking one of the constructors in this
159351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * class then a string equivalent to the original input string, or to the
159451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * string computed from the originally-given components, as appropriate, is
159551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * returned.  Otherwise this URI was created by normalization, resolution,
159651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * or relativization, and so a string is constructed from this URI's
159751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * components according to the rules specified in <a
159851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * href="http://www.ietf.org/rfc/rfc2396.txt">RFC&nbsp;2396</a>,
159951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * section&nbsp;5.2, step&nbsp;7. </p>
160051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
160151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @return  The string form of this URI
160251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     */
160351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    public String toString() {
160451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        defineString();
160551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        return string;
160651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
160751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
160851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    /**
160951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * Returns the content of this URI as a US-ASCII string.
161051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
161151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <p> If this URI does not contain any characters in the <i>other</i>
161251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * category then an invocation of this method will return the same value as
161351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * an invocation of the {@link #toString() toString} method.  Otherwise
161451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * this method works as if by invoking that method and then <a
161551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * href="#encode">encoding</a> the result.  </p>
161651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
161751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @return  The string form of this URI, encoded as needed
161851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *          so that it only contains characters in the US-ASCII
161951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *          charset
162051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     */
162151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    public String toASCIIString() {
162251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        defineString();
162351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        return encode(string);
162451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
162551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
162651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
162751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // -- Serialization support --
162851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
162951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    /**
163051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * Saves the content of this URI to the given serial stream.
163151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
163251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <p> The only serializable field of a URI instance is its <tt>string</tt>
163351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * field.  That field is given a value, if it does not have one already,
163451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * and then the {@link java.io.ObjectOutputStream#defaultWriteObject()}
163551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * method of the given object-output stream is invoked. </p>
163651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
163751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @param  os  The object-output stream to which this object
163851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *             is to be written
163951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     */
164051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private void writeObject(ObjectOutputStream os)
164151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        throws IOException
164251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    {
164351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        defineString();
164451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        os.defaultWriteObject();        // Writes the string field only
164551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
164651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
164751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    /**
164851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * Reconstitutes a URI from the given serial stream.
164951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
165051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * <p> The {@link java.io.ObjectInputStream#defaultReadObject()} method is
165151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * invoked to read the value of the <tt>string</tt> field.  The result is
165251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * then parsed in the usual way.
165351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *
165451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     * @param  is  The object-input stream from which this object
165551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     *             is being read
165651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski     */
165751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private void readObject(ObjectInputStream is)
165851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        throws ClassNotFoundException, IOException
165951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    {
166051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        port = -1;                      // Argh
166151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        is.defaultReadObject();
166251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        try {
166351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            new Parser(string).parse(false);
166451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        } catch (URISyntaxException x) {
166551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            IOException y = new InvalidObjectException("Invalid URI");
166651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            y.initCause(x);
166751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            throw y;
166851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
166951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
167051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
167151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
167251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // -- End of public methods --
167351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
167451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
167551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // -- Utility methods for string-field comparison and hashing --
167651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
167751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // These methods return appropriate values for null string arguments,
167851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // thereby simplifying the equals, hashCode, and compareTo methods.
167951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    //
168051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // The case-ignoring methods should only be applied to strings whose
168151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // characters are all known to be US-ASCII.  Because of this restriction,
168251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // these methods are faster than the similar methods in the String class.
168351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
168451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // US-ASCII only
168551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static int toLower(char c) {
168651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if ((c >= 'A') && (c <= 'Z'))
168751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            return c + ('a' - 'A');
168851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        return c;
168951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
169051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
169151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static boolean equal(String s, String t) {
169251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if (s == t) return true;
169351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if ((s != null) && (t != null)) {
169451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (s.length() != t.length())
169551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                return false;
169651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (s.indexOf('%') < 0)
169751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                return s.equals(t);
169851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            int n = s.length();
169951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            for (int i = 0; i < n;) {
170051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                char c = s.charAt(i);
170151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                char d = t.charAt(i);
170251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                if (c != '%') {
170351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    if (c != d)
170451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                        return false;
170551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    i++;
170651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    continue;
170751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                }
170851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                if (d != '%')
170951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    return false;
171051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                i++;
171151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                if (toLower(s.charAt(i)) != toLower(t.charAt(i)))
171251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    return false;
171351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                i++;
171451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                if (toLower(s.charAt(i)) != toLower(t.charAt(i)))
171551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    return false;
171651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                i++;
171751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            }
171851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            return true;
171951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
172051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        return false;
172151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
172251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
172351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // US-ASCII only
172451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static boolean equalIgnoringCase(String s, String t) {
172551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if (s == t) return true;
172651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if ((s != null) && (t != null)) {
172751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            int n = s.length();
172851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (t.length() != n)
172951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                return false;
173051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            for (int i = 0; i < n; i++) {
173151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                if (toLower(s.charAt(i)) != toLower(t.charAt(i)))
173251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    return false;
173351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            }
173451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            return true;
173551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
173651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        return false;
173751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
173851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
173951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static int hash(int hash, String s) {
174051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if (s == null) return hash;
174151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        return hash * 127 + s.hashCode();
174251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
174351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
174451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // US-ASCII only
174551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static int hashIgnoringCase(int hash, String s) {
174651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if (s == null) return hash;
174751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        int h = hash;
174851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        int n = s.length();
174951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        for (int i = 0; i < n; i++)
175051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            h = 31 * h + toLower(s.charAt(i));
175151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        return h;
175251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
175351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
175451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static int compare(String s, String t) {
175551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if (s == t) return 0;
175651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if (s != null) {
175751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (t != null)
175851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                return s.compareTo(t);
175951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            else
176051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                return +1;
176151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        } else {
176251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            return -1;
176351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
176451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
176551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
176651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // US-ASCII only
176751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static int compareIgnoringCase(String s, String t) {
176851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if (s == t) return 0;
176951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if (s != null) {
177051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (t != null) {
177151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                int sn = s.length();
177251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                int tn = t.length();
177351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                int n = sn < tn ? sn : tn;
177451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                for (int i = 0; i < n; i++) {
177551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    int c = toLower(s.charAt(i)) - toLower(t.charAt(i));
177651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    if (c != 0)
177751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                        return c;
177851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                }
177951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                return sn - tn;
178051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            }
178151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            return +1;
178251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        } else {
178351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            return -1;
178451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
178551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
178651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
178751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
178851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // -- String construction --
178951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
179051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // If a scheme is given then the path, if given, must be absolute
179151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    //
179251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static void checkPath(String s, String scheme, String path)
179351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        throws URISyntaxException
179451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    {
179551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if (scheme != null) {
179651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if ((path != null)
179751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                && ((path.length() > 0) && (path.charAt(0) != '/')))
179851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                throw new URISyntaxException(s,
179951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                                             "Relative path in absolute URI");
180051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
180151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
180251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
180351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private void appendAuthority(StringBuffer sb,
180451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                                 String authority,
180551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                                 String userInfo,
180651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                                 String host,
180751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                                 int port)
180851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    {
180951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if (host != null) {
181051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            sb.append("//");
181151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (userInfo != null) {
181251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                sb.append(quote(userInfo, L_USERINFO, H_USERINFO));
181351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                sb.append('@');
181451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            }
181551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            boolean needBrackets = ((host.indexOf(':') >= 0)
181651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                                    && !host.startsWith("[")
181751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                                    && !host.endsWith("]"));
181851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (needBrackets) sb.append('[');
181951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            sb.append(host);
182051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (needBrackets) sb.append(']');
182151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (port != -1) {
182251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                sb.append(':');
182351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                sb.append(port);
182451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            }
182551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        } else if (authority != null) {
182651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            sb.append("//");
182751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (authority.startsWith("[")) {
182851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                // authority should (but may not) contain an embedded IPv6 address
182951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                int end = authority.indexOf("]");
183051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                String doquote = authority, dontquote = "";
183151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                if (end != -1 && authority.indexOf(":") != -1) {
183251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    // the authority contains an IPv6 address
183351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    if (end == authority.length()) {
183451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                        dontquote = authority;
183551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                        doquote = "";
183651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    } else {
183751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                        dontquote = authority.substring(0 , end + 1);
183851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                        doquote = authority.substring(end + 1);
183951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    }
184051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                }
184151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                sb.append(dontquote);
184251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                sb.append(quote(doquote,
184351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                            L_REG_NAME | L_SERVER,
184451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                            H_REG_NAME | H_SERVER));
184551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            } else {
184651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                sb.append(quote(authority,
184751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                            L_REG_NAME | L_SERVER,
184851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                            H_REG_NAME | H_SERVER));
184951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            }
185051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
185151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
185251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
185351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private void appendSchemeSpecificPart(StringBuffer sb,
185451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                                          String opaquePart,
185551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                                          String authority,
185651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                                          String userInfo,
185751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                                          String host,
185851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                                          int port,
185951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                                          String path,
186051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                                          String query)
186151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    {
186251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if (opaquePart != null) {
186351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            /* check if SSP begins with an IPv6 address
186451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski             * because we must not quote a literal IPv6 address
186551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski             */
186651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (opaquePart.startsWith("//[")) {
186751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                int end =  opaquePart.indexOf("]");
186851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                if (end != -1 && opaquePart.indexOf(":")!=-1) {
186951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    String doquote, dontquote;
187051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    if (end == opaquePart.length()) {
187151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                        dontquote = opaquePart;
187251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                        doquote = "";
187351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    } else {
187451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                        dontquote = opaquePart.substring(0,end+1);
187551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                        doquote = opaquePart.substring(end+1);
187651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    }
187751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    sb.append (dontquote);
187851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    sb.append(quote(doquote, L_URIC, H_URIC));
187951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                }
188051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            } else {
188151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                sb.append(quote(opaquePart, L_URIC, H_URIC));
188251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            }
188351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        } else {
188451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            appendAuthority(sb, authority, userInfo, host, port);
188551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (path != null)
188651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                sb.append(quote(path, L_PATH, H_PATH));
188751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (query != null) {
188851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                sb.append('?');
188951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                sb.append(quote(query, L_URIC, H_URIC));
189051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            }
189151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
189251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
189351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
189451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private void appendFragment(StringBuffer sb, String fragment) {
189551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if (fragment != null) {
189651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            sb.append('#');
189751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            sb.append(quote(fragment, L_URIC, H_URIC));
189851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
189951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
190051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
190151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private String toString(String scheme,
190251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                            String opaquePart,
190351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                            String authority,
190451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                            String userInfo,
190551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                            String host,
190651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                            int port,
190751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                            String path,
190851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                            String query,
190951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                            String fragment)
191051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    {
191151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        StringBuffer sb = new StringBuffer();
191251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if (scheme != null) {
191351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            sb.append(scheme);
191451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            sb.append(':');
191551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
191651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        appendSchemeSpecificPart(sb, opaquePart,
191751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                                 authority, userInfo, host, port,
191851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                                 path, query);
191951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        appendFragment(sb, fragment);
192051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        return sb.toString();
192151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
192251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
192351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private void defineSchemeSpecificPart() {
192451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if (schemeSpecificPart != null) return;
192551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        StringBuffer sb = new StringBuffer();
192651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        appendSchemeSpecificPart(sb, null, getAuthority(), getUserInfo(),
192751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                                 host, port, getPath(), getQuery());
192851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if (sb.length() == 0) return;
192951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        schemeSpecificPart = sb.toString();
193051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
193151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
193251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private void defineString() {
193351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if (string != null) return;
193451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
193551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        StringBuffer sb = new StringBuffer();
193651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if (scheme != null) {
193751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            sb.append(scheme);
193851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            sb.append(':');
193951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
194051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if (isOpaque()) {
194151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            sb.append(schemeSpecificPart);
194251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        } else {
194351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (host != null) {
194451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                sb.append("//");
194551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                if (userInfo != null) {
194651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    sb.append(userInfo);
194751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    sb.append('@');
194851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                }
194951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                boolean needBrackets = ((host.indexOf(':') >= 0)
195051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                                    && !host.startsWith("[")
195151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                                    && !host.endsWith("]"));
195251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                if (needBrackets) sb.append('[');
195351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                sb.append(host);
195451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                if (needBrackets) sb.append(']');
195551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                if (port != -1) {
195651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    sb.append(':');
195751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    sb.append(port);
195851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                }
195951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            } else if (authority != null) {
196051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                sb.append("//");
196151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                sb.append(authority);
196251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            }
196351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (path != null)
196451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                sb.append(path);
196551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (query != null) {
196651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                sb.append('?');
196751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                sb.append(query);
196851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            }
196951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
197051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if (fragment != null) {
197151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            sb.append('#');
197251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            sb.append(fragment);
197351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
197451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        string = sb.toString();
197551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
197651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
197751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
197851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // -- Normalization, resolution, and relativization --
197951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
198051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // RFC2396 5.2 (6)
198151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static String resolvePath(String base, String child,
198251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                                      boolean absolute)
198351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    {
198451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        int i = base.lastIndexOf('/');
198551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        int cn = child.length();
198651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        String path = "";
198751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
198851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if (cn == 0) {
198951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            // 5.2 (6a)
199051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (i >= 0)
199151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                path = base.substring(0, i + 1);
199251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        } else {
199351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            StringBuffer sb = new StringBuffer(base.length() + cn);
199451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            // 5.2 (6a)
199551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (i >= 0)
199651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                sb.append(base.substring(0, i + 1));
199751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            // 5.2 (6b)
199851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            sb.append(child);
199951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            path = sb.toString();
200051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
200151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
200251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // 5.2 (6c-f)
20039af8755ec458552ad55adb2d106fd0915fd860e9Yi Kong        String np = normalize(path, true);
200451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
200551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // 5.2 (6g): If the result is absolute but the path begins with "../",
200651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // then we simply leave the path as-is
200751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
200851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        return np;
200951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
201051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
201151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // RFC2396 5.2
201251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static URI resolve(URI base, URI child) {
201351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // check if child if opaque first so that NPE is thrown
201451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // if child is null.
201551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if (child.isOpaque() || base.isOpaque())
201651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            return child;
201751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
201851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // 5.2 (2): Reference to current document (lone fragment)
201951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if ((child.scheme == null) && (child.authority == null)
202051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            && child.path.equals("") && (child.fragment != null)
202151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            && (child.query == null)) {
202251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if ((base.fragment != null)
202351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                && child.fragment.equals(base.fragment)) {
202451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                return base;
202551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            }
202651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            URI ru = new URI();
202751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            ru.scheme = base.scheme;
202851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            ru.authority = base.authority;
202951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            ru.userInfo = base.userInfo;
203051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            ru.host = base.host;
203151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            ru.port = base.port;
203251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            ru.path = base.path;
203351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            ru.fragment = child.fragment;
203451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            ru.query = base.query;
203551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            return ru;
203651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
203751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
203851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // 5.2 (3): Child is absolute
203951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if (child.scheme != null)
204051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            return child;
204151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
204251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        URI ru = new URI();             // Resolved URI
204351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        ru.scheme = base.scheme;
204451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        ru.query = child.query;
204551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        ru.fragment = child.fragment;
204651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
204751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // 5.2 (4): Authority
204851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if (child.authority == null) {
204951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            ru.authority = base.authority;
205051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            ru.host = base.host;
205151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            ru.userInfo = base.userInfo;
205251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            ru.port = base.port;
205351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
2054bd3fc1206b075a1eada8f6229d68cdb114a5799aYi Kong            if (child.path == null || child.path.isEmpty()) {
2055bd3fc1206b075a1eada8f6229d68cdb114a5799aYi Kong                // This is an addtional path from RFC 3986 RI, which fixes following RFC 2396
2056bd3fc1206b075a1eada8f6229d68cdb114a5799aYi Kong                // "normal" examples:
2057bd3fc1206b075a1eada8f6229d68cdb114a5799aYi Kong                // Base: http://a/b/c/d;p?q
2058bd3fc1206b075a1eada8f6229d68cdb114a5799aYi Kong                //   "?y" = "http://a/b/c/d;p?y"
2059bd3fc1206b075a1eada8f6229d68cdb114a5799aYi Kong                //   ""   = "http://a/b/c/d;p?q"
2060bd3fc1206b075a1eada8f6229d68cdb114a5799aYi Kong                // http://b/25897693
2061bd3fc1206b075a1eada8f6229d68cdb114a5799aYi Kong                ru.path = base.path;
2062bd3fc1206b075a1eada8f6229d68cdb114a5799aYi Kong                ru.query = child.query != null ? child.query : base.query;
2063bd3fc1206b075a1eada8f6229d68cdb114a5799aYi Kong            } else if ((child.path.length() > 0) && (child.path.charAt(0) == '/')) {
206451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                // 5.2 (5): Child path is absolute
2065bd3fc1206b075a1eada8f6229d68cdb114a5799aYi Kong                //
2066bd3fc1206b075a1eada8f6229d68cdb114a5799aYi Kong                // There is an additional step from RFC 3986 RI, requiring to remove dots for
2067bd3fc1206b075a1eada8f6229d68cdb114a5799aYi Kong                // absolute path as well.
2068bd3fc1206b075a1eada8f6229d68cdb114a5799aYi Kong                // http://b/25897693
20699af8755ec458552ad55adb2d106fd0915fd860e9Yi Kong                ru.path = normalize(child.path, true);
207051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            } else {
207151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                // 5.2 (6): Resolve relative path
2072bd3fc1206b075a1eada8f6229d68cdb114a5799aYi Kong                ru.path = resolvePath(base.path, child.path, base.isAbsolute());
207351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            }
207451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        } else {
207551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            ru.authority = child.authority;
207651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            ru.host = child.host;
207751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            ru.userInfo = child.userInfo;
207851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            ru.host = child.host;
207951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            ru.port = child.port;
208051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            ru.path = child.path;
208151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
208251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
208351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // 5.2 (7): Recombine (nothing to do here)
208451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        return ru;
208551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
208651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
208751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // If the given URI's path is normal then return the URI;
208851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // o.w., return a new URI containing the normalized path.
208951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    //
209051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static URI normalize(URI u) {
209151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if (u.isOpaque() || (u.path == null) || (u.path.length() == 0))
209251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            return u;
209351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
209451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        String np = normalize(u.path);
209551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if (np == u.path)
209651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            return u;
209751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
209851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        URI v = new URI();
209951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        v.scheme = u.scheme;
210051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        v.fragment = u.fragment;
210151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        v.authority = u.authority;
210251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        v.userInfo = u.userInfo;
210351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        v.host = u.host;
210451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        v.port = u.port;
210551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        v.path = np;
210651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        v.query = u.query;
210751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        return v;
210851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
210951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
211051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // If both URIs are hierarchical, their scheme and authority components are
211151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // identical, and the base path is a prefix of the child's path, then
211251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // return a relative URI that, when resolved against the base, yields the
211351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // child; otherwise, return the child.
211451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    //
211551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static URI relativize(URI base, URI child) {
211651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // check if child if opaque first so that NPE is thrown
211751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // if child is null.
211851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if (child.isOpaque() || base.isOpaque())
211951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            return child;
212051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if (!equalIgnoringCase(base.scheme, child.scheme)
212151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            || !equal(base.authority, child.authority))
212251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            return child;
212351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
212451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        String bp = normalize(base.path);
212551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        String cp = normalize(child.path);
212651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if (!bp.equals(cp)) {
212731651e7a39b8c79cacebda5439d25715a02aa7cfNarayan Kamath            // Android-changed: The original OpenJdk implementation would append a trailing slash
212831651e7a39b8c79cacebda5439d25715a02aa7cfNarayan Kamath            // to paths like "/a/b" before relativizing them. This would relativize /a/b/c to
212931651e7a39b8c79cacebda5439d25715a02aa7cfNarayan Kamath            // "/c" against "/a/b" the android implementation did not do this. It would assume that
213031651e7a39b8c79cacebda5439d25715a02aa7cfNarayan Kamath            // "b" wasn't a directory and relativize the path to "/b/c". The spec is pretty vague
213131651e7a39b8c79cacebda5439d25715a02aa7cfNarayan Kamath            // about this but this change is being made because we have several tests that expect
213231651e7a39b8c79cacebda5439d25715a02aa7cfNarayan Kamath            // this behaviour.
213331651e7a39b8c79cacebda5439d25715a02aa7cfNarayan Kamath            if (bp.indexOf('/') != -1) {
213431651e7a39b8c79cacebda5439d25715a02aa7cfNarayan Kamath                bp = bp.substring(0, bp.lastIndexOf('/') + 1);
213531651e7a39b8c79cacebda5439d25715a02aa7cfNarayan Kamath            }
213631651e7a39b8c79cacebda5439d25715a02aa7cfNarayan Kamath
213751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (!cp.startsWith(bp))
213851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                return child;
213951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
214051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
214151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        URI v = new URI();
214251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        v.path = cp.substring(bp.length());
214351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        v.query = child.query;
214451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        v.fragment = child.fragment;
214551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        return v;
214651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
214751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
214851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
214951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
215051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // -- Path normalization --
215151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
215251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // The following algorithm for path normalization avoids the creation of a
215351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // string object for each segment, as well as the use of a string buffer to
215451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // compute the final result, by using a single char array and editing it in
215551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // place.  The array is first split into segments, replacing each slash
215651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // with '\0' and creating a segment-index array, each element of which is
215751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // the index of the first char in the corresponding segment.  We then walk
215851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // through both arrays, removing ".", "..", and other segments as necessary
215951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // by setting their entries in the index array to -1.  Finally, the two
216051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // arrays are used to rejoin the segments and compute the final result.
216151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    //
216251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // This code is based upon src/solaris/native/java/io/canonicalize_md.c
216351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
216451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
216551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // Check the given path to see if it might need normalization.  A path
216651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // might need normalization if it contains duplicate slashes, a "."
216751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // segment, or a ".." segment.  Return -1 if no further normalization is
216851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // possible, otherwise return the number of segments found.
216951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    //
217051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // This method takes a string argument rather than a char array so that
217151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // this test can be performed without invoking path.toCharArray().
217251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    //
217351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    static private int needsNormalization(String path) {
217451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        boolean normal = true;
217551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        int ns = 0;                     // Number of segments
217651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        int end = path.length() - 1;    // Index of last char in path
217751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        int p = 0;                      // Index of next char in path
217851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
217951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // Skip initial slashes
218051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        while (p <= end) {
218151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (path.charAt(p) != '/') break;
218251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            p++;
218351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
218451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if (p > 1) normal = false;
218551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
218651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // Scan segments
218751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        while (p <= end) {
218851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
218951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            // Looking at "." or ".." ?
219051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if ((path.charAt(p) == '.')
219151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                && ((p == end)
219251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    || ((path.charAt(p + 1) == '/')
219351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                        || ((path.charAt(p + 1) == '.')
219451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                            && ((p + 1 == end)
219551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                                || (path.charAt(p + 2) == '/')))))) {
219651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                normal = false;
219751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            }
219851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            ns++;
219951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
220051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            // Find beginning of next segment
220151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            while (p <= end) {
220251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                if (path.charAt(p++) != '/')
220351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    continue;
220451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
220551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                // Skip redundant slashes
220651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                while (p <= end) {
220751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    if (path.charAt(p) != '/') break;
220851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    normal = false;
220951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    p++;
221051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                }
221151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
221251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                break;
221351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            }
221451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
221551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
221651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        return normal ? -1 : ns;
221751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
221851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
221951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
222051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // Split the given path into segments, replacing slashes with nulls and
222151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // filling in the given segment-index array.
222251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    //
222351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // Preconditions:
222451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    //   segs.length == Number of segments in path
222551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    //
222651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // Postconditions:
222751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    //   All slashes in path replaced by '\0'
222851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    //   segs[i] == Index of first char in segment i (0 <= i < segs.length)
222951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    //
223051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    static private void split(char[] path, int[] segs) {
223151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        int end = path.length - 1;      // Index of last char in path
223251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        int p = 0;                      // Index of next char in path
223351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        int i = 0;                      // Index of current segment
223451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
223551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // Skip initial slashes
223651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        while (p <= end) {
223751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (path[p] != '/') break;
223851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            path[p] = '\0';
223951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            p++;
224051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
224151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
224251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        while (p <= end) {
224351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
224451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            // Note start of segment
224551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            segs[i++] = p++;
224651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
224751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            // Find beginning of next segment
224851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            while (p <= end) {
224951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                if (path[p++] != '/')
225051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    continue;
225151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                path[p - 1] = '\0';
225251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
225351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                // Skip redundant slashes
225451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                while (p <= end) {
225551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    if (path[p] != '/') break;
225651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    path[p++] = '\0';
225751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                }
225851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                break;
225951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            }
226051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
226151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
226251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if (i != segs.length)
226351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            throw new InternalError();  // ASSERT
226451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
226551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
226651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
226751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // Join the segments in the given path according to the given segment-index
226851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // array, ignoring those segments whose index entries have been set to -1,
226951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // and inserting slashes as needed.  Return the length of the resulting
227051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // path.
227151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    //
227251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // Preconditions:
227351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    //   segs[i] == -1 implies segment i is to be ignored
227451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    //   path computed by split, as above, with '\0' having replaced '/'
227551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    //
227651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // Postconditions:
227751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    //   path[0] .. path[return value] == Resulting path
227851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    //
227951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    static private int join(char[] path, int[] segs) {
228051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        int ns = segs.length;           // Number of segments
228151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        int end = path.length - 1;      // Index of last char in path
228251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        int p = 0;                      // Index of next path char to write
228351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
228451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if (path[p] == '\0') {
228551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            // Restore initial slash for absolute paths
228651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            path[p++] = '/';
228751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
228851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
228951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        for (int i = 0; i < ns; i++) {
229051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            int q = segs[i];            // Current segment
229151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (q == -1)
229251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                // Ignore this segment
229351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                continue;
229451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
229551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (p == q) {
229651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                // We're already at this segment, so just skip to its end
229751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                while ((p <= end) && (path[p] != '\0'))
229851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    p++;
229951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                if (p <= end) {
230051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    // Preserve trailing slash
230151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    path[p++] = '/';
230251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                }
230351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            } else if (p < q) {
230451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                // Copy q down to p
230551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                while ((q <= end) && (path[q] != '\0'))
230651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    path[p++] = path[q++];
230751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                if (q <= end) {
230851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    // Preserve trailing slash
230951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    path[p++] = '/';
231051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                }
231151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            } else
231251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                throw new InternalError(); // ASSERT false
231351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
231451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
231551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        return p;
231651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
231751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
231851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
231951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // Remove "." segments from the given path, and remove segment pairs
232051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // consisting of a non-".." segment followed by a ".." segment.
232151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    //
23229af8755ec458552ad55adb2d106fd0915fd860e9Yi Kong    private static void removeDots(char[] path, int[] segs, boolean removeLeading) {
232351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        int ns = segs.length;
232451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        int end = path.length - 1;
232551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
232651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        for (int i = 0; i < ns; i++) {
232751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            int dots = 0;               // Number of dots found (0, 1, or 2)
232851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
232951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            // Find next occurrence of "." or ".."
233051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            do {
233151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                int p = segs[i];
233251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                if (path[p] == '.') {
233351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    if (p == end) {
233451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                        dots = 1;
233551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                        break;
233651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    } else if (path[p + 1] == '\0') {
233751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                        dots = 1;
233851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                        break;
233951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    } else if ((path[p + 1] == '.')
234051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                               && ((p + 1 == end)
234151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                                   || (path[p + 2] == '\0'))) {
234251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                        dots = 2;
234351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                        break;
234451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    }
234551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                }
234651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                i++;
234751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            } while (i < ns);
234851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if ((i > ns) || (dots == 0))
234951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                break;
235051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
235151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (dots == 1) {
235251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                // Remove this occurrence of "."
235351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                segs[i] = -1;
235451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            } else {
235551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                // If there is a preceding non-".." segment, remove both that
2356bd3fc1206b075a1eada8f6229d68cdb114a5799aYi Kong                // segment and this occurrence of ".."
235751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                int j;
235851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                for (j = i - 1; j >= 0; j--) {
235951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    if (segs[j] != -1) break;
236051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                }
236151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                if (j >= 0) {
236251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    int q = segs[j];
236351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    if (!((path[q] == '.')
236451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                          && (path[q + 1] == '.')
236551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                          && (path[q + 2] == '\0'))) {
236651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                        segs[i] = -1;
236751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                        segs[j] = -1;
236851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    }
23699af8755ec458552ad55adb2d106fd0915fd860e9Yi Kong                } else if (removeLeading) {
2370bd3fc1206b075a1eada8f6229d68cdb114a5799aYi Kong                    // This is a leading ".." segment. Per RFC 3986 RI, this should be removed as
2371bd3fc1206b075a1eada8f6229d68cdb114a5799aYi Kong                    // well. This fixes RFC 2396 "abnormal" examples.
2372bd3fc1206b075a1eada8f6229d68cdb114a5799aYi Kong                    // http://b/25897693
2373bd3fc1206b075a1eada8f6229d68cdb114a5799aYi Kong                    segs[i] = -1;
237451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                }
237551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            }
237651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
237751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
237851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
237951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
238051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // DEVIATION: If the normalized path is relative, and if the first
238151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // segment could be parsed as a scheme name, then prepend a "." segment
238251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    //
238351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static void maybeAddLeadingDot(char[] path, int[] segs) {
238451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
238551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if (path[0] == '\0')
238651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            // The path is absolute
238751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            return;
238851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
238951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        int ns = segs.length;
239051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        int f = 0;                      // Index of first segment
239151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        while (f < ns) {
239251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (segs[f] >= 0)
239351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                break;
239451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            f++;
239551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
239651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if ((f >= ns) || (f == 0))
239751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            // The path is empty, or else the original first segment survived,
239851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            // in which case we already know that no leading "." is needed
239951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            return;
240051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
240151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        int p = segs[f];
240251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        while ((p < path.length) && (path[p] != ':') && (path[p] != '\0')) p++;
240351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if (p >= path.length || path[p] == '\0')
240451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            // No colon in first segment, so no "." needed
240551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            return;
240651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
240751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // At this point we know that the first segment is unused,
240851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // hence we can insert a "." segment at that position
240951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        path[0] = '.';
241051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        path[1] = '\0';
241151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        segs[0] = 0;
241251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
241351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
241451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
241551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // Normalize the given path string.  A normal path string has no empty
241651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // segments (i.e., occurrences of "//"), no segments equal to ".", and no
241751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // segments equal to ".." that are preceded by a segment not equal to "..".
241851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // In contrast to Unix-style pathname normalization, for URI paths we
241951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // always retain trailing slashes.
242051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    //
242151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static String normalize(String ps) {
24229af8755ec458552ad55adb2d106fd0915fd860e9Yi Kong        return normalize(ps, false);
24239af8755ec458552ad55adb2d106fd0915fd860e9Yi Kong    }
24249af8755ec458552ad55adb2d106fd0915fd860e9Yi Kong
24259af8755ec458552ad55adb2d106fd0915fd860e9Yi Kong    private static String normalize(String ps, boolean removeLeading) {
242651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
242751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // Does this path need normalization?
242851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        int ns = needsNormalization(ps);        // Number of segments
242951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if (ns < 0)
243051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            // Nope -- just return it
243151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            return ps;
243251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
243351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        char[] path = ps.toCharArray();         // Path in char-array form
243451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
243551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // Split path into segments
243651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        int[] segs = new int[ns];               // Segment-index array
243751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        split(path, segs);
243851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
243951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // Remove dots
24409af8755ec458552ad55adb2d106fd0915fd860e9Yi Kong        removeDots(path, segs, removeLeading);
244151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
244251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // Prevent scheme-name confusion
244351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        maybeAddLeadingDot(path, segs);
244451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
244551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // Join the remaining segments and return the result
244651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        String s = new String(path, 0, join(path, segs));
244751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if (s.equals(ps)) {
244851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            // string was already normalized
244951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            return ps;
245051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
245151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        return s;
245251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
245351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
245451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
245551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
245651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // -- Character classes for parsing --
245751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
245851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // RFC2396 precisely specifies which characters in the US-ASCII charset are
245951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // permissible in the various components of a URI reference.  We here
246051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // define a set of mask pairs to aid in enforcing these restrictions.  Each
246151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // mask pair consists of two longs, a low mask and a high mask.  Taken
246251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // together they represent a 128-bit mask, where bit i is set iff the
246351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // character with value i is permitted.
246451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    //
246551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // This approach is more efficient than sequentially searching arrays of
246651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // permitted characters.  It could be made still more efficient by
246751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // precompiling the mask information so that a character's presence in a
246851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // given mask could be determined by a single table lookup.
246951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
247051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // Compute the low-order mask for the characters in the given string
247151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static long lowMask(String chars) {
247251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        int n = chars.length();
247351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        long m = 0;
247451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        for (int i = 0; i < n; i++) {
247551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            char c = chars.charAt(i);
247651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (c < 64)
247751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                m |= (1L << c);
247851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
247951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        return m;
248051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
248151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
248251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // Compute the high-order mask for the characters in the given string
248351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static long highMask(String chars) {
248451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        int n = chars.length();
248551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        long m = 0;
248651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        for (int i = 0; i < n; i++) {
248751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            char c = chars.charAt(i);
248851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if ((c >= 64) && (c < 128))
248951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                m |= (1L << (c - 64));
249051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
249151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        return m;
249251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
249351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
249451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // Compute a low-order mask for the characters
249551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // between first and last, inclusive
249651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static long lowMask(char first, char last) {
249751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        long m = 0;
249851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        int f = Math.max(Math.min(first, 63), 0);
249951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        int l = Math.max(Math.min(last, 63), 0);
250051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        for (int i = f; i <= l; i++)
250151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            m |= 1L << i;
250251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        return m;
250351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
250451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
250551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // Compute a high-order mask for the characters
250651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // between first and last, inclusive
250751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static long highMask(char first, char last) {
250851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        long m = 0;
250951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        int f = Math.max(Math.min(first, 127), 64) - 64;
251051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        int l = Math.max(Math.min(last, 127), 64) - 64;
251151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        for (int i = f; i <= l; i++)
251251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            m |= 1L << i;
251351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        return m;
251451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
251551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
251651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // Tell whether the given character is permitted by the given mask pair
251751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static boolean match(char c, long lowMask, long highMask) {
251851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if (c == 0) // 0 doesn't have a slot in the mask. So, it never matches.
251951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            return false;
252051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if (c < 64)
252151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            return ((1L << c) & lowMask) != 0;
252251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if (c < 128)
252351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            return ((1L << (c - 64)) & highMask) != 0;
252451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        return false;
252551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
252651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
252751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // Character-class masks, in reverse order from RFC2396 because
252851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // initializers for static fields cannot make forward references.
252951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
253051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // digit    = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" |
253151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    //            "8" | "9"
253251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static final long L_DIGIT = lowMask('0', '9');
253351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static final long H_DIGIT = 0L;
253451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
253551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // upalpha  = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" |
253651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    //            "J" | "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" |
253751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    //            "S" | "T" | "U" | "V" | "W" | "X" | "Y" | "Z"
253851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static final long L_UPALPHA = 0L;
253951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static final long H_UPALPHA = highMask('A', 'Z');
254051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
254151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" |
254251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    //            "j" | "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" |
254351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    //            "s" | "t" | "u" | "v" | "w" | "x" | "y" | "z"
254451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static final long L_LOWALPHA = 0L;
254551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static final long H_LOWALPHA = highMask('a', 'z');
254651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
254751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // alpha         = lowalpha | upalpha
254851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static final long L_ALPHA = L_LOWALPHA | L_UPALPHA;
254951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static final long H_ALPHA = H_LOWALPHA | H_UPALPHA;
255051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
255151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // alphanum      = alpha | digit
255251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static final long L_ALPHANUM = L_DIGIT | L_ALPHA;
255351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static final long H_ALPHANUM = H_DIGIT | H_ALPHA;
255451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
255551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // hex           = digit | "A" | "B" | "C" | "D" | "E" | "F" |
255651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    //                         "a" | "b" | "c" | "d" | "e" | "f"
255751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static final long L_HEX = L_DIGIT;
255851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static final long H_HEX = highMask('A', 'F') | highMask('a', 'f');
255951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
256051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // mark          = "-" | "_" | "." | "!" | "~" | "*" | "'" |
256151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    //                 "(" | ")"
256251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static final long L_MARK = lowMask("-_.!~*'()");
256351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static final long H_MARK = highMask("-_.!~*'()");
256451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
256551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // unreserved    = alphanum | mark
256651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static final long L_UNRESERVED = L_ALPHANUM | L_MARK;
256751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static final long H_UNRESERVED = H_ALPHANUM | H_MARK;
256851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
256951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // reserved      = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
257051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    //                 "$" | "," | "[" | "]"
257151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // Added per RFC2732: "[", "]"
257251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static final long L_RESERVED = lowMask(";/?:@&=+$,[]");
257351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static final long H_RESERVED = highMask(";/?:@&=+$,[]");
257451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
257551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // The zero'th bit is used to indicate that escape pairs and non-US-ASCII
257651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // characters are allowed; this is handled by the scanEscape method below.
257751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static final long L_ESCAPED = 1L;
257851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static final long H_ESCAPED = 0L;
257951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
258051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // uric          = reserved | unreserved | escaped
258151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static final long L_URIC = L_RESERVED | L_UNRESERVED | L_ESCAPED;
258251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static final long H_URIC = H_RESERVED | H_UNRESERVED | H_ESCAPED;
258351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
258451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // pchar         = unreserved | escaped |
258551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    //                 ":" | "@" | "&" | "=" | "+" | "$" | ","
258651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static final long L_PCHAR
258751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        = L_UNRESERVED | L_ESCAPED | lowMask(":@&=+$,");
258851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static final long H_PCHAR
258951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        = H_UNRESERVED | H_ESCAPED | highMask(":@&=+$,");
259051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
259151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // All valid path characters
259251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static final long L_PATH = L_PCHAR | lowMask(";/");
259351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static final long H_PATH = H_PCHAR | highMask(";/");
259451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
259551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // Dash, for use in domainlabel and toplabel
259651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static final long L_DASH = lowMask("-");
259751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static final long H_DASH = highMask("-");
259851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
25999265d1f1d5b5e8404b79aab2a5c48b1ab3fbb4daYi Kong    // UNDERSCORE, for use in domainlabel and toplabel
26009265d1f1d5b5e8404b79aab2a5c48b1ab3fbb4daYi Kong    private static final long L_UNDERSCORE = lowMask("_");
26019265d1f1d5b5e8404b79aab2a5c48b1ab3fbb4daYi Kong    private static final long H_UNDERSCORE = highMask("_");
26029265d1f1d5b5e8404b79aab2a5c48b1ab3fbb4daYi Kong
260351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // Dot, for use in hostnames
260451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static final long L_DOT = lowMask(".");
260551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static final long H_DOT = highMask(".");
260651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
260751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // userinfo      = *( unreserved | escaped |
260851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    //                    ";" | ":" | "&" | "=" | "+" | "$" | "," )
260951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static final long L_USERINFO
261051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        = L_UNRESERVED | L_ESCAPED | lowMask(";:&=+$,");
261151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static final long H_USERINFO
261251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        = H_UNRESERVED | H_ESCAPED | highMask(";:&=+$,");
261351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
261451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // reg_name      = 1*( unreserved | escaped | "$" | "," |
261551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    //                     ";" | ":" | "@" | "&" | "=" | "+" )
261651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static final long L_REG_NAME
261751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        = L_UNRESERVED | L_ESCAPED | lowMask("$,;:@&=+");
261851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static final long H_REG_NAME
261951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        = H_UNRESERVED | H_ESCAPED | highMask("$,;:@&=+");
262051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
262151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // All valid characters for server-based authorities
262251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static final long L_SERVER
262351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        = L_USERINFO | L_ALPHANUM | L_DASH | lowMask(".:@[]");
262451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static final long H_SERVER
262551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        = H_USERINFO | H_ALPHANUM | H_DASH | highMask(".:@[]");
262651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
262751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // Special case of server authority that represents an IPv6 address
262851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // In this case, a % does not signify an escape sequence
262951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static final long L_SERVER_PERCENT
263051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        = L_SERVER | lowMask("%");
263151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static final long H_SERVER_PERCENT
263251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        = H_SERVER | highMask("%");
263351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static final long L_LEFT_BRACKET = lowMask("[");
263451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static final long H_LEFT_BRACKET = highMask("[");
263551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
263651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // scheme        = alpha *( alpha | digit | "+" | "-" | "." )
263751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static final long L_SCHEME = L_ALPHA | L_DIGIT | lowMask("+-.");
263851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static final long H_SCHEME = H_ALPHA | H_DIGIT | highMask("+-.");
263951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
264051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // uric_no_slash = unreserved | escaped | ";" | "?" | ":" | "@" |
264151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    //                 "&" | "=" | "+" | "$" | ","
264251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static final long L_URIC_NO_SLASH
264351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        = L_UNRESERVED | L_ESCAPED | lowMask(";?:@&=+$,");
264451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static final long H_URIC_NO_SLASH
264551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        = H_UNRESERVED | H_ESCAPED | highMask(";?:@&=+$,");
264651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
264751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
264851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // -- Escaping and encoding --
264951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
265051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private final static char[] hexDigits = {
265151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        '0', '1', '2', '3', '4', '5', '6', '7',
265251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'
265351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    };
265451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
265551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static void appendEscape(StringBuffer sb, byte b) {
265651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        sb.append('%');
265751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        sb.append(hexDigits[(b >> 4) & 0x0f]);
265851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        sb.append(hexDigits[(b >> 0) & 0x0f]);
265951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
266051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
266151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static void appendEncoded(StringBuffer sb, char c) {
266251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        ByteBuffer bb = null;
266351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        try {
266451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            bb = ThreadLocalCoders.encoderFor("UTF-8")
266551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                .encode(CharBuffer.wrap("" + c));
266651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        } catch (CharacterCodingException x) {
266751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            assert false;
266851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
266951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        while (bb.hasRemaining()) {
267051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            int b = bb.get() & 0xff;
267151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (b >= 0x80)
267251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                appendEscape(sb, (byte)b);
267351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            else
267451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                sb.append((char)b);
267551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
267651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
267751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
267851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // Quote any characters in s that are not permitted
267951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // by the given mask pair
268051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    //
268151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static String quote(String s, long lowMask, long highMask) {
268251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        int n = s.length();
268351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        StringBuffer sb = null;
268451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        boolean allowNonASCII = ((lowMask & L_ESCAPED) != 0);
268551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        for (int i = 0; i < s.length(); i++) {
268651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            char c = s.charAt(i);
268751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (c < '\u0080') {
268851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                if (!match(c, lowMask, highMask)) {
268951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    if (sb == null) {
269051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                        sb = new StringBuffer();
269151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                        sb.append(s.substring(0, i));
269251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    }
269351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    appendEscape(sb, (byte)c);
269451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                } else {
269551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    if (sb != null)
269651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                        sb.append(c);
269751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                }
269851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            } else if (allowNonASCII
269951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                       && (Character.isSpaceChar(c)
270051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                           || Character.isISOControl(c))) {
270151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                if (sb == null) {
270251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    sb = new StringBuffer();
270351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    sb.append(s.substring(0, i));
270451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                }
270551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                appendEncoded(sb, c);
270651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            } else {
270751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                if (sb != null)
270851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    sb.append(c);
270951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            }
271051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
271151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        return (sb == null) ? s : sb.toString();
271251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
271351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
271451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // Encodes all characters >= \u0080 into escaped, normalized UTF-8 octets,
271551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // assuming that s is otherwise legal
271651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    //
271751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static String encode(String s) {
271851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        int n = s.length();
271951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if (n == 0)
272051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            return s;
272151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
272251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // First check whether we actually need to encode
272351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        for (int i = 0;;) {
272451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (s.charAt(i) >= '\u0080')
272551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                break;
272651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (++i >= n)
272751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                return s;
272851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
272951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
273051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        String ns = Normalizer.normalize(s, Normalizer.Form.NFC);
273151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        ByteBuffer bb = null;
273251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        try {
273351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            bb = ThreadLocalCoders.encoderFor("UTF-8")
273451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                .encode(CharBuffer.wrap(ns));
273551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        } catch (CharacterCodingException x) {
273651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            assert false;
273751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
273851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
273951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        StringBuffer sb = new StringBuffer();
274051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        while (bb.hasRemaining()) {
274151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            int b = bb.get() & 0xff;
274251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (b >= 0x80)
274351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                appendEscape(sb, (byte)b);
274451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            else
274551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                sb.append((char)b);
274651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
274751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        return sb.toString();
274851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
274951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
275051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static int decode(char c) {
275151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if ((c >= '0') && (c <= '9'))
275251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            return c - '0';
275351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if ((c >= 'a') && (c <= 'f'))
275451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            return c - 'a' + 10;
275551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if ((c >= 'A') && (c <= 'F'))
275651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            return c - 'A' + 10;
275751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        assert false;
275851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        return -1;
275951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
276051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
276151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static byte decode(char c1, char c2) {
276251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        return (byte)(  ((decode(c1) & 0xf) << 4)
276351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                      | ((decode(c2) & 0xf) << 0));
276451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
276551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
276651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // Evaluates all escapes in s, applying UTF-8 decoding if needed.  Assumes
276751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // that escapes are well-formed syntactically, i.e., of the form %XX.  If a
276851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // sequence of escaped octets is not valid UTF-8 then the erroneous octets
276951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // are replaced with '\uFFFD'.
277051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // Exception: any "%" found between "[]" is left alone. It is an IPv6 literal
277151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    //            with a scope_id
277251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    //
277351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private static String decode(String s) {
277451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if (s == null)
277551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            return s;
277651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        int n = s.length();
277751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if (n == 0)
277851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            return s;
277951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        if (s.indexOf('%') < 0)
278051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            return s;
278151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
278251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        StringBuffer sb = new StringBuffer(n);
278351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        ByteBuffer bb = ByteBuffer.allocate(n);
278451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        CharBuffer cb = CharBuffer.allocate(n);
278551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        CharsetDecoder dec = ThreadLocalCoders.decoderFor("UTF-8")
278651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            .onMalformedInput(CodingErrorAction.REPLACE)
278751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            .onUnmappableCharacter(CodingErrorAction.REPLACE);
278851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
278951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // This is not horribly efficient, but it will do for now
279051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        char c = s.charAt(0);
279151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        boolean betweenBrackets = false;
279251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
279351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        for (int i = 0; i < n;) {
279451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            assert c == s.charAt(i);    // Loop invariant
279551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (c == '[') {
279651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                betweenBrackets = true;
279751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            } else if (betweenBrackets && c == ']') {
279851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                betweenBrackets = false;
279951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            }
280051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (c != '%' || betweenBrackets) {
280151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                sb.append(c);
280251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                if (++i >= n)
280351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    break;
280451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                c = s.charAt(i);
280551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                continue;
280651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            }
280751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            bb.clear();
280851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            int ui = i;
280951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            for (;;) {
281051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                assert (n - i >= 2);
281151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                bb.put(decode(s.charAt(++i), s.charAt(++i)));
281251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                if (++i >= n)
281351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    break;
281451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                c = s.charAt(i);
281551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                if (c != '%')
281651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    break;
281751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            }
281851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            bb.flip();
281951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            cb.clear();
282051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            dec.reset();
282151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            CoderResult cr = dec.decode(bb, cb, true);
282251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            assert cr.isUnderflow();
282351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            cr = dec.flush(cb);
282451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            assert cr.isUnderflow();
282551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            sb.append(cb.flip().toString());
282651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
282751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
282851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        return sb.toString();
282951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
283051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
283151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
283251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // -- Parsing --
283351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
283451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // For convenience we wrap the input URI string in a new instance of the
283551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // following internal class.  This saves always having to pass the input
283651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    // string as an argument to each internal scan/parse method.
283751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
283851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    private class Parser {
283951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
284051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        private String input;           // URI input string
284151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        private boolean requireServerAuthority = false;
284251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
// Records s as the text to be parsed and also assigns it to string.
// NOTE(review): string is not among the Parser fields visible here;
// presumably it is a field of the enclosing class - confirm.
Parser(String s) {
    string = s;
    input = s;
}
284751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
284851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // -- Methods for throwing URISyntaxException in various ways --
284951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
285051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        private void fail(String reason) throws URISyntaxException {
285151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            throw new URISyntaxException(input, reason);
285251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
285351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
285451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        private void fail(String reason, int p) throws URISyntaxException {
285551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            throw new URISyntaxException(input, reason, p);
285651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
285751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
285851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        private void failExpecting(String expected, int p)
285951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            throws URISyntaxException
286051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        {
286151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            fail("Expected " + expected, p);
286251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
286351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
286451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        private void failExpecting(String expected, String prior, int p)
286551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            throws URISyntaxException
286651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        {
286751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            fail("Expected " + expected + " following " + prior, p);
286851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
286951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
287051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
287151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // -- Simple access to the input string --
287251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
287351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // Return a substring of the input string
287451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //
287551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        private String substring(int start, int end) {
287651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            return input.substring(start, end);
287751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
287851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
287951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // Return the char at position p,
288051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // assuming that p < input.length()
288151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //
288251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        private char charAt(int p) {
288351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            return input.charAt(p);
288451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
288551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
288651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // Tells whether start < end and, if so, whether charAt(start) == c
288751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //
288851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        private boolean at(int start, int end, char c) {
288951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            return (start < end) && (charAt(start) == c);
289051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
289151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
289251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // Tells whether start + s.length() < end and, if so,
289351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // whether the chars at the start position match s exactly
289451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //
289551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        private boolean at(int start, int end, String s) {
289651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            int p = start;
289751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            int sn = s.length();
289851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (sn > end - p)
289951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                return false;
290051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            int i = 0;
290151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            while (i < sn) {
290251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                if (charAt(p++) != s.charAt(i)) {
290351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    break;
290451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                }
290551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                i++;
290651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            }
290751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            return (i == sn);
290851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
290951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
291051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
291151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // -- Scanning --
291251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
// The various scan and parse methods that follow use a uniform
// convention of taking the current start position and end index as
// their first two arguments.  The start is inclusive while the end is
// exclusive, just as in the String class, i.e., a start/end pair
// denotes the half-open interval [start, end) of the input string.
291851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //
291951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // These methods never proceed past the end position.  They may return
292051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // -1 to indicate outright failure, but more often they simply return
292151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // the position of the first char after the last char scanned.  Thus
292251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // a typical idiom is
292351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //
292451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //     int p = start;
292551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //     int q = scan(p, end, ...);
292651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //     if (q > p)
292751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //         // We scanned something
292851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //         ...;
292951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //     else if (q == p)
293051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //         // We scanned nothing
293151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //         ...;
293251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //     else if (q == -1)
293351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //         // Something went wrong
293451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //         ...;
293551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
293651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
293751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // Scan a specific char: If the char at the given start position is
293851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // equal to c, return the index of the next char; otherwise, return the
293951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // start position.
294051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //
294151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        private int scan(int start, int end, char c) {
294251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if ((start < end) && (charAt(start) == c))
294351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                return start + 1;
294451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            return start;
294551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
294651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
294751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // Scan forward from the given start position.  Stop at the first char
294851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // in the err string (in which case -1 is returned), or the first char
294951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // in the stop string (in which case the index of the preceding char is
295051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // returned), or the end of the input string (in which case the length
295151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // of the input string is returned).  May return the start position if
295251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // nothing matches.
295351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //
295451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        private int scan(int start, int end, String err, String stop) {
295551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            int p = start;
295651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            while (p < end) {
295751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                char c = charAt(p);
295851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                if (err.indexOf(c) >= 0)
295951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    return -1;
296051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                if (stop.indexOf(c) >= 0)
296151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    break;
296251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                p++;
296351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            }
296451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            return p;
296551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
296651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
296751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // Scan a potential escape sequence, starting at the given position,
296851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // with the given first char (i.e., charAt(start) == c).
296951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //
297051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // This method assumes that if escapes are allowed then visible
297151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // non-US-ASCII chars are also allowed.
297251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //
297351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        private int scanEscape(int start, int n, char first)
297451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            throws URISyntaxException
297551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        {
297651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            int p = start;
297751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            char c = first;
297851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (c == '%') {
297951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                // Process escape pair
298051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                if ((p + 3 <= n)
298151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    && match(charAt(p + 1), L_HEX, H_HEX)
298251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    && match(charAt(p + 2), L_HEX, H_HEX)) {
298351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    return p + 3;
298451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                }
298551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                fail("Malformed escape pair", p);
298651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            } else if ((c > 128)
298751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                       && !Character.isSpaceChar(c)
298851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                       && !Character.isISOControl(c)) {
298951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                // Allow unescaped but visible non-US-ASCII chars
299051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                return p + 1;
299151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            }
299251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            return p;
299351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
299451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
299551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // Scan chars that match the given mask pair
299651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //
299751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        private int scan(int start, int n, long lowMask, long highMask)
299851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            throws URISyntaxException
299951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        {
300051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            int p = start;
300151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            while (p < n) {
300251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                char c = charAt(p);
300351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                if (match(c, lowMask, highMask)) {
300451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    p++;
300551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    continue;
300651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                }
300751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                if ((lowMask & L_ESCAPED) != 0) {
300851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    int q = scanEscape(p, n, c);
300951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    if (q > p) {
301051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                        p = q;
301151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                        continue;
301251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    }
301351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                }
301451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                break;
301551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            }
301651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            return p;
301751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
301851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
301951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // Check that each of the chars in [start, end) matches the given mask
302051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //
302151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        private void checkChars(int start, int end,
302251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                                long lowMask, long highMask,
302351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                                String what)
302451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            throws URISyntaxException
302551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        {
302651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            int p = scan(start, end, lowMask, highMask);
302751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (p < end)
302851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                fail("Illegal character in " + what, p);
302951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
303051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
303151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // Check that the char at position p matches the given mask
303251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //
303351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        private void checkChar(int p,
303451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                               long lowMask, long highMask,
303551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                               String what)
303651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            throws URISyntaxException
303751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        {
303851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            checkChars(p, p + 1, lowMask, highMask, what);
303951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
304051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
304151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
304251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // -- Parsing --
304351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
304451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // [<scheme>:]<scheme-specific-part>[#<fragment>]
304551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //
304651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        void parse(boolean rsa) throws URISyntaxException {
304751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            requireServerAuthority = rsa;
304851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            int ssp;                    // Start of scheme-specific part
304951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            int n = input.length();
305051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            int p = scan(0, n, "/?#", ":");
305151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if ((p >= 0) && at(p, n, ':')) {
305251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                if (p == 0)
305351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    failExpecting("scheme name", 0);
305451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                checkChar(0, L_ALPHA, H_ALPHA, "scheme name");
305551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                checkChars(1, p, L_SCHEME, H_SCHEME, "scheme name");
305651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                scheme = substring(0, p);
305751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                p++;                    // Skip ':'
305851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                ssp = p;
305951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                if (at(p, n, '/')) {
306051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    p = parseHierarchical(p, n);
306151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                } else {
306251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    int q = scan(p, n, "", "#");
306351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    if (q <= p)
306451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                        failExpecting("scheme-specific part", p);
306551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    checkChars(p, q, L_URIC, H_URIC, "opaque part");
306651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    p = q;
306751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                }
306851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            } else {
306951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                ssp = 0;
307051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                p = parseHierarchical(0, n);
307151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            }
307251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            schemeSpecificPart = substring(ssp, p);
307351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (at(p, n, '#')) {
307451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                checkChars(p + 1, n, L_URIC, H_URIC, "fragment");
307551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                fragment = substring(p + 1, n);
307651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                p = n;
307751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            }
307851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (p < n)
307951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                fail("end of URI", p);
308051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
308151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
308251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // [//authority]<path>[?<query>]
308351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //
308451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // DEVIATION from RFC2396: We allow an empty authority component as
308551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // long as it's followed by a non-empty path, query component, or
308651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // fragment component.  This is so that URIs such as "file:///foo/bar"
308751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // will parse.  This seems to be the intent of RFC2396, though the
308851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // grammar does not permit it.  If the authority is empty then the
308951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // userInfo, host, and port components are undefined.
309051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //
309151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // DEVIATION from RFC2396: We allow empty relative paths.  This seems
309251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // to be the intent of RFC2396, but the grammar does not permit it.
309351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // The primary consequence of this deviation is that "#f" parses as a
309451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // relative URI with an empty path.
309551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //
309651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        private int parseHierarchical(int start, int n)
309751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            throws URISyntaxException
309851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        {
309951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            int p = start;
310051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (at(p, n, '/') && at(p + 1, n, '/')) {
310151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                p += 2;
310251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                int q = scan(p, n, "", "/?#");
310351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                if (q > p) {
310451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    p = parseAuthority(p, q);
310551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                } else if (q < n) {
310651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    // DEVIATION: Allow empty authority prior to non-empty
310751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    // path, query component or fragment identifier
310851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                } else
310951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    failExpecting("authority", p);
311051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            }
311151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            int q = scan(p, n, "", "?#"); // DEVIATION: May be empty
311251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            checkChars(p, q, L_PATH, H_PATH, "path");
311351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            path = substring(p, q);
311451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            p = q;
311551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (at(p, n, '?')) {
311651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                p++;
311751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                q = scan(p, n, "", "#");
311851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                checkChars(p, q, L_URIC, H_URIC, "query");
311951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                query = substring(p, q);
312051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                p = q;
312151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            }
312251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            return p;
312351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
312451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
312551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // authority     = server | reg_name
312651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //
312751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // Ambiguity: An authority that is a registry name rather than a server
312851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // might have a prefix that parses as a server.  We use the fact that
312951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // the authority component is always followed by '/' or the end of the
313051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // input string to resolve this: If the complete authority did not
313151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // parse as a server then we try to parse it as a registry name.
313251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //
313351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        private int parseAuthority(int start, int n)
313451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            throws URISyntaxException
313551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        {
313651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            int p = start;
313751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            int q = p;
313851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            URISyntaxException ex = null;
313951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
314051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            boolean serverChars;
314151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            boolean regChars;
314251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
314351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (scan(p, n, "", "]") > p) {
314451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                // contains a literal IPv6 address, therefore % is allowed
314551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                serverChars = (scan(p, n, L_SERVER_PERCENT, H_SERVER_PERCENT) == n);
314651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            } else {
314751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                serverChars = (scan(p, n, L_SERVER, H_SERVER) == n);
314851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            }
314951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            regChars = (scan(p, n, L_REG_NAME, H_REG_NAME) == n);
315051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
315151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (regChars && !serverChars) {
315251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                // Must be a registry-based authority
315351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                authority = substring(p, n);
315451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                return n;
315551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            }
315651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
315751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (serverChars) {
315851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                // Might be (probably is) a server-based authority, so attempt
315951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                // to parse it as such.  If the attempt fails, try to treat it
316051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                // as a registry-based authority.
316151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                try {
316251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    q = parseServer(p, n);
316351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    if (q < n)
316451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                        failExpecting("end of authority", q);
316551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    authority = substring(p, n);
316651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                } catch (URISyntaxException x) {
316751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    // Undo results of failed parse
316851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    userInfo = null;
316951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    host = null;
317051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    port = -1;
317151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    if (requireServerAuthority) {
317251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                        // If we're insisting upon a server-based authority,
317351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                        // then just re-throw the exception
317451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                        throw x;
317551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    } else {
317651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                        // Save the exception in case it doesn't parse as a
317751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                        // registry either
317851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                        ex = x;
317951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                        q = p;
318051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    }
318151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                }
318251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            }
318351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
318451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (q < n) {
318551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                if (regChars) {
318651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    // Registry-based authority
318751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    authority = substring(p, n);
318851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                } else if (ex != null) {
318951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    // Re-throw exception; it was probably due to
319051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    // a malformed IPv6 address
319151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    throw ex;
319251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                } else {
319351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    fail("Illegal character in authority", q);
319451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                }
319551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            }
319651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
319751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            return n;
319851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
319951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
320051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
320151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // [<userinfo>@]<host>[:<port>]
320251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //
320351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        private int parseServer(int start, int n)
320451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            throws URISyntaxException
320551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        {
320651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            int p = start;
320751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            int q;
320851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
320951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            // userinfo
321051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            q = scan(p, n, "/?#", "@");
321151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if ((q >= p) && at(q, n, '@')) {
321251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                checkChars(p, q, L_USERINFO, H_USERINFO, "user info");
321351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                userInfo = substring(p, q);
321451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                p = q + 1;              // Skip '@'
321551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            }
321651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
321751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            // hostname, IPv4 address, or IPv6 address
321851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (at(p, n, '[')) {
321951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                // DEVIATION from RFC2396: Support IPv6 addresses, per RFC2732
322051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                p++;
322151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                q = scan(p, n, "/?#", "]");
322251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                if ((q > p) && at(q, n, ']')) {
322351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    // look for a "%" scope id
322451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    int r = scan (p, q, "", "%");
322551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    if (r > p) {
322651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                        parseIPv6Reference(p, r);
322751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                        if (r+1 == q) {
322851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                            fail ("scope id expected");
322951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                        }
323051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                        checkChars (r+1, q, L_ALPHANUM, H_ALPHANUM,
323151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                                                "scope id");
323251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    } else {
323351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                        parseIPv6Reference(p, q);
323451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    }
323551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    host = substring(p-1, q+1);
323651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    p = q + 1;
323751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                } else {
323851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    failExpecting("closing bracket for IPv6 address", q);
323951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                }
324051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            } else {
324151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                q = parseIPv4Address(p, n);
324251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                if (q <= p)
324351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    q = parseHostname(p, n);
324451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                p = q;
324551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            }
324651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
324751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            // port
324851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (at(p, n, ':')) {
324951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                p++;
325051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                q = scan(p, n, "", "/");
325151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                if (q > p) {
325251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    checkChars(p, q, L_DIGIT, H_DIGIT, "port number");
325351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    try {
325451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                        port = Integer.parseInt(substring(p, q));
325551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    } catch (NumberFormatException x) {
325651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                        fail("Malformed port number", p);
325751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    }
325851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    p = q;
325951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                }
326051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            }
326151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (p < n)
326251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                failExpecting("port number", p);
326351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
326451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            return p;
326551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
326651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
326751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // Scan a string of decimal digits whose value fits in a byte
326851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //
326951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        private int scanByte(int start, int n)
327051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            throws URISyntaxException
327151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        {
327251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            int p = start;
327351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            int q = scan(p, n, L_DIGIT, H_DIGIT);
327451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (q <= p) return q;
327551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (Integer.parseInt(substring(p, q)) > 255) return p;
327651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            return q;
327751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
327851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
327951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // Scan an IPv4 address.
328051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //
328151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // If the strict argument is true then we require that the given
328251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // interval contain nothing besides an IPv4 address; if it is false
328351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // then we only require that it start with an IPv4 address.
328451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //
        // If the interval does not consist of or start with (depending upon
        // the strict argument) legal IPv4 address characters, i.e. decimal
        // digits and dots, then we return -1 immediately; otherwise we insist
        // that these characters parse as a legal IPv4 address and throw an
        // exception on failure.
328951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //
329051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // We assume that any string of decimal digits and dots must be an IPv4
329151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // address.  It won't parse as a hostname anyway, so making that
329251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // assumption here allows more meaningful exceptions to be thrown.
329351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //
329451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        private int scanIPv4Address(int start, int n, boolean strict)
329551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            throws URISyntaxException
329651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        {
329751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            int p = start;
329851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            int q;
329951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            int m = scan(p, n, L_DIGIT | L_DOT, H_DIGIT | H_DOT);
330051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if ((m <= p) || (strict && (m != n)))
330151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                return -1;
330251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            for (;;) {
330351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                // Per RFC2732: At most three digits per byte
330451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                // Further constraint: Each element fits in a byte
330551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                if ((q = scanByte(p, m)) <= p) break;   p = q;
330651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                if ((q = scan(p, m, '.')) <= p) break;  p = q;
330751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                if ((q = scanByte(p, m)) <= p) break;   p = q;
330851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                if ((q = scan(p, m, '.')) <= p) break;  p = q;
330951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                if ((q = scanByte(p, m)) <= p) break;   p = q;
331051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                if ((q = scan(p, m, '.')) <= p) break;  p = q;
331151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                if ((q = scanByte(p, m)) <= p) break;   p = q;
331251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                if (q < m) break;
331351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                return q;
331451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            }
331551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            fail("Malformed IPv4 address", q);
331651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            return -1;
331751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
331851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
331951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // Take an IPv4 address: Throw an exception if the given interval
332051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // contains anything except an IPv4 address
332151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //
332251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        private int takeIPv4Address(int start, int n, String expected)
332351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            throws URISyntaxException
332451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        {
332551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            int p = scanIPv4Address(start, n, true);
332651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (p <= start)
332751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                failExpecting(expected, start);
332851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            return p;
332951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
333051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
333151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // Attempt to parse an IPv4 address, returning -1 on failure but
333251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // allowing the given interval to contain [:<characters>] after
333351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // the IPv4 address.
333451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //
333551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        private int parseIPv4Address(int start, int n) {
333651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            int p;
333751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
333851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            try {
333951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                p = scanIPv4Address(start, n, false);
334051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            } catch (URISyntaxException x) {
334151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                return -1;
334251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            } catch (NumberFormatException nfe) {
334351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                return -1;
334451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            }
334551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
334651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (p > start && p < n) {
334751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                // IPv4 address is followed by something - check that
334851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                // it's a ":" as this is the only valid character to
334951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                // follow an address.
335051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                if (charAt(p) != ':') {
335151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    p = -1;
335251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                }
335351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            }
335451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
335551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (p > start)
335651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                host = substring(start, p);
335751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
335851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            return p;
335951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
336051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
336151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // hostname      = domainlabel [ "." ] | 1*( domainlabel "." ) toplabel [ "." ]
33629265d1f1d5b5e8404b79aab2a5c48b1ab3fbb4daYi Kong        // domainlabel   = alphanum | alphanum *( alphanum | "-" | "_" ) alphanum
33639265d1f1d5b5e8404b79aab2a5c48b1ab3fbb4daYi Kong        // toplabel      = alpha | alpha *( alphanum | "-" | "_" ) alphanum
336451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //
336551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        private int parseHostname(int start, int n)
336651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            throws URISyntaxException
336751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        {
336851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            int p = start;
336951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            int q;
337051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            int l = -1;                 // Start of last parsed label
337151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
337251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            do {
33739265d1f1d5b5e8404b79aab2a5c48b1ab3fbb4daYi Kong                // domainlabel = alphanum [ *( alphanum | "-" | "_" ) alphanum ]
3374b1e72048a6bf7c12c8b7dddab5b3f0729e27da45Yi Kong
3375b1e72048a6bf7c12c8b7dddab5b3f0729e27da45Yi Kong                // RFC1034#section-3.5 doesn't permit empty labels in hostnames, but we accepted
3376b1e72048a6bf7c12c8b7dddab5b3f0729e27da45Yi Kong                // this prior to N and the behavior is used by some apps. They're accepted for
3377b1e72048a6bf7c12c8b7dddab5b3f0729e27da45Yi Kong                // compatibility but we produce a warning in the log.
3378b1e72048a6bf7c12c8b7dddab5b3f0729e27da45Yi Kong                // http://b/25991669
3379ed526e210d0af226fc7a88fab1f18dfa953eecb9Yi Kong                if (p < n && charAt(p) == '.') {
3380b1e72048a6bf7c12c8b7dddab5b3f0729e27da45Yi Kong                  java.lang.System.logE("URI " + substring(start, n) +  " has empty labels in " +
3381b1e72048a6bf7c12c8b7dddab5b3f0729e27da45Yi Kong                                        "the hostname. This is malformed and will not be accepted" +
3382b1e72048a6bf7c12c8b7dddab5b3f0729e27da45Yi Kong                                        "in future Android releases.");
3383b1e72048a6bf7c12c8b7dddab5b3f0729e27da45Yi Kong                  q = ++p;
3384b1e72048a6bf7c12c8b7dddab5b3f0729e27da45Yi Kong                  continue;
3385b1e72048a6bf7c12c8b7dddab5b3f0729e27da45Yi Kong                }
3386b1e72048a6bf7c12c8b7dddab5b3f0729e27da45Yi Kong
33879265d1f1d5b5e8404b79aab2a5c48b1ab3fbb4daYi Kong                // The RFCs don't permit underscores in hostnames, but URI has to because a certain
33889265d1f1d5b5e8404b79aab2a5c48b1ab3fbb4daYi Kong                // large website doesn't seem to care about standards and specs.
33899265d1f1d5b5e8404b79aab2a5c48b1ab3fbb4daYi Kong                // http://code.google.com/p/android/issues/detail?id=37577
33909265d1f1d5b5e8404b79aab2a5c48b1ab3fbb4daYi Kong                // http://b/17579865
33919265d1f1d5b5e8404b79aab2a5c48b1ab3fbb4daYi Kong                // http://b/18016625
33929265d1f1d5b5e8404b79aab2a5c48b1ab3fbb4daYi Kong                // http://b/18023709
339351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                q = scan(p, n, L_ALPHANUM, H_ALPHANUM);
339451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                if (q <= p)
339551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    break;
339651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                l = p;
339751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                if (q > p) {
339851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    p = q;
33999265d1f1d5b5e8404b79aab2a5c48b1ab3fbb4daYi Kong                    q = scan(p, n, L_ALPHANUM | L_DASH | L_UNDERSCORE, H_ALPHANUM | H_DASH | H_UNDERSCORE);
340051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    if (q > p) {
340151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                        if (charAt(q - 1) == '-')
340251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                            fail("Illegal character in hostname", q - 1);
340351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                        p = q;
340451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    }
340551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                }
340651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                q = scan(p, n, '.');
340751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                if (q <= p)
340851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    break;
340951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                p = q;
341051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            } while (p < n);
341151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
341251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if ((p < n) && !at(p, n, ':'))
341351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                fail("Illegal character in hostname", p);
341451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
341551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (l < 0)
341651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                failExpecting("hostname", start);
341751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
341851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            // for a fully qualified hostname check that the rightmost
341951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            // label starts with an alpha character.
342051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (l > start && !match(charAt(l), L_ALPHA, H_ALPHA)) {
342151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                fail("Illegal character in hostname", l);
342251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            }
342351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
342451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            host = substring(start, p);
342551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            return p;
342651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
342751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
342851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
342951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // IPv6 address parsing, from RFC2373: IPv6 Addressing Architecture
343051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //
343151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // Bug: The grammar in RFC2373 Appendix B does not allow addresses of
343251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // the form ::12.34.56.78, which are clearly shown in the examples
343351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // earlier in the document.  Here is the original grammar:
343451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //
343551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //   IPv6address = hexpart [ ":" IPv4address ]
343651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //   hexpart     = hexseq | hexseq "::" [ hexseq ] | "::" [ hexseq ]
343751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //   hexseq      = hex4 *( ":" hex4)
343851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //   hex4        = 1*4HEXDIG
343951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //
344051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // We therefore use the following revised grammar:
344151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //
344251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //   IPv6address = hexseq [ ":" IPv4address ]
344351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //                 | hexseq [ "::" [ hexpost ] ]
344451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //                 | "::" [ hexpost ]
344551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //   hexpost     = hexseq | hexseq ":" IPv4address | IPv4address
344651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //   hexseq      = hex4 *( ":" hex4)
344751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //   hex4        = 1*4HEXDIG
344851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //
344951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // This covers all and only the following cases:
345051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //
345151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //   hexseq
345251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //   hexseq : IPv4address
345351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //   hexseq ::
345451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //   hexseq :: hexseq
345551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //   hexseq :: hexseq : IPv4address
345651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //   hexseq :: IPv4address
345751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //   :: hexseq
345851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //   :: hexseq : IPv4address
345951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //   :: IPv4address
346051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //   ::
346151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //
346251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // Additionally we constrain the IPv6 address as follows :-
346351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //
346451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //  i.  IPv6 addresses without compressed zeros should contain
346551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //      exactly 16 bytes.
346651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //
346751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //  ii. IPv6 addresses with compressed zeros should contain
346851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //      less than 16 bytes.
346951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
347051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        private int ipv6byteCount = 0;
347151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
347251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        private int parseIPv6Reference(int start, int n)
347351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            throws URISyntaxException
347451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        {
347551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            int p = start;
347651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            int q;
347751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            boolean compressedZeros = false;
347851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
347951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            q = scanHexSeq(p, n);
348051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
348151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (q > p) {
348251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                p = q;
348351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                if (at(p, n, "::")) {
348451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    compressedZeros = true;
348551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    p = scanHexPost(p + 2, n);
348651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                } else if (at(p, n, ':')) {
348751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    p = takeIPv4Address(p + 1,  n, "IPv4 address");
348851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    ipv6byteCount += 4;
348951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                }
349051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            } else if (at(p, n, "::")) {
349151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                compressedZeros = true;
349251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                p = scanHexPost(p + 2, n);
349351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            }
349451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (p < n)
349551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                fail("Malformed IPv6 address", start);
349651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (ipv6byteCount > 16)
349751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                fail("IPv6 address too long", start);
349851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (!compressedZeros && ipv6byteCount < 16)
349951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                fail("IPv6 address too short", start);
350051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (compressedZeros && ipv6byteCount == 16)
350151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                fail("Malformed IPv6 address", start);
350251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
350351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            return p;
350451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
350551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
350651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        private int scanHexPost(int start, int n)
350751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            throws URISyntaxException
350851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        {
350951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            int p = start;
351051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            int q;
351151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
351251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (p == n)
351351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                return p;
351451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
351551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            q = scanHexSeq(p, n);
351651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (q > p) {
351751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                p = q;
351851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                if (at(p, n, ':')) {
351951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    p++;
352051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    p = takeIPv4Address(p, n, "hex digits or IPv4 address");
352151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    ipv6byteCount += 4;
352251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                }
352351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            } else {
352451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                p = takeIPv4Address(p, n, "hex digits or IPv4 address");
352551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                ipv6byteCount += 4;
352651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            }
352751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            return p;
352851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
352951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
353051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        // Scan a hex sequence; return -1 if one could not be scanned
353151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        //
353251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        private int scanHexSeq(int start, int n)
353351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            throws URISyntaxException
353451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        {
353551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            int p = start;
353651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            int q;
353751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
353851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            q = scan(p, n, L_HEX, H_HEX);
353951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (q <= p)
354051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                return -1;
354151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (at(q, n, '.'))          // Beginning of IPv4 address
354251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                return -1;
354351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            if (q > p + 4)
354451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                fail("IPv6 hexadecimal digit sequence too long", p);
354551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            ipv6byteCount += 2;
354651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            p = q;
354751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            while (p < n) {
354851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                if (!at(p, n, ':'))
354951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    break;
355051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                if (at(p + 1, n, ':'))
355151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    break;              // "::"
355251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                p++;
355351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                q = scan(p, n, L_HEX, H_HEX);
355451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                if (q <= p)
355551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    failExpecting("digits for an IPv6 address", p);
355651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                if (at(q, n, '.')) {    // Beginning of IPv4 address
355751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    p--;
355851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    break;
355951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                }
356051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                if (q > p + 4)
356151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                    fail("IPv6 hexadecimal digit sequence too long", p);
356251b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                ipv6byteCount += 2;
356351b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski                p = q;
356451b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            }
356551b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
356651b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski            return p;
356751b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski        }
356851b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
356951b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski    }
357051b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski
357151b1b6997fd3f980076b8081f7f1165ccc2a4008Piotr Jastrzebski}
3572