/*
 * Copyright (C) 2015 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
1656099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson
package libcore.net;

import java.io.ByteArrayOutputStream;
import java.net.URISyntaxException;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.CoderResult;
import java.nio.charset.CodingErrorAction;
import java.nio.charset.StandardCharsets;
3056099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson
/**
 * Encodes and decodes "application/x-www-form-urlencoded" content.
 *
 * <p>Subclasses define {@link #isRetained(char)}, which decides which chars need to be escaped
 * and which don't. Characters that must be escaped are converted to bytes using the requested
 * charset (UTF-8 for the overloads that do not take one) and every resulting byte is written as
 * {@code %XX} with upper-case hex digits. For example, U+1F600 (a surrogate pair) encoded as
 * UTF-8 becomes {@code %F0%9F%98%80}.
 */
public abstract class UriCodec {

    /**
     * Character to be output when there's an error decoding an input.
     */
    private static final char INVALID_INPUT_CHARACTER = '\ufffd';

    /**
     * Returns true iff {@code c} does not need to be escaped.
     *
     * <p>'a' - 'z', 'A' - 'Z' and '0' - '9' are always considered valid (i.e. they never need to
     * be escaped), whatever this method returns for them. This set is referred to as the
     * "whitelist".
     */
    protected abstract boolean isRetained(char c);

    /** Returns true iff {@code c} is an ASCII letter or an ASCII digit. */
    private static boolean isWhitelisted(char c) {
        return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || ('0' <= c && c <= '9');
    }

    /** Returns true iff {@code c} can be written to the output without being escaped. */
    private boolean isWhitelistedOrRetained(char c) {
        return isWhitelisted(c) || isRetained(c);
    }

    /**
     * Throws {@link URISyntaxException} if any of the characters in the range [start, end) are
     * not valid according to this codec.
     * <ul>
     *   <li>A char that is in the whitelist or retained is valid.
     *   <li>A '%' is valid only when followed, inside the range, by two hex digits.
     *   <li>Anything else is rejected.
     * </ul>
     *
     * @param uri the string holding the range to check
     * @param start index of the first char to check (inclusive)
     * @param end index after the last char to check (exclusive)
     * @param name not used, except to generate debugging info; may be null
     * @return the substring [start, end) on success
     * @throws URISyntaxException if the range holds an illegal character or a truncated or
     *     non-hex escape sequence
     */
    public final String validate(String uri, int start, int end, String name)
            throws URISyntaxException {
        int i = start;
        while (i < end) {
            char c = uri.charAt(i++);
            if (isWhitelistedOrRetained(c)) {
                continue;
            }
            // c is either '%' or character not allowed in a uri.
            if (c != '%') {
                throw unexpectedCharacterException(uri, name, c, i - 1);
            }
            // Expect two characters representing a number in hex.
            for (int j = 0; j < 2; j++) {
                c = getNextCharacter(uri, i++, end, name);
                if (hexCharToValue(c) < 0) {
                    throw unexpectedCharacterException(uri, name, c, i - 1);
                }
            }
        }
        return uri.substring(start, end);
    }

    /**
     * Interprets a char as a hex digit, returning a number from -1 (invalid char) to 15 ('f').
     */
    private static int hexCharToValue(char c) {
        if ('0' <= c && c <= '9') {
            return c - '0';
        }
        if ('a' <= c && c <= 'f') {
            return 10 + c - 'a';
        }
        if ('A' <= c && c <= 'F') {
            return 10 + c - 'A';
        }
        return -1;
    }

    /** Returns the " in [name]" suffix used by error messages, or "" if {@code name} is null. */
    private static String describeName(String name) {
        return (name == null) ? "" : " in [" + name + "]";
    }

    /** Builds the exception reporting that {@code unexpected} was found at {@code index}. */
    private static URISyntaxException unexpectedCharacterException(
            String uri, String name, char unexpected, int index) {
        return new URISyntaxException(
                uri, "Unexpected character" + describeName(name) + ": " + unexpected, index);
    }

    /**
     * Returns the char of {@code uri} at {@code index}.
     *
     * @throws URISyntaxException if {@code index} is at or beyond {@code end}
     */
    private static char getNextCharacter(String uri, int index, int end, String name)
            throws URISyntaxException {
        if (index >= end) {
            throw new URISyntaxException(
                    uri, "Unexpected end of string" + describeName(name), index);
        }
        return uri.charAt(index);
    }

    /**
     * Throws {@link URISyntaxException} if any character in {@code uri} is neither whitelisted nor
     * in {@code legal}.
     */
    public static void validateSimple(String uri, String legal) throws URISyntaxException {
        for (int i = 0; i < uri.length(); i++) {
            char c = uri.charAt(i);
            if (!isWhitelisted(c) && legal.indexOf(c) < 0) {
                throw unexpectedCharacterException(uri, null /* name */, c, i);
            }
        }
    }

    /**
     * Encodes the string {@code s} as per the rules of this encoder (see class level comment),
     * converting escaped characters to bytes with {@code charset}.
     *
     * <p>'%' characters in {@code s} are escaped like any other character, i.e. already escaped
     * input is escaped a second time.
     *
     * @throws IllegalArgumentException if the encoder is unable to encode a sequence of chars.
     */
    public final String encode(String s, Charset charset) {
        StringBuilder builder = new StringBuilder(s.length());
        appendEncoded(builder, s, charset, false);
        return builder.toString();
    }

    /**
     * Encodes the string {@code s} as per the rules of this encoder (see class level comment).
     *
     * <p>Encoded output is appended to {@code builder}. This uses the default output encoding
     * (UTF-8).
     *
     * @throws IllegalArgumentException if the encoder is unable to encode a sequence of chars.
     */
    public final void appendEncoded(StringBuilder builder, String s) {
        appendEncoded(builder, s, StandardCharsets.UTF_8, false);
    }

    /**
     * Encodes the string {@code s} as per the rules of this encoder (see class level comment).
     *
     * <p>Encoded output is appended to {@code builder}. This uses the default output encoding
     * (UTF-8). This method produces partially encoded output. What this means is that every '%'
     * in the input string is passed through unmodified, instead of being escaped again. Consider
     * an encoder operating on the global whitelist dealing with a string "foo%25bar". With this
     * method, the output will be "foo%25bar", but with {@link #appendEncoded}, it will be double
     * encoded into "foo%2525bar".
     *
     * @throws IllegalArgumentException if the encoder is unable to encode a sequence of chars.
     */
    public final void appendPartiallyEncoded(StringBuilder builder, String s) {
        appendEncoded(builder, s, StandardCharsets.UTF_8, true);
    }

    /**
     * Shared implementation of the public encode methods.
     *
     * <p>Characters that can be emitted as they are go straight to {@code builder}; all others
     * are queued and escaped as one run, so that multi-char sequences (such as surrogate pairs)
     * reach the charset encoder together.
     *
     * @param partiallyEncoded if true, '%' is copied to the output instead of being escaped
     */
    private void appendEncoded(
            StringBuilder builder, String s, Charset charset, boolean partiallyEncoded) {
        CharsetEncoder encoder = charset.newEncoder()
                .onMalformedInput(CodingErrorAction.REPORT)
                .onUnmappableCharacter(CodingErrorAction.REPORT);
        // Holds the run of characters waiting to be escaped.
        CharBuffer cBuffer = CharBuffer.allocate(s.length());
        for (int i = 0; i < s.length(); i++) {
            char c = s.charAt(i);
            if (c == '%' && partiallyEncoded) {
                // In case there are characters waiting to be encoded.
                flushEncodingCharBuffer(builder, encoder, cBuffer);
                builder.append('%');
                continue;
            }

            // A space is written as '+', but only by codecs that retain spaces.
            if (c == ' ' && isRetained(' ')) {
                flushEncodingCharBuffer(builder, encoder, cBuffer);
                builder.append('+');
                continue;
            }

            if (isWhitelistedOrRetained(c)) {
                flushEncodingCharBuffer(builder, encoder, cBuffer);
                builder.append(c);
                continue;
            }

            // Put the character in the queue for encoding.
            cBuffer.put(c);
        }
        flushEncodingCharBuffer(builder, encoder, cBuffer);
    }

    /**
     * Escapes the characters queued in {@code cBuffer}, appends the result to {@code builder} and
     * leaves {@code cBuffer} empty and {@code encoder} reset, both ready for the next run.
     *
     * @throws IllegalArgumentException if the queued characters cannot be encoded
     */
    private static void flushEncodingCharBuffer(
            StringBuilder builder,
            CharsetEncoder encoder,
            CharBuffer cBuffer) {
        if (cBuffer.position() == 0) {
            return;
        }
        // We are reading from the buffer now.
        cBuffer.flip();
        ByteBuffer byteBuffer = ByteBuffer.allocate(
                cBuffer.remaining() * (int) Math.ceil(encoder.maxBytesPerChar()));
        CoderResult result = encoder.encode(cBuffer, byteBuffer, true /* endOfInput */);
        // According to the {@code CharsetEncoder#encode} spec, the method returns underflow
        // and leaves an empty input when all chars were processed correctly.
        if (result != CoderResult.UNDERFLOW) {
            throw new IllegalArgumentException(
                    "Error encoding, unexpected result ["
                            + result.toString()
                            + "] using encoder for ["
                            + encoder.charset().name()
                            + "]");
        }
        if (cBuffer.hasRemaining()) {
            throw new IllegalArgumentException(
                    "Encoder for [" + encoder.charset().name() + "] failed with underflow with "
                            + "remaining input [" + cBuffer + "]");
        }
        // Need to flush in case the encoder saves internal state. The outcome of the flush itself
        // is what has to be checked here, not the (already verified) outcome of encode().
        result = encoder.flush(byteBuffer);
        if (result != CoderResult.UNDERFLOW) {
            throw new IllegalArgumentException(
                    "Error encoding, unexpected result ["
                            + result.toString()
                            + "] flushing encoder for ["
                            + encoder.charset().name()
                            + "]");
        }
        encoder.reset();

        byteBuffer.flip();
        // Write the encoded bytes.
        while (byteBuffer.hasRemaining()) {
            byte b = byteBuffer.get();
            builder.append('%');
            builder.append(intToHexDigit((b & 0xf0) >>> 4));
            builder.append(intToHexDigit(b & 0x0f));
        }
        // Use the character buffer to write again.
        cBuffer.clear();
    }

    /** Converts a value in the range [0, 15] to its upper-case hex digit. */
    private static char intToHexDigit(int b) {
        if (b < 10) {
            return (char) ('0' + b);
        } else {
            return (char) ('A' + b - 10);
        }
    }

    /**
     * Decodes a string according to the rules of this decoder.
     *
     * <ul>
     *   <li>If {@code convertPlus == true}, every literal '+' in the input is converted to ' '
     *       (white space). A '+' obtained from an escape sequence ("%2B") is left alone.
     *   <li>If {@code throwOnFailure == true}, an {@link IllegalArgumentException} is thrown for
     *       a truncated or non-hex escape sequence and for bytes the charset decoder reports as
     *       unmappable. Else, U+FFFD is emitted to the output in place of the invalid input.
     *   <li>Byte sequences that are malformed for {@code charset} are always replaced by U+FFFD,
     *       regardless of {@code throwOnFailure}.
     * </ul>
     *
     * @param s the string to decode
     * @param convertPlus whether a literal '+' stands for a space
     * @param charset the charset used to turn the unescaped bytes into characters
     * @param throwOnFailure whether invalid input is reported or replaced
     * @return the decoded string
     * @throws IllegalArgumentException on invalid input, if {@code throwOnFailure} is true
     */
    public static String decode(
            String s, boolean convertPlus, Charset charset, boolean throwOnFailure) {
        StringBuilder builder = new StringBuilder(s.length());
        appendDecoded(builder, s, convertPlus, charset, throwOnFailure);
        return builder.toString();
    }

    /**
     * Decodes {@code s} into {@code builder}; see
     * {@link #decode(String, boolean, Charset, boolean)} for the meaning of the flags.
     */
    private static void appendDecoded(
            StringBuilder builder,
            String s,
            boolean convertPlus,
            Charset charset,
            boolean throwOnFailure) {
        CharsetDecoder decoder = charset.newDecoder()
                .onMalformedInput(CodingErrorAction.REPLACE)
                .replaceWith("\ufffd")
                .onUnmappableCharacter(CodingErrorAction.REPORT);
        // Holds the bytes corresponding to the escaped chars being read (empty if the last char
        // wasn't an escaped char).
        ByteBuffer byteBuffer = ByteBuffer.allocate(s.length());
        int i = 0;
        while (i < s.length()) {
            char c = s.charAt(i);
            i++;
            switch (c) {
                case '+':
                    flushDecodingByteAccumulator(
                            builder, decoder, byteBuffer, throwOnFailure);
                    builder.append(convertPlus ? ' ' : '+');
                    break;
                case '%': {
                    // Expect two characters representing a number in hex.
                    byte hexValue = 0;
                    boolean validEscape = true;
                    for (int j = 0; j < 2; j++) {
                        try {
                            c = getNextCharacter(s, i, s.length(), null /* name */);
                        } catch (URISyntaxException e) {
                            // Unexpected end of input.
                            if (throwOnFailure) {
                                throw new IllegalArgumentException(e);
                            } else {
                                flushDecodingByteAccumulator(
                                        builder, decoder, byteBuffer, throwOnFailure);
                                builder.append(INVALID_INPUT_CHARACTER);
                                return;
                            }
                        }
                        i++;
                        int newDigit = hexCharToValue(c);
                        if (newDigit < 0) {
                            if (throwOnFailure) {
                                throw new IllegalArgumentException(
                                        unexpectedCharacterException(s, null /* name */, c, i - 1));
                            }
                            flushDecodingByteAccumulator(
                                    builder, decoder, byteBuffer, throwOnFailure);
                            builder.append(INVALID_INPUT_CHARACTER);
                            // This only leaves the for loop, so remember that the escape was
                            // rejected: its half-built value must not reach the accumulator.
                            validEscape = false;
                            break;
                        }
                        hexValue = (byte) (hexValue * 0x10 + newDigit);
                    }
                    if (validEscape) {
                        byteBuffer.put(hexValue);
                    }
                    break;
                }
                default:
                    flushDecodingByteAccumulator(builder, decoder, byteBuffer, throwOnFailure);
                    builder.append(c);
            }
        }
        flushDecodingByteAccumulator(builder, decoder, byteBuffer, throwOnFailure);
    }

    /**
     * Decodes the bytes accumulated in {@code byteBuffer}, appends the resulting characters to
     * {@code builder} and leaves {@code byteBuffer} empty, ready to accumulate again.
     *
     * <p>If the decoder reports an error, the whole accumulated run is either rejected with an
     * {@link IllegalArgumentException} ({@code throwOnFailure == true}) or replaced by a single
     * U+FFFD.
     */
    private static void flushDecodingByteAccumulator(
            StringBuilder builder,
            CharsetDecoder decoder,
            ByteBuffer byteBuffer,
            boolean throwOnFailure) {
        if (byteBuffer.position() == 0) {
            return;
        }
        byteBuffer.flip();
        try {
            builder.append(decoder.decode(byteBuffer));
        } catch (CharacterCodingException e) {
            if (throwOnFailure) {
                throw new IllegalArgumentException(e);
            } else {
                builder.append(INVALID_INPUT_CHARACTER);
            }
        } finally {
            // Use the byte buffer to write again.
            byteBuffer.clear();
        }
    }

    /**
     * Equivalent to {@code decode(s, false, UTF_8, true)}.
     */
    public static String decode(String s) {
        return decode(
                s, false /* convertPlus */, StandardCharsets.UTF_8, true /* throwOnFailure */);
    }
}