1/*
2 *  Licensed to the Apache Software Foundation (ASF) under one or more
3 *  contributor license agreements.  See the NOTICE file distributed with
4 *  this work for additional information regarding copyright ownership.
5 *  The ASF licenses this file to You under the Apache License, Version 2.0
6 *  (the "License"); you may not use this file except in compliance with
7 *  the License.  You may obtain a copy of the License at
8 *
9 *     http://www.apache.org/licenses/LICENSE-2.0
10 *
11 *  Unless required by applicable law or agreed to in writing, software
12 *  distributed under the License is distributed on an "AS IS" BASIS,
13 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 *  See the License for the specific language governing permissions and
15 *  limitations under the License.
16 */
17
18package libcore.net;
19
20import java.io.ByteArrayOutputStream;
21import java.net.URISyntaxException;
22import java.nio.charset.Charset;
23import java.nio.charset.Charsets;
24
25/**
26 * Encodes and decodes {@code application/x-www-form-urlencoded} content.
27 * Subclasses define exactly which characters are legal.
28 *
29 * <p>By default, UTF-8 is used to encode escaped characters. A single input
30 * character like "\u0080" may be encoded to multiple octets like %C2%80.
31 */
32public abstract class UriCodec {
33
34    /**
35     * Returns true if {@code c} does not need to be escaped.
36     */
37    protected abstract boolean isRetained(char c);
38
39    /**
40     * Throws if {@code s} is invalid according to this encoder.
41     */
42    public final String validate(String uri, int start, int end, String name)
43            throws URISyntaxException {
44        for (int i = start; i < end; ) {
45            char ch = uri.charAt(i);
46            if ((ch >= 'a' && ch <= 'z')
47                    || (ch >= 'A' && ch <= 'Z')
48                    || (ch >= '0' && ch <= '9')
49                    || isRetained(ch)) {
50                i++;
51            } else if (ch == '%') {
52                if (i + 2 >= end) {
53                    throw new URISyntaxException(uri, "Incomplete % sequence in " + name, i);
54                }
55                int d1 = hexToInt(uri.charAt(i + 1));
56                int d2 = hexToInt(uri.charAt(i + 2));
57                if (d1 == -1 || d2 == -1) {
58                    throw new URISyntaxException(uri, "Invalid % sequence: "
59                            + uri.substring(i, i + 3) + " in " + name, i);
60                }
61                i += 3;
62            } else {
63                throw new URISyntaxException(uri, "Illegal character in " + name, i);
64            }
65        }
66        return uri.substring(start, end);
67    }
68
69    /**
70     * Throws if {@code s} contains characters that are not letters, digits or
71     * in {@code legal}.
72     */
73    public static void validateSimple(String s, String legal)
74            throws URISyntaxException {
75        for (int i = 0; i < s.length(); i++) {
76            char ch = s.charAt(i);
77            if (!((ch >= 'a' && ch <= 'z')
78                    || (ch >= 'A' && ch <= 'Z')
79                    || (ch >= '0' && ch <= '9')
80                    || legal.indexOf(ch) > -1)) {
81                throw new URISyntaxException(s, "Illegal character", i);
82            }
83        }
84    }
85
86    /**
87     * Encodes {@code s} and appends the result to {@code builder}.
88     *
89     * @param isPartiallyEncoded true to fix input that has already been
90     *     partially or fully encoded. For example, input of "hello%20world" is
91     *     unchanged with isPartiallyEncoded=true but would be double-escaped to
92     *     "hello%2520world" otherwise.
93     */
94    private void appendEncoded(StringBuilder builder, String s, Charset charset,
95            boolean isPartiallyEncoded) {
96        if (s == null) {
97            throw new NullPointerException("s == null");
98        }
99
100        int escapeStart = -1;
101        for (int i = 0; i < s.length(); i++) {
102            char c = s.charAt(i);
103            if ((c >= 'a' && c <= 'z')
104                    || (c >= 'A' && c <= 'Z')
105                    || (c >= '0' && c <= '9')
106                    || isRetained(c)
107                    || (c == '%' && isPartiallyEncoded)) {
108                if (escapeStart != -1) {
109                    appendHex(builder, s.substring(escapeStart, i), charset);
110                    escapeStart = -1;
111                }
112                if (c == '%' && isPartiallyEncoded) {
113                    // this is an encoded 3-character sequence like "%20"
114                    builder.append(s, i, i + 3);
115                    i += 2;
116                } else if (c == ' ') {
117                    builder.append('+');
118                } else {
119                    builder.append(c);
120                }
121            } else if (escapeStart == -1) {
122                escapeStart = i;
123            }
124        }
125        if (escapeStart != -1) {
126            appendHex(builder, s.substring(escapeStart, s.length()), charset);
127        }
128    }
129
130    public final String encode(String s, Charset charset) {
131        // Guess a bit larger for encoded form
132        StringBuilder builder = new StringBuilder(s.length() + 16);
133        appendEncoded(builder, s, charset, false);
134        return builder.toString();
135    }
136
137    public final void appendEncoded(StringBuilder builder, String s) {
138        appendEncoded(builder, s, Charsets.UTF_8, false);
139    }
140
141    public final void appendPartiallyEncoded(StringBuilder builder, String s) {
142        appendEncoded(builder, s, Charsets.UTF_8, true);
143    }
144
145    /**
146     * @param convertPlus true to convert '+' to ' '.
147     * @param throwOnFailure true to throw an IllegalArgumentException on
148     *     invalid escape sequences; false to replace them with the replacement
149     *     character (U+fffd).
150     */
151    public static String decode(String s, boolean convertPlus, Charset charset,
152            boolean throwOnFailure) {
153        if (s.indexOf('%') == -1 && (!convertPlus || s.indexOf('+') == -1)) {
154            return s;
155        }
156
157        StringBuilder result = new StringBuilder(s.length());
158        ByteArrayOutputStream out = new ByteArrayOutputStream();
159        for (int i = 0; i < s.length();) {
160            char c = s.charAt(i);
161            if (c == '%') {
162                do {
163                    int d1, d2;
164                    if (i + 2 < s.length()
165                            && (d1 = hexToInt(s.charAt(i + 1))) != -1
166                            && (d2 = hexToInt(s.charAt(i + 2))) != -1) {
167                        out.write((byte) ((d1 << 4) + d2));
168                    } else if (throwOnFailure) {
169                        throw new IllegalArgumentException("Invalid % sequence at " + i + ": " + s);
170                    } else {
171                        byte[] replacement = "\ufffd".getBytes(charset);
172                        out.write(replacement, 0, replacement.length);
173                    }
174                    i += 3;
175                } while (i < s.length() && s.charAt(i) == '%');
176                result.append(new String(out.toByteArray(), charset));
177                out.reset();
178            } else {
179                if (convertPlus && c == '+') {
180                    c = ' ';
181                }
182                result.append(c);
183                i++;
184            }
185        }
186        return result.toString();
187    }
188
189    /**
190     * Like {@link Character#digit}, but without support for non-ASCII
191     * characters.
192     */
193    private static int hexToInt(char c) {
194        if ('0' <= c && c <= '9') {
195            return c - '0';
196        } else if ('a' <= c && c <= 'f') {
197            return 10 + (c - 'a');
198        } else if ('A' <= c && c <= 'F') {
199            return 10 + (c - 'A');
200        } else {
201            return -1;
202        }
203    }
204
205    public static String decode(String s) {
206        return decode(s, false, Charsets.UTF_8, true);
207    }
208
209    private static void appendHex(StringBuilder builder, String s, Charset charset) {
210        for (byte b : s.getBytes(charset)) {
211            appendHex(builder, b);
212        }
213    }
214
215    private static void appendHex(StringBuilder sb, byte b) {
216        sb.append('%');
217        sb.append(Byte.toHexString(b, true));
218    }
219}
220