UriCodec.java revision 10527ac8763cc50fa9eca0d4ce495909899f0b9a
/*
 *  Licensed to the Apache Software Foundation (ASF) under one or more
 *  contributor license agreements.  See the NOTICE file distributed with
 *  this work for additional information regarding copyright ownership.
 *  The ASF licenses this file to You under the Apache License, Version 2.0
 *  (the "License"); you may not use this file except in compliance with
 *  the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 */
17
18package libcore.net;
19
20import java.io.ByteArrayOutputStream;
21import java.net.URISyntaxException;
22import java.nio.charset.Charset;
23import java.nio.charset.Charsets;
24
25/**
26 * Encodes and decodes {@code application/x-www-form-urlencoded} content.
27 * Subclasses define exactly which characters are legal.
28 *
29 * <p>By default, UTF-8 is used to encode escaped characters. A single input
30 * character like "\u0080" may be encoded to multiple octets like %C2%80.
31 */
32public abstract class UriCodec {
33
34    /**
35     * Returns true if {@code c} does not need to be escaped.
36     */
37    protected abstract boolean isRetained(char c);
38
39    /**
40     * Throws if {@code s} is invalid according to this encoder.
41     */
42    public final void validate(String s) throws URISyntaxException {
43        for (int i = 0; i < s.length();) {
44            char ch = s.charAt(i);
45            if ((ch >= 'a' && ch <= 'z')
46                    || (ch >= 'A' && ch <= 'Z')
47                    || (ch >= '0' && ch <= '9')
48                    || isRetained(ch)) {
49                i++;
50            } else if (ch == '%') {
51                if (i + 2 >= s.length()) {
52                    throw new URISyntaxException(s, "Incomplete % sequence", i);
53                }
54                int d1 = hexToInt(s.charAt(i + 1));
55                int d2 = hexToInt(s.charAt(i + 2));
56                if (d1 == -1 || d2 == -1) {
57                    throw new URISyntaxException(s, "Invalid % sequence: " +
58                            s.substring(i, i + 3), i);
59                }
60                i += 3;
61            } else {
62                throw new URISyntaxException(s, "Illegal character", i);
63            }
64        }
65    }
66
67    /**
68     * Throws if {@code s} contains characters that are not letters, digits or
69     * in {@code legal}.
70     */
71    public static void validateSimple(String s, String legal) throws URISyntaxException {
72        for (int i = 0; i < s.length(); i++) {
73            char ch = s.charAt(i);
74            if (!((ch >= 'a' && ch <= 'z')
75                    || (ch >= 'A' && ch <= 'Z')
76                    || (ch >= '0' && ch <= '9')
77                    || legal.indexOf(ch) > -1)) {
78                throw new URISyntaxException(s, "Illegal character", i);
79            }
80        }
81    }
82
83    /**
84     * Encodes {@code s} and appends the result to {@code builder}.
85     *
86     * @param isPartiallyEncoded true to fix input that has already been
87     *     partially or fully encoded. For example, input of "hello%20world" is
88     *     unchanged with isPartiallyEncoded=true but would be double-escaped to
89     *     "hello%2520world" otherwise.
90     */
91    private void appendEncoded(StringBuilder builder, String s, Charset charset,
92            boolean isPartiallyEncoded) {
93        if (s == null) {
94            throw new NullPointerException();
95        }
96
97        int escapeStart = -1;
98        for (int i = 0; i < s.length(); i++) {
99            char c = s.charAt(i);
100            if ((c >= 'a' && c <= 'z')
101                    || (c >= 'A' && c <= 'Z')
102                    || (c >= '0' && c <= '9')
103                    || isRetained(c)
104                    || (c == '%' && isPartiallyEncoded)) {
105                if (escapeStart != -1) {
106                    appendHex(builder, s.substring(escapeStart, i), charset);
107                    escapeStart = -1;
108                }
109                if (c == '%' && isPartiallyEncoded) {
110                    // this is an encoded 3-character sequence like "%20"
111                    builder.append(s, i, i + 3);
112                    i += 2;
113                } else if (c == ' ') {
114                    builder.append('+');
115                } else {
116                    builder.append(c);
117                }
118            } else if (escapeStart == -1) {
119                escapeStart = i;
120            }
121        }
122        if (escapeStart != -1) {
123            appendHex(builder, s.substring(escapeStart, s.length()), charset);
124        }
125    }
126
127    public final String encode(String s, Charset charset) {
128        // Guess a bit larger for encoded form
129        StringBuilder builder = new StringBuilder(s.length() + 16);
130        appendEncoded(builder, s, charset, false);
131        return builder.toString();
132    }
133
134    public final void appendEncoded(StringBuilder builder, String s) {
135        appendEncoded(builder, s, Charsets.UTF_8, false);
136    }
137
138    public final void appendPartiallyEncoded(StringBuilder builder, String s) {
139        appendEncoded(builder, s, Charsets.UTF_8, true);
140    }
141
142    /**
143     * @param convertPlus true to convert '+' to ' '.
144     */
145    public static String decode(String s, boolean convertPlus, Charset charset) {
146        if (s.indexOf('%') == -1 && (!convertPlus || s.indexOf('+') == -1)) {
147            return s;
148        }
149
150        StringBuilder result = new StringBuilder(s.length());
151        ByteArrayOutputStream out = new ByteArrayOutputStream();
152        for (int i = 0; i < s.length();) {
153            char c = s.charAt(i);
154            if (c == '%') {
155                do {
156                    if (i + 2 >= s.length()) {
157                        throw new IllegalArgumentException("Incomplete % sequence at: " + i);
158                    }
159                    int d1 = hexToInt(s.charAt(i + 1));
160                    int d2 = hexToInt(s.charAt(i + 2));
161                    if (d1 == -1 || d2 == -1) {
162                        throw new IllegalArgumentException("Invalid % sequence " +
163                                s.substring(i, i + 3) + " at " + i);
164                    }
165                    out.write((byte) ((d1 << 4) + d2));
166                    i += 3;
167                } while (i < s.length() && s.charAt(i) == '%');
168                result.append(new String(out.toByteArray(), charset));
169                out.reset();
170            } else {
171                if (convertPlus && c == '+') {
172                    c = ' ';
173                }
174                result.append(c);
175                i++;
176            }
177        }
178        return result.toString();
179    }
180
181    /**
182     * Like {@link Character#digit}, but without support for non-ASCII
183     * characters.
184     */
185    private static int hexToInt(char c) {
186        if ('0' <= c && c <= '9') {
187            return c - '0';
188        } else if ('a' <= c && c <= 'f') {
189            return 10 + (c - 'a');
190        } else if ('A' <= c && c <= 'F') {
191            return 10 + (c - 'A');
192        } else {
193            return -1;
194        }
195    }
196
197    public static String decode(String s) {
198        return decode(s, false, Charsets.UTF_8);
199    }
200
201    private static void appendHex(StringBuilder builder, String s, Charset charset) {
202        for (byte b : s.getBytes(charset)) {
203            appendHex(builder, b);
204        }
205    }
206
207    private static void appendHex(StringBuilder sb, byte b) {
208        sb.append('%');
209        sb.append(Byte.toHexString(b, true));
210    }
211}
212