156099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson/* 256099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson * Licensed to the Apache Software Foundation (ASF) under one or more 356099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson * contributor license agreements. See the NOTICE file distributed with 456099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson * this work for additional information regarding copyright ownership. 556099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson * The ASF licenses this file to You under the Apache License, Version 2.0 656099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson * (the "License"); you may not use this file except in compliance with 756099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson * the License. You may obtain a copy of the License at 856099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson * 956099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson * http://www.apache.org/licenses/LICENSE-2.0 1056099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson * 1156099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson * Unless required by applicable law or agreed to in writing, software 1256099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson * distributed under the License is distributed on an "AS IS" BASIS, 1356099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 1456099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson * See the License for the specific language governing permissions and 1556099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson * limitations under the License. 1656099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson */ 1756099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson 1856099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilsonpackage libcore.net; 1956099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson 2056099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilsonimport java.io.ByteArrayOutputStream; 2156099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilsonimport java.net.URISyntaxException; 2256099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilsonimport java.nio.charset.Charset; 232a6f23ff8690ac2f025588a360547ce96cde0943Elliott Hughesimport java.nio.charset.StandardCharsets; 2456099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson 2556099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson/** 2656099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson * Encodes and decodes {@code application/x-www-form-urlencoded} content. 2756099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson * Subclasses define exactly which characters are legal. 2856099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson * 2956099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson * <p>By default, UTF-8 is used to encode escaped characters. A single input 3056099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson * character like "\u0080" may be encoded to multiple octets like %C2%80. 3156099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson */ 3256099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilsonpublic abstract class UriCodec { 3356099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson 3456099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson /** 3556099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson * Returns true if {@code c} does not need to be escaped. 3656099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson */ 3756099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson protected abstract boolean isRetained(char c); 3856099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson 3956099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson /** 4056099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson * Throws if {@code s} is invalid according to this encoder. 4156099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson */ 42ce257b03a1e5ff6075967e6a84cdb439cb2b01c8Jesse Wilson public final String validate(String uri, int start, int end, String name) 43ce257b03a1e5ff6075967e6a84cdb439cb2b01c8Jesse Wilson throws URISyntaxException { 44ce257b03a1e5ff6075967e6a84cdb439cb2b01c8Jesse Wilson for (int i = start; i < end; ) { 45ce257b03a1e5ff6075967e6a84cdb439cb2b01c8Jesse Wilson char ch = uri.charAt(i); 4656099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson if ((ch >= 'a' && ch <= 'z') 4756099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson || (ch >= 'A' && ch <= 'Z') 4856099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson || (ch >= '0' && ch <= '9') 4956099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson || isRetained(ch)) { 5056099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson i++; 5156099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson } else if (ch == '%') { 52ce257b03a1e5ff6075967e6a84cdb439cb2b01c8Jesse Wilson if (i + 2 >= end) { 53ce257b03a1e5ff6075967e6a84cdb439cb2b01c8Jesse Wilson throw new URISyntaxException(uri, "Incomplete % sequence in " + name, i); 5456099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson } 55ce257b03a1e5ff6075967e6a84cdb439cb2b01c8Jesse Wilson int d1 = hexToInt(uri.charAt(i + 1)); 56ce257b03a1e5ff6075967e6a84cdb439cb2b01c8Jesse Wilson int d2 = hexToInt(uri.charAt(i + 2)); 5756099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson if (d1 == -1 || d2 == -1) { 58ce257b03a1e5ff6075967e6a84cdb439cb2b01c8Jesse Wilson throw new URISyntaxException(uri, "Invalid % sequence: " 59ce257b03a1e5ff6075967e6a84cdb439cb2b01c8Jesse Wilson + uri.substring(i, i + 3) + " in " + name, i); 6056099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson } 6156099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson i += 3; 6256099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson } else { 63ce257b03a1e5ff6075967e6a84cdb439cb2b01c8Jesse Wilson throw new URISyntaxException(uri, "Illegal character in " + name, i); 6456099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson } 6556099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson } 66ce257b03a1e5ff6075967e6a84cdb439cb2b01c8Jesse Wilson return uri.substring(start, end); 6756099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson } 6856099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson 6956099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson /** 7056099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson * Throws if {@code s} contains characters that are not letters, digits or 7156099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson * in {@code legal}. 7256099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson */ 73ce257b03a1e5ff6075967e6a84cdb439cb2b01c8Jesse Wilson public static void validateSimple(String s, String legal) 74ce257b03a1e5ff6075967e6a84cdb439cb2b01c8Jesse Wilson throws URISyntaxException { 7556099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson for (int i = 0; i < s.length(); i++) { 7656099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson char ch = s.charAt(i); 7756099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson if (!((ch >= 'a' && ch <= 'z') 7856099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson || (ch >= 'A' && ch <= 'Z') 7956099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson || (ch >= '0' && ch <= '9') 8056099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson || legal.indexOf(ch) > -1)) { 8156099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson throw new URISyntaxException(s, "Illegal character", i); 8256099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson } 8356099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson } 8456099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson } 8556099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson 8656099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson /** 8756099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson * Encodes {@code s} and appends the result to {@code builder}. 8832559028b14b9b321b10eede050afd554a376569Jesse Wilson * 8932559028b14b9b321b10eede050afd554a376569Jesse Wilson * @param isPartiallyEncoded true to fix input that has already been 9032559028b14b9b321b10eede050afd554a376569Jesse Wilson * partially or fully encoded. For example, input of "hello%20world" is 9132559028b14b9b321b10eede050afd554a376569Jesse Wilson * unchanged with isPartiallyEncoded=true but would be double-escaped to 9232559028b14b9b321b10eede050afd554a376569Jesse Wilson * "hello%2520world" otherwise. 9356099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson */ 9432559028b14b9b321b10eede050afd554a376569Jesse Wilson private void appendEncoded(StringBuilder builder, String s, Charset charset, 9532559028b14b9b321b10eede050afd554a376569Jesse Wilson boolean isPartiallyEncoded) { 9656099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson if (s == null) { 97d43b9ef11a1095967a3396b246639b563e1a4128Kenny Root throw new NullPointerException("s == null"); 9856099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson } 9956099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson 10056099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson int escapeStart = -1; 10156099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson for (int i = 0; i < s.length(); i++) { 10256099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson char c = s.charAt(i); 103786ca658dfe8596835859ce45ee3b0ed46b8f21cJesse Wilson if ((c >= 'a' && c <= 'z') 10456099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson || (c >= 'A' && c <= 'Z') 10556099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson || (c >= '0' && c <= '9') 106786ca658dfe8596835859ce45ee3b0ed46b8f21cJesse Wilson || isRetained(c) 107786ca658dfe8596835859ce45ee3b0ed46b8f21cJesse Wilson || (c == '%' && isPartiallyEncoded)) { 10856099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson if (escapeStart != -1) { 10956099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson appendHex(builder, s.substring(escapeStart, i), charset); 11056099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson escapeStart = -1; 11156099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson } 112786ca658dfe8596835859ce45ee3b0ed46b8f21cJesse Wilson if (c == '%' && isPartiallyEncoded) { 113786ca658dfe8596835859ce45ee3b0ed46b8f21cJesse Wilson // this is an encoded 3-character sequence like "%20" 114bb1546d2c87c6f7e996a83b6f0efca17453d53dbBrian Carlstrom builder.append(s, i, Math.min(i + 3, s.length())); 115786ca658dfe8596835859ce45ee3b0ed46b8f21cJesse Wilson i += 2; 116786ca658dfe8596835859ce45ee3b0ed46b8f21cJesse Wilson } else if (c == ' ') { 11756099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson builder.append('+'); 118786ca658dfe8596835859ce45ee3b0ed46b8f21cJesse Wilson } else { 119786ca658dfe8596835859ce45ee3b0ed46b8f21cJesse Wilson builder.append(c); 12056099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson } 12156099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson } else if (escapeStart == -1) { 12256099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson escapeStart = i; 12356099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson } 12456099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson } 12556099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson if (escapeStart != -1) { 12656099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson appendHex(builder, s.substring(escapeStart, s.length()), charset); 12756099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson } 12856099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson } 12956099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson 13010527ac8763cc50fa9eca0d4ce495909899f0b9aJesse Wilson public final String encode(String s, Charset charset) { 13132559028b14b9b321b10eede050afd554a376569Jesse Wilson // Guess a bit larger for encoded form 13232559028b14b9b321b10eede050afd554a376569Jesse Wilson StringBuilder builder = new StringBuilder(s.length() + 16); 13332559028b14b9b321b10eede050afd554a376569Jesse Wilson appendEncoded(builder, s, charset, false); 13432559028b14b9b321b10eede050afd554a376569Jesse Wilson return builder.toString(); 13556099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson } 13656099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson 13710527ac8763cc50fa9eca0d4ce495909899f0b9aJesse Wilson public final void appendEncoded(StringBuilder builder, String s) { 1382a6f23ff8690ac2f025588a360547ce96cde0943Elliott Hughes appendEncoded(builder, s, StandardCharsets.UTF_8, false); 13932559028b14b9b321b10eede050afd554a376569Jesse Wilson } 14056099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson 14110527ac8763cc50fa9eca0d4ce495909899f0b9aJesse Wilson public final void appendPartiallyEncoded(StringBuilder builder, String s) { 1422a6f23ff8690ac2f025588a360547ce96cde0943Elliott Hughes appendEncoded(builder, s, StandardCharsets.UTF_8, true); 14356099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson } 14456099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson 14556099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson /** 14656099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson * @param convertPlus true to convert '+' to ' '. 147783631d82682027beaf80540ab08912fd56fea0cJesse Wilson * @param throwOnFailure true to throw an IllegalArgumentException on 148783631d82682027beaf80540ab08912fd56fea0cJesse Wilson * invalid escape sequences; false to replace them with the replacement 149783631d82682027beaf80540ab08912fd56fea0cJesse Wilson * character (U+fffd). 15056099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson */ 151783631d82682027beaf80540ab08912fd56fea0cJesse Wilson public static String decode(String s, boolean convertPlus, Charset charset, 152783631d82682027beaf80540ab08912fd56fea0cJesse Wilson boolean throwOnFailure) { 15356099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson if (s.indexOf('%') == -1 && (!convertPlus || s.indexOf('+') == -1)) { 15456099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson return s; 15556099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson } 15656099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson 15756099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson StringBuilder result = new StringBuilder(s.length()); 15856099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson ByteArrayOutputStream out = new ByteArrayOutputStream(); 15956099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson for (int i = 0; i < s.length();) { 16056099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson char c = s.charAt(i); 16156099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson if (c == '%') { 16256099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson do { 163783631d82682027beaf80540ab08912fd56fea0cJesse Wilson int d1, d2; 164783631d82682027beaf80540ab08912fd56fea0cJesse Wilson if (i + 2 < s.length() 165783631d82682027beaf80540ab08912fd56fea0cJesse Wilson && (d1 = hexToInt(s.charAt(i + 1))) != -1 166783631d82682027beaf80540ab08912fd56fea0cJesse Wilson && (d2 = hexToInt(s.charAt(i + 2))) != -1) { 167783631d82682027beaf80540ab08912fd56fea0cJesse Wilson out.write((byte) ((d1 << 4) + d2)); 168783631d82682027beaf80540ab08912fd56fea0cJesse Wilson } else if (throwOnFailure) { 169783631d82682027beaf80540ab08912fd56fea0cJesse Wilson throw new IllegalArgumentException("Invalid % sequence at " + i + ": " + s); 170783631d82682027beaf80540ab08912fd56fea0cJesse Wilson } else { 171783631d82682027beaf80540ab08912fd56fea0cJesse Wilson byte[] replacement = "\ufffd".getBytes(charset); 172783631d82682027beaf80540ab08912fd56fea0cJesse Wilson out.write(replacement, 0, replacement.length); 17356099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson } 17456099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson i += 3; 17556099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson } while (i < s.length() && s.charAt(i) == '%'); 17656099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson result.append(new String(out.toByteArray(), charset)); 17756099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson out.reset(); 17856099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson } else { 17956099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson if (convertPlus && c == '+') { 18056099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson c = ' '; 18156099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson } 18256099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson result.append(c); 18356099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson i++; 18456099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson } 18556099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson } 18656099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson return result.toString(); 18756099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson } 18856099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson 18932559028b14b9b321b10eede050afd554a376569Jesse Wilson /** 19032559028b14b9b321b10eede050afd554a376569Jesse Wilson * Like {@link Character#digit}, but without support for non-ASCII 19132559028b14b9b321b10eede050afd554a376569Jesse Wilson * characters. 19232559028b14b9b321b10eede050afd554a376569Jesse Wilson */ 19332559028b14b9b321b10eede050afd554a376569Jesse Wilson private static int hexToInt(char c) { 19432559028b14b9b321b10eede050afd554a376569Jesse Wilson if ('0' <= c && c <= '9') { 19532559028b14b9b321b10eede050afd554a376569Jesse Wilson return c - '0'; 19632559028b14b9b321b10eede050afd554a376569Jesse Wilson } else if ('a' <= c && c <= 'f') { 19732559028b14b9b321b10eede050afd554a376569Jesse Wilson return 10 + (c - 'a'); 19832559028b14b9b321b10eede050afd554a376569Jesse Wilson } else if ('A' <= c && c <= 'F') { 19932559028b14b9b321b10eede050afd554a376569Jesse Wilson return 10 + (c - 'A'); 20032559028b14b9b321b10eede050afd554a376569Jesse Wilson } else { 20132559028b14b9b321b10eede050afd554a376569Jesse Wilson return -1; 20232559028b14b9b321b10eede050afd554a376569Jesse Wilson } 20332559028b14b9b321b10eede050afd554a376569Jesse Wilson } 20432559028b14b9b321b10eede050afd554a376569Jesse Wilson 20556099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson public static String decode(String s) { 2062a6f23ff8690ac2f025588a360547ce96cde0943Elliott Hughes return decode(s, false, StandardCharsets.UTF_8, true); 20756099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson } 20856099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson 20956099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson private static void appendHex(StringBuilder builder, String s, Charset charset) { 21056099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson for (byte b : s.getBytes(charset)) { 21156099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson appendHex(builder, b); 21256099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson } 21356099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson } 21456099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson 21556099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson private static void appendHex(StringBuilder sb, byte b) { 21656099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson sb.append('%'); 21756099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson sb.append(Byte.toHexString(b, true)); 21856099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson } 21956099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson} 220