UriCodec.java revision 10527ac8763cc50fa9eca0d4ce495909899f0b9a
1/* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 18package libcore.net; 19 20import java.io.ByteArrayOutputStream; 21import java.net.URISyntaxException; 22import java.nio.charset.Charset; 23import java.nio.charset.Charsets; 24 25/** 26 * Encodes and decodes {@code application/x-www-form-urlencoded} content. 27 * Subclasses define exactly which characters are legal. 28 * 29 * <p>By default, UTF-8 is used to encode escaped characters. A single input 30 * character like "\u0080" may be encoded to multiple octets like %C2%80. 31 */ 32public abstract class UriCodec { 33 34 /** 35 * Returns true if {@code c} does not need to be escaped. 36 */ 37 protected abstract boolean isRetained(char c); 38 39 /** 40 * Throws if {@code s} is invalid according to this encoder. 41 */ 42 public final void validate(String s) throws URISyntaxException { 43 for (int i = 0; i < s.length();) { 44 char ch = s.charAt(i); 45 if ((ch >= 'a' && ch <= 'z') 46 || (ch >= 'A' && ch <= 'Z') 47 || (ch >= '0' && ch <= '9') 48 || isRetained(ch)) { 49 i++; 50 } else if (ch == '%') { 51 if (i + 2 >= s.length()) { 52 throw new URISyntaxException(s, "Incomplete % sequence", i); 53 } 54 int d1 = hexToInt(s.charAt(i + 1)); 55 int d2 = hexToInt(s.charAt(i + 2)); 56 if (d1 == -1 || d2 == -1) { 57 throw new URISyntaxException(s, "Invalid % sequence: " + 58 s.substring(i, i + 3), i); 59 } 60 i += 3; 61 } else { 62 throw new URISyntaxException(s, "Illegal character", i); 63 } 64 } 65 } 66 67 /** 68 * Throws if {@code s} contains characters that are not letters, digits or 69 * in {@code legal}. 70 */ 71 public static void validateSimple(String s, String legal) throws URISyntaxException { 72 for (int i = 0; i < s.length(); i++) { 73 char ch = s.charAt(i); 74 if (!((ch >= 'a' && ch <= 'z') 75 || (ch >= 'A' && ch <= 'Z') 76 || (ch >= '0' && ch <= '9') 77 || legal.indexOf(ch) > -1)) { 78 throw new URISyntaxException(s, "Illegal character", i); 79 } 80 } 81 } 82 83 /** 84 * Encodes {@code s} and appends the result to {@code builder}. 85 * 86 * @param isPartiallyEncoded true to fix input that has already been 87 * partially or fully encoded. For example, input of "hello%20world" is 88 * unchanged with isPartiallyEncoded=true but would be double-escaped to 89 * "hello%2520world" otherwise. 90 */ 91 private void appendEncoded(StringBuilder builder, String s, Charset charset, 92 boolean isPartiallyEncoded) { 93 if (s == null) { 94 throw new NullPointerException(); 95 } 96 97 int escapeStart = -1; 98 for (int i = 0; i < s.length(); i++) { 99 char c = s.charAt(i); 100 if ((c >= 'a' && c <= 'z') 101 || (c >= 'A' && c <= 'Z') 102 || (c >= '0' && c <= '9') 103 || isRetained(c) 104 || (c == '%' && isPartiallyEncoded)) { 105 if (escapeStart != -1) { 106 appendHex(builder, s.substring(escapeStart, i), charset); 107 escapeStart = -1; 108 } 109 if (c == '%' && isPartiallyEncoded) { 110 // this is an encoded 3-character sequence like "%20" 111 builder.append(s, i, i + 3); 112 i += 2; 113 } else if (c == ' ') { 114 builder.append('+'); 115 } else { 116 builder.append(c); 117 } 118 } else if (escapeStart == -1) { 119 escapeStart = i; 120 } 121 } 122 if (escapeStart != -1) { 123 appendHex(builder, s.substring(escapeStart, s.length()), charset); 124 } 125 } 126 127 public final String encode(String s, Charset charset) { 128 // Guess a bit larger for encoded form 129 StringBuilder builder = new StringBuilder(s.length() + 16); 130 appendEncoded(builder, s, charset, false); 131 return builder.toString(); 132 } 133 134 public final void appendEncoded(StringBuilder builder, String s) { 135 appendEncoded(builder, s, Charsets.UTF_8, false); 136 } 137 138 public final void appendPartiallyEncoded(StringBuilder builder, String s) { 139 appendEncoded(builder, s, Charsets.UTF_8, true); 140 } 141 142 /** 143 * @param convertPlus true to convert '+' to ' '. 144 */ 145 public static String decode(String s, boolean convertPlus, Charset charset) { 146 if (s.indexOf('%') == -1 && (!convertPlus || s.indexOf('+') == -1)) { 147 return s; 148 } 149 150 StringBuilder result = new StringBuilder(s.length()); 151 ByteArrayOutputStream out = new ByteArrayOutputStream(); 152 for (int i = 0; i < s.length();) { 153 char c = s.charAt(i); 154 if (c == '%') { 155 do { 156 if (i + 2 >= s.length()) { 157 throw new IllegalArgumentException("Incomplete % sequence at: " + i); 158 } 159 int d1 = hexToInt(s.charAt(i + 1)); 160 int d2 = hexToInt(s.charAt(i + 2)); 161 if (d1 == -1 || d2 == -1) { 162 throw new IllegalArgumentException("Invalid % sequence " + 163 s.substring(i, i + 3) + " at " + i); 164 } 165 out.write((byte) ((d1 << 4) + d2)); 166 i += 3; 167 } while (i < s.length() && s.charAt(i) == '%'); 168 result.append(new String(out.toByteArray(), charset)); 169 out.reset(); 170 } else { 171 if (convertPlus && c == '+') { 172 c = ' '; 173 } 174 result.append(c); 175 i++; 176 } 177 } 178 return result.toString(); 179 } 180 181 /** 182 * Like {@link Character#digit}, but without support for non-ASCII 183 * characters. 184 */ 185 private static int hexToInt(char c) { 186 if ('0' <= c && c <= '9') { 187 return c - '0'; 188 } else if ('a' <= c && c <= 'f') { 189 return 10 + (c - 'a'); 190 } else if ('A' <= c && c <= 'F') { 191 return 10 + (c - 'A'); 192 } else { 193 return -1; 194 } 195 } 196 197 public static String decode(String s) { 198 return decode(s, false, Charsets.UTF_8); 199 } 200 201 private static void appendHex(StringBuilder builder, String s, Charset charset) { 202 for (byte b : s.getBytes(charset)) { 203 appendHex(builder, b); 204 } 205 } 206 207 private static void appendHex(StringBuilder sb, byte b) { 208 sb.append('%'); 209 sb.append(Byte.toHexString(b, true)); 210 } 211} 212