156099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson/* 2fda56554dbf5caf1af1982cad020a8dca5632244Sergio Giro * Copyright (C) 2015 The Android Open Source Project 356099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson * 4fda56554dbf5caf1af1982cad020a8dca5632244Sergio Giro * Licensed under the Apache License, Version 2.0 (the "License"); 5fda56554dbf5caf1af1982cad020a8dca5632244Sergio Giro * you may not use this file except in compliance with the License. 6fda56554dbf5caf1af1982cad020a8dca5632244Sergio Giro * You may obtain a copy of the License at 756099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson * 8fda56554dbf5caf1af1982cad020a8dca5632244Sergio Giro * http://www.apache.org/licenses/LICENSE-2.0 9fda56554dbf5caf1af1982cad020a8dca5632244Sergio Giro * 10fda56554dbf5caf1af1982cad020a8dca5632244Sergio Giro * Unless required by applicable law or agreed to in writing, software 11fda56554dbf5caf1af1982cad020a8dca5632244Sergio Giro * distributed under the License is distributed on an "AS IS" BASIS, 12fda56554dbf5caf1af1982cad020a8dca5632244Sergio Giro * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13fda56554dbf5caf1af1982cad020a8dca5632244Sergio Giro * See the License for the specific language governing permissions and 14fda56554dbf5caf1af1982cad020a8dca5632244Sergio Giro * limitations under the License 1556099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson */ 1656099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson 1756099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilsonpackage libcore.net; 1856099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson 1956099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilsonimport java.io.ByteArrayOutputStream; 2056099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilsonimport java.net.URISyntaxException; 21fda56554dbf5caf1af1982cad020a8dca5632244Sergio Giroimport java.nio.ByteBuffer; 22fda56554dbf5caf1af1982cad020a8dca5632244Sergio Giroimport java.nio.CharBuffer; 23fda56554dbf5caf1af1982cad020a8dca5632244Sergio Giroimport java.nio.charset.CharacterCodingException; 2456099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilsonimport java.nio.charset.Charset; 25fda56554dbf5caf1af1982cad020a8dca5632244Sergio Giroimport java.nio.charset.CharsetDecoder; 26fda56554dbf5caf1af1982cad020a8dca5632244Sergio Giroimport java.nio.charset.CharsetEncoder; 27fda56554dbf5caf1af1982cad020a8dca5632244Sergio Giroimport java.nio.charset.CoderResult; 28fda56554dbf5caf1af1982cad020a8dca5632244Sergio Giroimport java.nio.charset.CodingErrorAction; 292a6f23ff8690ac2f025588a360547ce96cde0943Elliott Hughesimport java.nio.charset.StandardCharsets; 3056099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson 3156099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson/** 32fda56554dbf5caf1af1982cad020a8dca5632244Sergio Giro * Encodes and decodes “application/x-www-form-urlencoded” content. 3356099d23fcb002b164bff8fb7f14d6ec0453509eJesse Wilson * 34fda56554dbf5caf1af1982cad020a8dca5632244Sergio Giro * Subclasses define “isRetained”, which decides which chars need to be escaped and which don’t. 35fda56554dbf5caf1af1982cad020a8dca5632244Sergio Giro * Output is encoded as UTF-8 by default. 
/**
 * Encodes and decodes "application/x-www-form-urlencoded" content.
 *
 * <p>Subclasses define {@link #isRetained}, which decides which chars need to be escaped and
 * which don't. Output is encoded as UTF-8 by default, i.e. each character (or surrogate pair) is
 * converted to its equivalent UTF-8 encoded byte sequence, which is then converted to its escaped
 * form. E.g. a 4 byte sequence might look like "%F0%9F%98%80" (escapes are emitted with
 * upper-case hex digits).
 */
public abstract class UriCodec {

    /** Character to be output when there's an error decoding an input. */
    private static final char INVALID_INPUT_CHARACTER = '\ufffd';

    /**
     * Returns true iff {@code c} does not need to be escaped.
     *
     * <p>'a' - 'z', 'A' - 'Z' and '0' - '9' are always considered valid (i.e. they never need to
     * be escaped), whatever this method returns for them. That set is referred to as the
     * "whitelist".
     */
    protected abstract boolean isRetained(char c);

    /** Returns true iff {@code c} is an ASCII letter or an ASCII digit. */
    private static boolean isWhitelisted(char c) {
        return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || ('0' <= c && c <= '9');
    }

    /** Returns true iff {@code c} may appear unescaped in the output of this codec. */
    private boolean isWhitelistedOrRetained(char c) {
        return isWhitelisted(c) || isRetained(c);
    }

    /**
     * Checks that every character in the range [start, end) of {@code uri} is valid according to
     * this codec:
     * <ul>
     *   <li>a char that is whitelisted or retained is valid as is;</li>
     *   <li>a '%' must be followed, within the range, by two hex digits;</li>
     *   <li>anything else is invalid.</li>
     * </ul>
     *
     * @param uri the string holding the range to check
     * @param start index of the first char to check (inclusive)
     * @param end index after the last char to check (exclusive)
     * @param name only used to generate debugging info in the exception message; may be null
     * @return the substring [start, end) of {@code uri}
     * @throws URISyntaxException if the range contains an invalid character or a truncated
     *         escape sequence
     */
    public final String validate(String uri, int start, int end, String name)
            throws URISyntaxException {
        int i = start;
        while (i < end) {
            char c = uri.charAt(i++);
            if (isWhitelistedOrRetained(c)) {
                continue;
            }
            // c is either '%' or a character not allowed in a uri.
            if (c != '%') {
                throw unexpectedCharacterException(uri, name, c, i - 1);
            }
            // Expect two characters representing a number in hex.
            for (int j = 0; j < 2; j++) {
                c = getNextCharacter(uri, i++, end, name);
                if (hexCharToValue(c) < 0) {
                    throw unexpectedCharacterException(uri, name, c, i - 1);
                }
            }
        }
        return uri.substring(start, end);
    }

    /**
     * Interprets a char as a hex digit, returning a number from 0 to 15 ('f' or 'F'), or -1 if
     * the char is not a hex digit.
     */
    private static int hexCharToValue(char c) {
        if ('0' <= c && c <= '9') {
            return c - '0';
        }
        if ('a' <= c && c <= 'f') {
            return 10 + c - 'a';
        }
        if ('A' <= c && c <= 'F') {
            return 10 + c - 'A';
        }
        return -1;
    }

    /** Returns the " in [name]" fragment used in error messages, or "" when there is no name. */
    private static String describeName(String name) {
        return (name == null) ? "" : " in [" + name + "]";
    }

    /** Builds the exception reporting the unexpected char {@code unexpected} at {@code index}. */
    private static URISyntaxException unexpectedCharacterException(
            String uri, String name, char unexpected, int index) {
        return new URISyntaxException(
                uri, "Unexpected character" + describeName(name) + ": " + unexpected, index);
    }

    /** Builds the exception reporting that the input ended at {@code index} inside an escape. */
    private static URISyntaxException unexpectedEndOfStringException(
            String uri, String name, int index) {
        return new URISyntaxException(
                uri, "Unexpected end of string" + describeName(name), index);
    }

    /**
     * Returns the char at {@code index}, or throws {@link URISyntaxException} if {@code index}
     * is not below {@code end}.
     */
    private static char getNextCharacter(String uri, int index, int end, String name)
            throws URISyntaxException {
        if (index >= end) {
            throw unexpectedEndOfStringException(uri, name, index);
        }
        return uri.charAt(index);
    }

    /**
     * Throws {@link URISyntaxException} if any character in {@code uri} is neither whitelisted nor
     * in {@code legal}.
     *
     * @param uri the string to check
     * @param legal the extra characters that are accepted in addition to the whitelist
     */
    public static void validateSimple(String uri, String legal) throws URISyntaxException {
        for (int i = 0; i < uri.length(); i++) {
            char c = uri.charAt(i);
            if (!isWhitelisted(c) && legal.indexOf(c) < 0) {
                throw unexpectedCharacterException(uri, null /* name */, c, i);
            }
        }
    }

    /**
     * Encodes the string {@code s} as per the rules of this encoder (see class level comment),
     * using {@code charset} to turn escaped characters into bytes.
     *
     * @return the encoded string
     * @throws IllegalArgumentException if the encoder is unable to encode a sequence of chars
     *         (e.g. an unpaired surrogate, or a char {@code charset} cannot represent)
     */
    public final String encode(String s, Charset charset) {
        StringBuilder builder = new StringBuilder(s.length());
        appendEncoded(builder, s, charset, false);
        return builder.toString();
    }

    /**
     * Encodes the string {@code s} as per the rules of this encoder (see class level comment).
     *
     * <p>Encoded output is appended to {@code builder}. This uses the default output encoding
     * (UTF-8).
     *
     * @throws IllegalArgumentException if the encoder is unable to encode a sequence of chars
     */
    public final void appendEncoded(StringBuilder builder, String s) {
        appendEncoded(builder, s, StandardCharsets.UTF_8, false);
    }

    /**
     * Encodes the string {@code s} as per the rules of this encoder (see class level comment).
     *
     * <p>Encoded output is appended to {@code builder}. This uses the default output encoding
     * (UTF-8). This method produces partially encoded output: every '%' in the input is passed
     * through unmodified, so octets that are already escaped are not escaped a second time.
     * Consider an encoder that retains nothing beyond the whitelist, dealing with the string
     * "foo%25bar". With this method the output will be "foo%25bar", but with
     * {@link #appendEncoded(StringBuilder, String)} it will be double encoded into "foo%2525bar".
     *
     * @throws IllegalArgumentException if the encoder is unable to encode a sequence of chars
     */
    public final void appendPartiallyEncoded(StringBuilder builder, String s) {
        appendEncoded(builder, s, StandardCharsets.UTF_8, true);
    }

    /**
     * Shared implementation of the encoding methods.
     *
     * <p>Chars that need escaping are queued in a buffer and only converted to bytes when a char
     * that is written verbatim (or the end of the input) is reached, so that surrogate pairs and
     * other multi-char sequences are handed to the charset encoder as a whole.
     */
    private void appendEncoded(
            StringBuilder builder, String s, Charset charset, boolean partiallyEncoded) {
        CharsetEncoder encoder = charset.newEncoder()
                .onMalformedInput(CodingErrorAction.REPORT)
                .onUnmappableCharacter(CodingErrorAction.REPORT);
        CharBuffer cBuffer = CharBuffer.allocate(s.length());
        for (int i = 0; i < s.length(); i++) {
            char c = s.charAt(i);
            if (c == '%' && partiallyEncoded) {
                // In case there are characters waiting to be encoded.
                flushEncodingCharBuffer(builder, encoder, cBuffer);
                builder.append('%');
                continue;
            }

            // A retained space is written in its form-encoded shape.
            if (c == ' ' && isRetained(' ')) {
                flushEncodingCharBuffer(builder, encoder, cBuffer);
                builder.append('+');
                continue;
            }

            if (isWhitelistedOrRetained(c)) {
                flushEncodingCharBuffer(builder, encoder, cBuffer);
                builder.append(c);
                continue;
            }

            // Put the character in the queue for encoding.
            cBuffer.put(c);
        }
        flushEncodingCharBuffer(builder, encoder, cBuffer);
    }

    /**
     * Encodes the chars queued in {@code cBuffer}, appends the resulting bytes to {@code builder}
     * in "%XX" form and leaves {@code cBuffer} empty, ready to be written again.
     *
     * @throws IllegalArgumentException if the queued chars cannot be encoded
     */
    private static void flushEncodingCharBuffer(
            StringBuilder builder,
            CharsetEncoder encoder,
            CharBuffer cBuffer) {
        if (cBuffer.position() == 0) {
            return;
        }
        // We are reading from the buffer now.
        cBuffer.flip();
        ByteBuffer byteBuffer = ByteBuffer.allocate(
                cBuffer.remaining() * (int) Math.ceil(encoder.maxBytesPerChar()));
        CoderResult result = encoder.encode(cBuffer, byteBuffer, true /* endOfInput */);
        // According to the {@code CharsetEncoder#encode} spec, the method returns underflow
        // and leaves an empty input when all chars were processed correctly.
        if (result != CoderResult.UNDERFLOW) {
            throw new IllegalArgumentException(
                    "Error encoding, unexpected result ["
                            + result.toString()
                            + "] using encoder for ["
                            + encoder.charset().name()
                            + "]");
        }
        if (cBuffer.hasRemaining()) {
            throw new IllegalArgumentException(
                    "Encoder for [" + encoder.charset().name() + "] failed with underflow with "
                            + "remaining input [" + cBuffer + "]");
        }
        // Need to flush in case the encoder saves internal state. The outcome of the flush itself
        // has to be checked: the result of encode() was already verified above.
        result = encoder.flush(byteBuffer);
        if (result != CoderResult.UNDERFLOW) {
            throw new IllegalArgumentException(
                    "Error encoding, unexpected result ["
                            + result.toString()
                            + "] flushing encoder for ["
                            + encoder.charset().name()
                            + "]");
        }
        encoder.reset();

        byteBuffer.flip();
        // Write the encoded bytes.
        while (byteBuffer.hasRemaining()) {
            byte b = byteBuffer.get();
            builder.append('%');
            builder.append(intToHexDigit((b & 0xf0) >>> 4));
            builder.append(intToHexDigit(b & 0x0f));
        }
        // Use the character buffer to write again.
        cBuffer.clear();
    }

    /** Converts a value in the range [0, 15] to its upper-case hex digit. */
    private static char intToHexDigit(int b) {
        if (b < 10) {
            return (char) ('0' + b);
        } else {
            return (char) ('A' + b - 10);
        }
    }

    /**
     * Decodes a string according to the rules of this decoder.
     *
     * <ul>
     *   <li>If {@code convertPlus == true}, every literal '+' char of the input is converted to
     *       ' ' (white space). An escaped plus ("%2B") always decodes to '+'.</li>
     *   <li>If {@code throwOnFailure == true}, an {@link IllegalArgumentException} is thrown for
     *       escape sequences that are truncated or contain a non-hex char, and for bytes that
     *       cannot be mapped to a char. Else, U+FFFD is emitted to the output in place of the
     *       invalid input.</li>
     *   <li>Escaped bytes that do not form a valid sequence in {@code charset} are always
     *       replaced by U+FFFD, whatever the value of {@code throwOnFailure}.</li>
     * </ul>
     *
     * @return the decoded string
     */
    public static String decode(
            String s, boolean convertPlus, Charset charset, boolean throwOnFailure) {
        StringBuilder builder = new StringBuilder(s.length());
        appendDecoded(builder, s, convertPlus, charset, throwOnFailure);
        return builder.toString();
    }

    /**
     * Implementation of {@link #decode(String, boolean, Charset, boolean)}; the decoded output is
     * appended to {@code builder}.
     */
    private static void appendDecoded(
            StringBuilder builder,
            String s,
            boolean convertPlus,
            Charset charset,
            boolean throwOnFailure) {
        CharsetDecoder decoder = charset.newDecoder()
                .onMalformedInput(CodingErrorAction.REPLACE)
                .replaceWith("\ufffd")
                .onUnmappableCharacter(CodingErrorAction.REPORT);
        // Holds the bytes corresponding to the escaped chars being read (empty if the last char
        // wasn't an escaped char).
        ByteBuffer byteBuffer = ByteBuffer.allocate(s.length());
        int i = 0;
        while (i < s.length()) {
            char c = s.charAt(i++);
            switch (c) {
                case '+':
                    flushDecodingByteAccumulator(builder, decoder, byteBuffer, throwOnFailure);
                    builder.append(convertPlus ? ' ' : '+');
                    break;
                case '%':
                    // Expect two characters representing a number in hex.
                    int octet = 0;
                    boolean wellFormed = true;
                    for (int j = 0; j < 2; j++) {
                        if (i >= s.length()) {
                            // Unexpected end of input.
                            if (throwOnFailure) {
                                throw new IllegalArgumentException(
                                        unexpectedEndOfStringException(s, null /* name */, i));
                            }
                            flushDecodingByteAccumulator(
                                    builder, decoder, byteBuffer, throwOnFailure);
                            builder.append(INVALID_INPUT_CHARACTER);
                            return;
                        }
                        c = s.charAt(i++);
                        int newDigit = hexCharToValue(c);
                        if (newDigit < 0) {
                            if (throwOnFailure) {
                                throw new IllegalArgumentException(
                                        unexpectedCharacterException(s, null /* name */, c, i - 1));
                            }
                            flushDecodingByteAccumulator(
                                    builder, decoder, byteBuffer, throwOnFailure);
                            builder.append(INVALID_INPUT_CHARACTER);
                            // The escape is abandoned at the offending char: the replacement
                            // character stands for it, so no octet must be queued.
                            wellFormed = false;
                            break;
                        }
                        octet = octet * 0x10 + newDigit;
                    }
                    if (wellFormed) {
                        byteBuffer.put((byte) octet);
                    }
                    break;
                default:
                    flushDecodingByteAccumulator(builder, decoder, byteBuffer, throwOnFailure);
                    builder.append(c);
            }
        }
        flushDecodingByteAccumulator(builder, decoder, byteBuffer, throwOnFailure);
    }

    /**
     * Decodes the bytes accumulated in {@code byteBuffer}, appends the resulting chars to
     * {@code builder} and leaves {@code byteBuffer} empty, ready to be written again.
     *
     * @throws IllegalArgumentException if the bytes cannot be decoded and {@code throwOnFailure}
     *         is true
     */
    private static void flushDecodingByteAccumulator(
            StringBuilder builder,
            CharsetDecoder decoder,
            ByteBuffer byteBuffer,
            boolean throwOnFailure) {
        if (byteBuffer.position() == 0) {
            return;
        }
        byteBuffer.flip();
        try {
            builder.append(decoder.decode(byteBuffer));
        } catch (CharacterCodingException e) {
            if (throwOnFailure) {
                throw new IllegalArgumentException(e);
            } else {
                builder.append(INVALID_INPUT_CHARACTER);
            }
        } finally {
            // Use the byte buffer to write again.
            byteBuffer.clear();
        }
    }

    /**
     * Equivalent to {@code decode(s, false, UTF_8, true)}.
     */
    public static String decode(String s) {
        return decode(
                s, false /* convertPlus */, StandardCharsets.UTF_8, true /* throwOnFailure */);
    }
}