/*
 *  Licensed to the Apache Software Foundation (ASF) under one or more
 *  contributor license agreements.  See the NOTICE file distributed with
 *  this work for additional information regarding copyright ownership.
 *  The ASF licenses this file to You under the Apache License, Version 2.0
 *  (the "License"); you may not use this file except in compliance with
 *  the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 */

package java.net;

import java.io.ByteArrayOutputStream;
import java.io.UnsupportedEncodingException;

import org.apache.harmony.luni.util.Msg;

/**
 * This class is used to encode a string using the format required by {@code
 * application/x-www-form-urlencoded} MIME content type. It contains helper
 * methods used by the URI class, and performs encoding and decoding in a
 * slightly different way than {@code URLEncoder} and {@code URLDecoder}.
 */
class URIEncoderDecoder {

    /** Upper-case hexadecimal digits, indexed by nibble value. */
    static final String digits = "0123456789ABCDEF"; //$NON-NLS-1$

    /** Name of the charset used for every byte/char conversion in this class. */
    static final String encoding = "UTF8"; //$NON-NLS-1$

    /**
     * Validate a string by checking if it contains any characters other than:
     * 1. letters ('a'..'z', 'A'..'Z') 2. numbers ('0'..'9') 3. characters in
     * the legalset parameter 4. others (unicode characters that are not in
     * US-ASCII set, and are not ISO Control or are not ISO Space characters)
     * <p>
     * A '%' is always treated as the start of an escape and must be followed
     * by two hexadecimal digits; it is checked before the legal set is
     * consulted.
     * <p>
     * called from {@code URI.Helper.parseURI()} to validate each component
     *
     * @param s
     *            {@code java.lang.String} the string to be validated
     * @param legal
     *            {@code java.lang.String} the characters allowed in the String
     *            s
     * @throws URISyntaxException
     *             if a '%' is not followed by two hexadecimal digits, or if
     *             a character outside the permitted groups is found; the
     *             reported index is the position of the offending character
     */
    static void validate(String s, String legal) throws URISyntaxException {
        for (int i = 0; i < s.length();) {
            char ch = s.charAt(i);
            if (ch == '%') {
                // Consume a whole run of consecutive "%XX" escapes.
                do {
                    if (i + 2 >= s.length()) {
                        // Fewer than two characters remain after the '%'.
                        throw new URISyntaxException(s, Msg.getString("K0313"), //$NON-NLS-1$
                                i);
                    }
                    int d1 = Character.digit(s.charAt(i + 1), 16);
                    int d2 = Character.digit(s.charAt(i + 2), 16);
                    if (d1 == -1 || d2 == -1) {
                        throw new URISyntaxException(s, Msg.getString("K0314", //$NON-NLS-1$
                                s.substring(i, i + 3)), i);
                    }

                    i += 3;
                } while (i < s.length() && s.charAt(i) == '%');

                continue;
            }
            if (!((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')
                    || (ch >= '0' && ch <= '9') || legal.indexOf(ch) > -1 || (ch > 127
                    && !Character.isSpaceChar(ch) && !Character
                    .isISOControl(ch)))) {
                throw new URISyntaxException(s, Msg.getString("K00c1"), i); //$NON-NLS-1$
            }
            i++;
        }
    }

    /**
     * Validate a string by checking that it contains only letters
     * ('a'..'z', 'A'..'Z'), numbers ('0'..'9') and characters in the legal
     * parameter. Unlike {@link #validate(String, String)}, '%' escapes get no
     * special treatment and characters outside US-ASCII are accepted only
     * when they appear in {@code legal}.
     *
     * @param s
     *            {@code java.lang.String} the string to be validated
     * @param legal
     *            {@code java.lang.String} the characters allowed in the String
     *            s
     * @throws URISyntaxException
     *             if a character outside the permitted groups is found; the
     *             reported index is the position of that character
     */
    static void validateSimple(String s, String legal)
            throws URISyntaxException {
        for (int i = 0; i < s.length(); i++) {
            char ch = s.charAt(i);
            if (!((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')
                    || (ch >= '0' && ch <= '9') || legal.indexOf(ch) > -1)) {
                throw new URISyntaxException(s, Msg.getString("K00c1"), i); //$NON-NLS-1$
            }
        }
    }

    /**
     * All characters except letters ('a'..'z', 'A'..'Z') and numbers ('0'..'9')
     * and legal characters are converted into their hexadecimal value prepended
     * by '%'.
     * <p>
     * For example: '#' -> %23
     * Other characters, which are unicode chars that are not US-ASCII, and are
     * not ISO Control or are not ISO Space chars, are preserved.
     * <p>
     * Called from {@code URI.quoteComponent()} (for multiple argument
     * constructors)
     *
     * @param s
     *            java.lang.String the string to be converted
     * @param legal
     *            java.lang.String the characters allowed to be preserved in the
     *            string s
     * @return java.lang.String the converted string
     * @throws UnsupportedEncodingException
     *             if the charset named by {@link #encoding} is not available
     */
    static String quoteIllegal(String s, String legal)
            throws UnsupportedEncodingException {
        StringBuilder buf = new StringBuilder();
        for (int i = 0; i < s.length(); i++) {
            char ch = s.charAt(i);
            if ((ch >= 'a' && ch <= 'z')
                    || (ch >= 'A' && ch <= 'Z')
                    || (ch >= '0' && ch <= '9')
                    || legal.indexOf(ch) > -1
                    || (ch > 127 && !Character.isSpaceChar(ch) && !Character
                            .isISOControl(ch))) {
                buf.append(ch);
            } else {
                appendEscaped(buf, String.valueOf(ch));
            }
        }
        return buf.toString();
    }

    /**
     * Other characters, which are Unicode chars that are not US-ASCII, and are
     * not ISO Control or are not ISO Space chars are not preserved. They are
     * converted into their hexadecimal value prepended by '%'.
     * <p>
     * For example: Euro currency symbol -> "%E2%82%AC".
     * <p>
     * A supplementary character (a high surrogate immediately followed by a
     * low surrogate) is encoded as one unit, so that it yields the four
     * escaped bytes of its UTF-8 form rather than two escaped '?' bytes. An
     * unpaired surrogate is still encoded on its own.
     * <p>
     * Called from URI.toASCIIString()
     *
     * @param s
     *            java.lang.String the string to be converted
     * @return java.lang.String the converted string
     * @throws UnsupportedEncodingException
     *             if the charset named by {@link #encoding} is not available
     */
    static String encodeOthers(String s) throws UnsupportedEncodingException {
        StringBuilder buf = new StringBuilder();
        for (int i = 0; i < s.length(); i++) {
            char ch = s.charAt(i);
            if (ch <= 127) {
                buf.append(ch);
            } else if (Character.isHighSurrogate(ch) && i + 1 < s.length()
                    && Character.isLowSurrogate(s.charAt(i + 1))) {
                // Encode both halves of the pair together; encoding them one
                // char at a time would turn each half into a '?' byte.
                appendEscaped(buf, s.substring(i, i + 2));
                i++; // skip the low surrogate that was just consumed
            } else {
                appendEscaped(buf, String.valueOf(ch));
            }
        }
        return buf.toString();
    }

    /**
     * Appends the bytes of {@code chars}, converted with the charset named by
     * {@link #encoding}, to {@code buf}; each byte is written as '%' followed
     * by two upper-case hexadecimal digits.
     */
    private static void appendEscaped(StringBuilder buf, String chars)
            throws UnsupportedEncodingException {
        byte[] bytes = chars.getBytes(encoding);
        for (int j = 0; j < bytes.length; j++) {
            buf.append('%');
            buf.append(digits.charAt((bytes[j] & 0xf0) >> 4));
            buf.append(digits.charAt(bytes[j] & 0xf));
        }
    }

    /**
     * Decodes the string argument which is assumed to be encoded in the {@code
     * x-www-form-urlencoded} MIME content type using the UTF-8 encoding scheme.
     * <p>
     *'%' and two following hex digit characters are converted to the
     * equivalent byte value. All other characters are passed through
     * unmodified.
     * <p>
     * e.g. "A%20B%20C %24%25" -> "A B C $%"
     * <p>
     * Called from URI.getXYZ() methods
     *
     * @param s
     *            java.lang.String The encoded string.
     * @return java.lang.String The decoded version.
     * @throws IllegalArgumentException
     *             if a '%' is not followed by two hexadecimal digits
     * @throws UnsupportedEncodingException
     *             if the charset named by {@link #encoding} is not available
     */
    static String decode(String s) throws UnsupportedEncodingException {

        StringBuilder result = new StringBuilder();
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        for (int i = 0; i < s.length();) {
            char c = s.charAt(i);
            if (c == '%') {
                // Collect the bytes of a whole run of consecutive escapes so
                // that a multi-byte sequence is decoded as a single unit.
                out.reset();
                do {
                    if (i + 2 >= s.length()) {
                        throw new IllegalArgumentException(Msg.getString(
                                "K01fe", i)); //$NON-NLS-1$
                    }
                    int d1 = Character.digit(s.charAt(i + 1), 16);
                    int d2 = Character.digit(s.charAt(i + 2), 16);
                    if (d1 == -1 || d2 == -1) {
                        throw new IllegalArgumentException(Msg.getString(
                                "K01ff", s.substring(i, i + 3), //$NON-NLS-1$
                                String.valueOf(i)));
                    }
                    out.write((byte) ((d1 << 4) + d2));
                    i += 3;
                } while (i < s.length() && s.charAt(i) == '%');
                result.append(out.toString(encoding));
                continue;
            }
            result.append(c);
            i++;
        }
        return result.toString();
    }

}