1/*
2 *  Licensed to the Apache Software Foundation (ASF) under one or more
3 *  contributor license agreements.  See the NOTICE file distributed with
4 *  this work for additional information regarding copyright ownership.
5 *  The ASF licenses this file to You under the Apache License, Version 2.0
6 *  (the "License"); you may not use this file except in compliance with
7 *  the License.  You may obtain a copy of the License at
8 *
9 *     http://www.apache.org/licenses/LICENSE-2.0
10 *
11 *  Unless required by applicable law or agreed to in writing, software
12 *  distributed under the License is distributed on an "AS IS" BASIS,
13 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 *  See the License for the specific language governing permissions and
15 *  limitations under the License.
16 */
17
18package java.net;
19
20import java.io.ByteArrayOutputStream;
21import java.io.UnsupportedEncodingException;
22
23import org.apache.harmony.luni.util.Msg;
24
25/**
26 * This class is used to encode a string using the format required by {@code
27 * application/x-www-form-urlencoded} MIME content type. It contains helper
28 * methods used by the URI class, and performs encoding and decoding in a
29 * slightly different way than {@code URLEncoder} and {@code URLDecoder}.
30 */
31class URIEncoderDecoder {
32
33    static final String digits = "0123456789ABCDEF"; //$NON-NLS-1$
34
35    static final String encoding = "UTF8"; //$NON-NLS-1$
36
37    /**
38     * Validate a string by checking if it contains any characters other than:
39     * 1. letters ('a'..'z', 'A'..'Z') 2. numbers ('0'..'9') 3. characters in
40     * the legalset parameter 4. others (unicode characters that are not in
41     * US-ASCII set, and are not ISO Control or are not ISO Space characters)
42     * <p>
43     * called from {@code URI.Helper.parseURI()} to validate each component
44     *
45     * @param s
46     *            {@code java.lang.String} the string to be validated
47     * @param legal
48     *            {@code java.lang.String} the characters allowed in the String
49     *            s
50     */
51    static void validate(String s, String legal) throws URISyntaxException {
52        for (int i = 0; i < s.length();) {
53            char ch = s.charAt(i);
54            if (ch == '%') {
55                do {
56                    if (i + 2 >= s.length()) {
57                        throw new URISyntaxException(s, Msg.getString("K0313"), //$NON-NLS-1$
58                                i);
59                    }
60                    int d1 = Character.digit(s.charAt(i + 1), 16);
61                    int d2 = Character.digit(s.charAt(i + 2), 16);
62                    if (d1 == -1 || d2 == -1) {
63                        throw new URISyntaxException(s, Msg.getString("K0314", //$NON-NLS-1$
64                                s.substring(i, i + 3)), i);
65                    }
66
67                    i += 3;
68                } while (i < s.length() && s.charAt(i) == '%');
69
70                continue;
71            }
72            if (!((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')
73                    || (ch >= '0' && ch <= '9') || legal.indexOf(ch) > -1 || (ch > 127
74                    && !Character.isSpaceChar(ch) && !Character
75                    .isISOControl(ch)))) {
76                throw new URISyntaxException(s, Msg.getString("K00c1"), i); //$NON-NLS-1$
77            }
78            i++;
79        }
80    }
81
82    static void validateSimple(String s, String legal)
83            throws URISyntaxException {
84        for (int i = 0; i < s.length();) {
85            char ch = s.charAt(i);
86            if (!((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')
87                    || (ch >= '0' && ch <= '9') || legal.indexOf(ch) > -1)) {
88                throw new URISyntaxException(s, Msg.getString("K00c1"), i); //$NON-NLS-1$
89            }
90            i++;
91        }
92    }
93
94    /**
95     * All characters except letters ('a'..'z', 'A'..'Z') and numbers ('0'..'9')
96     * and legal characters are converted into their hexidecimal value prepended
97     * by '%'.
98     * <p>
99     * For example: '#' -> %23
100     * Other characters, which are unicode chars that are not US-ASCII, and are
101     * not ISO Control or are not ISO Space chars, are preserved.
102     * <p>
103     * Called from {@code URI.quoteComponent()} (for multiple argument
104     * constructors)
105     *
106     * @param s
107     *            java.lang.String the string to be converted
108     * @param legal
109     *            java.lang.String the characters allowed to be preserved in the
110     *            string s
111     * @return java.lang.String the converted string
112     */
113    static String quoteIllegal(String s, String legal)
114            throws UnsupportedEncodingException {
115        StringBuilder buf = new StringBuilder();
116        for (int i = 0; i < s.length(); i++) {
117            char ch = s.charAt(i);
118            if ((ch >= 'a' && ch <= 'z')
119                    || (ch >= 'A' && ch <= 'Z')
120                    || (ch >= '0' && ch <= '9')
121                    || legal.indexOf(ch) > -1
122                    || (ch > 127 && !Character.isSpaceChar(ch) && !Character
123                            .isISOControl(ch))) {
124                buf.append(ch);
125            } else {
126                byte[] bytes = new String(new char[] { ch }).getBytes(encoding);
127                for (int j = 0; j < bytes.length; j++) {
128                    buf.append('%');
129                    buf.append(digits.charAt((bytes[j] & 0xf0) >> 4));
130                    buf.append(digits.charAt(bytes[j] & 0xf));
131                }
132            }
133        }
134        return buf.toString();
135    }
136
137    /**
138     * Other characters, which are Unicode chars that are not US-ASCII, and are
139     * not ISO Control or are not ISO Space chars are not preserved. They are
140     * converted into their hexidecimal value prepended by '%'.
141     * <p>
142     * For example: Euro currency symbol -> "%E2%82%AC".
143     * <p>
144     * Called from URI.toASCIIString()
145     *
146     * @param s
147     *            java.lang.String the string to be converted
148     * @return java.lang.String the converted string
149     */
150    static String encodeOthers(String s) throws UnsupportedEncodingException {
151        StringBuilder buf = new StringBuilder();
152        for (int i = 0; i < s.length(); i++) {
153            char ch = s.charAt(i);
154            if (ch <= 127) {
155                buf.append(ch);
156            } else {
157                byte[] bytes = new String(new char[] { ch }).getBytes(encoding);
158                for (int j = 0; j < bytes.length; j++) {
159                    buf.append('%');
160                    buf.append(digits.charAt((bytes[j] & 0xf0) >> 4));
161                    buf.append(digits.charAt(bytes[j] & 0xf));
162                }
163            }
164        }
165        return buf.toString();
166    }
167
168    /**
169     * Decodes the string argument which is assumed to be encoded in the {@code
170     * x-www-form-urlencoded} MIME content type using the UTF-8 encoding scheme.
171     * <p>
172     *'%' and two following hex digit characters are converted to the
173     * equivalent byte value. All other characters are passed through
174     * unmodified.
175     * <p>
176     * e.g. "A%20B%20C %24%25" -> "A B C $%"
177     * <p>
178     * Called from URI.getXYZ() methods
179     *
180     * @param s
181     *            java.lang.String The encoded string.
182     * @return java.lang.String The decoded version.
183     */
184    static String decode(String s) throws UnsupportedEncodingException {
185
186        StringBuilder result = new StringBuilder();
187        ByteArrayOutputStream out = new ByteArrayOutputStream();
188        for (int i = 0; i < s.length();) {
189            char c = s.charAt(i);
190            if (c == '%') {
191                out.reset();
192                do {
193                    if (i + 2 >= s.length()) {
194                        throw new IllegalArgumentException(Msg.getString(
195                                "K01fe", i)); //$NON-NLS-1$
196                    }
197                    int d1 = Character.digit(s.charAt(i + 1), 16);
198                    int d2 = Character.digit(s.charAt(i + 2), 16);
199                    if (d1 == -1 || d2 == -1) {
200                        throw new IllegalArgumentException(Msg.getString(
201                                "K01ff", s.substring(i, i + 3), //$NON-NLS-1$
202                                String.valueOf(i)));
203                    }
204                    out.write((byte) ((d1 << 4) + d2));
205                    i += 3;
206                } while (i < s.length() && s.charAt(i) == '%');
207                result.append(out.toString(encoding));
208                continue;
209            }
210            result.append(c);
211            i++;
212        }
213        return result.toString();
214    }
215
216}
217