1/*
2 * $HeadURL: http://svn.apache.org/repos/asf/httpcomponents/httpcore/trunk/module-main/src/main/java/org/apache/http/message/BasicTokenIterator.java $
3 * $Revision: 602520 $
4 * $Date: 2007-12-08 09:42:26 -0800 (Sat, 08 Dec 2007) $
5 *
6 * ====================================================================
7 * Licensed to the Apache Software Foundation (ASF) under one
8 * or more contributor license agreements.  See the NOTICE file
9 * distributed with this work for additional information
10 * regarding copyright ownership.  The ASF licenses this file
11 * to you under the Apache License, Version 2.0 (the
12 * "License"); you may not use this file except in compliance
13 * with the License.  You may obtain a copy of the License at
14 *
15 *   http://www.apache.org/licenses/LICENSE-2.0
16 *
17 * Unless required by applicable law or agreed to in writing,
18 * software distributed under the License is distributed on an
19 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
20 * KIND, either express or implied.  See the License for the
21 * specific language governing permissions and limitations
22 * under the License.
23 * ====================================================================
24 *
25 * This software consists of voluntary contributions made by many
26 * individuals on behalf of the Apache Software Foundation.  For more
27 * information on the Apache Software Foundation, please see
28 * <http://www.apache.org/>.
29 *
30 */
31
32package org.apache.http.message;
33
34import java.util.NoSuchElementException;
35
36import org.apache.http.HeaderIterator;
37import org.apache.http.ParseException;
38import org.apache.http.TokenIterator;
39
40/**
41 * Basic implementation of a {@link TokenIterator}.
42 * This implementation parses <tt>#token<tt> sequences as
43 * defined by RFC 2616, section 2.
44 * It extends that definition somewhat beyond US-ASCII.
45 *
46 * @version $Revision: 602520 $
47 *
48 * @deprecated Please use {@link java.net.URL#openConnection} instead.
49 *     Please visit <a href="http://android-developers.blogspot.com/2011/09/androids-http-clients.html">this webpage</a>
50 *     for further details.
51 */
52@Deprecated
53public class BasicTokenIterator implements TokenIterator {
54
55    /** The HTTP separator characters. Defined in RFC 2616, section 2.2. */
56    // the order of the characters here is adjusted to put the
57    // most likely candidates at the beginning of the collection
58    public final static String HTTP_SEPARATORS = " ,;=()<>@:\\\"/[]?{}\t";
59
60
61    /** The iterator from which to obtain the next header. */
62    protected final HeaderIterator headerIt;
63
64    /**
65     * The value of the current header.
66     * This is the header value that includes {@link #currentToken}.
67     * Undefined if the iteration is over.
68     */
69    protected String currentHeader;
70
71    /**
72     * The token to be returned by the next call to {@link #currentToken}.
73     * <code>null</code> if the iteration is over.
74     */
75    protected String currentToken;
76
77    /**
78     * The position after {@link #currentToken} in {@link #currentHeader}.
79     * Undefined if the iteration is over.
80     */
81    protected int searchPos;
82
83
84    /**
85     * Creates a new instance of {@link BasicTokenIterator}.
86     *
87     * @param headerIterator    the iterator for the headers to tokenize
88     */
89    public BasicTokenIterator(final HeaderIterator headerIterator) {
90        if (headerIterator == null) {
91            throw new IllegalArgumentException
92                ("Header iterator must not be null.");
93        }
94
95        this.headerIt = headerIterator;
96        this.searchPos = findNext(-1);
97    }
98
99
100    // non-javadoc, see interface TokenIterator
101    public boolean hasNext() {
102        return (this.currentToken != null);
103    }
104
105
106    /**
107     * Obtains the next token from this iteration.
108     *
109     * @return  the next token in this iteration
110     *
111     * @throws NoSuchElementException   if the iteration is already over
112     * @throws ParseException   if an invalid header value is encountered
113     */
114    public String nextToken()
115        throws NoSuchElementException, ParseException {
116
117        if (this.currentToken == null) {
118            throw new NoSuchElementException("Iteration already finished.");
119        }
120
121        final String result = this.currentToken;
122        // updates currentToken, may trigger ParseException:
123        this.searchPos = findNext(this.searchPos);
124
125        return result;
126    }
127
128
129    /**
130     * Returns the next token.
131     * Same as {@link #nextToken}, but with generic return type.
132     *
133     * @return  the next token in this iteration
134     *
135     * @throws NoSuchElementException   if there are no more tokens
136     * @throws ParseException   if an invalid header value is encountered
137     */
138    public final Object next()
139        throws NoSuchElementException, ParseException {
140        return nextToken();
141    }
142
143
144    /**
145     * Removing tokens is not supported.
146     *
147     * @throws UnsupportedOperationException    always
148     */
149    public final void remove()
150        throws UnsupportedOperationException {
151
152        throw new UnsupportedOperationException
153            ("Removing tokens is not supported.");
154    }
155
156
157    /**
158     * Determines the next token.
159     * If found, the token is stored in {@link #currentToken}.
160     * The return value indicates the position after the token
161     * in {@link #currentHeader}. If necessary, the next header
162     * will be obtained from {@link #headerIt}.
163     * If not found, {@link #currentToken} is set to <code>null</code>.
164     *
165     * @param from      the position in the current header at which to
166     *                  start the search, -1 to search in the first header
167     *
168     * @return  the position after the found token in the current header, or
169     *          negative if there was no next token
170     *
171     * @throws ParseException   if an invalid header value is encountered
172     */
173    protected int findNext(int from)
174        throws ParseException {
175
176        if (from < 0) {
177            // called from the constructor, initialize the first header
178            if (!this.headerIt.hasNext()) {
179                return -1;
180            }
181            this.currentHeader = this.headerIt.nextHeader().getValue();
182            from = 0;
183        } else {
184            // called after a token, make sure there is a separator
185            from = findTokenSeparator(from);
186        }
187
188        int start = findTokenStart(from);
189        if (start < 0) {
190            this.currentToken = null;
191            return -1; // nothing found
192        }
193
194        int end = findTokenEnd(start);
195        this.currentToken = createToken(this.currentHeader, start, end);
196        return end;
197    }
198
199
200    /**
201     * Creates a new token to be returned.
202     * Called from {@link #findNext findNext} after the token is identified.
203     * The default implementation simply calls
204     * {@link java.lang.String#substring String.substring}.
205     * <br/>
206     * If header values are significantly longer than tokens, and some
207     * tokens are permanently referenced by the application, there can
208     * be problems with garbage collection. A substring will hold a
209     * reference to the full characters of the original string and
210     * therefore occupies more memory than might be expected.
211     * To avoid this, override this method and create a new string
212     * instead of a substring.
213     *
214     * @param value     the full header value from which to create a token
215     * @param start     the index of the first token character
216     * @param end       the index after the last token character
217     *
218     * @return  a string representing the token identified by the arguments
219     */
220    protected String createToken(String value, int start, int end) {
221        return value.substring(start, end);
222    }
223
224
225    /**
226     * Determines the starting position of the next token.
227     * This method will iterate over headers if necessary.
228     *
229     * @param from      the position in the current header at which to
230     *                  start the search
231     *
232     * @return  the position of the token start in the current header,
233     *          negative if no token start could be found
234     */
235    protected int findTokenStart(int from) {
236        if (from < 0) {
237            throw new IllegalArgumentException
238                ("Search position must not be negative: " + from);
239        }
240
241        boolean found = false;
242        while (!found && (this.currentHeader != null)) {
243
244            final int to = this.currentHeader.length();
245            while (!found && (from < to)) {
246
247                final char ch = this.currentHeader.charAt(from);
248                if (isTokenSeparator(ch) || isWhitespace(ch)) {
249                    // whitspace and token separators are skipped
250                    from++;
251                } else if (isTokenChar(this.currentHeader.charAt(from))) {
252                    // found the start of a token
253                    found = true;
254                } else {
255                    throw new ParseException
256                        ("Invalid character before token (pos " + from +
257                         "): " + this.currentHeader);
258                }
259            }
260            if (!found) {
261                if (this.headerIt.hasNext()) {
262                    this.currentHeader = this.headerIt.nextHeader().getValue();
263                    from = 0;
264                } else {
265                    this.currentHeader = null;
266                }
267            }
268        } // while headers
269
270        return found ? from : -1;
271    }
272
273
274    /**
275     * Determines the position of the next token separator.
276     * Because of multi-header joining rules, the end of a
277     * header value is a token separator. This method does
278     * therefore not need to iterate over headers.
279     *
280     * @param from      the position in the current header at which to
281     *                  start the search
282     *
283     * @return  the position of a token separator in the current header,
284     *          or at the end
285     *
286     * @throws ParseException
287     *         if a new token is found before a token separator.
288     *         RFC 2616, section 2.1 explicitly requires a comma between
289     *         tokens for <tt>#</tt>.
290     */
291    protected int findTokenSeparator(int from) {
292        if (from < 0) {
293            throw new IllegalArgumentException
294                ("Search position must not be negative: " + from);
295        }
296
297        boolean found = false;
298        final int to = this.currentHeader.length();
299        while (!found && (from < to)) {
300            final char ch = this.currentHeader.charAt(from);
301            if (isTokenSeparator(ch)) {
302                found = true;
303            } else if (isWhitespace(ch)) {
304                from++;
305            } else if (isTokenChar(ch)) {
306                throw new ParseException
307                    ("Tokens without separator (pos " + from +
308                     "): " + this.currentHeader);
309            } else {
310                throw new ParseException
311                    ("Invalid character after token (pos " + from +
312                     "): " + this.currentHeader);
313            }
314        }
315
316        return from;
317    }
318
319
320    /**
321     * Determines the ending position of the current token.
322     * This method will not leave the current header value,
323     * since the end of the header value is a token boundary.
324     *
325     * @param from      the position of the first character of the token
326     *
327     * @return  the position after the last character of the token.
328     *          The behavior is undefined if <code>from</code> does not
329     *          point to a token character in the current header value.
330     */
331    protected int findTokenEnd(int from) {
332        if (from < 0) {
333            throw new IllegalArgumentException
334                ("Token start position must not be negative: " + from);
335        }
336
337        final int to = this.currentHeader.length();
338        int end = from+1;
339        while ((end < to) && isTokenChar(this.currentHeader.charAt(end))) {
340            end++;
341        }
342
343        return end;
344    }
345
346
347    /**
348     * Checks whether a character is a token separator.
349     * RFC 2616, section 2.1 defines comma as the separator for
350     * <tt>#token</tt> sequences. The end of a header value will
351     * also separate tokens, but that is not a character check.
352     *
353     * @param ch        the character to check
354     *
355     * @return  <code>true</code> if the character is a token separator,
356     *          <code>false</code> otherwise
357     */
358    protected boolean isTokenSeparator(char ch) {
359        return (ch == ',');
360    }
361
362
363    /**
364     * Checks whether a character is a whitespace character.
365     * RFC 2616, section 2.2 defines space and horizontal tab as whitespace.
366     * The optional preceeding line break is irrelevant, since header
367     * continuation is handled transparently when parsing messages.
368     *
369     * @param ch        the character to check
370     *
371     * @return  <code>true</code> if the character is whitespace,
372     *          <code>false</code> otherwise
373     */
374    protected boolean isWhitespace(char ch) {
375
376        // we do not use Character.isWhitspace(ch) here, since that allows
377        // many control characters which are not whitespace as per RFC 2616
378        return ((ch == '\t') || Character.isSpaceChar(ch));
379    }
380
381
382    /**
383     * Checks whether a character is a valid token character.
384     * Whitespace, control characters, and HTTP separators are not
385     * valid token characters. The HTTP specification (RFC 2616, section 2.2)
386     * defines tokens only for the US-ASCII character set, this
387     * method extends the definition to other character sets.
388     *
389     * @param ch        the character to check
390     *
391     * @return  <code>true</code> if the character is a valid token start,
392     *          <code>false</code> otherwise
393     */
394    protected boolean isTokenChar(char ch) {
395
396        // common sense extension of ALPHA + DIGIT
397        if (Character.isLetterOrDigit(ch))
398            return true;
399
400        // common sense extension of CTL
401        if (Character.isISOControl(ch))
402            return false;
403
404        // no common sense extension for this
405        if (isHttpSeparator(ch))
406            return false;
407
408        // RFC 2616, section 2.2 defines a token character as
409        // "any CHAR except CTLs or separators". The controls
410        // and separators are included in the checks above.
411        // This will yield unexpected results for Unicode format characters.
412        // If that is a problem, overwrite isHttpSeparator(char) to filter
413        // out the false positives.
414        return true;
415    }
416
417
418    /**
419     * Checks whether a character is an HTTP separator.
420     * The implementation in this class checks only for the HTTP separators
421     * defined in RFC 2616, section 2.2. If you need to detect other
422     * separators beyond the US-ASCII character set, override this method.
423     *
424     * @param ch        the character to check
425     *
426     * @return  <code>true</code> if the character is an HTTP separator
427     */
428    protected boolean isHttpSeparator(char ch) {
429        return (HTTP_SEPARATORS.indexOf(ch) >= 0);
430    }
431
432
433} // class BasicTokenIterator
434
435