1# Copyright 2011, Google Inc.
2# All rights reserved.
3#
4# Redistribution and use in source and binary forms, with or without
5# modification, are permitted provided that the following conditions are
6# met:
7#
8#     * Redistributions of source code must retain the above copyright
9# notice, this list of conditions and the following disclaimer.
10#     * Redistributions in binary form must reproduce the above
11# copyright notice, this list of conditions and the following disclaimer
12# in the documentation and/or other materials provided with the
13# distribution.
14#     * Neither the name of Google Inc. nor the names of its
15# contributors may be used to endorse or promote products derived from
16# this software without specific prior written permission.
17#
18# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30
31"""Utilities for parsing and formatting headers that follow the grammar defined
32in HTTP RFC http://www.ietf.org/rfc/rfc2616.txt.
33"""
34
35
36import urlparse
37
38
# Characters that cannot appear in a token: the "separators" of the HTTP RFC
# grammar, including SP and HT. consume_token stops at any of them and
# quote_if_necessary quotes any string that contains one.
_SEPARATORS = '()<>@,;:\\"/[]?={} \t'
40
41
42def _is_char(c):
43    """Returns true iff c is in CHAR as specified in HTTP RFC."""
44
45    return ord(c) <= 127
46
47
48def _is_ctl(c):
49    """Returns true iff c is in CTL as specified in HTTP RFC."""
50
51    return ord(c) <= 31 or ord(c) == 127
52
53
class ParsingState(object):
    """Cursor over the string being parsed.

    data holds the complete input and head is the index of the next
    unconsumed character. The parsing functions of this module read data
    relative to head and move head forward as they consume input.
    """

    def __init__(self, data):
        # Parsing always starts at the very beginning of the input.
        self.head = 0
        self.data = data
59
60
def peek(state, pos=0):
    """Returns the character located pos positions from the head of the data
    without consuming anything.

    pos is an offset relative to state.head. If the resulting position lies
    outside the data, either past the end or before the beginning, returns
    None.
    """

    index = state.head + pos
    # A negative index must never reach the subscript below: it would wrap
    # around and yield a character from the end of the data.
    if index < 0 or index >= len(state.data):
        return None

    return state.data[index]
68
69
def consume(state, amount=1):
    """Removes amount characters from the head and returns them.

    The head is advanced past the returned characters. If fewer than amount
    characters remain, nothing is consumed and None is returned.
    """

    start = state.head
    end = start + amount
    if end > len(state.data):
        return None

    state.head = end
    return state.data[start:end]
81
82
def consume_string(state, expected):
    """Consumes the string expected from the head if the data at the head
    starts with it.

    Returns True when expected was found and consumed. Returns False, with
    the head left untouched, when the data does not match.
    """

    # Check every character first so that nothing is consumed on a mismatch.
    for offset, wanted in enumerate(expected):
        if peek(state, offset) != wanted:
            return False

    consume(state, len(expected))
    return True
98
99
def consume_lws(state):
    """Consumes one LWS from the head.

    LWS = [CRLF] 1*( SP | HT )

    Returns True if a LWS was consumed. Returns False, with the head restored
    to where it was, if the data at the head is not a LWS.
    """

    saved_head = state.head

    # The leading CRLF is optional, so the outcome is deliberately ignored.
    consume_string(state, '\r\n')

    count = 0
    while peek(state, count) in (' ', '\t'):
        count += 1

    if count == 0:
        # No SP or HT followed: give back the CRLF that may have been taken.
        state.head = saved_head
        return False

    consume(state, count)
    return True
124
125
def consume_lwses(state):
    """Consumes every consecutive LWS found at the head (*LWS)."""

    more = True
    while more:
        more = consume_lws(state)
131
132
def consume_token(state):
    """Consumes the token found at the head and returns it.

    The token extends up to the end of the data or the first separator, CTL
    or non-CHAR character. Returns None, consuming nothing, if no token
    starts at the head.
    """

    length = 0

    while True:
        ch = peek(state, length)
        if ch is None:
            # Ran out of data.
            break
        if ch in _SEPARATORS or _is_ctl(ch) or not _is_char(ch):
            break
        length += 1

    if length == 0:
        return None

    return consume(state, length)
149
150
def consume_token_or_quoted_string(state):
    """Consumes a token or a quoted-string from the head.

    Returns the token, or the content of the quoted-string with the
    surrounding quotes removed, quoted-pairs unescaped and each LWS replaced
    by a single space. Returns None if neither was found; a malformed
    quoted-string leaves the head where it was.
    """

    start = state.head

    if not consume_string(state, '"'):
        # No opening quote, so only a plain token can be here.
        return consume_token(state)

    chars = []
    escaped = False

    while True:
        # LWS is only recognized outside of a quoted-pair.
        if not escaped and consume_lws(state):
            chars.append(' ')
            continue

        ch = consume(state)
        if ch is None:
            # quoted-string is not enclosed with double quotation
            break

        if escaped:
            if not _is_char(ch):
                # Non CHAR character found in quoted-pair
                break
            chars.append(ch)
            escaped = False
            continue

        if ch == '"':
            return ''.join(chars)

        if ch == '\\':
            escaped = True
            continue

        if _is_ctl(ch):
            # Invalid character found in qdtext
            break

        chars.append(ch)

    # Every way out of the loop above is a parse failure: rewind.
    state.head = start
    return None
193
194
def quote_if_necessary(s):
    """Returns s unchanged if it is usable as a token, otherwise returns it
    as a quoted-string.

    The empty string becomes '""'. A string is enclosed in double quotes when
    it contains a separator, a CTL or a non-CHAR character. Double quotes,
    backslashes and CTLs are written as quoted-pairs, i.e. prefixed with a
    backslash.
    """

    if s == '':
        return '""'

    quote = False
    result = []
    for c in s:
        is_ctl = _is_ctl(c)

        # '"' and '\\' are both part of _SEPARATORS, so they force quoting
        # without needing a check of their own.
        if is_ctl or c in _SEPARATORS or not _is_char(c):
            quote = True

        # A bare backslash inside a quoted-string is read back as the start
        # of a quoted-pair and silently dropped, so it has to be escaped just
        # like the double quote.
        if c == '"' or c == '\\' or is_ctl:
            result.append('\\' + c)
        else:
            result.append(c)

    text = ''.join(result)
    if quote:
        return '"' + text + '"'
    return text
216
217
def parse_uri(uri):
    """Parses a WebSocket URI and returns a (host, port, resource) tuple.

    If the scheme of uri is neither 'ws' nor 'wss', uri is taken as a
    relative URI and (None, None, uri) is returned without any validation.
    If the host is missing or the port cannot be parsed, (None, None, None)
    is returned. When uri has no port, the port defaults to 80 for 'ws' and
    to 443 for 'wss'. The resource is the path ('/' if empty) followed by
    the query and the fragment, if any.
    """

    parsed = urlparse.urlsplit(uri)
    if parsed.scheme not in ('ws', 'wss'):
        # |uri| must be a relative URI.
        # TODO(toyoshim): Should validate |uri|.
        return None, None, uri

    if parsed.hostname is None:
        return None, None, None

    try:
        port = parsed.port
    except ValueError:
        # The port property raises ValueError on an invalid port description
        # such as the null port in 'ws://host:/path'.
        return None, None, None

    if port is None:
        if parsed.scheme == 'ws':
            port = 80
        else:
            port = 443

    path = parsed.path or '/'
    if parsed.query:
        path += '?' + parsed.query
    if parsed.fragment:
        path += '#' + parsed.fragment

    return parsed.hostname, port, path
253
254
# urlparse in Python2.5.1 doesn't have 'ws' and 'wss' entries in uses_netloc.
# Each scheme is checked on its own so that one being already registered
# neither hides the absence of the other nor causes a duplicate entry.
for _scheme in ('ws', 'wss'):
    if _scheme not in urlparse.uses_netloc:
        urlparse.uses_netloc.append(_scheme)
del _scheme
261
262
263# vi:sts=4 sw=4 et
264