1# Copyright (C) 2001-2010 Python Software Foundation
2# Author: Barry Warsaw
3# Contact: email-sig@python.org
4
5"""Miscellaneous utilities."""
6
7__all__ = [
8    'collapse_rfc2231_value',
9    'decode_params',
10    'decode_rfc2231',
11    'encode_rfc2231',
12    'formataddr',
13    'formatdate',
14    'getaddresses',
15    'make_msgid',
16    'mktime_tz',
17    'parseaddr',
18    'parsedate',
19    'parsedate_tz',
20    'unquote',
21    ]
22
23import os
24import re
25import time
26import base64
27import random
28import socket
29import urllib
30import warnings
31
32from email._parseaddr import quote
33from email._parseaddr import AddressList as _AddressList
34from email._parseaddr import mktime_tz
35
# We need workarounds for bugs in these methods in older Pythons (see below)
37from email._parseaddr import parsedate as _parsedate
38from email._parseaddr import parsedate_tz as _parsedate_tz
39
40from quopri import decodestring as _qdecode
41
42# Intrapackage imports
43from email.encoders import _bencode, _qencode
44
# String constants shared by the helpers in this module.
COMMASPACE = ', '
EMPTYSTRING = ''
UEMPTYSTRING = u''
CRLF = '\r\n'
TICK = "'"

# Characters whose presence in a realname makes formataddr() wrap the name
# in double quotes.
specialsre = re.compile(r'[][\\()<>@,:;".]')
# Characters that formataddr() prefixes with a backslash inside a realname.
escapesre = re.compile(r'[][\\()"]')
53
54
55
56# Helpers
57
58def _identity(s):
59    return s
60
61
62def _bdecode(s):
63    """Decodes a base64 string.
64
65    This function is equivalent to base64.decodestring and it's retained only
66    for backward compatibility. It used to remove the last \\n of the decoded
67    string, if it had any (see issue 7143).
68    """
69    if not s:
70        return s
71    return base64.decodestring(s)
72
73
74
def fix_eols(s):
    """Normalize every line ending in s to \\r\\n and return the result."""
    # First pass: a newline that is not already preceded by a carriage
    # return becomes CRLF.
    bare_lf_fixed = re.sub(r'(?<!\r)\n', CRLF, s)
    # Second pass: a carriage return that is not followed by a newline
    # becomes CRLF.
    return re.sub(r'\r(?!\n)', CRLF, bare_lf_fixed)
82
83
84
def formataddr(pair):
    """Build an address header value from a (realname, email_address) pair.

    This is the inverse of parseaddr(): the returned string is suitable for
    an RFC 2822 From, To or Cc header.

    If the first element of pair is false, then the second element is
    returned unmodified.
    """
    realname, email_address = pair
    if not realname:
        return email_address
    # Wrap the name in double quotes only when it contains a character
    # matched by specialsre; the check is made on the unescaped name.
    wrapper = '"' if specialsre.search(realname) else ''
    # Backslash-escape every character matched by escapesre.
    escaped = escapesre.sub(r'\\\g<0>', realname)
    return '%s%s%s <%s>' % (wrapper, escaped, wrapper, email_address)
101
102
103
def getaddresses(fieldvalues):
    """Return a list of (REALNAME, EMAIL) for each fieldvalue.

    The field values are joined with ', ' and parsed as one address list.
    """
    joined = COMMASPACE.join(fieldvalues)
    return _AddressList(joined).addresslist
109
110
111
# Pattern for an encoded word of the form =?charset?encoding?atom?= where
# encoding is "q" or "b" (matched case-insensitively).  The named groups
# 'charset', 'encoding' and 'atom' capture the three fields.
ecre = re.compile(r'''
  =\?                   # literal =?
  (?P<charset>[^?]*?)   # non-greedy up to the next ? is the charset
  \?                    # literal ?
  (?P<encoding>[qb])    # either a "q" or a "b", case insensitive
  \?                    # literal ?
  (?P<atom>.*?)         # non-greedy up to the next ?= is the atom
  \?=                   # literal ?=
  ''', re.VERBOSE | re.IGNORECASE)
121
122
123
def formatdate(timeval=None, localtime=False, usegmt=False):
    """Return a date string as specified by RFC 2822, e.g.:

    Fri, 09 Nov 2001 01:08:47 -0000

    timeval, when given, is a floating point time value as accepted by
    gmtime() and localtime(); when omitted the current time is used.

    localtime is a flag: when true, timeval is interpreted in, and the
    result expressed relative to, the local timezone instead of UTC,
    taking daylight savings time into account.

    usegmt means the timezone is written out as an ascii string rather than
    a numeric one (so "GMT" instead of "-0000").  This is needed for HTTP
    and only has an effect when localtime is false.
    """
    # strftime() is deliberately avoided: it honors the locale, whereas RFC
    # 2822 requires the English day and month abbreviations.
    day_names = ('Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun')
    month_names = ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                   'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec')
    if timeval is None:
        timeval = time.time()
    if not localtime:
        now = time.gmtime(timeval)
        # In UTC the zone is a fixed string.
        zone = 'GMT' if usegmt else '-0000'
    else:
        now = time.localtime(timeval)
        # Pick the DST or non-DST offset depending on whether the local
        # zone has daylight savings time and whether it is in effect
        # (now[-1] is the DST flag of the time tuple).
        dst_active = time.daylight and now[-1]
        offset = time.altzone if dst_active else time.timezone
        hours, leftover_seconds = divmod(abs(offset), 3600)
        # offset is in seconds west of UTC while the printed zone is east
        # of UTC, so the sign is flipped.
        sign = '-' if offset > 0 else '+'
        zone = '%s%02d%02d' % (sign, hours, leftover_seconds // 60)
    year, month, day, hour, minute, second, weekday = now[:7]
    return '%s, %02d %s %04d %02d:%02d:%02d %s' % (
        day_names[weekday],
        day,
        month_names[month - 1],
        year, hour, minute, second,
        zone)
174
175
176
def make_msgid(idstring=None, domain=None):
    """Returns a string suitable for RFC 2822 compliant Message-ID, e.g:

    <20020201195627.33539.96671@nightshade.la.mastaler.com>

    Optional idstring if given is a string used to strengthen the
    uniqueness of the message id.

    Optional domain if given is used verbatim as the part of the message
    id after the '@'.  When omitted, the fully qualified name of the local
    host (socket.getfqdn()) is used, exactly as before the parameter
    existed; passing it avoids that host name lookup.
    """
    timeval = time.time()
    # UTC timestamp, process id and a random number make up the local part.
    utcdate = time.strftime('%Y%m%d%H%M%S', time.gmtime(timeval))
    pid = os.getpid()
    randint = random.randrange(100000)
    if idstring is None:
        idstring = ''
    else:
        idstring = '.' + idstring
    if domain is None:
        # Only consult the resolver when the caller did not supply a domain.
        domain = socket.getfqdn()
    return '<%s.%s.%s%s@%s>' % (utcdate, pid, randint, idstring, domain)
196
197
198
# These functions are in the standalone mimelib version only because they've
# subsequently been fixed in the latest Python versions.  We use this to work
# around broken older Pythons.
def parsedate(data):
    """Parse a date string with _parsedate(); return None for false input."""
    return _parsedate(data) if data else None
206
207
def parsedate_tz(data):
    """Parse a date string with _parsedate_tz(); return None for false input."""
    return _parsedate_tz(data) if data else None
212
213
def parseaddr(addr):
    """Parse addr and return the first address found.

    The result is the first entry of the parsed address list, or the pair
    ('', '') when the list is empty.
    """
    parsed = _AddressList(addr).addresslist
    return parsed[0] if parsed else ('', '')
219
220
221# rfc822.unquote() doesn't properly de-backslash-ify in Python pre-2.3.
def unquote(str):
    """Remove quotes from a string.

    A string wrapped in double quotes loses them and has its backslash
    escapes for backslash and double quote undone; a string wrapped in
    angle brackets just loses the brackets.  Anything else, including
    strings shorter than two characters, is returned unchanged.
    """
    if len(str) <= 1:
        return str
    if str.startswith('"') and str.endswith('"'):
        inner = str[1:-1]
        # Collapse escaped backslashes first, then escaped double quotes.
        inner = inner.replace('\\\\', '\\')
        return inner.replace('\\"', '"')
    if str.startswith('<') and str.endswith('>'):
        return str[1:-1]
    return str
230
231
232
233# RFC2231-related functions - parameter encoding and decoding
def decode_rfc2231(s):
    """Decode string according to RFC 2231.

    s is split on the first two single quotes.  When both are present the
    three resulting pieces are returned as a list; otherwise the tuple
    (None, None, s) is returned.
    """
    pieces = s.split(TICK, 2)
    # At most two splits are made, so "more than two pieces" means all
    # three fields were found.
    if len(pieces) > 2:
        return pieces
    return None, None, s
240
241
def encode_rfc2231(s, charset=None, language=None):
    """Encode string according to RFC 2231.

    s is always percent-quoted (no character is treated as safe).  If
    neither charset nor language is given, the quoted string is returned on
    its own.  Otherwise the result has the form charset'language'quoted,
    where a missing charset or language is written as the empty string.
    """
    # quote() lives in urllib on Python 2 and in urllib.parse on Python 3;
    # resolve whichever one is available.
    try:
        from urllib import quote as _quote
    except ImportError:
        from urllib.parse import quote as _quote
    s = _quote(s, safe='')
    if charset is None and language is None:
        return s
    # Never let a missing field leak into the output as the text "None".
    if charset is None:
        charset = ''
    if language is None:
        language = ''
    return "%s'%s'%s" % (charset, language, s)
256
257
# Matches parameter names of the form NAME*, NAME*N or NAME*N* (N being a
# run of decimal digits), capturing NAME as 'name' and N as 'num'.
# decode_params() uses it to recognize RFC 2231 style parameters.
rfc2231_continuation = re.compile(r'^(?P<name>\w+)\*((?P<num>[0-9]+)\*?)?$')
259
def decode_params(params):
    """Decode parameters list according to RFC 2231.

    params is a sequence of 2-tuples containing (param name, string value).

    The first pair is passed through untouched.  Every other pair whose
    name does not match rfc2231_continuation is returned with its value
    unquoted and re-quoted inside double quotes.  Pairs whose names do
    match are grouped by base name, joined in numerical order and appended
    after the ordinary parameters; if any segment of a group was
    %-encoded, the value of that group is a (charset, language, text)
    3-tuple instead of a plain string.
    """
    # Work on a copy so the caller's list is left alone.
    pending = params[:]
    first_name, first_value = pending.pop(0)
    decoded = [(first_name, first_value)]
    # Maps a parameter's base name to its list of continuations.  Each
    # entry is a 3-tuple of the continuation number, the string value, and
    # a flag telling whether that segment is %-encoded.
    segments_by_name = {}
    for name, value in pending:
        is_encoded = name.endswith('*')
        value = unquote(value)
        match = rfc2231_continuation.match(name)
        if match is None:
            decoded.append((name, '"%s"' % quote(value)))
            continue
        base_name, index = match.group('name', 'num')
        if index is not None:
            index = int(index)
        segments_by_name.setdefault(base_name, []).append(
            (index, value, is_encoded))
    for base_name, segments in segments_by_name.items():
        # Put the segments in numerical order before joining them.
        segments.sort()
        pieces = []
        extended = False
        # Undo the %-encoding of encoded segments.  If any segment name
        # ended in a *, the joined string must start with the charset and
        # language specifiers.
        for _index, text, is_encoded in segments:
            if is_encoded:
                text = urllib.unquote(text)
                extended = True
            pieces.append(text)
        joined = quote(EMPTYSTRING.join(pieces))
        if extended:
            charset, language, joined = decode_rfc2231(joined)
            decoded.append((base_name, (charset, language, '"%s"' % joined)))
        else:
            decoded.append((base_name, '"%s"' % joined))
    return decoded
312
def collapse_rfc2231_value(value, errors='replace',
                           fallback_charset='us-ascii'):
    """Collapse a parameter value into a single unquoted string.

    A non-tuple value is simply unquoted and returned.  A tuple is treated
    as (charset, language, text): the text is unquoted and decoded to
    unicode using the charset ('us-ascii' when the charset is false) and
    the given errors mode.  If the charset is unknown to Python, the text
    is decoded with fallback_charset instead.
    """
    if not isinstance(value, tuple):
        return unquote(value)
    raw_text = unquote(value[2])
    declared_charset = value[0] or 'us-ascii'
    try:
        return unicode(raw_text, declared_charset, errors)
    except LookupError:
        # XXX charset is unknown to Python.
        return unicode(raw_text, fallback_charset, errors)
325