14710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# Copyright (C) 2001-2006 Python Software Foundation 24710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# Author: Barry Warsaw 34710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# Contact: email-sig@python.org 44710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 54710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm"""Basic message object for the email package object model.""" 64710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 74710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm__all__ = ['Message'] 84710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 94710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmimport re 104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmimport uu 114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmimport binascii 124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmimport warnings 134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmfrom cStringIO import StringIO 144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# Intrapackage imports 164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmimport email.charset 174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmfrom email import utils 184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmfrom email import errors 194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmSEMISPACE = '; ' 214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# Regular expression that matches `special' characters in parameters, the 234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# existence of which force quoting of the parameter value. 244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmtspecials = re.compile(r'[ \(\)<>@,;:\\"/\[\]\?=]') 254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# Helper functions 284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmdef _splitparam(param): 294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # Split header parameters. BAW: this may be too simple. It isn't 304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # strictly RFC 2045 (section 5.1) compliant, but it catches most headers 314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # found in the wild. We may eventually need a full fledged parser 324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # eventually. 334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm a, sep, b = param.partition(';') 344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if not sep: 354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return a.strip(), None 364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return a.strip(), b.strip() 374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmdef _formatparam(param, value=None, quote=True): 394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """Convenience function to format and return a key=value pair. 404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm This will quote the value if needed or if quote is true. If value is a 424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm three tuple (charset, language, value), it will be encoded according 434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm to RFC2231 rules. 444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """ 454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if value is not None and len(value) > 0: 464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # A tuple is used for RFC 2231 encoded parameter values where items 474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # are (charset, language, value). charset is a string, not a Charset 484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # instance. 494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if isinstance(value, tuple): 504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # Encode as per RFC 2231 514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm param += '*' 524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm value = utils.encode_rfc2231(value[2], value[0], value[1]) 534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # BAW: Please check this. I think that if quote is set it should 544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # force quoting even if not necessary. 554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if quote or tspecials.search(value): 564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return '%s="%s"' % (param, utils.quote(value)) 574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm else: 584710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return '%s=%s' % (param, value) 594710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm else: 604710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return param 614710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmdef _parseparam(s): 634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm plist = [] 644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm while s[:1] == ';': 654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm s = s[1:] 664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm end = s.find(';') 674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm while end > 0 and (s.count('"', 0, end) - s.count('\\"', 0, end)) % 2: 684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm end = s.find(';', end + 1) 694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if end < 0: 704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm end = len(s) 714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm f = s[:end] 724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if '=' in f: 734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm i = f.index('=') 744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm f = f[:i].strip().lower() + '=' + f[i+1:].strip() 754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm plist.append(f.strip()) 764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm s = s[end:] 774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return plist 784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmdef _unquotevalue(value): 814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # This is different than utils.collapse_rfc2231_value() because it doesn't 824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # try to convert the value to a unicode. Message.get_param() and 834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # Message.get_params() are both currently defined to return the tuple in 844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # the face of RFC 2231 parameters. 854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if isinstance(value, tuple): 864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return value[0], value[1], utils.unquote(value[2]) 874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm else: 884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return utils.unquote(value) 894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmclass Message: 934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """Basic message object. 944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 954710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm A message object is defined as something that has a bunch of RFC 2822 964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm headers and a payload. It may optionally have an envelope header 974710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm (a.k.a. Unix-From or From_ header). If the message is a container (i.e. a 984710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm multipart or a message/rfc822), then the payload is a list of Message 994710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm objects, otherwise it is a string. 1004710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 1014710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm Message objects implement part of the `mapping' interface, which assumes 1024710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm there is exactly one occurrence of the header per message. Some headers 1034710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm do in fact appear multiple times (e.g. Received) and for those headers, 1044710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm you must use the explicit API to set or get all the headers. Not all of 1054710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm the mapping methods are implemented. 1064710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """ 1074710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def __init__(self): 1084710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self._headers = [] 1094710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self._unixfrom = None 1104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self._payload = None 1114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self._charset = None 1124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # Defaults for multipart messages 1134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.preamble = self.epilogue = None 1144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.defects = [] 1154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # Default content type 1164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self._default_type = 'text/plain' 1174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 1184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def __str__(self): 1194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """Return the entire formatted message as a string. 1204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm This includes the headers, body, and envelope header. 1214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """ 1224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return self.as_string(unixfrom=True) 1234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 1244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def as_string(self, unixfrom=False): 1254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """Return the entire formatted message as a string. 1264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm Optional `unixfrom' when True, means include the Unix From_ envelope 1274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm header. 1284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 1294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm This is a convenience method and may not generate the message exactly 1304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm as you intend because by default it mangles lines that begin with 1314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm "From ". For more flexibility, use the flatten() method of a 1324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm Generator instance. 1334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """ 1344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm from email.generator import Generator 1354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm fp = StringIO() 1364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm g = Generator(fp) 1374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm g.flatten(self, unixfrom=unixfrom) 1384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return fp.getvalue() 1394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 1404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def is_multipart(self): 1414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """Return True if the message consists of multiple parts.""" 1424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return isinstance(self._payload, list) 1434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 1444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # 1454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # Unix From_ line 1464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # 1474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def set_unixfrom(self, unixfrom): 1484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self._unixfrom = unixfrom 1494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 1504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def get_unixfrom(self): 1514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return self._unixfrom 1524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 1534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # 1544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # Payload manipulation. 1554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # 1564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def attach(self, payload): 1574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """Add the given payload to the current payload. 1584710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 1594710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm The current payload will always be a list of objects after this method 1604710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm is called. If you want to set the payload to a scalar object, use 1614710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm set_payload() instead. 1624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """ 1634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if self._payload is None: 1644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self._payload = [payload] 1654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm else: 1664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self._payload.append(payload) 1674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 1684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def get_payload(self, i=None, decode=False): 1694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """Return a reference to the payload. 1704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 1714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm The payload will either be a list object or a string. If you mutate 1724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm the list object, you modify the message's payload in place. Optional 1734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm i returns that index into the payload. 1744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 1754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm Optional decode is a flag indicating whether the payload should be 1764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm decoded or not, according to the Content-Transfer-Encoding header 1774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm (default is False). 1784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 1794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm When True and the message is not a multipart, the payload will be 1804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm decoded if this header's value is `quoted-printable' or `base64'. If 1814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm some other encoding is used, or the header is missing, or if the 1824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm payload has bogus data (i.e. bogus base64 or uuencoded data), the 1834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm payload is returned as-is. 1844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 1854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm If the message is a multipart and the decode flag is True, then None 1864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm is returned. 1874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """ 1884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if i is None: 1894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm payload = self._payload 1904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm elif not isinstance(self._payload, list): 1914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm raise TypeError('Expected list, got %s' % type(self._payload)) 1924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm else: 1934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm payload = self._payload[i] 1944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if decode: 1954710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if self.is_multipart(): 1964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return None 1974710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm cte = self.get('content-transfer-encoding', '').lower() 1984710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if cte == 'quoted-printable': 1994710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return utils._qdecode(payload) 2004710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm elif cte == 'base64': 2014710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm try: 2024710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return utils._bdecode(payload) 2034710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm except binascii.Error: 2044710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # Incorrect padding 2054710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return payload 2064710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm elif cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'): 2074710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm sfp = StringIO() 2084710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm try: 2094710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm uu.decode(StringIO(payload+'\n'), sfp, quiet=True) 2104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm payload = sfp.getvalue() 2114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm except uu.Error: 2124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # Some decoding problem 2134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return payload 2144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # Everything else, including encodings with 8bit or 7bit are returned 2154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # unchanged. 2164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return payload 2174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 2184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def set_payload(self, payload, charset=None): 2194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """Set the payload to the given value. 2204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 2214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm Optional charset sets the message's default character set. See 2224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm set_charset() for details. 2234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """ 2244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self._payload = payload 2254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if charset is not None: 2264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.set_charset(charset) 2274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 2284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def set_charset(self, charset): 2294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """Set the charset of the payload to a given character set. 2304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 2314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm charset can be a Charset instance, a string naming a character set, or 2324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm None. If it is a string it will be converted to a Charset instance. 2334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm If charset is None, the charset parameter will be removed from the 2344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm Content-Type field. Anything else will generate a TypeError. 2354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 2364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm The message will be assumed to be of type text/* encoded with 2374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm charset.input_charset. It will be converted to charset.output_charset 2384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm and encoded properly, if needed, when generating the plain text 2394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm representation of the message. MIME headers (MIME-Version, 2404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm Content-Type, Content-Transfer-Encoding) will be added as needed. 2414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 2424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """ 2434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if charset is None: 2444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.del_param('charset') 2454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self._charset = None 2464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return 2474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if isinstance(charset, basestring): 2484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm charset = email.charset.Charset(charset) 2494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if not isinstance(charset, email.charset.Charset): 2504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm raise TypeError(charset) 2514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # BAW: should we accept strings that can serve as arguments to the 2524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # Charset constructor? 2534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self._charset = charset 2544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if 'MIME-Version' not in self: 2554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.add_header('MIME-Version', '1.0') 2564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if 'Content-Type' not in self: 2574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.add_header('Content-Type', 'text/plain', 2584710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm charset=charset.get_output_charset()) 2594710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm else: 2604710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.set_param('charset', charset.get_output_charset()) 2614710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if isinstance(self._payload, unicode): 2624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self._payload = self._payload.encode(charset.output_charset) 2634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if str(charset) != charset.get_output_charset(): 2644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self._payload = charset.body_encode(self._payload) 2654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if 'Content-Transfer-Encoding' not in self: 2664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm cte = charset.get_body_encoding() 2674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm try: 2684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm cte(self) 2694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm except TypeError: 2704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self._payload = charset.body_encode(self._payload) 2714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.add_header('Content-Transfer-Encoding', cte) 2724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 2734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def get_charset(self): 2744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """Return the Charset instance associated with the message's payload. 2754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """ 2764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return self._charset 2774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 2784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # 2794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # MAPPING INTERFACE (partial) 2804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # 2814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def __len__(self): 2824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """Return the total number of headers, including duplicates.""" 2834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return len(self._headers) 2844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 2854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def __getitem__(self, name): 2864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """Get a header value. 2874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 2884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm Return None if the header is missing instead of raising an exception. 2894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 2904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm Note that if the header appeared multiple times, exactly which 2914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm occurrence gets returned is undefined. Use get_all() to get all 2924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm the values matching a header field name. 2934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """ 2944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return self.get(name) 2954710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 2964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def __setitem__(self, name, val): 2974710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """Set the value of a header. 2984710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 2994710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm Note: this does not overwrite an existing header with the same field 3004710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm name. Use __delitem__() first to delete any existing headers. 3014710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """ 3024710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self._headers.append((name, val)) 3034710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 3044710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def __delitem__(self, name): 3054710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """Delete all occurrences of a header, if present. 3064710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 3074710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm Does not raise an exception if the header is missing. 3084710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """ 3094710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm name = name.lower() 3104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm newheaders = [] 3114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for k, v in self._headers: 3124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if k.lower() != name: 3134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm newheaders.append((k, v)) 3144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self._headers = newheaders 3154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 3164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def __contains__(self, name): 3174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return name.lower() in [k.lower() for k, v in self._headers] 3184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 3194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def has_key(self, name): 3204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """Return true if the message contains the header.""" 3214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm missing = object() 3224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return self.get(name, missing) is not missing 3234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 3244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def keys(self): 3254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """Return a list of all the message's header field names. 3264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 3274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm These will be sorted in the order they appeared in the original 3284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm message, or were added to the message, and may contain duplicates. 3294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm Any fields deleted and re-inserted are always appended to the header 3304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm list. 3314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """ 3324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return [k for k, v in self._headers] 3334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 3344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def values(self): 3354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """Return a list of all the message's header values. 3364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 3374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm These will be sorted in the order they appeared in the original 3384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm message, or were added to the message, and may contain duplicates. 3394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm Any fields deleted and re-inserted are always appended to the header 3404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm list. 3414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """ 3424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return [v for k, v in self._headers] 3434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 3444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def items(self): 3454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """Get all the message's header fields and values. 3464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 3474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm These will be sorted in the order they appeared in the original 3484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm message, or were added to the message, and may contain duplicates. 3494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm Any fields deleted and re-inserted are always appended to the header 3504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm list. 3514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """ 3524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return self._headers[:] 3534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 3544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def get(self, name, failobj=None): 3554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """Get a header value. 3564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 3574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm Like __getitem__() but return failobj instead of None when the field 3584710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm is missing. 3594710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """ 3604710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm name = name.lower() 3614710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for k, v in self._headers: 3624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if k.lower() == name: 3634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return v 3644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return failobj 3654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 3664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # 3674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # Additional useful stuff 3684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # 3694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 3704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def get_all(self, name, failobj=None): 3714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """Return a list of all the values for the named field. 3724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 3734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm These will be sorted in the order they appeared in the original 3744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm message, and may contain duplicates. Any fields deleted and 3754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm re-inserted are always appended to the header list. 3764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 3774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm If no such fields exist, failobj is returned (defaults to None). 3784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """ 3794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm values = [] 3804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm name = name.lower() 3814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for k, v in self._headers: 3824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if k.lower() == name: 3834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm values.append(v) 3844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if not values: 3854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return failobj 3864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return values 3874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 3884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def add_header(self, _name, _value, **_params): 3894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """Extended header setting. 3904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 3914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm name is the header field to add. keyword arguments can be used to set 3924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm additional parameters for the header field, with underscores converted 3934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm to dashes. Normally the parameter will be added as key="value" unless 3944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm value is None, in which case only the key will be added. If a 3954710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm parameter value contains non-ASCII characters it must be specified as a 3964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm three-tuple of (charset, language, value), in which case it will be 3974710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm encoded according to RFC2231 rules. 3984710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 3994710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm Example: 4004710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 4014710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm msg.add_header('content-disposition', 'attachment', filename='bud.gif') 4024710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """ 4034710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm parts = [] 4044710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for k, v in _params.items(): 4054710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if v is None: 4064710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm parts.append(k.replace('_', '-')) 4074710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm else: 4084710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm parts.append(_formatparam(k.replace('_', '-'), v)) 4094710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if _value is not None: 4104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm parts.insert(0, _value) 4114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self._headers.append((_name, SEMISPACE.join(parts))) 4124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 4134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def replace_header(self, _name, _value): 4144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """Replace a header. 4154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 4164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm Replace the first matching header found in the message, retaining 4174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm header order and case. If no matching header was found, a KeyError is 4184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm raised. 4194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """ 4204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm _name = _name.lower() 4214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for i, (k, v) in zip(range(len(self._headers)), self._headers): 4224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if k.lower() == _name: 4234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self._headers[i] = (k, _value) 4244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm break 4254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm else: 4264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm raise KeyError(_name) 4274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 4284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # 4294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # Use these three methods instead of the three above. 4304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # 4314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 4324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def get_content_type(self): 4334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """Return the message's content type. 4344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 4354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm The returned string is coerced to lower case of the form 4364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm `maintype/subtype'. If there was no Content-Type header in the 4374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm message, the default type as given by get_default_type() will be 4384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm returned. Since according to RFC 2045, messages always have a default 4394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm type this will always return a value. 4404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 4414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm RFC 2045 defines a message's default type to be text/plain unless it 4424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm appears inside a multipart/digest container, in which case it would be 4434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm message/rfc822. 4444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """ 4454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm missing = object() 4464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm value = self.get('content-type', missing) 4474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if value is missing: 4484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # This should have no parameters 4494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return self.get_default_type() 4504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ctype = _splitparam(value)[0].lower() 4514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # RFC 2045, section 5.2 says if its invalid, use text/plain 4524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if ctype.count('/') != 1: 4534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return 'text/plain' 4544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return ctype 4554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 4564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def get_content_maintype(self): 4574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """Return the message's main content type. 4584710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 4594710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm This is the `maintype' part of the string returned by 4604710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm get_content_type(). 4614710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """ 4624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ctype = self.get_content_type() 4634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return ctype.split('/')[0] 4644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 4654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def get_content_subtype(self): 4664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """Returns the message's sub-content type. 4674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 4684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm This is the `subtype' part of the string returned by 4694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm get_content_type(). 4704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """ 4714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ctype = self.get_content_type() 4724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return ctype.split('/')[1] 4734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 4744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def get_default_type(self): 4754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """Return the `default' content type. 4764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 4774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm Most messages have a default content type of text/plain, except for 4784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm messages that are subparts of multipart/digest containers. Such 4794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm subparts have a default content type of message/rfc822. 4804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """ 4814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return self._default_type 4824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 4834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def set_default_type(self, ctype): 4844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """Set the `default' content type. 4854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 4864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ctype should be either "text/plain" or "message/rfc822", although this 4874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm is not enforced. The default content type is not stored in the 4884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm Content-Type header. 4894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """ 4904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self._default_type = ctype 4914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 4924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def _get_params_preserve(self, failobj, header): 4934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # Like get_params() but preserves the quoting of values. BAW: 4944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # should this be part of the public interface? 4954710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm missing = object() 4964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm value = self.get(header, missing) 4974710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if value is missing: 4984710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return failobj 4994710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm params = [] 5004710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for p in _parseparam(';' + value): 5014710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm try: 5024710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm name, val = p.split('=', 1) 5034710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm name = name.strip() 5044710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm val = val.strip() 5054710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm except ValueError: 5064710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # Must have been a bare attribute 5074710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm name = p.strip() 5084710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm val = '' 5094710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm params.append((name, val)) 5104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm params = utils.decode_params(params) 5114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return params 5124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 5134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def get_params(self, failobj=None, header='content-type', unquote=True): 5144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """Return the message's Content-Type parameters, as a list. 5154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 5164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm The elements of the returned list are 2-tuples of key/value pairs, as 5174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm split on the `=' sign. The left hand side of the `=' is the key, 5184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm while the right hand side is the value. If there is no `=' sign in 5194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm the parameter the value is the empty string. The value is as 5204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm described in the get_param() method. 5214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 5224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm Optional failobj is the object to return if there is no Content-Type 5234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm header. Optional header is the header to search instead of 5244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm Content-Type. If unquote is True, the value is unquoted. 5254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """ 5264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm missing = object() 5274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm params = self._get_params_preserve(missing, header) 5284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if params is missing: 5294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return failobj 5304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if unquote: 5314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return [(k, _unquotevalue(v)) for k, v in params] 5324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm else: 5334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return params 5344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 5354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def get_param(self, param, failobj=None, header='content-type', 5364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm unquote=True): 5374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """Return the parameter value if found in the Content-Type header. 5384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 5394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm Optional failobj is the object to return if there is no Content-Type 5404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm header, or the Content-Type header has no such parameter. Optional 5414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm header is the header to search instead of Content-Type. 5424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 5434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm Parameter keys are always compared case insensitively. The return 5444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm value can either be a string, or a 3-tuple if the parameter was RFC 5454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 2231 encoded. When it's a 3-tuple, the elements of the value are of 5464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm the form (CHARSET, LANGUAGE, VALUE). Note that both CHARSET and 5474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm LANGUAGE can be None, in which case you should consider VALUE to be 5484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm encoded in the us-ascii charset. You can usually ignore LANGUAGE. 5494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 5504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm Your application should be prepared to deal with 3-tuple return 5514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm values, and can convert the parameter to a Unicode string like so: 5524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 5534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm param = msg.get_param('foo') 5544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if isinstance(param, tuple): 5554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm param = unicode(param[2], param[0] or 'us-ascii') 5564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 5574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm In any case, the parameter value (either the returned string, or the 5584710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm VALUE item in the 3-tuple) is always unquoted, unless unquote is set 5594710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm to False. 5604710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """ 5614710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if header not in self: 5624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return failobj 5634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for k, v in self._get_params_preserve(failobj, header): 5644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if k.lower() == param.lower(): 5654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if unquote: 5664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return _unquotevalue(v) 5674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm else: 5684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return v 5694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return failobj 5704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 5714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def set_param(self, param, value, header='Content-Type', requote=True, 5724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm charset=None, language=''): 5734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """Set a parameter in the Content-Type header. 5744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 5754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm If the parameter already exists in the header, its value will be 5764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm replaced with the new value. 5774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 5784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm If header is Content-Type and has not yet been defined for this 5794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm message, it will be set to "text/plain" and the new parameter and 5804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm value will be appended as per RFC 2045. 5814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 5824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm An alternate header can specified in the header argument, and all 5834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm parameters will be quoted as necessary unless requote is False. 5844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 5854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm If charset is specified, the parameter will be encoded according to RFC 5864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 2231. Optional language specifies the RFC 2231 language, defaulting 5874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm to the empty string. Both charset and language should be strings. 5884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """ 5894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if not isinstance(value, tuple) and charset: 5904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm value = (charset, language, value) 5914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 5924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if header not in self and header.lower() == 'content-type': 5934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ctype = 'text/plain' 5944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm else: 5954710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ctype = self.get(header) 5964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if not self.get_param(param, header=header): 5974710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if not ctype: 5984710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ctype = _formatparam(param, value, requote) 5994710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm else: 6004710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ctype = SEMISPACE.join( 6014710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm [ctype, _formatparam(param, value, requote)]) 6024710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm else: 6034710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ctype = '' 6044710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for old_param, old_value in self.get_params(header=header, 6054710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm unquote=requote): 6064710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm append_param = '' 6074710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if old_param.lower() == param.lower(): 6084710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm append_param = _formatparam(param, value, requote) 6094710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm else: 6104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm append_param = _formatparam(old_param, old_value, requote) 6114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if not ctype: 6124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ctype = append_param 6134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm else: 6144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ctype = SEMISPACE.join([ctype, append_param]) 6154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if ctype != self.get(header): 6164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm del self[header] 6174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self[header] = ctype 6184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 6194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def del_param(self, param, header='content-type', requote=True): 6204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """Remove the given parameter completely from the Content-Type header. 6214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 6224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm The header will be re-written in place without the parameter or its 6234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm value. All values will be quoted as necessary unless requote is 6244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm False. Optional header specifies an alternative to the Content-Type 6254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm header. 6264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """ 6274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if header not in self: 6284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return 6294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm new_ctype = '' 6304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for p, v in self.get_params(header=header, unquote=requote): 6314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if p.lower() != param.lower(): 6324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if not new_ctype: 6334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm new_ctype = _formatparam(p, v, requote) 6344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm else: 6354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm new_ctype = SEMISPACE.join([new_ctype, 6364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm _formatparam(p, v, requote)]) 6374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if new_ctype != self.get(header): 6384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm del self[header] 6394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self[header] = new_ctype 6404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 6414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def set_type(self, type, header='Content-Type', requote=True): 6424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """Set the main type and subtype for the Content-Type header. 6434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 6444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm type must be a string in the form "maintype/subtype", otherwise a 6454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ValueError is raised. 6464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 6474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm This method replaces the Content-Type header, keeping all the 6484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm parameters in place. If requote is False, this leaves the existing 6494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm header's quoting as is. Otherwise, the parameters will be quoted (the 6504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm default). 6514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 6524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm An alternative header can be specified in the header argument. When 6534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm the Content-Type header is set, we'll always also add a MIME-Version 6544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm header. 6554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """ 6564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # BAW: should we be strict? 6574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if not type.count('/') == 1: 6584710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm raise ValueError 6594710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # Set the Content-Type, you get a MIME-Version 6604710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if header.lower() == 'content-type': 6614710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm del self['mime-version'] 6624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self['MIME-Version'] = '1.0' 6634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if header not in self: 6644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self[header] = type 6654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return 6664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm params = self.get_params(header=header, unquote=requote) 6674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm del self[header] 6684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self[header] = type 6694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # Skip the first param; it's the old type. 6704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for p, v in params[1:]: 6714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.set_param(p, v, header, requote) 6724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 6734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def get_filename(self, failobj=None): 6744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """Return the filename associated with the payload if present. 6754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 6764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm The filename is extracted from the Content-Disposition header's 6774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm `filename' parameter, and it is unquoted. If that header is missing 6784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm the `filename' parameter, this method falls back to looking for the 6794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm `name' parameter. 6804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """ 6814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm missing = object() 6824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm filename = self.get_param('filename', missing, 'content-disposition') 6834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if filename is missing: 6844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm filename = self.get_param('name', missing, 'content-type') 6854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if filename is missing: 6864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return failobj 6874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return utils.collapse_rfc2231_value(filename).strip() 6884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 6894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def get_boundary(self, failobj=None): 6904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """Return the boundary associated with the payload if present. 6914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 6924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm The boundary is extracted from the Content-Type header's `boundary' 6934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm parameter, and it is unquoted. 6944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """ 6954710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm missing = object() 6964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm boundary = self.get_param('boundary', missing) 6974710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if boundary is missing: 6984710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return failobj 6994710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # RFC 2046 says that boundaries may begin but not end in w/s 7004710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return utils.collapse_rfc2231_value(boundary).rstrip() 7014710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 7024710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def set_boundary(self, boundary): 7034710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """Set the boundary parameter in Content-Type to 'boundary'. 7044710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 7054710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm This is subtly different than deleting the Content-Type header and 7064710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm adding a new one with a new boundary parameter via add_header(). The 7074710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm main difference is that using the set_boundary() method preserves the 7084710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm order of the Content-Type header in the original message. 7094710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 7104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm HeaderParseError is raised if the message has no Content-Type header. 7114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """ 7124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm missing = object() 7134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm params = self._get_params_preserve(missing, 'content-type') 7144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if params is missing: 7154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # There was no Content-Type header, and we don't know what type 7164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # to set it to, so raise an exception. 7174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm raise errors.HeaderParseError('No Content-Type header found') 7184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm newparams = [] 7194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm foundp = False 7204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for pk, pv in params: 7214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if pk.lower() == 'boundary': 7224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm newparams.append(('boundary', '"%s"' % boundary)) 7234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm foundp = True 7244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm else: 7254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm newparams.append((pk, pv)) 7264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if not foundp: 7274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # The original Content-Type header had no boundary attribute. 7284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # Tack one on the end. BAW: should we raise an exception 7294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # instead??? 7304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm newparams.append(('boundary', '"%s"' % boundary)) 7314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # Replace the existing Content-Type header with the new value 7324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm newheaders = [] 7334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for h, v in self._headers: 7344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if h.lower() == 'content-type': 7354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm parts = [] 7364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for k, v in newparams: 7374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if v == '': 7384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm parts.append(k) 7394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm else: 7404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm parts.append('%s=%s' % (k, v)) 7414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm newheaders.append((h, SEMISPACE.join(parts))) 7424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 7434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm else: 7444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm newheaders.append((h, v)) 7454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self._headers = newheaders 7464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 7474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def get_content_charset(self, failobj=None): 7484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """Return the charset parameter of the Content-Type header. 7494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 7504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm The returned string is always coerced to lower case. If there is no 7514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm Content-Type header, or if that header has no charset parameter, 7524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm failobj is returned. 7534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """ 7544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm missing = object() 7554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm charset = self.get_param('charset', missing) 7564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if charset is missing: 7574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return failobj 7584710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if isinstance(charset, tuple): 7594710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # RFC 2231 encoded, so decode it, and it better end up as ascii. 7604710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm pcharset = charset[0] or 'us-ascii' 7614710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm try: 7624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # LookupError will be raised if the charset isn't known to 7634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # Python. UnicodeError will be raised if the encoded text 7644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # contains a character not in the charset. 7654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm charset = unicode(charset[2], pcharset).encode('us-ascii') 7664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm except (LookupError, UnicodeError): 7674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm charset = charset[2] 7684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # charset character must be in us-ascii range 7694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm try: 7704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if isinstance(charset, str): 7714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm charset = unicode(charset, 'us-ascii') 7724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm charset = charset.encode('us-ascii') 7734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm except UnicodeError: 7744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return failobj 7754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # RFC 2046, $4.1.2 says charsets are not case sensitive 7764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return charset.lower() 7774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 7784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def get_charsets(self, failobj=None): 7794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """Return a list containing the charset(s) used in this message. 7804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 7814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm The returned list of items describes the Content-Type headers' 7824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm charset parameter for this message and all the subparts in its 7834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm payload. 7844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 7854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm Each item will either be a string (the value of the charset parameter 7864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm in the Content-Type header of that part) or the value of the 7874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 'failobj' parameter (defaults to None), if the part does not have a 7884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm main MIME type of "text", or the charset is not defined. 7894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 7904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm The list will contain one string for each part of the message, plus 7914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm one for the container message (i.e. self), so that a non-multipart 7924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm message will still return a list of length 1. 7934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """ 7944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return [part.get_content_charset(failobj) for part in self.walk()] 7954710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 7964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # I.e. def walk(self): ... 7974710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm from email.iterators import walk 798