183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh# Copyright (C) 2001-2006 Python Software Foundation
283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh# Author: Ben Gertzfield, Barry Warsaw
383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh# Contact: email-sig@python.org
483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh__all__ = [
683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh    'Charset',
783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh    'add_alias',
883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh    'add_charset',
983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh    'add_codec',
1083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh    ]
1183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
1283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsiehimport codecs
1383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsiehimport email.base64mime
1483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsiehimport email.quoprimime
1583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
1683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsiehfrom email import errors
1783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsiehfrom email.encoders import encode_7or8bit
1883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
1983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
2083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
# Flags selecting how text in a charset is encoded for headers or bodies.
QP = 1          # quoted-printable
BASE64 = 2      # base64
SHORTEST = 3    # the shorter of QP and base64; only meaningful for headers

# Fixed overhead of an encoded word: in "=?charset?q?hello_world?=" the
# "=?", "?q?" and "?=" delimiters add up to 7 characters.
MISC_LEN = 7

DEFAULT_CHARSET = 'us-ascii'



# Default registry, keyed by input charset name.  Each value is the tuple
# (header encoding, body encoding, output charset).
CHARSETS = {
    'iso-8859-1': (QP, QP, None),
    'iso-8859-2': (QP, QP, None),
    'iso-8859-3': (QP, QP, None),
    'iso-8859-4': (QP, QP, None),
    # Deliberately absent:
    #   iso-8859-5 (Cyrillic) and iso-8859-6 (Arabic) are not much used;
    #   iso-8859-7 (Greek) and iso-8859-8 (Hebrew) are not made readable
    #   by QP.
    'iso-8859-9': (QP, QP, None),
    'iso-8859-10': (QP, QP, None),
    # iso-8859-11 (Thai) is absent as well: QP will not make it readable.
    'iso-8859-13': (QP, QP, None),
    'iso-8859-14': (QP, QP, None),
    'iso-8859-15': (QP, QP, None),
    'iso-8859-16': (QP, QP, None),
    'windows-1252': (QP, QP, None),
    'viscii': (QP, QP, None),
    'us-ascii': (None, None, None),
    'big5': (BASE64, BASE64, None),
    'gb2312': (BASE64, BASE64, None),
    'euc-jp': (BASE64, None, 'iso-2022-jp'),
    'shift_jis': (BASE64, None, 'iso-2022-jp'),
    'iso-2022-jp': (BASE64, None, None),
    'koi8-r': (BASE64, BASE64, None),
    'utf-8': (SHORTEST, BASE64, 'utf-8'),
    # Not a real charset name: invented here to stand for raw, unencoded
    # 8-bit data.
    '8bit': (None, BASE64, 'utf-8'),
    }

# Other commonly seen spellings of charset names, mapped to the names used
# in email.
ALIASES = {
    'latin_1': 'iso-8859-1',
    'latin-1': 'iso-8859-1',
    'latin_2': 'iso-8859-2',
    'latin-2': 'iso-8859-2',
    'latin_3': 'iso-8859-3',
    'latin-3': 'iso-8859-3',
    'latin_4': 'iso-8859-4',
    'latin-4': 'iso-8859-4',
    'latin_5': 'iso-8859-9',
    'latin-5': 'iso-8859-9',
    'latin_6': 'iso-8859-10',
    'latin-6': 'iso-8859-10',
    'latin_7': 'iso-8859-13',
    'latin-7': 'iso-8859-13',
    'latin_8': 'iso-8859-14',
    'latin-8': 'iso-8859-14',
    'latin_9': 'iso-8859-15',
    'latin-9': 'iso-8859-15',
    'latin_10': 'iso-8859-16',
    'latin-10': 'iso-8859-16',
    'cp949': 'ks_c_5601-1987',
    'euc_jp': 'euc-jp',
    'euc_kr': 'euc-kr',
    'ascii': 'us-ascii',
    }


# Charset name -> name of the Python codec used to convert to/from Unicode.
CODEC_MAP = {
    'gb2312': 'eucgb2312_cn',
    'big5': 'big5_tw',
    # Hack: text labelled us-ascii gets no codec at all.  All sorts of
    # garbage may arrive in the guise of 7-bit us-ascii, so it is passed
    # through without any conversion to or from Unicode.
    'us-ascii': None,
    }
10483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
10583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
10683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
10783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh# Convenience functions for extending the above mappings
def add_charset(charset, header_enc=None, body_enc=None, output_charset=None):
    """Register (or replace) a character set's entry in the global registry.

    charset is the input character set and must be its canonical name.

    header_enc and body_enc are optional and say how message headers and
    message bodies in that charset are to be encoded.  Each is one of
    Charset.QP (quoted-printable), Charset.BASE64 (base64),
    Charset.SHORTEST (the shorter of the two) or None (no encoding, the
    default).  SHORTEST is accepted for header_enc only; passing it as
    body_enc raises ValueError.

    output_charset is optional and names the character set the output
    should be in.  Charset.convert() goes from the input charset to Unicode
    and then to the output charset.  When omitted, output stays in the
    input character set.

    Both the input and the output charset need a Unicode codec entry in the
    module's charset-to-codec mapping; add_codec(charset, codecname) adds
    codecs the module does not know about.  See the documentation of the
    codecs module for more information.
    """
    entry = (header_enc, body_enc, output_charset)
    if body_enc == SHORTEST:
        raise ValueError('SHORTEST not allowed for body_enc')
    CHARSETS[charset] = entry
13483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
13583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
def add_alias(alias, canonical):
    """Record alias as another name for the charset called canonical.

    alias is the alternative name (for example latin-1); canonical is the
    canonical name of the character set (for example iso-8859-1).  An
    existing entry for the same alias is overwritten.
    """
    ALIASES[alias] = canonical
14383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
14483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
def add_codec(charset, codecname):
    """Associate a charset with the codec that maps it to/from Unicode.

    charset is the canonical name of a character set.  codecname is the
    name of a Python codec, in the form accepted as the second argument of
    the unicode() built-in or by the encode() method of a Unicode string.
    An existing entry for the same charset is overwritten.
    """
    CODEC_MAP[charset] = codecname
15383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
15483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
15583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
15683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsiehclass Charset:
15783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh    """Map character sets to their email properties.
15883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
15983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh    This class provides information about the requirements imposed on email
16083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh    for a specific character set.  It also provides convenience routines for
16183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh    converting between character sets, given the availability of the
16283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh    applicable codecs.  Given a character set, it will do its best to provide
16383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh    information on how to use that character set in an email in an
16483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh    RFC-compliant way.
16583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
16683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh    Certain character sets must be encoded with quoted-printable or base64
16783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh    when used in email headers or bodies.  Certain character sets must be
    converted outright, and are not allowed in email.  Instances of this
    class expose the following information about a character set:
17083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
17183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh    input_charset: The initial character set specified.  Common aliases
17283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh                   are converted to their `official' email names (e.g. latin_1
17383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh                   is converted to iso-8859-1).  Defaults to 7-bit us-ascii.
17483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
17583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh    header_encoding: If the character set must be encoded before it can be
17683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh                     used in an email header, this attribute will be set to
17783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh                     Charset.QP (for quoted-printable), Charset.BASE64 (for
17883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh                     base64 encoding), or Charset.SHORTEST for the shortest of
17983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh                     QP or BASE64 encoding.  Otherwise, it will be None.
18083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
18183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh    body_encoding: Same as header_encoding, but describes the encoding for the
18283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh                   mail message's body, which indeed may be different than the
18383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh                   header encoding.  Charset.SHORTEST is not allowed for
18483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh                   body_encoding.
18583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
    output_charset: Some character sets must be converted before they can be
18783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh                    used in email headers or bodies.  If the input_charset is
18883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh                    one of them, this attribute will contain the name of the
                    charset output will be converted to.  Otherwise, it will
                    be the same as input_charset.
19183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
19283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh    input_codec: The name of the Python codec used to convert the
19383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh                 input_charset to Unicode.  If no conversion codec is
19483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh                 necessary, this attribute will be None.
19583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
19683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh    output_codec: The name of the Python codec used to convert Unicode
19783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh                  to the output_charset.  If no conversion codec is necessary,
19883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh                  this attribute will have the same value as the input_codec.
19983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh    """
20083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh    def __init__(self, input_charset=DEFAULT_CHARSET):
20183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        # RFC 2046, $4.1.2 says charsets are not case sensitive.  We coerce to
20283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        # unicode because its .lower() is locale insensitive.  If the argument
20383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        # is already a unicode, we leave it at that, but ensure that the
20483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        # charset is ASCII, as the standard (RFC XXX) requires.
20583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        try:
20683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh            if isinstance(input_charset, unicode):
20783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh                input_charset.encode('ascii')
20883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh            else:
20983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh                input_charset = unicode(input_charset, 'ascii')
21083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        except UnicodeError:
21183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh            raise errors.CharsetError(input_charset)
21283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        input_charset = input_charset.lower().encode('ascii')
21383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        # Set the input charset after filtering through the aliases and/or codecs
21483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        if not (input_charset in ALIASES or input_charset in CHARSETS):
21583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh            try:
21683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh                input_charset = codecs.lookup(input_charset).name
21783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh            except LookupError:
21883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh                pass
21983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        self.input_charset = ALIASES.get(input_charset, input_charset)
22083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        # We can try to guess which encoding and conversion to use by the
22183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        # charset_map dictionary.  Try that first, but let the user override
22283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        # it.
22383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        henc, benc, conv = CHARSETS.get(self.input_charset,
22483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh                                        (SHORTEST, BASE64, None))
22583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        if not conv:
22683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh            conv = self.input_charset
22783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        # Set the attributes, allowing the arguments to override the default.
22883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        self.header_encoding = henc
22983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        self.body_encoding = benc
23083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        self.output_charset = ALIASES.get(conv, conv)
23183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        # Now set the codecs.  If one isn't defined for input_charset,
23283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        # guess and try a Unicode codec with the same name as input_codec.
23383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        self.input_codec = CODEC_MAP.get(self.input_charset,
23483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh                                         self.input_charset)
23583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        self.output_codec = CODEC_MAP.get(self.output_charset,
23683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh                                          self.output_charset)
23783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
23883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh    def __str__(self):
23983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        return self.input_charset.lower()
24083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
24183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh    __repr__ = __str__
24283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
24383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh    def __eq__(self, other):
24483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        return str(self) == str(other).lower()
24583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
24683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh    def __ne__(self, other):
24783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        return not self.__eq__(other)
24883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
24983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh    def get_body_encoding(self):
25083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        """Return the content-transfer-encoding used for body encoding.
25183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
25283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        This is either the string `quoted-printable' or `base64' depending on
25383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        the encoding used, or it is a function in which case you should call
25483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        the function with a single argument, the Message object being
25583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        encoded.  The function should then set the Content-Transfer-Encoding
25683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        header itself to whatever is appropriate.
25783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
25883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        Returns "quoted-printable" if self.body_encoding is QP.
25983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        Returns "base64" if self.body_encoding is BASE64.
26083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        Returns "7bit" otherwise.
26183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        """
26283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        assert self.body_encoding != SHORTEST
26383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        if self.body_encoding == QP:
26483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh            return 'quoted-printable'
26583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        elif self.body_encoding == BASE64:
26683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh            return 'base64'
26783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        else:
26883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh            return encode_7or8bit
26983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
27083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh    def convert(self, s):
27183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        """Convert a string from the input_codec to the output_codec."""
27283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        if self.input_codec != self.output_codec:
27383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh            return unicode(s, self.input_codec).encode(self.output_codec)
27483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        else:
27583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh            return s
27683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
27783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh    def to_splittable(self, s):
27883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        """Convert a possibly multibyte string to a safely splittable format.
27983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
28083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        Uses the input_codec to try and convert the string to Unicode, so it
28183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        can be safely split on character boundaries (even for multibyte
28283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        characters).
28383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
28483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        Returns the string as-is if it isn't known how to convert it to
28583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        Unicode with the input_charset.
28683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
28783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        Characters that could not be converted to Unicode will be replaced
28883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        with the Unicode replacement character U+FFFD.
28983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        """
29083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        if isinstance(s, unicode) or self.input_codec is None:
29183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh            return s
29283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        try:
29383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh            return unicode(s, self.input_codec, 'replace')
29483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        except LookupError:
29583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh            # Input codec not installed on system, so return the original
29683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh            # string unchanged.
29783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh            return s
29883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
29983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh    def from_splittable(self, ustr, to_output=True):
30083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        """Convert a splittable string back into an encoded string.
30183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
30283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        Uses the proper codec to try and convert the string from Unicode back
30383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        into an encoded format.  Return the string as-is if it is not Unicode,
30483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        or if it could not be converted from Unicode.
30583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
30683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        Characters that could not be converted from Unicode will be replaced
30783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        with an appropriate character (usually '?').
30883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
30983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        If to_output is True (the default), uses output_codec to convert to an
31083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        encoded format.  If to_output is False, uses input_codec.
31183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        """
31283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        if to_output:
31383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh            codec = self.output_codec
31483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        else:
31583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh            codec = self.input_codec
31683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        if not isinstance(ustr, unicode) or codec is None:
31783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh            return ustr
31883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        try:
31983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh            return ustr.encode(codec, 'replace')
32083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        except LookupError:
32183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh            # Output codec not installed
32283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh            return ustr
32383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
32483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh    def get_output_charset(self):
32583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        """Return the output character set.
32683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
32783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        This is self.output_charset if that is not None, otherwise it is
32883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        self.input_charset.
32983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        """
33083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        return self.output_charset or self.input_charset
33183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
33283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh    def encoded_header_len(self, s):
33383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        """Return the length of the encoded header string."""
33483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        cset = self.get_output_charset()
33583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        # The len(s) of a 7bit encoding is len(s)
33683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        if self.header_encoding == BASE64:
33783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh            return email.base64mime.base64_len(s) + len(cset) + MISC_LEN
33883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        elif self.header_encoding == QP:
33983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh            return email.quoprimime.header_quopri_len(s) + len(cset) + MISC_LEN
34083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        elif self.header_encoding == SHORTEST:
34183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh            lenb64 = email.base64mime.base64_len(s)
34283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh            lenqp = email.quoprimime.header_quopri_len(s)
34383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh            return min(lenb64, lenqp) + len(cset) + MISC_LEN
34483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        else:
34583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh            return len(s)
34683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
34783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh    def header_encode(self, s, convert=False):
34883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        """Header-encode a string, optionally converting it to output_charset.
34983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
35083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        If convert is True, the string will be converted from the input
35183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        charset to the output charset automatically.  This is not useful for
35283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        multibyte character sets, which have line length issues (multibyte
35383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        characters must be split on a character, not a byte boundary); use the
35483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        high-level Header class to deal with these issues.  convert defaults
35583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        to False.
35683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
35783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        The type of encoding (base64 or quoted-printable) will be based on
35883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        self.header_encoding.
35983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        """
36083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        cset = self.get_output_charset()
36183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        if convert:
36283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh            s = self.convert(s)
36383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        # 7bit/8bit encodings return the string unchanged (modulo conversions)
36483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        if self.header_encoding == BASE64:
36583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh            return email.base64mime.header_encode(s, cset)
36683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        elif self.header_encoding == QP:
36783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh            return email.quoprimime.header_encode(s, cset, maxlinelen=None)
36883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        elif self.header_encoding == SHORTEST:
36983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh            lenb64 = email.base64mime.base64_len(s)
37083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh            lenqp = email.quoprimime.header_quopri_len(s)
37183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh            if lenb64 < lenqp:
37283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh                return email.base64mime.header_encode(s, cset)
37383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh            else:
37483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh                return email.quoprimime.header_encode(s, cset, maxlinelen=None)
37583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        else:
37683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh            return s
37783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
37883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh    def body_encode(self, s, convert=True):
37983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        """Body-encode a string and convert it to output_charset.
38083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
38183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        If convert is True (the default), the string will be converted from
38283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        the input charset to output charset automatically.  Unlike
38383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        header_encode(), there are no issues with byte boundaries and
38483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        multibyte charsets in email bodies, so this is usually pretty safe.
38583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
38683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        The type of encoding (base64 or quoted-printable) will be based on
38783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        self.body_encoding.
38883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        """
38983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        if convert:
39083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh            s = self.convert(s)
39183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        # 7bit/8bit encodings return the string unchanged (module conversions)
39283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        if self.body_encoding is BASE64:
39383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh            return email.base64mime.body_encode(s)
39483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        elif self.body_encoding is QP:
39583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh            return email.quoprimime.body_encode(s)
39683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        else:
39783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh            return s
398