1# Copyright (C) 2002-2007 Python Software Foundation
2# Author: Ben Gertzfield
3# Contact: email-sig@python.org
4
5"""Base64 content transfer encoding per RFCs 2045-2047.
6
This module handles the content transfer encoding method defined in RFC 2045
to encode arbitrary 8-bit data by representing every three 8-bit bytes as four
7-bit characters, an encoding known as Base64.
10
11It is used in the MIME standards for email to attach images, audio, and text
12using some 8-bit character sets to messages.
13
14This module provides an interface to encode and decode both headers and bodies
15with Base64 encoding.
16
RFC 2047 defines a method for including character set information in an
`encoded-word' in a header.  This method is commonly used for 8-bit real names
in To:, From:, Cc:, etc. fields, as well as Subject: lines.
20
21This module does not do the line wrapping or end-of-line character conversion
22necessary for proper internationalized headers; it only does dumb encoding and
23decoding.  To deal with the various line wrapping issues, use the email.header
24module.
25"""
26
# Public API of this module: the names exported by a star-import.
__all__ = [
    'body_decode',
    'body_encode',
    'decode',
    'decodestring',
    'header_encode',
    'header_length',
    ]
35
36
37from base64 import b64encode
38from binascii import b2a_base64, a2b_base64
39
# Line-ending and separator constants.  NL is the default end-of-line used by
# body_encode(); EMPTYSTRING is the separator it joins encoded lines with.
# CRLF is not referenced by the functions in this module.
CRLF = '\r\n'
NL = '\n'
EMPTYSTRING = ''

# See also Charset.py
# NOTE(review): not referenced in this module; presumably the 7 characters of
# encoded-word framing ('=?', '?b?', '?=') produced by header_encode() --
# confirm against Charset.py.
MISC_LEN = 7
46
47
48
49# Helpers
def header_length(bytearray):
    """Return the length of bytearray once it has been base64 encoded.

    Only len(bytearray) is consulted, so any sized object is accepted.
    """
    full_groups, remainder = divmod(len(bytearray), 3)
    # Every 3 input bytes become 4 output characters; a trailing partial
    # group is padded out to a complete 4-character unit as well.
    padded_groups = full_groups + (1 if remainder else 0)
    return 4 * padded_groups
58
59
60
def header_encode(header_bytes, charset='iso-8859-1'):
    """Return header_bytes wrapped as a Base64 (RFC 2045) encoded-word.

    header_bytes may be bytes or str; a str is first converted to bytes using
    charset.  charset (default iso-8859-1) is also the character set name
    written into the result.  Empty input yields the empty string.
    """
    if not header_bytes:
        return ""
    raw = (header_bytes.encode(charset)
           if isinstance(header_bytes, str) else header_bytes)
    # b64encode() returns bytes limited to the base64 alphabet, so decoding
    # them as ASCII always succeeds.
    payload = b64encode(raw).decode("ascii")
    return '=?%s?b?%s?=' % (charset, payload)
73
74
75
def body_encode(s, maxlinelen=76, eol=NL):
    r"""Encode s with base64, returning the encoded text as a str.

    s is split into chunks and each chunk is handed to binascii.b2a_base64()
    to produce one output line of at most maxlinelen characters (defaults to
    76 characters).  The chunk size is always a whole number of 3-byte
    groups, so only the last line can carry "=" padding and the joined result
    decodes back to the original data.  When maxlinelen is smaller than 4,
    each line holds a single 4-character group, the shortest line base64 can
    produce.

    Each line of encoded text will end with eol, which defaults to "\n".  Set
    this to "\r\n" if you will be using the result of this function directly
    in an email.

    Empty (falsy) input yields the empty string.
    """
    if not s:
        # Always hand back a str, matching the type of every non-empty result.
        return ""

    # Largest multiple of 3 input bytes whose encoding fits in maxlinelen
    # characters, clamped to one 3-byte group so the range() step is never 0.
    max_unencoded = max(maxlinelen // 4 * 3, 3)
    encvec = []
    for start in range(0, len(s), max_unencoded):
        # b2a_base64() terminates its output with "\n"; swap that for eol
        # when the caller asked for a different line ending.
        enc = b2a_base64(s[start:start + max_unencoded]).decode("ascii")
        if enc.endswith(NL) and eol != NL:
            enc = enc[:-1] + eol
        encvec.append(enc)
    return EMPTYSTRING.join(encvec)
99
100
101
def decode(string):
    """Decode a raw base64 string, returning a bytes object.

    string may be a str or a bytes-like object; a str is converted to bytes
    with the raw-unicode-escape codec before decoding.  Empty (falsy) input
    yields empty bytes.

    This function does not parse a full MIME header value encoded with
    base64 (like =?iso-8859-1?b?bmloISBuaWgh?=) -- please use the high
    level email.header class for that functionality.
    """
    if not string:
        return b''
    if isinstance(string, str):
        encoded = string.encode('raw-unicode-escape')
    else:
        encoded = string
    return a2b_base64(encoded)
115
116
# For convenience and backwards compatibility with the standard base64 module:
# body_decode and decodestring are plain aliases of decode().
body_decode = decode
decodestring = decode
120