base64mime.py revision 2cc1f6d95b11f017f04921ea33a5cfc369e32e97
# Copyright (C) 2002-2007 Python Software Foundation
# Author: Ben Gertzfield
# Contact: email-sig@python.org

"""Base64 content transfer encoding per RFCs 2045-2047.

This module handles the content transfer encoding method defined in RFC 2045
to encode arbitrary 8-bit data using the three 8-bit bytes in four 7-bit
characters encoding known as Base64.

It is used in the MIME standards for email to attach images, audio, and text
using some 8-bit character sets to messages.

This module provides an interface to encode and decode both headers and bodies
with Base64 encoding.

RFC 2047 defines a method for including character set information in an
`encoded-word' in a header.  This method is commonly used for 8-bit real names
in To:, From:, Cc:, etc. fields, as well as Subject: lines.

This module does not do the line wrapping or end-of-line character conversion
necessary for proper internationalized headers; it only does dumb encoding and
decoding.  To deal with the various line wrapping issues, use the email.header
module.
"""

__all__ = [
    'body_decode',
    'body_encode',
    'decode',
    'decodestring',
    'encode',
    'encodestring',
    'header_encode',
    'header_length',
    ]

import re

from base64 import b64encode
from binascii import b2a_base64, a2b_base64

CRLF = '\r\n'
NL = '\n'
EMPTYSTRING = ''

# See also Charset.py
MISC_LEN = 7


# Helpers
def header_length(bytearray):
    """Return the length of bytearray when it is encoded with base64.

    Only len(bytearray) is consulted, so any sized object may be passed.
    """
    groups_of_3, leftover = divmod(len(bytearray), 3)
    # 4 bytes out for each 3 bytes (or nonzero fraction thereof) in.
    n = groups_of_3 * 4
    if leftover:
        n += 4
    return n


def header_encode(header_bytes, charset='iso-8859-1'):
    """Encode a single header line with Base64 encoding in a given charset.

    header_bytes is the raw header value.  A str is accepted as well and is
    first encoded to bytes using charset.

    charset names the character set to use to encode the header.  It defaults
    to iso-8859-1.  Base64 encoding is defined in RFC 2045.

    Returns a str of the form '=?charset?b?payload?=', or the empty string
    when header_bytes is empty.
    """
    # An empty header encodes to an empty string (not to the repr of b'').
    if not header_bytes:
        return EMPTYSTRING
    if isinstance(header_bytes, str):
        header_bytes = header_bytes.encode(charset)
    # b64encode() returns bytes; the base64 alphabet is pure ASCII, so decode
    # it rather than letting %s embed the bytes repr ("b'...'").
    encoded = b64encode(header_bytes).decode('ascii')
    return '=?%s?b?%s?=' % (charset, encoded)


def body_encode(s, maxlinelen=76, eol=NL):
    r"""Encode a bytes object with base64, returning a str.

    Each line will be wrapped at, at most, maxlinelen characters (defaults to
    76 characters).

    Each line of encoded text will end with eol, which defaults to "\n".  Set
    this to "\r\n" if you will be using the result of this function directly
    in an email.

    Empty input yields the empty string.
    """
    if not s:
        return EMPTYSTRING

    encvec = []
    # Every 3 input bytes become 4 output characters.
    max_unencoded = maxlinelen * 3 // 4
    for i in range(0, len(s), max_unencoded):
        # BAW: should encode() inherit b2a_base64()'s dubious behavior in
        # adding a newline to the encoded string?
        # b2a_base64() returns ASCII-only bytes; decode them to text instead
        # of taking their repr with str().
        enc = b2a_base64(s[i:i + max_unencoded]).decode('ascii')
        if enc.endswith(NL) and eol != NL:
            enc = enc[:-1] + eol
        encvec.append(enc)
    return EMPTYSTRING.join(encvec)


def decode(string):
    """Decode a raw base64 string, returning a bytes object.

    string may be a str or a bytes-like object; empty input yields b''.

    This function does not parse a full MIME header value encoded with
    base64 (like =?iso-8859-1?b?bmloISBuaWgh?=) -- please use the high
    level email.header.Header class for that functionality.
    """
    if not string:
        return bytes()
    elif isinstance(string, str):
        return a2b_base64(string.encode('raw-unicode-escape'))
    else:
        return a2b_base64(string)


# For convenience and backwards compatibility w/ standard base64 module
encode = body_encode
encodestring = body_encode
body_decode = decode
decodestring = decode