1# Copyright (C) 2001-2010 Python Software Foundation
2# Contact: email-sig@python.org
3
4"""Classes to generate plain text from a message object tree."""
5
6__all__ = ['Generator', 'DecodedGenerator']
7
8import re
9import sys
10import time
11import random
12import warnings
13
14from cStringIO import StringIO
15from email.header import Header
16
# Joins the main type and subtype when building a _handle_<type>() method name.
UNDERSCORE = '_'
# Line separator used when joining pieces of generated text.
NL = '\n'

# Matches "From " at the start of any line (MULTILINE); such lines are
# rewritten to ">From " when From_ mangling is enabled.
fcre = re.compile(r'^From ', re.MULTILINE)
21
22def _is8bitstring(s):
23    if isinstance(s, str):
24        try:
25            unicode(s, 'us-ascii')
26        except UnicodeError:
27            return True
28    return False
29
30
31
32class Generator:
33    """Generates output from a Message object tree.
34
35    This basic generator writes the message to the given file object as plain
36    text.
37    """
38    #
39    # Public interface
40    #
41
42    def __init__(self, outfp, mangle_from_=True, maxheaderlen=78):
43        """Create the generator for message flattening.
44
45        outfp is the output file-like object for writing the message to.  It
46        must have a write() method.
47
48        Optional mangle_from_ is a flag that, when True (the default), escapes
49        From_ lines in the body of the message by putting a `>' in front of
50        them.
51
52        Optional maxheaderlen specifies the longest length for a non-continued
53        header.  When a header line is longer (in characters, with tabs
54        expanded to 8 spaces) than maxheaderlen, the header will split as
55        defined in the Header class.  Set maxheaderlen to zero to disable
56        header wrapping.  The default is 78, as recommended (but not required)
57        by RFC 2822.
58        """
59        self._fp = outfp
60        self._mangle_from_ = mangle_from_
61        self._maxheaderlen = maxheaderlen
62
63    def write(self, s):
64        # Just delegate to the file object
65        self._fp.write(s)
66
67    def flatten(self, msg, unixfrom=False):
68        """Print the message object tree rooted at msg to the output file
69        specified when the Generator instance was created.
70
71        unixfrom is a flag that forces the printing of a Unix From_ delimiter
72        before the first object in the message tree.  If the original message
73        has no From_ delimiter, a `standard' one is crafted.  By default, this
74        is False to inhibit the printing of any From_ delimiter.
75
76        Note that for subobjects, no From_ line is printed.
77        """
78        if unixfrom:
79            ufrom = msg.get_unixfrom()
80            if not ufrom:
81                ufrom = 'From nobody ' + time.ctime(time.time())
82            print >> self._fp, ufrom
83        self._write(msg)
84
85    def clone(self, fp):
86        """Clone this generator with the exact same options."""
87        return self.__class__(fp, self._mangle_from_, self._maxheaderlen)
88
89    #
90    # Protected interface - undocumented ;/
91    #
92
93    def _write(self, msg):
94        # We can't write the headers yet because of the following scenario:
95        # say a multipart message includes the boundary string somewhere in
96        # its body.  We'd have to calculate the new boundary /before/ we write
97        # the headers so that we can write the correct Content-Type:
98        # parameter.
99        #
100        # The way we do this, so as to make the _handle_*() methods simpler,
101        # is to cache any subpart writes into a StringIO.  The we write the
102        # headers and the StringIO contents.  That way, subpart handlers can
103        # Do The Right Thing, and can still modify the Content-Type: header if
104        # necessary.
105        oldfp = self._fp
106        try:
107            self._fp = sfp = StringIO()
108            self._dispatch(msg)
109        finally:
110            self._fp = oldfp
111        # Write the headers.  First we see if the message object wants to
112        # handle that itself.  If not, we'll do it generically.
113        meth = getattr(msg, '_write_headers', None)
114        if meth is None:
115            self._write_headers(msg)
116        else:
117            meth(self)
118        self._fp.write(sfp.getvalue())
119
120    def _dispatch(self, msg):
121        # Get the Content-Type: for the message, then try to dispatch to
122        # self._handle_<maintype>_<subtype>().  If there's no handler for the
123        # full MIME type, then dispatch to self._handle_<maintype>().  If
124        # that's missing too, then dispatch to self._writeBody().
125        main = msg.get_content_maintype()
126        sub = msg.get_content_subtype()
127        specific = UNDERSCORE.join((main, sub)).replace('-', '_')
128        meth = getattr(self, '_handle_' + specific, None)
129        if meth is None:
130            generic = main.replace('-', '_')
131            meth = getattr(self, '_handle_' + generic, None)
132            if meth is None:
133                meth = self._writeBody
134        meth(msg)
135
136    #
137    # Default handlers
138    #
139
140    def _write_headers(self, msg):
141        for h, v in msg.items():
142            print >> self._fp, '%s:' % h,
143            if self._maxheaderlen == 0:
144                # Explicit no-wrapping
145                print >> self._fp, v
146            elif isinstance(v, Header):
147                # Header instances know what to do
148                print >> self._fp, v.encode()
149            elif _is8bitstring(v):
150                # If we have raw 8bit data in a byte string, we have no idea
151                # what the encoding is.  There is no safe way to split this
152                # string.  If it's ascii-subset, then we could do a normal
153                # ascii split, but if it's multibyte then we could break the
154                # string.  There's no way to know so the least harm seems to
155                # be to not split the string and risk it being too long.
156                print >> self._fp, v
157            else:
158                # Header's got lots of smarts, so use it.  Note that this is
159                # fundamentally broken though because we lose idempotency when
160                # the header string is continued with tabs.  It will now be
161                # continued with spaces.  This was reversedly broken before we
162                # fixed bug 1974.  Either way, we lose.
163                print >> self._fp, Header(
164                    v, maxlinelen=self._maxheaderlen, header_name=h).encode()
165        # A blank line always separates headers from body
166        print >> self._fp
167
168    #
169    # Handlers for writing types and subtypes
170    #
171
172    def _handle_text(self, msg):
173        payload = msg.get_payload()
174        if payload is None:
175            return
176        if not isinstance(payload, basestring):
177            raise TypeError('string payload expected: %s' % type(payload))
178        if self._mangle_from_:
179            payload = fcre.sub('>From ', payload)
180        self._fp.write(payload)
181
182    # Default body handler
183    _writeBody = _handle_text
184
185    def _handle_multipart(self, msg):
186        # The trick here is to write out each part separately, merge them all
187        # together, and then make sure that the boundary we've chosen isn't
188        # present in the payload.
189        msgtexts = []
190        subparts = msg.get_payload()
191        if subparts is None:
192            subparts = []
193        elif isinstance(subparts, basestring):
194            # e.g. a non-strict parse of a message with no starting boundary.
195            self._fp.write(subparts)
196            return
197        elif not isinstance(subparts, list):
198            # Scalar payload
199            subparts = [subparts]
200        for part in subparts:
201            s = StringIO()
202            g = self.clone(s)
203            g.flatten(part, unixfrom=False)
204            msgtexts.append(s.getvalue())
205        # BAW: What about boundaries that are wrapped in double-quotes?
206        boundary = msg.get_boundary()
207        if not boundary:
208            # Create a boundary that doesn't appear in any of the
209            # message texts.
210            alltext = NL.join(msgtexts)
211            boundary = _make_boundary(alltext)
212            msg.set_boundary(boundary)
213        # If there's a preamble, write it out, with a trailing CRLF
214        if msg.preamble is not None:
215            if self._mangle_from_:
216                preamble = fcre.sub('>From ', msg.preamble)
217            else:
218                preamble = msg.preamble
219            print >> self._fp, preamble
220        # dash-boundary transport-padding CRLF
221        print >> self._fp, '--' + boundary
222        # body-part
223        if msgtexts:
224            self._fp.write(msgtexts.pop(0))
225        # *encapsulation
226        # --> delimiter transport-padding
227        # --> CRLF body-part
228        for body_part in msgtexts:
229            # delimiter transport-padding CRLF
230            print >> self._fp, '\n--' + boundary
231            # body-part
232            self._fp.write(body_part)
233        # close-delimiter transport-padding
234        self._fp.write('\n--' + boundary + '--')
235        if msg.epilogue is not None:
236            print >> self._fp
237            if self._mangle_from_:
238                epilogue = fcre.sub('>From ', msg.epilogue)
239            else:
240                epilogue = msg.epilogue
241            self._fp.write(epilogue)
242
243    def _handle_multipart_signed(self, msg):
244        # The contents of signed parts has to stay unmodified in order to keep
245        # the signature intact per RFC1847 2.1, so we disable header wrapping.
246        # RDM: This isn't enough to completely preserve the part, but it helps.
247        old_maxheaderlen = self._maxheaderlen
248        try:
249            self._maxheaderlen = 0
250            self._handle_multipart(msg)
251        finally:
252            self._maxheaderlen = old_maxheaderlen
253
254    def _handle_message_delivery_status(self, msg):
255        # We can't just write the headers directly to self's file object
256        # because this will leave an extra newline between the last header
257        # block and the boundary.  Sigh.
258        blocks = []
259        for part in msg.get_payload():
260            s = StringIO()
261            g = self.clone(s)
262            g.flatten(part, unixfrom=False)
263            text = s.getvalue()
264            lines = text.split('\n')
265            # Strip off the unnecessary trailing empty line
266            if lines and lines[-1] == '':
267                blocks.append(NL.join(lines[:-1]))
268            else:
269                blocks.append(text)
270        # Now join all the blocks with an empty line.  This has the lovely
271        # effect of separating each block with an empty line, but not adding
272        # an extra one after the last one.
273        self._fp.write(NL.join(blocks))
274
275    def _handle_message(self, msg):
276        s = StringIO()
277        g = self.clone(s)
278        # The payload of a message/rfc822 part should be a multipart sequence
279        # of length 1.  The zeroth element of the list should be the Message
280        # object for the subpart.  Extract that object, stringify it, and
281        # write it out.
282        # Except, it turns out, when it's a string instead, which happens when
283        # and only when HeaderParser is used on a message of mime type
284        # message/rfc822.  Such messages are generated by, for example,
285        # Groupwise when forwarding unadorned messages.  (Issue 7970.)  So
286        # in that case we just emit the string body.
287        payload = msg.get_payload()
288        if isinstance(payload, list):
289            g.flatten(msg.get_payload(0), unixfrom=False)
290            payload = s.getvalue()
291        self._fp.write(payload)
292
293
294
# Default DecodedGenerator format string, used when no fmt argument is given.
_FMT = '[Non-text (%(type)s) part of message omitted, filename %(filename)s]'
296
class DecodedGenerator(Generator):
    """Generates a text representation of a message.

    Works like the Generator base class, except that every non-text part is
    replaced by a format string describing that part.
    """
    def __init__(self, outfp, mangle_from_=True, maxheaderlen=78, fmt=None):
        """Like Generator.__init__(), with one additional optional argument.

        The generator walks through all subparts of a message.  A subpart
        whose main type is `text' has its decoded payload printed.

        For any other subpart, fmt is a format string that is printed in
        place of the payload.  fmt is expanded with the following keywords
        (in %(keyword)s format):

        type       : Full MIME type of the non-text part
        maintype   : Main MIME type of the non-text part
        subtype    : Sub-MIME type of the non-text part
        filename   : Filename of the non-text part
        description: Description associated with the non-text part
        encoding   : Content transfer encoding of the non-text part

        The default value for fmt is None, meaning

        [Non-text (%(type)s) part of message omitted, filename %(filename)s]
        """
        Generator.__init__(self, outfp, mangle_from_, maxheaderlen)
        self._fmt = _FMT if fmt is None else fmt

    def _dispatch(self, msg):
        """Print every part of msg: text decoded, other parts via the format."""
        for part in msg.walk():
            maintype = part.get_content_maintype()
            if maintype == 'multipart':
                # Just skip this
                continue
            if maintype == 'text':
                print >> self, part.get_payload(decode=True)
                continue
            # Any other part is replaced by the expanded format string.
            substitutions = {
                'type'       : part.get_content_type(),
                'maintype'   : part.get_content_maintype(),
                'subtype'    : part.get_content_subtype(),
                'filename'   : part.get_filename('[no filename]'),
                'description': part.get('Content-Description',
                                        '[no description]'),
                'encoding'   : part.get('Content-Transfer-Encoding',
                                        '[no encoding]'),
                }
            print >> self, self._fmt % substitutions
350
351
352
# Helper
# _width is the number of characters in repr(sys.maxint-1); _fmt is a
# zero-padded decimal format of that width, used for the random boundary
# token.
_width = len(repr(sys.maxint-1))
_fmt = '%%0%dd' % _width
356
def _make_boundary(text=None):
    """Return a random boundary string.

    If text is given, the returned boundary does not occur in text as a
    delimiter line (`--boundary') or close-delimiter line (`--boundary--');
    a numeric suffix is appended until that is the case.
    """
    token = random.randrange(sys.maxint)
    boundary = ('=' * 15) + (_fmt % token) + '=='
    if text is None:
        return boundary
    candidate = boundary
    counter = 0
    # Keep trying suffixed variants for as long as the candidate collides.
    while re.search('^--' + re.escape(candidate) + '(--)?$', text,
                    re.MULTILINE):
        candidate = boundary + '.' + str(counter)
        counter += 1
    return candidate
373