1# Copyright (C) 2001-2010 Python Software Foundation
2# Contact: email-sig@python.org
3
4"""Classes to generate plain text from a message object tree."""
5
6__all__ = ['Generator', 'DecodedGenerator']
7
8import re
9import sys
10import time
11import random
12import warnings
13
14from cStringIO import StringIO
15from email.header import Header
16
# Joins the MIME main type and subtype when Generator._dispatch() builds the
# name of a _handle_<maintype>_<subtype>() method.
UNDERSCORE = '_'
# Line separator used when joining and terminating generated text.
NL = '\n'

# Matches "From " at the beginning of any line (MULTILINE); used to escape
# such lines as ">From " when From_ mangling is enabled.
fcre = re.compile(r'^From ', re.MULTILINE)
21
22def _is8bitstring(s):
23    if isinstance(s, str):
24        try:
25            unicode(s, 'us-ascii')
26        except UnicodeError:
27            return True
28    return False
29
30
31
class Generator:
    """Generates output from a Message object tree.

    This basic generator writes the message to the given file object as plain
    text.
    """
    #
    # Public interface
    #

    def __init__(self, outfp, mangle_from_=True, maxheaderlen=78):
        """Create the generator for message flattening.

        outfp is the output file-like object for writing the message to.  It
        must have a write() method.

        Optional mangle_from_ is a flag that, when True (the default), escapes
        From_ lines in the body of the message by putting a `>' in front of
        them.

        Optional maxheaderlen specifies the longest length for a non-continued
        header.  When a header line is longer (in characters, with tabs
        expanded to 8 spaces) than maxheaderlen, the header will split as
        defined in the Header class.  Set maxheaderlen to zero to disable
        header wrapping.  The default is 78, as recommended (but not required)
        by RFC 2822.
        """
        self._fp = outfp
        self._mangle_from_ = mangle_from_
        self._maxheaderlen = maxheaderlen

    def write(self, s):
        """Write the string s to the current output file object."""
        # Just delegate to the file object
        self._fp.write(s)

    def flatten(self, msg, unixfrom=False):
        """Print the message object tree rooted at msg to the output file
        specified when the Generator instance was created.

        unixfrom is a flag that forces the printing of a Unix From_ delimiter
        before the first object in the message tree.  If the original message
        has no From_ delimiter, a `standard' one is crafted.  By default, this
        is False to inhibit the printing of any From_ delimiter.

        Note that for subobjects, no From_ line is printed.
        """
        if unixfrom:
            ufrom = msg.get_unixfrom()
            if not ufrom:
                ufrom = 'From nobody ' + time.ctime(time.time())
            print >> self._fp, ufrom
        self._write(msg)

    def clone(self, fp):
        """Clone this generator with the exact same options."""
        return self.__class__(fp, self._mangle_from_, self._maxheaderlen)

    #
    # Protected interface - undocumented ;/
    #

    def _write(self, msg):
        """Write msg (headers followed by body) to the output file object."""
        # We can't write the headers yet because of the following scenario:
        # say a multipart message includes the boundary string somewhere in
        # its body.  We'd have to calculate the new boundary /before/ we write
        # the headers so that we can write the correct Content-Type:
        # parameter.
        #
        # The way we do this, so as to make the _handle_*() methods simpler,
        # is to cache any subpart writes into a StringIO.  Then we write the
        # headers and the StringIO contents.  That way, subpart handlers can
        # Do The Right Thing, and can still modify the Content-Type: header if
        # necessary.
        oldfp = self._fp
        try:
            self._fp = sfp = StringIO()
            self._dispatch(msg)
        finally:
            # Always restore the real output, even if a handler raised.
            self._fp = oldfp
        # Write the headers.  First we see if the message object wants to
        # handle that itself.  If not, we'll do it generically.
        meth = getattr(msg, '_write_headers', None)
        if meth is None:
            self._write_headers(msg)
        else:
            meth(self)
        self._fp.write(sfp.getvalue())

    def _dispatch(self, msg):
        """Call the most specific body handler available for msg."""
        # Get the Content-Type: for the message, then try to dispatch to
        # self._handle_<maintype>_<subtype>().  If there's no handler for the
        # full MIME type, then dispatch to self._handle_<maintype>().  If
        # that's missing too, then dispatch to self._writeBody().
        main = msg.get_content_maintype()
        sub = msg.get_content_subtype()
        # Dashes are not legal in identifiers, so map them to underscores.
        specific = UNDERSCORE.join((main, sub)).replace('-', '_')
        meth = getattr(self, '_handle_' + specific, None)
        if meth is None:
            generic = main.replace('-', '_')
            meth = getattr(self, '_handle_' + generic, None)
            if meth is None:
                meth = self._writeBody
        meth(msg)

    #
    # Default handlers
    #

    def _write_headers(self, msg):
        """Write every header of msg, then the blank separator line.

        Values are wrapped according to self._maxheaderlen unless wrapping is
        disabled (zero) or the value is raw 8bit data.
        """
        for h, v in msg.items():
            # The trailing comma keeps the value printed below on the same
            # line as the header name.
            print >> self._fp, '%s:' % h,
            if self._maxheaderlen == 0:
                # Explicit no-wrapping
                print >> self._fp, v
            elif isinstance(v, Header):
                # Header instances know what to do
                print >> self._fp, v.encode()
            elif _is8bitstring(v):
                # If we have raw 8bit data in a byte string, we have no idea
                # what the encoding is.  There is no safe way to split this
                # string.  If it's ascii-subset, then we could do a normal
                # ascii split, but if it's multibyte then we could break the
                # string.  There's no way to know so the least harm seems to
                # be to not split the string and risk it being too long.
                print >> self._fp, v
            else:
                # Header's got lots of smarts, so use it.  Note that this is
                # fundamentally broken though because we lose idempotency when
                # the header string is continued with tabs.  It will now be
                # continued with spaces.  This was reversedly broken before we
                # fixed bug 1974.  Either way, we lose.
                print >> self._fp, Header(
                    v, maxlinelen=self._maxheaderlen, header_name=h).encode()
        # A blank line always separates headers from body
        print >> self._fp

    #
    # Handlers for writing types and subtypes
    #

    def _handle_text(self, msg):
        """Write a string payload, escaping From_ lines when requested.

        A payload of None writes nothing.  Raises TypeError if the payload is
        neither None nor a string.
        """
        payload = msg.get_payload()
        if payload is None:
            return
        if not isinstance(payload, basestring):
            raise TypeError('string payload expected: %s' % type(payload))
        if self._mangle_from_:
            payload = fcre.sub('>From ', payload)
        self._fp.write(payload)

    # Default body handler
    _writeBody = _handle_text

    def _handle_multipart(self, msg):
        """Write a multipart body: preamble, delimited subparts, epilogue.

        If msg has no boundary yet, one that does not occur in the flattened
        subparts is generated and stored on msg.
        """
        # The trick here is to write out each part separately, merge them all
        # together, and then make sure that the boundary we've chosen isn't
        # present in the payload.
        msgtexts = []
        subparts = msg.get_payload()
        if subparts is None:
            subparts = []
        elif isinstance(subparts, basestring):
            # e.g. a non-strict parse of a message with no starting boundary.
            self._fp.write(subparts)
            return
        elif not isinstance(subparts, list):
            # Scalar payload
            subparts = [subparts]
        for part in subparts:
            s = StringIO()
            g = self.clone(s)
            g.flatten(part, unixfrom=False)
            msgtexts.append(s.getvalue())
        # BAW: What about boundaries that are wrapped in double-quotes?
        boundary = msg.get_boundary()
        if not boundary:
            # Create a boundary that doesn't appear in any of the
            # message texts.
            alltext = NL.join(msgtexts)
            boundary = _make_boundary(alltext)
            msg.set_boundary(boundary)
        # If there's a preamble, write it out, with a trailing CRLF
        if msg.preamble is not None:
            if self._mangle_from_:
                preamble = fcre.sub('>From ', msg.preamble)
            else:
                preamble = msg.preamble
            print >> self._fp, preamble
        # dash-boundary transport-padding CRLF
        print >> self._fp, '--' + boundary
        # body-part
        if msgtexts:
            self._fp.write(msgtexts.pop(0))
        # *encapsulation
        # --> delimiter transport-padding
        # --> CRLF body-part
        for body_part in msgtexts:
            # delimiter transport-padding CRLF
            print >> self._fp, '\n--' + boundary
            # body-part
            self._fp.write(body_part)
        # close-delimiter transport-padding
        self._fp.write('\n--' + boundary + '--' + NL)
        if msg.epilogue is not None:
            if self._mangle_from_:
                epilogue = fcre.sub('>From ', msg.epilogue)
            else:
                epilogue = msg.epilogue
            self._fp.write(epilogue)

    def _handle_multipart_signed(self, msg):
        """Write a multipart/signed body with header wrapping disabled."""
        # The contents of signed parts has to stay unmodified in order to keep
        # the signature intact per RFC1847 2.1, so we disable header wrapping.
        # RDM: This isn't enough to completely preserve the part, but it helps.
        old_maxheaderlen = self._maxheaderlen
        try:
            self._maxheaderlen = 0
            self._handle_multipart(msg)
        finally:
            self._maxheaderlen = old_maxheaderlen

    def _handle_message_delivery_status(self, msg):
        """Write the header blocks of a message/delivery-status body.

        A payload of None writes nothing, and a string payload is written
        verbatim.  Otherwise each element of the payload is flattened and the
        resulting blocks are joined with newlines.
        """
        payload = msg.get_payload()
        if payload is None:
            # Nothing to emit, same as _handle_text().
            return
        if isinstance(payload, basestring):
            # The payload can be an unparsed string instead of a sequence of
            # header blocks (the same situation _handle_message() deals with
            # for message/rfc822).  Iterating it would try to flatten each
            # character as a sub-message, so just emit the string body.
            self._fp.write(payload)
            return
        # We can't just write the headers directly to self's file object
        # because this will leave an extra newline between the last header
        # block and the boundary.  Sigh.
        blocks = []
        for part in payload:
            s = StringIO()
            g = self.clone(s)
            g.flatten(part, unixfrom=False)
            text = s.getvalue()
            lines = text.split('\n')
            # Strip off the unnecessary trailing empty line
            if lines and lines[-1] == '':
                blocks.append(NL.join(lines[:-1]))
            else:
                blocks.append(text)
        # Now join all the blocks with an empty line.  This has the lovely
        # effect of separating each block with an empty line, but not adding
        # an extra one after the last one.
        self._fp.write(NL.join(blocks))

    def _handle_message(self, msg):
        """Write the body of a message/* part.

        A list payload has its first element flattened and written; any other
        non-None payload is written as-is.  A payload of None writes nothing.
        """
        # The payload of a message/rfc822 part should be a multipart sequence
        # of length 1.  The zeroth element of the list should be the Message
        # object for the subpart.  Extract that object, stringify it, and
        # write it out.
        # Except, it turns out, when it's a string instead, which happens when
        # and only when HeaderParser is used on a message of mime type
        # message/rfc822.  Such messages are generated by, for example,
        # Groupwise when forwarding unadorned messages.  (Issue 7970.)  So
        # in that case we just emit the string body.
        payload = msg.get_payload()
        if payload is None:
            # No body at all; writing None to the file object would fail.
            return
        if isinstance(payload, list):
            # Only a nested Message needs a scratch buffer and a clone.
            s = StringIO()
            g = self.clone(s)
            g.flatten(msg.get_payload(0), unixfrom=False)
            payload = s.getvalue()
        self._fp.write(payload)
291
292
293
# Default format string DecodedGenerator substitutes for a non-text part when
# no fmt argument is supplied.
_FMT = '[Non-text (%(type)s) part of message omitted, filename %(filename)s]'
295
class DecodedGenerator(Generator):
    """Generates a text representation of a message.

    Behaves like Generator, except that every part which is neither text nor
    multipart is replaced by a line built from a format string.
    """
    def __init__(self, outfp, mangle_from_=True, maxheaderlen=78, fmt=None):
        """Like Generator.__init__(), with one extra optional argument, fmt.

        While walking the subparts of a message, a subpart whose main type is
        `text' has its decoded payload printed.  Multipart containers are
        skipped.  For every other subpart, fmt is printed in place of the
        payload after being expanded with these keywords (in %(keyword)s
        format):

        type       : Full MIME type of the non-text part
        maintype   : Main MIME type of the non-text part
        subtype    : Sub-MIME type of the non-text part
        filename   : Filename of the non-text part
        description: Description associated with the non-text part
        encoding   : Content transfer encoding of the non-text part

        When fmt is None (the default), this format string is used:

        [Non-text (%(type)s) part of message omitted, filename %(filename)s]
        """
        Generator.__init__(self, outfp, mangle_from_, maxheaderlen)
        self._fmt = _FMT if fmt is None else fmt

    def _dispatch(self, msg):
        """Print text payloads and a placeholder for other leaf parts."""
        for subpart in msg.walk():
            kind = subpart.get_content_maintype()
            if kind == 'multipart':
                # Containers produce no output of their own.
                continue
            if kind == 'text':
                print >> self, subpart.get_payload(decode=True)
                continue
            # Anything else is summarised through the format string.
            template = self._fmt
            fields = {
                'type': subpart.get_content_type(),
                'maintype': subpart.get_content_maintype(),
                'subtype': subpart.get_content_subtype(),
                'filename': subpart.get_filename('[no filename]'),
                'description': subpart.get('Content-Description',
                                           '[no description]'),
                'encoding': subpart.get('Content-Transfer-Encoding',
                                        '[no encoding]'),
            }
            print >> self, template % fields
349
350
351
352# Helper
# _width is the number of characters in repr(sys.maxint-1); _fmt is the
# resulting zero-padded integer format ('%0<width>d') that _make_boundary()
# applies to its random token.
_width = len(repr(sys.maxint-1))
_fmt = '%%0%dd' % _width
355
def _make_boundary(text=None):
    """Return a random MIME boundary string.

    The boundary is fifteen '=' characters, a zero-padded random number and a
    trailing '=='.  When text is given, a '.<counter>' suffix is appended as
    needed until no line of text is '--' + boundary, optionally followed by
    '--'.
    """
    base = ('=' * 15) + (_fmt % random.randrange(sys.maxint)) + '=='
    if text is None:
        return base
    candidate = base
    suffix = 0
    # Keep trying suffixed variants while the candidate occurs as a delimiter
    # line (opening or closing) somewhere in text.
    while re.search('^--' + re.escape(candidate) + '(--)?$', text,
                    re.MULTILINE):
        candidate = base + '.' + str(suffix)
        suffix += 1
    return candidate
372