1"""RFC 2822 message manipulation.
2
3Note: This is only a very rough sketch of a full RFC-822 parser; in particular
4the tokenizing of addresses does not adhere to all the quoting rules.
5
Note: RFC 2822 is a long awaited update to RFC 822.  This module should
conform to RFC 2822, and is thus mis-named (it's not worth renaming it).  Some
effort at RFC 2822 updates has been made, but a thorough audit has not been
performed.  Consider any RFC 2822 non-conformance to be a bug.
10
11    RFC 2822: http://www.faqs.org/rfcs/rfc2822.html
12    RFC 822 : http://www.faqs.org/rfcs/rfc822.html (obsolete)
13
14Directions for use:
15
16To create a Message object: first open a file, e.g.:
17
18  fp = open(file, 'r')
19
20You can use any other legal way of getting an open file object, e.g. use
21sys.stdin or call os.popen().  Then pass the open file object to the Message()
22constructor:
23
24  m = Message(fp)
25
26This class can work with any input object that supports a readline method.  If
27the input object has seek and tell capability, the rewindbody method will
28work; also illegal lines will be pushed back onto the input stream.  If the
29input object lacks seek but has an `unread' method that can push back a line
30of input, Message will use that to push back illegal lines.  Thus this class
31can be used to parse messages coming from a buffered stream.
32
The optional `seekable' argument is provided as a workaround for certain stdio
libraries in which tell() discards buffered data before discovering that the
lseek() system call doesn't work.  For maximum portability, you should set the
seekable argument to zero to prevent that initial tell() call when passing in
an unseekable object such as a file object created from a socket object.  If
it is 1 on entry -- which it is by default -- the tell() method of the open
file object is called once; if this raises an exception, seekable is reset to
0.  For other nonzero values of seekable, this test is not made.
41
42To get the text of a particular header there are several methods:
43
44  str = m.getheader(name)
45  str = m.getrawheader(name)
46
47where name is the name of the header, e.g. 'Subject'.  The difference is that
48getheader() strips the leading and trailing whitespace, while getrawheader()
49doesn't.  Both functions retain embedded whitespace (including newlines)
50exactly as they are specified in the header, and leave the case of the text
51unchanged.
52
53For addresses and address lists there are functions
54
55  realname, mailaddress = m.getaddr(name)
56  list = m.getaddrlist(name)
57
58where the latter returns a list of (realname, mailaddr) tuples.
59
60There is also a method
61
62  time = m.getdate(name)
63
64which parses a Date-like field and returns a time-compatible tuple,
65i.e. a tuple such as returned by time.localtime() or accepted by
66time.mktime().
67
68See the class definition for lower level access methods.
69
70There are also some utility functions here.
71"""
72# Cleanup and extensions by Eric S. Raymond <esr@thyrsus.com>
73
74import time
75
from warnings import warnpy3k
# Importing this module reports (via the Py3k warning machinery) that rfc822
# is gone in 3.x.  NOTE(review): stacklevel=2 presumably attributes the
# warning to the importing module rather than to this file -- confirm.
warnpy3k("in 3.x, rfc822 has been removed in favor of the email package",
         stacklevel=2)

# Public names exported by a star-import of this module.
__all__ = ["Message","AddressList","parsedate","parsedate_tz","mktime_tz"]

# The two line values that Message.islast() accepts as the header terminator.
_blanklines = ('\r\n', '\n')            # Optimization for islast()
83
84
class Message:
    """Represents a single RFC 2822-compliant message.

    The header block is read from `fp' as soon as the instance is created;
    the body is left unread on the file object.

    Instance attributes:

      fp              the file-like object the message is read from
      seekable        true while fp is believed to support tell()/seek()
      startofheaders  fp.tell() before the headers were read, else None
      startofbody     fp.tell() after the headers were read, else None
      headers         list of the raw header lines, in input order
      dict            maps lower-cased header names to stripped values (the
                      *last* occurrence of a repeated header wins)
      unixfrom        any leading Unix "From " envelope line(s)
      status          '' if the headers parsed cleanly, else an error text
    """

    def __init__(self, fp, seekable = 1):
        """Initialize the class instance and read the headers.

        `fp' is any object with a readline() method.  If `seekable' is
        exactly 1 (the default) fp.tell() is probed once and seekable is
        reset to 0 when the probe fails; for any other value no probe is
        made.
        """
        if seekable == 1:
            # Exercise tell() to make sure it works
            # (and then assume seek() works, too)
            try:
                fp.tell()
            except (AttributeError, IOError):
                seekable = 0
        self.fp = fp
        self.seekable = seekable
        self.startofheaders = None
        self.startofbody = None

        if self.seekable:
            try:
                self.startofheaders = self.fp.tell()
            except IOError:
                self.seekable = 0

        self.readheaders()

        # readheaders() may have cleared self.seekable, so test it again.
        if self.seekable:
            try:
                self.startofbody = self.fp.tell()
            except IOError:
                self.seekable = 0

    def rewindbody(self):
        """Rewind the file to the start of the body (if seekable).

        Raises IOError if the underlying file object is not seekable.
        """
        if not self.seekable:
            # Call form of raise: valid on both Python 2 and Python 3.
            raise IOError("unseekable file")
        self.fp.seek(self.startofbody)

    def readheaders(self):
        """Read header lines.

        Read header lines up to the entirely blank line that terminates them.
        The (normally blank) line that ends the headers is skipped, but not
        included in the returned list.  If a non-header line ends the headers,
        (which is an error), an attempt is made to backspace over it; it is
        never included in the returned list.

        The variable self.status is set to the empty string if all went well,
        otherwise it is an error message.  The variable self.headers is a
        completely uninterpreted list of lines contained in the header (so
        printing them will reproduce the header exactly as it appears in the
        file).
        """
        self.dict = {}
        self.unixfrom = ''
        self.headers = lst = []
        self.status = ''
        headerseen = ""
        firstline = 1
        startofline = unread = tell = None
        # Prefer an explicit push-back method; fall back to tell()/seek().
        if hasattr(self.fp, 'unread'):
            unread = self.fp.unread
        elif self.seekable:
            tell = self.fp.tell
        while 1:
            if tell:
                # Remember where this line starts so it can be un-read.
                try:
                    startofline = tell()
                except IOError:
                    startofline = tell = None
                    self.seekable = 0
            line = self.fp.readline()
            if not line:
                self.status = 'EOF in headers'
                break
            # Skip unix From name time lines
            if firstline and line.startswith('From '):
                self.unixfrom = self.unixfrom + line
                continue
            firstline = 0
            if headerseen and line[0] in ' \t':
                # It's a continuation line: keep the raw text and fold the
                # stripped text into the value of the header it continues.
                lst.append(line)
                folded = self.dict[headerseen] + "\n " + line.strip()
                self.dict[headerseen] = folded.strip()
                continue
            elif self.iscomment(line):
                # It's a comment.  Ignore it.
                continue
            elif self.islast(line):
                # Note! No pushback here!  The delimiter line gets eaten.
                break
            headerseen = self.isheader(line)
            if headerseen:
                # It's a legal header line, save it.
                lst.append(line)
                self.dict[headerseen] = line[len(headerseen)+1:].strip()
                continue
            # It's not a header line; throw it back and stop here.
            if not self.dict:
                self.status = 'No headers'
            else:
                self.status = 'Non-header line where header expected'
            # Try to undo the read.
            if unread:
                unread(line)
            elif tell:
                self.fp.seek(startofline)
            else:
                self.status = self.status + '; bad seek'
            break

    def isheader(self, line):
        """Determine whether a given line is a legal header.

        This method should return the header name, suitably canonicalized
        (here: lower-cased), or None if the line is not a header.
        You may override this method in order to use Message parsing on tagged
        data in RFC 2822-like formats with special header formats.
        """
        i = line.find(':')
        if i > 0:
            return line[:i].lower()
        return None

    def islast(self, line):
        """Determine whether a line is a legal end of RFC 2822 headers.

        You may override this method if your application wants to bend the
        rules, e.g. to strip trailing whitespace, or to recognize MH template
        separators ('--------').  For convenience (e.g. for code reading from
        sockets) a line consisting of \\r\\n also matches.
        """
        return line in _blanklines

    def iscomment(self, line):
        """Determine whether a line should be skipped entirely.

        You may override this method in order to use Message parsing on tagged
        data in RFC 2822-like formats that support embedded comments or
        free-text data.  This implementation never skips a line.
        """
        return False

    def getallmatchingheaders(self, name):
        """Find all header lines matching a given header name.

        Look through the list of headers and find all lines matching a given
        header name (and their continuation lines).  A list of the lines is
        returned, without interpretation.  If the header does not occur, an
        empty list is returned.  If the header occurs multiple times, all
        occurrences are returned.  Case is not important in the header name.
        """
        name = name.lower() + ':'
        n = len(name)
        lst = []
        hit = 0
        for line in self.headers:
            if line[:n].lower() == name:
                hit = 1
            elif not line[:1].isspace():
                # A different header starts here; continuation lines (leading
                # whitespace) leave `hit' as it was.
                hit = 0
            if hit:
                lst.append(line)
        return lst

    def getfirstmatchingheader(self, name):
        """Get the first header line matching name.

        This is similar to getallmatchingheaders, but it returns only the
        first matching header (and its continuation lines).
        """
        name = name.lower() + ':'
        n = len(name)
        lst = []
        hit = 0
        for line in self.headers:
            if hit:
                # Already inside the match: stop at the next real header.
                if not line[:1].isspace():
                    break
            elif line[:n].lower() == name:
                hit = 1
            if hit:
                lst.append(line)
        return lst

    def getrawheader(self, name):
        """A higher-level interface to getfirstmatchingheader().

        Return a string containing the literal text of the header but with the
        keyword stripped.  All leading, trailing and embedded whitespace is
        kept in the string, however.  Return None if the header does not
        occur.
        """
        lst = self.getfirstmatchingheader(name)
        if not lst:
            return None
        # Drop "<name>:" from the first line only.
        lst[0] = lst[0][len(name) + 1:]
        return ''.join(lst)

    def getheader(self, name, default=None):
        """Get the header value for a name.

        This is the normal interface: it returns a stripped version of the
        header value for a given header name, or `default' (None unless
        given) if it doesn't exist.  This uses the dictionary version which
        finds the *last* such header.
        """
        return self.dict.get(name.lower(), default)
    get = getheader

    def getheaders(self, name):
        """Get all values for a header.

        This returns a list of values for headers given more than once; each
        value in the result list is stripped in the same way as the result of
        getheader().  If the header is not given, return an empty list.
        """
        result = []
        current = ''
        have_header = 0
        for s in self.getallmatchingheaders(name):
            if s[0].isspace():
                # Continuation line: fold it into the value being built.
                if current:
                    current = "%s\n %s" % (current, s.strip())
                else:
                    current = s.strip()
            else:
                # A new occurrence starts; flush the previous one first.
                if have_header:
                    result.append(current)
                current = s[s.find(":") + 1:].strip()
                have_header = 1
        if have_header:
            result.append(current)
        return result

    def getaddr(self, name):
        """Get a single address from a header, as a tuple.

        An example return value:
        ('Guido van Rossum', 'guido@cwi.nl')

        Returns (None, None) if the header yields no address.
        """
        # New, by Ben Escoto
        alist = self.getaddrlist(name)
        if alist:
            return alist[0]
        return (None, None)

    def getaddrlist(self, name):
        """Get a list of addresses from a header.

        Retrieves a list of addresses from a header, where each address is a
        tuple as returned by getaddr().  Scans all named headers, so it works
        properly with multiple To: or Cc: headers for example.
        """
        raw = []
        for h in self.getallmatchingheaders(name):
            if h[0] in ' \t':
                raw.append(h)
            else:
                if raw:
                    raw.append(', ')
                # Keep everything after the first colon.  find() yields -1
                # when there is no colon, so the slice then keeps the whole
                # line instead of re-using a value left over from an earlier
                # iteration (or hitting an unbound local).
                raw.append(h[h.find(':') + 1:])
        alladdrs = ''.join(raw)
        a = AddressList(alladdrs)
        return a.addresslist

    def getdate(self, name):
        """Retrieve a date field from a header.

        Retrieves a date field from the named header, returning a tuple
        compatible with time.mktime(), or None if the header is missing.
        """
        try:
            data = self[name]
        except KeyError:
            return None
        return parsedate(data)

    def getdate_tz(self, name):
        """Retrieve a date field from a header as a 10-tuple.

        The first 9 elements make up a tuple compatible with time.mktime(),
        and the 10th is the offset of the poster's time zone from GMT/UTC.
        Returns None if the header is missing.
        """
        try:
            data = self[name]
        except KeyError:
            return None
        return parsedate_tz(data)


    # Access as a dictionary (only finds *last* header of each type):

    def __len__(self):
        """Get the number of headers in a message."""
        return len(self.dict)

    def __getitem__(self, name):
        """Get a specific header, as from a dictionary.

        Raises KeyError if the header is not present.
        """
        return self.dict[name.lower()]

    def __setitem__(self, name, value):
        """Set the value of a header.

        Note: This is not a perfect inversion of __getitem__, because any
        changed headers get stuck at the end of the raw-headers list rather
        than where the altered header was.
        """
        del self[name] # Won't fail if it doesn't exist
        self.dict[name.lower()] = value
        text = name + ": " + value
        # One raw header line per line of the (possibly multi-line) value.
        for line in text.split("\n"):
            self.headers.append(line + "\n")

    def __delitem__(self, name):
        """Delete all occurrences of a specific header, if it is present."""
        name = name.lower()
        if name not in self.dict:
            return
        del self.dict[name]
        name = name + ':'
        n = len(name)
        kept = []
        hit = 0
        for line in self.headers:
            if line[:n].lower() == name:
                hit = 1
            elif not line[:1].isspace():
                hit = 0
            if not hit:
                kept.append(line)
        # Slice-assign so that the existing list object is updated in place.
        self.headers[:] = kept

    def setdefault(self, name, default=""):
        """Return the value of header `name', adding it first if missing.

        When the header is absent it is appended with the value `default',
        which is then returned.
        """
        lowername = name.lower()
        if lowername in self.dict:
            return self.dict[lowername]
        text = name + ": " + default
        for line in text.split("\n"):
            self.headers.append(line + "\n")
        self.dict[lowername] = default
        return default

    def has_key(self, name):
        """Determine whether a message contains the named header."""
        return name.lower() in self.dict

    def __contains__(self, name):
        """Determine whether a message contains the named header."""
        return name.lower() in self.dict

    def __iter__(self):
        """Iterate over the (lower-cased) header field names."""
        return iter(self.dict)

    def keys(self):
        """Get all of a message's header field names."""
        return self.dict.keys()

    def values(self):
        """Get all of a message's header field values."""
        return self.dict.values()

    def items(self):
        """Get all of a message's headers.

        Returns the name, value pairs of the underlying dictionary.
        """
        return self.dict.items()

    def __str__(self):
        """Return the raw header lines joined back together."""
        return ''.join(self.headers)
463
464
465# Utility functions
466# -----------------
467
468# XXX Should fix unquote() and quote() to be really conformant.
469# XXX The inverses of the parse functions may also be useful.
470
471
def unquote(s):
    """Strip one level of surrounding "..." or <...> from a string.

    For a double-quoted string the backslash escapes of backslash and of
    double quote are undone as well.  Anything else, including strings
    shorter than two characters, is returned unchanged.
    """
    if len(s) <= 1:
        return s
    inner = s[1:-1]
    if s.startswith('"') and s.endswith('"'):
        unescaped = inner.replace('\\\\', '\\')
        return unescaped.replace('\\"', '"')
    if s.startswith('<') and s.endswith('>'):
        return inner
    return s
480
481
def quote(s):
    """Backslash-escape the backslashes and double quotes in a string.

    Note that no surrounding quote characters are added.
    """
    escaped = s.replace('\\', '\\\\')
    return escaped.replace('"', '\\"')
485
486
def parseaddr(address):
    """Parse an address into a (realname, mailaddr) tuple.

    Only the first address found is returned; (None, None) is returned
    when the string yields no address at all.
    """
    parsed = AddressList(address).addresslist
    if parsed:
        return parsed[0]
    return (None, None)
494
495
class AddrlistClass:
    """Address parser class by Ben Escoto.

    To understand what this class does, it helps to have a copy of
    RFC 2822 in front of you.

    http://www.faqs.org/rfcs/rfc2822.html

    The parser is a simple cursor (self.pos) moving over self.field; the
    get*() methods each consume one syntactic element starting at the cursor.

    Note: this class interface is deprecated and may be removed in the future.
    Use rfc822.AddressList instead.
    """

    def __init__(self, field):
        """Initialize a new instance.

        `field' is an unparsed address header field, containing one or more
        addresses.
        """
        self.specials = '()<>@,:;.\"[]'
        self.pos = 0
        self.LWS = ' \t'
        self.CR = '\r\n'
        self.atomends = self.specials + self.LWS + self.CR
        # Note that RFC 2822 now specifies `.' as obs-phrase, meaning that it
        # is obsolete syntax.  RFC 2822 requires that we recognize obsolete
        # syntax, so allow dots in phrases.
        self.phraseends = self.atomends.replace('.', '')
        self.field = field
        self.commentlist = []

    def gotonext(self):
        """Parse up to the start of the next address.

        Skips whitespace and line breaks; any comments met on the way are
        collected in self.commentlist.
        """
        skippable = self.LWS + '\n\r'
        while self.pos < len(self.field):
            ch = self.field[self.pos]
            if ch in skippable:
                self.pos = self.pos + 1
            elif ch == '(':
                self.commentlist.append(self.getcomment())
            else:
                break

    def getaddrlist(self):
        """Parse all addresses.

        Returns a list containing all of the addresses.  Parsing runs to the
        end of the field: an entry that yields nothing (an empty group, a
        doubled comma) is skipped and does not end the scan.
        """
        result = []
        while self.pos < len(self.field):
            before = self.pos
            result += self.getaddress()
            if self.pos == before:
                # Safety net: never spin on input that consumed nothing.
                self.pos += 1
        return result

    def getaddress(self):
        """Parse the next address.

        Returns a list of (realname, mailaddr) tuples: one entry for a plain
        address, any number for a group, and an empty list when nothing
        usable was found.
        """
        self.commentlist = []
        self.gotonext()

        oldpos = self.pos
        # Snapshot (not alias) the comments seen so far, so that restoring it
        # below really discards what getphraselist() collected.
        oldcl = self.commentlist[:]
        plist = self.getphraselist()

        self.gotonext()
        returnlist = []

        if self.pos >= len(self.field):
            # Bad email address technically, no domain.
            if plist:
                returnlist = [(' '.join(self.commentlist), plist[0])]

        elif self.field[self.pos] in '.@':
            # email address is just an addrspec
            # this isn't very efficient since we start over
            self.pos = oldpos
            self.commentlist = oldcl
            addrspec = self.getaddrspec()
            returnlist = [(' '.join(self.commentlist), addrspec)]

        elif self.field[self.pos] == ':':
            # address is a group: collect members up to the closing ';'
            fieldlen = len(self.field)
            self.pos += 1
            while self.pos < fieldlen:
                self.gotonext()
                if self.pos < fieldlen and self.field[self.pos] == ';':
                    self.pos += 1
                    break
                returnlist = returnlist + self.getaddress()

        elif self.field[self.pos] == '<':
            # Address is a phrase then a route addr
            routeaddr = self.getrouteaddr()

            if self.commentlist:
                returnlist = [(' '.join(plist) + ' (' +
                               ' '.join(self.commentlist) + ')', routeaddr)]
            else:
                returnlist = [(' '.join(plist), routeaddr)]

        else:
            if plist:
                returnlist = [(' '.join(self.commentlist), plist[0])]
            elif self.field[self.pos] in self.specials:
                # Stray special character: step over it.
                self.pos += 1

        self.gotonext()
        if self.pos < len(self.field) and self.field[self.pos] == ',':
            self.pos += 1
        return returnlist

    def getrouteaddr(self):
        """Parse a route address (Return-path value).

        This method just skips all the route stuff and returns the addrspec.
        Returns None if the cursor is not on a '<'.
        """
        if self.field[self.pos] != '<':
            return

        expectroute = 0
        self.pos += 1
        self.gotonext()
        adlist = ""
        while self.pos < len(self.field):
            if expectroute:
                # Discard the domain of a source-route element.
                self.getdomain()
                expectroute = 0
            elif self.field[self.pos] == '>':
                self.pos += 1
                break
            elif self.field[self.pos] == '@':
                self.pos += 1
                expectroute = 1
            elif self.field[self.pos] == ':':
                self.pos += 1
            else:
                adlist = self.getaddrspec()
                # Step over the character that ended the addr-spec
                # (normally the closing '>').
                self.pos += 1
                break
            self.gotonext()

        return adlist

    def getaddrspec(self):
        """Parse an RFC 2822 addr-spec.

        Returns the local part alone when no '@' follows it, and the empty
        string when an '@' is present but no valid domain follows.
        """
        aslist = []

        self.gotonext()
        while self.pos < len(self.field):
            if self.field[self.pos] == '.':
                aslist.append('.')
                self.pos += 1
            elif self.field[self.pos] == '"':
                aslist.append('"%s"' % self.getquote())
            elif self.field[self.pos] in self.atomends:
                break
            else:
                aslist.append(self.getatom())
            self.gotonext()

        if self.pos >= len(self.field) or self.field[self.pos] != '@':
            return ''.join(aslist)

        aslist.append('@')
        self.pos += 1
        self.gotonext()
        domain = self.getdomain()
        if not domain:
            # Invalid domain: report a failed parse rather than handing back
            # a truncated address.
            return ''
        return ''.join(aslist) + domain

    def getdomain(self):
        """Get the complete domain name from an address.

        Returns the empty string if a second '@' is found where the domain
        should end.
        """
        sdlist = []
        while self.pos < len(self.field):
            if self.field[self.pos] in self.LWS:
                self.pos += 1
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            elif self.field[self.pos] == '[':
                sdlist.append(self.getdomainliteral())
            elif self.field[self.pos] == '.':
                self.pos += 1
                sdlist.append('.')
            elif self.field[self.pos] == '@':
                # Don't accept domains followed by another `@', as in
                # `a@malicious.org@important.com'.
                return ''
            elif self.field[self.pos] in self.atomends:
                break
            else:
                sdlist.append(self.getatom())
        return ''.join(sdlist)

    def getdelimited(self, beginchar, endchars, allowcomments = 1):
        """Parse a header fragment delimited by special characters.

        `beginchar' is the start character for the fragment.  If self is not
        looking at an instance of `beginchar' then getdelimited returns the
        empty string.

        `endchars' is a sequence of allowable end-delimiting characters.
        Parsing stops when one of these is encountered.

        If `allowcomments' is non-zero, embedded RFC 2822 comments are allowed
        within the parsed fragment.

        The delimiters themselves are not part of the returned string, and a
        backslash makes the character after it be taken literally.
        """
        if self.field[self.pos] != beginchar:
            return ''

        slist = ['']
        quote = 0
        self.pos += 1
        while self.pos < len(self.field):
            if quote == 1:
                # Character after a backslash: take it as is.
                slist.append(self.field[self.pos])
                quote = 0
            elif self.field[self.pos] in endchars:
                self.pos += 1
                break
            elif allowcomments and self.field[self.pos] == '(':
                slist.append(self.getcomment())
                continue        # have already advanced pos from getcomment
            elif self.field[self.pos] == '\\':
                quote = 1
            else:
                slist.append(self.field[self.pos])
            self.pos += 1

        return ''.join(slist)

    def getquote(self):
        """Get a quote-delimited fragment from self's field."""
        return self.getdelimited('"', '"\r', 0)

    def getcomment(self):
        """Get a parenthesis-delimited fragment from self's field."""
        return self.getdelimited('(', ')\r', 1)

    def getdomainliteral(self):
        """Parse an RFC 2822 domain-literal."""
        return '[%s]' % self.getdelimited('[', ']\r', 0)

    def getatom(self, atomends=None):
        """Parse an RFC 2822 atom.

        Optional atomends specifies a different set of end token delimiters
        (the default is to use self.atomends).  This is used e.g. in
        getphraselist() since phrase endings must not include the `.' (which
        is legal in phrases)."""
        if atomends is None:
            atomends = self.atomends

        start = self.pos
        while (self.pos < len(self.field)
               and self.field[self.pos] not in atomends):
            self.pos += 1

        return self.field[start:self.pos]

    def getphraselist(self):
        """Parse a sequence of RFC 2822 phrases.

        A phrase is a sequence of words, which are in turn either RFC 2822
        atoms or quoted-strings.  Phrases are canonicalized by squeezing all
        runs of continuous whitespace into one space.

        Returns the list of words; comments met on the way are appended to
        self.commentlist.
        """
        plist = []

        while self.pos < len(self.field):
            if self.field[self.pos] in self.LWS:
                self.pos += 1
            elif self.field[self.pos] == '"':
                plist.append(self.getquote())
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            elif self.field[self.pos] in self.phraseends:
                break
            else:
                plist.append(self.getatom(self.phraseends))

        return plist
769
class AddressList(AddrlistClass):
    """An AddressList encapsulates a list of parsed RFC 2822 addresses.

    The parsed (realname, mailaddr) tuples are kept in self.addresslist.
    The +, -, += and -= operators treat two lists as sets of such tuples.
    """
    def __init__(self, field):
        """Parse `field'; an empty or None field gives an empty list."""
        AddrlistClass.__init__(self, field)
        self.addresslist = self.getaddrlist() if field else []

    def __len__(self):
        """Return the number of parsed addresses."""
        return len(self.addresslist)

    def __str__(self):
        """Return the addresses in canonical form, separated by commas."""
        return ", ".join([dump_address_pair(pair)
                          for pair in self.addresslist])

    def __add__(self, other):
        """Return a new list: self plus other's entries not in self."""
        # Set union
        union = AddressList(None)
        mine = self.addresslist
        union.addresslist = mine[:] + [pair for pair in other.addresslist
                                       if pair not in mine]
        return union

    def __iadd__(self, other):
        """Append, in place, other's entries that are not yet present."""
        # Set union, in-place
        for pair in other.addresslist:
            if pair in self.addresslist:
                continue
            self.addresslist.append(pair)
        return self

    def __sub__(self, other):
        """Return a new list: self's entries that are not in other."""
        # Set difference
        difference = AddressList(None)
        theirs = other.addresslist
        difference.addresslist.extend([pair for pair in self.addresslist
                                       if pair not in theirs])
        return difference

    def __isub__(self, other):
        """Remove, in place, one occurrence of each of other's entries."""
        # Set difference, in-place
        for pair in other.addresslist:
            if pair not in self.addresslist:
                continue
            self.addresslist.remove(pair)
        return self

    def __getitem__(self, index):
        """Index into the parsed addresses."""
        # Make indexing, slices, and 'in' work
        return self.addresslist[index]
819
def dump_address_pair(pair):
    """Dump a (name, address) pair in a canonicalized form.

    With a real name the result is '"name" <address>'; without one it is
    the bare address.
    """
    realname, address = pair[0], pair[1]
    if not realname:
        return address
    return '"' + realname + '" <' + address + '>'
826
827# Parse a date field
828
# Abbreviated month names first, then full names; parsedate_tz() maps an
# index above 12 back onto 1..12.
_monthnames = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul',
               'aug', 'sep', 'oct', 'nov', 'dec',
               'january', 'february', 'march', 'april', 'may', 'june', 'july',
               'august', 'september', 'october', 'november', 'december']
_daynames = ['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun']

# The timezone table does not include the military time zones defined
# in RFC822, other than Z.  According to RFC1123, the description in
# RFC822 gets the signs wrong, so we can't rely on any such time
# zones.  RFC1123 recommends that numeric timezone indicators be used
# instead of timezone names.

# Values are offsets from UTC written as signed HHMM integers.
_timezones = {'UT':0, 'UTC':0, 'GMT':0, 'Z':0,
              'AST': -400, 'ADT': -300,  # Atlantic (used in Canada)
              'EST': -500, 'EDT': -400,  # Eastern
              'CST': -600, 'CDT': -500,  # Central
              'MST': -700, 'MDT': -600,  # Mountain
              'PST': -800, 'PDT': -700   # Pacific
              }


def parsedate_tz(data):
    """Convert a date string to a 10-tuple.

    The result is

        (year, month, day, hour, minute, second, 0, 1, 0, tzoffset)

    where tzoffset is the zone's offset from UTC in seconds, or None when
    the string carries no zone that could be interpreted.  The year is
    returned exactly as written; two-digit years are not expanded.

    Zone names are looked up in _timezones, so of the military zones only
    Z is recognized; otherwise the zone must be a signed HHMM number.

    Returns None when the string is empty, contains only whitespace, or
    cannot be parsed as a date.
    """
    if not data:
        return None
    data = data.split()
    if not data:
        # Whitespace-only input leaves nothing to index below.
        return None
    if data[0][-1] in (',', '.') or data[0].lower() in _daynames:
        # There's a dayname here. Skip it
        del data[0]
    else:
        # no space after the "weekday,"?
        i = data[0].rfind(',')
        if i >= 0:
            data[0] = data[0][i+1:]
    if len(data) == 3: # RFC 850 date, deprecated
        stuff = data[0].split('-')
        if len(stuff) == 3:
            data = stuff + data[1:]
    if len(data) == 4:
        # The zone may be glued to the time ("13:32:02+0100").  Split it
        # off; a '-' sign is kept because it is part of the numeric value.
        s = data[3]
        i = s.find('+')
        if i > 0:
            data[3:] = [s[:i], s[i+1:]]
        else:
            j = -1
            if i < 0:
                j = s.find('-')
            if j > 0:
                data[3:] = [s[:j], s[j:]]
            else:
                data.append('') # Dummy tz
    if len(data) < 5:
        return None
    data = data[:5]
    [dd, mm, yy, tm, tz] = data
    mm = mm.lower()
    if mm not in _monthnames:
        # Maybe the month comes before the day ("Nov 6").
        dd, mm = mm, dd.lower()
        if mm not in _monthnames:
            return None
    mm = _monthnames.index(mm)+1
    if mm > 12: mm = mm - 12
    # endswith()/slicing rather than [-1]/[0]: any of these tokens may be
    # empty (e.g. an RFC 850 date such as "-Jan-00").
    if dd.endswith(','):
        dd = dd[:-1]
    i = yy.find(':')
    if i > 0:
        # Time and year are swapped (asctime()-style ordering).
        yy, tm = tm, yy
    if yy.endswith(','):
        yy = yy[:-1]
    if not yy[:1].isdigit():
        yy, tz = tz, yy
    if tm.endswith(','):
        tm = tm[:-1]
    tm = tm.split(':')
    if len(tm) == 2:
        [thh, tmm] = tm
        tss = '0'
    elif len(tm) == 3:
        [thh, tmm, tss] = tm
    else:
        return None
    try:
        yy = int(yy)
        dd = int(dd)
        thh = int(thh)
        tmm = int(tmm)
        tss = int(tss)
    except ValueError:
        return None
    tzoffset = None
    tz = tz.upper()
    if tz in _timezones:
        tzoffset = _timezones[tz]
    else:
        try:
            tzoffset = int(tz)
        except ValueError:
            pass
    # Convert a timezone offset into seconds ; -0500 -> -18000
    if tzoffset:
        if tzoffset < 0:
            tzsign = -1
            tzoffset = -tzoffset
        else:
            tzsign = 1
        tzoffset = tzsign * ( (tzoffset//100)*3600 + (tzoffset % 100)*60)
    return (yy, mm, dd, thh, tmm, tss, 0, 1, 0, tzoffset)
933
934
def parsedate(data):
    """Convert a time string to a 9-item time tuple.

    This is parsedate_tz() with the trailing timezone offset dropped.
    Returns None when parsedate_tz() returns None.
    """
    parsed = parsedate_tz(data)
    if parsed is not None:
        return parsed[:9]
    return None
941
942
def mktime_tz(data):
    """Turn a 10-tuple as returned by parsedate_tz() into a UTC timestamp.

    data[9] is the zone offset from UTC in seconds, or None.  With None
    the fields are interpreted as local time via time.mktime().  Otherwise
    the fields are interpreted in the given zone, independent of the local
    timezone settings.

    Returns the timestamp as a float.
    """
    if data[9] is None:
        # No zone info, so localtime is better assumption than GMT
        return time.mktime(tuple(data[:8]) + (-1,))
    # Compute directly in UTC.  Going through time.mktime() and correcting
    # with time.timezone gives wrong results whenever the local zone's
    # offset on that date differs from the current time.timezone.
    import calendar
    year = data[0]
    if 0 <= year < 100:
        # parsedate_tz() leaves two-digit years (RFC 850 dates) as written;
        # expand them with the POSIX convention: 69-99 -> 19xx, 0-68 -> 20xx.
        if year >= 69:
            year = year + 1900
        else:
            year = year + 2000
    seconds = calendar.timegm((year,) + tuple(data[1:6]))
    return float(seconds - data[9])
951
def formatdate(timeval=None):
    """Return a timestamp formatted the way Internet standards prefer.

    Example:

        Sun, 06 Nov 1994 08:49:37 GMT  ; RFC 822, updated by RFC 1123

    timeval is a number of seconds since the epoch; when it is None the
    current time is used.  The result is always expressed in GMT.

    RFC 1123 requires day and month names to be in English, so the names
    are taken from fixed tables here instead of strftime(), which honors
    the locale and could generate non-English names.
    """
    if timeval is None:
        timeval = time.time()
    year, month, day, hour, minute, second, weekday = time.gmtime(timeval)[:7]
    dayname = ("Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun")[weekday]
    monthname = ("Jan", "Feb", "Mar", "Apr", "May", "Jun",
                 "Jul", "Aug", "Sep", "Oct", "Nov", "Dec")[month - 1]
    return "%s, %02d %s %04d %02d:%02d:%02d GMT" % (
        dayname, day, monthname, year, hour, minute, second)
971
972
# When used as a script, run a small test program.
# The optional first command line argument is the name of a file containing
# one message in RFC-822 format; it defaults to $HOME/Mail/inbox/1.
976
if __name__ == '__main__':
    # Small self-test: parse one message and dump what was found.
    # Every print below takes a single pre-formatted string, so the output
    # is the same whether print is a statement or a function.
    import sys
    import os
    if sys.argv[1:]:
        path = sys.argv[1]
    else:
        # Only consult $HOME when no file was named on the command line.
        path = os.path.join(os.environ['HOME'], 'Mail/inbox/1')
    f = open(path, 'r')
    try:
        m = Message(f)
        print('From: %s' % (m.getaddr('from'),))
        print('To: %s' % (m.getaddrlist('to'),))
        print('Subject: %s' % (m.getheader('subject'),))
        print('Date: %s' % (m.getheader('date'),))
        date = m.getdate_tz('date')
        if date:
            tz = date[-1]
            parts = ['ParsedDate:',
                     time.asctime(time.localtime(mktime_tz(date)))]
            if tz is not None:
                # Split the magnitude and add the sign afterwards, so that
                # e.g. -19800 seconds is shown as -0530 rather than -0630.
                if tz < 0:
                    sign = '-'
                else:
                    sign = '+'
                hhmm, ss = divmod(abs(tz), 60)
                hh, mm = divmod(hhmm, 60)
                parts.append("%s%02d%02d" % (sign, hh, mm))
                if ss:
                    parts.append(".%02d" % ss)
            print(' '.join(parts))
        else:
            # Date header missing or unparseable.
            print('ParsedDate: None')
        # Count the lines of the message body.
        m.rewindbody()
        n = 0
        while f.readline():
            n += 1
        print('Lines: %s' % (n,))
        print('-'*70)
        # Exercise the mapping interface of Message.
        print('len = %s' % (len(m),))
        if 'Date' in m:
            print('Date = %s' % (m['Date'],))
        if 'X-Nonsense' in m: pass
        print('keys = %s' % (m.keys(),))
        print('values = %s' % (m.values(),))
        print('items = %s' % (m.items(),))
    finally:
        f.close()
1012