_strptime.py revision 4d895fa1258205e01073f0f9c2aba9a57843ae21
1"""Strptime-related classes and functions.
2
3CLASSES:
4    LocaleTime -- Discovers and/or stores locale-specific time information
5    TimeRE -- Creates regexes for pattern matching a string of text containing
6                time information as is returned by time.strftime()
7
8FUNCTIONS:
9    firstjulian -- Calculates the Julian date up to the first of the specified
10                    year
11    gregorian -- Calculates the Gregorian date based on the Julian day and
12                    year
13    julianday -- Calculates the Julian day since the first of the year based
14                    on the Gregorian date
15    dayofweek -- Calculates the day of the week from the Gregorian date.
16    strptime -- Calculates the time struct represented by the passed-in string
17
18Requires Python 2.2.1 or higher.
19Can be used in Python 2.2 if the following line is added:
20    >>> True = 1; False = 0
21"""
22import time
23import locale
24import calendar
25from re import compile as re_compile
26from re import IGNORECASE
27from string import whitespace as whitespace_string
28
# Public API: only strptime() is exported by ``from _strptime import *``.
__all__ = ['strptime']

# Module metadata.
__version__ = (2, 1, 6)
__author__ = "Brett Cannon"
__email__ = "drifty@bigfoot.com"

# Type of a compiled regular expression object.  strptime() uses it to tell
# a pre-compiled regex apart from a plain format string.
RegexpType = type(re_compile(''))
36
37
class LocaleTime(object):
    """Stores and handles locale-specific information related to time.

    Every value may be supplied to the constructor; anything left as None
    is discovered lazily, the first time the matching attribute is read,
    by querying the calendar, time and locale modules.

    ATTRIBUTES (all read-only after instance creation! Instance variables that
                store the values have mangled names):
        f_weekday -- full weekday names (7-item list)
        a_weekday -- abbreviated weekday names (7-item list)
        f_month -- full month names (13-item list; dummy value in [0], which
                    is added by code)
        a_month -- abbreviated month names (13-item list, dummy value in
                    [0], which is added by code)
        am_pm -- AM/PM representation (2-item list)
        LC_date_time -- format string for date/time representation (string)
        LC_date -- format string for date representation (string)
        LC_time -- format string for time representation (string)
        timezone -- daylight- and non-daylight-savings timezone representation
                    (3-item list; code tacks on blank item at end for
                    possible lack of timezone such as UTC)
        lang -- Language used by instance (string)
    """

    def __init__(self, f_weekday=None, a_weekday=None, f_month=None,
                 a_month=None, am_pm=None, LC_date_time=None, LC_time=None,
                 LC_date=None, timezone=None, lang=None):
        """Optionally set attributes with passed-in values.

        Sequence arguments are copied into lists.  Month sequences must hold
        12 names (a dummy '' is inserted at index 0); timezone must hold 2
        names (a dummy '' is appended).

        Raises TypeError when a supplied sequence has the wrong length.
        """
        self.__f_weekday = self.__checked(
            f_weekday, 7, "full weekday names must be a 7-item sequence")
        self.__a_weekday = self.__checked(
            a_weekday, 7,
            "abbreviated weekday names must be a 7-item  sequence")
        self.__f_month = self.__checked(
            f_month, 12, "full month names must be a 12-item sequence")
        if self.__f_month is not None:
            self.__f_month = self.__pad(self.__f_month, True)
        self.__a_month = self.__checked(
            a_month, 12,
            "abbreviated month names must be a 12-item sequence")
        if self.__a_month is not None:
            self.__a_month = self.__pad(self.__a_month, True)
        # Copied into a list like every other sequence so that consumers
        # which copy-and-sort the value also work when a tuple is passed in.
        self.__am_pm = self.__checked(
            am_pm, 2, "AM/PM representation must be a 2-item sequence")
        self.__LC_date_time = LC_date_time
        self.__LC_time = LC_time
        self.__LC_date = LC_date
        self.__timezone = timezone
        if timezone:
            if len(timezone) != 2:
                raise TypeError("timezone names must contain 2 items")
            self.__timezone = self.__pad(timezone, False)
        self.__lang = lang

    def __checked(self, seq, size, message):
        # Return a list copy of seq, or None when seq is None.  Raise
        # TypeError(message) when seq does not hold exactly size items.
        if seq is None:
            return None
        if len(seq) != size:
            raise TypeError(message)
        return list(seq)

    def __pad(self, seq, front):
        # Add '' to seq to either front (is True), else the back.
        seq = list(seq)
        if front:
            seq.insert(0, '')
        else:
            seq.append('')
        return seq

    def __set_nothing(self, stuff):
        # Raise TypeError when trying to set an attribute.
        raise TypeError("attribute does not support assignment")

    def __get_f_weekday(self):
        # Fetch self.f_weekday.
        if not self.__f_weekday:
            self.__calc_weekday()
        return self.__f_weekday

    def __get_a_weekday(self):
        # Fetch self.a_weekday.
        if not self.__a_weekday:
            self.__calc_weekday()
        return self.__a_weekday

    f_weekday = property(__get_f_weekday, __set_nothing,
                         doc="Full weekday names")
    a_weekday = property(__get_a_weekday, __set_nothing,
                         doc="Abbreviated weekday names")

    def __get_f_month(self):
        # Fetch self.f_month.
        if not self.__f_month:
            self.__calc_month()
        return self.__f_month

    def __get_a_month(self):
        # Fetch self.a_month.
        if not self.__a_month:
            self.__calc_month()
        return self.__a_month

    f_month = property(__get_f_month, __set_nothing,
                       doc="Full month names (dummy value at index 0)")
    a_month = property(__get_a_month, __set_nothing,
                       doc="Abbreviated month names (dummy value at index 0)")

    def __get_am_pm(self):
        # Fetch self.am_pm.
        if not self.__am_pm:
            self.__calc_am_pm()
        return self.__am_pm

    am_pm = property(__get_am_pm, __set_nothing, doc="AM/PM representation")

    def __get_timezone(self):
        # Fetch self.timezone.
        if not self.__timezone:
            self.__calc_timezone()
        return self.__timezone

    timezone = property(__get_timezone, __set_nothing,
                        doc="Timezone representation (dummy value at index 2)")

    def __get_LC_date_time(self):
        # Fetch self.LC_date_time.
        if not self.__LC_date_time:
            self.__calc_date_time()
        return self.__LC_date_time

    def __get_LC_date(self):
        # Fetch self.LC_date.
        if not self.__LC_date:
            self.__calc_date_time()
        return self.__LC_date

    def __get_LC_time(self):
        # Fetch self.LC_time.
        if not self.__LC_time:
            self.__calc_date_time()
        return self.__LC_time

    LC_date_time = property(
        __get_LC_date_time, __set_nothing,
        doc=
        "Format string for locale's date/time representation ('%c' format)")
    LC_date = property(__get_LC_date, __set_nothing,
        doc="Format string for locale's date representation ('%x' format)")
    LC_time = property(__get_LC_time, __set_nothing,
        doc="Format string for locale's time representation ('%X' format)")

    def __get_lang(self):
        # Fetch self.lang.
        if not self.__lang:
            self.__calc_lang()
        return self.__lang

    lang = property(__get_lang, __set_nothing,
                    doc="Language used for instance")

    def __calc_weekday(self):
        # Set self.__a_weekday and self.__f_weekday using the calendar
        # module.  Values supplied to the constructor are left untouched.
        if not self.__a_weekday:
            self.__a_weekday = [calendar.day_abbr[i] for i in range(7)]
        if not self.__f_weekday:
            self.__f_weekday = [calendar.day_name[i] for i in range(7)]

    def __calc_month(self):
        # Set self.__f_month and self.__a_month using the calendar module.
        # Index 0 of the calendar sequences is already the dummy ''.
        if not self.__a_month:
            self.__a_month = [calendar.month_abbr[i] for i in range(13)]
        if not self.__f_month:
            self.__f_month = [calendar.month_name[i] for i in range(13)]

    def __calc_am_pm(self):
        # Set self.__am_pm by using time.strftime().

        # The magic date (1999,3,17,hour,44,55,2,76,0) is not really that
        # magical; just happened to have used it everywhere else where a
        # static date was needed.  Hour 1 yields the AM marker, hour 22 the
        # PM marker.
        am_pm = []
        for hour in (1, 22):
            time_tuple = time.struct_time((1999, 3, 17, hour, 44, 55, 2, 76, 0))
            am_pm.append(time.strftime("%p", time_tuple))
        self.__am_pm = am_pm

    def __calc_date_time(self):
        # Set self.__LC_date_time, self.__LC_date, & self.__LC_time by
        # formatting a known date with time.strftime() and then replacing
        # each recognizable value with the directive that produced it.

        # Use (1999,3,17,22,44,55,2,76,0) for magic date because the amount of
        # overloaded numbers is minimized.  The order in which searches for
        # values within the format string is very important; it eliminates
        # possible ambiguity for what something represents.
        time_tuple = time.struct_time((1999, 3, 17, 22, 44, 55, 2, 76, 0))
        date_time = [time.strftime("%c", time_tuple),
                     time.strftime("%x", time_tuple),
                     time.strftime("%X", time_tuple)]
        replacement_pairs = (
            ('%', '%%'), (self.f_weekday[2], '%A'),
            (self.f_month[3], '%B'), (self.a_weekday[2], '%a'),
            (self.a_month[3], '%b'), (self.am_pm[1], '%p'),
            (self.timezone[0], '%Z'), (self.timezone[1], '%Z'),
            ('1999', '%Y'), ('99', '%y'), ('22', '%H'),
            ('44', '%M'), ('55', '%S'), ('76', '%j'),
            ('17', '%d'), ('03', '%m'), ('3', '%m'),
            # '3' needed for when no leading zero.
            ('2', '%w'), ('10', '%I'))
        # 1999-01-03 is a Sunday, so a Sunday-based week number (%U) and a
        # Monday-based one (%W) differ for it; only %W can render as "00".
        week_probe = time.struct_time((1999, 1, 3, 1, 1, 1, 6, 3, 0))
        for offset, directive in ((0, '%c'), (1, '%x'), (2, '%X')):
            current_format = date_time[offset]
            for old, new in replacement_pairs:
                # Missing locale info shows up as the empty string (i.e.,
                # Swedish's lack of AM/PM info).  Replacing '' would insert
                # the directive between every character, so skip it.
                if old:
                    current_format = current_format.replace(old, new)
            # Test for presence explicitly: str.find() returns -1 (true)
            # when absent and 0 (false) when found at the very start.
            if '00' in time.strftime(directive, week_probe):
                U_W = '%W'
            else:
                U_W = '%U'
            # '11' is the week number of the magic date.
            date_time[offset] = current_format.replace('11', U_W)
        if not self.__LC_date_time:
            self.__LC_date_time = date_time[0]
        if not self.__LC_date:
            self.__LC_date = date_time[1]
        if not self.__LC_time:
            self.__LC_time = date_time[2]

    def __calc_timezone(self):
        # Set self.__timezone by using time.tzname.
        #
        # Empty string used for matching when timezone is not used/needed such
        # as with UTC.
        self.__timezone = self.__pad(time.tzname, False)

    def __calc_lang(self):
        # Set self.lang by using locale.getlocale() or
        # locale.getdefaultlocale().
        current_lang = locale.getlocale(locale.LC_TIME)[0]
        if current_lang:
            self.__lang = current_lang
        else:
            self.__lang = locale.getdefaultlocale()[0]
297
298
class TimeRE(dict):
    """Handle conversion from format directives to regexes.

    The dict maps a directive letter (the character after '%') to a regex
    string with a named group of the same letter.  Locale-independent
    directives are stored at creation; locale-dependent ones are built from
    self.locale_time on first lookup and then cached in the dict.
    """

    # Characters that have special meaning in a regex.  They are escaped when
    # they occur literally in a format string or in a locale-supplied name.
    __regex_chars = re_compile(r"([\\.^$*+?\(\){}\[\]|])")

    def __init__(self, locale_time=None):
        """Init inst with non-locale regexes and store LocaleTime object.

        locale_time -- object providing the locale attributes (f_weekday,
            a_weekday, f_month, a_month, am_pm, timezone, LC_date_time,
            LC_date, LC_time, lang).  When None, a new LocaleTime() is
            created for this instance instead of sharing one built when the
            class was defined.
        """
        super(TimeRE, self).__init__({
            # The " \d" option is to make %c from ANSI C work
            'd': r"(?P<d>3[0-1]|[0-2]\d|\d| \d)",
            'H': r"(?P<H>2[0-3]|[0-1]\d|\d)",
            'I': r"(?P<I>0\d|1[0-2]|\d)",
            'j': r"(?P<j>(?:3[0-5]\d|6[0-6])|[0-2]\d\d|\d)",
            'm': r"(?P<m>0\d|1[0-2]|\d)",
            'M': r"(?P<M>[0-5]\d|\d)",
            'S': r"(?P<S>6[0-1]|[0-5]\d|\d)",
            'U': r"(?P<U>5[0-3]|[0-4]\d|\d)",
            'w': r"(?P<w>[0-6])",
            'W': r"(?P<W>5[0-3]|[0-4]\d|\d)",  # Same as U
            'y': r"(?P<y>\d\d)",
            'Y': r"(?P<Y>\d\d\d\d)"})
        if locale_time is None:
            locale_time = LocaleTime()
        self.locale_time = locale_time

    def __getitem__(self, fetch):
        """Try to fetch regex; if it does not exist, construct it.

        Raises KeyError when fetch is not a supported directive.
        """
        try:
            return super(TimeRE, self).__getitem__(fetch)
        except KeyError:
            # Lambdas delay the locale lookups until the directive is
            # actually requested.
            constructors = {
                'A': lambda: self.__seqToRE(self.locale_time.f_weekday, fetch),
                'a': lambda: self.__seqToRE(self.locale_time.a_weekday, fetch),
                'B': lambda: self.__seqToRE(self.locale_time.f_month[1:],
                                            fetch),
                'b': lambda: self.__seqToRE(self.locale_time.a_month[1:],
                                            fetch),
                'c': lambda: self.pattern(self.locale_time.LC_date_time),
                'p': lambda: self.__seqToRE(self.locale_time.am_pm, fetch),
                'x': lambda: self.pattern(self.locale_time.LC_date),
                'X': lambda: self.pattern(self.locale_time.LC_time),
                'Z': lambda: self.__seqToRE(self.locale_time.timezone, fetch),
                '%': lambda: '%',
                }
            if fetch in constructors:
                self[fetch] = constructors[fetch]()
                return self[fetch]
            else:
                raise

    def __escape(self, text):
        # Backslash-escape every regex metacharacter in text.
        return self.__regex_chars.sub(r"\\\1", text)

    def __seqToRE(self, to_convert, directive):
        """Convert a list to a regex string for matching directive.

        The names become the alternatives of one named group, longest name
        first, so that a name which merely extends another one (differs only
        by a suffix) is tried before the shorter name.  Names of equal length
        keep their original order.  The input sequence is not modified.
        """
        # Decorate-sort-undecorate instead of a cmp function: works on every
        # Python version and the position keeps the sort stable.
        decorated = [(-len(name), position, name)
                     for position, name in enumerate(to_convert)]
        decorated.sort()
        # Names are literal text; escape them so e.g. "a.m." is not read as
        # regex syntax.
        alternatives = [self.__escape(entry[2]) for entry in decorated]
        return '(?P<%s>%s)' % (directive, '|'.join(alternatives))

    def pattern(self, format):
        """Return re pattern for the format string.

        Literal text in format is regex-escaped, each whitespace character
        becomes r'\\s*', and each '%x' directive is replaced by self['x'].

        Raises KeyError for an unsupported directive and IndexError when
        format ends with a lone '%'.
        """
        # Escape before inserting any regex syntax of our own.
        format = self.__escape(format)
        for whitespace in whitespace_string:
            format = format.replace(whitespace, r'\s*')
        pieces = []
        while format.find('%') != -1:
            directive_index = format.index('%') + 1
            pieces.append(format[:directive_index - 1])
            pieces.append(self[format[directive_index]])
            format = format[directive_index + 1:]
        pieces.append(format)
        return ''.join(pieces)

    def compile(self, format):
        """Return a compiled re object for the format string.

        The pattern starts with a regex comment naming the language of
        self.locale_time so the regex can later be checked against the
        locale it is used with.
        """
        # The comment is added after translation so that it is not escaped.
        tagged = "(?#%s)%s" % (self.locale_time.lang, self.pattern(format))
        return re_compile(tagged, IGNORECASE)
387
388
def strptime(data_string, format="%a %b %d %H:%M:%S %Y"):
    """Return a time struct based on the input data and the format string.

    The format argument may either be a regular expression object compiled by
    strptime(), or a format string.  If False is passed in for data_string
    then the re object calculated for format will be returned.  The re object
    must be used with the same locale as was used to compile the re object.

    Fields that cannot be determined from data_string are left as -1.

    Raises TypeError when a compiled regex does not carry the current
    locale's language, and ValueError when data_string does not match.
    """
    locale_time = LocaleTime()
    if isinstance(format, RegexpType):
        # The language is embedded in the pattern when it is compiled.
        if format.pattern.find(locale_time.lang) == -1:
            raise TypeError("re object not created with same language as "
                            "LocaleTime instance")
        compiled_re = format
    else:
        compiled_re = TimeRE(locale_time).compile(format)
    if data_string is False:
        return compiled_re
    found = compiled_re.match(data_string)
    if not found:
        raise ValueError("time data did not match format")
    year = month = day = hour = minute = second = weekday = julian = tz = -1
    found_dict = found.groupdict()
    # Iterating the dict yields its keys on every Python version.
    for group_key in found_dict:
        if group_key == 'y':
            # Two-digit year: borrow the century from the current year.
            year = int("%s%s" %
                       (time.strftime("%Y")[:-2], found_dict['y']))
        elif group_key == 'Y':
            year = int(found_dict['Y'])
        elif group_key == 'm':
            month = int(found_dict['m'])
        elif group_key == 'B':
            month = _insensitiveindex(locale_time.f_month, found_dict['B'])
        elif group_key == 'b':
            month = _insensitiveindex(locale_time.a_month, found_dict['b'])
        elif group_key == 'd':
            day = int(found_dict['d'])
        elif group_key == 'H':
            # Compare by value; identity of equal strings is not guaranteed.
            hour = int(found_dict['H'])
        elif group_key == 'I':
            hour = int(found_dict['I'])
            ampm = found_dict.get('p', '').lower()
            # If there was no AM/PM indicator, we'll treat this like AM
            if ampm in ('', locale_time.am_pm[0].lower()):
                # We're in AM so the hour is correct unless we're
                # looking at 12 midnight.
                # 12 midnight == 12 AM == hour 0
                if hour == 12:
                    hour = 0
            elif ampm == locale_time.am_pm[1].lower():
                # We're in PM so we need to add 12 to the hour unless
                # we're looking at 12 noon.
                # 12 noon == 12 PM == hour 12
                if hour != 12:
                    hour += 12
        elif group_key == 'M':
            minute = int(found_dict['M'])
        elif group_key == 'S':
            second = int(found_dict['S'])
        elif group_key == 'A':
            weekday = _insensitiveindex(locale_time.f_weekday,
                                        found_dict['A'])
        elif group_key == 'a':
            weekday = _insensitiveindex(locale_time.a_weekday,
                                        found_dict['a'])
        elif group_key == 'w':
            # %w counts from Sunday == 0; the time struct counts from
            # Monday == 0.
            weekday = int(found_dict['w'])
            if weekday == 0:
                weekday = 6
            else:
                weekday -= 1
        elif group_key == 'j':
            julian = int(found_dict['j'])
        elif group_key == 'Z':
            found_zone = found_dict['Z'].lower()
            if locale_time.timezone[0].lower() == found_zone:
                tz = 0
            elif locale_time.timezone[1].lower() == found_zone:
                tz = 1
            elif locale_time.timezone[2].lower() == found_zone:
                # The blank placeholder entry: no timezone given.
                tz = 0
    #XXX <bc>: If calculating fxns are never exposed to the general
    #          populous then just inline calculations.
    # Fill in whichever of day-of-year, month/day and weekday can be derived
    # from the fields that were found.
    if julian == -1 and year != -1 and month != -1 and day != -1:
        julian = julianday(year, month, day)
    if (month == -1 or day == -1) and julian != -1 and year != -1:
        year, month, day = gregorian(julian, year)
    if weekday == -1 and year != -1 and month != -1 and day != -1:
        weekday = dayofweek(year, month, day)
    return time.struct_time(
        (year, month, day, hour, minute, second, weekday, julian, tz))
482
483def _insensitiveindex(lst, findme):
484    # Perform a case-insensitive index search.
485
486    #XXX <bc>: If LocaleTime is not exposed, then consider removing this and
487    #          just lowercase when LocaleTime sets its vars and lowercasing
488    #          search values.
489    findme = findme.lower()
490    for key,item in enumerate(lst):
491        if item.lower() == findme:
492            return key
493    else:
494        raise ValueError("value not in list")
495
def _gregorian_days(years):
    # Number of days in the given count of whole Gregorian years: 365 per
    # year plus one leap day every 4th year, except every 100th, but again
    # every 400th.  floor(365.2425 * years) is NOT equivalent to this.
    return 365 * years + years // 4 - years // 100 + years // 400

def firstjulian(year):
    """Calculate the Julian date up until the first of the year.

    Returns the Julian day number of January 1 of year, e.g. 2451545 for
    the year 2000.
    """
    return _gregorian_days(year + 4799) - 31738

def julianday(year, month, day):
    """Calculate the Julian day since the beginning of the year.
    Calculated from the Gregorian date.

    January 1 is day 1.
    """
    # Shift the calendar so the year starts on March 1; the leap day then
    # falls at the very end of the shifted year.
    a = (14 - month) // 12
    shifted_year = year + 4800 - a
    shifted_month = month + (12 * a) - 3
    day_number = (day - 32045
                  + ((153 * shifted_month) + 2) // 5
                  + _gregorian_days(shifted_year))
    return day_number - firstjulian(year) + 1

def gregorian(julian, year):
    """Return 3-item list containing Gregorian date based on the Julian day.

    julian is the day of the year (January 1 is day 1); the result is
    [year, month, day].
    """
    a = 32043 + julian + firstjulian(year)
    b = ((4 * a) + 3) // 146097         # whole 400-year cycles
    c = a - ((146097 * b) // 4)
    d = ((4 * c) + 3) // 1461           # whole years within the cycle
    e = c - ((1461 * d) // 4)
    m = ((5 * e) + 2) // 153            # month counted from March == 0
    day = 1 + e - (((153 * m) + 2) // 5)
    month = m + 3 - (12 * (m // 10))
    year = (100 * b) + d - 4800 + (m // 10)
    return [year, month, day]
521
def dayofweek(year, month, day):
    """Calculate the day of the week (Monday is 0).

    year, month and day are a Gregorian date; the result runs from 0
    (Monday) to 6 (Sunday).
    """
    # Treat January and February as months 11 and 12 of the previous year.
    a = (14 - month) // 12
    y = year - a
    m = month + (12 * a) - 2
    # Count leap days exactly (every 4th year, except every 100th, but every
    # 400th); (97 * y) // 400 only approximates this and is off by one in
    # some years.
    leap_days = y // 4 - y // 100 + y // 400
    weekday = (day + y + leap_days + ((31 * m) // 12)) % 7
    # The formula yields Sunday == 0; rotate so that Monday == 0.
    return (weekday - 1) % 7
532