_strptime.py revision 2bdb61479d235339dc92505d5eccaa6accf4d94c
"""Strptime-related classes and functions.

CLASSES:
    LocaleTime -- Discovers and/or stores locale-specific time information
    TimeRE -- Creates regexes for pattern matching a string of text containing
                time information as returned by time.strftime()

FUNCTIONS:
    firstjulian -- Calculates the Julian date up to the first of the specified
                    year
    gregorian -- Calculates the Gregorian date based on the Julian day and
                    year
    julianday -- Calculates the Julian day since the first of the year based
                    on the Gregorian date
    dayofweek -- Calculates the day of the week from the Gregorian date.
    strptime -- Calculates the time struct represented by the passed-in string

Requires Python 2.2.1 or higher.
Can be used in Python 2.2 if the following line is added:
    >>> True = 1; False = 0

"""
23import time
24import locale
25import calendar
26from re import compile as re_compile
27from re import IGNORECASE
28from string import whitespace as whitespace_string
29
# Module metadata: version tuple and author contact details.
__version__ = (2,1,5)
__author__ = "Brett Cannon"
__email__ = "drifty@bigfoot.com"

# Names exported by a star-import of this module.
__all__ = ['strptime']
35
class LocaleTime(object):
    """Stores and handles locale-specific information related to time.

    Every attribute is exposed as a read-only property; assigning to one
    raises TypeError.  A value that was not passed to the constructor is
    worked out on first access (from the calendar, time and locale modules)
    and cached in a name-mangled instance variable.

    ATTRIBUTES:
        f_weekday -- full weekday names (7-item list)
        a_weekday -- abbreviated weekday names (7-item list)
        f_month -- full month names (13-item list; dummy value in [0], which
                    is added by code)
        a_month -- abbreviated month names (13-item list; dummy value in
                    [0], which is added by code)
        am_pm -- AM/PM representation (2-item list)
        LC_date_time -- format string for date/time representation (string)
        LC_date -- format string for date representation (string)
        LC_time -- format string for time representation (string)
        timezone -- non-daylight- and daylight-savings timezone names
                    (3-item list; code tacks on blank item at end for
                    possible lack of timezone such as UTC)
        lang -- Language used by instance (string)

    """

    def __init__(self, f_weekday=None, a_weekday=None, f_month=None,
    a_month=None, am_pm=None, LC_date_time=None, LC_time=None, LC_date=None,
    timezone=None, lang=None):
        """Optionally set attributes with passed-in values.

        Sequences are copied, so later changes to the caller's object do not
        leak into the instance.  Anything left as None is computed lazily by
        the matching property.

        Raises TypeError when a passed-in sequence has the wrong length
        (7 weekday names, 12 month names, 2 AM/PM strings, 2 timezone names).

        """
        if f_weekday is None:
            self.__f_weekday = None
        elif len(f_weekday) == 7:
            self.__f_weekday = list(f_weekday)
        else:
            raise TypeError("full weekday names must be a 7-item sequence")
        if a_weekday is None:
            self.__a_weekday = None
        elif len(a_weekday) == 7:
            self.__a_weekday = list(a_weekday)
        else:
            raise TypeError(
                    "abbreviated weekday names must be a 7-item  sequence")
        if f_month is None:
            self.__f_month = None
        elif len(f_month) == 12:
            # Dummy entry at index 0 makes list index == month number.
            self.__f_month = self.__pad(f_month, True)
        else:
            raise TypeError("full month names must be a 12-item sequence")
        if a_month is None:
            self.__a_month = None
        elif len(a_month) == 12:
            self.__a_month = self.__pad(a_month, True)
        else:
            raise TypeError(
                        "abbreviated month names must be a 12-item sequence")
        if am_pm is None:
            self.__am_pm = None
        elif len(am_pm) == 2:
            self.__am_pm = list(am_pm)
        else:
            raise TypeError("AM/PM representation must be a 2-item sequence")
        self.__LC_date_time = LC_date_time
        self.__LC_time = LC_time
        self.__LC_date = LC_date
        if timezone:
            if len(timezone) != 2:
                raise TypeError("timezone names must contain 2 items")
            # Blank entry at the end stands for "no timezone given".
            self.__timezone = self.__pad(timezone, False)
        else:
            self.__timezone = timezone
        self.__lang = lang

    def __pad(self, seq, front):
        """Return a list copy of seq with '' added at the front (front is
        true) or at the back (front is false)."""
        seq = list(seq)
        if front:
            seq.insert(0, '')
        else:
            seq.append('')
        return seq

    def __set_nothing(self, stuff):
        """Raise TypeError when trying to set an attribute."""
        raise TypeError("attribute does not support assignment")

    def __get_f_weekday(self):
        """Fetch self.f_weekday, computing it first if needed."""
        if not self.__f_weekday:
            self.__calc_weekday()
        return self.__f_weekday

    def __get_a_weekday(self):
        """Fetch self.a_weekday, computing it first if needed."""
        if not self.__a_weekday:
            self.__calc_weekday()
        return self.__a_weekday

    f_weekday = property(__get_f_weekday, __set_nothing,
                        doc="Full weekday names")
    a_weekday = property(__get_a_weekday, __set_nothing,
                        doc="Abbreviated weekday names")

    def __get_f_month(self):
        """Fetch self.f_month, computing it first if needed."""
        if not self.__f_month:
            self.__calc_month()
        return self.__f_month

    def __get_a_month(self):
        """Fetch self.a_month, computing it first if needed."""
        if not self.__a_month:
            self.__calc_month()
        return self.__a_month

    f_month = property(__get_f_month, __set_nothing,
                        doc="Full month names (dummy value at index 0)")
    a_month = property(__get_a_month, __set_nothing,
                        doc="Abbreviated month names (dummy value at index 0)")

    def __get_am_pm(self):
        """Fetch self.am_pm, computing it first if needed."""
        if not self.__am_pm:
            self.__calc_am_pm()
        return self.__am_pm

    am_pm = property(__get_am_pm, __set_nothing, doc="AM/PM representation")

    def __get_timezone(self):
        """Fetch self.timezone, computing it first if needed."""
        if not self.__timezone:
            self.__calc_timezone()
        return self.__timezone

    timezone = property(__get_timezone, __set_nothing,
                        doc="Timezone representation (dummy value at index 2)")

    def __get_LC_date_time(self):
        """Fetch self.LC_date_time, computing it first if needed."""
        if not self.__LC_date_time:
            self.__calc_date_time()
        return self.__LC_date_time

    def __get_LC_date(self):
        """Fetch self.LC_date, computing it first if needed."""
        if not self.__LC_date:
            self.__calc_date_time()
        return self.__LC_date

    def __get_LC_time(self):
        """Fetch self.LC_time, computing it first if needed."""
        if not self.__LC_time:
            self.__calc_date_time()
        return self.__LC_time

    LC_date_time = property(__get_LC_date_time, __set_nothing,
        doc="Format string for locale's date/time representation ('%c' format)")
    LC_date = property(__get_LC_date, __set_nothing,
        doc="Format string for locale's date representation ('%x' format)")
    LC_time = property(__get_LC_time, __set_nothing,
        doc="Format string for locale's time representation ('%X' format)")

    def __get_lang(self):
        """Fetch self.lang, computing it first if needed."""
        if not self.__lang:
            self.__calc_lang()
        return self.__lang

    lang = property(__get_lang, __set_nothing, doc="Language used for instance")

    def __calc_weekday(self):
        """Set self.__a_weekday and self.__f_weekday using the calendar module.

        Values supplied to the constructor are never overwritten.

        """
        a_weekday = [calendar.day_abbr[i] for i in range(7)]
        f_weekday = [calendar.day_name[i] for i in range(7)]
        if not self.__a_weekday:
            self.__a_weekday = a_weekday
        if not self.__f_weekday:
            self.__f_weekday = f_weekday

    def __calc_month(self):
        """Set self.__f_month and self.__a_month using the calendar module.

        The calendar sequences already carry a dummy entry at index 0.
        Values supplied to the constructor are never overwritten.

        """
        a_month = [calendar.month_abbr[i] for i in range(13)]
        f_month = [calendar.month_name[i] for i in range(13)]
        if not self.__a_month:
            self.__a_month = a_month
        if not self.__f_month:
            self.__f_month = f_month

    def __calc_am_pm(self):
        """Set self.__am_pm by using time.strftime().

        '%p' is formatted for hour 1 (AM) and hour 22 (PM) of the date
        (1999, 3, 17, hour, 44, 55, 2, 76, 0), the same static date that is
        used in __calc_date_time().

        """
        am_pm = []
        for hour in (1, 22):
            time_tuple = time.struct_time((1999,3,17,hour,44,55,2,76,0))
            am_pm.append(time.strftime("%p", time_tuple))
        self.__am_pm = am_pm

    def __calc_date_time(self):
        """Set self.__LC_date_time, self.__LC_date, & self.__LC_time by using
        time.strftime().

        The output of '%c', '%x' and '%X' for a known date is turned back
        into a format string by replacing each recognizable value with its
        directive.  Use (1999,3,17,22,44,55,2,76,0) for magic date because
        the amount of overloaded numbers is minimized.  The order in which
        searches for values within the format string is very important; it
        eliminates possible ambiguity for what something represents.

        Values supplied to the constructor are never overwritten.

        """
        time_tuple = time.struct_time((1999,3,17,22,44,55,2,76,0))
        # Sunday 1999-01-03 is week "01" when weeks start on Sunday (%U) but
        # week "00" when they start on Monday (%W); used to tell them apart.
        week_probe = time.struct_time((1999,1,3,1,1,1,6,3,0))
        replacement_pairs = [
            ('%', '%%'),
            (self.f_weekday[2], '%A'),
            (self.f_month[3], '%B'),
            (self.a_weekday[2], '%a'),
            (self.a_month[3], '%b'),
            (self.am_pm[1], '%p'),
            (self.timezone[0], '%Z'),
            (self.timezone[1], '%Z'),
            ('1999', '%Y'),
            ('99', '%y'),
            ('22', '%H'),
            ('44', '%M'),
            ('55', '%S'),
            ('76', '%j'),
            ('17', '%d'),
            ('03', '%m'),
            ('2', '%w'),
            ('10', '%I')]
        date_time = []
        for directive in ('%c', '%x', '%X'):
            current_format = time.strftime(directive, time_tuple)
            for old, new in replacement_pairs:
                # Missing locale info shows up as '' (e.g. no AM/PM string);
                # replacing '' would insert the directive between every
                # character, so skip it.
                if old:
                    current_format = current_format.replace(old, new)
            if time.strftime(directive, week_probe).find('00') != -1:
                U_W = '%W'
            else:
                U_W = '%U'
            # The magic date falls in week 11 under both numbering schemes.
            date_time.append(current_format.replace('11', U_W))
        if not self.__LC_date_time:
            self.__LC_date_time = date_time[0]
        if not self.__LC_date:
            self.__LC_date = date_time[1]
        if not self.__LC_time:
            self.__LC_time = date_time[2]

    def __calc_timezone(self):
        """Set self.__timezone by using time.tzname.

        Empty string used for matching when timezone is not used/needed such
        as with UTC.

        """
        self.__timezone = self.__pad(time.tzname, False)

    def __calc_lang(self):
        """Set self.lang by using locale.getlocale() or
        locale.getdefaultlocale().

        Falls back to '' when neither call reports a language, so that
        self.lang is always a string.

        """
        current_lang = locale.getlocale(locale.LC_TIME)[0]
        if not current_lang:
            current_lang = locale.getdefaultlocale()[0]
        if current_lang:
            self.__lang = current_lang
        else:
            self.__lang = ''
273
class TimeRE(dict):
    """Handle conversion from format directives to regexes.

    The dictionary maps a directive letter (without the '%') to a regex
    string containing a named group of the same letter.  Numeric directives
    are stored up front; locale-dependent ones ('A', 'a', 'B', 'b', 'c', 'p',
    'x', 'X', 'Z') are built from self.locale_time the first time they are
    requested and then cached in the dictionary.

    """

    def __init__(self, locale_time=None):
        """Initialize instance with non-locale regexes and store LocaleTime
        object.

        When locale_time is None a new LocaleTime instance is created for
        this object, instead of sharing one built when the class was defined.

        """
        super(TimeRE,self).__init__({
            'd': r"(?P<d>3[0-1]|[0-2]\d|\d| \d)",  #The " \d" option is
                                                         #to make %c from ANSI
                                                         #C work
            'H': r"(?P<H>2[0-3]|[0-1]\d|\d)",
            'I': r"(?P<I>0\d|1[0-2]|\d)",
            # Longest alternatives first so that three- and two-digit days
            # are not cut short by the single-digit alternative.
            'j': r"(?P<j>36[0-6]|3[0-5]\d|[0-2]\d\d|\d\d|\d)",
            'm': r"(?P<m>0\d|1[0-2]|\d)",
            'M': r"(?P<M>[0-5]\d|\d)",
            'S': r"(?P<S>6[0-1]|[0-5]\d|\d)",
            'U': r"(?P<U>5[0-3]|[0-4]\d|\d)",
            'w': r"(?P<w>[0-6])",
            'W': r"(?P<W>5[0-3]|[0-4]\d|\d)",  #Same as U
            'y': r"(?P<y>\d\d)",
            'Y': r"(?P<Y>\d\d\d\d)"})
        if locale_time is None:
            locale_time = LocaleTime()
        self.locale_time = locale_time

    def __getitem__(self, fetch):
        """Try to fetch regex; if it does not exist, construct it.

        '%' yields a literal '%' and is not stored.  A directive that is
        neither stored nor constructible raises KeyError.

        """
        try:
            return super(TimeRE,self).__getitem__(fetch)
        except KeyError:
            pass
        if fetch == '%':
            return '%'
        locale_time = self.locale_time
        if fetch == 'A':
            regex = self.__seqToRE(locale_time.f_weekday, fetch)
        elif fetch == 'a':
            regex = self.__seqToRE(locale_time.a_weekday, fetch)
        elif fetch == 'B':
            # Index 0 of the month lists is a dummy value.
            regex = self.__seqToRE(locale_time.f_month[1:], fetch)
        elif fetch == 'b':
            regex = self.__seqToRE(locale_time.a_month[1:], fetch)
        elif fetch == 'c':
            regex = self.pattern(locale_time.LC_date_time)
        elif fetch == 'p':
            regex = self.__seqToRE(locale_time.am_pm, fetch)
        elif fetch == 'x':
            regex = self.pattern(locale_time.LC_date)
        elif fetch == 'X':
            regex = self.pattern(locale_time.LC_time)
        elif fetch == 'Z':
            regex = self.__seqToRE(locale_time.timezone, fetch)
        else:
            raise KeyError(fetch)
        self[fetch] = regex
        return regex

    def __seqToRE(self, to_convert, directive):
        """Convert a sequence to a regex string for matching directive.

        The result is a group named after directive whose alternatives are
        the items of to_convert, longest first, so that a name which is a
        prefix of another name cannot shadow the longer one.  Items are
        regex-escaped so that punctuation in a name (such as 'a.m.') is
        matched literally.  to_convert itself is not modified.

        """
        from re import escape as re_escape

        def length(item):
            """Return len(item), or 0 for an item without a length."""
            try:
                return len(item)
            except TypeError:
                return 0

        names = list(to_convert)
        # Decorate-sort-undecorate: longest first, original order for ties.
        decorated = [(-length(names[i]), i, names[i])
                        for i in range(len(names))]
        decorated.sort()
        alternatives = ['(?:%s)' % re_escape("%s" % (entry[2],))
                            for entry in decorated]
        return '(?P<%s>%s)' % (directive, '|'.join(alternatives))

    def pattern(self, format):
        """Return re pattern for the format string.

        Literal text in format is regex-escaped, every whitespace character
        is replaced by r'\s*', and every '%' plus the character after it is
        replaced by the regex for that directive.  A '%' that ends the
        string raises IndexError; an unknown directive raises KeyError.

        """
        # Escape the backslash first so the backslashes added for the other
        # metacharacters are not themselves escaped.
        for metachar in '\\.^$*+?(){}[]|':
            format = format.replace(metachar, '\\' + metachar)
        for whitespace in whitespace_string:
            format = format.replace(whitespace, r'\s*')
        processed_format = ''
        while format.find('%') != -1:
            directive_index = format.index('%')+1
            processed_format = "%s%s%s" % (processed_format,
                                format[:directive_index-1],
                                self[format[directive_index]])
            format = format[directive_index+1:]
        return "%s%s" % (processed_format, format)

    def compile(self, format):
        """Return a compiled re object for the format string.

        The pattern starts with a regex comment holding
        self.locale_time.lang, and matching ignores case.

        """
        regex = "(?#%s)%s" % (self.locale_time.lang, self.pattern(format))
        return re_compile(regex, IGNORECASE)
370
371
def _index_ignoring_case(names, value):
    """Return the index of the first item of names equal to value, ignoring
    case; raise ValueError (like list.index) when there is none."""
    wanted = value.lower()
    for position in range(len(names)):
        if names[position].lower() == wanted:
            return position
    raise ValueError("%r is not in list" % (value,))

def strptime(data_string, format="%a %b %d %H:%M:%S %Y"):
    """Convert data_string to a time struct based on format.

    The object passed in for format may either be a re object compiled by
    strptime() or a format string.  If False is passed in for data_string
    then an re object for format will be returned.  The re object
    must be used with the same language as used to compile the re object.

    Fields that the format does not provide are -1 in the result, except
    that the day of the year, the month/day and the weekday are derived from
    each other when enough of the date is known.  Matching ignores case.  A
    two-digit year ('%y') is mapped as POSIX specifies: 69-99 to 1969-1999
    and 00-68 to 2000-2068.

    Raises TypeError when a re object was not compiled for the current
    language, and ValueError when data_string does not match format.

    """
    locale_time = LocaleTime()
    if isinstance(format, type(re_compile(''))):
        # TimeRE.compile() embeds the language in the pattern as a comment.
        if format.pattern.find(locale_time.lang) == -1:
            raise TypeError("re object not created with same language as "
                            "LocaleTime instance")
        compiled_re = format
    else:
        compiled_re = TimeRE(locale_time).compile(format)
    if data_string is False:
        return compiled_re
    found = compiled_re.match(data_string)
    if not found:
        raise ValueError("time data did not match format")
    year = month = day = hour = minute = second = weekday = julian = tz = -1
    found_dict = found.groupdict()
    for group_key in found_dict:
        if group_key == 'y':
            year = int(found_dict['y'])
            if year <= 68:
                year += 2000
            else:
                year += 1900
        elif group_key == 'Y':
            year = int(found_dict['Y'])
        elif group_key == 'm':
            month = int(found_dict['m'])
        elif group_key == 'B':
            # Month lists have a dummy value at index 0, so the index is the
            # month number.
            month = _index_ignoring_case(locale_time.f_month,
                                            found_dict['B'])
        elif group_key == 'b':
            month = _index_ignoring_case(locale_time.a_month,
                                            found_dict['b'])
        elif group_key == 'd':
            day = int(found_dict['d'])
        elif group_key == 'H':
            hour = int(found_dict['H'])
        elif group_key == 'I':
            hour = int(found_dict['I'])
            ampm = found_dict.get('p')
            if ampm is not None:
                ampm = ampm.lower()
                if ampm == locale_time.am_pm[0].lower():
                    # We're in AM so the hour is correct unless we're
                    # looking at 12 midnight.
                    # 12 midnight == 12 AM == hour 0
                    if hour == 12:
                        hour = 0
                elif ampm == locale_time.am_pm[1].lower():
                    # We're in PM so we need to add 12 to the hour unless
                    # we're looking at 12 noon.
                    # 12 noon == 12 PM == hour 12
                    if hour != 12:
                        hour += 12
        elif group_key == 'M':
            minute = int(found_dict['M'])
        elif group_key == 'S':
            second = int(found_dict['S'])
        elif group_key == 'A':
            weekday = _index_ignoring_case(locale_time.f_weekday,
                                            found_dict['A'])
        elif group_key == 'a':
            weekday = _index_ignoring_case(locale_time.a_weekday,
                                            found_dict['a'])
        elif group_key == 'w':
            # %w counts from Sunday == 0; the time struct from Monday == 0.
            weekday = int(found_dict['w'])
            if weekday == 0:
                weekday = 6
            else:
                weekday -= 1
        elif group_key == 'j':
            julian = int(found_dict['j'])
        elif group_key == 'Z':
            found_zone = found_dict['Z'].lower()
            if locale_time.timezone[0].lower() == found_zone:
                tz = 0
            elif locale_time.timezone[1].lower() == found_zone:
                tz = 1
            elif locale_time.timezone[2].lower() == found_zone:
                tz = 0
    if julian == -1 and year != -1 and month != -1 and day != -1:
        julian = julianday(year, month, day)
    if (month == -1 or day == -1) and julian != -1 and year != -1:
        year,month,day = gregorian(julian, year)
    if weekday == -1 and year != -1 and month != -1 and day != -1:
        weekday = dayofweek(year, month, day)
    return time.struct_time((year,month,day,hour,minute,second,weekday,
                            julian,tz))
463
def firstjulian(year):
    """Calculate the Julian date up until the first of the year.

    Returns the Julian Day Number of 1 January of year in the Gregorian
    calendar (2451545 for the year 2000).  Leap days are counted exactly
    with y//4 - y//100 + y//400; multiplying by the average year length
    146097/400 instead is off by one day for some years (2005, for one).

    """
    # Years counted from a March-based epoch; January belongs to the
    # previous such year, hence 4799 rather than 4800.
    y = year+4799
    return (365*y)+(y//4)-(y//100)+(y//400)-31738
467
def julianday(year, month, day):
    """Calculate the Julian day since the beginning of the year from the
    Gregorian date.

    Returns the day of the year, with 1 January being day 1.  It is the
    difference between the Julian Day Numbers of the date and of 1 January
    of the same year; both are computed with exact leap-day counts so that
    dates after February are right in every year.

    """
    def day_number(y, m, d):
        """Return the Julian Day Number of the Gregorian date y-m-d."""
        a = (14-m)//12
        shifted_year = y+4800-a
        shifted_month = m+(12*a)-3   # March is 0, February is 11
        return (d+(((153*shifted_month)+2)//5)+(365*shifted_year)+
                (shifted_year//4)-(shifted_year//100)+(shifted_year//400)-
                32045)

    return day_number(year, month, day)-day_number(year, 1, 1)+1
473
def gregorian(julian, year):
    """Return a 3-item list containing the Gregorian date based on the
    Julian day.

    julian is the day of the year (1 January is day 1) within year.  The
    result is [year, month, day].  The Julian Day Number of 1 January is
    computed here with exact leap-day counts, so the conversion is right
    for every year.

    """
    # Julian Day Number of 1 January of year.
    y = year+4799
    first_of_year = (365*y)+(y//4)-(y//100)+(y//400)-31738
    # Standard Julian-Day-Number to Gregorian-date conversion, applied to
    # day number first_of_year + julian - 1.
    a = 32043+julian+first_of_year
    b = ((4*a)+3)//146097
    c = a-((146097*b)//4)
    d = ((4*c)+3)//1461
    e = c-((1461*d)//4)
    m = ((5*e)+2)//153
    day = 1+e-(((153*m)+2)//5)
    month = m+3-(12*(m//10))
    year = (100*b)+d-4800+(m//10)
    return [year, month, day]
486
def dayofweek(year, month, day):
    """Calculate the day of the week (Monday is 0).

    Takes a Gregorian date and returns 0 (Monday) through 6 (Sunday).  Leap
    days are counted exactly with y//4 - y//100 + y//400; the shortcut
    (97*y)//400 gives the wrong weekday for some years (2004-03-01, for
    one).

    """
    # Treat January and February as months 11 and 12 of the previous year.
    a = (14-month)//12
    y = year-a
    m = month+(12*a)-2
    # The congruence yields Sunday == 0 ...
    weekday = (day+y+(y//4)-(y//100)+(y//400)+((31*m)//12))%7
    # ... so rotate it to Monday == 0.
    return (weekday+6)%7
496