# _strptime.py revision 08e54270f2dae5014f04d627739f71ecce5ad19e
"""Strptime-related classes and functions.

CLASSES:
    LocaleTime -- Discovers and/or stores locale-specific time information
    TimeRE -- Creates regexes for pattern matching a string of text containing
                time information as is returned by time.strftime()

FUNCTIONS:
    firstjulian -- Calculates the Julian date up to the first of the specified
                    year
    gregorian -- Calculates the Gregorian date based on the Julian day and
                    year
    julianday -- Calculates the Julian day since the first of the year based
                    on the Gregorian date
    dayofweek -- Calculates the day of the week from the Gregorian date.
    strptime -- Calculates the time struct represented by the passed-in string

Requires Python 2.2.1 or higher.
Can be used in Python 2.2 if the following line is added:
    >>> True = 1; False = 0
"""
import time
import locale
import calendar
from re import compile as re_compile
from re import escape as re_escape
from re import IGNORECASE
from string import whitespace as whitespace_string

__author__ = "Brett Cannon"
__email__ = "drifty@bigfoot.com"

__all__ = ['strptime']

RegexpType = type(re_compile(''))

# Characters that carry special meaning in a regex; literal occurrences in a
# format string must be backslash-escaped before the format becomes a pattern.
_REGEX_CHARS = re_compile(r"([\\.^$*+?(){}\[\]|])")


class LocaleTime(object):
    """Stores and handles locale-specific information related to time.

    Every attribute is computed lazily on first access unless a value was
    passed to the constructor.

    ATTRIBUTES (all read-only after instance creation! Instance variables that
                store the values have mangled names):
        f_weekday -- full weekday names (7-item list)
        a_weekday -- abbreviated weekday names (7-item list)
        f_month -- full month names (13-item list; dummy value in [0], which
                    is added by code)
        a_month -- abbreviated month names (13-item list, dummy value in
                    [0], which is added by code)
        am_pm -- AM/PM representation (2-item list)
        LC_date_time -- format string for date/time representation (string)
        LC_date -- format string for date representation (string)
        LC_time -- format string for time representation (string)
        timezone -- daylight- and non-daylight-savings timezone representation
                    (3-item list; code tacks on blank item at end for
                    possible lack of timezone such as UTC)
        lang -- Language used by instance (string)
    """

    def __init__(self, f_weekday=None, a_weekday=None, f_month=None,
                 a_month=None, am_pm=None, LC_date_time=None, LC_time=None,
                 LC_date=None, timezone=None, lang=None):
        """Optionally set attributes with passed-in values.

        Raises TypeError when a supplied sequence has the wrong length.
        """
        if f_weekday is None:
            self.__f_weekday = None
        elif len(f_weekday) == 7:
            self.__f_weekday = list(f_weekday)
        else:
            raise TypeError("full weekday names must be a 7-item sequence")
        if a_weekday is None:
            self.__a_weekday = None
        elif len(a_weekday) == 7:
            self.__a_weekday = list(a_weekday)
        else:
            raise TypeError(
                "abbreviated weekday names must be a 7-item sequence")
        if f_month is None:
            self.__f_month = None
        elif len(f_month) == 12:
            self.__f_month = self.__pad(f_month, True)
        else:
            raise TypeError("full month names must be a 12-item sequence")
        if a_month is None:
            self.__a_month = None
        elif len(a_month) == 12:
            self.__a_month = self.__pad(a_month, True)
        else:
            raise TypeError(
                "abbreviated month names must be a 12-item sequence")
        if am_pm is None:
            self.__am_pm = None
        elif len(am_pm) == 2:
            # Copy into a list so later consumers can rely on list methods
            # and the caller's sequence is never shared.
            self.__am_pm = list(am_pm)
        else:
            raise TypeError("AM/PM representation must be a 2-item sequence")
        self.__LC_date_time = LC_date_time
        self.__LC_time = LC_time
        self.__LC_date = LC_date
        self.__timezone = timezone
        if timezone:
            if len(timezone) != 2:
                raise TypeError("timezone names must contain 2 items")
            else:
                self.__timezone = self.__pad(timezone, False)
        self.__lang = lang

    def __pad(self, seq, front):
        # Return a list copy of seq with '' added at the front (front is
        # true) or at the back.
        seq = list(seq)
        if front:
            seq.insert(0, '')
        else:
            seq.append('')
        return seq

    def __set_nothing(self, stuff):
        # Raise TypeError when trying to set an attribute.
        raise TypeError("attribute does not support assignment")

    def __get_f_weekday(self):
        # Fetch self.f_weekday.
        if not self.__f_weekday:
            self.__calc_weekday()
        return self.__f_weekday

    def __get_a_weekday(self):
        # Fetch self.a_weekday.
        if not self.__a_weekday:
            self.__calc_weekday()
        return self.__a_weekday

    f_weekday = property(__get_f_weekday, __set_nothing,
                         doc="Full weekday names")
    a_weekday = property(__get_a_weekday, __set_nothing,
                         doc="Abbreviated weekday names")

    def __get_f_month(self):
        # Fetch self.f_month.
        if not self.__f_month:
            self.__calc_month()
        return self.__f_month

    def __get_a_month(self):
        # Fetch self.a_month.
        if not self.__a_month:
            self.__calc_month()
        return self.__a_month

    f_month = property(__get_f_month, __set_nothing,
                       doc="Full month names (dummy value at index 0)")
    a_month = property(__get_a_month, __set_nothing,
                       doc="Abbreviated month names (dummy value at index 0)")

    def __get_am_pm(self):
        # Fetch self.am_pm.
        if not self.__am_pm:
            self.__calc_am_pm()
        return self.__am_pm

    am_pm = property(__get_am_pm, __set_nothing, doc="AM/PM representation")

    def __get_timezone(self):
        # Fetch self.timezone.
        if not self.__timezone:
            self.__calc_timezone()
        return self.__timezone

    timezone = property(__get_timezone, __set_nothing,
                        doc="Timezone representation (dummy value at index 2)")

    def __get_LC_date_time(self):
        # Fetch self.LC_date_time.
        if not self.__LC_date_time:
            self.__calc_date_time()
        return self.__LC_date_time

    def __get_LC_date(self):
        # Fetch self.LC_date.
        if not self.__LC_date:
            self.__calc_date_time()
        return self.__LC_date

    def __get_LC_time(self):
        # Fetch self.LC_time.
        if not self.__LC_time:
            self.__calc_date_time()
        return self.__LC_time

    LC_date_time = property(
        __get_LC_date_time, __set_nothing,
        doc=
        "Format string for locale's date/time representation ('%c' format)")
    LC_date = property(__get_LC_date, __set_nothing,
        doc="Format string for locale's date representation ('%x' format)")
    LC_time = property(__get_LC_time, __set_nothing,
        doc="Format string for locale's time representation ('%X' format)")

    def __get_lang(self):
        # Fetch self.lang.
        if not self.__lang:
            self.__calc_lang()
        return self.__lang

    lang = property(__get_lang, __set_nothing,
                    doc="Language used for instance")

    def __calc_weekday(self):
        # Set self.__a_weekday and self.__f_weekday using the calendar
        # module.  Values supplied to the constructor are left untouched.
        a_weekday = [calendar.day_abbr[i] for i in range(7)]
        f_weekday = [calendar.day_name[i] for i in range(7)]
        if not self.__a_weekday:
            self.__a_weekday = a_weekday
        if not self.__f_weekday:
            self.__f_weekday = f_weekday

    def __calc_month(self):
        # Set self.__f_month and self.__a_month using the calendar module.
        # Index 0 of the calendar sequences is the dummy entry.
        a_month = [calendar.month_abbr[i] for i in range(13)]
        f_month = [calendar.month_name[i] for i in range(13)]
        if not self.__a_month:
            self.__a_month = a_month
        if not self.__f_month:
            self.__f_month = f_month

    def __calc_am_pm(self):
        # Set self.__am_pm by using time.strftime().

        # The magic date (1999,3,17,hour,44,55,2,76,0) is not really that
        # magical; just happened to have used it everywhere else where a
        # static date was needed.
        am_pm = []
        for hour in (1, 22):
            time_tuple = time.struct_time((1999,3,17,hour,44,55,2,76,0))
            am_pm.append(time.strftime("%p", time_tuple))
        self.__am_pm = am_pm

    def __calc_date_time(self):
        # Set self.__LC_date_time, self.__LC_date, & self.__LC_time by using
        # time.strftime().

        # Use (1999,3,17,22,44,55,2,76,0) for magic date because the amount of
        # overloaded numbers is minimized.  The order in which searches for
        # values within the format string is very important; it eliminates
        # possible ambiguity for what something represents.
        time_tuple = time.struct_time((1999,3,17,22,44,55,2,76,0))
        date_time = [None, None, None]
        date_time[0] = time.strftime("%c", time_tuple)
        date_time[1] = time.strftime("%x", time_tuple)
        date_time[2] = time.strftime("%X", time_tuple)
        replacement_pairs = (
            ('%', '%%'), (self.f_weekday[2], '%A'),
            (self.f_month[3], '%B'), (self.a_weekday[2], '%a'),
            (self.a_month[3], '%b'), (self.am_pm[1], '%p'),
            (self.timezone[0], '%Z'), (self.timezone[1], '%Z'),
            ('1999', '%Y'), ('99', '%y'), ('22', '%H'),
            ('44', '%M'), ('55', '%S'), ('76', '%j'),
            ('17', '%d'), ('03', '%m'), ('3', '%m'),
            # '3' needed for when no leading zero.
            ('2', '%w'), ('10', '%I'))
        # 1999-01-03 is a Sunday that precedes the first Monday of the year,
        # so it is week 00 under %W and week 01 under %U.
        week_probe = time.struct_time((1999,1,3,1,1,1,6,3,0))
        for offset, directive in ((0,'%c'), (1,'%x'), (2,'%X')):
            current_format = date_time[offset]
            for old, new in replacement_pairs:
                # Must deal with possible lack of locale info
                # manifesting itself as the empty string (e.g., Swedish's
                # lack of AM/PM info) or a platform returning a tuple of empty
                # strings (e.g., MacOS 9 having timezone as ('','')).
                if old:
                    current_format = current_format.replace(old, new)
            # str.find() returns -1 (which is true) when nothing is found, so
            # the result has to be compared explicitly.
            if time.strftime(directive, week_probe).find('00') != -1:
                U_W = '%W'
            else:
                U_W = '%U'
            # '11' is the week number of the magic date.
            date_time[offset] = current_format.replace('11', U_W)
        if not self.__LC_date_time:
            self.__LC_date_time = date_time[0]
        if not self.__LC_date:
            self.__LC_date = date_time[1]
        if not self.__LC_time:
            self.__LC_time = date_time[2]

    def __calc_timezone(self):
        # Set self.__timezone by using time.tzname.
        #
        # Empty string used for matching when timezone is not used/needed such
        # as with UTC.
        self.__timezone = self.__pad(time.tzname, False)

    def __calc_lang(self):
        # Set self.__lang by using locale.getlocale() or
        # locale.getdefaultlocale().  If both turn up empty, set the attribute
        # to ''.  This is to stop calls to this method and to make sure
        # strptime() can produce an re object correctly.
        current_lang = locale.getlocale(locale.LC_TIME)[0]
        if not current_lang:
            try:
                current_lang = locale.getdefaultlocale()[0]
            except (AttributeError, ValueError):
                # getdefaultlocale() is deprecated and can reject unusual
                # environment settings; treat both cases as "unknown".
                current_lang = None
        if current_lang:
            self.__lang = current_lang
        else:
            self.__lang = ''


class TimeRE(dict):
    """Handle conversion from format directives to regexes."""

    def __init__(self, locale_time=None):
        """Init inst with non-locale regexes and store LocaleTime object.

        locale_time -- LocaleTime instance to take locale data from; a fresh
                       one is created when omitted so that instances never
                       share a LocaleTime built at import time.
        """
        #XXX: Does 'Y' need to worry about having less or more than 4 digits?
        base = super(TimeRE, self)
        base.__init__({
            # The " \d" option is to make %c from ANSI C work
            'd': r"(?P<d>3[0-1]|[1-2]\d|0[1-9]|[1-9]| [1-9])",
            'H': r"(?P<H>2[0-3]|[0-1]\d|\d)",
            'I': r"(?P<I>1[0-2]|0[1-9]|[1-9])",
            'j': r"(?P<j>36[0-6]|3[0-5]\d|[1-2]\d\d|0[1-9]\d|00[1-9]|[1-9]\d|0[1-9]|[1-9])",
            'm': r"(?P<m>1[0-2]|0[1-9]|[1-9])",
            'M': r"(?P<M>[0-5]\d|\d)",
            'S': r"(?P<S>6[0-1]|[0-5]\d|\d)",
            'U': r"(?P<U>5[0-3]|[0-4]\d|\d)",
            'w': r"(?P<w>[0-6])",
            # W is set below by using 'U'
            'y': r"(?P<y>\d\d)",
            'Y': r"(?P<Y>\d\d\d\d)"})
        base.__setitem__('W', base.__getitem__('U'))
        if locale_time is None:
            locale_time = LocaleTime()
        self.locale_time = locale_time

    def __getitem__(self, fetch):
        """Try to fetch regex; if it does not exist, construct it.

        Locale-dependent regexes are built on first use and cached in the
        dict.  Raises KeyError for an unknown directive.
        """
        try:
            return super(TimeRE, self).__getitem__(fetch)
        except KeyError:
            constructors = {
                'A': lambda: self.__seqToRE(self.locale_time.f_weekday, fetch),
                'a': lambda: self.__seqToRE(self.locale_time.a_weekday, fetch),
                'B': lambda: self.__seqToRE(self.locale_time.f_month[1:],
                                            fetch),
                'b': lambda: self.__seqToRE(self.locale_time.a_month[1:],
                                            fetch),
                'c': lambda: self.pattern(self.locale_time.LC_date_time),
                'p': lambda: self.__seqToRE(self.locale_time.am_pm, fetch),
                'x': lambda: self.pattern(self.locale_time.LC_date),
                'X': lambda: self.pattern(self.locale_time.LC_time),
                'Z': lambda: self.__seqToRE(self.locale_time.timezone, fetch),
                '%': lambda: '%',
                }
            if fetch in constructors:
                self[fetch] = constructors[fetch]()
                return self[fetch]
            else:
                raise

    def __seqToRE(self, to_convert, directive):
        """Convert a list to a regex string for matching a directive.

        Returns '' when every item of to_convert is the empty string.
        """
        def length(item):
            # Items without a length sort as if they were empty.
            try:
                return len(item)
            except TypeError:
                return 0

        to_convert = list(to_convert)  # Don't want to change value in-place.
        for value in to_convert:
            if value != '':
                break
        else:
            return ''
        # Longest names first, in case names in the locale only differ by a
        # suffix and thus the name with the suffix must be tried first.  The
        # position is part of the sort key so that ties keep their original
        # order and the values themselves are never compared.
        decorated = [(-length(value), position, value)
                     for position, value in enumerate(to_convert)]
        decorated.sort()
        # Names are literal text; escape them so characters such as '.' are
        # not interpreted as regex syntax.
        regex = '|'.join([re_escape(item[2]) for item in decorated])
        regex = '(?P<%s>%s' % (directive, regex)
        return '%s)' % regex

    def pattern(self, format):
        """Return re pattern for the format string.

        Literal text in format is matched literally, each whitespace
        character matches any run of whitespace, and every %-directive is
        replaced by its regex.
        """
        processed_format = ''
        # Escape regex metacharacters first so the replacements inserted
        # below are not escaped themselves.
        format = _REGEX_CHARS.sub(r"\\\1", format)
        for whitespace in whitespace_string:
            format = format.replace(whitespace, r'\s*')
        while format.find('%') != -1:
            directive_index = format.index('%')+1
            processed_format = "%s%s%s" % (processed_format,
                                           format[:directive_index-1],
                                           self[format[directive_index]])
            format = format[directive_index+1:]
        return "%s%s" % (processed_format, format)

    def compile(self, format):
        """Return a compiled re object for the format string.

        The language of the LocaleTime object is embedded as a regex comment.
        """
        # The comment is added after pattern() so that it is not escaped.
        regex = "(?#%s)%s" % (self.locale_time.lang, self.pattern(format))
        return re_compile(regex, IGNORECASE)


def strptime(data_string, format="%a %b %d %H:%M:%S %Y"):
    """Return a time struct based on the input data and the format string.

    Raises ValueError when data_string does not match format.
    """
    locale_time = LocaleTime()
    compiled_re = TimeRE(locale_time).compile(format)
    found = compiled_re.match(data_string)
    if not found:
        raise ValueError("time data did not match format")
    year = 1900
    month = day = 1
    hour = minute = second = 0
    tz = -1
    # Defaulted to -1 so as to signal using functions to calc values
    weekday = julian = -1
    found_dict = found.groupdict()
    for group_key in found_dict:
        if group_key == 'y':
            year = int(found_dict['y'])
            # Open Group specification for strptime() states that a %y
            #value in the range of [00, 68] is in the century 2000, while
            #[69,99] is in the century 1900
            if year <= 68:
                year += 2000
            else:
                year += 1900
        elif group_key == 'Y':
            year = int(found_dict['Y'])
        elif group_key == 'm':
            month = int(found_dict['m'])
        elif group_key == 'B':
            month = _insensitiveindex(locale_time.f_month, found_dict['B'])
        elif group_key == 'b':
            month = _insensitiveindex(locale_time.a_month, found_dict['b'])
        elif group_key == 'd':
            day = int(found_dict['d'])
        elif group_key == 'H':
            hour = int(found_dict['H'])
        elif group_key == 'I':
            hour = int(found_dict['I'])
            ampm = found_dict.get('p', '').lower()
            # If there was no AM/PM indicator, we'll treat this like AM
            if ampm in ('', locale_time.am_pm[0].lower()):
                # We're in AM so the hour is correct unless we're
                # looking at 12 midnight.
                # 12 midnight == 12 AM == hour 0
                if hour == 12:
                    hour = 0
            elif ampm == locale_time.am_pm[1].lower():
                # We're in PM so we need to add 12 to the hour unless
                # we're looking at 12 noon.
                # 12 noon == 12 PM == hour 12
                if hour != 12:
                    hour += 12
        elif group_key == 'M':
            minute = int(found_dict['M'])
        elif group_key == 'S':
            second = int(found_dict['S'])
        elif group_key == 'A':
            weekday = _insensitiveindex(locale_time.f_weekday,
                                        found_dict['A'])
        elif group_key == 'a':
            weekday = _insensitiveindex(locale_time.a_weekday,
                                        found_dict['a'])
        elif group_key == 'w':
            # %w counts from Sunday == 0; the time struct uses Monday == 0.
            weekday = int(found_dict['w'])
            if weekday == 0:
                weekday = 6
            else:
                weekday -= 1
        elif group_key == 'j':
            julian = int(found_dict['j'])
        elif group_key == 'Z':
            found_zone = found_dict['Z'].lower()
            if locale_time.timezone[0] == locale_time.timezone[1]:
                pass #Deals with bad locale setup where timezone info is
                     # the same; first found on FreeBSD 4.4.
            elif locale_time.timezone[0].lower() == found_zone:
                tz = 0
            elif locale_time.timezone[1].lower() == found_zone:
                tz = 1
            elif locale_time.timezone[2].lower() == found_zone:
                tz = -1
    #XXX <bc>: If calculating fxns are never exposed to the general
    #populous then just inline calculations.  Also might be able to use
    #``datetime`` and the methods it provides.
    if julian == -1:
        julian = julianday(year, month, day)
    else:  # Assuming that if they bothered to include Julian day it will
           #be accurate
        year, month, day = gregorian(julian, year)
    if weekday == -1:
        weekday = dayofweek(year, month, day)
    return time.struct_time((year, month, day,
                             hour, minute, second,
                             weekday, julian, tz))

def _insensitiveindex(lst, findme):
    # Perform a case-insensitive index search; raise ValueError when findme
    # is not in lst.

    #XXX <bc>: If LocaleTime is not exposed, then consider removing this and
    #           just lowercase when LocaleTime sets its vars and lowercasing
    #           search values.
    findme = findme.lower()
    for key, item in enumerate(lst):
        if item.lower() == findme:
            return key
    raise ValueError("value not in list")

def firstjulian(year):
    """Calculate the Julian date up until the first of the year."""
    return ((146097 * (year + 4799)) // 400) - 31738

def julianday(year, month, day):
    """Calculate the Julian day since the beginning of the year.

    Calculated from the Gregorian date; January 1 is day 1.
    """
    a = (14 - month) // 12
    return (day - 32045
            + (((153 * (month + (12 * a) - 3)) + 2) // 5)
            + ((146097 * (year + 4800 - a)) // 400)) - firstjulian(year) + 1

def gregorian(julian, year):
    """Return 3-item list containing Gregorian date based on the Julian day.

    julian is the day of the year (as returned by julianday()) within year;
    the result is [year, month, day].
    """
    a = 32043 + julian + firstjulian(year)
    b = ((4 * a) + 3) // 146097
    c = a - ((146097 * b) // 4)
    d = ((4 * c) + 3) // 1461
    e = c - ((1461 * d) // 4)
    m = ((5 * e) + 2) // 153
    day = 1 + e - (((153 * m) + 2) // 5)
    month = m + 3 - (12 * (m // 10))
    year = (100 * b) + d - 4800 + (m // 10)
    return [year, month, day]

def dayofweek(year, month, day):
    """Calculate the day of the week (Monday is 0)."""
    a = (14 - month) // 12
    y = year - a
    weekday = (day + y + ((97 * y) // 400)
               + ((31 * (month + (12 * a) - 2)) // 12)) % 7
    # The formula yields Sunday == 0; shift so that Monday == 0.
    if weekday == 0:
        return 6
    else:
        return weekday-1