# _strptime.py revision 4d895fa1258205e01073f0f9c2aba9a57843ae21
"""Strptime-related classes and functions.

CLASSES:
    LocaleTime -- Discovers and/or stores locale-specific time information
    TimeRE -- Creates regexes for pattern matching a string of text containing
                time information as is returned by time.strftime()

FUNCTIONS:
    firstjulian -- Calculates the Julian date up to the first of the specified
                    year
    gregorian -- Calculates the Gregorian date based on the Julian day and
                    year
    julianday -- Calculates the Julian day since the first of the year based
                    on the Gregorian date
    dayofweek -- Calculates the day of the week from the Gregorian date.
    strptime -- Calculates the time struct represented by the passed-in string

Requires Python 2.4 or higher (sorted() with a key function is used); the
code avoids Python 2-only constructs so it also runs on Python 3.
"""
import time
import locale
import calendar
from re import compile as re_compile
from re import escape as re_escape
from re import IGNORECASE
from string import whitespace as whitespace_string

__version__ = (2,1,6)
__author__ = "Brett Cannon"
__email__ = "drifty@bigfoot.com"

__all__ = ['strptime']

RegexpType = type(re_compile(''))

# Characters that carry meaning in a regex and therefore must be escaped when
# they appear as literal text in a format string.
_REGEX_SPECIAL_CHARS = re_compile(r"([\\.^$*+?(){}\[\]|])")


class LocaleTime(object):
    """Stores and handles locale-specific information related to time.

    Every value that is not passed to the constructor is discovered lazily
    the first time the corresponding attribute is read.

    ATTRIBUTES (all read-only after instance creation! Instance variables that
                store the values have mangled names):
        f_weekday -- full weekday names (7-item list)
        a_weekday -- abbreviated weekday names (7-item list)
        f_month -- full month names (13-item list; dummy value in [0], which
                    is added by code)
        a_month -- abbreviated month names (13-item list, dummy value in
                    [0], which is added by code)
        am_pm -- AM/PM representation (2-item list)
        LC_date_time -- format string for date/time representation (string)
        LC_date -- format string for date representation (string)
        LC_time -- format string for time representation (string)
        timezone -- daylight- and non-daylight-savings timezone representation
                    (3-item list; code tacks on blank item at end for
                    possible lack of timezone such as UTC)
        lang -- Language used by instance (string)
    """

    def __init__(self, f_weekday=None, a_weekday=None, f_month=None,
                 a_month=None, am_pm=None, LC_date_time=None, LC_time=None,
                 LC_date=None, timezone=None, lang=None):
        """Optionally set attributes with passed-in values.

        Sequences are copied so later changes to the caller's objects do not
        leak into the instance.  Raises TypeError when a sequence has the
        wrong number of items.
        """
        if f_weekday is None:
            self.__f_weekday = None
        elif len(f_weekday) == 7:
            self.__f_weekday = list(f_weekday)
        else:
            raise TypeError("full weekday names must be a 7-item sequence")
        if a_weekday is None:
            self.__a_weekday = None
        elif len(a_weekday) == 7:
            self.__a_weekday = list(a_weekday)
        else:
            raise TypeError(
                "abbreviated weekday names must be a 7-item sequence")
        if f_month is None:
            self.__f_month = None
        elif len(f_month) == 12:
            self.__f_month = self.__pad(f_month, True)
        else:
            raise TypeError("full month names must be a 12-item sequence")
        if a_month is None:
            self.__a_month = None
        elif len(a_month) == 12:
            self.__a_month = self.__pad(a_month, True)
        else:
            raise TypeError(
                "abbreviated month names must be a 12-item sequence")
        if am_pm is None:
            self.__am_pm = None
        elif len(am_pm) == 2:
            # Copied like every other sequence argument.
            self.__am_pm = list(am_pm)
        else:
            raise TypeError("AM/PM representation must be a 2-item sequence")
        self.__LC_date_time = LC_date_time
        self.__LC_time = LC_time
        self.__LC_date = LC_date
        self.__timezone = timezone
        if timezone:
            if len(timezone) != 2:
                raise TypeError("timezone names must contain 2 items")
            self.__timezone = self.__pad(timezone, False)
        self.__lang = lang

    def __pad(self, seq, front):
        # Return a list copy of seq with '' added to the front (front is
        # true) or to the back (front is false).
        seq = list(seq)
        if front:
            seq.insert(0, '')
        else:
            seq.append('')
        return seq

    def __set_nothing(self, stuff):
        # Raise TypeError when trying to set an attribute.
        raise TypeError("attribute does not support assignment")

    def __get_f_weekday(self):
        # Fetch self.f_weekday.
        if not self.__f_weekday:
            self.__calc_weekday()
        return self.__f_weekday

    def __get_a_weekday(self):
        # Fetch self.a_weekday.
        if not self.__a_weekday:
            self.__calc_weekday()
        return self.__a_weekday

    f_weekday = property(__get_f_weekday, __set_nothing,
                         doc="Full weekday names")
    a_weekday = property(__get_a_weekday, __set_nothing,
                         doc="Abbreviated weekday names")

    def __get_f_month(self):
        # Fetch self.f_month.
        if not self.__f_month:
            self.__calc_month()
        return self.__f_month

    def __get_a_month(self):
        # Fetch self.a_month.
        if not self.__a_month:
            self.__calc_month()
        return self.__a_month

    f_month = property(__get_f_month, __set_nothing,
                       doc="Full month names (dummy value at index 0)")
    a_month = property(__get_a_month, __set_nothing,
                       doc="Abbreviated month names (dummy value at index 0)")

    def __get_am_pm(self):
        # Fetch self.am_pm.
        if not self.__am_pm:
            self.__calc_am_pm()
        return self.__am_pm

    am_pm = property(__get_am_pm, __set_nothing, doc="AM/PM representation")

    def __get_timezone(self):
        # Fetch self.timezone.
        if not self.__timezone:
            self.__calc_timezone()
        return self.__timezone

    timezone = property(__get_timezone, __set_nothing,
                        doc="Timezone representation (dummy value at index 2)")

    def __get_LC_date_time(self):
        # Fetch self.LC_date_time.
        if not self.__LC_date_time:
            self.__calc_date_time()
        return self.__LC_date_time

    def __get_LC_date(self):
        # Fetch self.LC_date.
        if not self.__LC_date:
            self.__calc_date_time()
        return self.__LC_date

    def __get_LC_time(self):
        # Fetch self.LC_time.
        if not self.__LC_time:
            self.__calc_date_time()
        return self.__LC_time

    LC_date_time = property(
        __get_LC_date_time, __set_nothing,
        doc=
        "Format string for locale's date/time representation ('%c' format)")
    LC_date = property(__get_LC_date, __set_nothing,
        doc="Format string for locale's date representation ('%x' format)")
    LC_time = property(__get_LC_time, __set_nothing,
        doc="Format string for locale's time representation ('%X' format)")

    def __get_lang(self):
        # Fetch self.lang.
        if not self.__lang:
            self.__calc_lang()
        return self.__lang

    lang = property(__get_lang, __set_nothing,
                    doc="Language used for instance")

    def __calc_weekday(self):
        # Set self.__a_weekday and self.__f_weekday using the calendar
        # module.  Values supplied to the constructor are left untouched.
        if not self.__a_weekday:
            self.__a_weekday = [calendar.day_abbr[i] for i in range(7)]
        if not self.__f_weekday:
            self.__f_weekday = [calendar.day_name[i] for i in range(7)]

    def __calc_month(self):
        # Set self.__f_month and self.__a_month using the calendar module.
        # Index 0 of the calendar sequences is already the dummy entry.
        if not self.__a_month:
            self.__a_month = [calendar.month_abbr[i] for i in range(13)]
        if not self.__f_month:
            self.__f_month = [calendar.month_name[i] for i in range(13)]

    def __calc_am_pm(self):
        # Set self.__am_pm by using time.strftime().

        # The magic date (1999,3,17,hour,44,55,2,76,0) is not really that
        # magical; just happened to have used it everywhere else where a
        # static date was needed.
        am_pm = []
        for hour in (1, 22):
            time_tuple = time.struct_time((1999,3,17,hour,44,55,2,76,0))
            am_pm.append(time.strftime("%p", time_tuple))
        self.__am_pm = am_pm

    def __calc_date_time(self):
        # Set self.__LC_date_time, self.__LC_date, & self.__LC_time by
        # formatting a known date with time.strftime() and translating the
        # output back into directives.

        # Use (1999,3,17,22,44,55,2,76,0) for magic date because the amount of
        # overloaded numbers is minimized.  The order in which searches for
        # values within the format string is very important; it eliminates
        # possible ambiguity for what something represents.
        time_tuple = time.struct_time((1999,3,17,22,44,55,2,76,0))
        # Sunday 1999-01-03 falls before the first Monday of the year, so it
        # is in week 00 under %W but in week 01 under %U.
        week_probe = time.struct_time((1999,1,3,1,1,1,6,3,0))
        replacement_pairs = (
            ('%', '%%'), (self.f_weekday[2], '%A'),
            (self.f_month[3], '%B'), (self.a_weekday[2], '%a'),
            (self.a_month[3], '%b'), (self.am_pm[1], '%p'),
            (self.timezone[0], '%Z'), (self.timezone[1], '%Z'),
            ('1999', '%Y'), ('99', '%y'), ('22', '%H'),
            ('44', '%M'), ('55', '%S'), ('76', '%j'),
            ('17', '%d'), ('03', '%m'), ('3', '%m'),
            # '3' needed for when no leading zero.
            ('2', '%w'), ('10', '%I'))
        date_time = []
        for directive in ('%c', '%x', '%X'):
            current_format = time.strftime(directive, time_tuple)
            for old, new in replacement_pairs:
                # Missing locale info shows up as the empty string (i.e.,
                # Swedish's lack of AM/PM info).  Replacing '' would insert
                # the directive between every character, so skip it.
                if old:
                    current_format = current_format.replace(old, new)
            if '00' in time.strftime(directive, week_probe):
                U_W = '%W'
            else:
                U_W = '%U'
            # '11' is the week number of the magic date under both %U and %W.
            date_time.append(current_format.replace('11', U_W))
        if not self.__LC_date_time:
            self.__LC_date_time = date_time[0]
        if not self.__LC_date:
            self.__LC_date = date_time[1]
        if not self.__LC_time:
            self.__LC_time = date_time[2]

    def __calc_timezone(self):
        # Set self.__timezone by using time.tzname.
        #
        # Empty string used for matching when timezone is not used/needed such
        # as with UTC.
        self.__timezone = self.__pad(time.tzname, False)

    def __calc_lang(self):
        # Set self.lang by using locale.getlocale() or
        # locale.getdefaultlocale().  May leave it as None when neither
        # reports a language.
        current_lang = locale.getlocale(locale.LC_TIME)[0]
        if current_lang:
            self.__lang = current_lang
        else:
            self.__lang = locale.getdefaultlocale()[0]


class TimeRE(dict):
    """Handle conversion from format directives to regexes.

    Maps a directive letter (without the '%') to a regex string containing a
    named group of the same letter.  Locale-dependent entries are built on
    first access and cached in the dict.
    """

    def __init__(self, locale_time=None):
        """Init inst with non-locale regexes and store LocaleTime object.

        locale_time -- LocaleTime instance to pull locale data from; a fresh
                       one is created per TimeRE when omitted so that
                       instances never share state fixed at import time.
        """
        super(TimeRE,self).__init__({
            # The " \d" option is to make %c from ANSI C work
            'd': r"(?P<d>3[0-1]|[0-2]\d|\d| \d)",
            'H': r"(?P<H>2[0-3]|[0-1]\d|\d)",
            'I': r"(?P<I>0\d|1[0-2]|\d)",
            # Longest alternatives first so 366 is not read as just "3".
            'j': r"(?P<j>36[0-6]|3[0-5]\d|[0-2]\d\d|\d\d|\d)",
            'm': r"(?P<m>0\d|1[0-2]|\d)",
            'M': r"(?P<M>[0-5]\d|\d)",
            'S': r"(?P<S>6[0-1]|[0-5]\d|\d)",
            'U': r"(?P<U>5[0-3]|[0-4]\d|\d)",
            'w': r"(?P<w>[0-6])",
            'W': r"(?P<W>5[0-3]|[0-4]\d|\d)", # Same as U
            'y': r"(?P<y>\d\d)",
            'Y': r"(?P<Y>\d\d\d\d)"})
        if locale_time is None:
            locale_time = LocaleTime()
        self.locale_time = locale_time

    def __getitem__(self, fetch):
        """Try to fetch regex; if it does not exist, construct it.

        Raises KeyError for a directive that is neither stored nor
        constructible from locale data.
        """
        try:
            return super(TimeRE, self).__getitem__(fetch)
        except KeyError:
            constructors = {
                'A': lambda: self.__seqToRE(self.locale_time.f_weekday, fetch),
                'a': lambda: self.__seqToRE(self.locale_time.a_weekday, fetch),
                'B': lambda: self.__seqToRE(self.locale_time.f_month[1:],
                                            fetch),
                'b': lambda: self.__seqToRE(self.locale_time.a_month[1:],
                                            fetch),
                'c': lambda: self.pattern(self.locale_time.LC_date_time),
                'p': lambda: self.__seqToRE(self.locale_time.am_pm, fetch),
                'x': lambda: self.pattern(self.locale_time.LC_date),
                'X': lambda: self.pattern(self.locale_time.LC_time),
                'Z': lambda: self.__seqToRE(self.locale_time.timezone, fetch),
                '%': lambda: '%',
                }
            if fetch not in constructors:
                raise
            self[fetch] = constructors[fetch]()
            return self[fetch]

    def __seqToRE(self, to_convert, directive):
        """Convert a list to a regex string for matching directive.

        Alternatives are ordered longest first so that, when one name is a
        prefix of another, the longer name gets the chance to match.  Each
        name is escaped so it is matched literally.
        """
        # sorted() copies, so the caller's list is not changed in-place.
        longest_first = sorted(to_convert, key=len, reverse=True)
        alternatives = '|'.join([re_escape(name) for name in longest_first])
        return '(?P<%s>%s)' % (directive, alternatives)

    def pattern(self, format):
        """Return re pattern for the format string.

        Literal text is escaped, each whitespace character becomes r'\\s*',
        and every '%x' directive is replaced by the regex stored under 'x'.
        """
        # Escape before inserting any regex of our own.
        format = _REGEX_SPECIAL_CHARS.sub(r"\\\1", format)
        for whitespace in whitespace_string:
            format = format.replace(whitespace, r'\s*')
        pieces = []
        while '%' in format:
            directive_index = format.index('%') + 1
            pieces.append(format[:directive_index-1])
            pieces.append(self[format[directive_index]])
            format = format[directive_index+1:]
        pieces.append(format)
        return ''.join(pieces)

    def compile(self, format):
        """Return a compiled re object for the format string.

        The pattern starts with a '(?#lang)' regex comment so strptime() can
        later check which language it was built for.
        """
        # The tag is added after pattern() so it is not escaped.
        tagged = "(?#%s)%s" % (self.locale_time.lang, self.pattern(format))
        return re_compile(tagged, IGNORECASE)


def strptime(data_string, format="%a %b %d %H:%M:%S %Y"):
    """Return a time struct based on the input data and the format string.

    The format argument may either be a regular expression object compiled by
    strptime(), or a format string.  If False is passed in for data_string
    then the re object calculated for format will be returned.  The re object
    must be used with the same locale as was used to compile the re object.

    Fields that cannot be determined from the input are set to -1.

    Raises TypeError when a passed-in re object was built for another
    language, and ValueError when data_string does not match format.
    """
    locale_time = LocaleTime()
    if isinstance(format, RegexpType):
        # Build the tag with %-formatting so a lang of None cannot blow up.
        if format.pattern.find("(?#%s)" % locale_time.lang) == -1:
            raise TypeError("re object not created with same language as "
                            "LocaleTime instance")
        compiled_re = format
    else:
        compiled_re = TimeRE(locale_time).compile(format)
    if data_string is False:
        return compiled_re
    found = compiled_re.match(data_string)
    if not found:
        raise ValueError("time data did not match format")
    year = month = day = hour = minute = second = weekday = julian = tz = -1
    found_dict = found.groupdict()
    for group_key in found_dict:
        if group_key == 'y':
            # NOTE: a two-digit year is placed in the current century.
            year = int("%s%s" %
                       (time.strftime("%Y")[:-2], found_dict['y']))
        elif group_key == 'Y':
            year = int(found_dict['Y'])
        elif group_key == 'm':
            month = int(found_dict['m'])
        elif group_key == 'B':
            month = _insensitiveindex(locale_time.f_month, found_dict['B'])
        elif group_key == 'b':
            month = _insensitiveindex(locale_time.a_month, found_dict['b'])
        elif group_key == 'd':
            day = int(found_dict['d'])
        elif group_key == 'H':
            hour = int(found_dict['H'])
        elif group_key == 'I':
            hour = int(found_dict['I'])
            ampm = found_dict.get('p', '').lower()
            # If there was no AM/PM indicator, we'll treat this like AM
            if ampm in ('', locale_time.am_pm[0].lower()):
                # We're in AM so the hour is correct unless we're
                # looking at 12 midnight.
                # 12 midnight == 12 AM == hour 0
                if hour == 12:
                    hour = 0
            elif ampm == locale_time.am_pm[1].lower():
                # We're in PM so we need to add 12 to the hour unless
                # we're looking at 12 noon.
                # 12 noon == 12 PM == hour 12
                if hour != 12:
                    hour += 12
        elif group_key == 'M':
            minute = int(found_dict['M'])
        elif group_key == 'S':
            second = int(found_dict['S'])
        elif group_key == 'A':
            weekday = _insensitiveindex(locale_time.f_weekday,
                                        found_dict['A'])
        elif group_key == 'a':
            weekday = _insensitiveindex(locale_time.a_weekday,
                                        found_dict['a'])
        elif group_key == 'w':
            # %w counts from Sunday == 0; the struct counts from Monday == 0.
            weekday = int(found_dict['w'])
            if weekday == 0:
                weekday = 6
            else:
                weekday -= 1
        elif group_key == 'j':
            julian = int(found_dict['j'])
        elif group_key == 'Z':
            found_zone = found_dict['Z'].lower()
            if locale_time.timezone[0].lower() == found_zone:
                tz = 0
            elif locale_time.timezone[1].lower() == found_zone:
                tz = 1
            elif locale_time.timezone[2].lower() == found_zone:
                tz = 0
    #XXX <bc>: If calculating fxns are never exposed to the general
    #          populous then just inline calculations.
    # Fill in whichever of (julian, month/day, weekday) can be derived from
    # the fields that were parsed.
    if julian == -1 and year != -1 and month != -1 and day != -1:
        julian = julianday(year, month, day)
    if (month == -1 or day == -1) and julian != -1 and year != -1:
        year, month, day = gregorian(julian, year)
    if weekday == -1 and year != -1 and month != -1 and day != -1:
        weekday = dayofweek(year, month, day)
    return time.struct_time(
        (year,month,day,hour,minute,second,weekday, julian,tz))


def _insensitiveindex(lst, findme):
    # Perform a case-insensitive index search; raise ValueError when findme
    # is not in lst.

    #XXX <bc>: If LocaleTime is not exposed, then consider removing this and
    #          just lowercase when LocaleTime sets its vars and lowercasing
    #          search values.
    findme = findme.lower()
    for key, item in enumerate(lst):
        if item.lower() == findme:
            return key
    raise ValueError("value not in list")


def firstjulian(year):
    """Calculate the Julian date up until the first of the year."""
    return ((146097 * (year + 4799)) // 400) - 31738


def julianday(year, month, day):
    """Calculate the Julian day since the beginning of the year.

    Calculated from the Gregorian date; January 1 is day 1.
    """
    a = (14 - month) // 12
    return (day - 32045
            + (((153 * (month + (12 * a) - 3)) + 2) // 5)
            + ((146097 * (year + 4800 - a)) // 400)) - firstjulian(year) + 1


def gregorian(julian, year):
    """Return 3-item list containing Gregorian date based on the Julian day.

    julian is the day of the year (January 1 is day 1); the result is
    [year, month, day].
    """
    a = 32043 + julian + firstjulian(year)
    b = ((4 * a) + 3) // 146097
    c = a - ((146097 * b) // 4)
    d = ((4 * c) + 3) // 1461
    e = c - ((1461 * d) // 4)
    m = ((5 * e) + 2) // 153
    day = 1 + e - (((153 * m) + 2) // 5)
    month = m + 3 - (12 * (m // 10))
    year = (100 * b) + d - 4800 + (m // 10)
    return [year, month, day]


def dayofweek(year, month, day):
    """Calculate the day of the week (Monday is 0)."""
    a = (14 - month) // 12
    y = year - a
    weekday = (day + y + ((97 * y) // 400)
               + ((31 * (month + (12 * a) -2 )) // 12)) % 7
    # The formula yields Sunday == 0; shift so that Monday == 0.
    if weekday == 0:
        return 6
    return weekday - 1