# _strptime.py revision 1fdb6335304551b79838523811525d3c59d901ae
"""Strptime-related classes and functions.

CLASSES:
    LocaleTime -- Discovers and/or stores locale-specific time information
    TimeRE -- Creates regexes for pattern matching a string of text containing
                time information as is returned by time.strftime()

FUNCTIONS:
    _getlang -- Figure out what language is being used for the locale
    strptime -- Calculates the time struct represented by the passed-in string

Only constructs that are valid on both Python 2 (2.4 or later, because of
sorted()) and Python 3 are used.
"""
import time
import locale
import calendar
from re import compile as re_compile
from re import escape as re_escape
from re import IGNORECASE
from datetime import date as datetime_date

__author__ = "Brett Cannon"
__email__ = "brett@python.org"

__all__ = ['strptime']


def _getlang():
    """Return the language of the current time locale, or '' if unknown.

    The LC_TIME locale is consulted first; when it yields nothing the
    process-wide default locale is used instead.
    """
    current_lang = locale.getlocale(locale.LC_TIME)[0]
    if current_lang:
        return current_lang
    try:
        current_lang = locale.getdefaultlocale()[0]
    except AttributeError:
        # locale.getdefaultlocale() is deprecated and may be missing on
        # newer interpreters; treat that the same as "no language known".
        current_lang = None
    return current_lang or ''


class LocaleTime(object):
    """Stores and handles locale-specific information related to time.

    Every value that is not handed to the constructor is worked out the first
    time the matching attribute is read (from the calendar and time modules).

    ATTRIBUTES (all read-only after instance creation! Instance variables that
                store the values have mangled names):
        f_weekday -- full weekday names (7-item list)
        a_weekday -- abbreviated weekday names (7-item list)
        f_month -- full month names (13-item list; dummy value in [0], which
                    is added by code)
        a_month -- abbreviated month names (13-item list, dummy value in
                    [0], which is added by code)
        am_pm -- AM/PM representation (2-item list)
        LC_date_time -- format string for date/time representation (string)
        LC_date -- format string for date representation (string)
        LC_time -- format string for time representation (string)
        timezone -- daylight- and non-daylight-savings timezone representation
                    (3-item list; code tacks on blank item at end for
                    possible lack of timezone such as UTC)
        lang -- Language used by instance (string)
    """

    def __init__(self, f_weekday=None, a_weekday=None, f_month=None,
                 a_month=None, am_pm=None, LC_date_time=None, LC_time=None,
                 LC_date=None, timezone=None, lang=None):
        """Optionally set attributes with passed-in values.

        Sequence arguments are copied into lists.  A TypeError is raised when
        a sequence argument does not have the required number of items
        (7 weekdays, 12 months, 2 AM/PM markers, 2 timezone names).
        """
        if f_weekday is None:
            self.__f_weekday = None
        elif len(f_weekday) == 7:
            self.__f_weekday = list(f_weekday)
        else:
            raise TypeError("full weekday names must be a 7-item sequence")
        if a_weekday is None:
            self.__a_weekday = None
        elif len(a_weekday) == 7:
            self.__a_weekday = list(a_weekday)
        else:
            raise TypeError(
                "abbreviated weekday names must be a 7-item sequence")
        if f_month is None:
            self.__f_month = None
        elif len(f_month) == 12:
            self.__f_month = self.__pad(f_month, True)
        else:
            raise TypeError("full month names must be a 12-item sequence")
        if a_month is None:
            self.__a_month = None
        elif len(a_month) == 12:
            self.__a_month = self.__pad(a_month, True)
        else:
            raise TypeError(
                "abbreviated month names must be a 12-item sequence")
        if am_pm is None:
            self.__am_pm = None
        elif len(am_pm) == 2:
            # Copied like every other sequence argument so that later changes
            # to the caller's object cannot leak in.
            self.__am_pm = list(am_pm)
        else:
            raise TypeError("AM/PM representation must be a 2-item sequence")
        self.__LC_date_time = LC_date_time
        self.__LC_time = LC_time
        self.__LC_date = LC_date
        self.__timezone = timezone
        if timezone:
            if len(timezone) != 2:
                raise TypeError("timezone names must contain 2 items")
            else:
                self.__timezone = self.__pad(timezone, False)
        self.__lang = lang

    def __pad(self, seq, front):
        # Return a list copy of seq with '' added at the front (front is
        # true) or at the back (front is false).
        seq = list(seq)
        if front:
            seq.insert(0, '')
        else:
            seq.append('')
        return seq

    def __set_nothing(self, stuff):
        # Raise TypeError when trying to set an attribute.
        raise TypeError("attribute does not support assignment")

    def __get_f_weekday(self):
        # Fetch self.f_weekday.
        if not self.__f_weekday:
            self.__calc_weekday()
        return self.__f_weekday

    def __get_a_weekday(self):
        # Fetch self.a_weekday.
        if not self.__a_weekday:
            self.__calc_weekday()
        return self.__a_weekday

    f_weekday = property(__get_f_weekday, __set_nothing,
                         doc="Full weekday names")
    a_weekday = property(__get_a_weekday, __set_nothing,
                         doc="Abbreviated weekday names")

    def __get_f_month(self):
        # Fetch self.f_month.
        if not self.__f_month:
            self.__calc_month()
        return self.__f_month

    def __get_a_month(self):
        # Fetch self.a_month.
        if not self.__a_month:
            self.__calc_month()
        return self.__a_month

    f_month = property(__get_f_month, __set_nothing,
                       doc="Full month names (dummy value at index 0)")
    a_month = property(__get_a_month, __set_nothing,
                       doc="Abbreviated month names (dummy value at index 0)")

    def __get_am_pm(self):
        # Fetch self.am_pm.
        if not self.__am_pm:
            self.__calc_am_pm()
        return self.__am_pm

    am_pm = property(__get_am_pm, __set_nothing, doc="AM/PM representation")

    def __get_timezone(self):
        # Fetch self.timezone.
        if not self.__timezone:
            self.__calc_timezone()
        return self.__timezone

    timezone = property(__get_timezone, __set_nothing,
                        doc="Timezone representation (dummy value at index 2)")

    def __get_LC_date_time(self):
        # Fetch self.LC_date_time.
        if not self.__LC_date_time:
            self.__calc_date_time()
        return self.__LC_date_time

    def __get_LC_date(self):
        # Fetch self.LC_date.
        if not self.__LC_date:
            self.__calc_date_time()
        return self.__LC_date

    def __get_LC_time(self):
        # Fetch self.LC_time.
        if not self.__LC_time:
            self.__calc_date_time()
        return self.__LC_time

    LC_date_time = property(
        __get_LC_date_time, __set_nothing,
        doc=
        "Format string for locale's date/time representation ('%c' format)")
    LC_date = property(__get_LC_date, __set_nothing,
        doc="Format string for locale's date representation ('%x' format)")
    LC_time = property(__get_LC_time, __set_nothing,
        doc="Format string for locale's time representation ('%X' format)")

    def __get_lang(self):
        # Fetch self.lang.
        if not self.__lang:
            self.__calc_lang()
        return self.__lang

    lang = property(__get_lang, __set_nothing,
                    doc="Language used for instance")

    def __calc_weekday(self):
        # Set self.__a_weekday and self.__f_weekday using the calendar
        # module.  Values supplied to the constructor are left untouched.
        a_weekday = [calendar.day_abbr[i] for i in range(7)]
        f_weekday = [calendar.day_name[i] for i in range(7)]
        if not self.__a_weekday:
            self.__a_weekday = a_weekday
        if not self.__f_weekday:
            self.__f_weekday = f_weekday

    def __calc_month(self):
        # Set self.__f_month and self.__a_month using the calendar module.
        # Index 0 of calendar's month sequences is already the dummy ''.
        a_month = [calendar.month_abbr[i] for i in range(13)]
        f_month = [calendar.month_name[i] for i in range(13)]
        if not self.__a_month:
            self.__a_month = a_month
        if not self.__f_month:
            self.__f_month = f_month

    def __calc_am_pm(self):
        # Set self.__am_pm by using time.strftime().

        # The magic date (1999,3,17,hour,44,55,2,76,0) is not really that
        # magical; just happened to have used it everywhere else where a
        # static date was needed.
        am_pm = []
        for hour in (1, 22):
            time_tuple = time.struct_time((1999,3,17,hour,44,55,2,76,0))
            am_pm.append(time.strftime("%p", time_tuple))
        self.__am_pm = am_pm

    def __calc_date_time(self):
        # Set self.__LC_date_time, self.__LC_date, & self.__LC_time by
        # formatting a known date with time.strftime() and substituting the
        # directive that produced each recognisable piece of the output.

        # Use (1999,3,17,22,44,55,2,76,0) for magic date because the amount of
        # overloaded numbers is minimized.  The order in which searches for
        # values within the format string is very important; it eliminates
        # possible ambiguity for what something represents.
        time_tuple = time.struct_time((1999,3,17,22,44,55,2,76,0))
        date_time = [time.strftime("%c", time_tuple),
                     time.strftime("%x", time_tuple),
                     time.strftime("%X", time_tuple)]
        replacement_pairs = (
            ('%', '%%'), (self.f_weekday[2], '%A'),
            (self.f_month[3], '%B'), (self.a_weekday[2], '%a'),
            (self.a_month[3], '%b'), (self.am_pm[1], '%p'),
            (self.timezone[0], '%Z'), (self.timezone[1], '%Z'),
            ('1999', '%Y'), ('99', '%y'), ('22', '%H'),
            ('44', '%M'), ('55', '%S'), ('76', '%j'),
            ('17', '%d'), ('03', '%m'), ('3', '%m'),
            # '3' needed for when no leading zero.
            ('2', '%w'), ('10', '%I'))
        # Sunday 1999-01-03 precedes the first Monday of 1999, so a
        # Monday-based week number (%W) is 00 for it while a Sunday-based
        # one (%U) is not.
        week_probe = time.struct_time((1999,1,3,1,1,1,6,3,0))
        for offset, directive in ((0, '%c'), (1, '%x'), (2, '%X')):
            current_format = date_time[offset]
            for old, new in replacement_pairs:
                # Must deal with possible lack of locale info
                # manifesting itself as the empty string (e.g., Swedish's
                # lack of AM/PM info) or a platform returning a tuple of empty
                # strings (e.g., MacOS 9 having timezone as ('','')).
                if old:
                    current_format = current_format.replace(old, new)
            # A substring test is required here: str.find() returns -1
            # (true) when '00' is absent and 0 (false) when it leads.
            if '00' in time.strftime(directive, week_probe):
                U_W = '%W'
            else:
                U_W = '%U'
            date_time[offset] = current_format.replace('11', U_W)
        if not self.__LC_date_time:
            self.__LC_date_time = date_time[0]
        if not self.__LC_date:
            self.__LC_date = date_time[1]
        if not self.__LC_time:
            self.__LC_time = date_time[2]

    def __calc_timezone(self):
        # Set self.__timezone by using time.tzname.
        #
        # Empty string used for matching when timezone is not used/needed such
        # as with UTC.
        self.__timezone = self.__pad(time.tzname, 0)

    def __calc_lang(self):
        # Set self.__lang by using _getlang().
        self.__lang = _getlang()


class TimeRE(dict):
    """Handle conversion from format directives to regexes.

    Maps a directive letter (the character after '%') to a regex string with
    a named group of the same letter.  Locale-independent directives are
    stored up front; locale-dependent ones are built on first lookup from
    self.locale_time and then stored.
    """

    def __init__(self, locale_time=None):
        """Init inst with non-locale regexes and store LocaleTime object.

        locale_time -- LocaleTime instance to draw names from; a fresh one is
                       created when omitted.  (A LocaleTime() default in the
                       signature would be created only once and then shared
                       by every TimeRE, keeping stale locale data alive.)
        """
        #XXX: Does 'Y' need to worry about having less or more than 4 digits?
        base = super(TimeRE, self)
        base.__init__({
            # The " \d" option is to make %c from ANSI C work
            'd': r"(?P<d>3[0-1]|[1-2]\d|0[1-9]|[1-9]| [1-9])",
            'H': r"(?P<H>2[0-3]|[0-1]\d|\d)",
            'I': r"(?P<I>1[0-2]|0[1-9]|[1-9])",
            'j': r"(?P<j>36[0-6]|3[0-5]\d|[1-2]\d\d|0[1-9]\d|00[1-9]|[1-9]\d|0[1-9]|[1-9])",
            'm': r"(?P<m>1[0-2]|0[1-9]|[1-9])",
            'M': r"(?P<M>[0-5]\d|\d)",
            'S': r"(?P<S>6[0-1]|[0-5]\d|\d)",
            'U': r"(?P<U>5[0-3]|[0-4]\d|\d)",
            'w': r"(?P<w>[0-6])",
            # W is set below by using 'U'
            'y': r"(?P<y>\d\d)",
            'Y': r"(?P<Y>\d\d\d\d)"})
        # Same digits as 'U', but with its own group name so that a format
        # holding both %U and %W does not define the group 'U' twice.
        base.__setitem__('W', base.__getitem__('U').replace('U', 'W'))
        if locale_time is None:
            locale_time = LocaleTime()
        self.locale_time = locale_time

    def __getitem__(self, fetch):
        """Try to fetch regex; if it does not exist, construct it.

        Raises KeyError for a directive that is neither stored nor one of
        the locale-dependent directives known here.
        """
        try:
            return super(TimeRE, self).__getitem__(fetch)
        except KeyError:
            constructors = {
                'A': lambda: self.__seqToRE(self.locale_time.f_weekday, fetch),
                'a': lambda: self.__seqToRE(self.locale_time.a_weekday, fetch),
                'B': lambda: self.__seqToRE(self.locale_time.f_month[1:],
                                            fetch),
                'b': lambda: self.__seqToRE(self.locale_time.a_month[1:],
                                            fetch),
                'c': lambda: self.pattern(self.locale_time.LC_date_time),
                'p': lambda: self.__seqToRE(self.locale_time.am_pm, fetch),
                'x': lambda: self.pattern(self.locale_time.LC_date),
                'X': lambda: self.pattern(self.locale_time.LC_time),
                'Z': lambda: self.__seqToRE(self.locale_time.timezone, fetch),
                '%': lambda: '%',
                }
            if fetch in constructors:
                self[fetch] = constructors[fetch]()
                return self[fetch]
            else:
                raise

    def __seqToRE(self, to_convert, directive):
        """Convert a sequence to a regex string for matching a directive.

        Returns '' when every item is the empty string.  Otherwise returns a
        group named after the directive whose alternatives are the items,
        longest first, so that a name which extends another name is tried
        before the shorter one.  Items are escaped so that they are matched
        literally.
        """
        for value in to_convert:
            if value != '':
                break
        else:
            return ''
        # sorted() builds a new list (the caller's sequence is untouched, and
        # tuples are accepted) and keeps equal-length items in their
        # original order.
        ordered = sorted(to_convert, key=len, reverse=True)
        alternatives = '|'.join([re_escape(name) for name in ordered])
        return '(?P<%s>%s)' % (directive, alternatives)

    def pattern(self, format):
        """Return re pattern for the format string.

        Characters with a special meaning in regexes are escaped so that
        they match themselves, each run of whitespace matches any amount of
        whitespace, and every %-directive is replaced by its regex.
        """
        processed_format = ''
        # Escape first; the backslashes added by the whitespace step below
        # must not be escaped again.
        regex_chars = re_compile(r"([\\.^$*+?\(\){}\[\]|])")
        format = regex_chars.sub(r"\\\1", format)
        whitespace_replacement = re_compile(r'\s+')
        # The doubled backslash makes the replacement the two characters
        # '\s' followed by '*' instead of an (invalid) template escape.
        format = whitespace_replacement.sub(r'\\s*', format)
        while '%' in format:
            directive_index = format.index('%') + 1
            processed_format = "%s%s%s" % (processed_format,
                                           format[:directive_index-1],
                                           self[format[directive_index]])
            format = format[directive_index+1:]
        return "%s%s" % (processed_format, format)

    def compile(self, format):
        """Return a compiled re object for the format string."""
        return re_compile(self.pattern(format), IGNORECASE)


# Cached TimeRE; probably only need one instance ever so cache it for performance
_locale_cache = TimeRE()
# Cached regex objects; same reason as for TimeRE cache
_regex_cache = dict()


def strptime(data_string, format="%a %b %d %H:%M:%S %Y"):
    """Return a time struct based on the input data and the format string.

    data_string -- text to parse
    format -- strftime()-style format describing data_string

    Fields missing from the format default to 1900-01-01 00:00:00.  The day
    of the week and the day of the year are calculated when the format does
    not supply them, and the daylight-savings flag is -1 unless a timezone
    name was matched.  Raises ValueError when data_string does not match.
    """
    global _locale_cache
    # If the language changes, caches are invalidated, so clear them
    if _locale_cache.locale_time.lang != _getlang():
        _locale_cache = TimeRE()
        _regex_cache.clear()
    # Must be looked up after the check above so that names are taken from
    # the current cache, not from one that was just thrown away.
    locale_time = _locale_cache.locale_time
    format_regex = _regex_cache.get(format)
    if not format_regex:
        # Limit regex cache size to prevent major bloating of the module;
        # The value 5 is arbitrary
        if len(_regex_cache) > 5:
            _regex_cache.clear()
        format_regex = _locale_cache.compile(format)
        _regex_cache[format] = format_regex
    found = format_regex.match(data_string)
    if not found:
        raise ValueError("time data did not match format")
    year = 1900
    month = day = 1
    hour = minute = second = 0
    tz = -1
    # weekday and julian defaulted to -1 so as to signal need to calculate values
    weekday = julian = -1
    found_dict = found.groupdict()
    for group_key in found_dict:
        if group_key == 'y':
            year = int(found_dict['y'])
            # Open Group specification for strptime() states that a %y
            #value in the range of [00, 68] is in the century 2000, while
            #[69,99] is in the century 1900
            if year <= 68:
                year += 2000
            else:
                year += 1900
        elif group_key == 'Y':
            year = int(found_dict['Y'])
        elif group_key == 'm':
            month = int(found_dict['m'])
        elif group_key == 'B':
            month = _insensitiveindex(locale_time.f_month, found_dict['B'])
        elif group_key == 'b':
            month = _insensitiveindex(locale_time.a_month, found_dict['b'])
        elif group_key == 'd':
            day = int(found_dict['d'])
        elif group_key == 'H':
            hour = int(found_dict['H'])
        elif group_key == 'I':
            hour = int(found_dict['I'])
            ampm = found_dict.get('p', '').lower()
            # If there was no AM/PM indicator, we'll treat this like AM
            if ampm in ('', locale_time.am_pm[0].lower()):
                # We're in AM so the hour is correct unless we're
                # looking at 12 midnight.
                # 12 midnight == 12 AM == hour 0
                if hour == 12:
                    hour = 0
            elif ampm == locale_time.am_pm[1].lower():
                # We're in PM so we need to add 12 to the hour unless
                # we're looking at 12 noon.
                # 12 noon == 12 PM == hour 12
                if hour != 12:
                    hour += 12
        elif group_key == 'M':
            minute = int(found_dict['M'])
        elif group_key == 'S':
            second = int(found_dict['S'])
        elif group_key == 'A':
            weekday = _insensitiveindex(locale_time.f_weekday,
                                        found_dict['A'])
        elif group_key == 'a':
            weekday = _insensitiveindex(locale_time.a_weekday,
                                        found_dict['a'])
        elif group_key == 'w':
            # %w counts from Sunday == 0; the result counts from Monday == 0.
            weekday = int(found_dict['w'])
            if weekday == 0:
                weekday = 6
            else:
                weekday -= 1
        elif group_key == 'j':
            julian = int(found_dict['j'])
        elif group_key == 'Z':
            found_zone = found_dict['Z'].lower()
            if locale_time.timezone[0] == locale_time.timezone[1]:
                pass #Deals with bad locale setup where timezone info is
                     # the same; first found on FreeBSD 4.4.
            elif locale_time.timezone[0].lower() == found_zone:
                tz = 0
            elif locale_time.timezone[1].lower() == found_zone:
                tz = 1
            elif locale_time.timezone[2].lower() == found_zone:
                tz = -1
    # Cannot pre-calculate datetime_date() since can change in Julian
    #calculation and thus could have different value for the day of the week
    #calculation
    if julian == -1:
        # Need to add 1 to result since first day of the year is 1, not 0.
        julian = datetime_date(year, month, day).toordinal() - \
                  datetime_date(year, 1, 1).toordinal() + 1
    else:  # Assume that if they bothered to include Julian day it will
           #be accurate
        datetime_result = datetime_date.fromordinal(
            (julian - 1) + datetime_date(year, 1, 1).toordinal())
        year = datetime_result.year
        month = datetime_result.month
        day = datetime_result.day
    if weekday == -1:
        weekday = datetime_date(year, month, day).weekday()
    return time.struct_time((year, month, day,
                             hour, minute, second,
                             weekday, julian, tz))


def _insensitiveindex(lst, findme):
    # Perform a case-insensitive index search: return the position of the
    # first item of lst equal to findme when both are lowercased, or raise
    # ValueError when there is none.

    #XXX <bc>: If LocaleTime is not exposed, then consider removing this and
    #          just lowercase when LocaleTime sets its vars and lowercasing
    #          search values.
    findme = findme.lower()
    for key, item in enumerate(lst):
        if item.lower() == findme:
            return key
    raise ValueError("value not in list")