"""HTTP cookie handling for web clients.

This module has (now fairly distant) origins in Gisle Aas' Perl module
HTTP::Cookies, from the libwww-perl library.

Docstrings, comments and debug strings in this code refer to the
attributes of the HTTP cookie system as cookie-attributes, to distinguish
them clearly from Python attributes.

Class diagram (note that BSDDBCookieJar and the MSIE* classes are not
distributed with the Python standard library, but are available from
http://wwwsearch.sf.net/):

                        CookieJar____
                        /     \      \
            FileCookieJar      \      \
             /    |   \         \      \
 MozillaCookieJar | LWPCookieJar \      \
                  |               |      \
                  |   ---MSIEBase |       \
                  |  /      |     |        \
                  | /   MSIEDBCookieJar BSDDBCookieJar
                  |/
               MSIECookieJar

"""

__all__ = ['Cookie', 'CookieJar', 'CookiePolicy', 'DefaultCookiePolicy',
           'FileCookieJar', 'LWPCookieJar', 'lwp_cookie_str', 'LoadError',
           'MozillaCookieJar']

import re, urlparse, copy, time, urllib
try:
    import threading as _threading
except ImportError:
    import dummy_threading as _threading
import httplib  # only for the default HTTP port
from calendar import timegm

debug = False   # set to True to enable debugging via the logging module
logger = None

def _debug(*args):
    """Pass *args* to the "cookielib" logger's debug() method.

    Returns None without doing anything while the module-level ``debug``
    flag is false.  The logger is created on first use and cached in the
    module-level ``logger`` variable, so the logging module is only
    imported once debugging has actually been switched on.

    """
    global logger
    if debug:
        if not logger:
            import logging
            logger = logging.getLogger("cookielib")
        return logger.debug(*args)


DEFAULT_HTTP_PORT = str(httplib.HTTP_PORT)
MISSING_FILENAME_TEXT = ("a filename was not supplied (nor was the CookieJar "
                         "instance initialised with one)")

def _warn_unhandled_exception():
    """Issue a warning containing the traceback of the current exception.

    There are a few catch-all except: statements in this module, for
    catching input that's bad in unexpected ways.  This is called from
    those handlers so that anything caught there is still reported.

    """
    import warnings, traceback
    # format_exc() yields the same text print_exc() would write to a file.
    report = traceback.format_exc()
    # stacklevel=2 attributes the warning to the caller of this helper.
    warnings.warn("cookielib bug!\n%s" % report, stacklevel=2)


# Date/time conversion
# -----------------------------------------------------------------------------

EPOCH_YEAR = 1970
def _timegm(tt):
    """Return timegm(tt), or None if a field of tt is out of range.

    tt is a time tuple whose first six items are (year, month, day of
    month, hour, minute, second).  The year must not precede EPOCH_YEAR;
    hours up to 24 and seconds up to 61 are let through.

    """
    year, month, mday, hour, minute, second = tt[:6]
    date_ok = (year >= EPOCH_YEAR) and (1 <= month <= 12) and (1 <= mday <= 31)
    clock_ok = (0 <= hour <= 24) and (0 <= minute <= 59) and (0 <= second <= 61)
    if not (date_ok and clock_ok):
        return None
    return timegm(tt)

DAYS = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]
MONTHS = ["Jan", "Feb", "Mar", "Apr", "May", "Jun",
          "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]
MONTHS_LOWER = [month.lower() for month in MONTHS]

def time2isoz(t=None):
    """Return a string representing time in seconds since epoch, t.

    If the function is called without an argument, it will use the current
    time.

    The format of the returned string is like "YYYY-MM-DD hh:mm:ssZ",
    representing Universal Time (UTC, aka GMT).  An example of this format is:

    1994-11-24 08:49:37Z

    """
    if t is None: t = time.time()
    year, mon, mday, hour, min, sec = time.gmtime(t)[:6]
    return "%04d-%02d-%02d %02d:%02d:%02dZ" % (
        year, mon, mday, hour, min, sec)

def time2netscape(t=None):
    """Return a string representing time in seconds since epoch, t.

    If the function is called without an argument, it will use the current
    time.

    The format of the returned string is like this:

    Wed, DD-Mon-YYYY HH:MM:SS GMT

    """
    if t is None: t = time.time()
    year, mon, mday, hour, min, sec, wday = time.gmtime(t)[:7]
    # The weekday is followed by a comma, as documented above.
    return "%s, %02d-%s-%04d %02d:%02d:%02d GMT" % (
        DAYS[wday], mday, MONTHS[mon-1], year, hour, min, sec)


UTC_ZONES = {"GMT": None, "UTC": None, "UT": None, "Z": None}

TIMEZONE_RE = re.compile(r"^([-+])?(\d\d?):?(\d\d)?$")
def offset_from_tz_string(tz):
    """Return the offset from UTC, in seconds, described by the string tz.

    tz is either one of the names in UTC_ZONES (offset 0) or a numeric
    zone of the form [+-]HH[[:]MM], e.g. "-0800" or "+01:30".  None is
    returned for anything else.

    """
    offset = None
    if tz in UTC_ZONES:
        offset = 0
    else:
        m = TIMEZONE_RE.search(tz)
        if m:
            offset = 3600 * int(m.group(2))
            if m.group(3):
                offset = offset + 60 * int(m.group(3))
            if m.group(1) == '-':
                offset = -offset
    return offset

def _str2time(day, mon, yr, hr, min, sec, tz):
    """Convert date/time fields (as strings) to seconds since the epoch.

    mon may be a month name from MONTHS (any case) or a month number.
    hr, min, sec and tz may be None, meaning zero and UTC respectively.
    sec may carry a fractional part, which is discarded.  A year below
    1000 is treated as a two-digit year and mapped to the century that
    puts it closest to the current year.

    Returns None if the month or timezone is not recognised, or if
    _timegm() rejects the resulting time tuple.

    """
    # translate month name to number
    # month numbers start with 1 (January)
    try:
        mon = MONTHS_LOWER.index(mon.lower())+1
    except ValueError:
        # maybe it's already a number
        try:
            imon = int(mon)
        except ValueError:
            return None
        if 1 <= imon <= 12:
            mon = imon
        else:
            return None

    # make sure clock elements are defined
    if hr is None: hr = 0
    if min is None: min = 0
    if sec is None: sec = 0

    yr = int(yr)
    day = int(day)
    hr = int(hr)
    min = int(min)
    # ISO_DATE_RE lets seconds carry a fraction ("29.5"), which a bare
    # int() would reject with ValueError; go via float and truncate.
    sec = int(float(sec))

    if yr < 1000:
        # find "obvious" year
        cur_yr = time.localtime(time.time())[0]
        m = cur_yr % 100
        tmp = yr
        yr = yr + cur_yr - m
        m = m - tmp
        if abs(m) > 50:
            if m > 0: yr = yr + 100
            else: yr = yr - 100

    # convert UTC time tuple to seconds since epoch (not timezone-adjusted)
    t = _timegm((yr, mon, day, hr, min, sec))

    if t is not None:
        # adjust time using timezone string, to get absolute time since epoch
        if tz is None:
            tz = "UTC"
        tz = tz.upper()
        offset = offset_from_tz_string(tz)
        if offset is None:
            return None
        t = t - offset

    return t

STRICT_DATE_RE = re.compile(
    r"^[SMTWF][a-z][a-z], (\d\d) ([JFMASOND][a-z][a-z]) "
    r"(\d\d\d\d) (\d\d):(\d\d):(\d\d) GMT$")
WEEKDAY_RE = re.compile(
    r"^(?:Sun|Mon|Tue|Wed|Thu|Fri|Sat)[a-z]*,?\s*", re.I)
LOOSE_HTTP_DATE_RE = re.compile(
    r"""^
    (\d\d?)            # day
       (?:\s+|[-\/])
    (\w+)              # month
        (?:\s+|[-\/])
    (\d+)              # year
    (?:
          (?:\s+|:)    # separator before clock
       (\d\d?):(\d\d)  # hour:min
       (?::(\d\d))?    # optional seconds
    )?                 # optional clock
       \s*
    ([-+]?\d{2,4}|(?![APap][Mm]\b)[A-Za-z]+)? # timezone
       \s*
    (?:\(\w+\))?       # ASCII representation of timezone in parens.
       \s*$""", re.X)
def http2time(text):
    """Returns time in seconds since epoch of time represented by a string.

    Return value is an integer.

    None is returned if the format of str is unrecognized, the time is outside
    the representable range, or the timezone string is not recognized.  If the
    string contains no timezone, UTC is assumed.

    The timezone in the string may be numerical (like "-0800" or "+0100") or a
    string timezone (like "UTC", "GMT", "BST" or "EST").  Currently, only the
    timezone strings equivalent to UTC (zero offset) are known to the function.

    The function loosely parses the following formats:

    Wed, 09 Feb 1994 22:23:32 GMT       -- HTTP format
    Tuesday, 08-Feb-94 14:15:29 GMT     -- old rfc850 HTTP format
    Tuesday, 08-Feb-1994 14:15:29 GMT   -- broken rfc850 HTTP format
    09 Feb 1994 22:23:32 GMT            -- HTTP format (no weekday)
    08-Feb-94 14:15:29 GMT              -- rfc850 format (no weekday)
    08-Feb-1994 14:15:29 GMT            -- broken rfc850 format (no weekday)

    The parser ignores leading and trailing whitespace.  The time may be
    absent.

    If the year is given with only 2 digits, the function will select the
    century that makes the year closest to the current date.

    """
    # fast exit for strictly conforming string
    m = STRICT_DATE_RE.search(text)
    if m:
        g = m.groups()
        try:
            mon = MONTHS_LOWER.index(g[1].lower()) + 1
        except ValueError:
            # Shaped like a month ("Foo") but not one: unrecognized format,
            # which is reported as None rather than by raising.
            return None
        # Seconds are converted with int() so that the result is an
        # integer, as documented.
        tt = (int(g[2]), mon, int(g[0]),
              int(g[3]), int(g[4]), int(g[5]))
        return _timegm(tt)

    # No, we need some messy parsing...

    # clean up
    text = text.lstrip()
    text = WEEKDAY_RE.sub("", text, 1)  # Useless weekday

    # tz is time zone specifier string
    day, mon, yr, hr, min, sec, tz = [None]*7

    # loose regexp parse
    m = LOOSE_HTTP_DATE_RE.search(text)
    if m is not None:
        day, mon, yr, hr, min, sec, tz = m.groups()
    else:
        return None  # bad format

    return _str2time(day, mon, yr, hr, min, sec, tz)

ISO_DATE_RE = re.compile(
    r"""^
    (\d{4})              # year
       [-\/]?
    (\d\d?)              # numerical month
       [-\/]?
    (\d\d?)              # day
   (?:
         (?:\s+|[-:Tt])  # separator before clock
      (\d\d?):?(\d\d)    # hour:min
      (?::?(\d\d(?:\.\d*)?))?  # optional seconds (and fractional)
   )?                    # optional clock
      \s*
   ([-+]?\d\d?:?(:?\d\d)?
    |Z|z)?               # timezone  (Z is "zero meridian", i.e. GMT)
      \s*$""", re.X)
def iso2time(text):
    """
    As for http2time, but parses the ISO 8601 formats:

    1994-02-03 14:15:29 -0100    -- ISO 8601 format
    1994-02-03 14:15:29          -- zone is optional
    1994-02-03                   -- only date
    1994-02-03T14:15:29          -- Use T as separator
    19940203T141529Z             -- ISO 8601 compact format
    19940203                     -- only date

    """
    # clean up
    text = text.lstrip()

    # tz is time zone specifier string
    day, mon, yr, hr, min, sec, tz = [None]*7

    # loose regexp parse
    m = ISO_DATE_RE.search(text)
    if m is not None:
        # XXX there's an extra bit of the timezone I'm ignoring here: is
        #   this the right thing to do?
        yr, mon, day, hr, min, sec, tz, _ = m.groups()
    else:
        return None  # bad format

    return _str2time(day, mon, yr, hr, min, sec, tz)


# Header parsing
# -----------------------------------------------------------------------------

def unmatched(match):
    """Return unmatched part of re.Match object."""
    start, end = match.span(0)
    return match.string[:start]+match.string[end:]

HEADER_TOKEN_RE =        re.compile(r"^\s*([^=\s;,]+)")
HEADER_QUOTED_VALUE_RE = re.compile(r"^\s*=\s*\"([^\"\\]*(?:\\.[^\"\\]*)*)\"")
HEADER_VALUE_RE =        re.compile(r"^\s*=\s*([^\s;,]*)")
HEADER_ESCAPE_RE = re.compile(r"\\(.)")
def split_header_words(header_values):
    r"""Parse header values into a list of lists containing key,value pairs.

    The function knows how to deal with ",", ";" and "=" as well as quoted
    values after "=".  A list of space separated tokens are parsed as if they
    were separated by ";".

    If the header_values passed as argument contains multiple values, then they
    are treated as if they were a single value separated by comma ",".

    This means that this function is useful for parsing header fields that
    follow this syntax (BNF as from the HTTP/1.1 specification, but we relax
    the requirement for tokens).

      headers           = #header
      header            = (token | parameter) *( [";"] (token | parameter))

      token             = 1*<any CHAR except CTLs or separators>
      separators        = "(" | ")" | "<" | ">" | "@"
                        | "," | ";" | ":" | "\" | <">
                        | "/" | "[" | "]" | "?" | "="
                        | "{" | "}" | SP | HT

      quoted-string     = ( <"> *(qdtext | quoted-pair ) <"> )
      qdtext            = <any TEXT except <">>
      quoted-pair       = "\" CHAR

      parameter         = attribute "=" value
      attribute         = token
      value             = token | quoted-string

    Each header is represented by a list of key/value pairs.  The value for a
    simple token (not part of a parameter) is None.  Syntactically incorrect
    headers will not necessarily be parsed as you would want.

    This is easier to describe with some examples:

    >>> split_header_words(['foo="bar"; port="80,81"; discard, bar=baz'])
    [[('foo', 'bar'), ('port', '80,81'), ('discard', None)], [('bar', 'baz')]]
    >>> split_header_words(['text/html; charset="iso-8859-1"'])
    [[('text/html', None), ('charset', 'iso-8859-1')]]
    >>> split_header_words([r'Basic realm="\"foo\bar\""'])
    [[('Basic', None), ('realm', '"foobar"')]]

    """
    # A bare string would be iterated character by character below.
    assert not isinstance(header_values, basestring)
    result = []
    for text in header_values:
        orig_text = text
        pairs = []
        # Each pass consumes a token (with optional value), a comma, or
        # junk from the front of text, until nothing is left.
        while text:
            m = HEADER_TOKEN_RE.search(text)
            if m:
                text = unmatched(m)
                name = m.group(1)
                m = HEADER_QUOTED_VALUE_RE.search(text)
                if m:  # quoted value
                    text = unmatched(m)
                    value = m.group(1)
                    value = HEADER_ESCAPE_RE.sub(r"\1", value)
                else:
                    m = HEADER_VALUE_RE.search(text)
                    if m:  # unquoted value
                        text = unmatched(m)
                        value = m.group(1)
                        value = value.rstrip()
                    else:
                        # no value, a lone token
                        value = None
                pairs.append((name, value))
            elif text.lstrip().startswith(","):
                # concatenated headers, as per RFC 2616 section 4.2
                text = text.lstrip()[1:]
                if pairs: result.append(pairs)
                pairs = []
            else:
                # skip junk
                non_junk, nr_junk_chars = re.subn(r"^[=\s;]*", "", text)
                assert nr_junk_chars > 0, (
                    "split_header_words bug: '%s', '%s', %s" %
                    (orig_text, text, pairs))
                text = non_junk
        if pairs: result.append(pairs)
    return result

HEADER_JOIN_ESCAPE_RE = re.compile(r"([\"\\])")
def join_header_words(lists):
    """Do the inverse (almost) of the conversion done by split_header_words.

    Takes a list of lists of (key, value) pairs and produces a single header
    value.  Attribute values are quoted if needed.

    >>> join_header_words([[("text/plain", None), ("charset", "iso-8859/1")]])
    'text/plain; charset="iso-8859/1"'
    >>> join_header_words([[("text/plain", None)], [("charset", "iso-8859/1")]])
    'text/plain, charset="iso-8859/1"'

    """
    headers = []
    for pairs in lists:
        attr = []
        for k, v in pairs:
            if v is not None:
                # Values made up solely of word characters go out bare;
                # anything else is escaped and wrapped in double quotes.
                if not re.search(r"^\w+$", v):
                    v = HEADER_JOIN_ESCAPE_RE.sub(r"\\\1", v)  # escape " and \
                    v = '"%s"' % v
                k = "%s=%s" % (k, v)
            attr.append(k)
        if attr: headers.append("; ".join(attr))
    return ", ".join(headers)
4364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 4374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmdef _strip_quotes(text): 4384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if text.startswith('"'): 4394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm text = text[1:] 4404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if text.endswith('"'): 4414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm text = text[:-1] 4424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return text 4434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 4444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmdef parse_ns_headers(ns_headers): 4454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """Ad-hoc parser for Netscape protocol cookie-attributes. 4464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 4474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm The old Netscape cookie format for Set-Cookie can for instance contain 4484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm an unquoted "," in the expires field, so we have to use this ad-hoc 4494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm parser instead of split_header_words. 4504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 4514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm XXX This may not make the best possible effort to parse all the crap 4524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm that Netscape Cookie headers contain. Ronald Tschalar's HTTPClient 4534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm parser is probably better, so could do worse than following that if 4544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm this ever gives any trouble. 4554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 4564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm Currently, this is also used for parsing RFC 2109 cookies. 
4574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 4584710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """ 4594710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm known_attrs = ("expires", "domain", "path", "secure", 4604710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # RFC 2109 attrs (may turn up in Netscape cookies, too) 4614710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm "version", "port", "max-age") 4624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 4634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm result = [] 4644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for ns_header in ns_headers: 4654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm pairs = [] 4664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm version_set = False 4674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for ii, param in enumerate(re.split(r";\s*", ns_header)): 4684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm param = param.rstrip() 4694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if param == "": continue 4704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if "=" not in param: 4714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm k, v = param, None 4724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm else: 4734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm k, v = re.split(r"\s*=\s*", param, 1) 4744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm k = k.lstrip() 4754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if ii != 0: 4764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm lc = k.lower() 4774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if lc in known_attrs: 4784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm k = lc 4794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if k == "version": 4804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # This is an RFC 2109 cookie. 
4814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm v = _strip_quotes(v) 4824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm version_set = True 4834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if k == "expires": 4844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # convert expires date to seconds since epoch 4854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm v = http2time(_strip_quotes(v)) # None if invalid 4864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm pairs.append((k, v)) 4874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 4884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if pairs: 4894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if not version_set: 4904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm pairs.append(("version", "0")) 4914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm result.append(pairs) 4924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 4934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return result 4944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 4954710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 4964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmIPV4_RE = re.compile(r"\.\d+$") 4974710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmdef is_HDN(text): 4984710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """Return True if text is a host domain name.""" 4994710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # XXX 5004710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # This may well be wrong. Which RFC is HDN defined in, if any (for 5014710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # the purposes of RFC 2965)? 5024710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # For the current implementation, what about IPv6? Remember to look 5034710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # at other uses of IPV4_RE also, if change this. 
IPV4_RE = re.compile(r"\.\d+$")
def is_HDN(text):
    """Return True if text is a host domain name.

    Text is rejected when it ends in a dot followed by digits (treated as
    an IPv4 address), when it is empty, or when it starts or ends with a
    dot.
    """
    # XXX
    # This may well be wrong.  Which RFC is HDN defined in, if any (for
    #  the purposes of RFC 2965)?
    # For the current implementation, what about IPv6?  Remember to look
    #  at other uses of IPV4_RE also, if change this.
    if IPV4_RE.search(text):
        return False
    if text == "":
        return False
    if text[0] == "." or text[-1] == ".":
        return False
    return True

def domain_match(A, B):
    """Return True if domain A domain-matches domain B, according to RFC 2965.

    A and B may be host domain names or IP addresses.

    RFC 2965, section 1:

    Host names can be specified either as an IP address or a HDN string.
    Sometimes we compare one host name with another.  (Such comparisons SHALL
    be case-insensitive.)  Host A's name domain-matches host B's if

         *  their host name strings string-compare equal; or

         * A is a HDN string and has the form NB, where N is a non-empty
            name string, B has the form .B', and B' is a HDN string.  (So,
            x.y.com domain-matches .Y.com but not Y.com.)

    Note that domain-match is not a commutative operation: a.b.c.com
    domain-matches .c.com, but not the reverse.

    """
    # Note that, if A or B are IP addresses, the only relevant part of the
    # definition of the domain-match algorithm is the direct string-compare.
    A = A.lower()
    B = B.lower()
    if A == B:
        return True
    if not is_HDN(A):
        return False
    if not A.endswith(B):
        # A does not have the form NB.  B must be a suffix of A; merely
        # occurring somewhere inside A (as in x.y.com.evil.org versus
        # .y.com) is not a match.  N is non-empty here because A != B.
        return False
    if not B.startswith("."):
        return False
    if not is_HDN(B[1:]):
        return False
    return True

def liberal_is_HDN(text):
    """Return True if text is a sort-of-like a host domain name.

    For accepting/blocking domains.  The only thing rejected is text that
    ends in a dot followed by digits (treated as an IPv4 address).

    """
    if IPV4_RE.search(text):
        return False
    return True

def user_domain_match(A, B):
    """For blocking/accepting domains.

    A and B may be host domain names or IP addresses.  The comparison is
    case-insensitive.  If either looks like an IP address, only exact
    equality matches.  Otherwise a B with an initial dot matches any A that
    ends with B, and a B without an initial dot matches only an equal A.

    """
    A = A.lower()
    B = B.lower()
    if not (liberal_is_HDN(A) and liberal_is_HDN(B)):
        if A == B:
            # equal IP addresses
            return True
        return False
    initial_dot = B.startswith(".")
    if initial_dot and A.endswith(B):
        return True
    if not initial_dot and A == B:
        return True
    return False
cut_port_re = re.compile(r":\d+$")
def request_host(request):
    """Return request-host, as defined by RFC 2965.

    The host is taken from the network-location part of the request's full
    URL, falling back to the request's "Host" header when the URL has none.
    A trailing ":port" is removed.

    Variation from RFC: returned value is lowercased, for convenient
    comparison.

    """
    netloc = urlparse.urlparse(request.get_full_url())[1]
    if netloc == "":
        netloc = request.get_header("Host", "")

    # remove port, if present
    return cut_port_re.sub("", netloc, 1).lower()

def eff_request_host(request):
    """Return a tuple (request-host, effective request-host name).

    As defined by RFC 2965, except both are lowercased.  The effective name
    is the request-host with ".local" appended when the request-host
    contains no dot and does not look like an IP address; otherwise the two
    are the same.

    """
    host = request_host(request)
    if "." not in host and not IPV4_RE.search(host):
        return host, host + ".local"
    return host, host

def request_path(request):
    """Path component of request-URI, as defined by RFC 2965.

    The path is passed through escape_path and always starts with "/".
    """
    path = escape_path(urlparse.urlsplit(request.get_full_url()).path)
    if path.startswith("/"):
        return path
    # fix bad RFC 2396 absoluteURI
    return "/" + path

def request_port(request):
    """Return the port of the request's host, as a string.

    The port is whatever follows the first ":" in request.get_host().
    DEFAULT_HTTP_PORT is returned when there is no ":", and None (after a
    debug message) when the text after the ":" is not an integer.
    """
    host = request.get_host()
    colon = host.find(':')
    if colon < 0:
        return DEFAULT_HTTP_PORT
    port = host[colon+1:]
    try:
        # only validating; the port is returned as the original string
        int(port)
    except ValueError:
        _debug("nonnumeric port: '%s'", port)
        return None
    return port

# Characters in addition to A-Z, a-z, 0-9, '_', '.', and '-' that don't
# need to be escaped to form a valid HTTP URL (RFCs 2396 and 1738).
HTTP_PATH_SAFE = "%/;:@&=+$,!~*'()"
ESCAPED_CHAR_RE = re.compile(r"%([0-9a-fA-F][0-9a-fA-F])")
def uppercase_escaped_char(match):
    """Return the %-escape matched by ESCAPED_CHAR_RE with uppercase hex."""
    return "%%%s" % match.group(1).upper()
def escape_path(path):
    """Escape any invalid characters in HTTP URL, and uppercase all escapes."""
    # There's no knowing what character encoding was used to create URLs
    # containing %-escapes, but since we have to pick one to escape invalid
    # path characters, we pick UTF-8, as recommended in the HTML 4.0
    # specification:
    # http://www.w3.org/TR/REC-html40/appendix/notes.html#h-B.2.1
    # And here, kind of: draft-fielding-uri-rfc2396bis-03
    # (And in draft IRI specification: draft-duerst-iri-05)
    # (And here, for new URI schemes: RFC 2718)
    if isinstance(path, unicode):
        path = path.encode("utf-8")
    quoted = urllib.quote(path, HTTP_PATH_SAFE)
    return ESCAPED_CHAR_RE.sub(uppercase_escaped_char, quoted)
def reach(h):
    """Return reach of host h, as defined by RFC 2965, section 1.

    The reach R of a host name H is defined as follows:

       *  If

          -  H is the host domain name of a host; and,

          -  H has the form A.B; and

          -  A has no embedded (that is, interior) dots; and

          -  B has at least one embedded dot, or B is the string "local".
             then the reach of H is .B.

       *  Otherwise, the reach of H is H.

    >>> reach("www.acme.com")
    '.acme.com'
    >>> reach("acme.com")
    'acme.com'
    >>> reach("acme.local")
    '.local'

    """
    first_dot = h.find(".")
    if first_dot < 0:
        # no dot at all, so h cannot have the form A.B
        return h
    # b is everything after the first dot; the part before it is A
    b = h[first_dot+1:]
    if is_HDN(h) and ("." in b or b == "local"):
        return "." + b
    return h

def is_third_party(request):
    """Return True if the request is to a third-party host.

    RFC 2965, section 3.3.6:

        An unverifiable transaction is to a third-party host if its request-
        host U does not domain-match the reach R of the request-host O in the
        origin transaction.

    """
    host = request_host(request)
    origin_reach = reach(request.get_origin_req_host())
    return not domain_match(host, origin_reach)
class Cookie:
    """HTTP Cookie.

    This class represents both Netscape and RFC 2965 cookies.

    This is deliberately a very simple class.  It just holds attributes.  It's
    possible to construct Cookie instances that don't comply with the cookie
    standards.  CookieJar.make_cookies is the factory function for Cookie
    objects -- it deals with cookie parsing, supplying defaults, and
    normalising to the representation used in this class.  CookiePolicy is
    responsible for checking them to see whether they should be accepted from
    and returned to the server.

    Note that the port may be present in the headers, but unspecified ("Port"
    rather than "Port=80", for example); if this is the case, port is None.

    """

    def __init__(self, version, name, value,
                 port, port_specified,
                 domain, domain_specified, domain_initial_dot,
                 path, path_specified,
                 secure,
                 expires,
                 discard,
                 comment,
                 comment_url,
                 rest,
                 rfc2109=False,
                 ):
        """Store the cookie-attributes on the instance.

        version and expires are converted with int() unless they are None,
        domain is lowercased, and rest (the non-standard cookie-attributes)
        is shallow-copied.  Raises ValueError if port is None while
        port_specified is True.
        """
        if version is not None:
            version = int(version)
        if expires is not None:
            expires = int(expires)
        if port is None and port_specified is True:
            raise ValueError("if port is None, port_specified must be false")

        self.version, self.name, self.value = version, name, value
        self.port, self.port_specified = port, port_specified
        # normalise case, as per RFC 2965 section 3.3.3
        self.domain = domain.lower()
        self.domain_specified = domain_specified
        # Sigh.  We need to know whether the domain given in the
        # cookie-attribute had an initial dot, in order to follow RFC 2965
        # (as clarified in draft errata).  Needed for the returned $Domain
        # value.
        self.domain_initial_dot = domain_initial_dot
        self.path, self.path_specified = path, path_specified
        self.secure = secure
        self.expires = expires
        self.discard = discard
        self.comment, self.comment_url = comment, comment_url
        self.rfc2109 = rfc2109

        # private copy, so later changes to the caller's mapping are not seen
        self._rest = copy.copy(rest)

    def has_nonstandard_attr(self, name):
        """Return whether a non-standard cookie-attribute called name is set."""
        return name in self._rest

    def get_nonstandard_attr(self, name, default=None):
        """Return the named non-standard cookie-attribute, or default."""
        return self._rest.get(name, default)

    def set_nonstandard_attr(self, name, value):
        """Set the named non-standard cookie-attribute to value."""
        self._rest[name] = value

    def is_expired(self, now=None):
        """Return True if the cookie has an expiry time that is <= now.

        now defaults to time.time().  A cookie whose expires is None never
        counts as expired.
        """
        if now is None:
            now = time.time()
        if self.expires is None:
            return False
        return self.expires <= now

    def __str__(self):
        if self.port is None:
            port_part = ""
        else:
            port_part = ":" + self.port
        limit = self.domain + port_part + self.path
        if self.value is None:
            namevalue = self.name
        else:
            namevalue = "%s=%s" % (self.name, self.value)
        return "<Cookie %s for %s>" % (namevalue, limit)

    def __repr__(self):
        # public attributes, in constructor-argument order
        shown = ("version", "name", "value",
                 "port", "port_specified",
                 "domain", "domain_specified", "domain_initial_dot",
                 "path", "path_specified",
                 "secure", "expires", "discard", "comment", "comment_url",
                 )
        args = ["%s=%s" % (attr, repr(getattr(self, attr))) for attr in shown]
        args.append("rest=%s" % repr(self._rest))
        args.append("rfc2109=%s" % repr(self.rfc2109))
        return "Cookie(%s)" % ", ".join(args)
class CookiePolicy:
    """Defines which cookies get accepted from and returned to server.

    May also modify cookies, though this is probably a bad idea.

    The subclass DefaultCookiePolicy defines the standard rules for Netscape
    and RFC 2965 cookies -- override that if you want a customised policy.

    set_ok and return_ok have no implementation here and raise
    NotImplementedError; the two *_return_ok checks default to True.

    """
    def set_ok(self, cookie, request):
        """Return true if (and only if) cookie should be accepted from server.

        Currently, pre-expired cookies never get this far -- the CookieJar
        class deletes such cookies itself.

        """
        raise NotImplementedError()

    def return_ok(self, cookie, request):
        """Return true if (and only if) cookie should be returned to server."""
        raise NotImplementedError()

    def domain_return_ok(self, domain, request):
        """Return false if cookies should not be returned, given cookie domain.

        This base implementation always returns True.
        """
        return True

    def path_return_ok(self, path, request):
        """Return false if cookies should not be returned, given cookie path.

        This base implementation always returns True.
        """
        return True
arguments only.""" 8614710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.netscape = netscape 8624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.rfc2965 = rfc2965 8634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.rfc2109_as_netscape = rfc2109_as_netscape 8644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.hide_cookie2 = hide_cookie2 8654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.strict_domain = strict_domain 8664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.strict_rfc2965_unverifiable = strict_rfc2965_unverifiable 8674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.strict_ns_unverifiable = strict_ns_unverifiable 8684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.strict_ns_domain = strict_ns_domain 8694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.strict_ns_set_initial_dollar = strict_ns_set_initial_dollar 8704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.strict_ns_set_path = strict_ns_set_path 8714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 8724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if blocked_domains is not None: 8734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self._blocked_domains = tuple(blocked_domains) 8744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm else: 8754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self._blocked_domains = () 8764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 8774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if allowed_domains is not None: 8784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm allowed_domains = tuple(allowed_domains) 8794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self._allowed_domains = allowed_domains 8804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 8814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def blocked_domains(self): 8824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """Return the sequence of blocked domains (as a tuple).""" 8834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return self._blocked_domains 8844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def 
set_blocked_domains(self, blocked_domains): 8854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """Set the sequence of blocked domains.""" 8864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self._blocked_domains = tuple(blocked_domains) 8874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 8884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def is_blocked(self, domain): 8894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for blocked_domain in self._blocked_domains: 8904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if user_domain_match(domain, blocked_domain): 8914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return True 8924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return False 8934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 8944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def allowed_domains(self): 8954710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """Return None, or the sequence of allowed domains (as a tuple).""" 8964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return self._allowed_domains 8974710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def set_allowed_domains(self, allowed_domains): 8984710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """Set the sequence of allowed domains, or None.""" 8994710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if allowed_domains is not None: 9004710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm allowed_domains = tuple(allowed_domains) 9014710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self._allowed_domains = allowed_domains 9024710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 9034710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def is_not_allowed(self, domain): 9044710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if self._allowed_domains is None: 9054710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return False 9064710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for allowed_domain in self._allowed_domains: 9074710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if user_domain_match(domain, allowed_domain): 9084710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return 
False 9094710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return True 9104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 9114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def set_ok(self, cookie, request): 9124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """ 9134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm If you override .set_ok(), be sure to call this method. If it returns 9144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm false, so should your subclass (assuming your subclass wants to be more 9154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm strict about which cookies to accept). 9164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 9174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """ 9184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm _debug(" - checking cookie %s=%s", cookie.name, cookie.value) 9194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 9204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm assert cookie.name is not None 9214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 9224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for n in "version", "verifiability", "name", "path", "domain", "port": 9234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm fn_name = "set_ok_"+n 9244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm fn = getattr(self, fn_name) 9254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if not fn(cookie, request): 9264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return False 9274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 9284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return True 9294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 9304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def set_ok_version(self, cookie, request): 9314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if cookie.version is None: 9324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # Version is always set to 0 by parse_ns_headers if it's a Netscape 9334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # cookie, so this must be an invalid RFC 2965 cookie. 
9344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm _debug(" Set-Cookie2 without version attribute (%s=%s)", 9354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm cookie.name, cookie.value) 9364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return False 9374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if cookie.version > 0 and not self.rfc2965: 9384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm _debug(" RFC 2965 cookies are switched off") 9394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return False 9404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm elif cookie.version == 0 and not self.netscape: 9414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm _debug(" Netscape cookies are switched off") 9424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return False 9434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return True 9444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 9454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def set_ok_verifiability(self, cookie, request): 9464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if request.is_unverifiable() and is_third_party(request): 9474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if cookie.version > 0 and self.strict_rfc2965_unverifiable: 9484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm _debug(" third-party RFC 2965 cookie during " 9494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm "unverifiable transaction") 9504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return False 9514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm elif cookie.version == 0 and self.strict_ns_unverifiable: 9524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm _debug(" third-party Netscape cookie during " 9534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm "unverifiable transaction") 9544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return False 9554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return True 9564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 9574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def set_ok_name(self, cookie, request): 
9584710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # Try and stop servers setting V0 cookies designed to hack other 9594710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # servers that know both V0 and V1 protocols. 9604710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if (cookie.version == 0 and self.strict_ns_set_initial_dollar and 9614710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm cookie.name.startswith("$")): 9624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm _debug(" illegal name (starts with '$'): '%s'", cookie.name) 9634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return False 9644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return True 9654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 9664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def set_ok_path(self, cookie, request): 9674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if cookie.path_specified: 9684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm req_path = request_path(request) 9694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if ((cookie.version > 0 or 9704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm (cookie.version == 0 and self.strict_ns_set_path)) and 9714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm not req_path.startswith(cookie.path)): 9724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm _debug(" path attribute %s is not a prefix of request " 9734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm "path %s", cookie.path, req_path) 9744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return False 9754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return True 9764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 9774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def set_ok_domain(self, cookie, request): 9784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if self.is_blocked(cookie.domain): 9794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm _debug(" domain %s is in user block-list", cookie.domain) 9804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return False 9814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if 
self.is_not_allowed(cookie.domain): 9824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm _debug(" domain %s is not in user allow-list", cookie.domain) 9834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return False 9844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if cookie.domain_specified: 9854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm req_host, erhn = eff_request_host(request) 9864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm domain = cookie.domain 9874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if self.strict_domain and (domain.count(".") >= 2): 9884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # XXX This should probably be compared with the Konqueror 9894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # (kcookiejar.cpp) and Mozilla implementations, but it's a 9904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # losing battle. 9914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm i = domain.rfind(".") 9924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm j = domain.rfind(".", 0, i) 9934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if j == 0: # domain like .foo.bar 9944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm tld = domain[i+1:] 9954710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm sld = domain[j+1:i] 9964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if sld.lower() in ("co", "ac", "com", "edu", "org", "net", 9974710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm "gov", "mil", "int", "aero", "biz", "cat", "coop", 9984710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm "info", "jobs", "mobi", "museum", "name", "pro", 9994710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm "travel", "eu") and len(tld) == 2: 10004710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # domain like .co.uk 10014710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm _debug(" country-code second level domain %s", domain) 10024710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return False 10034710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if domain.startswith("."): 10044710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm undotted_domain = 
domain[1:] 10054710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm else: 10064710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm undotted_domain = domain 10074710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm embedded_dots = (undotted_domain.find(".") >= 0) 10084710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if not embedded_dots and domain != ".local": 10094710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm _debug(" non-local domain %s contains no embedded dot", 10104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm domain) 10114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return False 10124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if cookie.version == 0: 10134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if (not erhn.endswith(domain) and 10144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm (not erhn.startswith(".") and 10154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm not ("."+erhn).endswith(domain))): 10164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm _debug(" effective request-host %s (even with added " 10174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm "initial dot) does not end end with %s", 10184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm erhn, domain) 10194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return False 10204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if (cookie.version > 0 or 10214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm (self.strict_ns_domain & self.DomainRFC2965Match)): 10224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if not domain_match(erhn, domain): 10234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm _debug(" effective request-host %s does not domain-match " 10244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm "%s", erhn, domain) 10254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return False 10264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if (cookie.version > 0 or 10274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm (self.strict_ns_domain & self.DomainStrictNoDots)): 10284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm host_prefix = 
req_host[:-len(domain)] 10294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if (host_prefix.find(".") >= 0 and 10304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm not IPV4_RE.search(req_host)): 10314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm _debug(" host prefix %s for domain %s contains a dot", 10324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm host_prefix, domain) 10334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return False 10344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return True 10354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 10364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def set_ok_port(self, cookie, request): 10374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if cookie.port_specified: 10384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm req_port = request_port(request) 10394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if req_port is None: 10404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm req_port = "80" 10414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm else: 10424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm req_port = str(req_port) 10434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for p in cookie.port.split(","): 10444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm try: 10454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm int(p) 10464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm except ValueError: 10474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm _debug(" bad port %s (not numeric)", p) 10484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return False 10494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if p == req_port: 10504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm break 10514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm else: 10524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm _debug(" request port (%s) not found in %s", 10534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm req_port, cookie.port) 10544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return False 10554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return True 
10564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 10574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def return_ok(self, cookie, request): 10584710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """ 10594710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm If you override .return_ok(), be sure to call this method. If it 10604710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm returns false, so should your subclass (assuming your subclass wants to 10614710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm be more strict about which cookies to return). 10624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 10634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """ 10644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # Path has already been checked by .path_return_ok(), and domain 10654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # blocking done by .domain_return_ok(). 10664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm _debug(" - checking cookie %s=%s", cookie.name, cookie.value) 10674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 10684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for n in "version", "verifiability", "secure", "expires", "port", "domain": 10694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm fn_name = "return_ok_"+n 10704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm fn = getattr(self, fn_name) 10714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if not fn(cookie, request): 10724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return False 10734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return True 10744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 10754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def return_ok_version(self, cookie, request): 10764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if cookie.version > 0 and not self.rfc2965: 10774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm _debug(" RFC 2965 cookies are switched off") 10784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return False 10794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm elif cookie.version == 0 and not self.netscape: 
10804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm _debug(" Netscape cookies are switched off") 10814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return False 10824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return True 10834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 10844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def return_ok_verifiability(self, cookie, request): 10854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if request.is_unverifiable() and is_third_party(request): 10864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if cookie.version > 0 and self.strict_rfc2965_unverifiable: 10874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm _debug(" third-party RFC 2965 cookie during unverifiable " 10884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm "transaction") 10894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return False 10904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm elif cookie.version == 0 and self.strict_ns_unverifiable: 10914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm _debug(" third-party Netscape cookie during unverifiable " 10924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm "transaction") 10934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return False 10944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return True 10954710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 10964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def return_ok_secure(self, cookie, request): 10974710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if cookie.secure and request.get_type() != "https": 10984710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm _debug(" secure cookie with non-secure request") 10994710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return False 11004710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return True 11014710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 11024710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def return_ok_expires(self, cookie, request): 11034710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if cookie.is_expired(self._now): 
11044710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm _debug(" cookie expired") 11054710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return False 11064710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return True 11074710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 11084710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def return_ok_port(self, cookie, request): 11094710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if cookie.port: 11104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm req_port = request_port(request) 11114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if req_port is None: 11124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm req_port = "80" 11134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for p in cookie.port.split(","): 11144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if p == req_port: 11154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm break 11164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm else: 11174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm _debug(" request port %s does not match cookie port %s", 11184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm req_port, cookie.port) 11194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return False 11204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return True 11214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 11224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def return_ok_domain(self, cookie, request): 11234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm req_host, erhn = eff_request_host(request) 11244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm domain = cookie.domain 11254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 11264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # strict check of non-domain cookies: Mozilla does this, MSIE5 doesn't 11274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if (cookie.version == 0 and 11284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm (self.strict_ns_domain & self.DomainStrictNonDomain) and 11294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm not cookie.domain_specified and domain != erhn): 
11304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm _debug(" cookie with unspecified domain does not string-compare " 11314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm "equal to request domain") 11324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return False 11334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 11344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if cookie.version > 0 and not domain_match(erhn, domain): 11354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm _debug(" effective request-host name %s does not domain-match " 11364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm "RFC 2965 cookie domain %s", erhn, domain) 11374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return False 11384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if cookie.version == 0 and not ("."+erhn).endswith(domain): 11394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm _debug(" request-host %s does not match Netscape cookie domain " 11404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm "%s", req_host, domain) 11414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return False 11424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return True 11434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 11444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def domain_return_ok(self, domain, request): 11454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # Liberal check of. This is here as an optimization to avoid 11464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # having to load lots of MSIE cookie files unless necessary. 
11474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm req_host, erhn = eff_request_host(request) 11484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if not req_host.startswith("."): 11494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm req_host = "."+req_host 11504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if not erhn.startswith("."): 11514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm erhn = "."+erhn 11524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if not (req_host.endswith(domain) or erhn.endswith(domain)): 11534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm #_debug(" request domain %s does not match cookie domain %s", 11544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # req_host, domain) 11554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return False 11564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 11574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if self.is_blocked(domain): 11584710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm _debug(" domain %s is in user block-list", domain) 11594710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return False 11604710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if self.is_not_allowed(domain): 11614710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm _debug(" domain %s is not in user allow-list", domain) 11624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return False 11634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 11644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return True 11654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 11664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def path_return_ok(self, path, request): 11674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm _debug("- checking cookie path=%s", path) 11684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm req_path = request_path(request) 11694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if not req_path.startswith(path): 11704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm _debug(" %s does not path-match %s", req_path, path) 11714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return False 
11724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return True 11734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 11744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 11754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmdef vals_sorted_by_key(adict): 11764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm keys = adict.keys() 11774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm keys.sort() 11784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return map(adict.get, keys) 11794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 11804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmdef deepvalues(mapping): 11814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """Iterates over nested mapping, depth-first, in sorted order by key.""" 11824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm values = vals_sorted_by_key(mapping) 11834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for obj in values: 11844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm mapping = False 11854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm try: 11864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm obj.items 11874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm except AttributeError: 11884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm pass 11894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm else: 11904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm mapping = True 11914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for subobj in deepvalues(obj): 11924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm yield subobj 11934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if not mapping: 11944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm yield obj 11954710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 11964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 11974710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# Used as second parameter to dict.get() method, to distinguish absent 11984710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# dict key from one with a None value. 
11994710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmclass Absent: pass 12004710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 12014710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmclass CookieJar: 12024710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """Collection of HTTP cookies. 12034710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 12044710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm You may not need to know about this class: try 12054710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm urllib2.build_opener(HTTPCookieProcessor).open(url). 12064710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 12074710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """ 12084710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 12094710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm non_word_re = re.compile(r"\W") 12104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm quote_re = re.compile(r"([\"\\])") 12114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm strict_domain_re = re.compile(r"\.?[^.]*") 12124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm domain_re = re.compile(r"[^.]*") 12134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm dots_re = re.compile(r"^\.+") 12144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 12154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm magic_re = r"^\#LWP-Cookies-(\d+\.\d+)" 12164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 12174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def __init__(self, policy=None): 12184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if policy is None: 12194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm policy = DefaultCookiePolicy() 12204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self._policy = policy 12214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 12224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self._cookies_lock = _threading.RLock() 12234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self._cookies = {} 12244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 12254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def set_policy(self, policy): 
12264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self._policy = policy 12274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 12284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def _cookies_for_domain(self, domain, request): 12294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm cookies = [] 12304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if not self._policy.domain_return_ok(domain, request): 12314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return [] 12324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm _debug("Checking %s for cookies to return", domain) 12334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm cookies_by_path = self._cookies[domain] 12344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for path in cookies_by_path.keys(): 12354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if not self._policy.path_return_ok(path, request): 12364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm continue 12374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm cookies_by_name = cookies_by_path[path] 12384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for cookie in cookies_by_name.values(): 12394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if not self._policy.return_ok(cookie, request): 12404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm _debug(" not returning cookie") 12414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm continue 12424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm _debug(" it's a match") 12434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm cookies.append(cookie) 12444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return cookies 12454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 12464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def _cookies_for_request(self, request): 12474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """Return a list of cookies to be returned to server.""" 12484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm cookies = [] 12494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for domain in self._cookies.keys(): 12504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 
def _cookie_attrs(self, cookies):
    """Return a list of cookie-attributes to be returned to server.

    like ['foo="bar"; $Path="/"', ...]

    `cookies` is sorted in place, longest path first, so that the most
    specific cookies lead.  The $Version attribute is also added when
    appropriate (currently only once per request, taken from the first
    cookie after sorting).

    """
    cookies.sort(key=lambda c: len(c.path), reverse=True)

    attrs = []
    for index, cookie in enumerate(cookies):
        version = cookie.version
        # XXX What should $Version be if the matching Set-Cookie headers
        # carried different versions?  There is no answer (the RFC 2965
        # errata that was meant to settle it may never appear), so only
        # the first cookie is consulted.
        if index == 0 and version > 0:
            attrs.append("$Version=%s" % version)

        raw = cookie.value
        if raw is None:
            # valueless cookie: just the bare name
            attrs.append(cookie.name)
        else:
            # Quote the value if necessary -- but not for the Netscape
            # protocol, whose values already have any quotes intact.
            if self.non_word_re.search(raw) and version > 0:
                raw = self.quote_re.sub(r"\\\1", raw)
            attrs.append("%s=%s" % (cookie.name, raw))

        if version > 0:
            if cookie.path_specified:
                attrs.append('$Path="%s"' % cookie.path)
            if cookie.domain.startswith("."):
                domain = cookie.domain
                if not cookie.domain_initial_dot:
                    # the leading dot was not in the original attribute
                    domain = domain[1:]
                attrs.append('$Domain="%s"' % domain)
            if cookie.port is not None:
                port_attr = "$Port"
                if cookie.port_specified:
                    port_attr = port_attr + ('="%s"' % cookie.port)
                attrs.append(port_attr)

    return attrs
def add_cookie_header(self, request):
    """Add correct Cookie: header to request (urllib2.Request object).

    The Cookie2 header is also added unless policy.hide_cookie2 is true.

    Both headers are added as unredirected headers, and neither replaces
    a header of the same name already present on the request.  Once the
    lock has been released, expired cookies are purged from the jar.

    """
    _debug("add_cookie_header")
    self._cookies_lock.acquire()
    try:
        now = int(time.time())
        self._policy._now = self._now = now

        cookies = self._cookies_for_request(request)
        attrs = self._cookie_attrs(cookies)
        if attrs and not request.has_header("Cookie"):
            request.add_unredirected_header("Cookie", "; ".join(attrs))

        # if necessary, advertise that we know RFC 2965
        policy = self._policy
        advertise = (policy.rfc2965 and not policy.hide_cookie2 and
                     not request.has_header("Cookie2"))
        if advertise:
            for cookie in cookies:
                if cookie.version != 1:
                    request.add_unredirected_header("Cookie2", '$Version="1"')
                    break
    finally:
        self._cookies_lock.release()

    self.clear_expired_cookies()
def _normalized_cookie_tuples(self, attrs_set):
    """Return list of tuples containing normalised cookie information.

    attrs_set is the list of lists of key,value pairs extracted from
    the Set-Cookie or Set-Cookie2 headers.

    Tuples are name, value, standard, rest, where name and value are the
    cookie name and value, standard is a dictionary containing the standard
    cookie-attributes (discard, secure, version, expires or max-age,
    domain, path and port) and rest is a dictionary containing the rest of
    the cookie-attributes.

    A cookie is left out of the result altogether when its domain
    cookie-attribute has no value, when its max-age is missing or not an
    integer, or when any other standard value cookie-attribute apart from
    port, comment and commenturl has no value.

    """
    cookie_tuples = []

    boolean_attrs = "discard", "secure"
    value_attrs = ("version",
                   "expires", "max-age",
                   "domain", "path", "port",
                   "comment", "commenturl")

    for cookie_attrs in attrs_set:
        name, value = cookie_attrs[0]

        # Build dictionary of standard cookie-attributes (standard) and
        # dictionary of other cookie-attributes (rest).

        # Note: expiry time is normalised to seconds since epoch.  V0
        # cookies should have the Expires cookie-attribute, and V1 cookies
        # should have Max-Age, but since V1 includes RFC 2109 cookies (and
        # since V0 cookies may be a mish-mash of Netscape and RFC 2109), we
        # accept either (but prefer Max-Age).
        max_age_set = False

        bad_cookie = False

        standard = {}
        rest = {}
        for k, v in cookie_attrs[1:]:
            lc = k.lower()
            # don't lose case distinction for unknown fields
            if lc in value_attrs or lc in boolean_attrs:
                k = lc
            if k in boolean_attrs and v is None:
                # boolean cookie-attribute is present, but has no value
                # (like "discard", rather than "port=80")
                v = True
            if k in standard:
                # only first value is significant
                continue
            if k == "domain":
                if v is None:
                    _debug(" missing value for domain attribute")
                    bad_cookie = True
                    break
                # RFC 2965 section 3.3.3
                v = v.lower()
            if k == "expires":
                if max_age_set:
                    # Prefer max-age to expires (like Mozilla)
                    continue
                if v is None:
                    _debug(" missing or invalid value for expires "
                           "attribute: treating as session cookie")
                    continue
            if k == "max-age":
                max_age_set = True
                try:
                    v = int(v)
                except (TypeError, ValueError):
                    # TypeError covers a Max-Age with no value at all
                    # (v is None).  Letting it escape would abort the
                    # whole header set instead of just this cookie.
                    _debug(" missing or invalid (non-numeric) value for "
                           "max-age attribute")
                    bad_cookie = True
                    break
                # convert RFC 2965 Max-Age to seconds since epoch
                # XXX Strictly you're supposed to follow RFC 2616
                #   age-calculation rules.  Remember that zero Max-Age is
                #   a request to discard (old and new) cookie, though.
                k = "expires"
                v = self._now + v
            if (k in value_attrs) or (k in boolean_attrs):
                if (v is None and
                    k not in ("port", "comment", "commenturl")):
                    _debug(" missing value for %s attribute", k)
                    bad_cookie = True
                    break
                standard[k] = v
            else:
                rest[k] = v

        if bad_cookie:
            continue

        cookie_tuples.append((name, value, standard, rest))

    return cookie_tuples
def _cookie_from_cookie_tuple(self, tup, request):
    """Build a Cookie from one normalised (name, value, standard, rest) tuple.

    standard is the dict of standard cookie-attributes, rest is the dict of
    the rest of them.  A path, domain or port missing from standard is
    defaulted from `request`.

    Returns None instead of a Cookie when the version cookie-attribute is
    not an integer, or when the expiry time is not after self._now; in the
    latter case any cookie stored under the same domain, path and name is
    removed from the jar too.

    """
    name, value, standard, rest = tup

    domain = standard.get("domain", Absent)
    path = standard.get("path", Absent)
    port = standard.get("port", Absent)
    expires = standard.get("expires", Absent)

    # the easy defaults
    version = standard.get("version", None)
    if version is not None:
        try:
            version = int(version)
        except ValueError:
            return None  # invalid version, ignore cookie
    secure = standard.get("secure", False)
    # (discard is also set if expires is Absent)
    discard = standard.get("discard", False)
    comment = standard.get("comment", None)
    comment_url = standard.get("commenturl", None)

    # default path
    path_specified = path is not Absent and path != ""
    if path_specified:
        path = escape_path(path)
    else:
        path = request_path(request)
        slash = path.rfind("/")
        if slash != -1:
            if version == 0:
                # Netscape spec parts company from reality here
                path = path[:slash]
            else:
                path = path[:slash+1]
        if len(path) == 0:
            path = "/"

    # default domain -- remembering first whether it came with a leading dot
    domain_specified = domain is not Absent
    if domain_specified:
        domain_initial_dot = bool(domain.startswith("."))
        if not domain_initial_dot:
            domain = "."+domain
    else:
        domain_initial_dot = False
        req_host, erhn = eff_request_host(request)
        domain = erhn

    # default port
    port_specified = False
    if port is Absent:
        # No port attr present.  Cookie can be sent back on any port.
        port = None
    elif port is None:
        # Port attr present, but has no value: default to request port.
        # Cookie should then only be sent back on that port.
        port = request_port(request)
    else:
        port_specified = True
        port = re.sub(r"\s+", "", port)

    # default expires and discard
    if expires is Absent:
        expires = None
        discard = True
    elif expires <= self._now:
        # Expiry date in past is request to delete cookie.  This can't be
        # in DefaultCookiePolicy, because can't delete cookies there.
        try:
            self.clear(domain, path, name)
        except KeyError:
            pass
        _debug("Expiring cookie, domain='%s', path='%s', name='%s'",
               domain, path, name)
        return None

    return Cookie(version,
                  name, value,
                  port, port_specified,
                  domain, domain_specified, domain_initial_dot,
                  path, path_specified,
                  secure,
                  expires,
                  discard,
                  comment,
                  comment_url,
                  rest)
def _cookies_from_attrs_set(self, attrs_set, request):
    """Return the list of Cookie objects described by `attrs_set`.

    attrs_set is normalised with _normalized_cookie_tuples() and each
    resulting tuple is handed to _cookie_from_cookie_tuple(); results that
    are false (such as the None it returns for a rejected cookie) are
    left out.

    """
    cookies = []
    for tup in self._normalized_cookie_tuples(attrs_set):
        candidate = self._cookie_from_cookie_tuple(tup, request)
        if not candidate:
            continue
        cookies.append(candidate)
    return cookies
def _process_rfc2109_cookies(self, cookies):
    """Flag version 1 cookies as RFC 2109 cookies, downgrading if asked.

    Each cookie with version 1 gets rfc2109 set to True.  When the policy's
    rfc2109_as_netscape attribute is true the cookie's version is also
    reset to 0; if that attribute is missing or None, "not policy.rfc2965"
    is used in its place.

    """
    downgrade = getattr(self._policy, 'rfc2109_as_netscape', None)
    if downgrade is None:
        downgrade = not self._policy.rfc2965
    for cookie in cookies:
        if cookie.version != 1:
            continue
        cookie.rfc2109 = True
        if downgrade:
            # treat 2109 cookies as Netscape cookies rather than
            # as RFC2965 cookies
            cookie.version = 0


def make_cookies(self, response, request):
    """Return sequence of Cookie objects extracted from response object."""
    # get cookie-attributes for RFC 2965 and Netscape protocols
    headers = response.info()
    rfc2965_hdrs = headers.getheaders("Set-Cookie2")
    ns_hdrs = headers.getheaders("Set-Cookie")

    rfc2965 = self._policy.rfc2965
    netscape = self._policy.netscape

    # A header is only relevant when the policy enables its protocol.
    usable_rfc2965 = rfc2965_hdrs and rfc2965
    usable_ns = ns_hdrs and netscape
    if not usable_rfc2965 and not usable_ns:
        return []  # no relevant cookie headers: quick exit

    try:
        cookies = self._cookies_from_attrs_set(
            split_header_words(rfc2965_hdrs), request)
    except Exception:
        _warn_unhandled_exception()
        cookies = []

    if usable_ns:
        try:
            # RFC 2109 and Netscape cookies
            ns_cookies = self._cookies_from_attrs_set(
                parse_ns_headers(ns_hdrs), request)
        except Exception:
            _warn_unhandled_exception()
            ns_cookies = []
        self._process_rfc2109_cookies(ns_cookies)

        # Look for Netscape cookies (from Set-Cookie headers) that match
        # corresponding RFC 2965 cookies (from Set-Cookie2 headers).
        # For each match, keep the RFC 2965 cookie and ignore the Netscape
        # cookie (RFC 2965 section 9.1).  Actually, RFC 2109 cookies are
        # bundled in with the Netscape cookies for this purpose, which is
        # reasonable behaviour.
        if rfc2965:
            taken = set([(c.domain, c.path, c.name) for c in cookies])
            ns_cookies = [c for c in ns_cookies
                          if (c.domain, c.path, c.name) not in taken]

        if ns_cookies:
            cookies.extend(ns_cookies)

    return cookies
def set_cookie_if_ok(self, cookie, request):
    """Set a cookie if policy says it's OK to do so.

    The current time is recorded on both the jar and its policy before the
    policy's set_ok() is consulted; all of this happens with the cookies
    lock held.

    """
    lock = self._cookies_lock
    lock.acquire()
    try:
        now = int(time.time())
        self._policy._now = self._now = now
        accepted = self._policy.set_ok(cookie, request)
        if accepted:
            self.set_cookie(cookie)
    finally:
        lock.release()
def set_cookie(self, cookie):
    """Set a cookie, without checking whether or not it should be set.

    Cookies are kept in nested dictionaries keyed by domain, then path,
    then name; missing levels are created on demand and a cookie already
    stored under the same three keys is replaced.

    """
    jar = self._cookies
    self._cookies_lock.acquire()
    try:
        by_path = jar.setdefault(cookie.domain, {})
        by_name = by_path.setdefault(cookie.path, {})
        by_name[cookie.name] = cookie
    finally:
        self._cookies_lock.release()


def extract_cookies(self, response, request):
    """Extract cookies from response, where allowable given the request.

    Each cookie produced by make_cookies() is stored only if the policy's
    set_ok() accepts it.

    """
    _debug("extract_cookies: %s", response.info())
    self._cookies_lock.acquire()
    try:
        now = int(time.time())
        self._policy._now = self._now = now

        for cookie in self.make_cookies(response, request):
            if not self._policy.set_ok(cookie, request):
                continue
            _debug(" setting cookie: %s", cookie)
            self.set_cookie(cookie)
    finally:
        self._cookies_lock.release()
def clear(self, domain=None, path=None, name=None):
    """Clear some cookies.

    Invoking this method without arguments will clear all cookies.  If
    given a single argument, only cookies belonging to that domain will be
    removed.  If given two arguments, cookies belonging to the specified
    path within that domain are removed.  If given three arguments, then
    the cookie with the specified name, path and domain is removed.

    Raises KeyError if no matching cookie exists, and ValueError if a name
    is given without both domain and path, or a path without a domain.

    """
    if name is not None:
        if domain is None or path is None:
            raise ValueError(
                "domain and path must be given to remove a cookie by name")
        del self._cookies[domain][path][name]
        return
    if path is not None:
        if domain is None:
            raise ValueError(
                "domain must be given to remove cookies by path")
        del self._cookies[domain][path]
        return
    if domain is not None:
        del self._cookies[domain]
        return
    self._cookies = {}
def clear_session_cookies(self):
    """Discard all session cookies.

    A session cookie here is one whose discard attribute is true.  Note
    that the .save() method won't save session cookies anyway, unless you
    ask otherwise by passing a true ignore_discard argument.

    """
    lock = self._cookies_lock
    lock.acquire()
    try:
        for cookie in self:
            if not cookie.discard:
                continue
            self.clear(cookie.domain, cookie.path, cookie.name)
    finally:
        lock.release()
def clear_expired_cookies(self):
    """Discard all expired cookies.

    You probably don't need to call this method: expired cookies are never
    sent back to the server (provided you're using DefaultCookiePolicy),
    this method is called by CookieJar itself every so often, and the
    .save() method won't save expired cookies anyway (unless you ask
    otherwise by passing a true ignore_expires argument).

    Expiry is judged by each cookie's is_expired(), against one timestamp
    taken at the start of the sweep.

    """
    lock = self._cookies_lock
    lock.acquire()
    try:
        now = time.time()
        for cookie in self:
            if not cookie.is_expired(now):
                continue
            self.clear(cookie.domain, cookie.path, cookie.name)
    finally:
        lock.release()
16994710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 17004710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """ 17014710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self._cookies_lock.acquire() 17024710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm try: 17034710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm now = time.time() 17044710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for cookie in self: 17054710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if cookie.is_expired(now): 17064710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.clear(cookie.domain, cookie.path, cookie.name) 17074710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm finally: 17084710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self._cookies_lock.release() 17094710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 17104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def __iter__(self): 17114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return deepvalues(self._cookies) 17124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 17134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def __len__(self): 17144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """Return number of contained cookies.""" 17154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm i = 0 17164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for cookie in self: i = i + 1 17174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return i 17184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 17194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def __repr__(self): 17204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm r = [] 17214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for cookie in self: r.append(repr(cookie)) 17224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return "<%s[%s]>" % (self.__class__, ", ".join(r)) 17234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 17244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def __str__(self): 17254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm r = [] 17264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for cookie in self: r.append(str(cookie)) 
17274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return "<%s[%s]>" % (self.__class__, ", ".join(r)) 17284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 17294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 17304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# derives from IOError for backwards-compatibility with Python 2.4.0 17314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmclass LoadError(IOError): pass 17324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 17334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmclass FileCookieJar(CookieJar): 17344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """CookieJar that can be loaded from and saved to a file.""" 17354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 17364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def __init__(self, filename=None, delayload=False, policy=None): 17374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """ 17384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm Cookies are NOT loaded from the named file until either the .load() or 17394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm .revert() method is called. 
17404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 17414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """ 17424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm CookieJar.__init__(self, policy) 17434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if filename is not None: 17444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm try: 17454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm filename+"" 17464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm except: 17474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm raise ValueError("filename must be string-like") 17484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.filename = filename 17494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.delayload = bool(delayload) 17504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 17514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def save(self, filename=None, ignore_discard=False, ignore_expires=False): 17524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """Save cookies to a file.""" 17534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm raise NotImplementedError() 17544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 17554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def load(self, filename=None, ignore_discard=False, ignore_expires=False): 17564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """Load cookies from a file.""" 17574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if filename is None: 17584710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if self.filename is not None: filename = self.filename 17594710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm else: raise ValueError(MISSING_FILENAME_TEXT) 17604710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 17614710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm f = open(filename) 17624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm try: 17634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self._really_load(f, filename, ignore_discard, ignore_expires) 17644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm finally: 17654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm f.close() 
17664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 17674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def revert(self, filename=None, 17684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ignore_discard=False, ignore_expires=False): 17694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """Clear all cookies and reload cookies from a saved file. 17704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 17714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm Raises LoadError (or IOError) if reversion is not successful; the 17724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm object's state will not be altered if this happens. 17734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 17744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """ 17754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if filename is None: 17764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if self.filename is not None: filename = self.filename 17774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm else: raise ValueError(MISSING_FILENAME_TEXT) 17784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 17794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self._cookies_lock.acquire() 17804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm try: 17814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 17824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm old_state = copy.deepcopy(self._cookies) 17834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self._cookies = {} 17844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm try: 17854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.load(filename, ignore_discard, ignore_expires) 17864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm except (LoadError, IOError): 17874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self._cookies = old_state 17884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm raise 17894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 17904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm finally: 17914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self._cookies_lock.release() 
17924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 17934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmfrom _LWPCookieJar import LWPCookieJar, lwp_cookie_str 17944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmfrom _MozillaCookieJar import MozillaCookieJar 1795