r"""HTTP cookie handling for web clients.

This module has (now fairly distant) origins in Gisle Aas' Perl module
HTTP::Cookies, from the libwww-perl library.

Docstrings, comments and debug strings in this code refer to the
attributes of the HTTP cookie system as cookie-attributes, to distinguish
them clearly from Python attributes.

Class diagram (note that BSDDBCookieJar and the MSIE* classes are not
distributed with the Python standard library, but are available from
http://wwwsearch.sf.net/):

                        CookieJar____
                        /     \      \
            FileCookieJar      \      \
             /    |   \         \      \
 MozillaCookieJar | LWPCookieJar \      \
                  |               |      \
                  |   ---MSIEBase |       \
                  |  /      |     |        \
                  | /   MSIEDBCookieJar BSDDBCookieJar
                  |/
               MSIECookieJar

"""
__all__ = ['Cookie', 'CookieJar', 'CookiePolicy', 'DefaultCookiePolicy',
           'FileCookieJar', 'LWPCookieJar', 'lwp_cookie_str', 'LoadError',
           'MozillaCookieJar']

import re, urlparse, copy, time, urllib
try:
    import threading as _threading
except ImportError:
    import dummy_threading as _threading
import httplib  # only for the default HTTP port
from calendar import timegm

debug = False   # set to True to enable debugging via the logging module
logger = None   # created lazily by _debug() the first time it is needed

def _debug(*args):
    """Forward *args to the module logger's debug() when debugging is on.

    Does nothing (and returns None) while the module-level ``debug`` flag is
    false.  Otherwise the "cookielib" logger is looked up on first use,
    cached in the module-level ``logger`` name, and ``logger.debug(*args)``
    is returned.
    """
    if not debug:
        return
    global logger
    if not logger:
        # Imported here so that logging is only loaded when debugging.
        import logging
        logger = logging.getLogger("cookielib")
    return logger.debug(*args)


DEFAULT_HTTP_PORT = str(httplib.HTTP_PORT)
MISSING_FILENAME_TEXT = ("a filename was not supplied (nor was the CookieJar "
                         "instance initialised with one)")

def _warn_unhandled_exception():
    """Issue a warning containing the traceback of the current exception.

    Intended to be called from inside an ``except`` clause: the formatted
    traceback of the exception being handled is embedded in the warning
    message.
    """
    # There are a few catch-all except: statements in this module, for
    # catching input that's bad in unexpected ways.  Warn if any
    # exceptions are caught there.
    import traceback
    import warnings
    msg = traceback.format_exc()
    warnings.warn("cookielib bug!\n%s" % msg, stacklevel=2)


# Date/time conversion
# -----------------------------------------------------------------------------

EPOCH_YEAR = 1970
def _timegm(tt):
    """Return seconds since the epoch for the UTC time tuple tt, or None.

    Only the first six items of tt (year, month, day, hour, minute, second)
    are examined.  None is returned when any of them is outside the range
    accepted here (year before EPOCH_YEAR, month not 1-12, and so on) or
    when calendar.timegm() itself rejects the value (for example a year too
    large for the datetime module).
    """
    year, month, mday, hour, minute, sec = tt[:6]
    if not ((year >= EPOCH_YEAR) and (1 <= month <= 12) and
            (1 <= mday <= 31) and (0 <= hour <= 24) and
            (0 <= minute <= 59) and (0 <= sec <= 61)):
        return None
    try:
        return timegm(tt)
    except (ValueError, OverflowError):
        # The fields passed the coarse checks above but are still not
        # representable; callers document None for this case.
        return None

DAYS = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]
MONTHS = ["Jan", "Feb", "Mar", "Apr", "May", "Jun",
          "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]
MONTHS_LOWER = [month.lower() for month in MONTHS]

def time2isoz(t=None):
    """Return a string representing time in seconds since epoch, t.

    If the function is called without an argument, it will use the current
    time.

    The format of the returned string is like "YYYY-MM-DD hh:mm:ssZ",
    representing Universal Time (UTC, aka GMT).  An example of this format is:

    1994-11-24 08:49:37Z

    """
    if t is None:
        t = time.time()
    year, mon, mday, hour, minute, sec = time.gmtime(t)[:6]
    return "%04d-%02d-%02d %02d:%02d:%02dZ" % (
        year, mon, mday, hour, minute, sec)

def time2netscape(t=None):
    """Return a string representing time in seconds since epoch, t.

    If the function is called without an argument, it will use the current
    time.

    The format of the returned string is like this:

    Wed, DD-Mon-YYYY HH:MM:SS GMT

    """
    if t is None:
        t = time.time()
    year, mon, mday, hour, minute, sec, wday = time.gmtime(t)[:7]
    # The comma after the weekday is part of the documented format.
    return "%s, %02d-%s-%04d %02d:%02d:%02d GMT" % (
        DAYS[wday], mday, MONTHS[mon-1], year, hour, minute, sec)


UTC_ZONES = {"GMT": None, "UTC": None, "UT": None, "Z": None}

TIMEZONE_RE = re.compile(r"^([-+])?(\d\d?):?(\d\d)?$")
def offset_from_tz_string(tz):
    """Return the offset in seconds for timezone string tz, or None.

    The names listed in UTC_ZONES give 0.  Otherwise tz must look like
    "[+-]HH", "[+-]HHMM" or "[+-]HH:MM"; the result is negative when the
    sign is "-".  None is returned for anything else.
    """
    if tz in UTC_ZONES:
        return 0
    m = TIMEZONE_RE.search(tz)
    if not m:
        return None
    offset = 3600 * int(m.group(2))
    if m.group(3):
        offset = offset + 60 * int(m.group(3))
    if m.group(1) == '-':
        offset = -offset
    return offset

def _str2time(day, mon, yr, hr, min, sec, tz):
    """Convert date/time fields captured by the date regexps to epoch seconds.

    mon may be a month name (matched case-insensitively against MONTHS) or a
    month number.  hr, min and sec default to 0 when None; a fractional
    seconds string is truncated to whole seconds.  Years below 1000 are
    mapped to the century that puts them closest to the current year.  tz is
    a timezone string understood by offset_from_tz_string(), or None for UTC.

    Returns None when the month, the resulting time or the timezone is not
    acceptable.
    """
    # translate month name to number
    # month numbers start with 1 (January)
    try:
        mon = MONTHS_LOWER.index(mon.lower())+1
    except ValueError:
        # maybe it's already a number
        try:
            imon = int(mon)
        except ValueError:
            return None
        if 1 <= imon <= 12:
            mon = imon
        else:
            return None

    # make sure clock elements are defined
    if hr is None: hr = 0
    if min is None: min = 0
    if sec is None: sec = 0

    yr = int(yr)
    day = int(day)
    hr = int(hr)
    minute = int(min)
    # ISO_DATE_RE lets the seconds carry a fraction ("29.5", "29."), which
    # int() alone would reject; go through float() and drop the fraction.
    sec = int(float(sec))

    if yr < 1000:
        # find "obvious" year
        cur_yr = time.localtime(time.time())[0]
        m = cur_yr % 100
        tmp = yr
        yr = yr + cur_yr - m
        m = m - tmp
        if abs(m) > 50:
            if m > 0: yr = yr + 100
            else: yr = yr - 100

    # convert UTC time tuple to seconds since epoch (not timezone-adjusted)
    t = _timegm((yr, mon, day, hr, minute, sec))

    if t is not None:
        # adjust time using timezone string, to get absolute time since epoch
        if tz is None:
            tz = "UTC"
        tz = tz.upper()
        offset = offset_from_tz_string(tz)
        if offset is None:
            return None
        t = t - offset

    return t

STRICT_DATE_RE = re.compile(
    r"^[SMTWF][a-z][a-z], (\d\d) ([JFMASOND][a-z][a-z]) "
    r"(\d\d\d\d) (\d\d):(\d\d):(\d\d) GMT$")
WEEKDAY_RE = re.compile(
    r"^(?:Sun|Mon|Tue|Wed|Thu|Fri|Sat)[a-z]*,?\s*", re.I)
LOOSE_HTTP_DATE_RE = re.compile(
    r"""^
    (\d\d?)            # day
       (?:\s+|[-\/])
    (\w+)              # month
        (?:\s+|[-\/])
    (\d+)              # year
    (?:
          (?:\s+|:)    # separator before clock
       (\d\d?):(\d\d)  # hour:min
       (?::(\d\d))?    # optional seconds
    )?                 # optional clock
       \s*
    ([-+]?\d{2,4}|(?![APap][Mm]\b)[A-Za-z]+)? # timezone
       \s*
    (?:\(\w+\))?       # ASCII representation of timezone in parens.
       \s*$""", re.X)
def http2time(text):
    """Returns time in seconds since epoch of time represented by a string.

    Return value is an integer.

    None is returned if the format of text is unrecognized, the time is
    outside the representable range, or the timezone string is not
    recognized.  If the string contains no timezone, UTC is assumed.

    The timezone in the string may be numerical (like "-0800" or "+0100") or a
    string timezone (like "UTC", "GMT", "BST" or "EST").  Currently, only the
    timezone strings equivalent to UTC (zero offset) are known to the function.

    The function loosely parses the following formats:

    Wed, 09 Feb 1994 22:23:32 GMT       -- HTTP format
    Tuesday, 08-Feb-94 14:15:29 GMT     -- old rfc850 HTTP format
    Tuesday, 08-Feb-1994 14:15:29 GMT   -- broken rfc850 HTTP format
    09 Feb 1994 22:23:32 GMT            -- HTTP format (no weekday)
    08-Feb-94 14:15:29 GMT              -- rfc850 format (no weekday)
    08-Feb-1994 14:15:29 GMT            -- broken rfc850 format (no weekday)

    The parser ignores leading and trailing whitespace.  The time may be
    absent.

    If the year is given with only 2 digits, the function will select the
    century that makes the year closest to the current date.

    """
    # fast exit for strictly conforming string
    m = STRICT_DATE_RE.search(text)
    if m:
        g = m.groups()
        mon = MONTHS_LOWER.index(g[1].lower()) + 1
        # All fields are plain digit runs here, so int() keeps the result an
        # integer as documented.
        tt = (int(g[2]), mon, int(g[0]),
              int(g[3]), int(g[4]), int(g[5]))
        return _timegm(tt)

    # No, we need some messy parsing...

    # clean up
    text = text.lstrip()
    text = WEEKDAY_RE.sub("", text, 1)  # Useless weekday

    # loose regexp parse
    m = LOOSE_HTTP_DATE_RE.search(text)
    if m is None:
        return None  # bad format

    # tz is time zone specifier string
    day, mon, yr, hr, minute, sec, tz = m.groups()
    return _str2time(day, mon, yr, hr, minute, sec, tz)

ISO_DATE_RE = re.compile(
    r"""^
    (\d{4})              # year
       [-\/]?
    (\d\d?)              # numerical month
       [-\/]?
    (\d\d?)              # day
   (?:
         (?:\s+|[-:Tt])  # separator before clock
      (\d\d?):?(\d\d)    # hour:min
      (?::?(\d\d(?:\.\d*)?))?  # optional seconds (and fractional)
   )?                    # optional clock
      \s*
   ([-+]?\d\d?:?(:?\d\d)?
    |Z|z)?               # timezone  (Z is "zero meridian", i.e. GMT)
      \s*$""", re.X)
def iso2time(text):
    """
    As for http2time, but parses the ISO 8601 formats:

    1994-02-03 14:15:29 -0100    -- ISO 8601 format
    1994-02-03 14:15:29          -- zone is optional
    1994-02-03                   -- only date
    1994-02-03T14:15:29          -- Use T as separator
    19940203T141529Z             -- ISO 8601 compact format
    19940203                     -- only date

    Fractional seconds are accepted and truncated to whole seconds.

    """
    # clean up
    text = text.lstrip()

    # loose regexp parse
    m = ISO_DATE_RE.search(text)
    if m is None:
        return None  # bad format

    # XXX there's an extra bit of the timezone I'm ignoring here: is
    #   this the right thing to do?
    # tz is time zone specifier string
    yr, mon, day, hr, minute, sec, tz, _ = m.groups()
    return _str2time(day, mon, yr, hr, minute, sec, tz)


# Header parsing
# -----------------------------------------------------------------------------

def unmatched(match):
    """Return unmatched part of re.Match object."""
    start, end = match.span(0)
    return match.string[:start]+match.string[end:]

HEADER_TOKEN_RE =        re.compile(r"^\s*([^=\s;,]+)")
HEADER_QUOTED_VALUE_RE = re.compile(r"^\s*=\s*\"([^\"\\]*(?:\\.[^\"\\]*)*)\"")
HEADER_VALUE_RE =        re.compile(r"^\s*=\s*([^\s;,]*)")
HEADER_ESCAPE_RE = re.compile(r"\\(.)")
def split_header_words(header_values):
    r"""Parse header values into a list of lists containing key,value pairs.

    The function knows how to deal with ",", ";" and "=" as well as quoted
    values after "=".  A list of space separated tokens are parsed as if they
    were separated by ";".

    If the header_values passed as argument contains multiple values, then they
    are treated as if they were a single value separated by comma ",".

    This means that this function is useful for parsing header fields that
    follow this syntax (BNF as from the HTTP/1.1 specification, but we relax
    the requirement for tokens).

      headers           = #header
      header            = (token | parameter) *( [";"] (token | parameter))

      token             = 1*<any CHAR except CTLs or separators>
      separators        = "(" | ")" | "<" | ">" | "@"
                        | "," | ";" | ":" | "\" | <">
                        | "/" | "[" | "]" | "?" | "="
                        | "{" | "}" | SP | HT

      quoted-string     = ( <"> *(qdtext | quoted-pair ) <"> )
      qdtext            = <any TEXT except <">>
      quoted-pair       = "\" CHAR

      parameter         = attribute "=" value
      attribute         = token
      value             = token | quoted-string

    Each header is represented by a list of key/value pairs.  The value for a
    simple token (not part of a parameter) is None.  Syntactically incorrect
    headers will not necessarily be parsed as you would want.

    header_values must be a sequence of strings, not a single string.

    This is easier to describe with some examples:

    >>> split_header_words(['foo="bar"; port="80,81"; discard, bar=baz'])
    [[('foo', 'bar'), ('port', '80,81'), ('discard', None)], [('bar', 'baz')]]
    >>> split_header_words(['text/html; charset="iso-8859-1"'])
    [[('text/html', None), ('charset', 'iso-8859-1')]]
    >>> split_header_words([r'Basic realm="\"foo\bar\""'])
    [[('Basic', None), ('realm', '"foobar"')]]

    """
    assert not isinstance(header_values, basestring)
    result = []
    for text in header_values:
        orig_text = text
        pairs = []
        while text:
            m = HEADER_TOKEN_RE.search(text)
            if m:
                text = unmatched(m)
                name = m.group(1)
                m = HEADER_QUOTED_VALUE_RE.search(text)
                if m:  # quoted value
                    text = unmatched(m)
                    value = m.group(1)
                    # drop the backslash of each quoted-pair
                    value = HEADER_ESCAPE_RE.sub(r"\1", value)
                else:
                    m = HEADER_VALUE_RE.search(text)
                    if m:  # unquoted value
                        text = unmatched(m)
                        value = m.group(1)
                        value = value.rstrip()
                    else:
                        # no value, a lone token
                        value = None
                pairs.append((name, value))
            elif text.lstrip().startswith(","):
                # concatenated headers, as per RFC 2616 section 4.2
                text = text.lstrip()[1:]
                if pairs: result.append(pairs)
                pairs = []
            else:
                # skip junk
                non_junk, nr_junk_chars = re.subn(r"^[=\s;]*", "", text)
                # Guards against an infinite loop: something must have been
                # consumed on this pass.
                assert nr_junk_chars > 0, (
                    "split_header_words bug: '%s', '%s', %s" %
                    (orig_text, text, pairs))
                text = non_junk
        if pairs: result.append(pairs)
    return result

GaoHEADER_JOIN_ESCAPE_RE = re.compile(r"([\"\\])") 4124adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaodef join_header_words(lists): 4134adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao """Do the inverse (almost) of the conversion done by split_header_words. 4144adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 4154adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao Takes a list of lists of (key, value) pairs and produces a single header 4164adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao value. Attribute values are quoted if needed. 4174adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 4184adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao >>> join_header_words([[("text/plain", None), ("charset", "iso-8859/1")]]) 4194adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 'text/plain; charset="iso-8859/1"' 4204adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao >>> join_header_words([[("text/plain", None)], [("charset", "iso-8859/1")]]) 4214adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 'text/plain, charset="iso-8859/1"' 4224adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 4234adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao """ 4244adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao headers = [] 4254adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao for pairs in lists: 4264adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao attr = [] 4274adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao for k, v in pairs: 4284adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if v is not None: 4294adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if not re.search(r"^\w+$", v): 4304adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao v = HEADER_JOIN_ESCAPE_RE.sub(r"\\\1", v) # escape " and \ 4314adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao v = '"%s"' % v 4324adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao k = "%s=%s" % (k, v) 4334adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao attr.append(k) 4344adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if attr: headers.append("; ".join(attr)) 4354adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return ", 
".join(headers) 4364adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 4374adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaodef _strip_quotes(text): 4384adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if text.startswith('"'): 4394adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao text = text[1:] 4404adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if text.endswith('"'): 4414adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao text = text[:-1] 4424adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return text 4434adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 4444adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaodef parse_ns_headers(ns_headers): 4454adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao """Ad-hoc parser for Netscape protocol cookie-attributes. 4464adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 4474adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao The old Netscape cookie format for Set-Cookie can for instance contain 4484adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao an unquoted "," in the expires field, so we have to use this ad-hoc 4494adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao parser instead of split_header_words. 4504adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 4514adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao XXX This may not make the best possible effort to parse all the crap 4524adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao that Netscape Cookie headers contain. Ronald Tschalar's HTTPClient 4534adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao parser is probably better, so could do worse than following that if 4544adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao this ever gives any trouble. 4554adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 4564adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao Currently, this is also used for parsing RFC 2109 cookies. 
4574adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 4584adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao """ 4594adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao known_attrs = ("expires", "domain", "path", "secure", 4604adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # RFC 2109 attrs (may turn up in Netscape cookies, too) 4614adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao "version", "port", "max-age") 4624adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 4634adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao result = [] 4644adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao for ns_header in ns_headers: 4654adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao pairs = [] 4664adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao version_set = False 4674adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao for ii, param in enumerate(re.split(r";\s*", ns_header)): 4684adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao param = param.rstrip() 4694adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if param == "": continue 4704adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if "=" not in param: 4714adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao k, v = param, None 4724adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao else: 4734adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao k, v = re.split(r"\s*=\s*", param, 1) 4744adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao k = k.lstrip() 4754adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if ii != 0: 4764adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao lc = k.lower() 4774adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if lc in known_attrs: 4784adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao k = lc 4794adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if k == "version": 4804adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # This is an RFC 2109 cookie. 
4814adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao v = _strip_quotes(v) 4824adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao version_set = True 4834adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if k == "expires": 4844adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # convert expires date to seconds since epoch 4854adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao v = http2time(_strip_quotes(v)) # None if invalid 4864adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao pairs.append((k, v)) 4874adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 4884adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if pairs: 4894adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if not version_set: 4904adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao pairs.append(("version", "0")) 4914adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao result.append(pairs) 4924adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 4934adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return result 4944adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 4954adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 4964adfde8bc82dd39f59e0445588c3e599ada477dJosh GaoIPV4_RE = re.compile(r"\.\d+$") 4974adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaodef is_HDN(text): 4984adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao """Return True if text is a host domain name.""" 4994adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # XXX 5004adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # This may well be wrong. Which RFC is HDN defined in, if any (for 5014adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # the purposes of RFC 2965)? 5024adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # For the current implementation, what about IPv6? Remember to look 5034adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # at other uses of IPV4_RE also, if change this. 
5044adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if IPV4_RE.search(text): 5054adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return False 5064adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if text == "": 5074adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return False 5084adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if text[0] == "." or text[-1] == ".": 5094adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return False 5104adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return True 5114adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 5124adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaodef domain_match(A, B): 5134adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao """Return True if domain A domain-matches domain B, according to RFC 2965. 5144adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 5154adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao A and B may be host domain names or IP addresses. 5164adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 5174adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao RFC 2965, section 1: 5184adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 5194adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao Host names can be specified either as an IP address or a HDN string. 5204adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao Sometimes we compare one host name with another. (Such comparisons SHALL 5214adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao be case-insensitive.) Host A's name domain-matches host B's if 5224adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 5234adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao * their host name strings string-compare equal; or 5244adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 5254adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao * A is a HDN string and has the form NB, where N is a non-empty 5264adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao name string, B has the form .B', and B' is a HDN string. (So, 5274adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao x.y.com domain-matches .Y.com but not Y.com.) 
5284adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 5294adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao Note that domain-match is not a commutative operation: a.b.c.com 5304adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao domain-matches .c.com, but not the reverse. 5314adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 5324adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao """ 5334adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # Note that, if A or B are IP addresses, the only relevant part of the 5344adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # definition of the domain-match algorithm is the direct string-compare. 5354adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao A = A.lower() 5364adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao B = B.lower() 5374adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if A == B: 5384adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return True 5394adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if not is_HDN(A): 5404adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return False 5414adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao i = A.rfind(B) 5424adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if i == -1 or i == 0: 5434adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # A does not have form NB, or N is the empty string 5444adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return False 5454adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if not B.startswith("."): 5464adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return False 5474adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if not is_HDN(B[1:]): 5484adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return False 5494adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return True 5504adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 5514adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaodef liberal_is_HDN(text): 5524adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao """Return True if text is a sort-of-like a host domain name. 
5534adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 5544adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao For accepting/blocking domains. 5554adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 5564adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao """ 5574adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if IPV4_RE.search(text): 5584adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return False 5594adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return True 5604adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 5614adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaodef user_domain_match(A, B): 5624adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao """For blocking/accepting domains. 5634adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 5644adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao A and B may be host domain names or IP addresses. 5654adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 5664adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao """ 5674adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao A = A.lower() 5684adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao B = B.lower() 5694adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if not (liberal_is_HDN(A) and liberal_is_HDN(B)): 5704adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if A == B: 5714adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # equal IP addresses 5724adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return True 5734adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return False 5744adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao initial_dot = B.startswith(".") 5754adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if initial_dot and A.endswith(B): 5764adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return True 5774adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if not initial_dot and A == B: 5784adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return True 5794adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return False 5804adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 5814adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaocut_port_re = re.compile(r":\d+$") 
def request_host(request):
    """Return request-host, as defined by RFC 2965.

    Variation from RFC: returned value is lowercased, for convenient
    comparison.

    The host is taken from the request's full URL, falling back to its
    "Host" header when the URL has no network location.

    """
    url = request.get_full_url()
    host = urlparse.urlparse(url)[1]
    if host == "":
        host = request.get_header("Host", "")

    # remove port, if present
    host = cut_port_re.sub("", host, 1)
    return host.lower()

def eff_request_host(request):
    """Return a tuple (request-host, effective request-host name).

    As defined by RFC 2965, except both are lowercased.

    """
    erhn = req_host = request_host(request)
    # A dotless host name that does not look like an IP address gets
    # ".local" appended to form the effective request-host name.
    if req_host.find(".") == -1 and not IPV4_RE.search(req_host):
        erhn = req_host + ".local"
    return req_host, erhn

def request_path(request):
    """Path component of request-URI, as defined by RFC 2965."""
    url = request.get_full_url()
    parts = urlparse.urlsplit(url)
    path = escape_path(parts.path)
    if not path.startswith("/"):
        # fix bad RFC 2396 absoluteURI
        path = "/" + path
    return path

def request_port(request):
    """Return the port of the request's host, as a string.

    Returns DEFAULT_HTTP_PORT when the host carries no ":port" suffix, and
    None (after logging a debug message) when the suffix is not numeric.

    """
    host = request.get_host()
    i = host.find(':')
    if i >= 0:
        port = host[i+1:]
        try:
            int(port)
        except ValueError:
            _debug("nonnumeric port: '%s'", port)
            return None
    else:
        port = DEFAULT_HTTP_PORT
    return port

# Characters in addition to A-Z, a-z, 0-9, '_', '.', and '-' that don't
# need to be escaped to form a valid HTTP URL (RFCs 2396 and 1738).
HTTP_PATH_SAFE = "%/;:@&=+$,!~*'()"
ESCAPED_CHAR_RE = re.compile(r"%([0-9a-fA-F][0-9a-fA-F])")
def uppercase_escaped_char(match):
    """Return the matched %-escape with its two hex digits uppercased."""
    return "%%%s" % match.group(1).upper()
def escape_path(path):
    """Escape any invalid characters in HTTP URL, and uppercase all escapes."""
    # There's no knowing what character encoding was used to create URLs
    # containing %-escapes, but since we have to pick one to escape invalid
    # path characters, we pick UTF-8, as recommended in the HTML 4.0
    # specification:
    # http://www.w3.org/TR/REC-html40/appendix/notes.html#h-B.2.1
    # And here, kind of: draft-fielding-uri-rfc2396bis-03
    # (And in draft IRI specification: draft-duerst-iri-05)
    # (And here, for new URI schemes: RFC 2718)
    if isinstance(path, unicode):
        path = path.encode("utf-8")
    path = urllib.quote(path, HTTP_PATH_SAFE)
    path = ESCAPED_CHAR_RE.sub(uppercase_escaped_char, path)
    return path

def reach(h):
    """Return reach of host h, as defined by RFC 2965, section 1.

    The reach R of a host name H is defined as follows:

       *  If

          -  H is the host domain name of a host; and,

          -  H has the form A.B; and

          -  A has no embedded (that is, interior) dots; and

          -  B has at least one embedded dot, or B is the string "local".
             then the reach of H is .B.

       *  Otherwise, the reach of H is H.

    >>> reach("www.acme.com")
    '.acme.com'
    >>> reach("acme.com")
    'acme.com'
    >>> reach("acme.local")
    '.local'

    """
    i = h.find(".")
    if i >= 0:
        #a = h[:i]  # this line is only here to show what a is
        b = h[i+1:]
        i = b.find(".")
        if is_HDN(h) and (i >= 0 or b == "local"):
            return "."+b
    return h

def is_third_party(request):
    """Return True if the request is to a third-party host.

    RFC 2965, section 3.3.6:

        An unverifiable transaction is to a third-party host if its request-
        host U does not domain-match the reach R of the request-host O in the
        origin transaction.

    """
    req_host = request_host(request)
    return not domain_match(req_host, reach(request.get_origin_req_host()))


class Cookie:
    """HTTP Cookie.

    This class represents both Netscape and RFC 2965 cookies.

    This is deliberately a very simple class.  It just holds attributes.  It's
    possible to construct Cookie instances that don't comply with the cookie
    standards.  CookieJar.make_cookies is the factory function for Cookie
    objects -- it deals with cookie parsing, supplying defaults, and
    normalising to the representation used in this class.  CookiePolicy is
    responsible for checking them to see whether they should be accepted from
    and returned to the server.

    Note that the port may be present in the headers, but unspecified ("Port"
    rather than "Port=80", for example); if this is the case, port is None.

    """

    def __init__(self, version, name, value,
                 port, port_specified,
                 domain, domain_specified, domain_initial_dot,
                 path, path_specified,
                 secure,
                 expires,
                 discard,
                 comment,
                 comment_url,
                 rest,
                 rfc2109=False,
                 ):
        """Store the cookie-attributes on the instance.

        version and expires are converted with int() unless they are None.
        domain is lowercased.  rest (the non-standard attributes) is
        shallow-copied.  Raises ValueError if port is None while
        port_specified is True.

        """

        if version is not None: version = int(version)
        if expires is not None: expires = int(expires)
        if port is None and port_specified is True:
            raise ValueError("if port is None, port_specified must be false")

        self.version = version
        self.name = name
        self.value = value
        self.port = port
        self.port_specified = port_specified
        # normalise case, as per RFC 2965 section 3.3.3
        self.domain = domain.lower()
        self.domain_specified = domain_specified
        # Sigh.  We need to know whether the domain given in the
        # cookie-attribute had an initial dot, in order to follow RFC 2965
        # (as clarified in draft errata).  Needed for the returned $Domain
        # value.
        self.domain_initial_dot = domain_initial_dot
        self.path = path
        self.path_specified = path_specified
        self.secure = secure
        self.expires = expires
        self.discard = discard
        self.comment = comment
        self.comment_url = comment_url
        self.rfc2109 = rfc2109

        self._rest = copy.copy(rest)

    def has_nonstandard_attr(self, name):
        """Return True if a non-standard attribute called name is stored."""
        return name in self._rest
    def get_nonstandard_attr(self, name, default=None):
        """Return the non-standard attribute name, or default if absent."""
        return self._rest.get(name, default)
    def set_nonstandard_attr(self, name, value):
        """Store value as the non-standard attribute name."""
        self._rest[name] = value

    def is_expired(self, now=None):
        """Return True if expires is set and is not later than now.

        now defaults to time.time().

        """
        if now is None: now = time.time()
        return (self.expires is not None) and (self.expires <= now)

    def __str__(self):
        if self.port is None: p = ""
        else: p = ":"+self.port
        limit = self.domain + p + self.path
        if self.value is not None:
            namevalue = "%s=%s" % (self.name, self.value)
        else:
            namevalue = self.name
        return "<Cookie %s for %s>" % (namevalue, limit)

    def __repr__(self):
        args = []
        for name in ("version", "name", "value",
                     "port", "port_specified",
                     "domain", "domain_specified", "domain_initial_dot",
                     "path", "path_specified",
                     "secure", "expires", "discard", "comment", "comment_url",
                     ):
            attr = getattr(self, name)
            args.append("%s=%s" % (name, repr(attr)))
        # _rest is private, so it is reported under its constructor name.
        args.append("rest=%s" % repr(self._rest))
        args.append("rfc2109=%s" % repr(self.rfc2109))
        return "Cookie(%s)" % ", ".join(args)


class CookiePolicy:
    """Defines which cookies get accepted from and returned to server.

    May also modify cookies, though this is probably a bad idea.

    The subclass DefaultCookiePolicy defines the standard rules for Netscape
    and RFC 2965 cookies -- override that if you want a customised policy.

    """
    def set_ok(self, cookie, request):
        """Return true if (and only if) cookie should be accepted from server.

        Currently, pre-expired cookies never get this far -- the CookieJar
        class deletes such cookies itself.

        """
        raise NotImplementedError()

    def return_ok(self, cookie, request):
        """Return true if (and only if) cookie should be returned to server."""
        raise NotImplementedError()

    def domain_return_ok(self, domain, request):
        """Return false if cookies should not be returned, given cookie domain.
        """
        return True

    def path_return_ok(self, path, request):
        """Return false if cookies should not be returned, given cookie path.
        """
        return True


class DefaultCookiePolicy(CookiePolicy):
    """Implements the standard rules for accepting and returning cookies."""

    # Bit flags that are combined into strict_ns_domain.
    DomainStrictNoDots = 1
    DomainStrictNonDomain = 2
    DomainRFC2965Match = 4

    DomainLiberal = 0
    DomainStrict = DomainStrictNoDots|DomainStrictNonDomain

    def __init__(self,
                 blocked_domains=None, allowed_domains=None,
                 netscape=True, rfc2965=False,
                 rfc2109_as_netscape=None,
                 hide_cookie2=False,
                 strict_domain=False,
                 strict_rfc2965_unverifiable=True,
                 strict_ns_unverifiable=False,
                 strict_ns_domain=DomainLiberal,
                 strict_ns_set_initial_dollar=False,
                 strict_ns_set_path=False,
                 ):
        """Constructor arguments should be passed as keyword arguments only."""
        self.netscape = netscape
        self.rfc2965 = rfc2965
        self.rfc2109_as_netscape = rfc2109_as_netscape
        self.hide_cookie2 = hide_cookie2
        self.strict_domain = strict_domain
        self.strict_rfc2965_unverifiable = strict_rfc2965_unverifiable
        self.strict_ns_unverifiable = strict_ns_unverifiable
        self.strict_ns_domain = strict_ns_domain
        self.strict_ns_set_initial_dollar = strict_ns_set_initial_dollar
        self.strict_ns_set_path = strict_ns_set_path

        # Blocked domains are always stored as a tuple (empty when none are
        # given); allowed domains stay None when no list is given.
        if blocked_domains is not None:
            self._blocked_domains = tuple(blocked_domains)
        else:
            self._blocked_domains = ()

        if allowed_domains is not None:
            allowed_domains = tuple(allowed_domains)
        self._allowed_domains = allowed_domains

    def blocked_domains(self):
        """Return the sequence of blocked domains (as a tuple)."""
        return self._blocked_domains
8844adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao def set_blocked_domains(self, blocked_domains): 8854adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao """Set the sequence of blocked domains.""" 8864adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao self._blocked_domains = tuple(blocked_domains) 8874adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 8884adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao def is_blocked(self, domain): 8894adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao for blocked_domain in self._blocked_domains: 8904adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if user_domain_match(domain, blocked_domain): 8914adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return True 8924adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return False 8934adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 8944adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao def allowed_domains(self): 8954adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao """Return None, or the sequence of allowed domains (as a tuple).""" 8964adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return self._allowed_domains 8974adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao def set_allowed_domains(self, allowed_domains): 8984adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao """Set the sequence of allowed domains, or None.""" 8994adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if allowed_domains is not None: 9004adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao allowed_domains = tuple(allowed_domains) 9014adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao self._allowed_domains = allowed_domains 9024adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 9034adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao def is_not_allowed(self, domain): 9044adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if self._allowed_domains is None: 9054adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return False 9064adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao for allowed_domain in self._allowed_domains: 9074adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if user_domain_match(domain, 
allowed_domain): 9084adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return False 9094adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return True 9104adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 9114adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao def set_ok(self, cookie, request): 9124adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao """ 9134adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao If you override .set_ok(), be sure to call this method. If it returns 9144adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao false, so should your subclass (assuming your subclass wants to be more 9154adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao strict about which cookies to accept). 9164adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 9174adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao """ 9184adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao _debug(" - checking cookie %s=%s", cookie.name, cookie.value) 9194adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 9204adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao assert cookie.name is not None 9214adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 9224adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao for n in "version", "verifiability", "name", "path", "domain", "port": 9234adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao fn_name = "set_ok_"+n 9244adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao fn = getattr(self, fn_name) 9254adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if not fn(cookie, request): 9264adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return False 9274adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 9284adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return True 9294adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 9304adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao def set_ok_version(self, cookie, request): 9314adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if cookie.version is None: 9324adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # Version is always set to 0 by parse_ns_headers if it's a Netscape 9334adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # 
cookie, so this must be an invalid RFC 2965 cookie. 9344adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao _debug(" Set-Cookie2 without version attribute (%s=%s)", 9354adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao cookie.name, cookie.value) 9364adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return False 9374adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if cookie.version > 0 and not self.rfc2965: 9384adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao _debug(" RFC 2965 cookies are switched off") 9394adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return False 9404adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao elif cookie.version == 0 and not self.netscape: 9414adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao _debug(" Netscape cookies are switched off") 9424adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return False 9434adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return True 9444adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 9454adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao def set_ok_verifiability(self, cookie, request): 9464adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if request.is_unverifiable() and is_third_party(request): 9474adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if cookie.version > 0 and self.strict_rfc2965_unverifiable: 9484adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao _debug(" third-party RFC 2965 cookie during " 9494adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao "unverifiable transaction") 9504adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return False 9514adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao elif cookie.version == 0 and self.strict_ns_unverifiable: 9524adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao _debug(" third-party Netscape cookie during " 9534adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao "unverifiable transaction") 9544adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return False 9554adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return True 9564adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 9574adfde8bc82dd39f59e0445588c3e599ada477dJosh 
Gao def set_ok_name(self, cookie, request): 9584adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # Try and stop servers setting V0 cookies designed to hack other 9594adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # servers that know both V0 and V1 protocols. 9604adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if (cookie.version == 0 and self.strict_ns_set_initial_dollar and 9614adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao cookie.name.startswith("$")): 9624adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao _debug(" illegal name (starts with '$'): '%s'", cookie.name) 9634adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return False 9644adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return True 9654adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 9664adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao def set_ok_path(self, cookie, request): 9674adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if cookie.path_specified: 9684adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao req_path = request_path(request) 9694adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if ((cookie.version > 0 or 9704adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao (cookie.version == 0 and self.strict_ns_set_path)) and 9714adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao not req_path.startswith(cookie.path)): 9724adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao _debug(" path attribute %s is not a prefix of request " 9734adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao "path %s", cookie.path, req_path) 9744adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return False 9754adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return True 9764adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 9774adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao def set_ok_domain(self, cookie, request): 9784adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if self.is_blocked(cookie.domain): 9794adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao _debug(" domain %s is in user block-list", cookie.domain) 9804adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return False 
9814adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if self.is_not_allowed(cookie.domain): 9824adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao _debug(" domain %s is not in user allow-list", cookie.domain) 9834adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return False 9844adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if cookie.domain_specified: 9854adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao req_host, erhn = eff_request_host(request) 9864adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao domain = cookie.domain 9874adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if self.strict_domain and (domain.count(".") >= 2): 9884adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # XXX This should probably be compared with the Konqueror 9894adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # (kcookiejar.cpp) and Mozilla implementations, but it's a 9904adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # losing battle. 9914adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao i = domain.rfind(".") 9924adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao j = domain.rfind(".", 0, i) 9934adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if j == 0: # domain like .foo.bar 9944adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao tld = domain[i+1:] 9954adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao sld = domain[j+1:i] 9964adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if sld.lower() in ("co", "ac", "com", "edu", "org", "net", 9974adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao "gov", "mil", "int", "aero", "biz", "cat", "coop", 9984adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao "info", "jobs", "mobi", "museum", "name", "pro", 9994adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao "travel", "eu") and len(tld) == 2: 10004adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # domain like .co.uk 10014adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao _debug(" country-code second level domain %s", domain) 10024adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return False 10034adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if 
domain.startswith("."): 10044adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao undotted_domain = domain[1:] 10054adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao else: 10064adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao undotted_domain = domain 10074adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao embedded_dots = (undotted_domain.find(".") >= 0) 10084adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if not embedded_dots and domain != ".local": 10094adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao _debug(" non-local domain %s contains no embedded dot", 10104adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao domain) 10114adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return False 10124adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if cookie.version == 0: 10134adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if (not erhn.endswith(domain) and 10144adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao (not erhn.startswith(".") and 10154adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao not ("."+erhn).endswith(domain))): 10164adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao _debug(" effective request-host %s (even with added " 10174adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao "initial dot) does not end with %s", 10184adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao erhn, domain) 10194adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return False 10204adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if (cookie.version > 0 or 10214adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao (self.strict_ns_domain & self.DomainRFC2965Match)): 10224adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if not domain_match(erhn, domain): 10234adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao _debug(" effective request-host %s does not domain-match " 10244adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao "%s", erhn, domain) 10254adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return False 10264adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if (cookie.version > 0 or 10274adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao (self.strict_ns_domain & 
self.DomainStrictNoDots)): 10284adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao host_prefix = req_host[:-len(domain)] 10294adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if (host_prefix.find(".") >= 0 and 10304adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao not IPV4_RE.search(req_host)): 10314adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao _debug(" host prefix %s for domain %s contains a dot", 10324adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao host_prefix, domain) 10334adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return False 10344adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return True 10354adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 10364adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao def set_ok_port(self, cookie, request): 10374adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if cookie.port_specified: 10384adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao req_port = request_port(request) 10394adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if req_port is None: 10404adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao req_port = "80" 10414adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao else: 10424adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao req_port = str(req_port) 10434adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao for p in cookie.port.split(","): 10444adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao try: 10454adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao int(p) 10464adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao except ValueError: 10474adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao _debug(" bad port %s (not numeric)", p) 10484adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return False 10494adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if p == req_port: 10504adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao break 10514adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao else: 10524adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao _debug(" request port (%s) not found in %s", 10534adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao req_port, cookie.port) 
10544adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return False 10554adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return True 10564adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 10574adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao def return_ok(self, cookie, request): 10584adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao """ 10594adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao If you override .return_ok(), be sure to call this method. If it 10604adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao returns false, so should your subclass (assuming your subclass wants to 10614adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao be more strict about which cookies to return). 10624adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 10634adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao """ 10644adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # Path has already been checked by .path_return_ok(), and domain 10654adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # blocking done by .domain_return_ok(). 10664adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao _debug(" - checking cookie %s=%s", cookie.name, cookie.value) 10674adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 10684adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao for n in "version", "verifiability", "secure", "expires", "port", "domain": 10694adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao fn_name = "return_ok_"+n 10704adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao fn = getattr(self, fn_name) 10714adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if not fn(cookie, request): 10724adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return False 10734adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return True 10744adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 10754adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao def return_ok_version(self, cookie, request): 10764adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if cookie.version > 0 and not self.rfc2965: 10774adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao _debug(" RFC 2965 cookies are switched off") 
10784adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return False 10794adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao elif cookie.version == 0 and not self.netscape: 10804adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao _debug(" Netscape cookies are switched off") 10814adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return False 10824adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return True 10834adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 10844adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao def return_ok_verifiability(self, cookie, request): 10854adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if request.is_unverifiable() and is_third_party(request): 10864adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if cookie.version > 0 and self.strict_rfc2965_unverifiable: 10874adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao _debug(" third-party RFC 2965 cookie during unverifiable " 10884adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao "transaction") 10894adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return False 10904adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao elif cookie.version == 0 and self.strict_ns_unverifiable: 10914adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao _debug(" third-party Netscape cookie during unverifiable " 10924adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao "transaction") 10934adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return False 10944adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return True 10954adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 10964adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao def return_ok_secure(self, cookie, request): 10974adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if cookie.secure and request.get_type() != "https": 10984adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao _debug(" secure cookie with non-secure request") 10994adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return False 11004adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return True 11014adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 
11024adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao def return_ok_expires(self, cookie, request): 11034adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if cookie.is_expired(self._now): 11044adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao _debug(" cookie expired") 11054adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return False 11064adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return True 11074adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 11084adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao def return_ok_port(self, cookie, request): 11094adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if cookie.port: 11104adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao req_port = request_port(request) 11114adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if req_port is None: 11124adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao req_port = "80" 11134adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao for p in cookie.port.split(","): 11144adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if p == req_port: 11154adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao break 11164adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao else: 11174adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao _debug(" request port %s does not match cookie port %s", 11184adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao req_port, cookie.port) 11194adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return False 11204adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return True 11214adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 11224adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao def return_ok_domain(self, cookie, request): 11234adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao req_host, erhn = eff_request_host(request) 11244adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao domain = cookie.domain 11254adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 11264adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # strict check of non-domain cookies: Mozilla does this, MSIE5 doesn't 11274adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if (cookie.version == 0 and 
11284adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao (self.strict_ns_domain & self.DomainStrictNonDomain) and 11294adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao not cookie.domain_specified and domain != erhn): 11304adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao _debug(" cookie with unspecified domain does not string-compare " 11314adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao "equal to request domain") 11324adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return False 11334adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 11344adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if cookie.version > 0 and not domain_match(erhn, domain): 11354adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao _debug(" effective request-host name %s does not domain-match " 11364adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao "RFC 2965 cookie domain %s", erhn, domain) 11374adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return False 11384adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if cookie.version == 0 and not ("."+erhn).endswith(domain): 11394adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao _debug(" request-host %s does not match Netscape cookie domain " 11404adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao "%s", req_host, domain) 11414adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return False 11424adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return True 11434adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 11444adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao def domain_return_ok(self, domain, request): 11454adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # Liberal check of. This is here as an optimization to avoid 11464adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # having to load lots of MSIE cookie files unless necessary. 
11474adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao req_host, erhn = eff_request_host(request) 11484adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if not req_host.startswith("."): 11494adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao req_host = "."+req_host 11504adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if not erhn.startswith("."): 11514adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao erhn = "."+erhn 11524adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if not (req_host.endswith(domain) or erhn.endswith(domain)): 11534adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao #_debug(" request domain %s does not match cookie domain %s", 11544adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # req_host, domain) 11554adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return False 11564adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 11574adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if self.is_blocked(domain): 11584adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao _debug(" domain %s is in user block-list", domain) 11594adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return False 11604adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if self.is_not_allowed(domain): 11614adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao _debug(" domain %s is not in user allow-list", domain) 11624adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return False 11634adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 11644adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return True 11654adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 11664adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao def path_return_ok(self, path, request): 11674adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao _debug("- checking cookie path=%s", path) 11684adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao req_path = request_path(request) 11694adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if not req_path.startswith(path): 11704adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao _debug(" %s does not path-match %s", req_path, path) 
11714adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return False 11724adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return True 11734adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 11744adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 11754adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaodef vals_sorted_by_key(adict): 11764adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao keys = adict.keys() 11774adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao keys.sort() 11784adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return map(adict.get, keys) 11794adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 11804adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaodef deepvalues(mapping): 11814adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao """Iterates over nested mapping, depth-first, in sorted order by key.""" 11824adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao values = vals_sorted_by_key(mapping) 11834adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao for obj in values: 11844adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao mapping = False 11854adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao try: 11864adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao obj.items 11874adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao except AttributeError: 11884adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao pass 11894adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao else: 11904adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao mapping = True 11914adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao for subobj in deepvalues(obj): 11924adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao yield subobj 11934adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if not mapping: 11944adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao yield obj 11954adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 11964adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 11974adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao# Used as second parameter to dict.get() method, to distinguish absent 11984adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao# dict key from one with a None value. 
11994adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaoclass Absent: pass 12004adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 12014adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaoclass CookieJar: 12024adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao """Collection of HTTP cookies. 12034adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 12044adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao You may not need to know about this class: try 12054adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao urllib2.build_opener(HTTPCookieProcessor).open(url). 12064adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 12074adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao """ 12084adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 12094adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao non_word_re = re.compile(r"\W") 12104adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao quote_re = re.compile(r"([\"\\])") 12114adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao strict_domain_re = re.compile(r"\.?[^.]*") 12124adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao domain_re = re.compile(r"[^.]*") 12134adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao dots_re = re.compile(r"^\.+") 12144adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 12154adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao magic_re = r"^\#LWP-Cookies-(\d+\.\d+)" 12164adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 12174adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao def __init__(self, policy=None): 12184adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if policy is None: 12194adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao policy = DefaultCookiePolicy() 12204adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao self._policy = policy 12214adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 12224adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao self._cookies_lock = _threading.RLock() 12234adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao self._cookies = {} 12244adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 12254adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao def set_policy(self, policy): 
12264adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao self._policy = policy 12274adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 12284adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao def _cookies_for_domain(self, domain, request): 12294adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao cookies = [] 12304adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if not self._policy.domain_return_ok(domain, request): 12314adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return [] 12324adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao _debug("Checking %s for cookies to return", domain) 12334adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao cookies_by_path = self._cookies[domain] 12344adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao for path in cookies_by_path.keys(): 12354adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if not self._policy.path_return_ok(path, request): 12364adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao continue 12374adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao cookies_by_name = cookies_by_path[path] 12384adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao for cookie in cookies_by_name.values(): 12394adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if not self._policy.return_ok(cookie, request): 12404adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao _debug(" not returning cookie") 12414adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao continue 12424adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao _debug(" it's a match") 12434adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao cookies.append(cookie) 12444adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return cookies 12454adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 12464adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao def _cookies_for_request(self, request): 12474adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao """Return a list of cookies to be returned to server.""" 12484adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao cookies = [] 12494adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao for domain in self._cookies.keys(): 
12504adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao cookies.extend(self._cookies_for_domain(domain, request)) 12514adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return cookies 12524adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 12534adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao def _cookie_attrs(self, cookies): 12544adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao """Return a list of cookie-attributes to be returned to server. 12554adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 12564adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao like ['foo="bar"; $Path="/"', ...] 12574adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 12584adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao The $Version attribute is also added when appropriate (currently only 12594adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao once per request). 12604adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 12614adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao """ 12624adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # add cookies in order of most specific (ie. longest) path first 12634adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao cookies.sort(key=lambda arg: len(arg.path), reverse=True) 12644adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 12654adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao version_set = False 12664adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 12674adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao attrs = [] 12684adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao for cookie in cookies: 12694adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # set version of Cookie header 12704adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # XXX 12714adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # What should it be if multiple matching Set-Cookie headers have 12724adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # different versions themselves? 
12734adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # Answer: there is no answer; was supposed to be settled by 12744adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # RFC 2965 errata, but that may never appear... 12754adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao version = cookie.version 12764adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if not version_set: 12774adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao version_set = True 12784adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if version > 0: 12794adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao attrs.append("$Version=%s" % version) 12804adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 12814adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # quote cookie value if necessary 12824adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # (not for Netscape protocol, which already has any quotes 12834adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # intact, due to the poorly-specified Netscape Cookie: syntax) 12844adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if ((cookie.value is not None) and 12854adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao self.non_word_re.search(cookie.value) and version > 0): 12864adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao value = self.quote_re.sub(r"\\\1", cookie.value) 12874adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao else: 12884adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao value = cookie.value 12894adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 12904adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # add cookie-attributes to be returned in Cookie header 12914adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if cookie.value is None: 12924adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao attrs.append(cookie.name) 12934adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao else: 12944adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao attrs.append("%s=%s" % (cookie.name, value)) 12954adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if version > 0: 12964adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if cookie.path_specified: 
12974adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao attrs.append('$Path="%s"' % cookie.path) 12984adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if cookie.domain.startswith("."): 12994adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao domain = cookie.domain 13004adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if (not cookie.domain_initial_dot and 13014adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao domain.startswith(".")): 13024adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao domain = domain[1:] 13034adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao attrs.append('$Domain="%s"' % domain) 13044adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if cookie.port is not None: 13054adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao p = "$Port" 13064adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if cookie.port_specified: 13074adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao p = p + ('="%s"' % cookie.port) 13084adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao attrs.append(p) 13094adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 13104adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return attrs 13114adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 13124adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao def add_cookie_header(self, request): 13134adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao """Add correct Cookie: header to request (urllib2.Request object). 13144adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 13154adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao The Cookie2 header is also added unless policy.hide_cookie2 is true. 
13164adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 13174adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao """ 13184adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao _debug("add_cookie_header") 13194adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao self._cookies_lock.acquire() 13204adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao try: 13214adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 13224adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao self._policy._now = self._now = int(time.time()) 13234adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 13244adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao cookies = self._cookies_for_request(request) 13254adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 13264adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao attrs = self._cookie_attrs(cookies) 13274adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if attrs: 13284adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if not request.has_header("Cookie"): 13294adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao request.add_unredirected_header( 13304adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao "Cookie", "; ".join(attrs)) 13314adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 13324adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # if necessary, advertise that we know RFC 2965 13334adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if (self._policy.rfc2965 and not self._policy.hide_cookie2 and 13344adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao not request.has_header("Cookie2")): 13354adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao for cookie in cookies: 13364adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if cookie.version != 1: 13374adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao request.add_unredirected_header("Cookie2", '$Version="1"') 13384adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao break 13394adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 13404adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao finally: 13414adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao self._cookies_lock.release() 
13424adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 13434adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao self.clear_expired_cookies() 13444adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 13454adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao def _normalized_cookie_tuples(self, attrs_set): 13464adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao """Return list of tuples containing normalised cookie information. 13474adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 13484adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao attrs_set is the list of lists of key,value pairs extracted from 13494adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao the Set-Cookie or Set-Cookie2 headers. 13504adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 13514adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao Tuples are name, value, standard, rest, where name and value are the 13524adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao cookie name and value, standard is a dictionary containing the standard 13534adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao cookie-attributes (discard, secure, version, expires or max-age, 13544adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao domain, path and port) and rest is a dictionary containing the rest of 13554adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao the cookie-attributes. 
13564adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 13574adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao """ 13584adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao cookie_tuples = [] 13594adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 13604adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao boolean_attrs = "discard", "secure" 13614adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao value_attrs = ("version", 13624adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao "expires", "max-age", 13634adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao "domain", "path", "port", 13644adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao "comment", "commenturl") 13654adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 13664adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao for cookie_attrs in attrs_set: 13674adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao name, value = cookie_attrs[0] 13684adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 13694adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # Build dictionary of standard cookie-attributes (standard) and 13704adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # dictionary of other cookie-attributes (rest). 13714adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 13724adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # Note: expiry time is normalised to seconds since epoch. V0 13734adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # cookies should have the Expires cookie-attribute, and V1 cookies 13744adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # should have Max-Age, but since V1 includes RFC 2109 cookies (and 13754adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # since V0 cookies may be a mish-mash of Netscape and RFC 2109), we 13764adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # accept either (but prefer Max-Age). 
13774adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao max_age_set = False 13784adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 13794adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao bad_cookie = False 13804adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 13814adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao standard = {} 13824adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao rest = {} 13834adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao for k, v in cookie_attrs[1:]: 13844adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao lc = k.lower() 13854adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # don't lose case distinction for unknown fields 13864adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if lc in value_attrs or lc in boolean_attrs: 13874adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao k = lc 13884adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if k in boolean_attrs and v is None: 13894adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # boolean cookie-attribute is present, but has no value 13904adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # (like "discard", rather than "port=80") 13914adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao v = True 13924adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if k in standard: 13934adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # only first value is significant 13944adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao continue 13954adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if k == "domain": 13964adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if v is None: 13974adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao _debug(" missing value for domain attribute") 13984adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao bad_cookie = True 13994adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao break 14004adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # RFC 2965 section 3.3.3 14014adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao v = v.lower() 14024adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if k == "expires": 14034adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if max_age_set: 
14044adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # Prefer max-age to expires (like Mozilla) 14054adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao continue 14064adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if v is None: 14074adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao _debug(" missing or invalid value for expires " 14084adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao "attribute: treating as session cookie") 14094adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao continue 14104adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if k == "max-age": 14114adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao max_age_set = True 14124adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao try: 14134adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao v = int(v) 14144adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao except ValueError: 14154adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao _debug(" missing or invalid (non-numeric) value for " 14164adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao "max-age attribute") 14174adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao bad_cookie = True 14184adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao break 14194adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # convert RFC 2965 Max-Age to seconds since epoch 14204adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # XXX Strictly you're supposed to follow RFC 2616 14214adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # age-calculation rules. Remember that zero Max-Age is a 14224adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # is a request to discard (old and new) cookie, though. 
14234adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao k = "expires" 14244adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao v = self._now + v 14254adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if (k in value_attrs) or (k in boolean_attrs): 14264adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if (v is None and 14274adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao k not in ("port", "comment", "commenturl")): 14284adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao _debug(" missing value for %s attribute" % k) 14294adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao bad_cookie = True 14304adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao break 14314adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao standard[k] = v 14324adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao else: 14334adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao rest[k] = v 14344adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 14354adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if bad_cookie: 14364adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao continue 14374adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 14384adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao cookie_tuples.append((name, value, standard, rest)) 14394adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 14404adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return cookie_tuples 14414adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 14424adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao def _cookie_from_cookie_tuple(self, tup, request): 14434adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # standard is dict of standard cookie-attributes, rest is dict of the 14444adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # rest of them 14454adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao name, value, standard, rest = tup 14464adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 14474adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao domain = standard.get("domain", Absent) 14484adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao path = standard.get("path", Absent) 14494adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao port = 
standard.get("port", Absent) 14504adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao expires = standard.get("expires", Absent) 14514adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 14524adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # set the easy defaults 14534adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao version = standard.get("version", None) 14544adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if version is not None: 14554adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao try: 14564adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao version = int(version) 14574adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao except ValueError: 14584adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return None # invalid version, ignore cookie 14594adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao secure = standard.get("secure", False) 14604adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # (discard is also set if expires is Absent) 14614adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao discard = standard.get("discard", False) 14624adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao comment = standard.get("comment", None) 14634adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao comment_url = standard.get("commenturl", None) 14644adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 14654adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # set default path 14664adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if path is not Absent and path != "": 14674adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao path_specified = True 14684adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao path = escape_path(path) 14694adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao else: 14704adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao path_specified = False 14714adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao path = request_path(request) 14724adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao i = path.rfind("/") 14734adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if i != -1: 14744adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if version == 0: 
14754adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # Netscape spec parts company from reality here 14764adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao path = path[:i] 14774adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao else: 14784adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao path = path[:i+1] 14794adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if len(path) == 0: path = "/" 14804adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 14814adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # set default domain 14824adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao domain_specified = domain is not Absent 14834adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # but first we have to remember whether it starts with a dot 14844adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao domain_initial_dot = False 14854adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if domain_specified: 14864adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao domain_initial_dot = bool(domain.startswith(".")) 14874adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if domain is Absent: 14884adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao req_host, erhn = eff_request_host(request) 14894adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao domain = erhn 14904adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao elif not domain.startswith("."): 14914adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao domain = "."+domain 14924adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 14934adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # set default port 14944adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao port_specified = False 14954adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if port is not Absent: 14964adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if port is None: 14974adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # Port attr present, but has no value: default to request port. 14984adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # Cookie should then only be sent back on that port. 
14994adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao port = request_port(request) 15004adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao else: 15014adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao port_specified = True 15024adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao port = re.sub(r"\s+", "", port) 15034adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao else: 15044adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # No port attr present. Cookie can be sent back on any port. 15054adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao port = None 15064adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 15074adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # set default expires and discard 15084adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if expires is Absent: 15094adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao expires = None 15104adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao discard = True 15114adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao elif expires <= self._now: 15124adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # Expiry date in past is request to delete cookie. This can't be 15134adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # in DefaultCookiePolicy, because can't delete cookies there. 
15144adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao try: 15154adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao self.clear(domain, path, name) 15164adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao except KeyError: 15174adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao pass 15184adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao _debug("Expiring cookie, domain='%s', path='%s', name='%s'", 15194adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao domain, path, name) 15204adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return None 15214adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 15224adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return Cookie(version, 15234adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao name, value, 15244adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao port, port_specified, 15254adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao domain, domain_specified, domain_initial_dot, 15264adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao path, path_specified, 15274adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao secure, 15284adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao expires, 15294adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao discard, 15304adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao comment, 15314adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao comment_url, 15324adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao rest) 15334adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 15344adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao def _cookies_from_attrs_set(self, attrs_set, request): 15354adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao cookie_tuples = self._normalized_cookie_tuples(attrs_set) 15364adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 15374adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao cookies = [] 15384adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao for tup in cookie_tuples: 15394adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao cookie = self._cookie_from_cookie_tuple(tup, request) 15404adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if cookie: cookies.append(cookie) 
15414adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return cookies 15424adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 15434adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao def _process_rfc2109_cookies(self, cookies): 15444adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao rfc2109_as_ns = getattr(self._policy, 'rfc2109_as_netscape', None) 15454adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if rfc2109_as_ns is None: 15464adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao rfc2109_as_ns = not self._policy.rfc2965 15474adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao for cookie in cookies: 15484adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if cookie.version == 1: 15494adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao cookie.rfc2109 = True 15504adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if rfc2109_as_ns: 15514adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # treat 2109 cookies as Netscape cookies rather than 15524adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # as RFC2965 cookies 15534adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao cookie.version = 0 15544adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 15554adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao def make_cookies(self, response, request): 15564adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao """Return sequence of Cookie objects extracted from response object.""" 15574adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # get cookie-attributes for RFC 2965 and Netscape protocols 15584adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao headers = response.info() 15594adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao rfc2965_hdrs = headers.getheaders("Set-Cookie2") 15604adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao ns_hdrs = headers.getheaders("Set-Cookie") 15614adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 15624adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao rfc2965 = self._policy.rfc2965 15634adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao netscape = self._policy.netscape 15644adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 
15654adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if ((not rfc2965_hdrs and not ns_hdrs) or 15664adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao (not ns_hdrs and not rfc2965) or 15674adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao (not rfc2965_hdrs and not netscape) or 15684adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao (not netscape and not rfc2965)): 15694adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return [] # no relevant cookie headers: quick exit 15704adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 15714adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao try: 15724adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao cookies = self._cookies_from_attrs_set( 15734adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao split_header_words(rfc2965_hdrs), request) 15744adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao except Exception: 15754adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao _warn_unhandled_exception() 15764adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao cookies = [] 15774adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 15784adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if ns_hdrs and netscape: 15794adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao try: 15804adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # RFC 2109 and Netscape cookies 15814adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao ns_cookies = self._cookies_from_attrs_set( 15824adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao parse_ns_headers(ns_hdrs), request) 15834adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao except Exception: 15844adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao _warn_unhandled_exception() 15854adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao ns_cookies = [] 15864adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao self._process_rfc2109_cookies(ns_cookies) 15874adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 15884adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # Look for Netscape cookies (from Set-Cookie headers) that match 15894adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # corresponding RFC 2965 cookies (from 
Set-Cookie2 headers). 15904adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # For each match, keep the RFC 2965 cookie and ignore the Netscape 15914adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # cookie (RFC 2965 section 9.1). Actually, RFC 2109 cookies are 15924adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # bundled in with the Netscape cookies for this purpose, which is 15934adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao # reasonable behaviour. 15944adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if rfc2965: 15954adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao lookup = {} 15964adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao for cookie in cookies: 15974adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao lookup[(cookie.domain, cookie.path, cookie.name)] = None 15984adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 15994adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao def no_matching_rfc2965(ns_cookie, lookup=lookup): 16004adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao key = ns_cookie.domain, ns_cookie.path, ns_cookie.name 16014adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return key not in lookup 16024adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao ns_cookies = filter(no_matching_rfc2965, ns_cookies) 16034adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 16044adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if ns_cookies: 16054adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao cookies.extend(ns_cookies) 16064adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 16074adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return cookies 16084adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 16094adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao def set_cookie_if_ok(self, cookie, request): 16104adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao """Set a cookie if policy says it's OK to do so.""" 16114adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao self._cookies_lock.acquire() 16124adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao try: 16134adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao self._policy._now = self._now = 
int(time.time()) 16144adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 16154adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if self._policy.set_ok(cookie, request): 16164adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao self.set_cookie(cookie) 16174adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 16184adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 16194adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao finally: 16204adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao self._cookies_lock.release() 16214adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 16224adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao def set_cookie(self, cookie): 16234adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao """Set a cookie, without checking whether or not it should be set.""" 16244adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao c = self._cookies 16254adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao self._cookies_lock.acquire() 16264adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao try: 16274adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if cookie.domain not in c: c[cookie.domain] = {} 16284adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao c2 = c[cookie.domain] 16294adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if cookie.path not in c2: c2[cookie.path] = {} 16304adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao c3 = c2[cookie.path] 16314adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao c3[cookie.name] = cookie 16324adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao finally: 16334adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao self._cookies_lock.release() 16344adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 16354adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao def extract_cookies(self, response, request): 16364adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao """Extract cookies from response, where allowable given the request.""" 16374adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao _debug("extract_cookies: %s", response.info()) 16384adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao self._cookies_lock.acquire() 
16394adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao try: 16404adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao self._policy._now = self._now = int(time.time()) 16414adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 16424adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao for cookie in self.make_cookies(response, request): 16434adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if self._policy.set_ok(cookie, request): 16444adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao _debug(" setting cookie: %s", cookie) 16454adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao self.set_cookie(cookie) 16464adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao finally: 16474adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao self._cookies_lock.release() 16484adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 16494adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao def clear(self, domain=None, path=None, name=None): 16504adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao """Clear some cookies. 16514adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 16524adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao Invoking this method without arguments will clear all cookies. If 16534adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao given a single argument, only cookies belonging to that domain will be 16544adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao removed. If given two arguments, cookies belonging to the specified 16554adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao path within that domain are removed. If given three arguments, then 16564adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao the cookie with the specified name, path and domain is removed. 16574adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 16584adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao Raises KeyError if no matching cookie exists. 
16594adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 16604adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao """ 16614adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if name is not None: 16624adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if (domain is None) or (path is None): 16634adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao raise ValueError( 16644adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao "domain and path must be given to remove a cookie by name") 16654adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao del self._cookies[domain][path][name] 16664adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao elif path is not None: 16674adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if domain is None: 16684adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao raise ValueError( 16694adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao "domain must be given to remove cookies by path") 16704adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao del self._cookies[domain][path] 16714adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao elif domain is not None: 16724adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao del self._cookies[domain] 16734adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao else: 16744adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao self._cookies = {} 16754adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 16764adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao def clear_session_cookies(self): 16774adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao """Discard all session cookies. 16784adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 16794adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao Note that the .save() method won't save session cookies anyway, unless 16804adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao you ask otherwise by passing a true ignore_discard argument. 
16814adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 16824adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao """ 16834adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao self._cookies_lock.acquire() 16844adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao try: 16854adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao for cookie in self: 16864adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if cookie.discard: 16874adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao self.clear(cookie.domain, cookie.path, cookie.name) 16884adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao finally: 16894adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao self._cookies_lock.release() 16904adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 16914adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao def clear_expired_cookies(self): 16924adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao """Discard all expired cookies. 16934adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 16944adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao You probably don't need to call this method: expired cookies are never 16954adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao sent back to the server (provided you're using DefaultCookiePolicy), 16964adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao this method is called by CookieJar itself every so often, and the 16974adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao .save() method won't save expired cookies anyway (unless you ask 16984adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao otherwise by passing a true ignore_expires argument). 
16994adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 17004adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao """ 17014adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao self._cookies_lock.acquire() 17024adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao try: 17034adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao now = time.time() 17044adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao for cookie in self: 17054adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if cookie.is_expired(now): 17064adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao self.clear(cookie.domain, cookie.path, cookie.name) 17074adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao finally: 17084adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao self._cookies_lock.release() 17094adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 17104adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao def __iter__(self): 17114adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return deepvalues(self._cookies) 17124adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 17134adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao def __len__(self): 17144adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao """Return number of contained cookies.""" 17154adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao i = 0 17164adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao for cookie in self: i = i + 1 17174adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return i 17184adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 17194adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao def __repr__(self): 17204adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao r = [] 17214adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao for cookie in self: r.append(repr(cookie)) 17224adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return "<%s[%s]>" % (self.__class__, ", ".join(r)) 17234adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 17244adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao def __str__(self): 17254adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao r = [] 17264adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao for cookie in self: r.append(str(cookie)) 
17274adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao return "<%s[%s]>" % (self.__class__, ", ".join(r)) 17284adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 17294adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 17304adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao# derives from IOError for backwards-compatibility with Python 2.4.0 17314adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaoclass LoadError(IOError): pass 17324adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 17334adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaoclass FileCookieJar(CookieJar): 17344adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao """CookieJar that can be loaded from and saved to a file.""" 17354adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 17364adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao def __init__(self, filename=None, delayload=False, policy=None): 17374adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao """ 17384adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao Cookies are NOT loaded from the named file until either the .load() or 17394adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao .revert() method is called. 
17404adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 17414adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao """ 17424adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao CookieJar.__init__(self, policy) 17434adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if filename is not None: 17444adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao try: 17454adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao filename+"" 17464adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao except: 17474adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao raise ValueError("filename must be string-like") 17484adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao self.filename = filename 17494adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao self.delayload = bool(delayload) 17504adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 17514adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao def save(self, filename=None, ignore_discard=False, ignore_expires=False): 17524adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao """Save cookies to a file.""" 17534adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao raise NotImplementedError() 17544adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 17554adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao def load(self, filename=None, ignore_discard=False, ignore_expires=False): 17564adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao """Load cookies from a file.""" 17574adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if filename is None: 17584adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if self.filename is not None: filename = self.filename 17594adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao else: raise ValueError(MISSING_FILENAME_TEXT) 17604adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 17614adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao f = open(filename) 17624adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao try: 17634adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao self._really_load(f, filename, ignore_discard, ignore_expires) 17644adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao finally: 17654adfde8bc82dd39f59e0445588c3e599ada477dJosh 
Gao f.close() 17664adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 17674adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao def revert(self, filename=None, 17684adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao ignore_discard=False, ignore_expires=False): 17694adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao """Clear all cookies and reload cookies from a saved file. 17704adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 17714adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao Raises LoadError (or IOError) if reversion is not successful; the 17724adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao object's state will not be altered if this happens. 17734adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 17744adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao """ 17754adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if filename is None: 17764adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao if self.filename is not None: filename = self.filename 17774adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao else: raise ValueError(MISSING_FILENAME_TEXT) 17784adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 17794adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao self._cookies_lock.acquire() 17804adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao try: 17814adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 17824adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao old_state = copy.deepcopy(self._cookies) 17834adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao self._cookies = {} 17844adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao try: 17854adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao self.load(filename, ignore_discard, ignore_expires) 17864adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao except (LoadError, IOError): 17874adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao self._cookies = old_state 17884adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao raise 17894adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 17904adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao finally: 17914adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao self._cookies_lock.release() 
17924adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao 17934adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaofrom _LWPCookieJar import LWPCookieJar, lwp_cookie_str 17944adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaofrom _MozillaCookieJar import MozillaCookieJar 1795