14adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaor"""HTTP cookie handling for web clients.
24adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
34adfde8bc82dd39f59e0445588c3e599ada477dJosh GaoThis module has (now fairly distant) origins in Gisle Aas' Perl module
44adfde8bc82dd39f59e0445588c3e599ada477dJosh GaoHTTP::Cookies, from the libwww-perl library.
54adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
64adfde8bc82dd39f59e0445588c3e599ada477dJosh GaoDocstrings, comments and debug strings in this code refer to the
74adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaoattributes of the HTTP cookie system as cookie-attributes, to distinguish
84adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaothem clearly from Python attributes.
94adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
104adfde8bc82dd39f59e0445588c3e599ada477dJosh GaoClass diagram (note that BSDDBCookieJar and the MSIE* classes are not
114adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaodistributed with the Python standard library, but are available from
124adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaohttp://wwwsearch.sf.net/):
134adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
144adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                        CookieJar____
154adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                        /     \      \
164adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            FileCookieJar      \      \
174adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao             /    |   \         \      \
184adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao MozillaCookieJar | LWPCookieJar \      \
194adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                  |               |      \
204adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                  |   ---MSIEBase |       \
214adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                  |  /      |     |        \
224adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                  | /   MSIEDBCookieJar BSDDBCookieJar
234adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                  |/
244adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao               MSIECookieJar
254adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
264adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao"""
274adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
# Names bound by "from <this module> import *".
__all__ = ['Cookie', 'CookieJar', 'CookiePolicy', 'DefaultCookiePolicy',
           'FileCookieJar', 'LWPCookieJar', 'lwp_cookie_str', 'LoadError',
           'MozillaCookieJar']
314adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
324adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaoimport re, urlparse, copy, time, urllib
334adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaotry:
344adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    import threading as _threading
354adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaoexcept ImportError:
364adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    import dummy_threading as _threading
374adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaoimport httplib  # only for the default HTTP port
384adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaofrom calendar import timegm
394adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
404adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaodebug = False   # set to True to enable debugging via the logging module
414adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaologger = None
424adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
434adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaodef _debug(*args):
444adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    if not debug:
454adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        return
464adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    global logger
474adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    if not logger:
484adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        import logging
494adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        logger = logging.getLogger("cookielib")
504adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    return logger.debug(*args)
514adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
524adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
# httplib's default HTTP port number, converted to a string.
DEFAULT_HTTP_PORT = str(httplib.HTTP_PORT)
# Message text for the situation where no filename is available, either as
# an argument or stored on the CookieJar instance.
MISSING_FILENAME_TEXT = ("a filename was not supplied (nor was the CookieJar "
                         "instance initialised with one)")
564adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
574adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaodef _warn_unhandled_exception():
584adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    # There are a few catch-all except: statements in this module, for
594adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    # catching input that's bad in unexpected ways.  Warn if any
604adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    # exceptions are caught there.
614adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    import warnings, traceback, StringIO
624adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    f = StringIO.StringIO()
634adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    traceback.print_exc(None, f)
644adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    msg = f.getvalue()
654adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    warnings.warn("cookielib bug!\n%s" % msg, stacklevel=2)
664adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
674adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
684adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao# Date/time conversion
694adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao# -----------------------------------------------------------------------------
704adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
714adfde8bc82dd39f59e0445588c3e599ada477dJosh GaoEPOCH_YEAR = 1970
724adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaodef _timegm(tt):
734adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    year, month, mday, hour, min, sec = tt[:6]
744adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    if ((year >= EPOCH_YEAR) and (1 <= month <= 12) and (1 <= mday <= 31) and
754adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        (0 <= hour <= 24) and (0 <= min <= 59) and (0 <= sec <= 61)):
764adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        return timegm(tt)
774adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    else:
784adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        return None
794adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
# English weekday abbreviations, Monday first.
DAYS = [
    "Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun",
]
# English month abbreviations, January first.
MONTHS = [
    "Jan", "Feb", "Mar", "Apr", "May", "Jun",
    "Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
]
# The same month abbreviations in lower case, in the same order.
MONTHS_LOWER = [month.lower() for month in MONTHS]
854adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
def time2isoz(t=None):
    """Format t, a time in seconds since the epoch, as an ISO-style string.

    When t is omitted (or None) the current time is used.

    The result looks like "YYYY-MM-DD hh:mm:ssZ" and is expressed in
    Universal Time (UTC, aka GMT), for example:

    1994-11-24 08:49:37Z

    """
    when = time.time() if t is None else t
    # The first six struct_time fields are year, month, day, hour, minute
    # and second, which is the order the format string consumes them in.
    return "%04d-%02d-%02d %02d:%02d:%02dZ" % tuple(time.gmtime(when)[:6])
1024adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
def time2netscape(t=None):
    """Return a string representing time in seconds since epoch, t.

    If the function is called without an argument, it will use the current
    time.

    The format of the returned string is like this:

    Wed, DD-Mon-YYYY HH:MM:SS GMT

    The time is expressed in UTC; weekday and month names come from the
    DAYS and MONTHS tables.

    """
    if t is None: t = time.time()
    year, mon, mday, hour, minute, sec, wday = time.gmtime(t)[:7]
    # The weekday is followed by a comma, as shown in the format above.
    return "%s, %02d-%s-%04d %02d:%02d:%02d GMT" % (
        DAYS[wday], mday, MONTHS[mon-1], year, hour, minute, sec)
1184adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
1194adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
# Timezone names treated as zero offset; only the keys are consulted here.
UTC_ZONES = {"GMT": None, "UTC": None, "UT": None, "Z": None}

# Optional sign, one or two hour digits, optional colon, optional two
# minute digits.
TIMEZONE_RE = re.compile(r"^([-+])?(\d\d?):?(\d\d)?$")
def offset_from_tz_string(tz):
    """Return the offset in seconds that the timezone string tz denotes.

    Names listed in UTC_ZONES give 0.  Numeric zones such as "-0800",
    "+01:00" or "+1" give hours*3600 + minutes*60, negated when the sign is
    "-".  Anything else gives None.
    """
    if tz in UTC_ZONES:
        return 0
    found = TIMEZONE_RE.search(tz)
    if not found:
        return None
    sign, hours, minutes = found.groups()
    seconds = 3600 * int(hours)
    if minutes:
        seconds += 60 * int(minutes)
    if sign == '-':
        seconds = -seconds
    return seconds
1364adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
def _str2time(day, mon, yr, hr, min, sec, tz):
    """Convert parsed date fields to seconds since the epoch, or None.

    day, mon and yr are strings; hr, min, sec and tz may each be None, in
    which case the clock fields default to 0 and the timezone to UTC.

    mon is either an English month abbreviation (any case) or a number
    from 1 to 12.  sec may carry a fractional part (ISO_DATE_RE captures
    e.g. "29.5"); the fraction is discarded.  A year below 1000 is treated
    as abbreviated and is mapped to the century that puts it within 50
    years of the current year.

    None is returned when the month is not recognised, when _timegm
    rejects the fields, or when offset_from_tz_string does not understand
    the timezone.
    """
    # translate month name to number
    # month numbers start with 1 (January)
    try:
        mon = MONTHS_LOWER.index(mon.lower())+1
    except ValueError:
        # maybe it's already a number
        try:
            imon = int(mon)
        except ValueError:
            return None
        if 1 <= imon <= 12:
            mon = imon
        else:
            return None

    # make sure clock elements are defined
    if hr is None: hr = 0
    if min is None: min = 0
    if sec is None: sec = 0

    yr = int(yr)
    day = int(day)
    hr = int(hr)
    min = int(min)
    # Go through float() first: int() alone raises ValueError on a seconds
    # string with a fractional part such as "29.5" or "29.".
    sec = int(float(sec))

    if yr < 1000:
        # find "obvious" year
        cur_yr = time.localtime(time.time())[0]
        m = cur_yr % 100
        tmp = yr
        yr = yr + cur_yr - m
        m = m - tmp
        if abs(m) > 50:
            if m > 0: yr = yr + 100
            else: yr = yr - 100

    # convert UTC time tuple to seconds since epoch (not timezone-adjusted)
    t = _timegm((yr, mon, day, hr, min, sec, tz))

    if t is not None:
        # adjust time using timezone string, to get absolute time since epoch
        if tz is None:
            tz = "UTC"
        tz = tz.upper()
        offset = offset_from_tz_string(tz)
        if offset is None:
            return None
        t = t - offset

    return t
1894adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
# Exactly the "Wed, 09 Feb 1994 22:23:32 GMT" layout.
STRICT_DATE_RE = re.compile(
    r"^[SMTWF][a-z][a-z], (\d\d) ([JFMASOND][a-z][a-z]) "
    r"(\d\d\d\d) (\d\d):(\d\d):(\d\d) GMT$")
# A leading weekday name (abbreviated or longer) with optional comma.
WEEKDAY_RE = re.compile(
    r"^(?:Sun|Mon|Tue|Wed|Thu|Fri|Sat)[a-z]*,?\s*", re.I)
LOOSE_HTTP_DATE_RE = re.compile(
    r"""^
    (\d\d?)            # day
       (?:\s+|[-\/])
    (\w+)              # month
        (?:\s+|[-\/])
    (\d+)              # year
    (?:
          (?:\s+|:)    # separator before clock
       (\d\d?):(\d\d)  # hour:min
       (?::(\d\d))?    # optional seconds
    )?                 # optional clock
       \s*
    ([-+]?\d{2,4}|(?![APap][Mm]\b)[A-Za-z]+)? # timezone
       \s*
    (?:\(\w+\))?       # ASCII representation of timezone in parens.
       \s*$""", re.X)
def http2time(text):
    """Returns time in seconds since epoch of time represented by a string.

    Return value is a number of seconds.
    NOTE(review): the strict fast path hands the seconds field to _timegm as
    a float, so the result may be a float there -- confirm whether callers
    need an int.

    None is returned if the format of text is unrecognized, the time is
    outside the representable range, or the timezone string is not
    recognized.  If the string contains no timezone, UTC is assumed.

    The timezone in the string may be numerical (like "-0800" or "+0100") or a
    string timezone (like "UTC", "GMT", "BST" or "EST").  Currently, only the
    timezone strings equivalent to UTC (zero offset) are known to the function.

    The function loosely parses the following formats:

    Wed, 09 Feb 1994 22:23:32 GMT       -- HTTP format
    Tuesday, 08-Feb-94 14:15:29 GMT     -- old rfc850 HTTP format
    Tuesday, 08-Feb-1994 14:15:29 GMT   -- broken rfc850 HTTP format
    09 Feb 1994 22:23:32 GMT            -- HTTP format (no weekday)
    08-Feb-94 14:15:29 GMT              -- rfc850 format (no weekday)
    08-Feb-1994 14:15:29 GMT            -- broken rfc850 format (no weekday)

    The parser ignores leading and trailing whitespace.  The time may be
    absent.

    If the year is given with only 2 digits, the function will select the
    century that makes the year closest to the current date.

    """
    # fast exit for strictly conforming string
    m = STRICT_DATE_RE.search(text)
    if m:
        g = m.groups()
        try:
            mon = MONTHS_LOWER.index(g[1].lower()) + 1
        except ValueError:
            # STRICT_DATE_RE accepts any capital-plus-two-letters month
            # (e.g. "Foo"); an unknown one is a bad date, not a crash.
            return None
        tt = (int(g[2]), mon, int(g[0]),
              int(g[3]), int(g[4]), float(g[5]))
        return _timegm(tt)

    # No, we need some messy parsing...

    # clean up
    text = text.lstrip()
    text = WEEKDAY_RE.sub("", text, 1)  # Useless weekday

    # loose regexp parse
    m = LOOSE_HTTP_DATE_RE.search(text)
    if m is None:
        return None  # bad format

    # tz is time zone specifier string
    day, mon, yr, hr, min, sec, tz = m.groups()
    return _str2time(day, mon, yr, hr, min, sec, tz)
2674adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
ISO_DATE_RE = re.compile(
    r"""^
    (\d{4})              # year
       [-\/]?
    (\d\d?)              # numerical month
       [-\/]?
    (\d\d?)              # day
   (?:
         (?:\s+|[-:Tt])  # separator before clock
      (\d\d?):?(\d\d)    # hour:min
      (?::?(\d\d(?:\.\d*)?))?  # optional seconds (and fractional)
   )?                    # optional clock
      \s*
   ([-+]?\d\d?:?(:?\d\d)?
    |Z|z)?               # timezone  (Z is "zero meridian", i.e. GMT)
      \s*$""", re.X)
def iso2time(text):
    """
    As for http2time, but parses the ISO 8601 formats:

    1994-02-03 14:15:29 -0100    -- ISO 8601 format
    1994-02-03 14:15:29          -- zone is optional
    1994-02-03                   -- only date
    1994-02-03T14:15:29          -- Use T as separator
    19940203T141529Z             -- ISO 8601 compact format
    19940203                     -- only date

    Leading whitespace is stripped before matching.  None is returned when
    text does not match ISO_DATE_RE; otherwise the captured fields are
    handed to _str2time and its result is returned.

    """
    # clean up
    text = text.lstrip()

    # loose regexp parse
    m = ISO_DATE_RE.search(text)
    if m is None:
        return None  # bad format

    # tz is time zone specifier string
    # XXX there's an extra bit of the timezone I'm ignoring here: is
    #   this the right thing to do?
    yr, mon, day, hr, min, sec, tz, _ = m.groups()

    return _str2time(day, mon, yr, hr, min, sec, tz)
3124adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
3134adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
3144adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao# Header parsing
3154adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao# -----------------------------------------------------------------------------
3164adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
def unmatched(match):
    """Return match.string with the text covered by the whole match removed."""
    subject = match.string
    return subject[:match.start(0)] + subject[match.end(0):]
3214adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
# Patterns used when splitting header values into name/value pairs.
# A name: leading whitespace, then a run of characters other than "=",
# whitespace, ";" and ",".
HEADER_TOKEN_RE =        re.compile(r"^\s*([^=\s;,]+)")
# "=" followed by a double-quoted value, which may contain backslash escapes.
HEADER_QUOTED_VALUE_RE = re.compile(r"^\s*=\s*\"([^\"\\]*(?:\\.[^\"\\]*)*)\"")
# "=" followed by an unquoted value that ends at whitespace, ";" or ",".
HEADER_VALUE_RE =        re.compile(r"^\s*=\s*([^\s;,]*)")
# A backslash followed by any single character; group 1 is that character.
HEADER_ESCAPE_RE = re.compile(r"\\(.)")
3264adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaodef split_header_words(header_values):
3274adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    r"""Parse header values into a list of lists containing key,value pairs.
3284adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
3294adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    The function knows how to deal with ",", ";" and "=" as well as quoted
3304adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    values after "=".  A list of space separated tokens are parsed as if they
3314adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    were separated by ";".
3324adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
3334adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    If the header_values passed as argument contains multiple values, then they
3344adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    are treated as if they were a single value separated by comma ",".
3354adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
3364adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    This means that this function is useful for parsing header fields that
3374adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    follow this syntax (BNF as from the HTTP/1.1 specification, but we relax
3384adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    the requirement for tokens).
3394adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
3404adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao      headers           = #header
3414adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao      header            = (token | parameter) *( [";"] (token | parameter))
3424adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
3434adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao      token             = 1*<any CHAR except CTLs or separators>
3444adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao      separators        = "(" | ")" | "<" | ">" | "@"
3454adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                        | "," | ";" | ":" | "\" | <">
3464adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                        | "/" | "[" | "]" | "?" | "="
3474adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                        | "{" | "}" | SP | HT
3484adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
3494adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao      quoted-string     = ( <"> *(qdtext | quoted-pair ) <"> )
3504adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao      qdtext            = <any TEXT except <">>
3514adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao      quoted-pair       = "\" CHAR
3524adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
3534adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao      parameter         = attribute "=" value
3544adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao      attribute         = token
3554adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao      value             = token | quoted-string
3564adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
3574adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    Each header is represented by a list of key/value pairs.  The value for a
3584adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    simple token (not part of a parameter) is None.  Syntactically incorrect
3594adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    headers will not necessarily be parsed as you would want.
3604adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
3614adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    This is easier to describe with some examples:
3624adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
3634adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    >>> split_header_words(['foo="bar"; port="80,81"; discard, bar=baz'])
3644adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    [[('foo', 'bar'), ('port', '80,81'), ('discard', None)], [('bar', 'baz')]]
3654adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    >>> split_header_words(['text/html; charset="iso-8859-1"'])
3664adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    [[('text/html', None), ('charset', 'iso-8859-1')]]
3674adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    >>> split_header_words([r'Basic realm="\"foo\bar\""'])
3684adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    [[('Basic', None), ('realm', '"foobar"')]]
3694adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
3704adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    """
3714adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    assert not isinstance(header_values, basestring)
3724adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    result = []
3734adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    for text in header_values:
3744adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        orig_text = text
3754adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        pairs = []
3764adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        while text:
3774adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            m = HEADER_TOKEN_RE.search(text)
3784adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            if m:
3794adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                text = unmatched(m)
3804adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                name = m.group(1)
3814adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                m = HEADER_QUOTED_VALUE_RE.search(text)
3824adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                if m:  # quoted value
3834adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                    text = unmatched(m)
3844adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                    value = m.group(1)
3854adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                    value = HEADER_ESCAPE_RE.sub(r"\1", value)
3864adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                else:
3874adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                    m = HEADER_VALUE_RE.search(text)
3884adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                    if m:  # unquoted value
3894adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                        text = unmatched(m)
3904adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                        value = m.group(1)
3914adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                        value = value.rstrip()
3924adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                    else:
3934adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                        # no value, a lone token
3944adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                        value = None
3954adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                pairs.append((name, value))
3964adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            elif text.lstrip().startswith(","):
3974adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                # concatenated headers, as per RFC 2616 section 4.2
3984adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                text = text.lstrip()[1:]
3994adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                if pairs: result.append(pairs)
4004adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                pairs = []
4014adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            else:
4024adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                # skip junk
4034adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                non_junk, nr_junk_chars = re.subn("^[=\s;]*", "", text)
4044adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                assert nr_junk_chars > 0, (
4054adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                    "split_header_words bug: '%s', '%s', %s" %
4064adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                    (orig_text, text, pairs))
4074adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                text = non_junk
4084adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if pairs: result.append(pairs)
4094adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    return result
4104adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
HEADER_JOIN_ESCAPE_RE = re.compile(r"([\"\\])")
def join_header_words(lists):
    """Build one header value from lists of (key, value) pairs.

    This is (almost) the inverse of split_header_words.  Each inner list
    becomes one "; "-separated group and the groups are joined with ", ".
    A pair whose value is None is emitted as the bare key.  A value that is
    not made up purely of word characters is wrapped in double quotes, with
    any embedded double quote or backslash backslash-escaped.

    >>> join_header_words([[("text/plain", None), ("charset", "iso-8859/1")]])
    'text/plain; charset="iso-8859/1"'
    >>> join_header_words([[("text/plain", None)], [("charset", "iso-8859/1")]])
    'text/plain, charset="iso-8859/1"'

    """
    def render(key, value):
        # A lone token has no "=value" part at all.
        if value is None:
            return key
        if not re.search(r"^\w+$", value):
            # escape " and \, then quote the whole value
            value = '"%s"' % HEADER_JOIN_ESCAPE_RE.sub(r"\\\1", value)
        return "%s=%s" % (key, value)

    groups = []
    for pairs in lists:
        rendered = [render(key, value) for key, value in pairs]
        # Empty inner lists contribute nothing to the output.
        if rendered:
            groups.append("; ".join(rendered))
    return ", ".join(groups)
4364adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
def _strip_quotes(text):
    """Return text without one leading and one trailing double quote.

    Each end is handled independently, so an unbalanced quote at only one
    end is removed too.
    """
    begin = 1 if text.startswith('"') else 0
    trimmed = text[begin:]
    if trimmed.endswith('"'):
        return trimmed[:-1]
    return trimmed
4434adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
def parse_ns_headers(ns_headers):
    """Ad-hoc parser for Netscape protocol cookie-attributes.

    The old Netscape cookie format for Set-Cookie can for instance contain
    an unquoted "," in the expires field, so we have to use this ad-hoc
    parser instead of split_header_words.

    XXX This may not make the best possible effort to parse all the crap
    that Netscape Cookie headers contain.  Ronald Tschalar's HTTPClient
    parser is probably better, so could do worse than following that if
    this ever gives any trouble.

    Currently, this is also used for parsing RFC 2109 cookies.

    ns_headers is an iterable of header value strings.  The result is a
    list with one entry per header that produced at least one pair; each
    entry is a list of (key, value) tuples in header order.  The first
    pair of a header is kept verbatim (it is the cookie name/value); on
    later pairs, keys that case-insensitively match a known attribute are
    lowercased.  A parameter without "=" gets the value None.  The
    "expires" value is replaced by the result of http2time() (None if the
    date is invalid), and a ("version", "0") pair is appended when the
    header carried no version attribute.

    """
    known_attrs = ("expires", "domain", "path", "secure",
                   # RFC 2109 attrs (may turn up in Netscape cookies, too)
                   "version", "port", "max-age")

    result = []
    for ns_header in ns_headers:
        pairs = []
        version_set = False
        for ii, param in enumerate(re.split(r";\s*", ns_header)):
            param = param.rstrip()
            if param == "": continue
            if "=" not in param:
                k, v = param, None
            else:
                k, v = re.split(r"\s*=\s*", param, 1)
                k = k.lstrip()
            if ii != 0:
                lc = k.lower()
                if lc in known_attrs:
                    k = lc
                if k == "version":
                    # This is an RFC 2109 cookie.
                    # A bare "Version" (no "=value") leaves v as None;
                    # only strip quotes when there is a value to strip.
                    if v is not None:
                        v = _strip_quotes(v)
                    version_set = True
                if k == "expires":
                    # convert expires date to seconds since epoch; a bare
                    # "expires" keeps v as None, the same value an
                    # unparseable date would produce
                    if v is not None:
                        v = http2time(_strip_quotes(v))  # None if invalid
            pairs.append((k, v))

        if pairs:
            if not version_set:
                pairs.append(("version", "0"))
            result.append(pairs)

    return result
4944adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
4954adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
IPV4_RE = re.compile(r"\.\d+$")
def is_HDN(text):
    """Return True if text is a host domain name."""
    # XXX
    # This may well be wrong.  Which RFC is HDN defined in, if any (for
    #  the purposes of RFC 2965)?
    # For the current implementation, what about IPv6?  Remember to look
    #  at other uses of IPV4_RE also, if change this.
    # Anything ending in ".<digits>" is treated as an IP address, and the
    # empty string is never a host domain name.
    if IPV4_RE.search(text) or text == "":
        return False
    # A host domain name neither begins nor ends with a dot.
    return not (text.startswith(".") or text.endswith("."))
5114adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
def domain_match(A, B):
    """Return True if domain A domain-matches domain B, according to RFC 2965.

    A and B may be host domain names or IP addresses.

    RFC 2965, section 1:

    Host names can be specified either as an IP address or a HDN string.
    Sometimes we compare one host name with another.  (Such comparisons SHALL
    be case-insensitive.)  Host A's name domain-matches host B's if

         *  their host name strings string-compare equal; or

         * A is a HDN string and has the form NB, where N is a non-empty
            name string, B has the form .B', and B' is a HDN string.  (So,
            x.y.com domain-matches .Y.com but not Y.com.)

    Note that domain-match is not a commutative operation: a.b.c.com
    domain-matches .c.com, but not the reverse.

    """
    # Note that, if A or B are IP addresses, the only relevant part of the
    # definition of the domain-match algorithm is the direct string-compare.
    A = A.lower()
    B = B.lower()
    if A == B:
        return True
    # "A has the form NB" means B is a suffix of A.  Merely containing B
    # somewhere is not enough: x.y.com.evil.org must not match .y.com.
    # Since A != B here, a suffix match also guarantees N is non-empty.
    if not A.endswith(B):
        return False
    if not B.startswith("."):
        return False
    if not is_HDN(A):
        return False
    if not is_HDN(B[1:]):
        return False
    return True
5504adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
def liberal_is_HDN(text):
    """Return True if text is a sort-of-like a host domain name.

    For accepting/blocking domains.  The only thing rejected is text that
    IPV4_RE recognises as ending like an IP address.

    """
    return IPV4_RE.search(text) is None
5604adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
def user_domain_match(A, B):
    """For blocking/accepting domains.

    A and B may be host domain names or IP addresses.  The comparison is
    case-insensitive.  If either side fails liberal_is_HDN, only an exact
    match counts.  Otherwise a B with a leading dot matches any A ending
    in B, and a B without one matches only an identical A.

    """
    host = A.lower()
    pattern = B.lower()
    if not liberal_is_HDN(host) or not liberal_is_HDN(pattern):
        # IP addresses only ever match themselves
        return host == pattern
    if pattern.startswith("."):
        return host.endswith(pattern)
    return host == pattern
5804adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
cut_port_re = re.compile(r":\d+$")
def request_host(request):
    """Return request-host, as defined by RFC 2965.

    The host is taken from the network-location part of the request's full
    URL, falling back to the request's "Host" header when the URL has
    none.  A trailing ":<digits>" port is removed.

    Variation from RFC: returned value is lowercased, for convenient
    comparison.

    """
    netloc = urlparse.urlparse(request.get_full_url())[1]
    if netloc != "":
        host = netloc
    else:
        host = request.get_header("Host", "")
    # remove port, if present, then normalise case
    return cut_port_re.sub("", host, 1).lower()
5974adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
def eff_request_host(request):
    """Return a tuple (request-host, effective request-host name).

    As defined by RFC 2965, except both are lowercased.  The effective
    name is the request-host with ".local" appended when the request-host
    contains no dot and is not recognised by IPV4_RE; otherwise the two
    elements are the same string.

    """
    req_host = request_host(request)
    if "." not in req_host and not IPV4_RE.search(req_host):
        return req_host, req_host + ".local"
    return req_host, req_host
6084adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
def request_path(request):
    """Path component of request-URI, as defined by RFC 2965.

    The path of the request's full URL is passed through escape_path, and
    a leading "/" is added if the result does not already have one.
    """
    raw_path = urlparse.urlsplit(request.get_full_url()).path
    path = escape_path(raw_path)
    if path.startswith("/"):
        return path
    # fix bad RFC 2396 absoluteURI
    return "/" + path
6184adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
def request_port(request):
    """Return the port of the request's host as a string.

    Everything after the first ":" in request.get_host() is taken as the
    port; if that text is not an integer the problem is logged through
    _debug and None is returned.  Without a ":" the result is
    DEFAULT_HTTP_PORT.
    """
    separator, port = request.get_host().partition(":")[1:]
    if not separator:
        # no explicit port in the host string
        return DEFAULT_HTTP_PORT
    try:
        int(port)
    except ValueError:
        _debug("nonnumeric port: '%s'", port)
        return None
    return port
6324adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
# Besides A-Z, a-z, 0-9, '_', '.', and '-', these characters may appear
# unescaped in a valid HTTP URL (RFCs 2396 and 1738).
HTTP_PATH_SAFE = "%/;:@&=+$,!~*'()"
ESCAPED_CHAR_RE = re.compile(r"%([0-9a-fA-F][0-9a-fA-F])")
def uppercase_escaped_char(match):
    """Return the %-escape captured by match with its hex digits uppercased.

    Intended as the replacement callback for ESCAPED_CHAR_RE.sub().
    """
    hex_pair = match.group(1)
    return "%" + hex_pair.upper()
def escape_path(path):
    """Escape any invalid characters in HTTP URL, and uppercase all escapes."""
    # The character encoding behind existing %-escapes in a URL cannot be
    # known, but one has to be chosen in order to escape invalid path
    # characters.  UTF-8 is used, as recommended in the HTML 4.0
    # specification:
    # http://www.w3.org/TR/REC-html40/appendix/notes.html#h-B.2.1
    # And here, kind of: draft-fielding-uri-rfc2396bis-03
    # (And in draft IRI specification: draft-duerst-iri-05)
    # (And here, for new URI schemes: RFC 2718)
    if isinstance(path, unicode):
        path = path.encode("utf-8")
    quoted = urllib.quote(path, HTTP_PATH_SAFE)
    # Normalise the case of every %XX escape so paths compare consistently.
    return ESCAPED_CHAR_RE.sub(uppercase_escaped_char, quoted)
6544adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
def reach(h):
    """Return reach of host h, as defined by RFC 2965, section 1.

    The reach R of a host name H is defined as follows:

       *  If

          -  H is the host domain name of a host; and,

          -  H has the form A.B; and

          -  A has no embedded (that is, interior) dots; and

          -  B has at least one embedded dot, or B is the string "local".
             then the reach of H is .B.

       *  Otherwise, the reach of H is H.

    >>> reach("www.acme.com")
    '.acme.com'
    >>> reach("acme.com")
    'acme.com'
    >>> reach("acme.local")
    '.local'

    """
    # Split at the first dot: everything after it is B, what precedes it
    # is A (which therefore has no interior dots).
    dot, b = h.partition(".")[1:]
    if dot and is_HDN(h) and ("." in b or b == "local"):
        return "." + b
    return h
6894adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
def is_third_party(request):
    """Return True if request is to a third-party host.

    RFC 2965, section 3.3.6:

        An unverifiable transaction is to a third-party host if its request-
        host U does not domain-match the reach R of the request-host O in the
        origin transaction.

    """
    req_host = request_host(request)
    origin_reach = reach(request.get_origin_req_host())
    return not domain_match(req_host, origin_reach)
7054adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
7064adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
7074adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaoclass Cookie:
7084adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    """HTTP Cookie.
7094adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
7104adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    This class represents both Netscape and RFC 2965 cookies.
7114adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
7124adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    This is deliberately a very simple class.  It just holds attributes.  It's
7134adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    possible to construct Cookie instances that don't comply with the cookie
7144adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    standards.  CookieJar.make_cookies is the factory function for Cookie
7154adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    objects -- it deals with cookie parsing, supplying defaults, and
7164adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    normalising to the representation used in this class.  CookiePolicy is
7174adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    responsible for checking them to see whether they should be accepted from
7184adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    and returned to the server.
7194adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
7204adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    Note that the port may be present in the headers, but unspecified ("Port"
7214adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    rather than"Port=80", for example); if this is the case, port is None.
7224adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
7234adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    """
7244adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
7254adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def __init__(self, version, name, value,
7264adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                 port, port_specified,
7274adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                 domain, domain_specified, domain_initial_dot,
7284adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                 path, path_specified,
7294adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                 secure,
7304adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                 expires,
7314adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                 discard,
7324adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                 comment,
7334adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                 comment_url,
7344adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                 rest,
7354adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                 rfc2109=False,
7364adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                 ):
7374adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
7384adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if version is not None: version = int(version)
7394adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if expires is not None: expires = int(expires)
7404adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if port is None and port_specified is True:
7414adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            raise ValueError("if port is None, port_specified must be false")
7424adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
7434adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        self.version = version
7444adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        self.name = name
7454adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        self.value = value
7464adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        self.port = port
7474adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        self.port_specified = port_specified
7484adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        # normalise case, as per RFC 2965 section 3.3.3
7494adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        self.domain = domain.lower()
7504adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        self.domain_specified = domain_specified
7514adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        # Sigh.  We need to know whether the domain given in the
7524adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        # cookie-attribute had an initial dot, in order to follow RFC 2965
7534adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        # (as clarified in draft errata).  Needed for the returned $Domain
7544adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        # value.
7554adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        self.domain_initial_dot = domain_initial_dot
7564adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        self.path = path
7574adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        self.path_specified = path_specified
7584adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        self.secure = secure
7594adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        self.expires = expires
7604adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        self.discard = discard
7614adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        self.comment = comment
7624adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        self.comment_url = comment_url
7634adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        self.rfc2109 = rfc2109
7644adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
7654adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        self._rest = copy.copy(rest)
7664adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
7674adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def has_nonstandard_attr(self, name):
7684adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        return name in self._rest
7694adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def get_nonstandard_attr(self, name, default=None):
7704adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        return self._rest.get(name, default)
7714adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def set_nonstandard_attr(self, name, value):
7724adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        self._rest[name] = value
7734adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
7744adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def is_expired(self, now=None):
7754adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if now is None: now = time.time()
7764adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if (self.expires is not None) and (self.expires <= now):
7774adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            return True
7784adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        return False
7794adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
7804adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def __str__(self):
7814adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if self.port is None: p = ""
7824adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        else: p = ":"+self.port
7834adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        limit = self.domain + p + self.path
7844adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if self.value is not None:
7854adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            namevalue = "%s=%s" % (self.name, self.value)
7864adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        else:
7874adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            namevalue = self.name
7884adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        return "<Cookie %s for %s>" % (namevalue, limit)
7894adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
7904adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def __repr__(self):
7914adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        args = []
7924adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        for name in ("version", "name", "value",
7934adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                     "port", "port_specified",
7944adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                     "domain", "domain_specified", "domain_initial_dot",
7954adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                     "path", "path_specified",
7964adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                     "secure", "expires", "discard", "comment", "comment_url",
7974adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                     ):
7984adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            attr = getattr(self, name)
7994adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            args.append("%s=%s" % (name, repr(attr)))
8004adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        args.append("rest=%s" % repr(self._rest))
8014adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        args.append("rfc2109=%s" % repr(self.rfc2109))
8024adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        return "Cookie(%s)" % ", ".join(args)
8034adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
8044adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
class CookiePolicy:
    """Interface deciding which cookies are accepted from and sent to servers.

    A policy may also alter cookies, although doing so is probably unwise.

    DefaultCookiePolicy is the subclass that implements the standard rules
    for Netscape and RFC 2965 cookies -- subclass that one if you need a
    customised policy.

    """
    def set_ok(self, cookie, request):
        """Tell whether cookie should be accepted from the server.

        Implementations return true if, and only if, the cookie is to be
        accepted.  Cookies that are already expired currently never reach
        this method, because the CookieJar class deletes them itself.

        """
        raise NotImplementedError()

    def return_ok(self, cookie, request):
        """Tell whether cookie should be sent back to the server.

        Implementations return true if, and only if, it should be returned.
        """
        raise NotImplementedError()

    def domain_return_ok(self, domain, request):
        """Return false if no cookie with this cookie domain may be returned.

        The base implementation never vetoes a domain.
        """
        return True

    def path_return_ok(self, path, request):
        """Return false if no cookie with this cookie path may be returned.

        The base implementation never vetoes a path.
        """
        return True
8364adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
8374adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
8384adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaoclass DefaultCookiePolicy(CookiePolicy):
8394adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    """Implements the standard rules for accepting and returning cookies."""
8404adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
    # Bit flags for the strict_ns_domain constructor argument; they are
    # tested with "&" and so may be OR-ed together.
    #
    # Tested in set_ok_domain(): also apply to Netscape cookies the check
    # that the request-host prefix in front of the cookie domain has no dot.
    DomainStrictNoDots = 1
    # Tested in return_ok_domain(): a Netscape cookie whose domain was not
    # specified is only returned when its domain equals the request host.
    DomainStrictNonDomain = 2
    # Tested in set_ok_domain(): also apply the domain_match() test to
    # Netscape cookies.
    DomainRFC2965Match = 4

    # Ready-made flag combinations: no strict checks / both "strict" checks.
    DomainLiberal = 0
    DomainStrict = DomainStrictNoDots|DomainStrictNonDomain
8474adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
8484adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def __init__(self,
8494adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                 blocked_domains=None, allowed_domains=None,
8504adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                 netscape=True, rfc2965=False,
8514adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                 rfc2109_as_netscape=None,
8524adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                 hide_cookie2=False,
8534adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                 strict_domain=False,
8544adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                 strict_rfc2965_unverifiable=True,
8554adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                 strict_ns_unverifiable=False,
8564adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                 strict_ns_domain=DomainLiberal,
8574adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                 strict_ns_set_initial_dollar=False,
8584adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                 strict_ns_set_path=False,
8594adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                 ):
8604adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        """Constructor arguments should be passed as keyword arguments only."""
8614adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        self.netscape = netscape
8624adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        self.rfc2965 = rfc2965
8634adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        self.rfc2109_as_netscape = rfc2109_as_netscape
8644adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        self.hide_cookie2 = hide_cookie2
8654adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        self.strict_domain = strict_domain
8664adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        self.strict_rfc2965_unverifiable = strict_rfc2965_unverifiable
8674adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        self.strict_ns_unverifiable = strict_ns_unverifiable
8684adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        self.strict_ns_domain = strict_ns_domain
8694adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        self.strict_ns_set_initial_dollar = strict_ns_set_initial_dollar
8704adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        self.strict_ns_set_path = strict_ns_set_path
8714adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
8724adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if blocked_domains is not None:
8734adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            self._blocked_domains = tuple(blocked_domains)
8744adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        else:
8754adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            self._blocked_domains = ()
8764adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
8774adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if allowed_domains is not None:
8784adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            allowed_domains = tuple(allowed_domains)
8794adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        self._allowed_domains = allowed_domains
8804adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
8814adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def blocked_domains(self):
8824adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        """Return the sequence of blocked domains (as a tuple)."""
8834adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        return self._blocked_domains
8844adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def set_blocked_domains(self, blocked_domains):
8854adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        """Set the sequence of blocked domains."""
8864adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        self._blocked_domains = tuple(blocked_domains)
8874adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
8884adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def is_blocked(self, domain):
8894adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        for blocked_domain in self._blocked_domains:
8904adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            if user_domain_match(domain, blocked_domain):
8914adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                return True
8924adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        return False
8934adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
8944adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def allowed_domains(self):
8954adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        """Return None, or the sequence of allowed domains (as a tuple)."""
8964adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        return self._allowed_domains
8974adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def set_allowed_domains(self, allowed_domains):
8984adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        """Set the sequence of allowed domains, or None."""
8994adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if allowed_domains is not None:
9004adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            allowed_domains = tuple(allowed_domains)
9014adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        self._allowed_domains = allowed_domains
9024adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
9034adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def is_not_allowed(self, domain):
9044adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if self._allowed_domains is None:
9054adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            return False
9064adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        for allowed_domain in self._allowed_domains:
9074adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            if user_domain_match(domain, allowed_domain):
9084adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                return False
9094adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        return True
9104adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
9114adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def set_ok(self, cookie, request):
9124adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        """
9134adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        If you override .set_ok(), be sure to call this method.  If it returns
9144adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        false, so should your subclass (assuming your subclass wants to be more
9154adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        strict about which cookies to accept).
9164adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
9174adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        """
9184adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        _debug(" - checking cookie %s=%s", cookie.name, cookie.value)
9194adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
9204adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        assert cookie.name is not None
9214adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
9224adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        for n in "version", "verifiability", "name", "path", "domain", "port":
9234adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            fn_name = "set_ok_"+n
9244adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            fn = getattr(self, fn_name)
9254adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            if not fn(cookie, request):
9264adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                return False
9274adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
9284adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        return True
9294adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
9304adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def set_ok_version(self, cookie, request):
9314adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if cookie.version is None:
9324adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            # Version is always set to 0 by parse_ns_headers if it's a Netscape
9334adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            # cookie, so this must be an invalid RFC 2965 cookie.
9344adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            _debug("   Set-Cookie2 without version attribute (%s=%s)",
9354adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                   cookie.name, cookie.value)
9364adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            return False
9374adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if cookie.version > 0 and not self.rfc2965:
9384adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            _debug("   RFC 2965 cookies are switched off")
9394adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            return False
9404adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        elif cookie.version == 0 and not self.netscape:
9414adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            _debug("   Netscape cookies are switched off")
9424adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            return False
9434adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        return True
9444adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
9454adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def set_ok_verifiability(self, cookie, request):
9464adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if request.is_unverifiable() and is_third_party(request):
9474adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            if cookie.version > 0 and self.strict_rfc2965_unverifiable:
9484adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                _debug("   third-party RFC 2965 cookie during "
9494adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                             "unverifiable transaction")
9504adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                return False
9514adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            elif cookie.version == 0 and self.strict_ns_unverifiable:
9524adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                _debug("   third-party Netscape cookie during "
9534adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                             "unverifiable transaction")
9544adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                return False
9554adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        return True
9564adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
9574adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def set_ok_name(self, cookie, request):
9584adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        # Try and stop servers setting V0 cookies designed to hack other
9594adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        # servers that know both V0 and V1 protocols.
9604adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if (cookie.version == 0 and self.strict_ns_set_initial_dollar and
9614adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            cookie.name.startswith("$")):
9624adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            _debug("   illegal name (starts with '$'): '%s'", cookie.name)
9634adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            return False
9644adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        return True
9654adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
9664adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def set_ok_path(self, cookie, request):
9674adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if cookie.path_specified:
9684adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            req_path = request_path(request)
9694adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            if ((cookie.version > 0 or
9704adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                 (cookie.version == 0 and self.strict_ns_set_path)) and
9714adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                not req_path.startswith(cookie.path)):
9724adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                _debug("   path attribute %s is not a prefix of request "
9734adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                       "path %s", cookie.path, req_path)
9744adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                return False
9754adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        return True
9764adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
    def set_ok_domain(self, cookie, request):
        """Return True if the cookie's domain is acceptable for this request.

        The user block-list and allow-list are consulted first.  The
        remaining checks only run when the cookie's domain was specified
        (cookie.domain_specified); which of them apply depends on the cookie
        version and on the strict_domain / strict_ns_domain settings.
        """
        if self.is_blocked(cookie.domain):
            _debug("   domain %s is in user block-list", cookie.domain)
            return False
        if self.is_not_allowed(cookie.domain):
            _debug("   domain %s is not in user allow-list", cookie.domain)
            return False
        if cookie.domain_specified:
            # req_host is used for the host-prefix check at the end; erhn is
            # what the debug messages call the "effective request-host".
            req_host, erhn = eff_request_host(request)
            domain = cookie.domain
            if self.strict_domain and (domain.count(".") >= 2):
                # XXX This should probably be compared with the Konqueror
                # (kcookiejar.cpp) and Mozilla implementations, but it's a
                # losing battle.
                i = domain.rfind(".")        # index of the last dot
                j = domain.rfind(".", 0, i)  # index of the dot before it
                if j == 0:  # domain like .foo.bar
                    tld = domain[i+1:]
                    sld = domain[j+1:i]
                    # Refuse a two-letter last label combined with one of
                    # these well-known second-level labels.
                    if sld.lower() in ("co", "ac", "com", "edu", "org", "net",
                       "gov", "mil", "int", "aero", "biz", "cat", "coop",
                       "info", "jobs", "mobi", "museum", "name", "pro",
                       "travel", "eu") and len(tld) == 2:
                        # domain like .co.uk
                        _debug("   country-code second level domain %s", domain)
                        return False
            # Ignore one leading dot when looking for embedded dots.
            if domain.startswith("."):
                undotted_domain = domain[1:]
            else:
                undotted_domain = domain
            embedded_dots = (undotted_domain.find(".") >= 0)
            # A domain without an embedded dot is refused, except ".local".
            if not embedded_dots and domain != ".local":
                _debug("   non-local domain %s contains no embedded dot",
                       domain)
                return False
            if cookie.version == 0:
                # Netscape cookies: the effective request-host, with or
                # without an added initial dot, must end with the domain.
                if (not erhn.endswith(domain) and
                    (not erhn.startswith(".") and
                     not ("."+erhn).endswith(domain))):
                    _debug("   effective request-host %s (even with added "
                           "initial dot) does not end with %s",
                           erhn, domain)
                    return False
            # domain_match() is required for version > 0 cookies, and for
            # Netscape cookies when the DomainRFC2965Match flag is set.
            if (cookie.version > 0 or
                (self.strict_ns_domain & self.DomainRFC2965Match)):
                if not domain_match(erhn, domain):
                    _debug("   effective request-host %s does not domain-match "
                           "%s", erhn, domain)
                    return False
            # For version > 0 cookies, and for Netscape cookies when the
            # DomainStrictNoDots flag is set, the part of the request host in
            # front of the domain must not contain a dot.  The check is
            # skipped when IPV4_RE matches the request host.
            if (cookie.version > 0 or
                (self.strict_ns_domain & self.DomainStrictNoDots)):
                host_prefix = req_host[:-len(domain)]
                if (host_prefix.find(".") >= 0 and
                    not IPV4_RE.search(req_host)):
                    _debug("   host prefix %s for domain %s contains a dot",
                           host_prefix, domain)
                    return False
        return True
10354adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
10364adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def set_ok_port(self, cookie, request):
10374adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if cookie.port_specified:
10384adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            req_port = request_port(request)
10394adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            if req_port is None:
10404adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                req_port = "80"
10414adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            else:
10424adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                req_port = str(req_port)
10434adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            for p in cookie.port.split(","):
10444adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                try:
10454adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                    int(p)
10464adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                except ValueError:
10474adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                    _debug("   bad port %s (not numeric)", p)
10484adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                    return False
10494adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                if p == req_port:
10504adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                    break
10514adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            else:
10524adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                _debug("   request port (%s) not found in %s",
10534adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                       req_port, cookie.port)
10544adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                return False
10554adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        return True
10564adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
10574adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def return_ok(self, cookie, request):
10584adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        """
10594adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        If you override .return_ok(), be sure to call this method.  If it
10604adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        returns false, so should your subclass (assuming your subclass wants to
10614adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        be more strict about which cookies to return).
10624adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
10634adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        """
10644adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        # Path has already been checked by .path_return_ok(), and domain
10654adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        # blocking done by .domain_return_ok().
10664adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        _debug(" - checking cookie %s=%s", cookie.name, cookie.value)
10674adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
10684adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        for n in "version", "verifiability", "secure", "expires", "port", "domain":
10694adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            fn_name = "return_ok_"+n
10704adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            fn = getattr(self, fn_name)
10714adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            if not fn(cookie, request):
10724adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                return False
10734adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        return True
10744adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
10754adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def return_ok_version(self, cookie, request):
10764adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if cookie.version > 0 and not self.rfc2965:
10774adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            _debug("   RFC 2965 cookies are switched off")
10784adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            return False
10794adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        elif cookie.version == 0 and not self.netscape:
10804adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            _debug("   Netscape cookies are switched off")
10814adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            return False
10824adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        return True
10834adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
10844adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def return_ok_verifiability(self, cookie, request):
10854adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if request.is_unverifiable() and is_third_party(request):
10864adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            if cookie.version > 0 and self.strict_rfc2965_unverifiable:
10874adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                _debug("   third-party RFC 2965 cookie during unverifiable "
10884adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                       "transaction")
10894adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                return False
10904adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            elif cookie.version == 0 and self.strict_ns_unverifiable:
10914adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                _debug("   third-party Netscape cookie during unverifiable "
10924adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                       "transaction")
10934adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                return False
10944adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        return True
10954adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
10964adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def return_ok_secure(self, cookie, request):
10974adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if cookie.secure and request.get_type() != "https":
10984adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            _debug("   secure cookie with non-secure request")
10994adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            return False
11004adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        return True
11014adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
11024adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def return_ok_expires(self, cookie, request):
11034adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if cookie.is_expired(self._now):
11044adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            _debug("   cookie expired")
11054adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            return False
11064adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        return True
11074adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
11084adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def return_ok_port(self, cookie, request):
11094adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if cookie.port:
11104adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            req_port = request_port(request)
11114adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            if req_port is None:
11124adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                req_port = "80"
11134adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            for p in cookie.port.split(","):
11144adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                if p == req_port:
11154adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                    break
11164adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            else:
11174adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                _debug("   request port %s does not match cookie port %s",
11184adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                       req_port, cookie.port)
11194adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                return False
11204adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        return True
11214adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
11224adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def return_ok_domain(self, cookie, request):
11234adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        req_host, erhn = eff_request_host(request)
11244adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        domain = cookie.domain
11254adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
11264adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        # strict check of non-domain cookies: Mozilla does this, MSIE5 doesn't
11274adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if (cookie.version == 0 and
11284adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            (self.strict_ns_domain & self.DomainStrictNonDomain) and
11294adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            not cookie.domain_specified and domain != erhn):
11304adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            _debug("   cookie with unspecified domain does not string-compare "
11314adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                   "equal to request domain")
11324adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            return False
11334adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
11344adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if cookie.version > 0 and not domain_match(erhn, domain):
11354adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            _debug("   effective request-host name %s does not domain-match "
11364adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                   "RFC 2965 cookie domain %s", erhn, domain)
11374adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            return False
11384adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if cookie.version == 0 and not ("."+erhn).endswith(domain):
11394adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            _debug("   request-host %s does not match Netscape cookie domain "
11404adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                   "%s", req_host, domain)
11414adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            return False
11424adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        return True
11434adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
11444adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def domain_return_ok(self, domain, request):
11454adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        # Liberal check of.  This is here as an optimization to avoid
11464adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        # having to load lots of MSIE cookie files unless necessary.
11474adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        req_host, erhn = eff_request_host(request)
11484adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if not req_host.startswith("."):
11494adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            req_host = "."+req_host
11504adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if not erhn.startswith("."):
11514adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            erhn = "."+erhn
11524adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if not (req_host.endswith(domain) or erhn.endswith(domain)):
11534adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            #_debug("   request domain %s does not match cookie domain %s",
11544adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            #       req_host, domain)
11554adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            return False
11564adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
11574adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if self.is_blocked(domain):
11584adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            _debug("   domain %s is in user block-list", domain)
11594adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            return False
11604adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if self.is_not_allowed(domain):
11614adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            _debug("   domain %s is not in user allow-list", domain)
11624adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            return False
11634adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
11644adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        return True
11654adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
11664adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def path_return_ok(self, path, request):
11674adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        _debug("- checking cookie path=%s", path)
11684adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        req_path = request_path(request)
11694adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if not req_path.startswith(path):
11704adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            _debug("  %s does not path-match %s", req_path, path)
11714adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            return False
11724adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        return True
11734adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
11744adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
def vals_sorted_by_key(adict):
    """Return a list of adict's values, ordered by ascending key.

    sorted() is used rather than sorting the result of keys() in place, so
    the function also works where keys() does not return a list, and the
    result is always a real list.

    """
    return [adict[key] for key in sorted(adict.keys())]
11794adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
def deepvalues(mapping):
    """Iterates over nested mapping, depth-first, in sorted order by key.

    A value counts as a nested mapping when it has an "items" attribute; such
    values are descended into instead of being yielded themselves.

    """
    for key in sorted(mapping.keys()):
        obj = mapping[key]
        try:
            obj.items
        except AttributeError:
            # leaf value
            yield obj
        else:
            for subobj in deepvalues(obj):
                yield subobj
11954adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
11964adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
class Absent:
    """Sentinel used as the second argument to dict.get().

    Lets callers distinguish an absent dict key from one that is present
    with a None value.

    """
12004adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
12014adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaoclass CookieJar:
12024adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    """Collection of HTTP cookies.
12034adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
12044adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    You may not need to know about this class: try
12054adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    urllib2.build_opener(HTTPCookieProcessor).open(url).
12064adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
12074adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    """
12084adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
12094adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    non_word_re = re.compile(r"\W")
12104adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    quote_re = re.compile(r"([\"\\])")
12114adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    strict_domain_re = re.compile(r"\.?[^.]*")
12124adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    domain_re = re.compile(r"[^.]*")
12134adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    dots_re = re.compile(r"^\.+")
12144adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
12154adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    magic_re = r"^\#LWP-Cookies-(\d+\.\d+)"
12164adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
12174adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def __init__(self, policy=None):
12184adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if policy is None:
12194adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            policy = DefaultCookiePolicy()
12204adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        self._policy = policy
12214adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
12224adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        self._cookies_lock = _threading.RLock()
12234adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        self._cookies = {}
12244adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
12254adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def set_policy(self, policy):
12264adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        self._policy = policy
12274adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
12284adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def _cookies_for_domain(self, domain, request):
12294adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        cookies = []
12304adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if not self._policy.domain_return_ok(domain, request):
12314adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            return []
12324adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        _debug("Checking %s for cookies to return", domain)
12334adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        cookies_by_path = self._cookies[domain]
12344adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        for path in cookies_by_path.keys():
12354adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            if not self._policy.path_return_ok(path, request):
12364adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                continue
12374adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            cookies_by_name = cookies_by_path[path]
12384adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            for cookie in cookies_by_name.values():
12394adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                if not self._policy.return_ok(cookie, request):
12404adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                    _debug("   not returning cookie")
12414adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                    continue
12424adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                _debug("   it's a match")
12434adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                cookies.append(cookie)
12444adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        return cookies
12454adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
12464adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def _cookies_for_request(self, request):
12474adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        """Return a list of cookies to be returned to server."""
12484adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        cookies = []
12494adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        for domain in self._cookies.keys():
12504adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            cookies.extend(self._cookies_for_domain(domain, request))
12514adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        return cookies
12524adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
12534adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def _cookie_attrs(self, cookies):
12544adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        """Return a list of cookie-attributes to be returned to server.
12554adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
12564adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        like ['foo="bar"; $Path="/"', ...]
12574adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
12584adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        The $Version attribute is also added when appropriate (currently only
12594adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        once per request).
12604adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
12614adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        """
12624adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        # add cookies in order of most specific (ie. longest) path first
12634adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        cookies.sort(key=lambda arg: len(arg.path), reverse=True)
12644adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
12654adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        version_set = False
12664adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
12674adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        attrs = []
12684adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        for cookie in cookies:
12694adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            # set version of Cookie header
12704adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            # XXX
12714adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            # What should it be if multiple matching Set-Cookie headers have
12724adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            #  different versions themselves?
12734adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            # Answer: there is no answer; was supposed to be settled by
12744adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            #  RFC 2965 errata, but that may never appear...
12754adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            version = cookie.version
12764adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            if not version_set:
12774adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                version_set = True
12784adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                if version > 0:
12794adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                    attrs.append("$Version=%s" % version)
12804adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
12814adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            # quote cookie value if necessary
12824adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            # (not for Netscape protocol, which already has any quotes
12834adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            #  intact, due to the poorly-specified Netscape Cookie: syntax)
12844adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            if ((cookie.value is not None) and
12854adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                self.non_word_re.search(cookie.value) and version > 0):
12864adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                value = self.quote_re.sub(r"\\\1", cookie.value)
12874adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            else:
12884adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                value = cookie.value
12894adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
12904adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            # add cookie-attributes to be returned in Cookie header
12914adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            if cookie.value is None:
12924adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                attrs.append(cookie.name)
12934adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            else:
12944adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                attrs.append("%s=%s" % (cookie.name, value))
12954adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            if version > 0:
12964adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                if cookie.path_specified:
12974adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                    attrs.append('$Path="%s"' % cookie.path)
12984adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                if cookie.domain.startswith("."):
12994adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                    domain = cookie.domain
13004adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                    if (not cookie.domain_initial_dot and
13014adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                        domain.startswith(".")):
13024adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                        domain = domain[1:]
13034adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                    attrs.append('$Domain="%s"' % domain)
13044adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                if cookie.port is not None:
13054adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                    p = "$Port"
13064adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                    if cookie.port_specified:
13074adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                        p = p + ('="%s"' % cookie.port)
13084adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                    attrs.append(p)
13094adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
13104adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        return attrs
13114adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
13124adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def add_cookie_header(self, request):
13134adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        """Add correct Cookie: header to request (urllib2.Request object).
13144adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
13154adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        The Cookie2 header is also added unless policy.hide_cookie2 is true.
13164adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
13174adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        """
13184adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        _debug("add_cookie_header")
13194adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        self._cookies_lock.acquire()
13204adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        try:
13214adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
13224adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            self._policy._now = self._now = int(time.time())
13234adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
13244adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            cookies = self._cookies_for_request(request)
13254adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
13264adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            attrs = self._cookie_attrs(cookies)
13274adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            if attrs:
13284adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                if not request.has_header("Cookie"):
13294adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                    request.add_unredirected_header(
13304adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                        "Cookie", "; ".join(attrs))
13314adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
13324adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            # if necessary, advertise that we know RFC 2965
13334adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            if (self._policy.rfc2965 and not self._policy.hide_cookie2 and
13344adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                not request.has_header("Cookie2")):
13354adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                for cookie in cookies:
13364adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                    if cookie.version != 1:
13374adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                        request.add_unredirected_header("Cookie2", '$Version="1"')
13384adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                        break
13394adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
13404adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        finally:
13414adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            self._cookies_lock.release()
13424adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
13434adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        self.clear_expired_cookies()
13444adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
13454adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def _normalized_cookie_tuples(self, attrs_set):
13464adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        """Return list of tuples containing normalised cookie information.
13474adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
13484adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        attrs_set is the list of lists of key,value pairs extracted from
13494adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        the Set-Cookie or Set-Cookie2 headers.
13504adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
13514adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        Tuples are name, value, standard, rest, where name and value are the
13524adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        cookie name and value, standard is a dictionary containing the standard
13534adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        cookie-attributes (discard, secure, version, expires or max-age,
13544adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        domain, path and port) and rest is a dictionary containing the rest of
13554adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        the cookie-attributes.
13564adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
13574adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        """
13584adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        cookie_tuples = []
13594adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
13604adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        boolean_attrs = "discard", "secure"
13614adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        value_attrs = ("version",
13624adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                       "expires", "max-age",
13634adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                       "domain", "path", "port",
13644adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                       "comment", "commenturl")
13654adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
13664adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        for cookie_attrs in attrs_set:
13674adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            name, value = cookie_attrs[0]
13684adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
13694adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            # Build dictionary of standard cookie-attributes (standard) and
13704adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            # dictionary of other cookie-attributes (rest).
13714adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
13724adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            # Note: expiry time is normalised to seconds since epoch.  V0
13734adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            # cookies should have the Expires cookie-attribute, and V1 cookies
13744adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            # should have Max-Age, but since V1 includes RFC 2109 cookies (and
13754adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            # since V0 cookies may be a mish-mash of Netscape and RFC 2109), we
13764adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            # accept either (but prefer Max-Age).
13774adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            max_age_set = False
13784adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
13794adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            bad_cookie = False
13804adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
13814adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            standard = {}
13824adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            rest = {}
13834adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            for k, v in cookie_attrs[1:]:
13844adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                lc = k.lower()
13854adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                # don't lose case distinction for unknown fields
13864adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                if lc in value_attrs or lc in boolean_attrs:
13874adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                    k = lc
13884adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                if k in boolean_attrs and v is None:
13894adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                    # boolean cookie-attribute is present, but has no value
13904adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                    # (like "discard", rather than "port=80")
13914adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                    v = True
13924adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                if k in standard:
13934adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                    # only first value is significant
13944adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                    continue
13954adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                if k == "domain":
13964adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                    if v is None:
13974adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                        _debug("   missing value for domain attribute")
13984adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                        bad_cookie = True
13994adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                        break
14004adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                    # RFC 2965 section 3.3.3
14014adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                    v = v.lower()
14024adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                if k == "expires":
14034adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                    if max_age_set:
14044adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                        # Prefer max-age to expires (like Mozilla)
14054adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                        continue
14064adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                    if v is None:
14074adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                        _debug("   missing or invalid value for expires "
14084adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                              "attribute: treating as session cookie")
14094adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                        continue
14104adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                if k == "max-age":
14114adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                    max_age_set = True
14124adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                    try:
14134adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                        v = int(v)
14144adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                    except ValueError:
14154adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                        _debug("   missing or invalid (non-numeric) value for "
14164adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                              "max-age attribute")
14174adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                        bad_cookie = True
14184adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                        break
14194adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                    # convert RFC 2965 Max-Age to seconds since epoch
14204adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                    # XXX Strictly you're supposed to follow RFC 2616
14214adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                    #   age-calculation rules.  Remember that zero Max-Age is a
14224adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                    #   is a request to discard (old and new) cookie, though.
14234adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                    k = "expires"
14244adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                    v = self._now + v
14254adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                if (k in value_attrs) or (k in boolean_attrs):
14264adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                    if (v is None and
14274adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                        k not in ("port", "comment", "commenturl")):
14284adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                        _debug("   missing value for %s attribute" % k)
14294adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                        bad_cookie = True
14304adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                        break
14314adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                    standard[k] = v
14324adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                else:
14334adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                    rest[k] = v
14344adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
14354adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            if bad_cookie:
14364adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                continue
14374adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
14384adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            cookie_tuples.append((name, value, standard, rest))
14394adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
14404adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        return cookie_tuples
14414adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
14424adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def _cookie_from_cookie_tuple(self, tup, request):
14434adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        # standard is dict of standard cookie-attributes, rest is dict of the
14444adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        # rest of them
14454adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        name, value, standard, rest = tup
14464adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
14474adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        domain = standard.get("domain", Absent)
14484adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        path = standard.get("path", Absent)
14494adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        port = standard.get("port", Absent)
14504adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        expires = standard.get("expires", Absent)
14514adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
14524adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        # set the easy defaults
14534adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        version = standard.get("version", None)
14544adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if version is not None:
14554adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            try:
14564adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                version = int(version)
14574adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            except ValueError:
14584adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                return None  # invalid version, ignore cookie
14594adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        secure = standard.get("secure", False)
14604adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        # (discard is also set if expires is Absent)
14614adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        discard = standard.get("discard", False)
14624adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        comment = standard.get("comment", None)
14634adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        comment_url = standard.get("commenturl", None)
14644adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
14654adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        # set default path
14664adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if path is not Absent and path != "":
14674adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            path_specified = True
14684adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            path = escape_path(path)
14694adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        else:
14704adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            path_specified = False
14714adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            path = request_path(request)
14724adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            i = path.rfind("/")
14734adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            if i != -1:
14744adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                if version == 0:
14754adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                    # Netscape spec parts company from reality here
14764adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                    path = path[:i]
14774adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                else:
14784adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                    path = path[:i+1]
14794adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            if len(path) == 0: path = "/"
14804adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
14814adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        # set default domain
14824adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        domain_specified = domain is not Absent
14834adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        # but first we have to remember whether it starts with a dot
14844adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        domain_initial_dot = False
14854adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if domain_specified:
14864adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            domain_initial_dot = bool(domain.startswith("."))
14874adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if domain is Absent:
14884adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            req_host, erhn = eff_request_host(request)
14894adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            domain = erhn
14904adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        elif not domain.startswith("."):
14914adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            domain = "."+domain
14924adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
14934adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        # set default port
14944adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        port_specified = False
14954adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if port is not Absent:
14964adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            if port is None:
14974adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                # Port attr present, but has no value: default to request port.
14984adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                # Cookie should then only be sent back on that port.
14994adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                port = request_port(request)
15004adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            else:
15014adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                port_specified = True
15024adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                port = re.sub(r"\s+", "", port)
15034adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        else:
15044adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            # No port attr present.  Cookie can be sent back on any port.
15054adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            port = None
15064adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
15074adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        # set default expires and discard
15084adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if expires is Absent:
15094adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            expires = None
15104adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            discard = True
15114adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        elif expires <= self._now:
15124adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            # Expiry date in past is request to delete cookie.  This can't be
15134adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            # in DefaultCookiePolicy, because can't delete cookies there.
15144adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            try:
15154adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                self.clear(domain, path, name)
15164adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            except KeyError:
15174adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                pass
15184adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            _debug("Expiring cookie, domain='%s', path='%s', name='%s'",
15194adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                   domain, path, name)
15204adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            return None
15214adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
15224adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        return Cookie(version,
15234adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                      name, value,
15244adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                      port, port_specified,
15254adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                      domain, domain_specified, domain_initial_dot,
15264adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                      path, path_specified,
15274adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                      secure,
15284adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                      expires,
15294adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                      discard,
15304adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                      comment,
15314adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                      comment_url,
15324adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                      rest)
15334adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
15344adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def _cookies_from_attrs_set(self, attrs_set, request):
15354adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        cookie_tuples = self._normalized_cookie_tuples(attrs_set)
15364adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
15374adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        cookies = []
15384adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        for tup in cookie_tuples:
15394adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            cookie = self._cookie_from_cookie_tuple(tup, request)
15404adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            if cookie: cookies.append(cookie)
15414adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        return cookies
15424adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
15434adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def _process_rfc2109_cookies(self, cookies):
15444adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        rfc2109_as_ns = getattr(self._policy, 'rfc2109_as_netscape', None)
15454adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if rfc2109_as_ns is None:
15464adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            rfc2109_as_ns = not self._policy.rfc2965
15474adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        for cookie in cookies:
15484adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            if cookie.version == 1:
15494adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                cookie.rfc2109 = True
15504adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                if rfc2109_as_ns:
15514adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                    # treat 2109 cookies as Netscape cookies rather than
15524adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                    # as RFC2965 cookies
15534adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                    cookie.version = 0
15544adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
15554adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def make_cookies(self, response, request):
15564adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        """Return sequence of Cookie objects extracted from response object."""
15574adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        # get cookie-attributes for RFC 2965 and Netscape protocols
15584adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        headers = response.info()
15594adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        rfc2965_hdrs = headers.getheaders("Set-Cookie2")
15604adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        ns_hdrs = headers.getheaders("Set-Cookie")
15614adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
15624adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        rfc2965 = self._policy.rfc2965
15634adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        netscape = self._policy.netscape
15644adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
15654adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if ((not rfc2965_hdrs and not ns_hdrs) or
15664adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            (not ns_hdrs and not rfc2965) or
15674adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            (not rfc2965_hdrs and not netscape) or
15684adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            (not netscape and not rfc2965)):
15694adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            return []  # no relevant cookie headers: quick exit
15704adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
15714adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        try:
15724adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            cookies = self._cookies_from_attrs_set(
15734adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                split_header_words(rfc2965_hdrs), request)
15744adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        except Exception:
15754adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            _warn_unhandled_exception()
15764adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            cookies = []
15774adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
15784adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if ns_hdrs and netscape:
15794adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            try:
15804adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                # RFC 2109 and Netscape cookies
15814adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                ns_cookies = self._cookies_from_attrs_set(
15824adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                    parse_ns_headers(ns_hdrs), request)
15834adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            except Exception:
15844adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                _warn_unhandled_exception()
15854adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                ns_cookies = []
15864adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            self._process_rfc2109_cookies(ns_cookies)
15874adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
15884adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            # Look for Netscape cookies (from Set-Cookie headers) that match
15894adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            # corresponding RFC 2965 cookies (from Set-Cookie2 headers).
15904adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            # For each match, keep the RFC 2965 cookie and ignore the Netscape
15914adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            # cookie (RFC 2965 section 9.1).  Actually, RFC 2109 cookies are
15924adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            # bundled in with the Netscape cookies for this purpose, which is
15934adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            # reasonable behaviour.
15944adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            if rfc2965:
15954adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                lookup = {}
15964adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                for cookie in cookies:
15974adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                    lookup[(cookie.domain, cookie.path, cookie.name)] = None
15984adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
15994adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                def no_matching_rfc2965(ns_cookie, lookup=lookup):
16004adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                    key = ns_cookie.domain, ns_cookie.path, ns_cookie.name
16014adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                    return key not in lookup
16024adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                ns_cookies = filter(no_matching_rfc2965, ns_cookies)
16034adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
16044adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            if ns_cookies:
16054adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                cookies.extend(ns_cookies)
16064adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
16074adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        return cookies
16084adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
16094adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def set_cookie_if_ok(self, cookie, request):
16104adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        """Set a cookie if policy says it's OK to do so."""
16114adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        self._cookies_lock.acquire()
16124adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        try:
16134adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            self._policy._now = self._now = int(time.time())
16144adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
16154adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            if self._policy.set_ok(cookie, request):
16164adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                self.set_cookie(cookie)
16174adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
16184adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
16194adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        finally:
16204adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            self._cookies_lock.release()
16214adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
16224adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def set_cookie(self, cookie):
16234adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        """Set a cookie, without checking whether or not it should be set."""
16244adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        c = self._cookies
16254adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        self._cookies_lock.acquire()
16264adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        try:
16274adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            if cookie.domain not in c: c[cookie.domain] = {}
16284adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            c2 = c[cookie.domain]
16294adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            if cookie.path not in c2: c2[cookie.path] = {}
16304adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            c3 = c2[cookie.path]
16314adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            c3[cookie.name] = cookie
16324adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        finally:
16334adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            self._cookies_lock.release()
16344adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
16354adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def extract_cookies(self, response, request):
16364adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        """Extract cookies from response, where allowable given the request."""
16374adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        _debug("extract_cookies: %s", response.info())
16384adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        self._cookies_lock.acquire()
16394adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        try:
16404adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            self._policy._now = self._now = int(time.time())
16414adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
16424adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            for cookie in self.make_cookies(response, request):
16434adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                if self._policy.set_ok(cookie, request):
16444adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                    _debug(" setting cookie: %s", cookie)
16454adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                    self.set_cookie(cookie)
16464adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        finally:
16474adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            self._cookies_lock.release()
16484adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
16494adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def clear(self, domain=None, path=None, name=None):
16504adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        """Clear some cookies.
16514adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
16524adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        Invoking this method without arguments will clear all cookies.  If
16534adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        given a single argument, only cookies belonging to that domain will be
16544adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        removed.  If given two arguments, cookies belonging to the specified
16554adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        path within that domain are removed.  If given three arguments, then
16564adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        the cookie with the specified name, path and domain is removed.
16574adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
16584adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        Raises KeyError if no matching cookie exists.
16594adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
16604adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        """
16614adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if name is not None:
16624adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            if (domain is None) or (path is None):
16634adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                raise ValueError(
16644adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                    "domain and path must be given to remove a cookie by name")
16654adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            del self._cookies[domain][path][name]
16664adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        elif path is not None:
16674adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            if domain is None:
16684adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                raise ValueError(
16694adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                    "domain must be given to remove cookies by path")
16704adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            del self._cookies[domain][path]
16714adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        elif domain is not None:
16724adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            del self._cookies[domain]
16734adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        else:
16744adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            self._cookies = {}
16754adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
16764adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def clear_session_cookies(self):
16774adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        """Discard all session cookies.
16784adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
16794adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        Note that the .save() method won't save session cookies anyway, unless
16804adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        you ask otherwise by passing a true ignore_discard argument.
16814adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
16824adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        """
16834adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        self._cookies_lock.acquire()
16844adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        try:
16854adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            for cookie in self:
16864adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                if cookie.discard:
16874adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                    self.clear(cookie.domain, cookie.path, cookie.name)
16884adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        finally:
16894adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            self._cookies_lock.release()
16904adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
16914adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def clear_expired_cookies(self):
16924adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        """Discard all expired cookies.
16934adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
16944adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        You probably don't need to call this method: expired cookies are never
16954adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        sent back to the server (provided you're using DefaultCookiePolicy),
16964adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        this method is called by CookieJar itself every so often, and the
16974adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        .save() method won't save expired cookies anyway (unless you ask
16984adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        otherwise by passing a true ignore_expires argument).
16994adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
17004adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        """
17014adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        self._cookies_lock.acquire()
17024adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        try:
17034adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            now = time.time()
17044adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            for cookie in self:
17054adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                if cookie.is_expired(now):
17064adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                    self.clear(cookie.domain, cookie.path, cookie.name)
17074adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        finally:
17084adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            self._cookies_lock.release()
17094adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
17104adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def __iter__(self):
17114adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        return deepvalues(self._cookies)
17124adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
17134adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def __len__(self):
17144adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        """Return number of contained cookies."""
17154adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        i = 0
17164adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        for cookie in self: i = i + 1
17174adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        return i
17184adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
17194adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def __repr__(self):
17204adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        r = []
17214adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        for cookie in self: r.append(repr(cookie))
17224adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        return "<%s[%s]>" % (self.__class__, ", ".join(r))
17234adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
17244adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def __str__(self):
17254adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        r = []
17264adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        for cookie in self: r.append(str(cookie))
17274adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        return "<%s[%s]>" % (self.__class__, ", ".join(r))
17284adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
17294adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
# derives from IOError for backwards-compatibility with Python 2.4.0
class LoadError(IOError):
    """Error raised when cookies cannot be loaded from a file."""
17324adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
class FileCookieJar(CookieJar):
    """CookieJar that can be loaded from and saved to a file.

    save() is not implemented by this class, and load() hands the actual
    parsing to self._really_load(), which this class does not define.
    """

    def __init__(self, filename=None, delayload=False, policy=None):
        """
        Cookies are NOT loaded from the named file until either the .load() or
        .revert() method is called.

        filename, if given, must be string-like (ValueError otherwise); it
        becomes the default file for load() and revert().  delayload is
        stored as a bool.  policy is passed on to CookieJar.__init__.

        """
        CookieJar.__init__(self, policy)
        if filename is not None:
            try:
                filename+""
            except Exception:
                # Not a bare except: KeyboardInterrupt and SystemExit must
                # propagate instead of being reported as a bad filename.
                raise ValueError("filename must be string-like")
        self.filename = filename
        self.delayload = bool(delayload)

    def save(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Save cookies to a file.

        Not implemented here: always raises NotImplementedError.
        """
        raise NotImplementedError()

    def load(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Load cookies from a file.

        filename defaults to self.filename; ValueError is raised when
        neither is set.  The file is opened, passed to self._really_load()
        together with the two ignore_* flags, and closed again whether or
        not loading succeeds.
        """
        if filename is None:
            if self.filename is not None: filename = self.filename
            else: raise ValueError(MISSING_FILENAME_TEXT)

        f = open(filename)
        try:
            self._really_load(f, filename, ignore_discard, ignore_expires)
        finally:
            f.close()

    def revert(self, filename=None,
               ignore_discard=False, ignore_expires=False):
        """Clear all cookies and reload cookies from a saved file.

        filename defaults to self.filename; ValueError is raised when
        neither is set.

        Raises LoadError (or IOError) if reversion is not successful; the
        object's state will not be altered if this happens.  The previous
        cookies are likewise put back when load() fails with any other
        exception, which is then propagated unchanged.

        """
        if filename is None:
            if self.filename is not None: filename = self.filename
            else: raise ValueError(MISSING_FILENAME_TEXT)

        self._cookies_lock.acquire()
        try:
            old_state = copy.deepcopy(self._cookies)
            self._cookies = {}
            reloaded = False
            try:
                self.load(filename, ignore_discard, ignore_expires)
                reloaded = True
            finally:
                # Restore on every kind of failure, not only LoadError and
                # IOError, so the jar is never left empty or half-loaded.
                if not reloaded:
                    self._cookies = old_state
        finally:
            self._cookies_lock.release()
17924adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
17934adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaofrom _LWPCookieJar import LWPCookieJar, lwp_cookie_str
17944adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaofrom _MozillaCookieJar import MozillaCookieJar
1795