14710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm"""HTTP cookie handling for web clients.
24710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
34710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmThis module has (now fairly distant) origins in Gisle Aas' Perl module
44710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmHTTP::Cookies, from the libwww-perl library.
54710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
64710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmDocstrings, comments and debug strings in this code refer to the
74710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmattributes of the HTTP cookie system as cookie-attributes, to distinguish
84710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmthem clearly from Python attributes.
94710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmClass diagram (note that BSDDBCookieJar and the MSIE* classes are not
114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmdistributed with the Python standard library, but are available from
124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmhttp://wwwsearch.sf.net/):
134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                        CookieJar____
154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                        /     \      \
164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            FileCookieJar      \      \
174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm             /    |   \         \      \
184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm MozillaCookieJar | LWPCookieJar \      \
194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                  |               |      \
204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                  |   ---MSIEBase |       \
214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                  |  /      |     |        \
224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                  | /   MSIEDBCookieJar BSDDBCookieJar
234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                  |/
244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm               MSIECookieJar
254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm"""
274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm__all__ = ['Cookie', 'CookieJar', 'CookiePolicy', 'DefaultCookiePolicy',
294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm           'FileCookieJar', 'LWPCookieJar', 'lwp_cookie_str', 'LoadError',
304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm           'MozillaCookieJar']
314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmimport re, urlparse, copy, time, urllib
334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmtry:
344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    import threading as _threading
354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmexcept ImportError:
364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    import dummy_threading as _threading
374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmimport httplib  # only for the default HTTP port
384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmfrom calendar import timegm
394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmdebug = False   # set to True to enable debugging via the logging module
414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmlogger = None
424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmdef _debug(*args):
444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    if not debug:
454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return
464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    global logger
474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    if not logger:
484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        import logging
494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        logger = logging.getLogger("cookielib")
504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    return logger.debug(*args)
514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmDEFAULT_HTTP_PORT = str(httplib.HTTP_PORT)
544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmMISSING_FILENAME_TEXT = ("a filename was not supplied (nor was the CookieJar "
554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                         "instance initialised with one)")
564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmdef _warn_unhandled_exception():
584710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # There are a few catch-all except: statements in this module, for
594710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # catching input that's bad in unexpected ways.  Warn if any
604710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # exceptions are caught there.
614710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    import warnings, traceback, StringIO
624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    f = StringIO.StringIO()
634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    traceback.print_exc(None, f)
644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    msg = f.getvalue()
654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    warnings.warn("cookielib bug!\n%s" % msg, stacklevel=2)
664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# Date/time conversion
694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# -----------------------------------------------------------------------------
704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmEPOCH_YEAR = 1970
724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmdef _timegm(tt):
734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    year, month, mday, hour, min, sec = tt[:6]
744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    if ((year >= EPOCH_YEAR) and (1 <= month <= 12) and (1 <= mday <= 31) and
754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        (0 <= hour <= 24) and (0 <= min <= 59) and (0 <= sec <= 61)):
764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return timegm(tt)
774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    else:
784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return None
794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmDAYS = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]
814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmMONTHS = ["Jan", "Feb", "Mar", "Apr", "May", "Jun",
824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm          "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]
834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmMONTHS_LOWER = []
844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmfor month in MONTHS: MONTHS_LOWER.append(month.lower())
854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmdef time2isoz(t=None):
874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    """Return a string representing time in seconds since epoch, t.
884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    If the function is called without an argument, it will use the current
904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    time.
914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    The format of the returned string is like "YYYY-MM-DD hh:mm:ssZ",
934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    representing Universal Time (UTC, aka GMT).  An example of this format is:
944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
954710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    1994-11-24 08:49:37Z
964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
974710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    """
984710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    if t is None: t = time.time()
994710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    year, mon, mday, hour, min, sec = time.gmtime(t)[:6]
1004710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    return "%04d-%02d-%02d %02d:%02d:%02dZ" % (
1014710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        year, mon, mday, hour, min, sec)
1024710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
1034710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmdef time2netscape(t=None):
1044710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    """Return a string representing time in seconds since epoch, t.
1054710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
1064710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    If the function is called without an argument, it will use the current
1074710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    time.
1084710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
1094710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    The format of the returned string is like this:
1104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
1114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    Wed, DD-Mon-YYYY HH:MM:SS GMT
1124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
1134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    """
1144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    if t is None: t = time.time()
1154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    year, mon, mday, hour, min, sec, wday = time.gmtime(t)[:7]
1164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    return "%s %02d-%s-%04d %02d:%02d:%02d GMT" % (
1174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        DAYS[wday], mday, MONTHS[mon-1], year, hour, min, sec)
1184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
1194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
1204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmUTC_ZONES = {"GMT": None, "UTC": None, "UT": None, "Z": None}
1214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
1224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmTIMEZONE_RE = re.compile(r"^([-+])?(\d\d?):?(\d\d)?$")
1234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmdef offset_from_tz_string(tz):
1244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    offset = None
1254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    if tz in UTC_ZONES:
1264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        offset = 0
1274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    else:
1284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        m = TIMEZONE_RE.search(tz)
1294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if m:
1304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            offset = 3600 * int(m.group(2))
1314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            if m.group(3):
1324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                offset = offset + 60 * int(m.group(3))
1334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            if m.group(1) == '-':
1344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                offset = -offset
1354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    return offset
1364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
1374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmdef _str2time(day, mon, yr, hr, min, sec, tz):
1384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # translate month name to number
1394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # month numbers start with 1 (January)
1404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    try:
1414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        mon = MONTHS_LOWER.index(mon.lower())+1
1424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    except ValueError:
1434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # maybe it's already a number
1444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        try:
1454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            imon = int(mon)
1464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        except ValueError:
1474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            return None
1484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if 1 <= imon <= 12:
1494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            mon = imon
1504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        else:
1514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            return None
1524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
1534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # make sure clock elements are defined
1544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    if hr is None: hr = 0
1554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    if min is None: min = 0
1564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    if sec is None: sec = 0
1574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
1584710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    yr = int(yr)
1594710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    day = int(day)
1604710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    hr = int(hr)
1614710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    min = int(min)
1624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    sec = int(sec)
1634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
1644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    if yr < 1000:
1654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # find "obvious" year
1664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        cur_yr = time.localtime(time.time())[0]
1674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        m = cur_yr % 100
1684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        tmp = yr
1694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        yr = yr + cur_yr - m
1704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        m = m - tmp
1714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if abs(m) > 50:
1724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            if m > 0: yr = yr + 100
1734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            else: yr = yr - 100
1744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
1754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # convert UTC time tuple to seconds since epoch (not timezone-adjusted)
1764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    t = _timegm((yr, mon, day, hr, min, sec, tz))
1774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
1784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    if t is not None:
1794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # adjust time using timezone string, to get absolute time since epoch
1804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if tz is None:
1814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            tz = "UTC"
1824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        tz = tz.upper()
1834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        offset = offset_from_tz_string(tz)
1844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if offset is None:
1854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            return None
1864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        t = t - offset
1874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
1884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    return t
1894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
1904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmSTRICT_DATE_RE = re.compile(
1914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    r"^[SMTWF][a-z][a-z], (\d\d) ([JFMASOND][a-z][a-z]) "
1924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    "(\d\d\d\d) (\d\d):(\d\d):(\d\d) GMT$")
1934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmWEEKDAY_RE = re.compile(
1944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    r"^(?:Sun|Mon|Tue|Wed|Thu|Fri|Sat)[a-z]*,?\s*", re.I)
1954710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmLOOSE_HTTP_DATE_RE = re.compile(
1964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    r"""^
1974710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    (\d\d?)            # day
1984710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm       (?:\s+|[-\/])
1994710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    (\w+)              # month
2004710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        (?:\s+|[-\/])
2014710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    (\d+)              # year
2024710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    (?:
2034710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm          (?:\s+|:)    # separator before clock
2044710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm       (\d\d?):(\d\d)  # hour:min
2054710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm       (?::(\d\d))?    # optional seconds
2064710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    )?                 # optional clock
2074710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm       \s*
2084710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    ([-+]?\d{2,4}|(?![APap][Mm]\b)[A-Za-z]+)? # timezone
2094710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm       \s*
2104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    (?:\(\w+\))?       # ASCII representation of timezone in parens.
2114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm       \s*$""", re.X)
2124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmdef http2time(text):
2134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    """Returns time in seconds since epoch of time represented by a string.
2144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
2154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    Return value is an integer.
2164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
2174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    None is returned if the format of str is unrecognized, the time is outside
2184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    the representable range, or the timezone string is not recognized.  If the
2194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    string contains no timezone, UTC is assumed.
2204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
2214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    The timezone in the string may be numerical (like "-0800" or "+0100") or a
2224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    string timezone (like "UTC", "GMT", "BST" or "EST").  Currently, only the
2234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    timezone strings equivalent to UTC (zero offset) are known to the function.
2244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
2254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    The function loosely parses the following formats:
2264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
2274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    Wed, 09 Feb 1994 22:23:32 GMT       -- HTTP format
2284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    Tuesday, 08-Feb-94 14:15:29 GMT     -- old rfc850 HTTP format
2294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    Tuesday, 08-Feb-1994 14:15:29 GMT   -- broken rfc850 HTTP format
2304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    09 Feb 1994 22:23:32 GMT            -- HTTP format (no weekday)
2314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    08-Feb-94 14:15:29 GMT              -- rfc850 format (no weekday)
2324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    08-Feb-1994 14:15:29 GMT            -- broken rfc850 format (no weekday)
2334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
2344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    The parser ignores leading and trailing whitespace.  The time may be
2354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    absent.
2364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
2374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    If the year is given with only 2 digits, the function will select the
2384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    century that makes the year closest to the current date.
2394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
2404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    """
2414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # fast exit for strictly conforming string
2424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    m = STRICT_DATE_RE.search(text)
2434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    if m:
2444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        g = m.groups()
2454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        mon = MONTHS_LOWER.index(g[1].lower()) + 1
2464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        tt = (int(g[2]), mon, int(g[0]),
2474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm              int(g[3]), int(g[4]), float(g[5]))
2484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return _timegm(tt)
2494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
2504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # No, we need some messy parsing...
2514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
2524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # clean up
2534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    text = text.lstrip()
2544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    text = WEEKDAY_RE.sub("", text, 1)  # Useless weekday
2554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
2564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # tz is time zone specifier string
2574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    day, mon, yr, hr, min, sec, tz = [None]*7
2584710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
2594710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # loose regexp parse
2604710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    m = LOOSE_HTTP_DATE_RE.search(text)
2614710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    if m is not None:
2624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        day, mon, yr, hr, min, sec, tz = m.groups()
2634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    else:
2644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return None  # bad format
2654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
2664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    return _str2time(day, mon, yr, hr, min, sec, tz)
2674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
2684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmISO_DATE_RE = re.compile(
2694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    """^
2704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    (\d{4})              # year
2714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm       [-\/]?
2724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    (\d\d?)              # numerical month
2734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm       [-\/]?
2744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    (\d\d?)              # day
2754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm   (?:
2764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm         (?:\s+|[-:Tt])  # separator before clock
2774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm      (\d\d?):?(\d\d)    # hour:min
2784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm      (?::?(\d\d(?:\.\d*)?))?  # optional seconds (and fractional)
2794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm   )?                    # optional clock
2804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm      \s*
2814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm   ([-+]?\d\d?:?(:?\d\d)?
2824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    |Z|z)?               # timezone  (Z is "zero meridian", i.e. GMT)
2834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm      \s*$""", re.X)
2844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmdef iso2time(text):
2854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    """
2864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    As for http2time, but parses the ISO 8601 formats:
2874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
2884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    1994-02-03 14:15:29 -0100    -- ISO 8601 format
2894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    1994-02-03 14:15:29          -- zone is optional
2904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    1994-02-03                   -- only date
2914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    1994-02-03T14:15:29          -- Use T as separator
2924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    19940203T141529Z             -- ISO 8601 compact format
2934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    19940203                     -- only date
2944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
2954710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    """
2964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # clean up
2974710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    text = text.lstrip()
2984710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
2994710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # tz is time zone specifier string
3004710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    day, mon, yr, hr, min, sec, tz = [None]*7
3014710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
3024710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # loose regexp parse
3034710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    m = ISO_DATE_RE.search(text)
3044710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    if m is not None:
3054710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # XXX there's an extra bit of the timezone I'm ignoring here: is
3064710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        #   this the right thing to do?
3074710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        yr, mon, day, hr, min, sec, tz, _ = m.groups()
3084710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    else:
3094710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return None  # bad format
3104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
3114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    return _str2time(day, mon, yr, hr, min, sec, tz)
3124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
3134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
3144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# Header parsing
3154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# -----------------------------------------------------------------------------
3164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
3174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmdef unmatched(match):
3184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    """Return unmatched part of re.Match object."""
3194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    start, end = match.span(0)
3204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    return match.string[:start]+match.string[end:]
3214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
3224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmHEADER_TOKEN_RE =        re.compile(r"^\s*([^=\s;,]+)")
3234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmHEADER_QUOTED_VALUE_RE = re.compile(r"^\s*=\s*\"([^\"\\]*(?:\\.[^\"\\]*)*)\"")
3244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmHEADER_VALUE_RE =        re.compile(r"^\s*=\s*([^\s;,]*)")
3254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmHEADER_ESCAPE_RE = re.compile(r"\\(.)")
3264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmdef split_header_words(header_values):
3274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    r"""Parse header values into a list of lists containing key,value pairs.
3284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
3294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    The function knows how to deal with ",", ";" and "=" as well as quoted
3304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    values after "=".  A list of space separated tokens are parsed as if they
3314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    were separated by ";".
3324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
3334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    If the header_values passed as argument contains multiple values, then they
3344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    are treated as if they were a single value separated by comma ",".
3354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
3364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    This means that this function is useful for parsing header fields that
3374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    follow this syntax (BNF as from the HTTP/1.1 specification, but we relax
3384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    the requirement for tokens).
3394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
3404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm      headers           = #header
3414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm      header            = (token | parameter) *( [";"] (token | parameter))
3424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
3434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm      token             = 1*<any CHAR except CTLs or separators>
3444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm      separators        = "(" | ")" | "<" | ">" | "@"
3454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                        | "," | ";" | ":" | "\" | <">
3464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                        | "/" | "[" | "]" | "?" | "="
3474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                        | "{" | "}" | SP | HT
3484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
3494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm      quoted-string     = ( <"> *(qdtext | quoted-pair ) <"> )
3504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm      qdtext            = <any TEXT except <">>
3514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm      quoted-pair       = "\" CHAR
3524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
3534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm      parameter         = attribute "=" value
3544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm      attribute         = token
3554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm      value             = token | quoted-string
3564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
3574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    Each header is represented by a list of key/value pairs.  The value for a
3584710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    simple token (not part of a parameter) is None.  Syntactically incorrect
3594710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    headers will not necessarily be parsed as you would want.
3604710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
3614710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    This is easier to describe with some examples:
3624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
3634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    >>> split_header_words(['foo="bar"; port="80,81"; discard, bar=baz'])
3644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    [[('foo', 'bar'), ('port', '80,81'), ('discard', None)], [('bar', 'baz')]]
3654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    >>> split_header_words(['text/html; charset="iso-8859-1"'])
3664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    [[('text/html', None), ('charset', 'iso-8859-1')]]
3674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    >>> split_header_words([r'Basic realm="\"foo\bar\""'])
3684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    [[('Basic', None), ('realm', '"foobar"')]]
3694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
3704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    """
3714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    assert not isinstance(header_values, basestring)
3724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    result = []
3734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    for text in header_values:
3744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        orig_text = text
3754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        pairs = []
3764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        while text:
3774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            m = HEADER_TOKEN_RE.search(text)
3784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            if m:
3794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                text = unmatched(m)
3804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                name = m.group(1)
3814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                m = HEADER_QUOTED_VALUE_RE.search(text)
3824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                if m:  # quoted value
3834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    text = unmatched(m)
3844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    value = m.group(1)
3854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    value = HEADER_ESCAPE_RE.sub(r"\1", value)
3864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                else:
3874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    m = HEADER_VALUE_RE.search(text)
3884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    if m:  # unquoted value
3894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                        text = unmatched(m)
3904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                        value = m.group(1)
3914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                        value = value.rstrip()
3924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    else:
3934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                        # no value, a lone token
3944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                        value = None
3954710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                pairs.append((name, value))
3964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            elif text.lstrip().startswith(","):
3974710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                # concatenated headers, as per RFC 2616 section 4.2
3984710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                text = text.lstrip()[1:]
3994710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                if pairs: result.append(pairs)
4004710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                pairs = []
4014710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            else:
4024710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                # skip junk
4034710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                non_junk, nr_junk_chars = re.subn("^[=\s;]*", "", text)
4044710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                assert nr_junk_chars > 0, (
4054710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    "split_header_words bug: '%s', '%s', %s" %
4064710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    (orig_text, text, pairs))
4074710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                text = non_junk
4084710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if pairs: result.append(pairs)
4094710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    return result
4104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
4114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmHEADER_JOIN_ESCAPE_RE = re.compile(r"([\"\\])")
4124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmdef join_header_words(lists):
4134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    """Do the inverse (almost) of the conversion done by split_header_words.
4144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
4154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    Takes a list of lists of (key, value) pairs and produces a single header
4164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    value.  Attribute values are quoted if needed.
4174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
4184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    >>> join_header_words([[("text/plain", None), ("charset", "iso-8859/1")]])
4194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    'text/plain; charset="iso-8859/1"'
4204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    >>> join_header_words([[("text/plain", None)], [("charset", "iso-8859/1")]])
4214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    'text/plain, charset="iso-8859/1"'
4224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
4234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    """
4244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    headers = []
4254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    for pairs in lists:
4264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        attr = []
4274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        for k, v in pairs:
4284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            if v is not None:
4294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                if not re.search(r"^\w+$", v):
4304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    v = HEADER_JOIN_ESCAPE_RE.sub(r"\\\1", v)  # escape " and \
4314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    v = '"%s"' % v
4324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                k = "%s=%s" % (k, v)
4334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            attr.append(k)
4344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if attr: headers.append("; ".join(attr))
4354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    return ", ".join(headers)
4364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
4374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmdef _strip_quotes(text):
4384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    if text.startswith('"'):
4394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        text = text[1:]
4404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    if text.endswith('"'):
4414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        text = text[:-1]
4424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    return text
4434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
4444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmdef parse_ns_headers(ns_headers):
4454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    """Ad-hoc parser for Netscape protocol cookie-attributes.
4464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
4474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    The old Netscape cookie format for Set-Cookie can for instance contain
4484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    an unquoted "," in the expires field, so we have to use this ad-hoc
4494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    parser instead of split_header_words.
4504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
4514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    XXX This may not make the best possible effort to parse all the crap
4524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    that Netscape Cookie headers contain.  Ronald Tschalar's HTTPClient
4534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    parser is probably better, so could do worse than following that if
4544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    this ever gives any trouble.
4554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
4564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    Currently, this is also used for parsing RFC 2109 cookies.
4574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
4584710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    """
4594710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    known_attrs = ("expires", "domain", "path", "secure",
4604710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                   # RFC 2109 attrs (may turn up in Netscape cookies, too)
4614710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                   "version", "port", "max-age")
4624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
4634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    result = []
4644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    for ns_header in ns_headers:
4654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        pairs = []
4664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        version_set = False
4674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        for ii, param in enumerate(re.split(r";\s*", ns_header)):
4684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            param = param.rstrip()
4694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            if param == "": continue
4704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            if "=" not in param:
4714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                k, v = param, None
4724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            else:
4734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                k, v = re.split(r"\s*=\s*", param, 1)
4744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                k = k.lstrip()
4754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            if ii != 0:
4764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                lc = k.lower()
4774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                if lc in known_attrs:
4784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    k = lc
4794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                if k == "version":
4804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    # This is an RFC 2109 cookie.
4814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    v = _strip_quotes(v)
4824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    version_set = True
4834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                if k == "expires":
4844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    # convert expires date to seconds since epoch
4854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    v = http2time(_strip_quotes(v))  # None if invalid
4864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            pairs.append((k, v))
4874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
4884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if pairs:
4894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            if not version_set:
4904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                pairs.append(("version", "0"))
4914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            result.append(pairs)
4924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
4934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    return result
4944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
4954710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
4964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmIPV4_RE = re.compile(r"\.\d+$")
4974710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmdef is_HDN(text):
4984710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    """Return True if text is a host domain name."""
4994710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # XXX
5004710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # This may well be wrong.  Which RFC is HDN defined in, if any (for
5014710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    #  the purposes of RFC 2965)?
5024710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # For the current implementation, what about IPv6?  Remember to look
5034710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    #  at other uses of IPV4_RE also, if change this.
5044710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    if IPV4_RE.search(text):
5054710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return False
5064710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    if text == "":
5074710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return False
5084710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    if text[0] == "." or text[-1] == ".":
5094710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return False
5104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    return True
5114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
5124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmdef domain_match(A, B):
5134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    """Return True if domain A domain-matches domain B, according to RFC 2965.
5144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
5154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    A and B may be host domain names or IP addresses.
5164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
5174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    RFC 2965, section 1:
5184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
5194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    Host names can be specified either as an IP address or a HDN string.
5204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    Sometimes we compare one host name with another.  (Such comparisons SHALL
5214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    be case-insensitive.)  Host A's name domain-matches host B's if
5224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
5234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm         *  their host name strings string-compare equal; or
5244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
5254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm         * A is a HDN string and has the form NB, where N is a non-empty
5264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            name string, B has the form .B', and B' is a HDN string.  (So,
5274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            x.y.com domain-matches .Y.com but not Y.com.)
5284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
5294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    Note that domain-match is not a commutative operation: a.b.c.com
5304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    domain-matches .c.com, but not the reverse.
5314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
5324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    """
5334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # Note that, if A or B are IP addresses, the only relevant part of the
5344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # definition of the domain-match algorithm is the direct string-compare.
5354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    A = A.lower()
5364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    B = B.lower()
5374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    if A == B:
5384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return True
5394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    if not is_HDN(A):
5404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return False
5414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    i = A.rfind(B)
5424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    if i == -1 or i == 0:
5434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # A does not have form NB, or N is the empty string
5444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return False
5454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    if not B.startswith("."):
5464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return False
5474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    if not is_HDN(B[1:]):
5484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return False
5494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    return True
5504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
5514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmdef liberal_is_HDN(text):
5524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    """Return True if text is a sort-of-like a host domain name.
5534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
5544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    For accepting/blocking domains.
5554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
5564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    """
5574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    if IPV4_RE.search(text):
5584710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return False
5594710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    return True
5604710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
5614710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmdef user_domain_match(A, B):
5624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    """For blocking/accepting domains.
5634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
5644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    A and B may be host domain names or IP addresses.
5654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
5664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    """
5674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    A = A.lower()
5684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    B = B.lower()
5694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    if not (liberal_is_HDN(A) and liberal_is_HDN(B)):
5704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if A == B:
5714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            # equal IP addresses
5724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            return True
5734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return False
5744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    initial_dot = B.startswith(".")
5754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    if initial_dot and A.endswith(B):
5764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return True
5774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    if not initial_dot and A == B:
5784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return True
5794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    return False
5804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
5814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmcut_port_re = re.compile(r":\d+$")
5824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmdef request_host(request):
5834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    """Return request-host, as defined by RFC 2965.
5844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
5854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    Variation from RFC: returned value is lowercased, for convenient
5864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    comparison.
5874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
5884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    """
5894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    url = request.get_full_url()
5904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    host = urlparse.urlparse(url)[1]
5914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    if host == "":
5924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        host = request.get_header("Host", "")
5934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
5944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # remove port, if present
5954710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    host = cut_port_re.sub("", host, 1)
5964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    return host.lower()
5974710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
5984710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmdef eff_request_host(request):
5994710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    """Return a tuple (request-host, effective request-host name).
6004710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
6014710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    As defined by RFC 2965, except both are lowercased.
6024710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
6034710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    """
6044710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    erhn = req_host = request_host(request)
6054710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    if req_host.find(".") == -1 and not IPV4_RE.search(req_host):
6064710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        erhn = req_host + ".local"
6074710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    return req_host, erhn
6084710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
6094710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmdef request_path(request):
6104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    """Path component of request-URI, as defined by RFC 2965."""
6114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    url = request.get_full_url()
6124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    parts = urlparse.urlsplit(url)
6134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    path = escape_path(parts.path)
6144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    if not path.startswith("/"):
6154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # fix bad RFC 2396 absoluteURI
6164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        path = "/" + path
6174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    return path
6184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
6194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmdef request_port(request):
6204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    host = request.get_host()
6214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    i = host.find(':')
6224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    if i >= 0:
6234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        port = host[i+1:]
6244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        try:
6254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            int(port)
6264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        except ValueError:
6274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            _debug("nonnumeric port: '%s'", port)
6284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            return None
6294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    else:
6304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        port = DEFAULT_HTTP_PORT
6314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    return port
6324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
6334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# Characters in addition to A-Z, a-z, 0-9, '_', '.', and '-' that don't
6344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# need to be escaped to form a valid HTTP URL (RFCs 2396 and 1738).
6354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmHTTP_PATH_SAFE = "%/;:@&=+$,!~*'()"
6364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmESCAPED_CHAR_RE = re.compile(r"%([0-9a-fA-F][0-9a-fA-F])")
6374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmdef uppercase_escaped_char(match):
6384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    return "%%%s" % match.group(1).upper()
6394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmdef escape_path(path):
6404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    """Escape any invalid characters in HTTP URL, and uppercase all escapes."""
6414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # There's no knowing what character encoding was used to create URLs
6424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # containing %-escapes, but since we have to pick one to escape invalid
6434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # path characters, we pick UTF-8, as recommended in the HTML 4.0
6444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # specification:
6454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # http://www.w3.org/TR/REC-html40/appendix/notes.html#h-B.2.1
6464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # And here, kind of: draft-fielding-uri-rfc2396bis-03
6474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # (And in draft IRI specification: draft-duerst-iri-05)
6484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # (And here, for new URI schemes: RFC 2718)
6494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    if isinstance(path, unicode):
6504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        path = path.encode("utf-8")
6514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    path = urllib.quote(path, HTTP_PATH_SAFE)
6524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    path = ESCAPED_CHAR_RE.sub(uppercase_escaped_char, path)
6534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    return path
6544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
6554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmdef reach(h):
6564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    """Return reach of host h, as defined by RFC 2965, section 1.
6574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
6584710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    The reach R of a host name H is defined as follows:
6594710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
6604710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm       *  If
6614710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
6624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm          -  H is the host domain name of a host; and,
6634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
6644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm          -  H has the form A.B; and
6654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
6664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm          -  A has no embedded (that is, interior) dots; and
6674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
6684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm          -  B has at least one embedded dot, or B is the string "local".
6694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm             then the reach of H is .B.
6704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
6714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm       *  Otherwise, the reach of H is H.
6724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
6734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    >>> reach("www.acme.com")
6744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    '.acme.com'
6754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    >>> reach("acme.com")
6764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    'acme.com'
6774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    >>> reach("acme.local")
6784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    '.local'
6794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
6804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    """
6814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    i = h.find(".")
6824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    if i >= 0:
6834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        #a = h[:i]  # this line is only here to show what a is
6844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        b = h[i+1:]
6854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        i = b.find(".")
6864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if is_HDN(h) and (i >= 0 or b == "local"):
6874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            return "."+b
6884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    return h
6894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
6904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmdef is_third_party(request):
6914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    """
6924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
6934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    RFC 2965, section 3.3.6:
6944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
6954710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        An unverifiable transaction is to a third-party host if its request-
6964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        host U does not domain-match the reach R of the request-host O in the
6974710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        origin transaction.
6984710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
6994710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    """
7004710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    req_host = request_host(request)
7014710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    if not domain_match(req_host, reach(request.get_origin_req_host())):
7024710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return True
7034710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    else:
7044710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return False
7054710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
7064710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
7074710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmclass Cookie:
7084710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    """HTTP Cookie.
7094710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
7104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    This class represents both Netscape and RFC 2965 cookies.
7114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
7124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    This is deliberately a very simple class.  It just holds attributes.  It's
7134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    possible to construct Cookie instances that don't comply with the cookie
7144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    standards.  CookieJar.make_cookies is the factory function for Cookie
7154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    objects -- it deals with cookie parsing, supplying defaults, and
7164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    normalising to the representation used in this class.  CookiePolicy is
7174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    responsible for checking them to see whether they should be accepted from
7184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    and returned to the server.
7194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
7204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    Note that the port may be present in the headers, but unspecified ("Port"
7214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    rather than"Port=80", for example); if this is the case, port is None.
7224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
7234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    """
7244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
7254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def __init__(self, version, name, value,
7264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                 port, port_specified,
7274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                 domain, domain_specified, domain_initial_dot,
7284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                 path, path_specified,
7294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                 secure,
7304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                 expires,
7314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                 discard,
7324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                 comment,
7334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                 comment_url,
7344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                 rest,
7354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                 rfc2109=False,
7364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                 ):
7374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
7384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if version is not None: version = int(version)
7394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if expires is not None: expires = int(expires)
7404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if port is None and port_specified is True:
7414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            raise ValueError("if port is None, port_specified must be false")
7424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
7434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.version = version
7444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.name = name
7454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.value = value
7464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.port = port
7474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.port_specified = port_specified
7484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # normalise case, as per RFC 2965 section 3.3.3
7494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.domain = domain.lower()
7504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.domain_specified = domain_specified
7514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # Sigh.  We need to know whether the domain given in the
7524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # cookie-attribute had an initial dot, in order to follow RFC 2965
7534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # (as clarified in draft errata).  Needed for the returned $Domain
7544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # value.
7554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.domain_initial_dot = domain_initial_dot
7564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.path = path
7574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.path_specified = path_specified
7584710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.secure = secure
7594710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.expires = expires
7604710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.discard = discard
7614710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.comment = comment
7624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.comment_url = comment_url
7634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.rfc2109 = rfc2109
7644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
7654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self._rest = copy.copy(rest)
7664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
7674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def has_nonstandard_attr(self, name):
7684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return name in self._rest
7694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def get_nonstandard_attr(self, name, default=None):
7704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return self._rest.get(name, default)
7714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def set_nonstandard_attr(self, name, value):
7724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self._rest[name] = value
7734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
7744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def is_expired(self, now=None):
7754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if now is None: now = time.time()
7764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if (self.expires is not None) and (self.expires <= now):
7774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            return True
7784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return False
7794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
7804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def __str__(self):
7814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if self.port is None: p = ""
7824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        else: p = ":"+self.port
7834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        limit = self.domain + p + self.path
7844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if self.value is not None:
7854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            namevalue = "%s=%s" % (self.name, self.value)
7864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        else:
7874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            namevalue = self.name
7884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return "<Cookie %s for %s>" % (namevalue, limit)
7894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
7904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def __repr__(self):
7914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        args = []
7924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        for name in ("version", "name", "value",
7934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                     "port", "port_specified",
7944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                     "domain", "domain_specified", "domain_initial_dot",
7954710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                     "path", "path_specified",
7964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                     "secure", "expires", "discard", "comment", "comment_url",
7974710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                     ):
7984710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            attr = getattr(self, name)
7994710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            args.append("%s=%s" % (name, repr(attr)))
8004710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        args.append("rest=%s" % repr(self._rest))
8014710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        args.append("rfc2109=%s" % repr(self.rfc2109))
8024710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return "Cookie(%s)" % ", ".join(args)
8034710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
8044710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
8054710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmclass CookiePolicy:
8064710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    """Defines which cookies get accepted from and returned to server.
8074710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
8084710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    May also modify cookies, though this is probably a bad idea.
8094710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
8104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    The subclass DefaultCookiePolicy defines the standard rules for Netscape
8114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    and RFC 2965 cookies -- override that if you want a customised policy.
8124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
8134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    """
8144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def set_ok(self, cookie, request):
8154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """Return true if (and only if) cookie should be accepted from server.
8164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
8174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        Currently, pre-expired cookies never get this far -- the CookieJar
8184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        class deletes such cookies itself.
8194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
8204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """
8214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        raise NotImplementedError()
8224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
8234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def return_ok(self, cookie, request):
8244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """Return true if (and only if) cookie should be returned to server."""
8254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        raise NotImplementedError()
8264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
8274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def domain_return_ok(self, domain, request):
8284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """Return false if cookies should not be returned, given cookie domain.
8294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """
8304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return True
8314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
8324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def path_return_ok(self, path, request):
8334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """Return false if cookies should not be returned, given cookie path.
8344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """
8354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return True
8364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
8374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
8384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmclass DefaultCookiePolicy(CookiePolicy):
8394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    """Implements the standard rules for accepting and returning cookies."""
8404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
8414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    DomainStrictNoDots = 1
8424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    DomainStrictNonDomain = 2
8434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    DomainRFC2965Match = 4
8444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
8454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    DomainLiberal = 0
8464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    DomainStrict = DomainStrictNoDots|DomainStrictNonDomain
8474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
8484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def __init__(self,
8494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                 blocked_domains=None, allowed_domains=None,
8504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                 netscape=True, rfc2965=False,
8514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                 rfc2109_as_netscape=None,
8524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                 hide_cookie2=False,
8534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                 strict_domain=False,
8544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                 strict_rfc2965_unverifiable=True,
8554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                 strict_ns_unverifiable=False,
8564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                 strict_ns_domain=DomainLiberal,
8574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                 strict_ns_set_initial_dollar=False,
8584710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                 strict_ns_set_path=False,
8594710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                 ):
8604710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """Constructor arguments should be passed as keyword arguments only."""
8614710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.netscape = netscape
8624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.rfc2965 = rfc2965
8634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.rfc2109_as_netscape = rfc2109_as_netscape
8644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.hide_cookie2 = hide_cookie2
8654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.strict_domain = strict_domain
8664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.strict_rfc2965_unverifiable = strict_rfc2965_unverifiable
8674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.strict_ns_unverifiable = strict_ns_unverifiable
8684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.strict_ns_domain = strict_ns_domain
8694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.strict_ns_set_initial_dollar = strict_ns_set_initial_dollar
8704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.strict_ns_set_path = strict_ns_set_path
8714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
8724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if blocked_domains is not None:
8734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self._blocked_domains = tuple(blocked_domains)
8744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        else:
8754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self._blocked_domains = ()
8764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
8774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if allowed_domains is not None:
8784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            allowed_domains = tuple(allowed_domains)
8794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self._allowed_domains = allowed_domains
8804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
8814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def blocked_domains(self):
8824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """Return the sequence of blocked domains (as a tuple)."""
8834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return self._blocked_domains
8844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def set_blocked_domains(self, blocked_domains):
8854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """Set the sequence of blocked domains."""
8864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self._blocked_domains = tuple(blocked_domains)
8874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
8884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def is_blocked(self, domain):
8894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        for blocked_domain in self._blocked_domains:
8904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            if user_domain_match(domain, blocked_domain):
8914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                return True
8924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return False
8934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
8944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def allowed_domains(self):
8954710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """Return None, or the sequence of allowed domains (as a tuple)."""
8964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return self._allowed_domains
8974710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def set_allowed_domains(self, allowed_domains):
8984710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """Set the sequence of allowed domains, or None."""
8994710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if allowed_domains is not None:
9004710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            allowed_domains = tuple(allowed_domains)
9014710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self._allowed_domains = allowed_domains
9024710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
9034710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def is_not_allowed(self, domain):
9044710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if self._allowed_domains is None:
9054710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            return False
9064710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        for allowed_domain in self._allowed_domains:
9074710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            if user_domain_match(domain, allowed_domain):
9084710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                return False
9094710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return True
9104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
9114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def set_ok(self, cookie, request):
9124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """
9134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        If you override .set_ok(), be sure to call this method.  If it returns
9144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        false, so should your subclass (assuming your subclass wants to be more
9154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        strict about which cookies to accept).
9164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
9174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """
9184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        _debug(" - checking cookie %s=%s", cookie.name, cookie.value)
9194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
9204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        assert cookie.name is not None
9214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
9224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        for n in "version", "verifiability", "name", "path", "domain", "port":
9234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            fn_name = "set_ok_"+n
9244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            fn = getattr(self, fn_name)
9254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            if not fn(cookie, request):
9264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                return False
9274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
9284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return True
9294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
9304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def set_ok_version(self, cookie, request):
9314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if cookie.version is None:
9324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            # Version is always set to 0 by parse_ns_headers if it's a Netscape
9334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            # cookie, so this must be an invalid RFC 2965 cookie.
9344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            _debug("   Set-Cookie2 without version attribute (%s=%s)",
9354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                   cookie.name, cookie.value)
9364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            return False
9374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if cookie.version > 0 and not self.rfc2965:
9384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            _debug("   RFC 2965 cookies are switched off")
9394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            return False
9404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        elif cookie.version == 0 and not self.netscape:
9414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            _debug("   Netscape cookies are switched off")
9424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            return False
9434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return True
9444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
9454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def set_ok_verifiability(self, cookie, request):
9464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if request.is_unverifiable() and is_third_party(request):
9474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            if cookie.version > 0 and self.strict_rfc2965_unverifiable:
9484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                _debug("   third-party RFC 2965 cookie during "
9494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                             "unverifiable transaction")
9504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                return False
9514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            elif cookie.version == 0 and self.strict_ns_unverifiable:
9524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                _debug("   third-party Netscape cookie during "
9534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                             "unverifiable transaction")
9544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                return False
9554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return True
9564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
9574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def set_ok_name(self, cookie, request):
9584710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # Try and stop servers setting V0 cookies designed to hack other
9594710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # servers that know both V0 and V1 protocols.
9604710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if (cookie.version == 0 and self.strict_ns_set_initial_dollar and
9614710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            cookie.name.startswith("$")):
9624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            _debug("   illegal name (starts with '$'): '%s'", cookie.name)
9634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            return False
9644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return True
9654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
9664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def set_ok_path(self, cookie, request):
9674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if cookie.path_specified:
9684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            req_path = request_path(request)
9694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            if ((cookie.version > 0 or
9704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                 (cookie.version == 0 and self.strict_ns_set_path)) and
9714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                not req_path.startswith(cookie.path)):
9724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                _debug("   path attribute %s is not a prefix of request "
9734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                       "path %s", cookie.path, req_path)
9744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                return False
9754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return True
9764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
9774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def set_ok_domain(self, cookie, request):
9784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if self.is_blocked(cookie.domain):
9794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            _debug("   domain %s is in user block-list", cookie.domain)
9804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            return False
9814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if self.is_not_allowed(cookie.domain):
9824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            _debug("   domain %s is not in user allow-list", cookie.domain)
9834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            return False
9844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if cookie.domain_specified:
9854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            req_host, erhn = eff_request_host(request)
9864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            domain = cookie.domain
9874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            if self.strict_domain and (domain.count(".") >= 2):
9884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                # XXX This should probably be compared with the Konqueror
9894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                # (kcookiejar.cpp) and Mozilla implementations, but it's a
9904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                # losing battle.
9914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                i = domain.rfind(".")
9924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                j = domain.rfind(".", 0, i)
9934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                if j == 0:  # domain like .foo.bar
9944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    tld = domain[i+1:]
9954710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    sld = domain[j+1:i]
9964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    if sld.lower() in ("co", "ac", "com", "edu", "org", "net",
9974710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                       "gov", "mil", "int", "aero", "biz", "cat", "coop",
9984710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                       "info", "jobs", "mobi", "museum", "name", "pro",
9994710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                       "travel", "eu") and len(tld) == 2:
10004710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                        # domain like .co.uk
10014710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                        _debug("   country-code second level domain %s", domain)
10024710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                        return False
10034710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            if domain.startswith("."):
10044710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                undotted_domain = domain[1:]
10054710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            else:
10064710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                undotted_domain = domain
10074710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            embedded_dots = (undotted_domain.find(".") >= 0)
10084710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            if not embedded_dots and domain != ".local":
10094710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                _debug("   non-local domain %s contains no embedded dot",
10104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                       domain)
10114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                return False
10124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            if cookie.version == 0:
10134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                if (not erhn.endswith(domain) and
10144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    (not erhn.startswith(".") and
10154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                     not ("."+erhn).endswith(domain))):
10164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    _debug("   effective request-host %s (even with added "
10174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                           "initial dot) does not end end with %s",
10184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                           erhn, domain)
10194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    return False
10204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            if (cookie.version > 0 or
10214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                (self.strict_ns_domain & self.DomainRFC2965Match)):
10224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                if not domain_match(erhn, domain):
10234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    _debug("   effective request-host %s does not domain-match "
10244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                           "%s", erhn, domain)
10254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    return False
10264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            if (cookie.version > 0 or
10274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                (self.strict_ns_domain & self.DomainStrictNoDots)):
10284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                host_prefix = req_host[:-len(domain)]
10294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                if (host_prefix.find(".") >= 0 and
10304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    not IPV4_RE.search(req_host)):
10314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    _debug("   host prefix %s for domain %s contains a dot",
10324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                           host_prefix, domain)
10334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    return False
10344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return True
10354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
10364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def set_ok_port(self, cookie, request):
10374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if cookie.port_specified:
10384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            req_port = request_port(request)
10394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            if req_port is None:
10404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                req_port = "80"
10414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            else:
10424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                req_port = str(req_port)
10434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            for p in cookie.port.split(","):
10444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                try:
10454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    int(p)
10464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                except ValueError:
10474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    _debug("   bad port %s (not numeric)", p)
10484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    return False
10494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                if p == req_port:
10504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    break
10514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            else:
10524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                _debug("   request port (%s) not found in %s",
10534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                       req_port, cookie.port)
10544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                return False
10554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return True
10564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
10574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def return_ok(self, cookie, request):
10584710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """
10594710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        If you override .return_ok(), be sure to call this method.  If it
10604710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        returns false, so should your subclass (assuming your subclass wants to
10614710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        be more strict about which cookies to return).
10624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
10634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """
10644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # Path has already been checked by .path_return_ok(), and domain
10654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # blocking done by .domain_return_ok().
10664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        _debug(" - checking cookie %s=%s", cookie.name, cookie.value)
10674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
10684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        for n in "version", "verifiability", "secure", "expires", "port", "domain":
10694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            fn_name = "return_ok_"+n
10704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            fn = getattr(self, fn_name)
10714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            if not fn(cookie, request):
10724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                return False
10734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return True
10744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
10754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def return_ok_version(self, cookie, request):
10764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if cookie.version > 0 and not self.rfc2965:
10774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            _debug("   RFC 2965 cookies are switched off")
10784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            return False
10794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        elif cookie.version == 0 and not self.netscape:
10804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            _debug("   Netscape cookies are switched off")
10814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            return False
10824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return True
10834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
10844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def return_ok_verifiability(self, cookie, request):
10854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if request.is_unverifiable() and is_third_party(request):
10864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            if cookie.version > 0 and self.strict_rfc2965_unverifiable:
10874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                _debug("   third-party RFC 2965 cookie during unverifiable "
10884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                       "transaction")
10894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                return False
10904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            elif cookie.version == 0 and self.strict_ns_unverifiable:
10914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                _debug("   third-party Netscape cookie during unverifiable "
10924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                       "transaction")
10934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                return False
10944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return True
10954710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
10964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def return_ok_secure(self, cookie, request):
10974710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if cookie.secure and request.get_type() != "https":
10984710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            _debug("   secure cookie with non-secure request")
10994710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            return False
11004710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return True
11014710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
11024710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def return_ok_expires(self, cookie, request):
11034710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if cookie.is_expired(self._now):
11044710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            _debug("   cookie expired")
11054710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            return False
11064710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return True
11074710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
11084710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def return_ok_port(self, cookie, request):
11094710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if cookie.port:
11104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            req_port = request_port(request)
11114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            if req_port is None:
11124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                req_port = "80"
11134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            for p in cookie.port.split(","):
11144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                if p == req_port:
11154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    break
11164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            else:
11174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                _debug("   request port %s does not match cookie port %s",
11184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                       req_port, cookie.port)
11194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                return False
11204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return True
11214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
11224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def return_ok_domain(self, cookie, request):
11234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        req_host, erhn = eff_request_host(request)
11244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        domain = cookie.domain
11254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
11264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # strict check of non-domain cookies: Mozilla does this, MSIE5 doesn't
11274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if (cookie.version == 0 and
11284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            (self.strict_ns_domain & self.DomainStrictNonDomain) and
11294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            not cookie.domain_specified and domain != erhn):
11304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            _debug("   cookie with unspecified domain does not string-compare "
11314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                   "equal to request domain")
11324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            return False
11334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
11344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if cookie.version > 0 and not domain_match(erhn, domain):
11354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            _debug("   effective request-host name %s does not domain-match "
11364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                   "RFC 2965 cookie domain %s", erhn, domain)
11374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            return False
11384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if cookie.version == 0 and not ("."+erhn).endswith(domain):
11394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            _debug("   request-host %s does not match Netscape cookie domain "
11404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                   "%s", req_host, domain)
11414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            return False
11424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return True
11434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
11444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def domain_return_ok(self, domain, request):
11454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # Liberal check of.  This is here as an optimization to avoid
11464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # having to load lots of MSIE cookie files unless necessary.
11474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        req_host, erhn = eff_request_host(request)
11484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if not req_host.startswith("."):
11494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            req_host = "."+req_host
11504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if not erhn.startswith("."):
11514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            erhn = "."+erhn
11524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if not (req_host.endswith(domain) or erhn.endswith(domain)):
11534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            #_debug("   request domain %s does not match cookie domain %s",
11544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            #       req_host, domain)
11554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            return False
11564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
11574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if self.is_blocked(domain):
11584710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            _debug("   domain %s is in user block-list", domain)
11594710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            return False
11604710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if self.is_not_allowed(domain):
11614710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            _debug("   domain %s is not in user allow-list", domain)
11624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            return False
11634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
11644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return True
11654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
11664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def path_return_ok(self, path, request):
11674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        _debug("- checking cookie path=%s", path)
11684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        req_path = request_path(request)
11694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if not req_path.startswith(path):
11704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            _debug("  %s does not path-match %s", req_path, path)
11714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            return False
11724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return True
11734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
11744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
11754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmdef vals_sorted_by_key(adict):
11764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    keys = adict.keys()
11774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    keys.sort()
11784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    return map(adict.get, keys)
11794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
11804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmdef deepvalues(mapping):
11814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    """Iterates over nested mapping, depth-first, in sorted order by key."""
11824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    values = vals_sorted_by_key(mapping)
11834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    for obj in values:
11844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        mapping = False
11854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        try:
11864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            obj.items
11874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        except AttributeError:
11884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            pass
11894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        else:
11904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            mapping = True
11914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            for subobj in deepvalues(obj):
11924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                yield subobj
11934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if not mapping:
11944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            yield obj
11954710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
11964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
11974710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# Used as second parameter to dict.get() method, to distinguish absent
11984710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# dict key from one with a None value.
11994710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmclass Absent: pass
12004710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
12014710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmclass CookieJar:
12024710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    """Collection of HTTP cookies.
12034710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
12044710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    You may not need to know about this class: try
12054710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    urllib2.build_opener(HTTPCookieProcessor).open(url).
12064710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
12074710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    """
12084710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
12094710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    non_word_re = re.compile(r"\W")
12104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    quote_re = re.compile(r"([\"\\])")
12114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    strict_domain_re = re.compile(r"\.?[^.]*")
12124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    domain_re = re.compile(r"[^.]*")
12134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    dots_re = re.compile(r"^\.+")
12144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
12154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    magic_re = r"^\#LWP-Cookies-(\d+\.\d+)"
12164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
12174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def __init__(self, policy=None):
12184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if policy is None:
12194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            policy = DefaultCookiePolicy()
12204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self._policy = policy
12214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
12224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self._cookies_lock = _threading.RLock()
12234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self._cookies = {}
12244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
12254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def set_policy(self, policy):
12264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self._policy = policy
12274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
12284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def _cookies_for_domain(self, domain, request):
12294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        cookies = []
12304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if not self._policy.domain_return_ok(domain, request):
12314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            return []
12324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        _debug("Checking %s for cookies to return", domain)
12334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        cookies_by_path = self._cookies[domain]
12344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        for path in cookies_by_path.keys():
12354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            if not self._policy.path_return_ok(path, request):
12364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                continue
12374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            cookies_by_name = cookies_by_path[path]
12384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            for cookie in cookies_by_name.values():
12394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                if not self._policy.return_ok(cookie, request):
12404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    _debug("   not returning cookie")
12414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    continue
12424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                _debug("   it's a match")
12434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                cookies.append(cookie)
12444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return cookies
12454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
12464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def _cookies_for_request(self, request):
12474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """Return a list of cookies to be returned to server."""
12484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        cookies = []
12494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        for domain in self._cookies.keys():
12504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            cookies.extend(self._cookies_for_domain(domain, request))
12514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return cookies
12524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
12534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def _cookie_attrs(self, cookies):
12544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """Return a list of cookie-attributes to be returned to server.
12554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
12564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        like ['foo="bar"; $Path="/"', ...]
12574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
12584710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        The $Version attribute is also added when appropriate (currently only
12594710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        once per request).
12604710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
12614710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """
12624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # add cookies in order of most specific (ie. longest) path first
12634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        cookies.sort(key=lambda arg: len(arg.path), reverse=True)
12644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
12654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        version_set = False
12664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
12674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        attrs = []
12684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        for cookie in cookies:
12694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            # set version of Cookie header
12704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            # XXX
12714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            # What should it be if multiple matching Set-Cookie headers have
12724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            #  different versions themselves?
12734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            # Answer: there is no answer; was supposed to be settled by
12744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            #  RFC 2965 errata, but that may never appear...
12754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            version = cookie.version
12764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            if not version_set:
12774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                version_set = True
12784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                if version > 0:
12794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    attrs.append("$Version=%s" % version)
12804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
12814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            # quote cookie value if necessary
12824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            # (not for Netscape protocol, which already has any quotes
12834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            #  intact, due to the poorly-specified Netscape Cookie: syntax)
12844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            if ((cookie.value is not None) and
12854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                self.non_word_re.search(cookie.value) and version > 0):
12864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                value = self.quote_re.sub(r"\\\1", cookie.value)
12874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            else:
12884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                value = cookie.value
12894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
12904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            # add cookie-attributes to be returned in Cookie header
12914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            if cookie.value is None:
12924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                attrs.append(cookie.name)
12934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            else:
12944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                attrs.append("%s=%s" % (cookie.name, value))
12954710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            if version > 0:
12964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                if cookie.path_specified:
12974710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    attrs.append('$Path="%s"' % cookie.path)
12984710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                if cookie.domain.startswith("."):
12994710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    domain = cookie.domain
13004710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    if (not cookie.domain_initial_dot and
13014710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                        domain.startswith(".")):
13024710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                        domain = domain[1:]
13034710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    attrs.append('$Domain="%s"' % domain)
13044710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                if cookie.port is not None:
13054710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    p = "$Port"
13064710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    if cookie.port_specified:
13074710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                        p = p + ('="%s"' % cookie.port)
13084710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    attrs.append(p)
13094710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
13104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return attrs
13114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
13124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def add_cookie_header(self, request):
13134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """Add correct Cookie: header to request (urllib2.Request object).
13144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
13154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        The Cookie2 header is also added unless policy.hide_cookie2 is true.
13164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
13174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """
13184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        _debug("add_cookie_header")
13194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self._cookies_lock.acquire()
13204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        try:
13214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
13224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self._policy._now = self._now = int(time.time())
13234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
13244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            cookies = self._cookies_for_request(request)
13254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
13264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            attrs = self._cookie_attrs(cookies)
13274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            if attrs:
13284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                if not request.has_header("Cookie"):
13294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    request.add_unredirected_header(
13304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                        "Cookie", "; ".join(attrs))
13314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
13324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            # if necessary, advertise that we know RFC 2965
13334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            if (self._policy.rfc2965 and not self._policy.hide_cookie2 and
13344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                not request.has_header("Cookie2")):
13354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                for cookie in cookies:
13364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    if cookie.version != 1:
13374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                        request.add_unredirected_header("Cookie2", '$Version="1"')
13384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                        break
13394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
13404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        finally:
13414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self._cookies_lock.release()
13424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
13434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.clear_expired_cookies()
13444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
13454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def _normalized_cookie_tuples(self, attrs_set):
13464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """Return list of tuples containing normalised cookie information.
13474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
13484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        attrs_set is the list of lists of key,value pairs extracted from
13494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        the Set-Cookie or Set-Cookie2 headers.
13504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
13514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        Tuples are name, value, standard, rest, where name and value are the
13524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        cookie name and value, standard is a dictionary containing the standard
13534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        cookie-attributes (discard, secure, version, expires or max-age,
13544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        domain, path and port) and rest is a dictionary containing the rest of
13554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        the cookie-attributes.
13564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
13574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """
13584710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        cookie_tuples = []
13594710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
13604710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        boolean_attrs = "discard", "secure"
13614710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        value_attrs = ("version",
13624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                       "expires", "max-age",
13634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                       "domain", "path", "port",
13644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                       "comment", "commenturl")
13654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
13664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        for cookie_attrs in attrs_set:
13674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            name, value = cookie_attrs[0]
13684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
13694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            # Build dictionary of standard cookie-attributes (standard) and
13704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            # dictionary of other cookie-attributes (rest).
13714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
13724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            # Note: expiry time is normalised to seconds since epoch.  V0
13734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            # cookies should have the Expires cookie-attribute, and V1 cookies
13744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            # should have Max-Age, but since V1 includes RFC 2109 cookies (and
13754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            # since V0 cookies may be a mish-mash of Netscape and RFC 2109), we
13764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            # accept either (but prefer Max-Age).
13774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            max_age_set = False
13784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
13794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            bad_cookie = False
13804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
13814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            standard = {}
13824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            rest = {}
13834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            for k, v in cookie_attrs[1:]:
13844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                lc = k.lower()
13854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                # don't lose case distinction for unknown fields
13864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                if lc in value_attrs or lc in boolean_attrs:
13874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    k = lc
13884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                if k in boolean_attrs and v is None:
13894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    # boolean cookie-attribute is present, but has no value
13904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    # (like "discard", rather than "port=80")
13914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    v = True
13924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                if k in standard:
13934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    # only first value is significant
13944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    continue
13954710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                if k == "domain":
13964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    if v is None:
13974710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                        _debug("   missing value for domain attribute")
13984710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                        bad_cookie = True
13994710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                        break
14004710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    # RFC 2965 section 3.3.3
14014710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    v = v.lower()
14024710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                if k == "expires":
14034710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    if max_age_set:
14044710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                        # Prefer max-age to expires (like Mozilla)
14054710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                        continue
14064710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    if v is None:
14074710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                        _debug("   missing or invalid value for expires "
14084710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                              "attribute: treating as session cookie")
14094710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                        continue
14104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                if k == "max-age":
14114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    max_age_set = True
14124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    try:
14134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                        v = int(v)
14144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    except ValueError:
14154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                        _debug("   missing or invalid (non-numeric) value for "
14164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                              "max-age attribute")
14174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                        bad_cookie = True
14184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                        break
14194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    # convert RFC 2965 Max-Age to seconds since epoch
14204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    # XXX Strictly you're supposed to follow RFC 2616
14214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    #   age-calculation rules.  Remember that zero Max-Age is a
14224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    #   is a request to discard (old and new) cookie, though.
14234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    k = "expires"
14244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    v = self._now + v
14254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                if (k in value_attrs) or (k in boolean_attrs):
14264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    if (v is None and
14274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                        k not in ("port", "comment", "commenturl")):
14284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                        _debug("   missing value for %s attribute" % k)
14294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                        bad_cookie = True
14304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                        break
14314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    standard[k] = v
14324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                else:
14334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    rest[k] = v
14344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
14354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            if bad_cookie:
14364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                continue
14374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
14384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            cookie_tuples.append((name, value, standard, rest))
14394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
14404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return cookie_tuples
14414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
14424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def _cookie_from_cookie_tuple(self, tup, request):
14434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # standard is dict of standard cookie-attributes, rest is dict of the
14444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # rest of them
14454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        name, value, standard, rest = tup
14464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
14474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        domain = standard.get("domain", Absent)
14484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        path = standard.get("path", Absent)
14494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        port = standard.get("port", Absent)
14504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        expires = standard.get("expires", Absent)
14514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
14524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # set the easy defaults
14534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        version = standard.get("version", None)
14544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if version is not None:
14554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            try:
14564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                version = int(version)
14574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            except ValueError:
14584710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                return None  # invalid version, ignore cookie
14594710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        secure = standard.get("secure", False)
14604710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # (discard is also set if expires is Absent)
14614710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        discard = standard.get("discard", False)
14624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        comment = standard.get("comment", None)
14634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        comment_url = standard.get("commenturl", None)
14644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
14654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # set default path
14664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if path is not Absent and path != "":
14674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            path_specified = True
14684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            path = escape_path(path)
14694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        else:
14704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            path_specified = False
14714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            path = request_path(request)
14724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            i = path.rfind("/")
14734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            if i != -1:
14744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                if version == 0:
14754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    # Netscape spec parts company from reality here
14764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    path = path[:i]
14774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                else:
14784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    path = path[:i+1]
14794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            if len(path) == 0: path = "/"
14804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
14814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # set default domain
14824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        domain_specified = domain is not Absent
14834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # but first we have to remember whether it starts with a dot
14844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        domain_initial_dot = False
14854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if domain_specified:
14864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            domain_initial_dot = bool(domain.startswith("."))
14874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if domain is Absent:
14884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            req_host, erhn = eff_request_host(request)
14894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            domain = erhn
14904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        elif not domain.startswith("."):
14914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            domain = "."+domain
14924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
14934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # set default port
14944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        port_specified = False
14954710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if port is not Absent:
14964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            if port is None:
14974710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                # Port attr present, but has no value: default to request port.
14984710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                # Cookie should then only be sent back on that port.
14994710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                port = request_port(request)
15004710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            else:
15014710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                port_specified = True
15024710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                port = re.sub(r"\s+", "", port)
15034710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        else:
15044710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            # No port attr present.  Cookie can be sent back on any port.
15054710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            port = None
15064710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
15074710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # set default expires and discard
15084710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if expires is Absent:
15094710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            expires = None
15104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            discard = True
15114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        elif expires <= self._now:
15124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            # Expiry date in past is request to delete cookie.  This can't be
15134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            # in DefaultCookiePolicy, because can't delete cookies there.
15144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            try:
15154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                self.clear(domain, path, name)
15164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            except KeyError:
15174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                pass
15184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            _debug("Expiring cookie, domain='%s', path='%s', name='%s'",
15194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                   domain, path, name)
15204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            return None
15214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
15224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return Cookie(version,
15234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                      name, value,
15244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                      port, port_specified,
15254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                      domain, domain_specified, domain_initial_dot,
15264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                      path, path_specified,
15274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                      secure,
15284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                      expires,
15294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                      discard,
15304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                      comment,
15314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                      comment_url,
15324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                      rest)
15334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
15344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def _cookies_from_attrs_set(self, attrs_set, request):
15354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        cookie_tuples = self._normalized_cookie_tuples(attrs_set)
15364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
15374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        cookies = []
15384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        for tup in cookie_tuples:
15394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            cookie = self._cookie_from_cookie_tuple(tup, request)
15404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            if cookie: cookies.append(cookie)
15414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return cookies
15424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
15434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def _process_rfc2109_cookies(self, cookies):
15444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        rfc2109_as_ns = getattr(self._policy, 'rfc2109_as_netscape', None)
15454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if rfc2109_as_ns is None:
15464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            rfc2109_as_ns = not self._policy.rfc2965
15474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        for cookie in cookies:
15484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            if cookie.version == 1:
15494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                cookie.rfc2109 = True
15504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                if rfc2109_as_ns:
15514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    # treat 2109 cookies as Netscape cookies rather than
15524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    # as RFC2965 cookies
15534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    cookie.version = 0
15544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
15554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def make_cookies(self, response, request):
15564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """Return sequence of Cookie objects extracted from response object."""
15574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # get cookie-attributes for RFC 2965 and Netscape protocols
15584710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        headers = response.info()
15594710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        rfc2965_hdrs = headers.getheaders("Set-Cookie2")
15604710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        ns_hdrs = headers.getheaders("Set-Cookie")
15614710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
15624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        rfc2965 = self._policy.rfc2965
15634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        netscape = self._policy.netscape
15644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
15654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if ((not rfc2965_hdrs and not ns_hdrs) or
15664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            (not ns_hdrs and not rfc2965) or
15674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            (not rfc2965_hdrs and not netscape) or
15684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            (not netscape and not rfc2965)):
15694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            return []  # no relevant cookie headers: quick exit
15704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
15714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        try:
15724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            cookies = self._cookies_from_attrs_set(
15734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                split_header_words(rfc2965_hdrs), request)
15744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        except Exception:
15754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            _warn_unhandled_exception()
15764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            cookies = []
15774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
15784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if ns_hdrs and netscape:
15794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            try:
15804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                # RFC 2109 and Netscape cookies
15814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                ns_cookies = self._cookies_from_attrs_set(
15824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    parse_ns_headers(ns_hdrs), request)
15834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            except Exception:
15844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                _warn_unhandled_exception()
15854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                ns_cookies = []
15864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self._process_rfc2109_cookies(ns_cookies)
15874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
15884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            # Look for Netscape cookies (from Set-Cookie headers) that match
15894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            # corresponding RFC 2965 cookies (from Set-Cookie2 headers).
15904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            # For each match, keep the RFC 2965 cookie and ignore the Netscape
15914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            # cookie (RFC 2965 section 9.1).  Actually, RFC 2109 cookies are
15924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            # bundled in with the Netscape cookies for this purpose, which is
15934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            # reasonable behaviour.
15944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            if rfc2965:
15954710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                lookup = {}
15964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                for cookie in cookies:
15974710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    lookup[(cookie.domain, cookie.path, cookie.name)] = None
15984710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
15994710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                def no_matching_rfc2965(ns_cookie, lookup=lookup):
16004710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    key = ns_cookie.domain, ns_cookie.path, ns_cookie.name
16014710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    return key not in lookup
16024710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                ns_cookies = filter(no_matching_rfc2965, ns_cookies)
16034710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
16044710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            if ns_cookies:
16054710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                cookies.extend(ns_cookies)
16064710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
16074710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return cookies
16084710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
16094710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def set_cookie_if_ok(self, cookie, request):
16104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """Set a cookie if policy says it's OK to do so."""
16114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self._cookies_lock.acquire()
16124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        try:
16134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self._policy._now = self._now = int(time.time())
16144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
16154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            if self._policy.set_ok(cookie, request):
16164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                self.set_cookie(cookie)
16174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
16184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
16194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        finally:
16204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self._cookies_lock.release()
16214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
16224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def set_cookie(self, cookie):
16234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """Set a cookie, without checking whether or not it should be set."""
16244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        c = self._cookies
16254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self._cookies_lock.acquire()
16264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        try:
16274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            if cookie.domain not in c: c[cookie.domain] = {}
16284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            c2 = c[cookie.domain]
16294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            if cookie.path not in c2: c2[cookie.path] = {}
16304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            c3 = c2[cookie.path]
16314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            c3[cookie.name] = cookie
16324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        finally:
16334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self._cookies_lock.release()
16344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
16354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def extract_cookies(self, response, request):
16364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """Extract cookies from response, where allowable given the request."""
16374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        _debug("extract_cookies: %s", response.info())
16384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self._cookies_lock.acquire()
16394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        try:
16404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self._policy._now = self._now = int(time.time())
16414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
16424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            for cookie in self.make_cookies(response, request):
16434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                if self._policy.set_ok(cookie, request):
16444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    _debug(" setting cookie: %s", cookie)
16454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    self.set_cookie(cookie)
16464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        finally:
16474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self._cookies_lock.release()
16484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
16494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def clear(self, domain=None, path=None, name=None):
16504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """Clear some cookies.
16514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
16524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        Invoking this method without arguments will clear all cookies.  If
16534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        given a single argument, only cookies belonging to that domain will be
16544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        removed.  If given two arguments, cookies belonging to the specified
16554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        path within that domain are removed.  If given three arguments, then
16564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        the cookie with the specified name, path and domain is removed.
16574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
16584710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        Raises KeyError if no matching cookie exists.
16594710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
16604710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """
16614710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if name is not None:
16624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            if (domain is None) or (path is None):
16634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                raise ValueError(
16644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    "domain and path must be given to remove a cookie by name")
16654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            del self._cookies[domain][path][name]
16664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        elif path is not None:
16674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            if domain is None:
16684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                raise ValueError(
16694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    "domain must be given to remove cookies by path")
16704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            del self._cookies[domain][path]
16714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        elif domain is not None:
16724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            del self._cookies[domain]
16734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        else:
16744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self._cookies = {}
16754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
16764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def clear_session_cookies(self):
16774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """Discard all session cookies.
16784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
16794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        Note that the .save() method won't save session cookies anyway, unless
16804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        you ask otherwise by passing a true ignore_discard argument.
16814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
16824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """
16834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self._cookies_lock.acquire()
16844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        try:
16854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            for cookie in self:
16864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                if cookie.discard:
16874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    self.clear(cookie.domain, cookie.path, cookie.name)
16884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        finally:
16894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self._cookies_lock.release()
16904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
16914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def clear_expired_cookies(self):
16924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """Discard all expired cookies.
16934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
16944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        You probably don't need to call this method: expired cookies are never
16954710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        sent back to the server (provided you're using DefaultCookiePolicy),
16964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        this method is called by CookieJar itself every so often, and the
16974710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        .save() method won't save expired cookies anyway (unless you ask
16984710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        otherwise by passing a true ignore_expires argument).
16994710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
17004710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """
17014710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self._cookies_lock.acquire()
17024710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        try:
17034710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            now = time.time()
17044710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            for cookie in self:
17054710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                if cookie.is_expired(now):
17064710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    self.clear(cookie.domain, cookie.path, cookie.name)
17074710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        finally:
17084710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self._cookies_lock.release()
17094710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
17104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def __iter__(self):
17114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return deepvalues(self._cookies)
17124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
17134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def __len__(self):
17144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """Return number of contained cookies."""
17154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        i = 0
17164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        for cookie in self: i = i + 1
17174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return i
17184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
17194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def __repr__(self):
17204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        r = []
17214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        for cookie in self: r.append(repr(cookie))
17224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return "<%s[%s]>" % (self.__class__, ", ".join(r))
17234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
17244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def __str__(self):
17254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        r = []
17264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        for cookie in self: r.append(str(cookie))
17274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return "<%s[%s]>" % (self.__class__, ", ".join(r))
17284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
17294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
17304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# derives from IOError for backwards-compatibility with Python 2.4.0
17314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmclass LoadError(IOError): pass
17324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
17334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmclass FileCookieJar(CookieJar):
17344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    """CookieJar that can be loaded from and saved to a file."""
17354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
17364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def __init__(self, filename=None, delayload=False, policy=None):
17374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """
17384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        Cookies are NOT loaded from the named file until either the .load() or
17394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        .revert() method is called.
17404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
17414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """
17424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        CookieJar.__init__(self, policy)
17434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if filename is not None:
17444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            try:
17454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                filename+""
17464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            except:
17474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                raise ValueError("filename must be string-like")
17484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.filename = filename
17494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.delayload = bool(delayload)
17504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
17514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def save(self, filename=None, ignore_discard=False, ignore_expires=False):
17524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """Save cookies to a file."""
17534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        raise NotImplementedError()
17544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
17554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def load(self, filename=None, ignore_discard=False, ignore_expires=False):
17564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """Load cookies from a file."""
17574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if filename is None:
17584710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            if self.filename is not None: filename = self.filename
17594710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            else: raise ValueError(MISSING_FILENAME_TEXT)
17604710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
17614710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        f = open(filename)
17624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        try:
17634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self._really_load(f, filename, ignore_discard, ignore_expires)
17644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        finally:
17654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            f.close()
17664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
17674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def revert(self, filename=None,
17684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm               ignore_discard=False, ignore_expires=False):
17694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """Clear all cookies and reload cookies from a saved file.
17704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
17714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        Raises LoadError (or IOError) if reversion is not successful; the
17724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        object's state will not be altered if this happens.
17734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
17744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        """
17754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if filename is None:
17764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            if self.filename is not None: filename = self.filename
17774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            else: raise ValueError(MISSING_FILENAME_TEXT)
17784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
17794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self._cookies_lock.acquire()
17804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        try:
17814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
17824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            old_state = copy.deepcopy(self._cookies)
17834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self._cookies = {}
17844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            try:
17854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                self.load(filename, ignore_discard, ignore_expires)
17864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            except (LoadError, IOError):
17874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                self._cookies = old_state
17884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                raise
17894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
17904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        finally:
17914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self._cookies_lock.release()
17924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
17934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmfrom _LWPCookieJar import LWPCookieJar, lwp_cookie_str
17944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmfrom _MozillaCookieJar import MozillaCookieJar
1795