# (c) 2005 Ian Bicking and contributors; written for Paste (http://pythonpaste.org)
# Licensed under the MIT license: http://www.opensource.org/licenses/mit-license.php
3b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
4b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craikimport cgi
5b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craikimport six
6b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craikimport re
7b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craikfrom six.moves import html_entities
8b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craikfrom six.moves.urllib.parse import quote, unquote
9b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
10b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
# Names exported by a star-import of this module.
# NOTE(review): no_quote and comment_quote are defined further down but are
# not listed here -- confirm whether leaving them out is intentional.
__all__ = ['html_quote', 'html_unquote', 'url_quote', 'url_unquote',
           'strip_html']

# Codec that html_quote and html_unquote fall back to when the caller does
# not pass an ``encoding``.
default_encoding = 'UTF-8'
15b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
def html_quote(v, encoding=None):
    r"""
    Quote the value (turned to a string) as HTML.  This quotes ``&``,
    ``<``, ``>`` and the double quote; single quotes are left alone.

    The escaping is done with plain ``replace`` calls rather than
    ``cgi.escape``, which was removed in Python 3.8.  The output is the
    same as ``cgi.escape(value, 1)`` used to produce.

    :param v: the value to quote.  ``None`` gives ``''``; a byte string
        is escaped byte-for-byte and returned as a byte string (the
        ``encoding`` argument is not applied to it); anything else is
        converted to text first.
    :param encoding: codec used on Python 2 to encode the escaped text
        into the returned byte string.  Defaults to ``default_encoding``.
    :returns: the escaped value -- a byte string for byte-string input,
        a native ``str`` otherwise.
    """
    encoding = encoding or default_encoding
    if v is None:
        return ''
    if isinstance(v, bytes):
        # ``bytes`` is an alias of ``str`` on Python 2.  '&' is replaced
        # first so the ampersands of the entities added afterwards are
        # not escaped a second time.
        return (v.replace(b'&', b'&amp;')
                .replace(b'<', b'&lt;')
                .replace(b'>', b'&gt;')
                .replace(b'"', b'&quot;'))
    text_type = type(u'')  # ``unicode`` on Python 2, ``str`` on Python 3
    if not isinstance(v, text_type):
        v = text_type(v)
    escaped = (v.replace(u'&', u'&amp;')
               .replace(u'<', u'&lt;')
               .replace(u'>', u'&gt;')
               .replace(u'"', u'&quot;'))
    if str is bytes:
        # Only true on Python 2, where the native string type is a byte
        # string: keep returning an encoded byte string there.
        return escaped.encode(encoding)
    return escaped
36b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
37b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik_unquote_re = re.compile(r'&([a-zA-Z]+);')
38b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craikdef _entity_subber(match, name2c=html_entities.name2codepoint):
39b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    code = name2c.get(match.group(1))
40b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    if code:
41b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik        return six.unichr(code)
42b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik    else:
43b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik        return match.group(0)
44b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
def html_unquote(s, encoding=None):
    r"""
    Replace named HTML entities in ``s`` with their characters.

    A byte string is decoded first, using ``encoding`` or, when that is
    not given, ``default_encoding``.  Every ``&name;`` sequence is then
    passed through ``_entity_subber``; names it does not recognise are
    left as they are.
    """
    text = s
    if isinstance(text, six.binary_type):
        charset = encoding or default_encoding
        text = text.decode(charset)
    return _unquote_re.sub(_entity_subber, text)
53b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
def strip_html(s):
    """
    Remove markup from ``s`` and decode the named entities that remain.

    Everything from a ``<`` up to the next ``>`` is dropped, then the
    result is passed through ``html_unquote``.

    :param s: the HTML text to strip.
    :returns: the text with tags removed and named entities decoded.
    """
    # ``[^>]`` (unlike ``.``) also matches newlines, so tags and comments
    # that are wrapped over several lines are removed as well.
    s = re.sub(r'<[^>]*>', '', s)
    return html_unquote(s)
59b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
def no_quote(s):
    """
    Identity "quoting" function: hands ``s`` back untouched.
    """
    return s
65b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
# Matches the tail of a comment terminator: a dash, an optional ``!``
# (HTML parsers also end a comment at ``--!>``), optional whitespace and
# then ``>``.  Group 1 holds the ``!`` when present.
_comment_quote_re = re.compile(r'\-(!?)\s*\>')
# ASCII control characters other than \t, \n and \r.  comment_quote does
# not use this pattern; it is kept because it is a module-level name.
_bad_chars_re = re.compile('[\x00-\x08\x0b-\x0c\x0e-\x1f]')


def comment_quote(s):
    """
    Quote that makes sure text can't escape a comment

    ``s`` is converted with ``str()``.  Every ``>`` that directly follows
    a dash -- or a dash and ``!``, optionally separated from the ``>`` by
    whitespace -- is rewritten as ``&gt;``, so neither ``-->`` nor
    ``--!>`` survives in the output.  Whitespace inside such a sequence
    is dropped.

    :param s: the value to place inside an HTML comment.
    :returns: the quoted text as a ``str``.
    """
    return _comment_quote_re.sub(r'-\1&gt;', str(s))
79b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
# URL quoting is delegated unchanged: these are plain aliases of the
# ``quote`` and ``unquote`` functions imported at the top of the file.
url_quote = quote
url_unquote = unquote
82b2cbf1594f8d6e4ba32d384cf379f62a74ed7654Chris Craik
# When the file is executed as a script, run whatever doctests are found in
# this module's docstrings.
if __name__ == '__main__':
    import doctest
    doctest.testmod()
86