# quoting.py revision b2cbf1594f8d6e4ba32d384cf379f62a74ed7654
# (c) 2005 Ian Bicking and contributors; written for Paste (http://pythonpaste.org)
# Licensed under the MIT license: http://www.opensource.org/licenses/mit-license.php

import re

import six
from six.moves import html_entities
from six.moves.urllib.parse import quote, unquote

try:
    # html.escape is the documented replacement for cgi.escape, which was
    # deprecated in Python 3.2 and removed in Python 3.8.
    from html import escape as _escape
except ImportError:  # Python 2: no usable html.escape, fall back to cgi
    from cgi import escape as _escape


__all__ = ['html_quote', 'html_unquote', 'url_quote', 'url_unquote',
           'strip_html']

default_encoding = 'UTF-8'


def html_quote(v, encoding=None):
    r"""
    Quote the value (turned to a string) as HTML.  This quotes ``&``,
    ``<``, ``>`` and quotes.

    ``None`` becomes the empty string.  Byte strings are returned as byte
    strings and, on Python 3, text is returned as text.  On Python 2 text
    (and any non-string value, after conversion to text) is encoded with
    ``encoding`` and a byte string is returned.

    When ``html.escape`` is available (Python 3) single quotes are escaped
    as ``&#x27;`` in addition to double quotes.

    :param v: the value to quote; any object.
    :param encoding: codec used to convert between bytes and text;
        defaults to the module-level ``default_encoding``.
    :raises UnicodeError: if ``v`` cannot be decoded/encoded with
        ``encoding``.
    """
    encoding = encoding or default_encoding
    if v is None:
        return ''
    if isinstance(v, six.binary_type):
        if six.PY3:
            # The escape function only accepts text on Python 3, so go
            # through ``encoding`` and hand bytes back to the caller.
            return _escape(v.decode(encoding), True).encode(encoding)
        return _escape(v, True)
    if not isinstance(v, six.text_type):
        v = six.text_type(v)
    if six.PY3:
        return _escape(v, True)
    # Python 2: encode first, then escape the resulting byte string.
    return _escape(v.encode(encoding), True)


# Matches named references (``&amp;``, ``&sup2;``) as well as decimal
# (``&#39;``) and hexadecimal (``&#x27;``) character references.
_unquote_re = re.compile(r'&(#[0-9]+|#[xX][0-9a-fA-F]+|[a-zA-Z][a-zA-Z0-9]*);')


def _entity_subber(match, name2c=html_entities.name2codepoint):
    """
    ``re.sub`` callback that turns one ``_unquote_re`` match into the
    character it stands for.

    References that cannot be resolved (unknown names, code point 0,
    surrogates, values outside the range ``six.unichr`` accepts) are
    returned unchanged.
    """
    ref = match.group(1)
    if ref.startswith('#'):
        try:
            # The pattern guarantees at least one character after '#'.
            if ref[1] in 'xX':
                code = int(ref[2:], 16)
            else:
                code = int(ref[1:], 10)
            # Code point 0 and lone surrogates are not usable characters.
            if code and not 0xD800 <= code <= 0xDFFF:
                return six.unichr(code)
        except (ValueError, OverflowError):
            # Out-of-range or absurdly long number: leave the text alone.
            pass
        return match.group(0)
    code = name2c.get(ref)
    if code:
        return six.unichr(code)
    return match.group(0)

def html_unquote(s, encoding=None):
    r"""
    Decode the value.

    Byte strings are first decoded to text using ``encoding`` (or the
    module-level ``default_encoding``); every entity reference matched by
    ``_unquote_re`` is then replaced via ``_entity_subber``.

    :param s: text or byte string containing HTML entity references.
    :param encoding: codec used to decode ``s`` when it is a byte string.
    :returns: the decoded text.
    """
    if isinstance(s, six.binary_type):
        s = s.decode(encoding or default_encoding)
    return _unquote_re.sub(_entity_subber, s)


def strip_html(s):
    """
    Remove everything that looks like a tag (``<...>``) from ``s`` and
    decode the entity references in what is left.

    ``.`` does not match newlines here, so a tag that spans several lines
    is left in place.
    """
    # Tags are removed before unquoting so that an escaped ``&lt;b&gt;``
    # survives as literal text instead of being stripped.
    s = re.sub(r'<.*?>', '', s)
    s = html_unquote(s)
    return s


def no_quote(s):
    """
    Quoting that doesn't do anything

    >>> no_quote('a<b')
    'a<b'
    """
    return s


_comment_quote_re = re.compile(r'\-\s*\>')
# Everything but \r, \n, \t:
# (currently unused by comment_quote; kept as a module-level name)
_bad_chars_re = re.compile('[\x00-\x08\x0b-\x0c\x0e-\x1f]')


def comment_quote(s):
    """
    Quote that makes sure text can't escape a comment

    The value is converted with ``str()`` and every ``-`` followed by
    optional whitespace and ``>`` is rewritten as ``-&gt;``, so the result
    never contains the ``->`` that could close an HTML comment.

    >>> comment_quote('x --> y')
    'x --&gt; y'
    """
    comment = str(s)
    # The replacement must be the entity: substituting a literal '->' would
    # turn '- >' into a working comment terminator.
    comment = _comment_quote_re.sub('-&gt;', comment)
    return comment


url_quote = quote
url_unquote = unquote

if __name__ == '__main__':
    import doctest
    doctest.testmod()