# (c) 2005 Ian Bicking and contributors; written for Paste (http://pythonpaste.org)
# Licensed under the MIT license: http://www.opensource.org/licenses/mit-license.php

"""
Creates a human-readable identifier, using letters and digits,
avoiding ambiguous numbers and letters.  hash_identifier can be used
to create compact representations that are unique for a certain string
(or concatenation of strings)
"""

try:
    from hashlib import md5
except ImportError:
    from md5 import md5

import six

# Alphabet used for identifiers; its length is the numeric base.
good_characters = "23456789abcdefghjkmnpqrtuvwxyz"

base = len(good_characters)


def make_identifier(number):
    """
    Encodes a number as an identifier.

    ``number`` must be a non-negative integer.  It is written in base
    ``base`` using ``good_characters`` as the digits, with the
    least-significant digit first.  Zero encodes to the empty string.

    Raises ``ValueError`` if ``number`` is not an integer, or if it is
    negative.
    """
    if not isinstance(number, six.integer_types):
        # Format through a 1-tuple: a bare tuple argument would otherwise
        # be unpacked by ``%`` and raise TypeError instead of ValueError.
        raise ValueError(
            "You can only make identifiers out of integers (not %r)"
            % (number,))
    if number < 0:
        raise ValueError(
            "You cannot make identifiers out of negative numbers: %r"
            % (number,))
    result = []
    while number:
        # Peel off the lowest base-``base`` digit on each pass.
        number, digit = divmod(number, base)
        result.append(good_characters[digit])
    return ''.join(result)


def hash_identifier(s, length, pad=True, hasher=md5, prefix='',
                    group=None, upper=False):
    """
    Hashes the string (with the given hashing module), then turns that
    hash into an identifier of the given length (using modulo to
    reduce the length of the identifier).  If ``pad`` is False, then
    the minimum-length identifier will be used; otherwise the
    identifier will be padded on the left with ``good_characters[0]``
    (the zero digit of the alphabet, ``'2'``) as necessary.

    ``s`` may be text (encoded as UTF-8 before hashing), bytes, or any
    other object (converted with ``str()`` first).  ``hasher`` may be
    a callable returning a hash object, or a module with a ``new``
    function.

    ``prefix`` will be added last, and does not count towards the
    target length.  ``group`` will group the characters with ``-`` in
    the given lengths, and also does not count towards the target
    length.  E.g., ``group=4`` will cause a identifier like
    ``a5f3-hgk3-asdf``.  Grouping occurs before the prefix, and groups
    are counted from the right, so any short group comes first.  If
    ``upper`` is true the identifier (but not the prefix) is
    upper-cased.

    Raises ``ValueError`` if ``length`` is greater than 26 while using
    md5, or if ``group`` is negative.
    """
    if not callable(hasher):
        # Accept sha/md5 modules as well as callables
        hasher = hasher.new
    if length > 26 and hasher is md5:
        raise ValueError(
            "md5 cannot create hashes longer than 26 characters in "
            "length (you gave %s)" % (length,))
    if group and group < 0:
        # A negative group size would make the grouping loop below spin
        # forever, because ``ident[:-group]`` never shrinks to ''.
        raise ValueError(
            "group must not be negative (you gave %r)" % (group,))
    if isinstance(s, six.text_type):
        s = s.encode('utf-8')
    elif not isinstance(s, six.binary_type):
        s = str(s)
        if six.PY3:
            s = s.encode('utf-8')
    bin_hash = hasher(s).digest()
    modulo = base ** length
    number = 0
    # bytearray yields integer byte values on both Python 2 and Python 3.
    for byte in bytearray(bin_hash):
        number = (number * 256 + byte) % modulo
    ident = make_identifier(number)
    if pad:
        ident = good_characters[0] * (length - len(ident)) + ident
    if group:
        # Slice groups off the right-hand end, then restore their order.
        parts = []
        while ident:
            parts.append(ident[-group:])
            ident = ident[:-group]
        parts.reverse()
        ident = '-'.join(parts)
    if upper:
        ident = ident.upper()
    return prefix + ident


# doctest tests:
__test__ = {
    'make_identifier': """
    >>> make_identifier(0)
    ''
    >>> make_identifier(1000)
    'c53'
    >>> make_identifier(-100)
    Traceback (most recent call last):
        ...
    ValueError: You cannot make identifiers out of negative numbers: -100
    >>> make_identifier('test')
    Traceback (most recent call last):
        ...
    ValueError: You can only make identifiers out of integers (not 'test')
    >>> make_identifier((1, 2))
    Traceback (most recent call last):
        ...
    ValueError: You can only make identifiers out of integers (not (1, 2))
    >>> make_identifier(1000000000000)
    'c53x9rqh3'
    """,
    'hash_identifier': """
    >>> hash_identifier(0, 5)
    'cy2dr'
    >>> hash_identifier(0, 10)
    'cy2dr6rg46'
    >>> hash_identifier('this is a test of a long string', 5)
    'awatu'
    >>> hash_identifier(0, 26)
    'cy2dr6rg46cx8t4w2f3nfexzk4'
    >>> hash_identifier(0, 30)
    Traceback (most recent call last):
        ...
    ValueError: md5 cannot create hashes longer than 26 characters in length (you gave 30)
    >>> hash_identifier(0, 10, group=4)
    'cy-2dr6-rg46'
    >>> hash_identifier(0, 10, group=4, upper=True, prefix='M-')
    'M-CY-2DR6-RG46'
    >>> hash_identifier(0, 10, group=-1)
    Traceback (most recent call last):
        ...
    ValueError: group must not be negative (you gave -1)
    """}

if __name__ == '__main__':
    import doctest
    doctest.testmod()