1# (c) 2005 Ian Bicking and contributors; written for Paste (http://pythonpaste.org)
2# Licensed under the MIT license: http://www.opensource.org/licenses/mit-license.php
3
4"""
Creates a human-readable identifier, using letters and digits,
avoiding ambiguous numbers and letters.  hash_identifier can be used
to create compact representations that are unique for a certain string
(or concatenation of strings).
9"""
10
11try:
12    from hashlib import md5
13except ImportError:
14    from md5 import md5
15
16import six
17
# Alphabet used for identifiers: the digits 2-9 followed by the lowercase
# letters, with 0, 1, i, l, o and s left out to avoid ambiguous characters.
good_characters = "23456789" "abcdefghjkmnpqrtuvwxyz"

# Radix of the encoding: each identifier character is one digit in this base.
base = len(good_characters)
21
def make_identifier(number):
    """
    Encodes a number as an identifier.

    ``number`` must be a non-negative integer.  It is written in base
    ``base`` using ``good_characters`` as the digits, least-significant
    digit first.  Zero encodes to the empty string.

    Raises ``ValueError`` if ``number`` is not an integer or is
    negative.
    """
    if not isinstance(number, six.integer_types):
        # Wrap the value in a 1-tuple: a bare tuple argument would be
        # unpacked by ``%`` and raise TypeError instead of ValueError.
        raise ValueError(
            "You can only make identifiers out of integers (not %r)"
            % (number,))
    if number < 0:
        raise ValueError(
            "You cannot make identifiers out of negative numbers: %r"
            % (number,))
    digits = []
    while number:
        # divmod floors on integers, so ``number`` shrinks towards 0.
        number, digit = divmod(number, base)
        digits.append(good_characters[digit])
    return ''.join(digits)
41
def hash_identifier(s, length, pad=True, hasher=md5, prefix='',
                    group=None, upper=False):
    """
    Hashes the string (with the given hashing module), then turns that
    hash into an identifier of the given length (using modulo to
    reduce the length of the identifier).  If ``pad`` is False, then
    the minimum-length identifier will be used; otherwise the
    identifier will be left-padded to ``length`` characters with the
    zero digit of the alphabet (``good_characters[0]``).

    ``s`` may be text (hashed as UTF-8), bytes, or any other object
    (hashed via its ``str()`` form).  ``hasher`` is either a callable
    returning an object with a ``digest()`` method, or a module
    providing such a callable as ``new``.

    ``prefix`` will be added last, and does not count towards the
    target length.  ``group`` will group the characters with ``-`` in
    the given lengths, and also does not count towards the target
    length.  E.g., ``group=4`` will cause an identifier like
    ``a5f3-hgk3-asdf``.  Grouping occurs before the prefix; when the
    identifier does not divide evenly, the first group is the short
    one.  If ``upper`` is true the identifier (but not the prefix) is
    upper-cased.

    Raises ``ValueError`` if ``length`` or ``group`` is negative, or if
    ``length`` is greater than 26 while using the default md5 hasher.
    """
    if not callable(hasher):
        # Accept sha/md5 modules as well as callables
        hasher = hasher.new
    if length < 0:
        # A negative exponent would turn the modulus into a float and
        # surface later as a misleading "only integers" error.
        raise ValueError(
            "length cannot be negative (you gave %s)" % (length,))
    if length > 26 and hasher is md5:
        raise ValueError(
            "md5 cannot create hashes longer than 26 characters in "
            "length (you gave %s)" % length)
    if group and group < 0:
        # Slicing with a negative group size never consumes the
        # identifier, so reject it rather than loop forever.
        raise ValueError(
            "group cannot be negative (you gave %s)" % (group,))
    if isinstance(s, six.text_type):
        s = s.encode('utf-8')
    elif not isinstance(s, six.binary_type):
        s = str(s)
        if six.PY3:
            s = s.encode('utf-8')
    digest = hasher(s).digest()
    modulo = base ** length
    number = 0
    # bytearray yields integer byte values on both Python 2 and 3.
    for byte in bytearray(digest):
        number = (number * 256 + byte) % modulo
    ident = make_identifier(number)
    if pad:
        ident = good_characters[0] * (length - len(ident)) + ident
    if group:
        # Split from the right so that any short group comes first.
        head = len(ident) % group
        parts = [ident[:head]] if head else []
        parts.extend(
            ident[i:i + group] for i in range(head, len(ident), group))
        ident = '-'.join(parts)
    if upper:
        ident = ident.upper()
    return prefix + ident
88
# Doctests for the functions in this module.  They live in the module-level
# ``__test__`` mapping (test name -> docstring-style text), which is where
# ``doctest.testmod()`` looks for extra tests besides regular docstrings.
__test__ = {
    'make_identifier': """
    >>> make_identifier(0)
    ''
    >>> make_identifier(1000)
    'c53'
    >>> make_identifier(-100)
    Traceback (most recent call last):
        ...
    ValueError: You cannot make identifiers out of negative numbers: -100
    >>> make_identifier('test')
    Traceback (most recent call last):
        ...
    ValueError: You can only make identifiers out of integers (not 'test')
    >>> make_identifier(1000000000000)
    'c53x9rqh3'
    """,
    'hash_identifier': """
    >>> hash_identifier(0, 5)
    'cy2dr'
    >>> hash_identifier(0, 10)
    'cy2dr6rg46'
    >>> hash_identifier('this is a test of a long string', 5)
    'awatu'
    >>> hash_identifier(0, 26)
    'cy2dr6rg46cx8t4w2f3nfexzk4'
    >>> hash_identifier(0, 30)
    Traceback (most recent call last):
        ...
    ValueError: md5 cannot create hashes longer than 26 characters in length (you gave 30)
    >>> hash_identifier(0, 10, group=4)
    'cy-2dr6-rg46'
    >>> hash_identifier(0, 10, group=4, upper=True, prefix='M-')
    'M-CY-2DR6-RG46'
    """}
125
if __name__ == '__main__':
    # Executed as a script: run this module's doctests.
    from doctest import testmod
    testmod()
129
130