1__all__ = ['Counter', 'deque', 'defaultdict', 'namedtuple', 'OrderedDict']
2# For bootstrapping reasons, the collection ABCs are defined in _abcoll.py.
3# They should however be considered an integral part of collections.py.
4from _abcoll import *
5import _abcoll
6__all__ += _abcoll.__all__
7
8from _collections import deque, defaultdict
9from operator import itemgetter as _itemgetter
10from keyword import iskeyword as _iskeyword
11import sys as _sys
12import heapq as _heapq
13from itertools import repeat as _repeat, chain as _chain, starmap as _starmap
14
15try:
16    from thread import get_ident as _get_ident
17except ImportError:
18    from dummy_thread import get_ident as _get_ident
19
20
21################################################################################
22### OrderedDict
23################################################################################
24
25class OrderedDict(dict):
26    'Dictionary that remembers insertion order'
27    # An inherited dict maps keys to values.
28    # The inherited dict provides __getitem__, __len__, __contains__, and get.
29    # The remaining methods are order-aware.
30    # Big-O running times for all methods are the same as regular dictionaries.
31
32    # The internal self.__map dict maps keys to links in a doubly linked list.
33    # The circular doubly linked list starts and ends with a sentinel element.
34    # The sentinel element never gets deleted (this simplifies the algorithm).
35    # Each link is stored as a list of length three:  [PREV, NEXT, KEY].
36
37    def __init__(self, *args, **kwds):
38        '''Initialize an ordered dictionary.  The signature is the same as
39        regular dictionaries, but keyword arguments are not recommended because
40        their insertion order is arbitrary.
41
42        '''
43        if len(args) > 1:
44            raise TypeError('expected at most 1 arguments, got %d' % len(args))
45        try:
46            self.__root
47        except AttributeError:
48            self.__root = root = []                     # sentinel node
49            root[:] = [root, root, None]
50            self.__map = {}
51        self.__update(*args, **kwds)
52
53    def __setitem__(self, key, value, PREV=0, NEXT=1, dict_setitem=dict.__setitem__):
54        'od.__setitem__(i, y) <==> od[i]=y'
55        # Setting a new item creates a new link at the end of the linked list,
56        # and the inherited dictionary is updated with the new key/value pair.
57        if key not in self:
58            root = self.__root
59            last = root[PREV]
60            last[NEXT] = root[PREV] = self.__map[key] = [last, root, key]
61        dict_setitem(self, key, value)
62
63    def __delitem__(self, key, PREV=0, NEXT=1, dict_delitem=dict.__delitem__):
64        'od.__delitem__(y) <==> del od[y]'
65        # Deleting an existing item uses self.__map to find the link which gets
66        # removed by updating the links in the predecessor and successor nodes.
67        dict_delitem(self, key)
68        link_prev, link_next, key = self.__map.pop(key)
69        link_prev[NEXT] = link_next
70        link_next[PREV] = link_prev
71
72    def __iter__(self):
73        'od.__iter__() <==> iter(od)'
74        # Traverse the linked list in order.
75        NEXT, KEY = 1, 2
76        root = self.__root
77        curr = root[NEXT]
78        while curr is not root:
79            yield curr[KEY]
80            curr = curr[NEXT]
81
82    def __reversed__(self):
83        'od.__reversed__() <==> reversed(od)'
84        # Traverse the linked list in reverse order.
85        PREV, KEY = 0, 2
86        root = self.__root
87        curr = root[PREV]
88        while curr is not root:
89            yield curr[KEY]
90            curr = curr[PREV]
91
92    def clear(self):
93        'od.clear() -> None.  Remove all items from od.'
94        for node in self.__map.itervalues():
95            del node[:]
96        root = self.__root
97        root[:] = [root, root, None]
98        self.__map.clear()
99        dict.clear(self)
100
101    # -- the following methods do not depend on the internal structure --
102
103    def keys(self):
104        'od.keys() -> list of keys in od'
105        return list(self)
106
107    def values(self):
108        'od.values() -> list of values in od'
109        return [self[key] for key in self]
110
111    def items(self):
112        'od.items() -> list of (key, value) pairs in od'
113        return [(key, self[key]) for key in self]
114
115    def iterkeys(self):
116        'od.iterkeys() -> an iterator over the keys in od'
117        return iter(self)
118
119    def itervalues(self):
120        'od.itervalues -> an iterator over the values in od'
121        for k in self:
122            yield self[k]
123
124    def iteritems(self):
125        'od.iteritems -> an iterator over the (key, value) pairs in od'
126        for k in self:
127            yield (k, self[k])
128
129    update = MutableMapping.update
130
131    __update = update # let subclasses override update without breaking __init__
132
133    __marker = object()
134
135    def pop(self, key, default=__marker):
136        '''od.pop(k[,d]) -> v, remove specified key and return the corresponding
137        value.  If key is not found, d is returned if given, otherwise KeyError
138        is raised.
139
140        '''
141        if key in self:
142            result = self[key]
143            del self[key]
144            return result
145        if default is self.__marker:
146            raise KeyError(key)
147        return default
148
149    def setdefault(self, key, default=None):
150        'od.setdefault(k[,d]) -> od.get(k,d), also set od[k]=d if k not in od'
151        if key in self:
152            return self[key]
153        self[key] = default
154        return default
155
156    def popitem(self, last=True):
157        '''od.popitem() -> (k, v), return and remove a (key, value) pair.
158        Pairs are returned in LIFO order if last is true or FIFO order if false.
159
160        '''
161        if not self:
162            raise KeyError('dictionary is empty')
163        key = next(reversed(self) if last else iter(self))
164        value = self.pop(key)
165        return key, value
166
167    def __repr__(self, _repr_running={}):
168        'od.__repr__() <==> repr(od)'
169        call_key = id(self), _get_ident()
170        if call_key in _repr_running:
171            return '...'
172        _repr_running[call_key] = 1
173        try:
174            if not self:
175                return '%s()' % (self.__class__.__name__,)
176            return '%s(%r)' % (self.__class__.__name__, self.items())
177        finally:
178            del _repr_running[call_key]
179
180    def __reduce__(self):
181        'Return state information for pickling'
182        items = [[k, self[k]] for k in self]
183        inst_dict = vars(self).copy()
184        for k in vars(OrderedDict()):
185            inst_dict.pop(k, None)
186        if inst_dict:
187            return (self.__class__, (items,), inst_dict)
188        return self.__class__, (items,)
189
190    def copy(self):
191        'od.copy() -> a shallow copy of od'
192        return self.__class__(self)
193
194    @classmethod
195    def fromkeys(cls, iterable, value=None):
196        '''OD.fromkeys(S[, v]) -> New ordered dictionary with keys from S.
197        If not specified, the value defaults to None.
198
199        '''
200        self = cls()
201        for key in iterable:
202            self[key] = value
203        return self
204
205    def __eq__(self, other):
206        '''od.__eq__(y) <==> od==y.  Comparison to another OD is order-sensitive
207        while comparison to a regular mapping is order-insensitive.
208
209        '''
210        if isinstance(other, OrderedDict):
211            return len(self)==len(other) and self.items() == other.items()
212        return dict.__eq__(self, other)
213
214    def __ne__(self, other):
215        'od.__ne__(y) <==> od!=y'
216        return not self == other
217
218    # -- the following methods support python 3.x style dictionary views --
219
220    def viewkeys(self):
221        "od.viewkeys() -> a set-like object providing a view on od's keys"
222        return KeysView(self)
223
224    def viewvalues(self):
225        "od.viewvalues() -> an object providing a view on od's values"
226        return ValuesView(self)
227
228    def viewitems(self):
229        "od.viewitems() -> a set-like object providing a view on od's items"
230        return ItemsView(self)
231
232
233################################################################################
234### namedtuple
235################################################################################
236
237def namedtuple(typename, field_names, verbose=False, rename=False):
238    """Returns a new subclass of tuple with named fields.
239
240    >>> Point = namedtuple('Point', 'x y')
241    >>> Point.__doc__                   # docstring for the new class
242    'Point(x, y)'
243    >>> p = Point(11, y=22)             # instantiate with positional args or keywords
244    >>> p[0] + p[1]                     # indexable like a plain tuple
245    33
246    >>> x, y = p                        # unpack like a regular tuple
247    >>> x, y
248    (11, 22)
249    >>> p.x + p.y                       # fields also accessable by name
250    33
251    >>> d = p._asdict()                 # convert to a dictionary
252    >>> d['x']
253    11
254    >>> Point(**d)                      # convert from a dictionary
255    Point(x=11, y=22)
256    >>> p._replace(x=100)               # _replace() is like str.replace() but targets named fields
257    Point(x=100, y=22)
258
259    """
260
261    # Parse and validate the field names.  Validation serves two purposes,
262    # generating informative error messages and preventing template injection attacks.
263    if isinstance(field_names, basestring):
264        field_names = field_names.replace(',', ' ').split() # names separated by whitespace and/or commas
265    field_names = tuple(map(str, field_names))
266    if rename:
267        names = list(field_names)
268        seen = set()
269        for i, name in enumerate(names):
270            if (not all(c.isalnum() or c=='_' for c in name) or _iskeyword(name)
271                or not name or name[0].isdigit() or name.startswith('_')
272                or name in seen):
273                names[i] = '_%d' % i
274            seen.add(name)
275        field_names = tuple(names)
276    for name in (typename,) + field_names:
277        if not all(c.isalnum() or c=='_' for c in name):
278            raise ValueError('Type names and field names can only contain alphanumeric characters and underscores: %r' % name)
279        if _iskeyword(name):
280            raise ValueError('Type names and field names cannot be a keyword: %r' % name)
281        if name[0].isdigit():
282            raise ValueError('Type names and field names cannot start with a number: %r' % name)
283    seen_names = set()
284    for name in field_names:
285        if name.startswith('_') and not rename:
286            raise ValueError('Field names cannot start with an underscore: %r' % name)
287        if name in seen_names:
288            raise ValueError('Encountered duplicate field name: %r' % name)
289        seen_names.add(name)
290
291    # Create and fill-in the class template
292    numfields = len(field_names)
293    argtxt = repr(field_names).replace("'", "")[1:-1]   # tuple repr without parens or quotes
294    reprtxt = ', '.join('%s=%%r' % name for name in field_names)
295    template = '''class %(typename)s(tuple):
296        '%(typename)s(%(argtxt)s)' \n
297        __slots__ = () \n
298        _fields = %(field_names)r \n
299        def __new__(_cls, %(argtxt)s):
300            'Create new instance of %(typename)s(%(argtxt)s)'
301            return _tuple.__new__(_cls, (%(argtxt)s)) \n
302        @classmethod
303        def _make(cls, iterable, new=tuple.__new__, len=len):
304            'Make a new %(typename)s object from a sequence or iterable'
305            result = new(cls, iterable)
306            if len(result) != %(numfields)d:
307                raise TypeError('Expected %(numfields)d arguments, got %%d' %% len(result))
308            return result \n
309        def __repr__(self):
310            'Return a nicely formatted representation string'
311            return '%(typename)s(%(reprtxt)s)' %% self \n
312        def _asdict(self):
313            'Return a new OrderedDict which maps field names to their values'
314            return OrderedDict(zip(self._fields, self)) \n
315        def _replace(_self, **kwds):
316            'Return a new %(typename)s object replacing specified fields with new values'
317            result = _self._make(map(kwds.pop, %(field_names)r, _self))
318            if kwds:
319                raise ValueError('Got unexpected field names: %%r' %% kwds.keys())
320            return result \n
321        def __getnewargs__(self):
322            'Return self as a plain tuple.  Used by copy and pickle.'
323            return tuple(self) \n\n''' % locals()
324    for i, name in enumerate(field_names):
325        template += "        %s = _property(_itemgetter(%d), doc='Alias for field number %d')\n" % (name, i, i)
326    if verbose:
327        print template
328
329    # Execute the template string in a temporary namespace and
330    # support tracing utilities by setting a value for frame.f_globals['__name__']
331    namespace = dict(_itemgetter=_itemgetter, __name__='namedtuple_%s' % typename,
332                     OrderedDict=OrderedDict, _property=property, _tuple=tuple)
333    try:
334        exec template in namespace
335    except SyntaxError, e:
336        raise SyntaxError(e.message + ':\n' + template)
337    result = namespace[typename]
338
339    # For pickling to work, the __module__ variable needs to be set to the frame
340    # where the named tuple is created.  Bypass this step in enviroments where
341    # sys._getframe is not defined (Jython for example) or sys._getframe is not
342    # defined for arguments greater than 0 (IronPython).
343    try:
344        result.__module__ = _sys._getframe(1).f_globals.get('__name__', '__main__')
345    except (AttributeError, ValueError):
346        pass
347
348    return result
349
350
351########################################################################
352###  Counter
353########################################################################
354
355class Counter(dict):
356    '''Dict subclass for counting hashable items.  Sometimes called a bag
357    or multiset.  Elements are stored as dictionary keys and their counts
358    are stored as dictionary values.
359
360    >>> c = Counter('abcdeabcdabcaba')  # count elements from a string
361
362    >>> c.most_common(3)                # three most common elements
363    [('a', 5), ('b', 4), ('c', 3)]
364    >>> sorted(c)                       # list all unique elements
365    ['a', 'b', 'c', 'd', 'e']
366    >>> ''.join(sorted(c.elements()))   # list elements with repetitions
367    'aaaaabbbbcccdde'
368    >>> sum(c.values())                 # total of all counts
369    15
370
371    >>> c['a']                          # count of letter 'a'
372    5
373    >>> for elem in 'shazam':           # update counts from an iterable
374    ...     c[elem] += 1                # by adding 1 to each element's count
375    >>> c['a']                          # now there are seven 'a'
376    7
377    >>> del c['b']                      # remove all 'b'
378    >>> c['b']                          # now there are zero 'b'
379    0
380
381    >>> d = Counter('simsalabim')       # make another counter
382    >>> c.update(d)                     # add in the second counter
383    >>> c['a']                          # now there are nine 'a'
384    9
385
386    >>> c.clear()                       # empty the counter
387    >>> c
388    Counter()
389
390    Note:  If a count is set to zero or reduced to zero, it will remain
391    in the counter until the entry is deleted or the counter is cleared:
392
393    >>> c = Counter('aaabbc')
394    >>> c['b'] -= 2                     # reduce the count of 'b' by two
395    >>> c.most_common()                 # 'b' is still in, but its count is zero
396    [('a', 3), ('c', 1), ('b', 0)]
397
398    '''
399    # References:
400    #   http://en.wikipedia.org/wiki/Multiset
401    #   http://www.gnu.org/software/smalltalk/manual-base/html_node/Bag.html
402    #   http://www.demo2s.com/Tutorial/Cpp/0380__set-multiset/Catalog0380__set-multiset.htm
403    #   http://code.activestate.com/recipes/259174/
404    #   Knuth, TAOCP Vol. II section 4.6.3
405
406    def __init__(self, iterable=None, **kwds):
407        '''Create a new, empty Counter object.  And if given, count elements
408        from an input iterable.  Or, initialize the count from another mapping
409        of elements to their counts.
410
411        >>> c = Counter()                           # a new, empty counter
412        >>> c = Counter('gallahad')                 # a new counter from an iterable
413        >>> c = Counter({'a': 4, 'b': 2})           # a new counter from a mapping
414        >>> c = Counter(a=4, b=2)                   # a new counter from keyword args
415
416        '''
417        super(Counter, self).__init__()
418        self.update(iterable, **kwds)
419
420    def __missing__(self, key):
421        'The count of elements not in the Counter is zero.'
422        # Needed so that self[missing_item] does not raise KeyError
423        return 0
424
425    def most_common(self, n=None):
426        '''List the n most common elements and their counts from the most
427        common to the least.  If n is None, then list all element counts.
428
429        >>> Counter('abcdeabcdabcaba').most_common(3)
430        [('a', 5), ('b', 4), ('c', 3)]
431
432        '''
433        # Emulate Bag.sortedByCount from Smalltalk
434        if n is None:
435            return sorted(self.iteritems(), key=_itemgetter(1), reverse=True)
436        return _heapq.nlargest(n, self.iteritems(), key=_itemgetter(1))
437
438    def elements(self):
439        '''Iterator over elements repeating each as many times as its count.
440
441        >>> c = Counter('ABCABC')
442        >>> sorted(c.elements())
443        ['A', 'A', 'B', 'B', 'C', 'C']
444
445        # Knuth's example for prime factors of 1836:  2**2 * 3**3 * 17**1
446        >>> prime_factors = Counter({2: 2, 3: 3, 17: 1})
447        >>> product = 1
448        >>> for factor in prime_factors.elements():     # loop over factors
449        ...     product *= factor                       # and multiply them
450        >>> product
451        1836
452
453        Note, if an element's count has been set to zero or is a negative
454        number, elements() will ignore it.
455
456        '''
457        # Emulate Bag.do from Smalltalk and Multiset.begin from C++.
458        return _chain.from_iterable(_starmap(_repeat, self.iteritems()))
459
460    # Override dict methods where necessary
461
462    @classmethod
463    def fromkeys(cls, iterable, v=None):
464        # There is no equivalent method for counters because setting v=1
465        # means that no element can have a count greater than one.
466        raise NotImplementedError(
467            'Counter.fromkeys() is undefined.  Use Counter(iterable) instead.')
468
469    def update(self, iterable=None, **kwds):
470        '''Like dict.update() but add counts instead of replacing them.
471
472        Source can be an iterable, a dictionary, or another Counter instance.
473
474        >>> c = Counter('which')
475        >>> c.update('witch')           # add elements from another iterable
476        >>> d = Counter('watch')
477        >>> c.update(d)                 # add elements from another counter
478        >>> c['h']                      # four 'h' in which, witch, and watch
479        4
480
481        '''
482        # The regular dict.update() operation makes no sense here because the
483        # replace behavior results in the some of original untouched counts
484        # being mixed-in with all of the other counts for a mismash that
485        # doesn't have a straight-forward interpretation in most counting
486        # contexts.  Instead, we implement straight-addition.  Both the inputs
487        # and outputs are allowed to contain zero and negative counts.
488
489        if iterable is not None:
490            if isinstance(iterable, Mapping):
491                if self:
492                    self_get = self.get
493                    for elem, count in iterable.iteritems():
494                        self[elem] = self_get(elem, 0) + count
495                else:
496                    super(Counter, self).update(iterable) # fast path when counter is empty
497            else:
498                self_get = self.get
499                for elem in iterable:
500                    self[elem] = self_get(elem, 0) + 1
501        if kwds:
502            self.update(kwds)
503
504    def subtract(self, iterable=None, **kwds):
505        '''Like dict.update() but subtracts counts instead of replacing them.
506        Counts can be reduced below zero.  Both the inputs and outputs are
507        allowed to contain zero and negative counts.
508
509        Source can be an iterable, a dictionary, or another Counter instance.
510
511        >>> c = Counter('which')
512        >>> c.subtract('witch')             # subtract elements from another iterable
513        >>> c.subtract(Counter('watch'))    # subtract elements from another counter
514        >>> c['h']                          # 2 in which, minus 1 in witch, minus 1 in watch
515        0
516        >>> c['w']                          # 1 in which, minus 1 in witch, minus 1 in watch
517        -1
518
519        '''
520        if iterable is not None:
521            self_get = self.get
522            if isinstance(iterable, Mapping):
523                for elem, count in iterable.items():
524                    self[elem] = self_get(elem, 0) - count
525            else:
526                for elem in iterable:
527                    self[elem] = self_get(elem, 0) - 1
528        if kwds:
529            self.subtract(kwds)
530
531    def copy(self):
532        'Return a shallow copy.'
533        return self.__class__(self)
534
535    def __reduce__(self):
536        return self.__class__, (dict(self),)
537
538    def __delitem__(self, elem):
539        'Like dict.__delitem__() but does not raise KeyError for missing values.'
540        if elem in self:
541            super(Counter, self).__delitem__(elem)
542
543    def __repr__(self):
544        if not self:
545            return '%s()' % self.__class__.__name__
546        items = ', '.join(map('%r: %r'.__mod__, self.most_common()))
547        return '%s({%s})' % (self.__class__.__name__, items)
548
549    # Multiset-style mathematical operations discussed in:
550    #       Knuth TAOCP Volume II section 4.6.3 exercise 19
551    #       and at http://en.wikipedia.org/wiki/Multiset
552    #
553    # Outputs guaranteed to only include positive counts.
554    #
555    # To strip negative and zero counts, add-in an empty counter:
556    #       c += Counter()
557
558    def __add__(self, other):
559        '''Add counts from two counters.
560
561        >>> Counter('abbb') + Counter('bcc')
562        Counter({'b': 4, 'c': 2, 'a': 1})
563
564        '''
565        if not isinstance(other, Counter):
566            return NotImplemented
567        result = Counter()
568        for elem, count in self.items():
569            newcount = count + other[elem]
570            if newcount > 0:
571                result[elem] = newcount
572        for elem, count in other.items():
573            if elem not in self and count > 0:
574                result[elem] = count
575        return result
576
577    def __sub__(self, other):
578        ''' Subtract count, but keep only results with positive counts.
579
580        >>> Counter('abbbc') - Counter('bccd')
581        Counter({'b': 2, 'a': 1})
582
583        '''
584        if not isinstance(other, Counter):
585            return NotImplemented
586        result = Counter()
587        for elem, count in self.items():
588            newcount = count - other[elem]
589            if newcount > 0:
590                result[elem] = newcount
591        for elem, count in other.items():
592            if elem not in self and count < 0:
593                result[elem] = 0 - count
594        return result
595
596    def __or__(self, other):
597        '''Union is the maximum of value in either of the input counters.
598
599        >>> Counter('abbb') | Counter('bcc')
600        Counter({'b': 3, 'c': 2, 'a': 1})
601
602        '''
603        if not isinstance(other, Counter):
604            return NotImplemented
605        result = Counter()
606        for elem, count in self.items():
607            other_count = other[elem]
608            newcount = other_count if count < other_count else count
609            if newcount > 0:
610                result[elem] = newcount
611        for elem, count in other.items():
612            if elem not in self and count > 0:
613                result[elem] = count
614        return result
615
616    def __and__(self, other):
617        ''' Intersection is the minimum of corresponding counts.
618
619        >>> Counter('abbb') & Counter('bcc')
620        Counter({'b': 1})
621
622        '''
623        if not isinstance(other, Counter):
624            return NotImplemented
625        result = Counter()
626        for elem, count in self.items():
627            other_count = other[elem]
628            newcount = count if count < other_count else other_count
629            if newcount > 0:
630                result[elem] = newcount
631        return result
632
633
634if __name__ == '__main__':
635    # verify that instances can be pickled
636    from cPickle import loads, dumps
637    Point = namedtuple('Point', 'x, y', True)
638    p = Point(x=10, y=20)
639    assert p == loads(dumps(p))
640
641    # test and demonstrate ability to override methods
642    class Point(namedtuple('Point', 'x y')):
643        __slots__ = ()
644        @property
645        def hypot(self):
646            return (self.x ** 2 + self.y ** 2) ** 0.5
647        def __str__(self):
648            return 'Point: x=%6.3f  y=%6.3f  hypot=%6.3f' % (self.x, self.y, self.hypot)
649
650    for p in Point(3, 4), Point(14, 5/7.):
651        print p
652
653    class Point(namedtuple('Point', 'x y')):
654        'Point class with optimized _make() and _replace() without error-checking'
655        __slots__ = ()
656        _make = classmethod(tuple.__new__)
657        def _replace(self, _map=map, **kwds):
658            return self._make(_map(kwds.get, ('x', 'y'), self))
659
660    print Point(11, 22)._replace(x=100)
661
662    Point3D = namedtuple('Point3D', Point._fields + ('z',))
663    print Point3D.__doc__
664
665    import doctest
666    TestResults = namedtuple('TestResults', 'failed attempted')
667    print TestResults(*doctest.testmod())
668