1"""Create portable serialized representations of Python objects.
2
3See module cPickle for a (much) faster implementation.
4See module copy_reg for a mechanism for registering custom picklers.
5See module pickletools source for extensive comments.
6
7Classes:
8
9    Pickler
10    Unpickler
11
12Functions:
13
14    dump(object, file)
15    dumps(object) -> string
16    load(file) -> object
17    loads(string) -> object
18
19Misc variables:
20
21    __version__
22    format_version
23    compatible_formats
24
25"""
26
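# A minimal usage sketch (illustrative only; obj and f are hypothetical
# names).  dumps()/loads() work on strings, dump()/load() on file-like
# objects; protocols higher than 0 need files opened in binary mode.
#
#   obj = {'spam': [1, 2, 3]}
#   data = dumps(obj, 2)                      # in-memory round trip
#   assert loads(data) == obj
#
#   f = open('obj.pkl', 'wb')                 # file round trip
#   dump(obj, f, 2)
#   f.close()
#   assert load(open('obj.pkl', 'rb')) == obj
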
__version__ = "$Revision$"       # Code version

from types import *
from copy_reg import dispatch_table
from copy_reg import _extension_registry, _inverted_registry, _extension_cache
import marshal
import sys
import struct
import re

__all__ = ["PickleError", "PicklingError", "UnpicklingError", "Pickler",
           "Unpickler", "dump", "dumps", "load", "loads"]

# These are purely informational; no code uses these.
format_version = "2.0"                  # File format version we write
compatible_formats = ["1.0",            # Original protocol 0
                      "1.1",            # Protocol 0 with INST added
                      "1.2",            # Original protocol 1
                      "1.3",            # Protocol 1 with BINFLOAT added
                      "2.0",            # Protocol 2
                      ]                 # Old format versions we can read

# Keep in synch with cPickle.  This is the highest protocol number we
# know how to read.
HIGHEST_PROTOCOL = 2

# Why use struct.pack() for pickling but marshal.loads() for
# unpickling?  struct.pack() is 40% faster than marshal.dumps(), but
# marshal.loads() is twice as fast as struct.unpack()!
mloads = marshal.loads
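
# Illustrative sketch (not used by the module itself): prefixing four
# little-endian bytes with the marshal type code 'i' lets marshal.loads()
# decode them as a signed 32-bit int, which is what the binary load_*
# methods below rely on.  For example:
#
#   mloads('i' + '\x01\x00\x00\x00')           # -> 1
#   struct.unpack('<i', '\x01\x00\x00\x00')    # -> (1,), the slower route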

class PickleError(Exception):
    """A common base class for the other pickling exceptions."""
    pass

class PicklingError(PickleError):
    """This exception is raised when an unpicklable object is passed to the
    dump() method.

    """
    pass

class UnpicklingError(PickleError):
    """This exception is raised when there is a problem unpickling an object,
    such as a security violation.

    Note that other exceptions may also be raised during unpickling, including
    (but not necessarily limited to) AttributeError, EOFError, ImportError,
    and IndexError.

    """
    pass

# An instance of _Stop is raised by Unpickler.load_stop() in response to
# the STOP opcode, passing the object that is the result of unpickling.
class _Stop(Exception):
    def __init__(self, value):
        self.value = value

# Jython has PyStringMap; it's a dict subclass with string keys
try:
    from org.python.core import PyStringMap
except ImportError:
    PyStringMap = None

# UnicodeType may or may not be exported (normally imported from types)
try:
    UnicodeType
except NameError:
    UnicodeType = None

# Pickle opcodes.  See pickletools.py for extensive docs.  The listing
# here is in kind-of alphabetical order of 1-character pickle code.
# pickletools groups them by purpose.

MARK            = '('   # push special markobject on stack
STOP            = '.'   # every pickle ends with STOP
POP             = '0'   # discard topmost stack item
POP_MARK        = '1'   # discard stack top through topmost markobject
DUP             = '2'   # duplicate top stack item
FLOAT           = 'F'   # push float object; decimal string argument
INT             = 'I'   # push integer or bool; decimal string argument
BININT          = 'J'   # push four-byte signed int
BININT1         = 'K'   # push 1-byte unsigned int
LONG            = 'L'   # push long; decimal string argument
BININT2         = 'M'   # push 2-byte unsigned int
NONE            = 'N'   # push None
PERSID          = 'P'   # push persistent object; id is taken from string arg
BINPERSID       = 'Q'   #  "       "         "  ;  "  "   "     "  stack
REDUCE          = 'R'   # apply callable to argtuple, both on stack
STRING          = 'S'   # push string; NL-terminated string argument
BINSTRING       = 'T'   # push string; counted binary string argument
SHORT_BINSTRING = 'U'   #  "     "   ;    "      "       "      " < 256 bytes
UNICODE         = 'V'   # push Unicode string; raw-unicode-escaped'd argument
BINUNICODE      = 'X'   #   "     "       "  ; counted UTF-8 string argument
APPEND          = 'a'   # append stack top to list below it
BUILD           = 'b'   # call __setstate__ or __dict__.update()
GLOBAL          = 'c'   # push self.find_class(modname, name); 2 string args
DICT            = 'd'   # build a dict from stack items
EMPTY_DICT      = '}'   # push empty dict
APPENDS         = 'e'   # extend list on stack by topmost stack slice
GET             = 'g'   # push item from memo on stack; index is string arg
BINGET          = 'h'   #   "    "    "    "   "   "  ;   "    " 1-byte arg
INST            = 'i'   # build & push class instance
LONG_BINGET     = 'j'   # push item from memo on stack; index is 4-byte arg
LIST            = 'l'   # build list from topmost stack items
EMPTY_LIST      = ']'   # push empty list
OBJ             = 'o'   # build & push class instance
PUT             = 'p'   # store stack top in memo; index is string arg
BINPUT          = 'q'   #   "     "    "   "   " ;   "    " 1-byte arg
LONG_BINPUT     = 'r'   #   "     "    "   "   " ;   "    " 4-byte arg
SETITEM         = 's'   # add key+value pair to dict
TUPLE           = 't'   # build tuple from topmost stack items
EMPTY_TUPLE     = ')'   # push empty tuple
SETITEMS        = 'u'   # modify dict by adding topmost key+value pairs
BINFLOAT        = 'G'   # push float; arg is 8-byte float encoding

TRUE            = 'I01\n'  # not an opcode; see INT docs in pickletools.py
FALSE           = 'I00\n'  # not an opcode; see INT docs in pickletools.py

# Protocol 2

PROTO           = '\x80'  # identify pickle protocol
NEWOBJ          = '\x81'  # build object by applying cls.__new__ to argtuple
EXT1            = '\x82'  # push object from extension registry; 1-byte index
EXT2            = '\x83'  # ditto, but 2-byte index
EXT4            = '\x84'  # ditto, but 4-byte index
TUPLE1          = '\x85'  # build 1-tuple from stack top
TUPLE2          = '\x86'  # build 2-tuple from two topmost stack items
TUPLE3          = '\x87'  # build 3-tuple from three topmost stack items
NEWTRUE         = '\x88'  # push True
NEWFALSE        = '\x89'  # push False
LONG1           = '\x8a'  # push long from < 256 bytes
LONG4           = '\x8b'  # push really big long

_tuplesize2code = [EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3]


__all__.extend([x for x in dir() if re.match("[A-Z][A-Z0-9_]+$",x)])
del x

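# Illustrative sketch: the opcode stream of any pickle can be inspected
# with the pickletools module mentioned above, e.g.
#
#   import pickletools
#   pickletools.dis(dumps((1, 2), 2))   # symbolic listing on stdout
#
# (dumps is defined near the end of this module.)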

# Pickling machinery

class Pickler:

    def __init__(self, file, protocol=None):
        """This takes a file-like object for writing a pickle data stream.

        The optional protocol argument tells the pickler to use the
        given protocol; supported protocols are 0, 1, 2.  The default
        protocol is 0, to be backwards compatible.  (Protocol 0 is the
        only protocol that can be written to a file opened in text
        mode and read back successfully.  When using a protocol higher
        than 0, make sure the file is opened in binary mode, both when
        pickling and unpickling.)

        Protocol 1 is more efficient than protocol 0; protocol 2 is
        more efficient than protocol 1.

        Specifying a negative protocol version selects the highest
        protocol version supported.  The higher the protocol used, the
        more recent the version of Python needed to read the pickle
        produced.

        The file parameter must have a write() method that accepts a single
        string argument.  It can thus be an open file object, a StringIO
        object, or any other custom object that meets this interface.

        """
        if protocol is None:
            protocol = 0
        if protocol < 0:
            protocol = HIGHEST_PROTOCOL
        elif not 0 <= protocol <= HIGHEST_PROTOCOL:
            raise ValueError("pickle protocol must be <= %d" % HIGHEST_PROTOCOL)
        self.write = file.write
        self.memo = {}
        self.proto = int(protocol)
        self.bin = protocol >= 1
        self.fast = 0
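
    # Illustrative sketch (hypothetical names): writing a protocol 2
    # pickle to an in-memory buffer.
    #
    #   from cStringIO import StringIO
    #   buf = StringIO()
    #   Pickler(buf, 2).dump({'spam': 1})
    #   data = buf.getvalue()
    #
    # A negative protocol, e.g. Pickler(buf, -1), selects HIGHEST_PROTOCOL.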

    def clear_memo(self):
        """Clears the pickler's "memo".

        The memo is the data structure that remembers which objects the
        pickler has already seen, so that shared or recursive objects are
        pickled by reference and not by value.  This method is useful when
        re-using picklers.

        """
        self.memo.clear()
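
    # Illustrative sketch (hypothetical names): clearing the memo between
    # dumps makes each pickle in the stream self-contained, so obj2 is
    # written out in full even if it shares subobjects with obj1.
    #
    #   p = Pickler(buf, 1)
    #   p.dump(obj1)
    #   p.clear_memo()
    #   p.dump(obj2)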

    def dump(self, obj):
        """Write a pickled representation of obj to the open file."""
        if self.proto >= 2:
            self.write(PROTO + chr(self.proto))
        self.save(obj)
        self.write(STOP)

    def memoize(self, obj):
        """Store an object in the memo."""

        # The Pickler memo is a dictionary mapping object ids to 2-tuples
        # that contain the Unpickler memo key and the object being memoized.
        # The memo key is written to the pickle and will become
        # the key in the Unpickler's memo.  The object is stored in the
        # Pickler memo so that transient objects are kept alive during
        # pickling.

        # The use of the Unpickler memo length as the memo key is just a
        # convention.  The only requirement is that the memo values be unique.
        # But there appears no advantage to any other scheme, and this
        # scheme allows the Unpickler memo to be implemented as a plain (but
        # growable) array, indexed by memo key.
        if self.fast:
            return
        assert id(obj) not in self.memo
        memo_len = len(self.memo)
        self.write(self.put(memo_len))
        self.memo[id(obj)] = memo_len, obj

    # Return a PUT (BINPUT, LONG_BINPUT) opcode string, with argument i.
    def put(self, i, pack=struct.pack):
        if self.bin:
            if i < 256:
                return BINPUT + chr(i)
            else:
                return LONG_BINPUT + pack("<i", i)

        return PUT + repr(i) + '\n'

    # Return a GET (BINGET, LONG_BINGET) opcode string, with argument i.
    def get(self, i, pack=struct.pack):
        if self.bin:
            if i < 256:
                return BINGET + chr(i)
            else:
                return LONG_BINGET + pack("<i", i)

        return GET + repr(i) + '\n'

    def save(self, obj):
        # Check for persistent id (defined by a subclass)
        pid = self.persistent_id(obj)
        if pid:
            self.save_pers(pid)
            return

        # Check the memo
        x = self.memo.get(id(obj))
        if x:
            self.write(self.get(x[0]))
            return

        # Check the type dispatch table
        t = type(obj)
        f = self.dispatch.get(t)
        if f:
            f(self, obj) # Call unbound method with explicit self
            return

        # Check for a class with a custom metaclass; treat as regular class
        try:
            issc = issubclass(t, TypeType)
        except TypeError: # t is not a class (old Boost; see SF #502085)
            issc = 0
        if issc:
            self.save_global(obj)
            return

        # Check copy_reg.dispatch_table
        reduce = dispatch_table.get(t)
        if reduce:
            rv = reduce(obj)
        else:
            # Check for a __reduce_ex__ method, fall back to __reduce__
            reduce = getattr(obj, "__reduce_ex__", None)
            if reduce:
                rv = reduce(self.proto)
            else:
                reduce = getattr(obj, "__reduce__", None)
                if reduce:
                    rv = reduce()
                else:
                    raise PicklingError("Can't pickle %r object: %r" %
                                        (t.__name__, obj))

        # Check for string returned by reduce(), meaning "save as global"
        if type(rv) is StringType:
            self.save_global(obj, rv)
            return

        # Assert that reduce() returned a tuple
        if type(rv) is not TupleType:
            raise PicklingError("%s must return string or tuple" % reduce)

        # Assert that it returned an appropriately sized tuple
        l = len(rv)
        if not (2 <= l <= 5):
            raise PicklingError("Tuple returned by %s must have "
                                "two to five elements" % reduce)

        # Save the reduce() output and finally memoize the object
        self.save_reduce(obj=obj, *rv)

    def persistent_id(self, obj):
        # This exists so a subclass can override it
        return None

    def save_pers(self, pid):
        # Save a persistent id reference
        if self.bin:
            self.save(pid)
            self.write(BINPERSID)
        else:
            self.write(PERSID + str(pid) + '\n')
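
    # Illustrative sketch (hypothetical classes): the persistent-id hook.
    # A Pickler subclass overrides persistent_id() to replace selected
    # objects by an external reference, and a matching Unpickler subclass
    # (or a persistent_load attribute) resolves the reference at load time.
    #
    #   class DBPickler(Pickler):
    #       def persistent_id(self, obj):
    #           if isinstance(obj, Record):       # Record is hypothetical
    #               return "record:%d" % obj.key
    #           return None                       # pickle everything else
    #
    #   class DBUnpickler(Unpickler):
    #       def persistent_load(self, pid):
    #           return lookup_record(pid)         # hypothetical lookup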

    def save_reduce(self, func, args, state=None,
                    listitems=None, dictitems=None, obj=None):
        # This API is called by some subclasses

        # Assert that args is a tuple or None
        if not isinstance(args, TupleType):
            raise PicklingError("args from reduce() should be a tuple")

        # Assert that func is callable
        if not hasattr(func, '__call__'):
            raise PicklingError("func from reduce should be callable")

        save = self.save
        write = self.write

        # Protocol 2 special case: if func's name is __newobj__, use NEWOBJ
        if self.proto >= 2 and getattr(func, "__name__", "") == "__newobj__":
            # A __reduce__ implementation can direct protocol 2 to
            # use the more efficient NEWOBJ opcode, while still
            # allowing protocol 0 and 1 to work normally.  For this to
            # work, the function returned by __reduce__ should be
            # called __newobj__, and its first argument should be a
            # new-style class.  The implementation for __newobj__
            # should be as follows, although pickle has no way to
            # verify this:
            #
            # def __newobj__(cls, *args):
            #     return cls.__new__(cls, *args)
            #
            # Protocols 0 and 1 will pickle a reference to __newobj__,
            # while protocol 2 (and above) will pickle a reference to
            # cls, the remaining args tuple, and the NEWOBJ code,
            # which calls cls.__new__(cls, *args) at unpickling time
            # (see load_newobj below).  If __reduce__ returns a
            # three-tuple, the state from the third tuple item will be
            # pickled regardless of the protocol, calling __setstate__
            # at unpickling time (see load_build below).
            #
            # Note that no standard __newobj__ implementation exists;
            # you have to provide your own.  This is to enforce
            # compatibility with Python 2.2 (pickles written using
            # protocol 0 or 1 in Python 2.3 should be unpicklable by
            # Python 2.2).
            cls = args[0]
            if not hasattr(cls, "__new__"):
                raise PicklingError(
                    "args[0] from __newobj__ args has no __new__")
            if obj is not None and cls is not obj.__class__:
                raise PicklingError(
                    "args[0] from __newobj__ args has the wrong class")
            args = args[1:]
            save(cls)
            save(args)
            write(NEWOBJ)
        else:
            save(func)
            save(args)
            write(REDUCE)

        if obj is not None:
            self.memoize(obj)

        # More new special cases (that work with older protocols as
        # well): when __reduce__ returns a tuple with 4 or 5 items,
        # the 4th and 5th item should be iterators that provide list
        # items and dict items (as (key, value) tuples), or None.

        if listitems is not None:
            self._batch_appends(listitems)

        if dictitems is not None:
            self._batch_setitems(dictitems)

        if state is not None:
            save(state)
            write(BUILD)
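
    # Illustrative sketch (hypothetical class): a __reduce__ that uses the
    # optional 4th item described above, so list items are streamed through
    # _batch_appends() instead of being stored in the state dict.
    #
    #   class MyList(list):
    #       def __reduce__(self):
    #           # (callable, args, state, listitems, dictitems)
    #           return (MyList, (), None, iter(self), None)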

    # Methods below this point are dispatched through the dispatch table

    dispatch = {}

    def save_none(self, obj):
        self.write(NONE)
    dispatch[NoneType] = save_none

    def save_bool(self, obj):
        if self.proto >= 2:
            self.write(obj and NEWTRUE or NEWFALSE)
        else:
            self.write(obj and TRUE or FALSE)
    dispatch[bool] = save_bool

    def save_int(self, obj, pack=struct.pack):
        if self.bin:
            # If the int is small enough to fit in a signed 4-byte 2's-comp
            # format, we can store it more efficiently than the general
            # case.
            # First one- and two-byte unsigned ints:
            if obj >= 0:
                if obj <= 0xff:
                    self.write(BININT1 + chr(obj))
                    return
                if obj <= 0xffff:
                    self.write("%c%c%c" % (BININT2, obj&0xff, obj>>8))
                    return
            # Next check for 4-byte signed ints:
            high_bits = obj >> 31  # note that Python shift sign-extends
            if high_bits == 0 or high_bits == -1:
                # All high bits are copies of bit 2**31, so the value
                # fits in a 4-byte signed int.
                self.write(BININT + pack("<i", obj))
                return
        # Text pickle, or int too big to fit in signed 4-byte format.
        self.write(INT + repr(obj) + '\n')
    dispatch[IntType] = save_int
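
    # Worked example of the little-endian encodings above (illustrative):
    # 0x34 pickles as BININT1 + '\x34'; 0x1234 as BININT2 + '\x34\x12'
    # (low byte first); and -1 as BININT + '\xff\xff\xff\xff'.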

    def save_long(self, obj, pack=struct.pack):
        if self.proto >= 2:
            bytes = encode_long(obj)
            n = len(bytes)
            if n < 256:
                self.write(LONG1 + chr(n) + bytes)
            else:
                self.write(LONG4 + pack("<i", n) + bytes)
            return
        self.write(LONG + repr(obj) + '\n')
    dispatch[LongType] = save_long

    def save_float(self, obj, pack=struct.pack):
        if self.bin:
            self.write(BINFLOAT + pack('>d', obj))
        else:
            self.write(FLOAT + repr(obj) + '\n')
    dispatch[FloatType] = save_float

    def save_string(self, obj, pack=struct.pack):
        if self.bin:
            n = len(obj)
            if n < 256:
                self.write(SHORT_BINSTRING + chr(n) + obj)
            else:
                self.write(BINSTRING + pack("<i", n) + obj)
        else:
            self.write(STRING + repr(obj) + '\n')
        self.memoize(obj)
    dispatch[StringType] = save_string

    def save_unicode(self, obj, pack=struct.pack):
        if self.bin:
            encoding = obj.encode('utf-8')
            n = len(encoding)
            self.write(BINUNICODE + pack("<i", n) + encoding)
        else:
            obj = obj.replace("\\", "\\u005c")
            obj = obj.replace("\n", "\\u000a")
            self.write(UNICODE + obj.encode('raw-unicode-escape') + '\n')
        self.memoize(obj)
    dispatch[UnicodeType] = save_unicode

    if StringType is UnicodeType:
        # This is true for Jython
        def save_string(self, obj, pack=struct.pack):
            unicode = obj.isunicode()

            if self.bin:
                if unicode:
                    obj = obj.encode("utf-8")
                l = len(obj)
                if l < 256 and not unicode:
                    self.write(SHORT_BINSTRING + chr(l) + obj)
                else:
                    s = pack("<i", l)
                    if unicode:
                        self.write(BINUNICODE + s + obj)
                    else:
                        self.write(BINSTRING + s + obj)
            else:
                if unicode:
                    obj = obj.replace("\\", "\\u005c")
                    obj = obj.replace("\n", "\\u000a")
                    obj = obj.encode('raw-unicode-escape')
                    self.write(UNICODE + obj + '\n')
                else:
                    self.write(STRING + repr(obj) + '\n')
            self.memoize(obj)
        dispatch[StringType] = save_string

    def save_tuple(self, obj):
        write = self.write
        proto = self.proto

        n = len(obj)
        if n == 0:
            if proto:
                write(EMPTY_TUPLE)
            else:
                write(MARK + TUPLE)
            return

        save = self.save
        memo = self.memo
        if n <= 3 and proto >= 2:
            for element in obj:
                save(element)
            # Subtle.  Same as in the big comment below.
            if id(obj) in memo:
                get = self.get(memo[id(obj)][0])
                write(POP * n + get)
            else:
                write(_tuplesize2code[n])
                self.memoize(obj)
            return

        # proto 0 or proto 1 and tuple isn't empty, or proto > 1 and tuple
        # has more than 3 elements.
        write(MARK)
        for element in obj:
            save(element)

        if id(obj) in memo:
            # Subtle.  obj was not in memo when we entered save_tuple(), so
            # the process of saving the tuple's elements must have saved
            # the tuple itself:  the tuple is recursive.  The proper action
            # now is to throw away everything we put on the stack, and
            # simply GET the tuple (it's already constructed).  This check
            # could have been done in the "for element" loop instead, but
            # recursive tuples are a rare thing.
            get = self.get(memo[id(obj)][0])
            if proto:
                write(POP_MARK + get)
            else:   # proto 0 -- POP_MARK not available
                write(POP * (n+1) + get)
            return

        # No recursion.
        self.write(TUPLE)
        self.memoize(obj)

    dispatch[TupleType] = save_tuple

    # save_empty_tuple() isn't used by anything in Python 2.3.  However, I
    # found a Pickler subclass in Zope3 that calls it, so it's not harmless
    # to remove it.
    def save_empty_tuple(self, obj):
        self.write(EMPTY_TUPLE)

    def save_list(self, obj):
        write = self.write

        if self.bin:
            write(EMPTY_LIST)
        else:   # proto 0 -- can't use EMPTY_LIST
            write(MARK + LIST)

        self.memoize(obj)
        self._batch_appends(iter(obj))

    dispatch[ListType] = save_list

    # Keep in synch with cPickle's BATCHSIZE.  Nothing will break if it gets
    # out of synch, though.
    _BATCHSIZE = 1000

    def _batch_appends(self, items):
        # Helper to batch up APPENDS sequences
        save = self.save
        write = self.write

        if not self.bin:
            for x in items:
                save(x)
                write(APPEND)
            return

        r = xrange(self._BATCHSIZE)
        while items is not None:
            tmp = []
            for i in r:
                try:
                    x = items.next()
                    tmp.append(x)
                except StopIteration:
                    items = None
                    break
            n = len(tmp)
            if n > 1:
                write(MARK)
                for x in tmp:
                    save(x)
                write(APPENDS)
            elif n:
                save(tmp[0])
                write(APPEND)
            # else tmp is empty, and we're done
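
    # Illustrative sketch of the batching above: under protocol 1 or 2 a
    # three-element list pickles roughly as
    #
    #   EMPTY_LIST BINPUT MARK <item> <item> <item> APPENDS
    #
    # while a one-element list uses a single APPEND and no MARK.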

    def save_dict(self, obj):
        write = self.write

        if self.bin:
            write(EMPTY_DICT)
        else:   # proto 0 -- can't use EMPTY_DICT
            write(MARK + DICT)

        self.memoize(obj)
        self._batch_setitems(obj.iteritems())

    dispatch[DictionaryType] = save_dict
    if not PyStringMap is None:
        dispatch[PyStringMap] = save_dict

    def _batch_setitems(self, items):
        # Helper to batch up SETITEMS sequences; proto >= 1 only
        save = self.save
        write = self.write

        if not self.bin:
            for k, v in items:
                save(k)
                save(v)
                write(SETITEM)
            return

        r = xrange(self._BATCHSIZE)
        while items is not None:
            tmp = []
            for i in r:
                try:
                    tmp.append(items.next())
                except StopIteration:
                    items = None
                    break
            n = len(tmp)
            if n > 1:
                write(MARK)
                for k, v in tmp:
                    save(k)
                    save(v)
                write(SETITEMS)
            elif n:
                k, v = tmp[0]
                save(k)
                save(v)
                write(SETITEM)
            # else tmp is empty, and we're done

    def save_inst(self, obj):
        cls = obj.__class__

        memo  = self.memo
        write = self.write
        save  = self.save

        if hasattr(obj, '__getinitargs__'):
            args = obj.__getinitargs__()
            len(args) # XXX Assert it's a sequence
            _keep_alive(args, memo)
        else:
            args = ()

        write(MARK)

        if self.bin:
            save(cls)
            for arg in args:
                save(arg)
            write(OBJ)
        else:
            for arg in args:
                save(arg)
            write(INST + cls.__module__ + '\n' + cls.__name__ + '\n')

        self.memoize(obj)

        try:
            getstate = obj.__getstate__
        except AttributeError:
            stuff = obj.__dict__
        else:
            stuff = getstate()
            _keep_alive(stuff, memo)
        save(stuff)
        write(BUILD)

    dispatch[InstanceType] = save_inst

    def save_global(self, obj, name=None, pack=struct.pack):
        write = self.write
        memo = self.memo

        if name is None:
            name = obj.__name__

        module = getattr(obj, "__module__", None)
        if module is None:
            module = whichmodule(obj, name)

        try:
            __import__(module)
            mod = sys.modules[module]
            klass = getattr(mod, name)
        except (ImportError, KeyError, AttributeError):
            raise PicklingError(
                "Can't pickle %r: it's not found as %s.%s" %
                (obj, module, name))
        else:
            if klass is not obj:
                raise PicklingError(
                    "Can't pickle %r: it's not the same object as %s.%s" %
                    (obj, module, name))

        if self.proto >= 2:
            code = _extension_registry.get((module, name))
            if code:
                assert code > 0
                if code <= 0xff:
                    write(EXT1 + chr(code))
                elif code <= 0xffff:
                    write("%c%c%c" % (EXT2, code&0xff, code>>8))
                else:
                    write(EXT4 + pack("<i", code))
                return

        write(GLOBAL + module + '\n' + name + '\n')
        self.memoize(obj)

    dispatch[ClassType] = save_global
    dispatch[FunctionType] = save_global
    dispatch[BuiltinFunctionType] = save_global
    dispatch[TypeType] = save_global
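
    # Illustrative sketch (hypothetical values): the extension registry
    # used above is populated through copy_reg, letting protocol 2 emit a
    # short EXT1/EXT2/EXT4 code instead of a GLOBAL with the full
    # "module\nname\n" text:
    #
    #   import copy_reg
    #   copy_reg.add_extension('mymodule', 'MyClass', 240)
    #
    # Both the pickling and the unpickling side must register the same code.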

# Pickling helpers

def _keep_alive(x, memo):
    """Keeps a reference to the object x in the memo.

    Because we remember objects by their id, we have
    to ensure that possibly temporary objects are kept
    alive by referencing them.
    We store a reference at the id of the memo, which should
    normally not be used unless someone tries to deepcopy
    the memo itself...
    """
    try:
        memo[id(memo)].append(x)
    except KeyError:
        # aha, this is the first one :-)
        memo[id(memo)]=[x]


# A cache for whichmodule(), mapping a function object to the name of
# the module in which the function was found.

classmap = {} # called classmap for backwards compatibility

def whichmodule(func, funcname):
    """Figure out the module in which a function occurs.

    Search sys.modules for the module.
    Cache in classmap.
    Return a module name.
    If the function cannot be found, return "__main__".
    """
    # Python functions should always get an __module__ from their globals.
    mod = getattr(func, "__module__", None)
    if mod is not None:
        return mod
    if func in classmap:
        return classmap[func]

    for name, module in sys.modules.items():
        if module is None:
            continue # skip dummy package entries
        if name != '__main__' and getattr(module, funcname, None) is func:
            break
    else:
        name = '__main__'
    classmap[func] = name
    return name


# Unpickling machinery

class Unpickler:

    def __init__(self, file):
        """This takes a file-like object for reading a pickle data stream.

        The protocol version of the pickle is detected automatically, so no
        proto argument is needed.

        The file-like object must have two methods, a read() method that
        takes an integer argument, and a readline() method that requires no
        arguments.  Both methods should return a string.  Thus the file-like
        object can be a file object opened for reading, a StringIO object,
        or any other custom object that meets this interface.
        """
        self.readline = file.readline
        self.read = file.read
        self.memo = {}
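
    # Illustrative sketch (hypothetical names): reading one object back
    # from an in-memory pickle string.
    #
    #   from cStringIO import StringIO
    #   obj = Unpickler(StringIO(data)).load()
    #
    # loads(), defined at the end of this module, is the usual shorthand.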

    def load(self):
        """Read a pickled object representation from the open file.

        Return the reconstituted object hierarchy specified in the file.
        """
        self.mark = object() # any new unique object
        self.stack = []
        self.append = self.stack.append
        read = self.read
        dispatch = self.dispatch
        try:
            while 1:
                key = read(1)
                dispatch[key](self)
        except _Stop, stopinst:
            return stopinst.value

    # Return largest index k such that self.stack[k] is self.mark.
    # If the stack doesn't contain a mark, eventually raises IndexError.
    # This could be sped up by maintaining another stack, of indices at which
    # the mark appears.  For that matter, the latter stack would suffice,
    # and we wouldn't need to push mark objects on self.stack at all.
    # Doing so is probably a good thing, though, since if the pickle is
    # corrupt (or hostile) we may get a clue from finding self.mark embedded
    # in unpickled objects.
    def marker(self):
        stack = self.stack
        mark = self.mark
        k = len(stack)-1
        while stack[k] is not mark: k = k-1
        return k

    dispatch = {}

    def load_eof(self):
        raise EOFError
    dispatch[''] = load_eof

    def load_proto(self):
        proto = ord(self.read(1))
        if not 0 <= proto <= 2:
            raise ValueError, "unsupported pickle protocol: %d" % proto
    dispatch[PROTO] = load_proto

    def load_persid(self):
        pid = self.readline()[:-1]
        self.append(self.persistent_load(pid))
    dispatch[PERSID] = load_persid

    def load_binpersid(self):
        pid = self.stack.pop()
        self.append(self.persistent_load(pid))
    dispatch[BINPERSID] = load_binpersid

    def load_none(self):
        self.append(None)
    dispatch[NONE] = load_none

    def load_false(self):
        self.append(False)
    dispatch[NEWFALSE] = load_false

    def load_true(self):
        self.append(True)
    dispatch[NEWTRUE] = load_true

    def load_int(self):
        data = self.readline()
        if data == FALSE[1:]:
            val = False
        elif data == TRUE[1:]:
            val = True
        else:
            try:
                val = int(data)
            except ValueError:
                val = long(data)
        self.append(val)
    dispatch[INT] = load_int

    def load_binint(self):
        self.append(mloads('i' + self.read(4)))
    dispatch[BININT] = load_binint

    def load_binint1(self):
        self.append(ord(self.read(1)))
    dispatch[BININT1] = load_binint1

    def load_binint2(self):
        self.append(mloads('i' + self.read(2) + '\000\000'))
    dispatch[BININT2] = load_binint2

    def load_long(self):
        self.append(long(self.readline()[:-1], 0))
    dispatch[LONG] = load_long

    def load_long1(self):
        n = ord(self.read(1))
        bytes = self.read(n)
        self.append(decode_long(bytes))
    dispatch[LONG1] = load_long1

    def load_long4(self):
        n = mloads('i' + self.read(4))
        bytes = self.read(n)
        self.append(decode_long(bytes))
    dispatch[LONG4] = load_long4

    def load_float(self):
        self.append(float(self.readline()[:-1]))
    dispatch[FLOAT] = load_float

    def load_binfloat(self, unpack=struct.unpack):
        self.append(unpack('>d', self.read(8))[0])
    dispatch[BINFLOAT] = load_binfloat

    def load_string(self):
        rep = self.readline()[:-1]
        for q in "\"'": # double or single quote
            if rep.startswith(q):
                if not rep.endswith(q):
                    raise ValueError, "insecure string pickle"
                rep = rep[len(q):-len(q)]
                break
        else:
            raise ValueError, "insecure string pickle"
        self.append(rep.decode("string-escape"))
    dispatch[STRING] = load_string

    def load_binstring(self):
        len = mloads('i' + self.read(4))
        self.append(self.read(len))
    dispatch[BINSTRING] = load_binstring

    def load_unicode(self):
        self.append(unicode(self.readline()[:-1],'raw-unicode-escape'))
    dispatch[UNICODE] = load_unicode

    def load_binunicode(self):
        len = mloads('i' + self.read(4))
        self.append(unicode(self.read(len),'utf-8'))
    dispatch[BINUNICODE] = load_binunicode

    def load_short_binstring(self):
        len = ord(self.read(1))
        self.append(self.read(len))
    dispatch[SHORT_BINSTRING] = load_short_binstring

    def load_tuple(self):
        k = self.marker()
        self.stack[k:] = [tuple(self.stack[k+1:])]
    dispatch[TUPLE] = load_tuple

    def load_empty_tuple(self):
        self.stack.append(())
    dispatch[EMPTY_TUPLE] = load_empty_tuple

    def load_tuple1(self):
        self.stack[-1] = (self.stack[-1],)
    dispatch[TUPLE1] = load_tuple1

    def load_tuple2(self):
        self.stack[-2:] = [(self.stack[-2], self.stack[-1])]
    dispatch[TUPLE2] = load_tuple2

    def load_tuple3(self):
        self.stack[-3:] = [(self.stack[-3], self.stack[-2], self.stack[-1])]
    dispatch[TUPLE3] = load_tuple3

    def load_empty_list(self):
        self.stack.append([])
    dispatch[EMPTY_LIST] = load_empty_list

    def load_empty_dictionary(self):
        self.stack.append({})
    dispatch[EMPTY_DICT] = load_empty_dictionary

    def load_list(self):
        k = self.marker()
        self.stack[k:] = [self.stack[k+1:]]
    dispatch[LIST] = load_list

    def load_dict(self):
        k = self.marker()
        d = {}
        items = self.stack[k+1:]
        for i in range(0, len(items), 2):
            key = items[i]
            value = items[i+1]
            d[key] = value
        self.stack[k:] = [d]
    dispatch[DICT] = load_dict

    # INST and OBJ differ only in how they get a class object.  Not only is
    # it sensible to do the rest in a common routine; the two routines
    # previously diverged and grew different bugs.
    # klass is the class to instantiate, and k points to the topmost mark
    # object, following which are the arguments for klass.__init__.
    def _instantiate(self, klass, k):
        args = tuple(self.stack[k+1:])
        del self.stack[k:]
        instantiated = 0
        if (not args and
                type(klass) is ClassType and
                not hasattr(klass, "__getinitargs__")):
            try:
                value = _EmptyClass()
                value.__class__ = klass
                instantiated = 1
            except RuntimeError:
                # In restricted execution, assignment to inst.__class__ is
                # prohibited
                pass
        if not instantiated:
            try:
                value = klass(*args)
            except TypeError, err:
                raise TypeError, "in constructor for %s: %s" % (
                    klass.__name__, str(err)), sys.exc_info()[2]
        self.append(value)

    def load_inst(self):
        module = self.readline()[:-1]
        name = self.readline()[:-1]
        klass = self.find_class(module, name)
        self._instantiate(klass, self.marker())
    dispatch[INST] = load_inst

    def load_obj(self):
        # Stack is ... markobject classobject arg1 arg2 ...
        k = self.marker()
        klass = self.stack.pop(k+1)
        self._instantiate(klass, k)
    dispatch[OBJ] = load_obj

    def load_newobj(self):
        args = self.stack.pop()
        cls = self.stack[-1]
        obj = cls.__new__(cls, *args)
        self.stack[-1] = obj
    dispatch[NEWOBJ] = load_newobj

    def load_global(self):
        module = self.readline()[:-1]
        name = self.readline()[:-1]
        klass = self.find_class(module, name)
        self.append(klass)
    dispatch[GLOBAL] = load_global

    def load_ext1(self):
        code = ord(self.read(1))
        self.get_extension(code)
    dispatch[EXT1] = load_ext1

    def load_ext2(self):
        code = mloads('i' + self.read(2) + '\000\000')
        self.get_extension(code)
    dispatch[EXT2] = load_ext2

    def load_ext4(self):
        code = mloads('i' + self.read(4))
        self.get_extension(code)
    dispatch[EXT4] = load_ext4

    def get_extension(self, code):
        nil = []
        obj = _extension_cache.get(code, nil)
        if obj is not nil:
            self.append(obj)
            return
        key = _inverted_registry.get(code)
        if not key:
            raise ValueError("unregistered extension code %d" % code)
        obj = self.find_class(*key)
        _extension_cache[code] = obj
        self.append(obj)

    def find_class(self, module, name):
        # Subclasses may override this
        __import__(module)
        mod = sys.modules[module]
        klass = getattr(mod, name)
        return klass
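
    # Illustrative sketch (hypothetical policy): because find_class() will
    # import and return arbitrary globals, untrusted pickles are unsafe to
    # load.  A subclass can restrict what may be resolved, e.g.
    #
    #   class SafeUnpickler(Unpickler):
    #       def find_class(self, module, name):
    #           if (module, name) not in allowed:   # 'allowed' is hypothetical
    #               raise UnpicklingError("global %s.%s is forbidden"
    #                                     % (module, name))
    #           return Unpickler.find_class(self, module, name)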

    def load_reduce(self):
        stack = self.stack
        args = stack.pop()
        func = stack[-1]
        value = func(*args)
        stack[-1] = value
    dispatch[REDUCE] = load_reduce

    def load_pop(self):
        del self.stack[-1]
    dispatch[POP] = load_pop

    def load_pop_mark(self):
        k = self.marker()
        del self.stack[k:]
    dispatch[POP_MARK] = load_pop_mark

    def load_dup(self):
        self.append(self.stack[-1])
    dispatch[DUP] = load_dup

    def load_get(self):
        self.append(self.memo[self.readline()[:-1]])
    dispatch[GET] = load_get

    def load_binget(self):
        i = ord(self.read(1))
        self.append(self.memo[repr(i)])
    dispatch[BINGET] = load_binget

    def load_long_binget(self):
        i = mloads('i' + self.read(4))
        self.append(self.memo[repr(i)])
    dispatch[LONG_BINGET] = load_long_binget

    def load_put(self):
        self.memo[self.readline()[:-1]] = self.stack[-1]
    dispatch[PUT] = load_put

    def load_binput(self):
        i = ord(self.read(1))
        self.memo[repr(i)] = self.stack[-1]
    dispatch[BINPUT] = load_binput

    def load_long_binput(self):
        i = mloads('i' + self.read(4))
        self.memo[repr(i)] = self.stack[-1]
    dispatch[LONG_BINPUT] = load_long_binput

    def load_append(self):
        stack = self.stack
        value = stack.pop()
        list = stack[-1]
        list.append(value)
    dispatch[APPEND] = load_append

    def load_appends(self):
        stack = self.stack
        mark = self.marker()
        list = stack[mark - 1]
        list.extend(stack[mark + 1:])
        del stack[mark:]
    dispatch[APPENDS] = load_appends

    def load_setitem(self):
        stack = self.stack
        value = stack.pop()
        key = stack.pop()
        dict = stack[-1]
        dict[key] = value
    dispatch[SETITEM] = load_setitem

    def load_setitems(self):
        stack = self.stack
        mark = self.marker()
        dict = stack[mark - 1]
        for i in range(mark + 1, len(stack), 2):
            dict[stack[i]] = stack[i + 1]

        del stack[mark:]
    dispatch[SETITEMS] = load_setitems

    def load_build(self):
        stack = self.stack
        state = stack.pop()
        inst = stack[-1]
        setstate = getattr(inst, "__setstate__", None)
        if setstate:
            setstate(state)
            return
        slotstate = None
        if isinstance(state, tuple) and len(state) == 2:
            state, slotstate = state
        if state:
            try:
                d = inst.__dict__
                try:
                    for k, v in state.iteritems():
                        d[intern(k)] = v
                # keys in state don't have to be strings
                # don't blow up, but don't go out of our way
                except TypeError:
                    d.update(state)

            except RuntimeError:
                # XXX In restricted execution, the instance's __dict__
                # is not accessible.  Use the old way of unpickling
                # the instance variables.  This is a semantic
                # difference when unpickling in restricted
                # vs. unrestricted modes.
                # Note, however, that cPickle has never tried to do the
                # .update() business, and always uses
                #     PyObject_SetItem(inst.__dict__, key, value) in a
                # loop over state.items().
                for k, v in state.items():
                    setattr(inst, k, v)
        if slotstate:
            for k, v in slotstate.items():
                setattr(inst, k, v)
    dispatch[BUILD] = load_build

    def load_mark(self):
        self.append(self.mark)
    dispatch[MARK] = load_mark

    def load_stop(self):
        value = self.stack.pop()
        raise _Stop(value)
    dispatch[STOP] = load_stop

# Helper class for load_inst/load_obj

class _EmptyClass:
    pass

# Encode/decode longs in linear time.

import binascii as _binascii

def encode_long(x):
    r"""Encode a long to a two's complement little-endian binary string.
    Note that 0L is a special case, returning an empty string, to save a
    byte in the LONG1 pickling context.

    >>> encode_long(0L)
    ''
    >>> encode_long(255L)
    '\xff\x00'
    >>> encode_long(32767L)
    '\xff\x7f'
    >>> encode_long(-256L)
    '\x00\xff'
    >>> encode_long(-32768L)
    '\x00\x80'
    >>> encode_long(-128L)
    '\x80'
    >>> encode_long(127L)
    '\x7f'
    >>>
    """

    if x == 0:
        return ''
    if x > 0:
        ashex = hex(x)
        assert ashex.startswith("0x")
        njunkchars = 2 + ashex.endswith('L')
        nibbles = len(ashex) - njunkchars
        if nibbles & 1:
            # need an even # of nibbles for unhexlify
            ashex = "0x0" + ashex[2:]
        elif int(ashex[2], 16) >= 8:
            # "looks negative", so need a byte of sign bits
            ashex = "0x00" + ashex[2:]
    else:
        # Build the 256's-complement:  (1L << nbytes) + x.  The trick is
        # to find the number of bytes in linear time (although that should
        # really be a constant-time task).
        ashex = hex(-x)
        assert ashex.startswith("0x")
        njunkchars = 2 + ashex.endswith('L')
        nibbles = len(ashex) - njunkchars
        if nibbles & 1:
            # Extend to a full byte.
            nibbles += 1
        nbits = nibbles * 4
        x += 1L << nbits
        assert x > 0
        ashex = hex(x)
        njunkchars = 2 + ashex.endswith('L')
        newnibbles = len(ashex) - njunkchars
        if newnibbles < nibbles:
            ashex = "0x" + "0" * (nibbles - newnibbles) + ashex[2:]
        if int(ashex[2], 16) < 8:
            # "looks positive", so need a byte of sign bits
            ashex = "0xff" + ashex[2:]

    if ashex.endswith('L'):
        ashex = ashex[2:-1]
    else:
        ashex = ashex[2:]
    assert len(ashex) & 1 == 0, (x, ashex)
    binary = _binascii.unhexlify(ashex)
    return binary[::-1]

def decode_long(data):
    r"""Decode a long from a two's complement little-endian binary string.

    >>> decode_long('')
    0L
    >>> decode_long("\xff\x00")
    255L
    >>> decode_long("\xff\x7f")
    32767L
    >>> decode_long("\x00\xff")
    -256L
    >>> decode_long("\x00\x80")
    -32768L
    >>> decode_long("\x80")
    -128L
    >>> decode_long("\x7f")
    127L
    """

    nbytes = len(data)
    if nbytes == 0:
        return 0L
    ashex = _binascii.hexlify(data[::-1])
    n = long(ashex, 16) # quadratic time before Python 2.3; linear now
    if data[-1] >= '\x80':
        n -= 1L << (nbytes * 8)
    return n

# Shorthands

try:
    from cStringIO import StringIO
except ImportError:
    from StringIO import StringIO

def dump(obj, file, protocol=None):
    Pickler(file, protocol).dump(obj)

def dumps(obj, protocol=None):
    file = StringIO()
    Pickler(file, protocol).dump(obj)
    return file.getvalue()

def load(file):
    return Unpickler(file).load()

def loads(str):
    file = StringIO(str)
    return Unpickler(file).load()

# Doctest

def _test():
    import doctest
    return doctest.testmod()

if __name__ == "__main__":
    _test()
