1import unittest
2import pickle
3import cPickle
4import StringIO
5import cStringIO
6import pickletools
7import copy_reg
8
9from test.test_support import TestFailed, verbose, have_unicode, TESTFN
try:
    from test.test_support import _2G, _1M, precisionbigmemtest
except ImportError:
    # this import might fail when run on older Python versions by test_xpickle
    # Fallbacks for that case: both size constants become 0, and
    # precisionbigmemtest(...) returns a one-argument callable that always
    # returns None.  Used as "@precisionbigmemtest(...)", that binds the
    # decorated name to None instead of to a function.
    _2G = _1M = 0
    def precisionbigmemtest(*args, **kwargs):
        return lambda self: None
17
18# Tests that try a number of pickle protocols should have a
19#     for proto in protocols:
20# kind of outer loop.
# Both implementations must agree that 2 is the highest protocol; the
# expectations in this file are written for protocols 0, 1 and 2 only.
assert pickle.HIGHEST_PROTOCOL == cPickle.HIGHEST_PROTOCOL == 2
# Every protocol number, lowest first.
protocols = range(pickle.HIGHEST_PROTOCOL + 1)
23
# Local copy of test.test_support.run_with_locale.  Python 2.4 does not
# provide it, and test_xpickle bounces pickled objects through older Python
# versions to test backwards compatibility, so this file carries its own.
#
# run_with_locale(catstr, *locales) builds a decorator.  Each call of the
# decorated function tries the given locales in order for the locale
# category named by catstr (for example 'LC_ALL'), keeps the first one that
# can be set, runs the function, and finally puts the original locale back.
# If none of the locales can be set the function still runs.
def run_with_locale(catstr, *locales):
    def decorator(func):
        def inner(*args, **kwds):
            try:
                import locale
                category = getattr(locale, catstr)
                saved_locale = locale.setlocale(category)
            except AttributeError:
                # an unknown category name is the test author's mistake;
                # let it propagate
                raise
            except:
                # the current locale cannot be determined, so leave the
                # locale alone altogether
                locale = saved_locale = None
            else:
                for candidate in locales:
                    try:
                        locale.setlocale(category, candidate)
                    except:
                        pass
                    else:
                        break

            # call through; the finally clause restores the locale even when
            # the function raises
            try:
                return func(*args, **kwds)
            finally:
                if locale and saved_locale:
                    locale.setlocale(category, saved_locale)
        inner.func_name = func.func_name
        inner.__doc__ = func.__doc__
        return inner
    return decorator
59
60
# Tell whether the opcode whose one-character code is `code` occurs anywhere
# in the pickle string `pickle`.  Scanning stops at the first occurrence.
def opcode_in_pickle(code, pickle):
    for entry in pickletools.genops(pickle):
        # genops yields (opcode, argument, position) triples
        if entry[0].code == code:
            return True
    return False
67
# Count how many opcodes in the pickle string `pickle` have the
# one-character code `code`.
def count_opcode(code, pickle):
    matches = [op for op, dummy, dummy in pickletools.genops(pickle)
               if op.code == code]
    return len(matches)
75
76# We can't very well test the extension registry without putting known stuff
77# in it, but we have to be careful to restore its original state.  Code
78# should do this:
79#
80#     e = ExtensionSaver(extension_code)
81#     try:
82#         fiddle w/ the extension registry's stuff for extension_code
83#     finally:
84#         e.restore()
85
class ExtensionSaver:
    # Snapshot of the copy_reg extension-registry entry for one extension
    # code.  Creating the saver remembers the (module, name) pair registered
    # for the code, if any, and unregisters it; restore() puts things back.

    def __init__(self, code):
        self.code = code
        # None means "nothing was registered for this code"
        self.pair = copy_reg._inverted_registry.get(code)
        if self.pair is not None:
            module, name = self.pair
            copy_reg.remove_extension(module, name, code)

    # Drop whatever is registered for the code now, then re-register the
    # pair remembered by __init__ (if there was one).
    def restore(self):
        code = self.code
        current = copy_reg._inverted_registry.get(code)
        if current is not None:
            copy_reg.remove_extension(current[0], current[1], code)
        if self.pair is not None:
            copy_reg.add_extension(self.pair[0], self.pair[1], code)
106
# Plain class used for the instances in the test data.  Two instances
# compare according to their attribute dictionaries.
class C:
    def __cmp__(self, other):
        return cmp(self.__dict__, other.__dict__)
110
# Make C reachable as __main__.C and have it report "__main__" as its
# module: the canned pickles in this file name the class as "__main__ C".
import __main__
__main__.C = C
C.__module__ = "__main__"
114
# int subclass whose __init__ also stores the string form of its argument
# on the instance as the attribute "str".
class myint(int):
    def __init__(self, x):
        self.str = str(x)
118
# Subclass of C whose constructor requires two arguments.  __getinitargs__
# returns those same two values as a tuple.
class initarg(C):

    def __init__(self, a, b):
        self.a = a
        self.b = b

    def __getinitargs__(self):
        return self.a, self.b
127
# Trivial metaclass that adds nothing to type; used by use_metaclass.
class metaclass(type):
    pass
130
# New-style class created through the custom metaclass above; instances are
# pickled in test_metaclass.
class use_metaclass(object):
    __metaclass__ = metaclass
133
class pickling_metaclass(type):
    # Metaclass for classes that describe how to rebuild themselves:
    # __reduce__ asks for create_dynamic_class to be called with the class's
    # reduce_args.

    # __eq__ is defined below, so mark the classes as unhashable.
    __hash__ = None

    def __reduce__(self):
        return (create_dynamic_class, self.reduce_args)

    def __eq__(self, other):
        # Equal only to an object of the same metaclass carrying the same
        # reduce_args.
        if not type(self) == type(other):
            return False
        return self.reduce_args == other.reduce_args
143
# Build a new, empty class called `name` with the given `bases` through
# pickling_metaclass, and record (name, bases) on it as reduce_args.
def create_dynamic_class(name, bases):
    cls = pickling_metaclass(name, bases, {})
    cls.reduce_args = (name, bases)
    return cls
148
# DATA0 .. DATA2 are the pickles we expect under the various protocols, for
# the object returned by create_data().

# DATA0 is the text pickle (all of its opcodes belong to protocol 0).
# break into multiple strings to avoid confusing font-lock-mode
DATA0 = """(lp1
I0
aL1L
aF2
ac__builtin__
complex
p2
""" + \
"""(F3
F0
tRp3
aI1
aI-1
aI255
aI-255
aI-256
aI65535
aI-65535
aI-65536
aI2147483647
aI-2147483647
aI-2147483648
a""" + \
"""(S'abc'
p4
g4
""" + \
"""(i__main__
C
p5
""" + \
"""(dp6
S'foo'
p7
I1
sS'bar'
p8
I2
sbg5
tp9
ag9
aI5
a.
"""
197
# Disassembly of DATA0.
# Expected pickletools.dis() text for protocol 0; only consulted by
# dont_test_disassembly.
DATA0_DIS = """\
    0: (    MARK
    1: l        LIST       (MARK at 0)
    2: p    PUT        1
    5: I    INT        0
    8: a    APPEND
    9: L    LONG       1L
   13: a    APPEND
   14: F    FLOAT      2.0
   17: a    APPEND
   18: c    GLOBAL     '__builtin__ complex'
   39: p    PUT        2
   42: (    MARK
   43: F        FLOAT      3.0
   46: F        FLOAT      0.0
   49: t        TUPLE      (MARK at 42)
   50: R    REDUCE
   51: p    PUT        3
   54: a    APPEND
   55: I    INT        1
   58: a    APPEND
   59: I    INT        -1
   63: a    APPEND
   64: I    INT        255
   69: a    APPEND
   70: I    INT        -255
   76: a    APPEND
   77: I    INT        -256
   83: a    APPEND
   84: I    INT        65535
   91: a    APPEND
   92: I    INT        -65535
  100: a    APPEND
  101: I    INT        -65536
  109: a    APPEND
  110: I    INT        2147483647
  122: a    APPEND
  123: I    INT        -2147483647
  136: a    APPEND
  137: I    INT        -2147483648
  150: a    APPEND
  151: (    MARK
  152: S        STRING     'abc'
  159: p        PUT        4
  162: g        GET        4
  165: (        MARK
  166: i            INST       '__main__ C' (MARK at 165)
  178: p        PUT        5
  181: (        MARK
  182: d            DICT       (MARK at 181)
  183: p        PUT        6
  186: S        STRING     'foo'
  193: p        PUT        7
  196: I        INT        1
  199: s        SETITEM
  200: S        STRING     'bar'
  207: p        PUT        8
  210: I        INT        2
  213: s        SETITEM
  214: b        BUILD
  215: g        GET        5
  218: t        TUPLE      (MARK at 151)
  219: p    PUT        9
  222: a    APPEND
  223: g    GET        9
  226: a    APPEND
  227: I    INT        5
  230: a    APPEND
  231: .    STOP
highest protocol among opcodes = 0
"""
270
# Canned binary pickle of the create_data() object; its opcodes go up to
# protocol 1 (see DATA1_DIS).
DATA1 = (']q\x01(K\x00L1L\nG@\x00\x00\x00\x00\x00\x00\x00'
         'c__builtin__\ncomplex\nq\x02(G@\x08\x00\x00\x00\x00\x00'
         '\x00G\x00\x00\x00\x00\x00\x00\x00\x00tRq\x03K\x01J\xff\xff'
         '\xff\xffK\xffJ\x01\xff\xff\xffJ\x00\xff\xff\xffM\xff\xff'
         'J\x01\x00\xff\xffJ\x00\x00\xff\xffJ\xff\xff\xff\x7fJ\x01\x00'
         '\x00\x80J\x00\x00\x00\x80(U\x03abcq\x04h\x04(c__main__\n'
         'C\nq\x05oq\x06}q\x07(U\x03fooq\x08K\x01U\x03barq\tK\x02ubh'
         '\x06tq\nh\nK\x05e.'
        )
280
# Disassembly of DATA1.
# Expected pickletools.dis() text for protocol 1; only consulted by
# dont_test_disassembly.
DATA1_DIS = """\
    0: ]    EMPTY_LIST
    1: q    BINPUT     1
    3: (    MARK
    4: K        BININT1    0
    6: L        LONG       1L
   10: G        BINFLOAT   2.0
   19: c        GLOBAL     '__builtin__ complex'
   40: q        BINPUT     2
   42: (        MARK
   43: G            BINFLOAT   3.0
   52: G            BINFLOAT   0.0
   61: t            TUPLE      (MARK at 42)
   62: R        REDUCE
   63: q        BINPUT     3
   65: K        BININT1    1
   67: J        BININT     -1
   72: K        BININT1    255
   74: J        BININT     -255
   79: J        BININT     -256
   84: M        BININT2    65535
   87: J        BININT     -65535
   92: J        BININT     -65536
   97: J        BININT     2147483647
  102: J        BININT     -2147483647
  107: J        BININT     -2147483648
  112: (        MARK
  113: U            SHORT_BINSTRING 'abc'
  118: q            BINPUT     4
  120: h            BINGET     4
  122: (            MARK
  123: c                GLOBAL     '__main__ C'
  135: q                BINPUT     5
  137: o                OBJ        (MARK at 122)
  138: q            BINPUT     6
  140: }            EMPTY_DICT
  141: q            BINPUT     7
  143: (            MARK
  144: U                SHORT_BINSTRING 'foo'
  149: q                BINPUT     8
  151: K                BININT1    1
  153: U                SHORT_BINSTRING 'bar'
  158: q                BINPUT     9
  160: K                BININT1    2
  162: u                SETITEMS   (MARK at 143)
  163: b            BUILD
  164: h            BINGET     6
  166: t            TUPLE      (MARK at 112)
  167: q        BINPUT     10
  169: h        BINGET     10
  171: K        BININT1    5
  173: e        APPENDS    (MARK at 3)
  174: .    STOP
highest protocol among opcodes = 1
"""
337
# Canned protocol 2 pickle of the create_data() object; it starts with the
# PROTO opcode followed by the protocol number 2.
DATA2 = ('\x80\x02]q\x01(K\x00\x8a\x01\x01G@\x00\x00\x00\x00\x00\x00\x00'
         'c__builtin__\ncomplex\nq\x02G@\x08\x00\x00\x00\x00\x00\x00G\x00'
         '\x00\x00\x00\x00\x00\x00\x00\x86Rq\x03K\x01J\xff\xff\xff\xffK'
         '\xffJ\x01\xff\xff\xffJ\x00\xff\xff\xffM\xff\xffJ\x01\x00\xff\xff'
         'J\x00\x00\xff\xffJ\xff\xff\xff\x7fJ\x01\x00\x00\x80J\x00\x00\x00'
         '\x80(U\x03abcq\x04h\x04(c__main__\nC\nq\x05oq\x06}q\x07(U\x03foo'
         'q\x08K\x01U\x03barq\tK\x02ubh\x06tq\nh\nK\x05e.')
345
# Disassembly of DATA2.
# Kept for reference; dont_test_disassembly only compares the protocol 0
# and protocol 1 disassemblies.
DATA2_DIS = """\
    0: \x80 PROTO      2
    2: ]    EMPTY_LIST
    3: q    BINPUT     1
    5: (    MARK
    6: K        BININT1    0
    8: \x8a     LONG1      1L
   11: G        BINFLOAT   2.0
   20: c        GLOBAL     '__builtin__ complex'
   41: q        BINPUT     2
   43: G        BINFLOAT   3.0
   52: G        BINFLOAT   0.0
   61: \x86     TUPLE2
   62: R        REDUCE
   63: q        BINPUT     3
   65: K        BININT1    1
   67: J        BININT     -1
   72: K        BININT1    255
   74: J        BININT     -255
   79: J        BININT     -256
   84: M        BININT2    65535
   87: J        BININT     -65535
   92: J        BININT     -65536
   97: J        BININT     2147483647
  102: J        BININT     -2147483647
  107: J        BININT     -2147483648
  112: (        MARK
  113: U            SHORT_BINSTRING 'abc'
  118: q            BINPUT     4
  120: h            BINGET     4
  122: (            MARK
  123: c                GLOBAL     '__main__ C'
  135: q                BINPUT     5
  137: o                OBJ        (MARK at 122)
  138: q            BINPUT     6
  140: }            EMPTY_DICT
  141: q            BINPUT     7
  143: (            MARK
  144: U                SHORT_BINSTRING 'foo'
  149: q                BINPUT     8
  151: K                BININT1    1
  153: U                SHORT_BINSTRING 'bar'
  158: q                BINPUT     9
  160: K                BININT1    2
  162: u                SETITEMS   (MARK at 143)
  163: b            BUILD
  164: h            BINGET     6
  166: t            TUPLE      (MARK at 112)
  167: q        BINPUT     10
  169: h        BINGET     10
  171: K        BININT1    5
  173: e        APPENDS    (MARK at 5)
  174: .    STOP
highest protocol among opcodes = 2
"""
402
403def create_data():
404    c = C()
405    c.foo = 1
406    c.bar = 2
407    x = [0, 1L, 2.0, 3.0+0j]
408    # Append some integer test cases at cPickle.c's internal size
409    # cutoffs.
410    uint1max = 0xff
411    uint2max = 0xffff
412    int4max = 0x7fffffff
413    x.extend([1, -1,
414              uint1max, -uint1max, -uint1max-1,
415              uint2max, -uint2max, -uint2max-1,
416               int4max,  -int4max,  -int4max-1])
417    y = ('abc', 'abc', c, c)
418    x.append(y)
419    x.append(y)
420    x.append(5)
421    return x
422
class AbstractPickleTests(unittest.TestCase):
    # Subclass must define self.dumps, self.loads, self.error.
    # As used by the tests here: dumps(obj[, proto]) returns the pickle of
    # obj, loads(pickle) rebuilds the object, and error is the exception
    # type expected when loading something that is not a pickle.

    # Shared sample object, built once when the class body is executed.
    _testdata = create_data()
427
    def setUp(self):
        # Nothing to prepare in this base class.
        pass
430
431    def test_misc(self):
432        # test various datatypes not tested by testdata
433        for proto in protocols:
434            x = myint(4)
435            s = self.dumps(x, proto)
436            y = self.loads(s)
437            self.assertEqual(x, y)
438
439            x = (1, ())
440            s = self.dumps(x, proto)
441            y = self.loads(s)
442            self.assertEqual(x, y)
443
444            x = initarg(1, x)
445            s = self.dumps(x, proto)
446            y = self.loads(s)
447            self.assertEqual(x, y)
448
449        # XXX test __reduce__ protocol?
450
451    def test_roundtrip_equality(self):
452        expected = self._testdata
453        for proto in protocols:
454            s = self.dumps(expected, proto)
455            got = self.loads(s)
456            self.assertEqual(expected, got)
457
458    def test_load_from_canned_string(self):
459        expected = self._testdata
460        for canned in DATA0, DATA1, DATA2:
461            got = self.loads(canned)
462            self.assertEqual(expected, got)
463
464    # There are gratuitous differences between pickles produced by
465    # pickle and cPickle, largely because cPickle starts PUT indices at
466    # 1 and pickle starts them at 0.  See XXX comment in cPickle's put2() --
467    # there's a comment with an exclamation point there whose meaning
468    # is a mystery.  cPickle also suppresses PUT for objects with a refcount
469    # of 1.
    def dont_test_disassembly(self):
        # The name does not start with "test", so the default unittest
        # loader does not pick this method up.  It compares the
        # pickletools.dis() output for the protocol 0 and protocol 1 pickles
        # of the test data with the expected disassembly texts.
        from pickletools import dis

        for proto, expected in (0, DATA0_DIS), (1, DATA1_DIS):
            s = self.dumps(self._testdata, proto)
            # dis() writes its report to the file-like object passed as out
            filelike = cStringIO.StringIO()
            dis(s, out=filelike)
            got = filelike.getvalue()
            self.assertEqual(expected, got)
479
480    def test_recursive_list(self):
481        l = []
482        l.append(l)
483        for proto in protocols:
484            s = self.dumps(l, proto)
485            x = self.loads(s)
486            self.assertEqual(len(x), 1)
487            self.assertTrue(x is x[0])
488
489    def test_recursive_tuple(self):
490        t = ([],)
491        t[0].append(t)
492        for proto in protocols:
493            s = self.dumps(t, proto)
494            x = self.loads(s)
495            self.assertEqual(len(x), 1)
496            self.assertEqual(len(x[0]), 1)
497            self.assertTrue(x is x[0][0])
498
499    def test_recursive_dict(self):
500        d = {}
501        d[1] = d
502        for proto in protocols:
503            s = self.dumps(d, proto)
504            x = self.loads(s)
505            self.assertEqual(x.keys(), [1])
506            self.assertTrue(x[1] is x)
507
508    def test_recursive_inst(self):
509        i = C()
510        i.attr = i
511        for proto in protocols:
512            s = self.dumps(i, proto)
513            x = self.loads(s)
514            self.assertEqual(dir(x), dir(i))
515            self.assertIs(x.attr, x)
516
517    def test_recursive_multi(self):
518        l = []
519        d = {1:l}
520        i = C()
521        i.attr = d
522        l.append(i)
523        for proto in protocols:
524            s = self.dumps(l, proto)
525            x = self.loads(s)
526            self.assertEqual(len(x), 1)
527            self.assertEqual(dir(x[0]), dir(i))
528            self.assertEqual(x[0].attr.keys(), [1])
529            self.assertTrue(x[0].attr[1] is x)
530
    def test_garyp(self):
        # Loading the string 'garyp' must raise the error type supplied by
        # the subclass.
        self.assertRaises(self.error, self.loads, 'garyp')
533
534    def test_insecure_strings(self):
535        insecure = ["abc", "2 + 2", # not quoted
536                    #"'abc' + 'def'", # not a single quoted string
537                    "'abc", # quote is not closed
538                    "'abc\"", # open quote and close quote don't match
539                    "'abc'   ?", # junk after close quote
540                    "'\\'", # trailing backslash
541                    "'",    # issue #17710
542                    "' ",   # issue #17710
543                    # some tests of the quoting rules
544                    #"'abc\"\''",
545                    #"'\\\\a\'\'\'\\\'\\\\\''",
546                    ]
547        for s in insecure:
548            buf = "S" + s + "\012p0\012."
549            self.assertRaises(ValueError, self.loads, buf)
550
551    if have_unicode:
552        def test_unicode(self):
553            endcases = [u'', u'<\\u>', u'<\\\u1234>', u'<\n>',
554                        u'<\\>', u'<\\\U00012345>']
555            for proto in protocols:
556                for u in endcases:
557                    p = self.dumps(u, proto)
558                    u2 = self.loads(p)
559                    self.assertEqual(u2, u)
560
561        def test_unicode_high_plane(self):
562            t = u'\U00012345'
563            for proto in protocols:
564                p = self.dumps(t, proto)
565                t2 = self.loads(p)
566                self.assertEqual(t2, t)
567
568    def test_ints(self):
569        import sys
570        for proto in protocols:
571            n = sys.maxint
572            while n:
573                for expected in (-n, n):
574                    s = self.dumps(expected, proto)
575                    n2 = self.loads(s)
576                    self.assertEqual(expected, n2)
577                n = n >> 1
578
579    def test_maxint64(self):
580        maxint64 = (1L << 63) - 1
581        data = 'I' + str(maxint64) + '\n.'
582        got = self.loads(data)
583        self.assertEqual(got, maxint64)
584
585        # Try too with a bogus literal.
586        data = 'I' + str(maxint64) + 'JUNK\n.'
587        self.assertRaises(ValueError, self.loads, data)
588
589    def test_long(self):
590        for proto in protocols:
591            # 256 bytes is where LONG4 begins.
592            for nbits in 1, 8, 8*254, 8*255, 8*256, 8*257:
593                nbase = 1L << nbits
594                for npos in nbase-1, nbase, nbase+1:
595                    for n in npos, -npos:
596                        pickle = self.dumps(n, proto)
597                        got = self.loads(pickle)
598                        self.assertEqual(n, got)
599        # Try a monster.  This is quadratic-time in protos 0 & 1, so don't
600        # bother with those.
601        nbase = long("deadbeeffeedface", 16)
602        nbase += nbase << 1000000
603        for n in nbase, -nbase:
604            p = self.dumps(n, 2)
605            got = self.loads(p)
606            self.assertEqual(n, got)
607
608    def test_float(self):
609        test_values = [0.0, 4.94e-324, 1e-310, 7e-308, 6.626e-34, 0.1, 0.5,
610                       3.14, 263.44582062374053, 6.022e23, 1e30]
611        test_values = test_values + [-x for x in test_values]
612        for proto in protocols:
613            for value in test_values:
614                pickle = self.dumps(value, proto)
615                got = self.loads(pickle)
616                self.assertEqual(value, got)
617
618    @run_with_locale('LC_ALL', 'de_DE', 'fr_FR')
619    def test_float_format(self):
620        # make sure that floats are formatted locale independent
621        self.assertEqual(self.dumps(1.2)[0:3], 'F1.')
622
    def test_reduce(self):
        # Placeholder: this test makes no assertions.
        pass
625
    def test_getinitargs(self):
        # Placeholder: this test makes no assertions.
        pass
628
629    def test_metaclass(self):
630        a = use_metaclass()
631        for proto in protocols:
632            s = self.dumps(a, proto)
633            b = self.loads(s)
634            self.assertEqual(a.__class__, b.__class__)
635
636    def test_dynamic_class(self):
637        a = create_dynamic_class("my_dynamic_class", (object,))
638        copy_reg.pickle(pickling_metaclass, pickling_metaclass.__reduce__)
639        for proto in protocols:
640            s = self.dumps(a, proto)
641            b = self.loads(s)
642            self.assertEqual(a, b)
643
644    def test_structseq(self):
645        import time
646        import os
647
648        t = time.localtime()
649        for proto in protocols:
650            s = self.dumps(t, proto)
651            u = self.loads(s)
652            self.assertEqual(t, u)
653            if hasattr(os, "stat"):
654                t = os.stat(os.curdir)
655                s = self.dumps(t, proto)
656                u = self.loads(s)
657                self.assertEqual(t, u)
658            if hasattr(os, "statvfs"):
659                t = os.statvfs(os.curdir)
660                s = self.dumps(t, proto)
661                u = self.loads(s)
662                self.assertEqual(t, u)
663
664    # Tests for protocol 2
665
666    def test_proto(self):
667        build_none = pickle.NONE + pickle.STOP
668        for proto in protocols:
669            expected = build_none
670            if proto >= 2:
671                expected = pickle.PROTO + chr(proto) + expected
672            p = self.dumps(None, proto)
673            self.assertEqual(p, expected)
674
675        oob = protocols[-1] + 1     # a future protocol
676        badpickle = pickle.PROTO + chr(oob) + build_none
677        try:
678            self.loads(badpickle)
679        except ValueError, detail:
680            self.assertTrue(str(detail).startswith(
681                                            "unsupported pickle protocol"))
682        else:
683            self.fail("expected bad protocol number to raise ValueError")
684
685    def test_long1(self):
686        x = 12345678910111213141516178920L
687        for proto in protocols:
688            s = self.dumps(x, proto)
689            y = self.loads(s)
690            self.assertEqual(x, y)
691            self.assertEqual(opcode_in_pickle(pickle.LONG1, s), proto >= 2)
692
693    def test_long4(self):
694        x = 12345678910111213141516178920L << (256*8)
695        for proto in protocols:
696            s = self.dumps(x, proto)
697            y = self.loads(s)
698            self.assertEqual(x, y)
699            self.assertEqual(opcode_in_pickle(pickle.LONG4, s), proto >= 2)
700
701    def test_short_tuples(self):
702        # Map (proto, len(tuple)) to expected opcode.
703        expected_opcode = {(0, 0): pickle.TUPLE,
704                           (0, 1): pickle.TUPLE,
705                           (0, 2): pickle.TUPLE,
706                           (0, 3): pickle.TUPLE,
707                           (0, 4): pickle.TUPLE,
708
709                           (1, 0): pickle.EMPTY_TUPLE,
710                           (1, 1): pickle.TUPLE,
711                           (1, 2): pickle.TUPLE,
712                           (1, 3): pickle.TUPLE,
713                           (1, 4): pickle.TUPLE,
714
715                           (2, 0): pickle.EMPTY_TUPLE,
716                           (2, 1): pickle.TUPLE1,
717                           (2, 2): pickle.TUPLE2,
718                           (2, 3): pickle.TUPLE3,
719                           (2, 4): pickle.TUPLE,
720                          }
721        a = ()
722        b = (1,)
723        c = (1, 2)
724        d = (1, 2, 3)
725        e = (1, 2, 3, 4)
726        for proto in protocols:
727            for x in a, b, c, d, e:
728                s = self.dumps(x, proto)
729                y = self.loads(s)
730                self.assertEqual(x, y, (proto, x, s, y))
731                expected = expected_opcode[proto, len(x)]
732                self.assertEqual(opcode_in_pickle(expected, s), True)
733
734    def test_singletons(self):
735        # Map (proto, singleton) to expected opcode.
736        expected_opcode = {(0, None): pickle.NONE,
737                           (1, None): pickle.NONE,
738                           (2, None): pickle.NONE,
739
740                           (0, True): pickle.INT,
741                           (1, True): pickle.INT,
742                           (2, True): pickle.NEWTRUE,
743
744                           (0, False): pickle.INT,
745                           (1, False): pickle.INT,
746                           (2, False): pickle.NEWFALSE,
747                          }
748        for proto in protocols:
749            for x in None, False, True:
750                s = self.dumps(x, proto)
751                y = self.loads(s)
752                self.assertTrue(x is y, (proto, x, s, y))
753                expected = expected_opcode[proto, x]
754                self.assertEqual(opcode_in_pickle(expected, s), True)
755
756    def test_newobj_tuple(self):
757        x = MyTuple([1, 2, 3])
758        x.foo = 42
759        x.bar = "hello"
760        for proto in protocols:
761            s = self.dumps(x, proto)
762            y = self.loads(s)
763            self.assertEqual(tuple(x), tuple(y))
764            self.assertEqual(x.__dict__, y.__dict__)
765
766    def test_newobj_list(self):
767        x = MyList([1, 2, 3])
768        x.foo = 42
769        x.bar = "hello"
770        for proto in protocols:
771            s = self.dumps(x, proto)
772            y = self.loads(s)
773            self.assertEqual(list(x), list(y))
774            self.assertEqual(x.__dict__, y.__dict__)
775
776    def test_newobj_generic(self):
777        for proto in protocols:
778            for C in myclasses:
779                B = C.__base__
780                x = C(C.sample)
781                x.foo = 42
782                s = self.dumps(x, proto)
783                y = self.loads(s)
784                detail = (proto, C, B, x, y, type(y))
785                self.assertEqual(B(x), B(y), detail)
786                self.assertEqual(x.__dict__, y.__dict__, detail)
787
788    # Register a type with copy_reg, with extension code extcode.  Pickle
789    # an object of that type.  Check that the resulting pickle uses opcode
790    # (EXT[124]) under proto 2, and not in proto 1.
791
792    def produce_global_ext(self, extcode, opcode):
793        e = ExtensionSaver(extcode)
794        try:
795            copy_reg.add_extension(__name__, "MyList", extcode)
796            x = MyList([1, 2, 3])
797            x.foo = 42
798            x.bar = "hello"
799
800            # Dump using protocol 1 for comparison.
801            s1 = self.dumps(x, 1)
802            self.assertIn(__name__, s1)
803            self.assertIn("MyList", s1)
804            self.assertEqual(opcode_in_pickle(opcode, s1), False)
805
806            y = self.loads(s1)
807            self.assertEqual(list(x), list(y))
808            self.assertEqual(x.__dict__, y.__dict__)
809
810            # Dump using protocol 2 for test.
811            s2 = self.dumps(x, 2)
812            self.assertNotIn(__name__, s2)
813            self.assertNotIn("MyList", s2)
814            self.assertEqual(opcode_in_pickle(opcode, s2), True)
815
816            y = self.loads(s2)
817            self.assertEqual(list(x), list(y))
818            self.assertEqual(x.__dict__, y.__dict__)
819
820        finally:
821            e.restore()
822
823    def test_global_ext1(self):
824        self.produce_global_ext(0x00000001, pickle.EXT1)  # smallest EXT1 code
825        self.produce_global_ext(0x000000ff, pickle.EXT1)  # largest EXT1 code
826
827    def test_global_ext2(self):
828        self.produce_global_ext(0x00000100, pickle.EXT2)  # smallest EXT2 code
829        self.produce_global_ext(0x0000ffff, pickle.EXT2)  # largest EXT2 code
830        self.produce_global_ext(0x0000abcd, pickle.EXT2)  # check endianness
831
832    def test_global_ext4(self):
833        self.produce_global_ext(0x00010000, pickle.EXT4)  # smallest EXT4 code
834        self.produce_global_ext(0x7fffffff, pickle.EXT4)  # largest EXT4 code
835        self.produce_global_ext(0x12abcdef, pickle.EXT4)  # check endianness
836
837    def test_list_chunking(self):
838        n = 10  # too small to chunk
839        x = range(n)
840        for proto in protocols:
841            s = self.dumps(x, proto)
842            y = self.loads(s)
843            self.assertEqual(x, y)
844            num_appends = count_opcode(pickle.APPENDS, s)
845            self.assertEqual(num_appends, proto > 0)
846
847        n = 2500  # expect at least two chunks when proto > 0
848        x = range(n)
849        for proto in protocols:
850            s = self.dumps(x, proto)
851            y = self.loads(s)
852            self.assertEqual(x, y)
853            num_appends = count_opcode(pickle.APPENDS, s)
854            if proto == 0:
855                self.assertEqual(num_appends, 0)
856            else:
857                self.assertTrue(num_appends >= 2)
858
859    def test_dict_chunking(self):
860        n = 10  # too small to chunk
861        x = dict.fromkeys(range(n))
862        for proto in protocols:
863            s = self.dumps(x, proto)
864            y = self.loads(s)
865            self.assertEqual(x, y)
866            num_setitems = count_opcode(pickle.SETITEMS, s)
867            self.assertEqual(num_setitems, proto > 0)
868
869        n = 2500  # expect at least two chunks when proto > 0
870        x = dict.fromkeys(range(n))
871        for proto in protocols:
872            s = self.dumps(x, proto)
873            y = self.loads(s)
874            self.assertEqual(x, y)
875            num_setitems = count_opcode(pickle.SETITEMS, s)
876            if proto == 0:
877                self.assertEqual(num_setitems, 0)
878            else:
879                self.assertTrue(num_setitems >= 2)
880
881    def test_simple_newobj(self):
882        x = object.__new__(SimpleNewObj)  # avoid __init__
883        x.abc = 666
884        for proto in protocols:
885            s = self.dumps(x, proto)
886            self.assertEqual(opcode_in_pickle(pickle.NEWOBJ, s), proto >= 2)
887            y = self.loads(s)   # will raise TypeError if __init__ called
888            self.assertEqual(y.abc, 666)
889            self.assertEqual(x.__dict__, y.__dict__)
890
891    def test_newobj_list_slots(self):
892        x = SlotList([1, 2, 3])
893        x.foo = 42
894        x.bar = "hello"
895        s = self.dumps(x, 2)
896        y = self.loads(s)
897        self.assertEqual(list(x), list(y))
898        self.assertEqual(x.__dict__, y.__dict__)
899        self.assertEqual(x.foo, y.foo)
900        self.assertEqual(x.bar, y.bar)
901
902    def test_reduce_overrides_default_reduce_ex(self):
903        for proto in protocols:
904            x = REX_one()
905            self.assertEqual(x._reduce_called, 0)
906            s = self.dumps(x, proto)
907            self.assertEqual(x._reduce_called, 1)
908            y = self.loads(s)
909            self.assertEqual(y._reduce_called, 0)
910
911    def test_reduce_ex_called(self):
912        for proto in protocols:
913            x = REX_two()
914            self.assertEqual(x._proto, None)
915            s = self.dumps(x, proto)
916            self.assertEqual(x._proto, proto)
917            y = self.loads(s)
918            self.assertEqual(y._proto, None)
919
920    def test_reduce_ex_overrides_reduce(self):
921        for proto in protocols:
922            x = REX_three()
923            self.assertEqual(x._proto, None)
924            s = self.dumps(x, proto)
925            self.assertEqual(x._proto, proto)
926            y = self.loads(s)
927            self.assertEqual(y._proto, None)
928
929    def test_reduce_ex_calls_base(self):
930        for proto in protocols:
931            x = REX_four()
932            self.assertEqual(x._proto, None)
933            s = self.dumps(x, proto)
934            self.assertEqual(x._proto, proto)
935            y = self.loads(s)
936            self.assertEqual(y._proto, proto)
937
938    def test_reduce_calls_base(self):
939        for proto in protocols:
940            x = REX_five()
941            self.assertEqual(x._reduce_called, 0)
942            s = self.dumps(x, proto)
943            self.assertEqual(x._reduce_called, 1)
944            y = self.loads(s)
945            self.assertEqual(y._reduce_called, 1)
946
947    def test_reduce_bad_iterator(self):
948        # Issue4176: crash when 4th and 5th items of __reduce__()
949        # are not iterators
950        class C(object):
951            def __reduce__(self):
952                # 4th item is not an iterator
953                return list, (), None, [], None
954        class D(object):
955            def __reduce__(self):
956                # 5th item is not an iterator
957                return dict, (), None, None, []
958
959        # Protocol 0 is less strict and also accept iterables.
960        for proto in protocols:
961            try:
962                self.dumps(C(), proto)
963            except (AttributeError, pickle.PickleError, cPickle.PickleError):
964                pass
965            try:
966                self.dumps(D(), proto)
967            except (AttributeError, pickle.PickleError, cPickle.PickleError):
968                pass
969
970    def test_many_puts_and_gets(self):
971        # Test that internal data structures correctly deal with lots of
972        # puts/gets.
973        keys = ("aaa" + str(i) for i in xrange(100))
974        large_dict = dict((k, [4, 5, 6]) for k in keys)
975        obj = [dict(large_dict), dict(large_dict), dict(large_dict)]
976
977        for proto in protocols:
978            dumped = self.dumps(obj, proto)
979            loaded = self.loads(dumped)
980            self.assertEqual(loaded, obj,
981                             "Failed protocol %d: %r != %r"
982                             % (proto, obj, loaded))
983
984    def test_attribute_name_interning(self):
985        # Test that attribute names of pickled objects are interned when
986        # unpickling.
987        for proto in protocols:
988            x = C()
989            x.foo = 42
990            x.bar = "hello"
991            s = self.dumps(x, proto)
992            y = self.loads(s)
993            x_keys = sorted(x.__dict__)
994            y_keys = sorted(y.__dict__)
995            for x_key, y_key in zip(x_keys, y_keys):
996                self.assertIs(x_key, y_key)
997
998
999# Test classes for reduce_ex
1000
class REX_one(object):
    # Overrides __reduce__ only; __reduce_ex__ is the one inherited from
    # object.  The instance flag records that __reduce__ ran.
    _reduce_called = 0

    def __reduce__(self):
        self._reduce_called = 1
        # Rebuild by calling REX_one() with no arguments.
        rebuild = (REX_one, ())
        return rebuild
1007
class REX_two(object):
    # Overrides __reduce_ex__ only; __reduce__ is the one inherited from
    # object.  The instance remembers the protocol it was asked to use.
    _proto = None

    def __reduce_ex__(self, proto):
        self._proto = proto
        # Rebuild by calling REX_two() with no arguments.
        rebuild = (REX_two, ())
        return rebuild
1014
class REX_three(object):
    # Defines both reduction hooks.  Pickling is expected to go through
    # __reduce_ex__; __reduce__ raises so that any call to it is noticed.
    _proto = None
    def __reduce_ex__(self, proto):
        # Remember the protocol that was passed in.
        self._proto = proto
        # Rebuild as REX_three so an instance round-trips to its own class
        # rather than to REX_two.
        return REX_three, ()
    def __reduce__(self):
        raise TestFailed("This __reduce__ shouldn't be called")
1022
class REX_four(object):
    # Records the protocol, then hands the actual reduction over to the
    # base-class implementation; that call should succeed.
    _proto = None

    def __reduce_ex__(self, proto):
        self._proto = proto
        reduced = object.__reduce_ex__(self, proto)
        return reduced
1029
class REX_five(object):
    # Sets a flag, then hands the actual reduction over to
    # object.__reduce__.  This pattern used to fail with infinite recursion.
    _reduce_called = 0

    def __reduce__(self):
        self._reduce_called = 1
        reduced = object.__reduce__(self)
        return reduced
1036
1037# Test classes for newobj
1038
# One trivial subclass per builtin type.  Each carries a "sample" class
# attribute holding a value of its base type (presumably used by the tests
# to build an instance -- confirm against the callers).

class MyInt(int):
    sample = 1

class MyLong(long):
    sample = 1L

class MyFloat(float):
    sample = 1.0

class MyComplex(complex):
    sample = 1.0 + 0.0j

class MyStr(str):
    sample = "hello"

class MyUnicode(unicode):
    # The sample includes a non-ASCII code point (U+1234).
    sample = u"hello \u1234"

class MyTuple(tuple):
    sample = (1, 2, 3)

class MyList(list):
    sample = [1, 2, 3]

class MyDict(dict):
    sample = {"a": 1, "b": 2}

# Every builtin subclass defined above, gathered in one list.
myclasses = [MyInt, MyLong, MyFloat,
             MyComplex,
             MyStr, MyUnicode,
             MyTuple, MyList, MyDict]
1070
1071
class SlotList(MyList):
    # A MyList subclass that additionally declares a "foo" slot.
    __slots__ = ["foo"]
1074
class SimpleNewObj(object):
    # Instances are meant to be created without running __init__ (for
    # example via object.__new__); the initializer always fails.
    def __init__(self, a, b, c):
        # Fail loudly so that any call to the initializer is noticed.
        message = "SimpleNewObj.__init__() didn't expect to get called"
        raise TypeError(message)
1079
1080class AbstractPickleModuleTests(unittest.TestCase):
1081
1082    def test_dump_closed_file(self):
1083        import os
1084        f = open(TESTFN, "w")
1085        try:
1086            f.close()
1087            self.assertRaises(ValueError, self.module.dump, 123, f)
1088        finally:
1089            os.remove(TESTFN)
1090
1091    def test_load_closed_file(self):
1092        import os
1093        f = open(TESTFN, "w")
1094        try:
1095            f.close()
1096            self.assertRaises(ValueError, self.module.dump, 123, f)
1097        finally:
1098            os.remove(TESTFN)
1099
1100    def test_load_from_and_dump_to_file(self):
1101        stream = cStringIO.StringIO()
1102        data = [123, {}, 124]
1103        self.module.dump(data, stream)
1104        stream.seek(0)
1105        unpickled = self.module.load(stream)
1106        self.assertEqual(unpickled, data)
1107
1108    def test_highest_protocol(self):
1109        # Of course this needs to be changed when HIGHEST_PROTOCOL changes.
1110        self.assertEqual(self.module.HIGHEST_PROTOCOL, 2)
1111
1112    def test_callapi(self):
1113        f = cStringIO.StringIO()
1114        # With and without keyword arguments
1115        self.module.dump(123, f, -1)
1116        self.module.dump(123, file=f, protocol=-1)
1117        self.module.dumps(123, -1)
1118        self.module.dumps(123, protocol=-1)
1119        self.module.Pickler(f, -1)
1120        self.module.Pickler(f, protocol=-1)
1121
1122    def test_incomplete_input(self):
1123        s = StringIO.StringIO("X''.")
1124        self.assertRaises(EOFError, self.module.load, s)
1125
1126    def test_restricted(self):
1127        # issue7128: cPickle failed in restricted mode
1128        builtins = {self.module.__name__: self.module,
1129                    '__import__': __import__}
1130        d = {}
1131        teststr = "def f(): {0}.dumps(0)".format(self.module.__name__)
1132        exec teststr in {'__builtins__': builtins}, d
1133        d['f']()
1134
1135    def test_bad_input(self):
1136        # Test issue4298
1137        s = '\x58\0\0\0\x54'
1138        self.assertRaises(EOFError, self.module.loads, s)
1139        # Test issue7455
1140        s = '0'
1141        # XXX Why doesn't pickle raise UnpicklingError?
1142        self.assertRaises((IndexError, cPickle.UnpicklingError),
1143                          self.module.loads, s)
1144
class AbstractPersistentPicklerTests(unittest.TestCase):

    # This class defines the persistent_id() and persistent_load() hooks
    # that the pickler under test should use.  Every even integer is
    # pickled through a persistent id; the two counters record how often
    # each hook handled such an object.

    def persistent_id(self, object):
        # Only even ints get a persistent id (their decimal string).
        if not (isinstance(object, int) and object % 2 == 0):
            return None
        self.id_count += 1
        return str(object)

    def persistent_load(self, oid):
        # Turn a persistent id back into the even int it stands for.
        self.load_count += 1
        value = int(oid)
        assert value % 2 == 0
        return value

    def _check_even_ints_persisted(self, *dump_args):
        # Round-trip range(10) and check that each hook handled exactly the
        # five even numbers.  Extra positional arguments go to self.dumps().
        self.id_count = 0
        self.load_count = 0
        numbers = range(10)
        roundtripped = self.loads(self.dumps(numbers, *dump_args))
        self.assertEqual(roundtripped, numbers)
        self.assertEqual(self.id_count, 5)
        self.assertEqual(self.load_count, 5)

    def test_persistence(self):
        # Default protocol.
        self._check_even_ints_persisted()

    def test_bin_persistence(self):
        # Protocol 1.
        self._check_even_ints_persisted(1)
1179
class AbstractPicklerUnpicklerObjectTests(unittest.TestCase):

    # Tests of Pickler/Unpickler objects themselves (memo handling and
    # reuse).  Both class attributes must be overridden; setUp() refuses to
    # run while either is still unset.
    pickler_class = None
    unpickler_class = None

    def setUp(self):
        assert self.pickler_class
        assert self.unpickler_class

    def _dump_with_new_pickler(self, obj, memo=None):
        # Pickle obj with a brand-new pickler writing to a brand-new stream;
        # the pickler's memo is replaced first when one is given.  Returns
        # the pickler and the bytes it produced.
        stream = cStringIO.StringIO()
        pickler = self.pickler_class(stream)
        if memo is not None:
            pickler.memo = memo
        pickler.dump(obj)
        return pickler, stream.getvalue()

    def test_clear_pickler_memo(self):
        # To test whether clear_memo() has any effect, we pickle an object,
        # then pickle it again without clearing the memo; the two serialized
        # forms should be different. If we clear_memo() and then pickle the
        # object again, the third serialized form should be identical to the
        # first one we obtained.
        def rewind_and_empty(stream):
            stream.seek(0)
            stream.truncate()

        data = ["abcdefg", "abcdefg", 44]
        buf = cStringIO.StringIO()
        pickler = self.pickler_class(buf)

        pickler.dump(data)
        first_pickled = buf.getvalue()

        # Second dump: same pickler, memo left untouched.
        rewind_and_empty(buf)
        pickler.dump(data)
        second_pickled = buf.getvalue()

        # Third dump: same pickler, memo cleared beforehand.
        pickler.clear_memo()
        rewind_and_empty(buf)
        pickler.dump(data)
        third_pickled = buf.getvalue()

        self.assertNotEqual(first_pickled, second_pickled)
        self.assertEqual(first_pickled, third_pickled)

    def test_priming_pickler_memo(self):
        # Verify that we can set the Pickler's memo attribute.
        data = ["abcdefg", "abcdefg", 44]
        pickler, first_pickled = self._dump_with_new_pickler(data)

        # A second pickler that starts out with the first one's memo should
        # produce different output for the same data.
        _primed, primed_pickled = self._dump_with_new_pickler(
            data, memo=pickler.memo)

        self.assertNotEqual(first_pickled, primed_pickled)

    def test_priming_unpickler_memo(self):
        # Verify that we can set the Unpickler's memo attribute.
        data = ["abcdefg", "abcdefg", 44]
        pickler, first_pickled = self._dump_with_new_pickler(data)
        _primed, primed_pickled = self._dump_with_new_pickler(
            data, memo=pickler.memo)

        first_source = cStringIO.StringIO(first_pickled)
        unpickler = self.unpickler_class(first_source)
        unpickled_data1 = unpickler.load()

        self.assertEqual(unpickled_data1, data)

        # Load the primed pickle with an unpickler that shares the first
        # unpickler's memo.
        primed_source = cStringIO.StringIO(primed_pickled)
        primed = self.unpickler_class(primed_source)
        primed.memo = unpickler.memo
        unpickled_data2 = primed.load()

        primed.memo.clear()

        self.assertEqual(unpickled_data2, data)
        self.assertTrue(unpickled_data2 is unpickled_data1)

    def test_reusing_unpickler_objects(self):
        # One unpickler must be able to load a second pickle after its
        # underlying stream has been rewritten in place.
        data1 = ["abcdefg", "abcdefg", 44]
        _pickler, pickled1 = self._dump_with_new_pickler(data1)

        data2 = ["abcdefg", 44, 44]
        _pickler, pickled2 = self._dump_with_new_pickler(data2)

        source = cStringIO.StringIO()
        source.write(pickled1)
        source.seek(0)
        unpickler = self.unpickler_class(source)
        self.assertEqual(unpickler.load(), data1)

        # Replace the stream contents with the second pickle and load again
        # through the same unpickler.
        source.seek(0)
        source.truncate()
        source.write(pickled2)
        source.seek(0)
        self.assertEqual(unpickler.load(), data2)
1292
class BigmemPickleTests(unittest.TestCase):
    # Round-trip tests on very large inputs.  self.dumps / self.loads are not
    # defined here; presumably concrete subclasses supply them -- confirm.

    # Memory requirements: 1 byte per character for input strings, 1 byte
    # for pickled data, 1 byte for unpickled strings, 1 byte for internal
    # buffer and 1 byte of free space for resizing of internal buffer.

    @precisionbigmemtest(size=_2G + 100*_1M, memuse=5)
    def test_huge_strlist(self, size):
        # Build a list of strings totalling `size` characters: chunks of 'x'
        # that start at 2**20 characters and grow by one each time, followed
        # by one final chunk of 'y' holding whatever is left.
        chunksize = 2**20
        data = []
        while size > chunksize:
            data.append('x' * chunksize)
            size -= chunksize
            chunksize += 1
        data.append('y' * size)

        try:
            for proto in protocols:
                try:
                    pickled = self.dumps(data, proto)
                    res = self.loads(pickled)
                    self.assertEqual(res, data)
                finally:
                    # Drop the per-protocol results before the next round
                    # (presumably to keep peak memory down).
                    res = None
                    pickled = None
        finally:
            # Drop the input list as well, whatever the outcome.
            data = None
1320