marshal.c revision 1aa78938b0d019b7c9cbb153c2f35709ee01a19a
1ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov
/* Write Python objects to files and read them back.
   This is primarily intended for writing and reading compiled Python code,
   even though dicts, lists, sets and frozensets, not commonly seen in
   code objects, are supported.
   Version 3 of this protocol properly supports circular links
   and sharing. */
8ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov
9ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov#define PY_SSIZE_T_CLEAN
10ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov
11ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov#include "Python.h"
12ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov#include "longintrepr.h"
13ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov#include "code.h"
14ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov#include "marshal.h"
15ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov#include "../Modules/hashtable.h"
16ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov
/* Nesting limit for marshalled objects.  An object nested deeper than this
 * is considered dangerously deep and risks coring the interpreter, so once
 * the object stack reaches this mark an error is raised instead of
 * continuing.
 * Windows debug builds use a reduced value.
 */
#if defined(MS_WINDOWS) && defined(_DEBUG)
#  define MAX_MARSHAL_STACK_DEPTH 1000
#else
#  define MAX_MARSHAL_STACK_DEPTH 2000
#endif
27ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov
/* One-byte type codes; each marshalled value starts with one of these. */

/* Singletons and the NULL marker (written directly by w_object). */
#define TYPE_NULL               '0'
#define TYPE_NONE               'N'
#define TYPE_FALSE              'F'
#define TYPE_TRUE               'T'
#define TYPE_STOPITER           'S'
#define TYPE_ELLIPSIS           '.'

/* Numbers. */
#define TYPE_INT                'i'
#define TYPE_FLOAT              'f'
#define TYPE_BINARY_FLOAT       'g'
#define TYPE_COMPLEX            'x'
#define TYPE_BINARY_COMPLEX     'y'
#define TYPE_LONG               'l'

/* Strings, references, containers and code objects. */
#define TYPE_STRING             's'
#define TYPE_INTERNED           't'
#define TYPE_REF                'r'
#define TYPE_TUPLE              '('
#define TYPE_LIST               '['
#define TYPE_DICT               '{'
#define TYPE_CODE               'c'
#define TYPE_UNICODE            'u'
#define TYPE_UNKNOWN            '?'
#define TYPE_SET                '<'
#define TYPE_FROZENSET          '>'

/* OR-ed into a type code: with a type, add obj to index */
#define FLAG_REF                '\x80'

/* Compact forms, only written when the marshal version is >= 4. */
#define TYPE_ASCII              'a'
#define TYPE_ASCII_INTERNED     'A'
#define TYPE_SMALL_TUPLE        ')'
#define TYPE_SHORT_ASCII        'z'
#define TYPE_SHORT_ASCII_INTERNED 'Z'

/* Values stored in WFILE.error. */
#define WFERR_OK 0
#define WFERR_UNMARSHALLABLE 1
#define WFERR_NESTEDTOODEEP 2
#define WFERR_NOMEMORY 3
63ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov
64ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganovtypedef struct {
65ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov    FILE *fp;
66ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov    int error;  /* see WFERR_* values */
67ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov    int depth;
68ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov    PyObject *str;
69ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov    char *ptr;
70ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov    char *end;
71ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov    char *buf;
72ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov    _Py_hashtable_t *hashtable;
73ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov    int version;
74ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov} WFILE;
75ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov
/* Append the single byte `c` to the output of WFILE `p`.  When the buffer is
   full, w_reserve() is asked to make room for one byte; if it cannot, the
   byte is dropped.  Note that `p` is evaluated more than once. */
#define w_byte(c, p) do {                                   \
        if ((p)->ptr != (p)->end || w_reserve((p), 1)) {    \
            *(p)->ptr++ = (c);                              \
        }                                                   \
    } while (0)
80ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov
81ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganovstatic void
82ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganovw_flush(WFILE *p)
83ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov{
84ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov    assert(p->fp != NULL);
85ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov    fwrite(p->buf, 1, p->ptr - p->buf, p->fp);
86ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov    p->ptr = p->buf;
87ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov}
88ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov
89ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganovstatic int
90ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganovw_reserve(WFILE *p, Py_ssize_t needed)
91ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov{
92ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov    Py_ssize_t pos, size, delta;
93ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov    if (p->ptr == NULL)
94ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov        return 0; /* An error already occurred */
95ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov    if (p->fp != NULL) {
96ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov        w_flush(p);
97ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov        return needed <= p->end - p->ptr;
98ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov    }
99ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov    assert(p->str != NULL);
100ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov    pos = p->ptr - p->buf;
101ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov    size = PyBytes_Size(p->str);
102ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov    if (size > 16*1024*1024)
103ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov        delta = (size >> 3);            /* 12.5% overallocation */
104ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov    else
105ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov        delta = size + 1024;
106ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov    delta = Py_MAX(delta, needed);
107ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov    if (delta > PY_SSIZE_T_MAX - size) {
108ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov        p->error = WFERR_NOMEMORY;
109ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov        return 0;
110ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov    }
111ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov    size += delta;
112ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov    if (_PyBytes_Resize(&p->str, size) != 0) {
113ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov        p->ptr = p->buf = p->end = NULL;
114ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov        return 0;
115ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov    }
116ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov    else {
117ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov        p->buf = PyBytes_AS_STRING(p->str);
118ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov        p->ptr = p->buf + pos;
119ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov        p->end = p->buf + size;
120ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov        return 1;
121a90bfb5f80ff45442ba7806e4c36540ff5fc7577jackqdyulei    }
122a90bfb5f80ff45442ba7806e4c36540ff5fc7577jackqdyulei}
123a90bfb5f80ff45442ba7806e4c36540ff5fc7577jackqdyulei
124a90bfb5f80ff45442ba7806e4c36540ff5fc7577jackqdyuleistatic void
125a90bfb5f80ff45442ba7806e4c36540ff5fc7577jackqdyuleiw_string(const char *s, Py_ssize_t n, WFILE *p)
126a90bfb5f80ff45442ba7806e4c36540ff5fc7577jackqdyulei{
127a90bfb5f80ff45442ba7806e4c36540ff5fc7577jackqdyulei    Py_ssize_t m;
128a90bfb5f80ff45442ba7806e4c36540ff5fc7577jackqdyulei    if (!n || p->ptr == NULL)
129a90bfb5f80ff45442ba7806e4c36540ff5fc7577jackqdyulei        return;
130a90bfb5f80ff45442ba7806e4c36540ff5fc7577jackqdyulei    m = p->end - p->ptr;
131a90bfb5f80ff45442ba7806e4c36540ff5fc7577jackqdyulei    if (p->fp != NULL) {
132a90bfb5f80ff45442ba7806e4c36540ff5fc7577jackqdyulei        if (n <= m) {
133a90bfb5f80ff45442ba7806e4c36540ff5fc7577jackqdyulei            Py_MEMCPY(p->ptr, s, n);
134a90bfb5f80ff45442ba7806e4c36540ff5fc7577jackqdyulei            p->ptr += n;
135a90bfb5f80ff45442ba7806e4c36540ff5fc7577jackqdyulei        }
136a90bfb5f80ff45442ba7806e4c36540ff5fc7577jackqdyulei        else {
137ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov            w_flush(p);
138ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov            fwrite(s, 1, n, p->fp);
139ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov        }
140ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov    }
141ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov    else {
142ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov        if (n <= m || w_reserve(p, n - m)) {
143ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov            Py_MEMCPY(p->ptr, s, n);
144a90bfb5f80ff45442ba7806e4c36540ff5fc7577jackqdyulei            p->ptr += n;
145ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov        }
146ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov    }
147ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov}
148ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov
149ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganovstatic void
150ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganovw_short(int x, WFILE *p)
151ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov{
152ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov    w_byte((char)( x      & 0xff), p);
153ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov    w_byte((char)((x>> 8) & 0xff), p);
154ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov}
155ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov
156ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganovstatic void
157ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganovw_long(long x, WFILE *p)
158ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov{
159ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov    w_byte((char)( x      & 0xff), p);
160ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov    w_byte((char)((x>> 8) & 0xff), p);
161ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov    w_byte((char)((x>>16) & 0xff), p);
162ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov    w_byte((char)((x>>24) & 0xff), p);
163ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov}
164ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov
/* Largest size/count that fits the 4-byte signed field used on the wire. */
#define SIZE32_MAX  0x7FFFFFFF

/* W_SIZE(n, p): write the size `n` as a 4-byte value to WFILE `p`.
 *
 * Where size_t is wider than 32 bits, a value above SIZE32_MAX cannot be
 * represented: p->error is set to WFERR_UNMARSHALLABLE and the macro
 * *returns from the enclosing function*, which must therefore return void.
 *
 * p->depth is deliberately left alone here.  w_object() decrements it on
 * every exit; an extra decrement on this error path would make the counter
 * drift below the real recursion depth (writing continues after an error)
 * and weaken the MAX_MARSHAL_STACK_DEPTH check.
 */
#if SIZEOF_SIZE_T > 4
# define W_SIZE(n, p)  do {                     \
        if ((n) > SIZE32_MAX) {                 \
            (p)->error = WFERR_UNMARSHALLABLE;  \
            return;                             \
        }                                       \
        w_long((long)(n), p);                   \
    } while(0)
#else
# define W_SIZE  w_long
#endif
179ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov
180ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganovstatic void
181ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganovw_pstring(const char *s, Py_ssize_t n, WFILE *p)
182ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov{
183ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov        W_SIZE(n, p);
184ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov        w_string(s, n, p);
185ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov}
186ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov
187ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganovstatic void
188ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganovw_short_pstring(const char *s, Py_ssize_t n, WFILE *p)
189ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov{
190ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov    w_byte(Py_SAFE_DOWNCAST(n, Py_ssize_t, unsigned char), p);
191ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov    w_string(s, n, p);
192ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov}
193ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov
/* We assume that Python ints are stored internally in a base that is some
   power of 2**15; for the sake of portability we always read and write them
   in base exactly 2**15. */

/* Bits per marshalled digit. */
#define PyLong_MARSHAL_SHIFT 15
/* The marshal base, 2**15, and the mask selecting one marshalled digit. */
#define PyLong_MARSHAL_BASE ((short)1 << PyLong_MARSHAL_SHIFT)
#define PyLong_MARSHAL_MASK (PyLong_MARSHAL_BASE - 1)
#if PyLong_SHIFT % PyLong_MARSHAL_SHIFT != 0
#error "PyLong_SHIFT must be a multiple of PyLong_MARSHAL_SHIFT"
#endif
/* Number of marshalled digits held by one internal digit. */
#define PyLong_MARSHAL_RATIO (PyLong_SHIFT / PyLong_MARSHAL_SHIFT)
205ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov
/* Write type code `t` to WFILE `p`, OR-ed with the variable `flag`, which
   must exist in the scope where the macro is expanded. */
#define W_TYPE(t, p) do {       \
    w_byte((t) | flag, (p));    \
} while (0)
209ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov
210ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganovstatic void
211ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganovw_PyLong(const PyLongObject *ob, char flag, WFILE *p)
212ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov{
213ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov    Py_ssize_t i, j, n, l;
214ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov    digit d;
215ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov
216ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov    W_TYPE(TYPE_LONG, p);
217ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov    if (Py_SIZE(ob) == 0) {
218ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov        w_long((long)0, p);
219ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov        return;
220ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov    }
221ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov
222ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov    /* set l to number of base PyLong_MARSHAL_BASE digits */
223ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov    n = Py_ABS(Py_SIZE(ob));
224ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov    l = (n-1) * PyLong_MARSHAL_RATIO;
225ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov    d = ob->ob_digit[n-1];
226ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov    assert(d != 0); /* a PyLong is always normalized */
227ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov    do {
228ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov        d >>= PyLong_MARSHAL_SHIFT;
229ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov        l++;
230ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov    } while (d != 0);
231ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov    if (l > SIZE32_MAX) {
232ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov        p->depth--;
233ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov        p->error = WFERR_UNMARSHALLABLE;
234ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov        return;
235ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov    }
236ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov    w_long((long)(Py_SIZE(ob) > 0 ? l : -l), p);
237ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov
238ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov    for (i=0; i < n-1; i++) {
239ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov        d = ob->ob_digit[i];
240ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov        for (j=0; j < PyLong_MARSHAL_RATIO; j++) {
241ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov            w_short(d & PyLong_MARSHAL_MASK, p);
242ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov            d >>= PyLong_MARSHAL_SHIFT;
243ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov        }
244ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov        assert (d == 0);
245ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov    }
246ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov    d = ob->ob_digit[n-1];
247ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov    do {
248ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov        w_short(d & PyLong_MARSHAL_MASK, p);
249ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov        d >>= PyLong_MARSHAL_SHIFT;
250ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov    } while (d != 0);
251a90bfb5f80ff45442ba7806e4c36540ff5fc7577jackqdyulei}
252a90bfb5f80ff45442ba7806e4c36540ff5fc7577jackqdyulei
253a90bfb5f80ff45442ba7806e4c36540ff5fc7577jackqdyuleistatic int
254a90bfb5f80ff45442ba7806e4c36540ff5fc7577jackqdyuleiw_ref(PyObject *v, char *flag, WFILE *p)
255a90bfb5f80ff45442ba7806e4c36540ff5fc7577jackqdyulei{
256a90bfb5f80ff45442ba7806e4c36540ff5fc7577jackqdyulei    _Py_hashtable_entry_t *entry;
257a90bfb5f80ff45442ba7806e4c36540ff5fc7577jackqdyulei    int w;
258a90bfb5f80ff45442ba7806e4c36540ff5fc7577jackqdyulei
259a90bfb5f80ff45442ba7806e4c36540ff5fc7577jackqdyulei    if (p->version < 3 || p->hashtable == NULL)
260a90bfb5f80ff45442ba7806e4c36540ff5fc7577jackqdyulei        return 0; /* not writing object references */
261a90bfb5f80ff45442ba7806e4c36540ff5fc7577jackqdyulei
262a90bfb5f80ff45442ba7806e4c36540ff5fc7577jackqdyulei    /* if it has only one reference, it definitely isn't shared */
263a90bfb5f80ff45442ba7806e4c36540ff5fc7577jackqdyulei    if (Py_REFCNT(v) == 1)
264a90bfb5f80ff45442ba7806e4c36540ff5fc7577jackqdyulei        return 0;
265a90bfb5f80ff45442ba7806e4c36540ff5fc7577jackqdyulei
266a90bfb5f80ff45442ba7806e4c36540ff5fc7577jackqdyulei    entry = _Py_hashtable_get_entry(p->hashtable, v);
267a90bfb5f80ff45442ba7806e4c36540ff5fc7577jackqdyulei    if (entry != NULL) {
268a90bfb5f80ff45442ba7806e4c36540ff5fc7577jackqdyulei        /* write the reference index to the stream */
269a90bfb5f80ff45442ba7806e4c36540ff5fc7577jackqdyulei        _Py_HASHTABLE_ENTRY_READ_DATA(p->hashtable, &w, sizeof(w), entry);
270ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov        /* we don't store "long" indices in the dict */
271ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov        assert(0 <= w && w <= 0x7fffffff);
272ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov        w_byte(TYPE_REF, p);
273ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov        w_long(w, p);
274ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov        return 1;
275ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov    } else {
276ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov        size_t s = p->hashtable->entries;
2773f24e69dbed74fd7724c0a4714ce612f1cb5bc5cAndreas Gampe        /* we don't support long indices */
278ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov        if (s >= 0x7fffffff) {
279ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov            PyErr_SetString(PyExc_ValueError, "too many objects");
280ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov            goto err;
281ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov        }
282ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov        w = (int)s;
283ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov        Py_INCREF(v);
284ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov        if (_Py_HASHTABLE_SET(p->hashtable, v, w) < 0) {
285ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov            Py_DECREF(v);
286ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov            goto err;
287ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov        }
288ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov        *flag |= FLAG_REF;
289ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov        return 0;
290ae0e03a9e03de34e37b768b971d7596d7220a053Svet Ganov    }
291err:
292    p->error = WFERR_UNMARSHALLABLE;
293    return 1;
294}
295
296static void
297w_complex_object(PyObject *v, char flag, WFILE *p);
298
299static void
300w_object(PyObject *v, WFILE *p)
301{
302    char flag = '\0';
303
304    p->depth++;
305
306    if (p->depth > MAX_MARSHAL_STACK_DEPTH) {
307        p->error = WFERR_NESTEDTOODEEP;
308    }
309    else if (v == NULL) {
310        w_byte(TYPE_NULL, p);
311    }
312    else if (v == Py_None) {
313        w_byte(TYPE_NONE, p);
314    }
315    else if (v == PyExc_StopIteration) {
316        w_byte(TYPE_STOPITER, p);
317    }
318    else if (v == Py_Ellipsis) {
319        w_byte(TYPE_ELLIPSIS, p);
320    }
321    else if (v == Py_False) {
322        w_byte(TYPE_FALSE, p);
323    }
324    else if (v == Py_True) {
325        w_byte(TYPE_TRUE, p);
326    }
327    else if (!w_ref(v, &flag, p))
328        w_complex_object(v, flag, p);
329
330    p->depth--;
331}
332
333static void
334w_complex_object(PyObject *v, char flag, WFILE *p)
335{
336    Py_ssize_t i, n;
337
338    if (PyLong_CheckExact(v)) {
339        long x = PyLong_AsLong(v);
340        if ((x == -1)  && PyErr_Occurred()) {
341            PyLongObject *ob = (PyLongObject *)v;
342            PyErr_Clear();
343            w_PyLong(ob, flag, p);
344        }
345        else {
346#if SIZEOF_LONG > 4
347            long y = Py_ARITHMETIC_RIGHT_SHIFT(long, x, 31);
348            if (y && y != -1) {
349                /* Too large for TYPE_INT */
350                w_PyLong((PyLongObject*)v, flag, p);
351            }
352            else
353#endif
354            {
355                W_TYPE(TYPE_INT, p);
356                w_long(x, p);
357            }
358        }
359    }
360    else if (PyFloat_CheckExact(v)) {
361        if (p->version > 1) {
362            unsigned char buf[8];
363            if (_PyFloat_Pack8(PyFloat_AsDouble(v),
364                               buf, 1) < 0) {
365                p->error = WFERR_UNMARSHALLABLE;
366                return;
367            }
368            W_TYPE(TYPE_BINARY_FLOAT, p);
369            w_string((char*)buf, 8, p);
370        }
371        else {
372            char *buf = PyOS_double_to_string(PyFloat_AS_DOUBLE(v),
373                                              'g', 17, 0, NULL);
374            if (!buf) {
375                p->error = WFERR_NOMEMORY;
376                return;
377            }
378            n = strlen(buf);
379            W_TYPE(TYPE_FLOAT, p);
380            w_byte((int)n, p);
381            w_string(buf, n, p);
382            PyMem_Free(buf);
383        }
384    }
385    else if (PyComplex_CheckExact(v)) {
386        if (p->version > 1) {
387            unsigned char buf[8];
388            if (_PyFloat_Pack8(PyComplex_RealAsDouble(v),
389                               buf, 1) < 0) {
390                p->error = WFERR_UNMARSHALLABLE;
391                return;
392            }
393            W_TYPE(TYPE_BINARY_COMPLEX, p);
394            w_string((char*)buf, 8, p);
395            if (_PyFloat_Pack8(PyComplex_ImagAsDouble(v),
396                               buf, 1) < 0) {
397                p->error = WFERR_UNMARSHALLABLE;
398                return;
399            }
400            w_string((char*)buf, 8, p);
401        }
402        else {
403            char *buf;
404            W_TYPE(TYPE_COMPLEX, p);
405            buf = PyOS_double_to_string(PyComplex_RealAsDouble(v),
406                                        'g', 17, 0, NULL);
407            if (!buf) {
408                p->error = WFERR_NOMEMORY;
409                return;
410            }
411            n = strlen(buf);
412            w_byte((int)n, p);
413            w_string(buf, n, p);
414            PyMem_Free(buf);
415            buf = PyOS_double_to_string(PyComplex_ImagAsDouble(v),
416                                        'g', 17, 0, NULL);
417            if (!buf) {
418                p->error = WFERR_NOMEMORY;
419                return;
420            }
421            n = strlen(buf);
422            w_byte((int)n, p);
423            w_string(buf, n, p);
424            PyMem_Free(buf);
425        }
426    }
427    else if (PyBytes_CheckExact(v)) {
428        W_TYPE(TYPE_STRING, p);
429        w_pstring(PyBytes_AS_STRING(v), PyBytes_GET_SIZE(v), p);
430    }
431    else if (PyUnicode_CheckExact(v)) {
432        if (p->version >= 4 && PyUnicode_IS_ASCII(v)) {
433            int is_short = PyUnicode_GET_LENGTH(v) < 256;
434            if (is_short) {
435                if (PyUnicode_CHECK_INTERNED(v))
436                    W_TYPE(TYPE_SHORT_ASCII_INTERNED, p);
437                else
438                    W_TYPE(TYPE_SHORT_ASCII, p);
439                w_short_pstring((char *) PyUnicode_1BYTE_DATA(v),
440                                PyUnicode_GET_LENGTH(v), p);
441            }
442            else {
443                if (PyUnicode_CHECK_INTERNED(v))
444                    W_TYPE(TYPE_ASCII_INTERNED, p);
445                else
446                    W_TYPE(TYPE_ASCII, p);
447                w_pstring((char *) PyUnicode_1BYTE_DATA(v),
448                          PyUnicode_GET_LENGTH(v), p);
449            }
450        }
451        else {
452            PyObject *utf8;
453            utf8 = PyUnicode_AsEncodedString(v, "utf8", "surrogatepass");
454            if (utf8 == NULL) {
455                p->depth--;
456                p->error = WFERR_UNMARSHALLABLE;
457                return;
458            }
459            if (p->version >= 3 &&  PyUnicode_CHECK_INTERNED(v))
460                W_TYPE(TYPE_INTERNED, p);
461            else
462                W_TYPE(TYPE_UNICODE, p);
463            w_pstring(PyBytes_AS_STRING(utf8), PyBytes_GET_SIZE(utf8), p);
464            Py_DECREF(utf8);
465        }
466    }
467    else if (PyTuple_CheckExact(v)) {
468        n = PyTuple_Size(v);
469        if (p->version >= 4 && n < 256) {
470            W_TYPE(TYPE_SMALL_TUPLE, p);
471            w_byte((unsigned char)n, p);
472        }
473        else {
474            W_TYPE(TYPE_TUPLE, p);
475            W_SIZE(n, p);
476        }
477        for (i = 0; i < n; i++) {
478            w_object(PyTuple_GET_ITEM(v, i), p);
479        }
480    }
481    else if (PyList_CheckExact(v)) {
482        W_TYPE(TYPE_LIST, p);
483        n = PyList_GET_SIZE(v);
484        W_SIZE(n, p);
485        for (i = 0; i < n; i++) {
486            w_object(PyList_GET_ITEM(v, i), p);
487        }
488    }
489    else if (PyDict_CheckExact(v)) {
490        Py_ssize_t pos;
491        PyObject *key, *value;
492        W_TYPE(TYPE_DICT, p);
493        /* This one is NULL object terminated! */
494        pos = 0;
495        while (PyDict_Next(v, &pos, &key, &value)) {
496            w_object(key, p);
497            w_object(value, p);
498        }
499        w_object((PyObject *)NULL, p);
500    }
501    else if (PyAnySet_CheckExact(v)) {
502        PyObject *value, *it;
503
504        if (PyObject_TypeCheck(v, &PySet_Type))
505            W_TYPE(TYPE_SET, p);
506        else
507            W_TYPE(TYPE_FROZENSET, p);
508        n = PyObject_Size(v);
509        if (n == -1) {
510            p->depth--;
511            p->error = WFERR_UNMARSHALLABLE;
512            return;
513        }
514        W_SIZE(n, p);
515        it = PyObject_GetIter(v);
516        if (it == NULL) {
517            p->depth--;
518            p->error = WFERR_UNMARSHALLABLE;
519            return;
520        }
521        while ((value = PyIter_Next(it)) != NULL) {
522            w_object(value, p);
523            Py_DECREF(value);
524        }
525        Py_DECREF(it);
526        if (PyErr_Occurred()) {
527            p->depth--;
528            p->error = WFERR_UNMARSHALLABLE;
529            return;
530        }
531    }
532    else if (PyCode_Check(v)) {
533        PyCodeObject *co = (PyCodeObject *)v;
534        W_TYPE(TYPE_CODE, p);
535        w_long(co->co_argcount, p);
536        w_long(co->co_kwonlyargcount, p);
537        w_long(co->co_nlocals, p);
538        w_long(co->co_stacksize, p);
539        w_long(co->co_flags, p);
540        w_object(co->co_code, p);
541        w_object(co->co_consts, p);
542        w_object(co->co_names, p);
543        w_object(co->co_varnames, p);
544        w_object(co->co_freevars, p);
545        w_object(co->co_cellvars, p);
546        w_object(co->co_filename, p);
547        w_object(co->co_name, p);
548        w_long(co->co_firstlineno, p);
549        w_object(co->co_lnotab, p);
550    }
551    else if (PyObject_CheckBuffer(v)) {
552        /* Write unknown bytes-like objects as a byte string */
553        Py_buffer view;
554        if (PyObject_GetBuffer(v, &view, PyBUF_SIMPLE) != 0) {
555            w_byte(TYPE_UNKNOWN, p);
556            p->depth--;
557            p->error = WFERR_UNMARSHALLABLE;
558            return;
559        }
560        W_TYPE(TYPE_STRING, p);
561        w_pstring(view.buf, view.len, p);
562        PyBuffer_Release(&view);
563    }
564    else {
565        W_TYPE(TYPE_UNKNOWN, p);
566        p->error = WFERR_UNMARSHALLABLE;
567    }
568}
569
570static int
571w_init_refs(WFILE *wf, int version)
572{
573    if (version >= 3) {
574        wf->hashtable = _Py_hashtable_new(sizeof(int), _Py_hashtable_hash_ptr,
575                                          _Py_hashtable_compare_direct);
576        if (wf->hashtable == NULL) {
577            PyErr_NoMemory();
578            return -1;
579        }
580    }
581    return 0;
582}
583
584static int
585w_decref_entry(_Py_hashtable_entry_t *entry, void *Py_UNUSED(data))
586{
587    Py_XDECREF(entry->key);
588    return 0;
589}
590
591static void
592w_clear_refs(WFILE *wf)
593{
594    if (wf->hashtable != NULL) {
595        _Py_hashtable_foreach(wf->hashtable, w_decref_entry, NULL);
596        _Py_hashtable_destroy(wf->hashtable);
597    }
598}
599
600/* version currently has no effect for writing ints. */
601void
602PyMarshal_WriteLongToFile(long x, FILE *fp, int version)
603{
604    char buf[4];
605    WFILE wf;
606    memset(&wf, 0, sizeof(wf));
607    wf.fp = fp;
608    wf.ptr = wf.buf = buf;
609    wf.end = wf.ptr + sizeof(buf);
610    wf.error = WFERR_OK;
611    wf.version = version;
612    w_long(x, &wf);
613    w_flush(&wf);
614}
615
616void
617PyMarshal_WriteObjectToFile(PyObject *x, FILE *fp, int version)
618{
619    char buf[BUFSIZ];
620    WFILE wf;
621    memset(&wf, 0, sizeof(wf));
622    wf.fp = fp;
623    wf.ptr = wf.buf = buf;
624    wf.end = wf.ptr + sizeof(buf);
625    wf.error = WFERR_OK;
626    wf.version = version;
627    if (w_init_refs(&wf, version))
628        return; /* caller mush check PyErr_Occurred() */
629    w_object(x, &wf);
630    w_clear_refs(&wf);
631    w_flush(&wf);
632}
633
/* State of one unmarshalling operation.  The readers below take their input
   from one of three places: an in-memory byte range (ptr/end), a Python
   stream-like object (readable), or a C stdio stream (fp). */
typedef struct {
    FILE *fp;  /* stdio stream; used when ptr and readable are both NULL */
    int depth;  /* current r_object() nesting level */
    PyObject *readable;  /* Stream-like object being read from */
    PyObject *current_filename;  /* filename remembered while reading code
                                    objects so equal filenames are shared */
    char *ptr;  /* next unread byte of in-memory data, or NULL */
    char *end;  /* one past the last byte of in-memory data */
    char *buf;  /* scratch buffer allocated by r_string(), or NULL */
    Py_ssize_t buf_size;  /* allocated size of buf, in bytes */
    PyObject *refs;  /* a list */
} RFILE;
645
646static const char *
647r_string(Py_ssize_t n, RFILE *p)
648{
649    Py_ssize_t read = -1;
650
651    if (p->ptr != NULL) {
652        /* Fast path for loads() */
653        char *res = p->ptr;
654        Py_ssize_t left = p->end - p->ptr;
655        if (left < n) {
656            PyErr_SetString(PyExc_EOFError,
657                            "marshal data too short");
658            return NULL;
659        }
660        p->ptr += n;
661        return res;
662    }
663    if (p->buf == NULL) {
664        p->buf = PyMem_MALLOC(n);
665        if (p->buf == NULL) {
666            PyErr_NoMemory();
667            return NULL;
668        }
669        p->buf_size = n;
670    }
671    else if (p->buf_size < n) {
672        p->buf = PyMem_REALLOC(p->buf, n);
673        if (p->buf == NULL) {
674            PyErr_NoMemory();
675            return NULL;
676        }
677        p->buf_size = n;
678    }
679
680    if (!p->readable) {
681        assert(p->fp != NULL);
682        read = fread(p->buf, 1, n, p->fp);
683    }
684    else {
685        _Py_IDENTIFIER(readinto);
686        PyObject *res, *mview;
687        Py_buffer buf;
688
689        if (PyBuffer_FillInfo(&buf, NULL, p->buf, n, 0, PyBUF_CONTIG) == -1)
690            return NULL;
691        mview = PyMemoryView_FromBuffer(&buf);
692        if (mview == NULL)
693            return NULL;
694
695        res = _PyObject_CallMethodId(p->readable, &PyId_readinto, "N", mview);
696        if (res != NULL) {
697            read = PyNumber_AsSsize_t(res, PyExc_ValueError);
698            Py_DECREF(res);
699        }
700    }
701    if (read != n) {
702        if (!PyErr_Occurred()) {
703            if (read > n)
704                PyErr_Format(PyExc_ValueError,
705                             "read() returned too much data: "
706                             "%zd bytes requested, %zd returned",
707                             n, read);
708            else
709                PyErr_SetString(PyExc_EOFError,
710                                "EOF read where not expected");
711        }
712        return NULL;
713    }
714    return p->buf;
715}
716
717static int
718r_byte(RFILE *p)
719{
720    int c = EOF;
721
722    if (p->ptr != NULL) {
723        if (p->ptr < p->end)
724            c = (unsigned char) *p->ptr++;
725        return c;
726    }
727    if (!p->readable) {
728        assert(p->fp);
729        c = getc(p->fp);
730    }
731    else {
732        const char *ptr = r_string(1, p);
733        if (ptr != NULL)
734            c = *(unsigned char *) ptr;
735    }
736    return c;
737}
738
739static int
740r_short(RFILE *p)
741{
742    short x = -1;
743    const unsigned char *buffer;
744
745    buffer = (const unsigned char *) r_string(2, p);
746    if (buffer != NULL) {
747        x = buffer[0];
748        x |= buffer[1] << 8;
749        /* Sign-extension, in case short greater than 16 bits */
750        x |= -(x & 0x8000);
751    }
752    return x;
753}
754
755static long
756r_long(RFILE *p)
757{
758    long x = -1;
759    const unsigned char *buffer;
760
761    buffer = (const unsigned char *) r_string(4, p);
762    if (buffer != NULL) {
763        x = buffer[0];
764        x |= (long)buffer[1] << 8;
765        x |= (long)buffer[2] << 16;
766        x |= (long)buffer[3] << 24;
767#if SIZEOF_LONG > 4
768        /* Sign extension for 64-bit machines */
769        x |= -(x & 0x80000000L);
770#endif
771    }
772    return x;
773}
774
775static PyObject *
776r_PyLong(RFILE *p)
777{
778    PyLongObject *ob;
779    long n, size, i;
780    int j, md, shorts_in_top_digit;
781    digit d;
782
783    n = r_long(p);
784    if (PyErr_Occurred())
785        return NULL;
786    if (n == 0)
787        return (PyObject *)_PyLong_New(0);
788    if (n < -SIZE32_MAX || n > SIZE32_MAX) {
789        PyErr_SetString(PyExc_ValueError,
790                       "bad marshal data (long size out of range)");
791        return NULL;
792    }
793
794    size = 1 + (Py_ABS(n) - 1) / PyLong_MARSHAL_RATIO;
795    shorts_in_top_digit = 1 + (Py_ABS(n) - 1) % PyLong_MARSHAL_RATIO;
796    ob = _PyLong_New(size);
797    if (ob == NULL)
798        return NULL;
799
800    Py_SIZE(ob) = n > 0 ? size : -size;
801
802    for (i = 0; i < size-1; i++) {
803        d = 0;
804        for (j=0; j < PyLong_MARSHAL_RATIO; j++) {
805            md = r_short(p);
806            if (PyErr_Occurred()) {
807                Py_DECREF(ob);
808                return NULL;
809            }
810            if (md < 0 || md > PyLong_MARSHAL_BASE)
811                goto bad_digit;
812            d += (digit)md << j*PyLong_MARSHAL_SHIFT;
813        }
814        ob->ob_digit[i] = d;
815    }
816
817    d = 0;
818    for (j=0; j < shorts_in_top_digit; j++) {
819        md = r_short(p);
820        if (PyErr_Occurred()) {
821            Py_DECREF(ob);
822            return NULL;
823        }
824        if (md < 0 || md > PyLong_MARSHAL_BASE)
825            goto bad_digit;
826        /* topmost marshal digit should be nonzero */
827        if (md == 0 && j == shorts_in_top_digit - 1) {
828            Py_DECREF(ob);
829            PyErr_SetString(PyExc_ValueError,
830                "bad marshal data (unnormalized long data)");
831            return NULL;
832        }
833        d += (digit)md << j*PyLong_MARSHAL_SHIFT;
834    }
835    if (PyErr_Occurred()) {
836        Py_DECREF(ob);
837        return NULL;
838    }
839    /* top digit should be nonzero, else the resulting PyLong won't be
840       normalized */
841    ob->ob_digit[size-1] = d;
842    return (PyObject *)ob;
843  bad_digit:
844    Py_DECREF(ob);
845    PyErr_SetString(PyExc_ValueError,
846                    "bad marshal data (digit out of range in long)");
847    return NULL;
848}
849
850/* allocate the reflist index for a new object. Return -1 on failure */
851static Py_ssize_t
852r_ref_reserve(int flag, RFILE *p)
853{
854    if (flag) { /* currently only FLAG_REF is defined */
855        Py_ssize_t idx = PyList_GET_SIZE(p->refs);
856        if (idx >= 0x7ffffffe) {
857            PyErr_SetString(PyExc_ValueError, "bad marshal data (index list too large)");
858            return -1;
859        }
860        if (PyList_Append(p->refs, Py_None) < 0)
861            return -1;
862        return idx;
863    } else
864        return 0;
865}
866
867/* insert the new object 'o' to the reflist at previously
868 * allocated index 'idx'.
869 * 'o' can be NULL, in which case nothing is done.
870 * if 'o' was non-NULL, and the function succeeds, 'o' is returned.
871 * if 'o' was non-NULL, and the function fails, 'o' is released and
872 * NULL returned. This simplifies error checking at the call site since
873 * a single test for NULL for the function result is enough.
874 */
875static PyObject *
876r_ref_insert(PyObject *o, Py_ssize_t idx, int flag, RFILE *p)
877{
878    if (o != NULL && flag) { /* currently only FLAG_REF is defined */
879        PyObject *tmp = PyList_GET_ITEM(p->refs, idx);
880        Py_INCREF(o);
881        PyList_SET_ITEM(p->refs, idx, o);
882        Py_DECREF(tmp);
883    }
884    return o;
885}
886
887/* combination of both above, used when an object can be
888 * created whenever it is seen in the file, as opposed to
889 * after having loaded its sub-objects.
890 */
891static PyObject *
892r_ref(PyObject *o, int flag, RFILE *p)
893{
894    assert(flag & FLAG_REF);
895    if (o == NULL)
896        return NULL;
897    if (PyList_Append(p->refs, o) < 0) {
898        Py_DECREF(o); /* release the new object */
899        return NULL;
900    }
901    return o;
902}
903
904static PyObject *
905r_object(RFILE *p)
906{
907    /* NULL is a valid return value, it does not necessarily means that
908       an exception is set. */
909    PyObject *v, *v2;
910    Py_ssize_t idx = 0;
911    long i, n;
912    int type, code = r_byte(p);
913    int flag, is_interned = 0;
914    PyObject *retval = NULL;
915
916    if (code == EOF) {
917        PyErr_SetString(PyExc_EOFError,
918                        "EOF read where object expected");
919        return NULL;
920    }
921
922    p->depth++;
923
924    if (p->depth > MAX_MARSHAL_STACK_DEPTH) {
925        p->depth--;
926        PyErr_SetString(PyExc_ValueError, "recursion limit exceeded");
927        return NULL;
928    }
929
930    flag = code & FLAG_REF;
931    type = code & ~FLAG_REF;
932
933#define R_REF(O) do{\
934    if (flag) \
935        O = r_ref(O, flag, p);\
936} while (0)
937
938    switch (type) {
939
940    case TYPE_NULL:
941        break;
942
943    case TYPE_NONE:
944        Py_INCREF(Py_None);
945        retval = Py_None;
946        break;
947
948    case TYPE_STOPITER:
949        Py_INCREF(PyExc_StopIteration);
950        retval = PyExc_StopIteration;
951        break;
952
953    case TYPE_ELLIPSIS:
954        Py_INCREF(Py_Ellipsis);
955        retval = Py_Ellipsis;
956        break;
957
958    case TYPE_FALSE:
959        Py_INCREF(Py_False);
960        retval = Py_False;
961        break;
962
963    case TYPE_TRUE:
964        Py_INCREF(Py_True);
965        retval = Py_True;
966        break;
967
968    case TYPE_INT:
969        n = r_long(p);
970        retval = PyErr_Occurred() ? NULL : PyLong_FromLong(n);
971        R_REF(retval);
972        break;
973
974    case TYPE_LONG:
975        retval = r_PyLong(p);
976        R_REF(retval);
977        break;
978
979    case TYPE_FLOAT:
980        {
981            char buf[256];
982            const char *ptr;
983            double dx;
984            n = r_byte(p);
985            if (n == EOF) {
986                PyErr_SetString(PyExc_EOFError,
987                    "EOF read where object expected");
988                break;
989            }
990            ptr = r_string(n, p);
991            if (ptr == NULL)
992                break;
993            memcpy(buf, ptr, n);
994            buf[n] = '\0';
995            dx = PyOS_string_to_double(buf, NULL, NULL);
996            if (dx == -1.0 && PyErr_Occurred())
997                break;
998            retval = PyFloat_FromDouble(dx);
999            R_REF(retval);
1000            break;
1001        }
1002
1003    case TYPE_BINARY_FLOAT:
1004        {
1005            const unsigned char *buf;
1006            double x;
1007            buf = (const unsigned char *) r_string(8, p);
1008            if (buf == NULL)
1009                break;
1010            x = _PyFloat_Unpack8(buf, 1);
1011            if (x == -1.0 && PyErr_Occurred())
1012                break;
1013            retval = PyFloat_FromDouble(x);
1014            R_REF(retval);
1015            break;
1016        }
1017
1018    case TYPE_COMPLEX:
1019        {
1020            char buf[256];
1021            const char *ptr;
1022            Py_complex c;
1023            n = r_byte(p);
1024            if (n == EOF) {
1025                PyErr_SetString(PyExc_EOFError,
1026                    "EOF read where object expected");
1027                break;
1028            }
1029            ptr = r_string(n, p);
1030            if (ptr == NULL)
1031                break;
1032            memcpy(buf, ptr, n);
1033            buf[n] = '\0';
1034            c.real = PyOS_string_to_double(buf, NULL, NULL);
1035            if (c.real == -1.0 && PyErr_Occurred())
1036                break;
1037            n = r_byte(p);
1038            if (n == EOF) {
1039                PyErr_SetString(PyExc_EOFError,
1040                    "EOF read where object expected");
1041                break;
1042            }
1043            ptr = r_string(n, p);
1044            if (ptr == NULL)
1045                break;
1046            memcpy(buf, ptr, n);
1047            buf[n] = '\0';
1048            c.imag = PyOS_string_to_double(buf, NULL, NULL);
1049            if (c.imag == -1.0 && PyErr_Occurred())
1050                break;
1051            retval = PyComplex_FromCComplex(c);
1052            R_REF(retval);
1053            break;
1054        }
1055
1056    case TYPE_BINARY_COMPLEX:
1057        {
1058            const unsigned char *buf;
1059            Py_complex c;
1060            buf = (const unsigned char *) r_string(8, p);
1061            if (buf == NULL)
1062                break;
1063            c.real = _PyFloat_Unpack8(buf, 1);
1064            if (c.real == -1.0 && PyErr_Occurred())
1065                break;
1066            buf = (const unsigned char *) r_string(8, p);
1067            if (buf == NULL)
1068                break;
1069            c.imag = _PyFloat_Unpack8(buf, 1);
1070            if (c.imag == -1.0 && PyErr_Occurred())
1071                break;
1072            retval = PyComplex_FromCComplex(c);
1073            R_REF(retval);
1074            break;
1075        }
1076
1077    case TYPE_STRING:
1078        {
1079            const char *ptr;
1080            n = r_long(p);
1081            if (PyErr_Occurred())
1082                break;
1083            if (n < 0 || n > SIZE32_MAX) {
1084                PyErr_SetString(PyExc_ValueError, "bad marshal data (string size out of range)");
1085                break;
1086            }
1087            v = PyBytes_FromStringAndSize((char *)NULL, n);
1088            if (v == NULL)
1089                break;
1090            ptr = r_string(n, p);
1091            if (ptr == NULL) {
1092                Py_DECREF(v);
1093                break;
1094            }
1095            memcpy(PyBytes_AS_STRING(v), ptr, n);
1096            retval = v;
1097            R_REF(retval);
1098            break;
1099        }
1100
1101    case TYPE_ASCII_INTERNED:
1102        is_interned = 1;
1103    case TYPE_ASCII:
1104        n = r_long(p);
1105        if (PyErr_Occurred())
1106            break;
1107        if (n < 0 || n > SIZE32_MAX) {
1108            PyErr_SetString(PyExc_ValueError, "bad marshal data (unicode size out of range)");
1109            break;
1110        }
1111        goto _read_ascii;
1112
1113    case TYPE_SHORT_ASCII_INTERNED:
1114        is_interned = 1;
1115    case TYPE_SHORT_ASCII:
1116        n = r_byte(p);
1117        if (n == EOF) {
1118            PyErr_SetString(PyExc_EOFError,
1119                "EOF read where object expected");
1120            break;
1121        }
1122    _read_ascii:
1123        {
1124            const char *ptr;
1125            ptr = r_string(n, p);
1126            if (ptr == NULL)
1127                break;
1128            v = PyUnicode_FromKindAndData(PyUnicode_1BYTE_KIND, ptr, n);
1129            if (v == NULL)
1130                break;
1131            if (is_interned)
1132                PyUnicode_InternInPlace(&v);
1133            retval = v;
1134            R_REF(retval);
1135            break;
1136        }
1137
1138    case TYPE_INTERNED:
1139        is_interned = 1;
1140    case TYPE_UNICODE:
1141        {
1142        const char *buffer;
1143
1144        n = r_long(p);
1145        if (PyErr_Occurred())
1146            break;
1147        if (n < 0 || n > SIZE32_MAX) {
1148            PyErr_SetString(PyExc_ValueError, "bad marshal data (unicode size out of range)");
1149            break;
1150        }
1151        if (n != 0) {
1152            buffer = r_string(n, p);
1153            if (buffer == NULL)
1154                break;
1155            v = PyUnicode_DecodeUTF8(buffer, n, "surrogatepass");
1156        }
1157        else {
1158            v = PyUnicode_New(0, 0);
1159        }
1160        if (v == NULL)
1161            break;
1162        if (is_interned)
1163            PyUnicode_InternInPlace(&v);
1164        retval = v;
1165        R_REF(retval);
1166        break;
1167        }
1168
1169    case TYPE_SMALL_TUPLE:
1170        n = (unsigned char) r_byte(p);
1171        if (PyErr_Occurred())
1172            break;
1173        goto _read_tuple;
1174    case TYPE_TUPLE:
1175        n = r_long(p);
1176        if (PyErr_Occurred())
1177            break;
1178        if (n < 0 || n > SIZE32_MAX) {
1179            PyErr_SetString(PyExc_ValueError, "bad marshal data (tuple size out of range)");
1180            break;
1181        }
1182    _read_tuple:
1183        v = PyTuple_New(n);
1184        R_REF(v);
1185        if (v == NULL)
1186            break;
1187
1188        for (i = 0; i < n; i++) {
1189            v2 = r_object(p);
1190            if ( v2 == NULL ) {
1191                if (!PyErr_Occurred())
1192                    PyErr_SetString(PyExc_TypeError,
1193                        "NULL object in marshal data for tuple");
1194                Py_DECREF(v);
1195                v = NULL;
1196                break;
1197            }
1198            PyTuple_SET_ITEM(v, i, v2);
1199        }
1200        retval = v;
1201        break;
1202
1203    case TYPE_LIST:
1204        n = r_long(p);
1205        if (PyErr_Occurred())
1206            break;
1207        if (n < 0 || n > SIZE32_MAX) {
1208            PyErr_SetString(PyExc_ValueError, "bad marshal data (list size out of range)");
1209            break;
1210        }
1211        v = PyList_New(n);
1212        R_REF(v);
1213        if (v == NULL)
1214            break;
1215        for (i = 0; i < n; i++) {
1216            v2 = r_object(p);
1217            if ( v2 == NULL ) {
1218                if (!PyErr_Occurred())
1219                    PyErr_SetString(PyExc_TypeError,
1220                        "NULL object in marshal data for list");
1221                Py_DECREF(v);
1222                v = NULL;
1223                break;
1224            }
1225            PyList_SET_ITEM(v, i, v2);
1226        }
1227        retval = v;
1228        break;
1229
1230    case TYPE_DICT:
1231        v = PyDict_New();
1232        R_REF(v);
1233        if (v == NULL)
1234            break;
1235        for (;;) {
1236            PyObject *key, *val;
1237            key = r_object(p);
1238            if (key == NULL)
1239                break;
1240            val = r_object(p);
1241            if (val == NULL) {
1242                Py_DECREF(key);
1243                break;
1244            }
1245            if (PyDict_SetItem(v, key, val) < 0) {
1246                Py_DECREF(key);
1247                Py_DECREF(val);
1248                break;
1249            }
1250            Py_DECREF(key);
1251            Py_DECREF(val);
1252        }
1253        if (PyErr_Occurred()) {
1254            Py_DECREF(v);
1255            v = NULL;
1256        }
1257        retval = v;
1258        break;
1259
1260    case TYPE_SET:
1261    case TYPE_FROZENSET:
1262        n = r_long(p);
1263        if (PyErr_Occurred())
1264            break;
1265        if (n < 0 || n > SIZE32_MAX) {
1266            PyErr_SetString(PyExc_ValueError, "bad marshal data (set size out of range)");
1267            break;
1268        }
1269
1270        if (n == 0 && type == TYPE_FROZENSET) {
1271            /* call frozenset() to get the empty frozenset singleton */
1272            v = PyObject_CallFunction((PyObject*)&PyFrozenSet_Type, NULL);
1273            if (v == NULL)
1274                break;
1275            R_REF(v);
1276            retval = v;
1277        }
1278        else {
1279            v = (type == TYPE_SET) ? PySet_New(NULL) : PyFrozenSet_New(NULL);
1280            if (type == TYPE_SET) {
1281                R_REF(v);
1282            } else {
1283                /* must use delayed registration of frozensets because they must
1284                 * be init with a refcount of 1
1285                 */
1286                idx = r_ref_reserve(flag, p);
1287                if (idx < 0)
1288                    Py_CLEAR(v); /* signal error */
1289            }
1290            if (v == NULL)
1291                break;
1292
1293            for (i = 0; i < n; i++) {
1294                v2 = r_object(p);
1295                if ( v2 == NULL ) {
1296                    if (!PyErr_Occurred())
1297                        PyErr_SetString(PyExc_TypeError,
1298                            "NULL object in marshal data for set");
1299                    Py_DECREF(v);
1300                    v = NULL;
1301                    break;
1302                }
1303                if (PySet_Add(v, v2) == -1) {
1304                    Py_DECREF(v);
1305                    Py_DECREF(v2);
1306                    v = NULL;
1307                    break;
1308                }
1309                Py_DECREF(v2);
1310            }
1311            if (type != TYPE_SET)
1312                v = r_ref_insert(v, idx, flag, p);
1313            retval = v;
1314        }
1315        break;
1316
1317    case TYPE_CODE:
1318        {
1319            int argcount;
1320            int kwonlyargcount;
1321            int nlocals;
1322            int stacksize;
1323            int flags;
1324            PyObject *code = NULL;
1325            PyObject *consts = NULL;
1326            PyObject *names = NULL;
1327            PyObject *varnames = NULL;
1328            PyObject *freevars = NULL;
1329            PyObject *cellvars = NULL;
1330            PyObject *filename = NULL;
1331            PyObject *name = NULL;
1332            int firstlineno;
1333            PyObject *lnotab = NULL;
1334
1335            idx = r_ref_reserve(flag, p);
1336            if (idx < 0)
1337                break;
1338
1339            v = NULL;
1340
1341            /* XXX ignore long->int overflows for now */
1342            argcount = (int)r_long(p);
1343            if (PyErr_Occurred())
1344                goto code_error;
1345            kwonlyargcount = (int)r_long(p);
1346            if (PyErr_Occurred())
1347                goto code_error;
1348            nlocals = (int)r_long(p);
1349            if (PyErr_Occurred())
1350                goto code_error;
1351            stacksize = (int)r_long(p);
1352            if (PyErr_Occurred())
1353                goto code_error;
1354            flags = (int)r_long(p);
1355            if (PyErr_Occurred())
1356                goto code_error;
1357            code = r_object(p);
1358            if (code == NULL)
1359                goto code_error;
1360            consts = r_object(p);
1361            if (consts == NULL)
1362                goto code_error;
1363            names = r_object(p);
1364            if (names == NULL)
1365                goto code_error;
1366            varnames = r_object(p);
1367            if (varnames == NULL)
1368                goto code_error;
1369            freevars = r_object(p);
1370            if (freevars == NULL)
1371                goto code_error;
1372            cellvars = r_object(p);
1373            if (cellvars == NULL)
1374                goto code_error;
1375            filename = r_object(p);
1376            if (filename == NULL)
1377                goto code_error;
1378            if (PyUnicode_CheckExact(filename)) {
1379                if (p->current_filename != NULL) {
1380                    if (!PyUnicode_Compare(filename, p->current_filename)) {
1381                        Py_DECREF(filename);
1382                        Py_INCREF(p->current_filename);
1383                        filename = p->current_filename;
1384                    }
1385                }
1386                else {
1387                    p->current_filename = filename;
1388                }
1389            }
1390            name = r_object(p);
1391            if (name == NULL)
1392                goto code_error;
1393            firstlineno = (int)r_long(p);
1394            if (firstlineno == -1 && PyErr_Occurred())
1395                break;
1396            lnotab = r_object(p);
1397            if (lnotab == NULL)
1398                goto code_error;
1399
1400            v = (PyObject *) PyCode_New(
1401                            argcount, kwonlyargcount,
1402                            nlocals, stacksize, flags,
1403                            code, consts, names, varnames,
1404                            freevars, cellvars, filename, name,
1405                            firstlineno, lnotab);
1406            v = r_ref_insert(v, idx, flag, p);
1407
1408          code_error:
1409            Py_XDECREF(code);
1410            Py_XDECREF(consts);
1411            Py_XDECREF(names);
1412            Py_XDECREF(varnames);
1413            Py_XDECREF(freevars);
1414            Py_XDECREF(cellvars);
1415            Py_XDECREF(filename);
1416            Py_XDECREF(name);
1417            Py_XDECREF(lnotab);
1418        }
1419        retval = v;
1420        break;
1421
1422    case TYPE_REF:
1423        n = r_long(p);
1424        if (n < 0 || n >= PyList_GET_SIZE(p->refs)) {
1425            if (n == -1 && PyErr_Occurred())
1426                break;
1427            PyErr_SetString(PyExc_ValueError, "bad marshal data (invalid reference)");
1428            break;
1429        }
1430        v = PyList_GET_ITEM(p->refs, n);
1431        if (v == Py_None) {
1432            PyErr_SetString(PyExc_ValueError, "bad marshal data (invalid reference)");
1433            break;
1434        }
1435        Py_INCREF(v);
1436        retval = v;
1437        break;
1438
1439    default:
1440        /* Bogus data got written, which isn't ideal.
1441           This will let you keep working and recover. */
1442        PyErr_SetString(PyExc_ValueError, "bad marshal data (unknown type code)");
1443        break;
1444
1445    }
1446    p->depth--;
1447    return retval;
1448}
1449
1450static PyObject *
1451read_object(RFILE *p)
1452{
1453    PyObject *v;
1454    if (PyErr_Occurred()) {
1455        fprintf(stderr, "XXX readobject called with exception set\n");
1456        return NULL;
1457    }
1458    v = r_object(p);
1459    if (v == NULL && !PyErr_Occurred())
1460        PyErr_SetString(PyExc_TypeError, "NULL object in marshal data for object");
1461    return v;
1462}
1463
1464int
1465PyMarshal_ReadShortFromFile(FILE *fp)
1466{
1467    RFILE rf;
1468    int res;
1469    assert(fp);
1470    rf.readable = NULL;
1471    rf.fp = fp;
1472    rf.current_filename = NULL;
1473    rf.end = rf.ptr = NULL;
1474    rf.buf = NULL;
1475    res = r_short(&rf);
1476    if (rf.buf != NULL)
1477        PyMem_FREE(rf.buf);
1478    return res;
1479}
1480
1481long
1482PyMarshal_ReadLongFromFile(FILE *fp)
1483{
1484    RFILE rf;
1485    long res;
1486    rf.fp = fp;
1487    rf.readable = NULL;
1488    rf.current_filename = NULL;
1489    rf.ptr = rf.end = NULL;
1490    rf.buf = NULL;
1491    res = r_long(&rf);
1492    if (rf.buf != NULL)
1493        PyMem_FREE(rf.buf);
1494    return res;
1495}
1496
1497/* Return size of file in bytes; < 0 if unknown or INT_MAX if too big */
1498static off_t
1499getfilesize(FILE *fp)
1500{
1501    struct _Py_stat_struct st;
1502    if (_Py_fstat_noraise(fileno(fp), &st) != 0)
1503        return -1;
1504#if SIZEOF_OFF_T == 4
1505    else if (st.st_size >= INT_MAX)
1506        return (off_t)INT_MAX;
1507#endif
1508    else
1509        return (off_t)st.st_size;
1510}
1511
1512/* If we can get the size of the file up-front, and it's reasonably small,
1513 * read it in one gulp and delegate to ...FromString() instead.  Much quicker
1514 * than reading a byte at a time from file; speeds .pyc imports.
1515 * CAUTION:  since this may read the entire remainder of the file, don't
1516 * call it unless you know you're done with the file.
1517 */
1518PyObject *
1519PyMarshal_ReadLastObjectFromFile(FILE *fp)
1520{
1521/* REASONABLE_FILE_LIMIT is by defn something big enough for Tkinter.pyc. */
1522#define REASONABLE_FILE_LIMIT (1L << 18)
1523    off_t filesize;
1524    filesize = getfilesize(fp);
1525    if (filesize > 0 && filesize <= REASONABLE_FILE_LIMIT) {
1526        char* pBuf = (char *)PyMem_MALLOC(filesize);
1527        if (pBuf != NULL) {
1528            size_t n = fread(pBuf, 1, (size_t)filesize, fp);
1529            PyObject* v = PyMarshal_ReadObjectFromString(pBuf, n);
1530            PyMem_FREE(pBuf);
1531            return v;
1532        }
1533
1534    }
1535    /* We don't have fstat, or we do but the file is larger than
1536     * REASONABLE_FILE_LIMIT or malloc failed -- read a byte at a time.
1537     */
1538    return PyMarshal_ReadObjectFromFile(fp);
1539
1540#undef REASONABLE_FILE_LIMIT
1541}
1542
1543PyObject *
1544PyMarshal_ReadObjectFromFile(FILE *fp)
1545{
1546    RFILE rf;
1547    PyObject *result;
1548    rf.fp = fp;
1549    rf.readable = NULL;
1550    rf.current_filename = NULL;
1551    rf.depth = 0;
1552    rf.ptr = rf.end = NULL;
1553    rf.buf = NULL;
1554    rf.refs = PyList_New(0);
1555    if (rf.refs == NULL)
1556        return NULL;
1557    result = r_object(&rf);
1558    Py_DECREF(rf.refs);
1559    if (rf.buf != NULL)
1560        PyMem_FREE(rf.buf);
1561    return result;
1562}
1563
1564PyObject *
1565PyMarshal_ReadObjectFromString(const char *str, Py_ssize_t len)
1566{
1567    RFILE rf;
1568    PyObject *result;
1569    rf.fp = NULL;
1570    rf.readable = NULL;
1571    rf.current_filename = NULL;
1572    rf.ptr = (char *)str;
1573    rf.end = (char *)str + len;
1574    rf.buf = NULL;
1575    rf.depth = 0;
1576    rf.refs = PyList_New(0);
1577    if (rf.refs == NULL)
1578        return NULL;
1579    result = r_object(&rf);
1580    Py_DECREF(rf.refs);
1581    if (rf.buf != NULL)
1582        PyMem_FREE(rf.buf);
1583    return result;
1584}
1585
1586PyObject *
1587PyMarshal_WriteObjectToString(PyObject *x, int version)
1588{
1589    WFILE wf;
1590
1591    memset(&wf, 0, sizeof(wf));
1592    wf.str = PyBytes_FromStringAndSize((char *)NULL, 50);
1593    if (wf.str == NULL)
1594        return NULL;
1595    wf.ptr = wf.buf = PyBytes_AS_STRING((PyBytesObject *)wf.str);
1596    wf.end = wf.ptr + PyBytes_Size(wf.str);
1597    wf.error = WFERR_OK;
1598    wf.version = version;
1599    if (w_init_refs(&wf, version)) {
1600        Py_DECREF(wf.str);
1601        return NULL;
1602    }
1603    w_object(x, &wf);
1604    w_clear_refs(&wf);
1605    if (wf.str != NULL) {
1606        char *base = PyBytes_AS_STRING((PyBytesObject *)wf.str);
1607        if (wf.ptr - base > PY_SSIZE_T_MAX) {
1608            Py_DECREF(wf.str);
1609            PyErr_SetString(PyExc_OverflowError,
1610                            "too much marshal data for a string");
1611            return NULL;
1612        }
1613        if (_PyBytes_Resize(&wf.str, (Py_ssize_t)(wf.ptr - base)) < 0)
1614            return NULL;
1615    }
1616    if (wf.error != WFERR_OK) {
1617        Py_XDECREF(wf.str);
1618        if (wf.error == WFERR_NOMEMORY)
1619            PyErr_NoMemory();
1620        else
1621            PyErr_SetString(PyExc_ValueError,
1622              (wf.error==WFERR_UNMARSHALLABLE)?"unmarshallable object"
1623               :"object too deeply nested to marshal");
1624        return NULL;
1625    }
1626    return wf.str;
1627}
1628
1629/* And an interface for Python programs... */
1630
1631static PyObject *
1632marshal_dump(PyObject *self, PyObject *args)
1633{
1634    /* XXX Quick hack -- need to do this differently */
1635    PyObject *x;
1636    PyObject *f;
1637    int version = Py_MARSHAL_VERSION;
1638    PyObject *s;
1639    PyObject *res;
1640    _Py_IDENTIFIER(write);
1641
1642    if (!PyArg_ParseTuple(args, "OO|i:dump", &x, &f, &version))
1643        return NULL;
1644    s = PyMarshal_WriteObjectToString(x, version);
1645    if (s == NULL)
1646        return NULL;
1647    res = _PyObject_CallMethodId(f, &PyId_write, "O", s);
1648    Py_DECREF(s);
1649    return res;
1650}
1651
1652PyDoc_STRVAR(dump_doc,
1653"dump(value, file[, version])\n\
1654\n\
1655Write the value on the open file. The value must be a supported type.\n\
1656The file must be an open file object such as sys.stdout or returned by\n\
1657open() or os.popen(). It must be opened in binary mode ('wb' or 'w+b').\n\
1658\n\
1659If the value has (or contains an object that has) an unsupported type, a\n\
1660ValueError exception is raised - but garbage data will also be written\n\
1661to the file. The object will not be properly read back by load()\n\
1662\n\
1663The version argument indicates the data format that dump should use.");
1664
1665static PyObject *
1666marshal_load(PyObject *self, PyObject *f)
1667{
1668    PyObject *data, *result;
1669    _Py_IDENTIFIER(read);
1670    RFILE rf;
1671
1672    /*
1673     * Make a call to the read method, but read zero bytes.
1674     * This is to ensure that the object passed in at least
1675     * has a read method which returns bytes.
1676     * This can be removed if we guarantee good error handling
1677     * for r_string()
1678     */
1679    data = _PyObject_CallMethodId(f, &PyId_read, "i", 0);
1680    if (data == NULL)
1681        return NULL;
1682    if (!PyBytes_Check(data)) {
1683        PyErr_Format(PyExc_TypeError,
1684                     "f.read() returned not bytes but %.100s",
1685                     data->ob_type->tp_name);
1686        result = NULL;
1687    }
1688    else {
1689        rf.depth = 0;
1690        rf.fp = NULL;
1691        rf.readable = f;
1692        rf.current_filename = NULL;
1693        rf.ptr = rf.end = NULL;
1694        rf.buf = NULL;
1695        if ((rf.refs = PyList_New(0)) != NULL) {
1696            result = read_object(&rf);
1697            Py_DECREF(rf.refs);
1698            if (rf.buf != NULL)
1699                PyMem_FREE(rf.buf);
1700        } else
1701            result = NULL;
1702    }
1703    Py_DECREF(data);
1704    return result;
1705}
1706
1707PyDoc_STRVAR(load_doc,
1708"load(file)\n\
1709\n\
1710Read one value from the open file and return it. If no valid value is\n\
1711read (e.g. because the data has a different Python version's\n\
1712incompatible marshal format), raise EOFError, ValueError or TypeError.\n\
1713The file must be an open file object opened in binary mode ('rb' or\n\
1714'r+b').\n\
1715\n\
1716Note: If an object containing an unsupported type was marshalled with\n\
1717dump(), load() will substitute None for the unmarshallable type.");
1718
1719
1720static PyObject *
1721marshal_dumps(PyObject *self, PyObject *args)
1722{
1723    PyObject *x;
1724    int version = Py_MARSHAL_VERSION;
1725    if (!PyArg_ParseTuple(args, "O|i:dumps", &x, &version))
1726        return NULL;
1727    return PyMarshal_WriteObjectToString(x, version);
1728}
1729
1730PyDoc_STRVAR(dumps_doc,
1731"dumps(value[, version])\n\
1732\n\
1733Return the string that would be written to a file by dump(value, file).\n\
1734The value must be a supported type. Raise a ValueError exception if\n\
1735value has (or contains an object that has) an unsupported type.\n\
1736\n\
1737The version argument indicates the data format that dumps should use.");
1738
1739
1740static PyObject *
1741marshal_loads(PyObject *self, PyObject *args)
1742{
1743    RFILE rf;
1744    Py_buffer p;
1745    char *s;
1746    Py_ssize_t n;
1747    PyObject* result;
1748    if (!PyArg_ParseTuple(args, "y*:loads", &p))
1749        return NULL;
1750    s = p.buf;
1751    n = p.len;
1752    rf.fp = NULL;
1753    rf.readable = NULL;
1754    rf.current_filename = NULL;
1755    rf.ptr = s;
1756    rf.end = s + n;
1757    rf.depth = 0;
1758    if ((rf.refs = PyList_New(0)) == NULL)
1759        return NULL;
1760    result = read_object(&rf);
1761    PyBuffer_Release(&p);
1762    Py_DECREF(rf.refs);
1763    return result;
1764}
1765
1766PyDoc_STRVAR(loads_doc,
1767"loads(bytes)\n\
1768\n\
1769Convert the bytes object to a value. If no valid value is found, raise\n\
1770EOFError, ValueError or TypeError. Extra characters in the input are\n\
1771ignored.");
1772
1773static PyMethodDef marshal_methods[] = {
1774    {"dump",            marshal_dump,   METH_VARARGS,   dump_doc},
1775    {"load",            marshal_load,   METH_O,         load_doc},
1776    {"dumps",           marshal_dumps,  METH_VARARGS,   dumps_doc},
1777    {"loads",           marshal_loads,  METH_VARARGS,   loads_doc},
1778    {NULL,              NULL}           /* sentinel */
1779};
1780
1781
1782PyDoc_STRVAR(module_doc,
1783"This module contains functions that can read and write Python values in\n\
1784a binary format. The format is specific to Python, but independent of\n\
1785machine architecture issues.\n\
1786\n\
1787Not all Python object types are supported; in general, only objects\n\
1788whose value is independent from a particular invocation of Python can be\n\
1789written and read by this module. The following types are supported:\n\
1790None, integers, floating point numbers, strings, bytes, bytearrays,\n\
1791tuples, lists, sets, dictionaries, and code objects, where it\n\
1792should be understood that tuples, lists and dictionaries are only\n\
1793supported as long as the values contained therein are themselves\n\
1794supported; and recursive lists and dictionaries should not be written\n\
1795(they will cause infinite loops).\n\
1796\n\
1797Variables:\n\
1798\n\
1799version -- indicates the format that the module uses. Version 0 is the\n\
1800    historical format, version 1 shares interned strings and version 2\n\
1801    uses a binary format for floating point numbers.\n\
1802    Version 3 shares common object references (New in version 3.4).\n\
1803\n\
1804Functions:\n\
1805\n\
1806dump() -- write value to a file\n\
1807load() -- read value from a file\n\
1808dumps() -- write value to a string\n\
1809loads() -- read value from a string");
1810
1811
1812
1813static struct PyModuleDef marshalmodule = {
1814    PyModuleDef_HEAD_INIT,
1815    "marshal",
1816    module_doc,
1817    0,
1818    marshal_methods,
1819    NULL,
1820    NULL,
1821    NULL,
1822    NULL
1823};
1824
1825PyMODINIT_FUNC
1826PyMarshal_Init(void)
1827{
1828    PyObject *mod = PyModule_Create(&marshalmodule);
1829    if (mod == NULL)
1830        return NULL;
1831    PyModule_AddIntConstant(mod, "version", Py_MARSHAL_VERSION);
1832    return mod;
1833}
1834