marshal.c revision 2527796a22404d5b8cb0e498a965c6b4a743caac
1
2/* Write Python objects to files and read them back.
3   This is intended for writing and reading compiled Python code only;
4   a true persistent storage facility would be much harder, since
5   it would have to take circular links and sharing into account. */
6
7#define PY_SSIZE_T_CLEAN
8
9#include "Python.h"
10#include "longintrepr.h"
11#include "code.h"
12#include "marshal.h"
13
/* Magnitude of x.  The argument is evaluated twice, so it must be free of
   side effects. */
#define ABS(x) ((x) < 0 ? (0 - (x)) : (x))
15
/* Upper bound on how deeply marshalled objects may nest.  Once the object
 * stack reaches this depth an exception is raised instead of recursing any
 * further, because going deeper risks crashing the interpreter.
 * Windows debug builds use a smaller limit.
 */
#if !defined(MS_WINDOWS) || !defined(_DEBUG)
#define MAX_MARSHAL_STACK_DEPTH 2000
#else
#define MAX_MARSHAL_STACK_DEPTH 1500
#endif
26
/* One-byte tags written in front of every marshalled value. */

/* Markers and singletons */
#define TYPE_NULL               '0'
#define TYPE_NONE               'N'
#define TYPE_FALSE              'F'
#define TYPE_TRUE               'T'
#define TYPE_STOPITER           'S'
#define TYPE_ELLIPSIS           '.'
#define TYPE_UNKNOWN            '?'

/* Numbers */
#define TYPE_INT                'i'
#define TYPE_INT64              'I'
#define TYPE_LONG               'l'
#define TYPE_FLOAT              'f'
#define TYPE_BINARY_FLOAT       'g'
#define TYPE_COMPLEX            'x'
#define TYPE_BINARY_COMPLEX     'y'

/* Byte strings and text */
#define TYPE_STRING             's'
#define TYPE_UNICODE            'u'

/* Containers */
#define TYPE_TUPLE              '('
#define TYPE_LIST               '['
#define TYPE_DICT               '{'
#define TYPE_SET                '<'
#define TYPE_FROZENSET          '>'

/* Code objects */
#define TYPE_CODE               'c'
49
/* Values stored in WFILE.error while writing. */
#define WFERR_OK              0   /* nothing went wrong */
#define WFERR_UNMARSHALLABLE  1   /* object cannot be written */
#define WFERR_NESTEDTOODEEP   2   /* nesting exceeded MAX_MARSHAL_STACK_DEPTH */
#define WFERR_NOMEMORY        3   /* a temporary buffer could not be allocated */
54
55typedef struct {
56    FILE *fp;
57    int error;  /* see WFERR_* values */
58    int depth;
59    /* If fp == NULL, the following are valid: */
60    PyObject *readable;    /* Stream-like object being read from */
61    PyObject *str;
62    PyObject *current_filename;
63    char *ptr;
64    char *end;
65    int version;
66} WFILE;
67
/* Write the single byte c to the output described by p.
 *
 * If p->fp is set the byte goes to that stdio stream.  Otherwise it is
 * stored at p->ptr, and w_more() is called to enlarge the buffer first when
 * p->ptr has reached p->end.
 *
 * The body is wrapped in do { } while (0) so the macro behaves as one
 * statement: it can sit unbraced in an if/else without its internal
 * "else" pairing with the caller's "if".  c is evaluated once; p is
 * evaluated several times and must not have side effects.
 */
#define w_byte(c, p) \
    do { \
        if ((p)->fp) \
            putc((c), (p)->fp); \
        else if ((p)->ptr != (p)->end) \
            *(p)->ptr++ = (c); \
        else \
            w_more((c), (p)); \
    } while (0)
71
72static void
73w_more(int c, WFILE *p)
74{
75    Py_ssize_t size, newsize;
76    if (p->str == NULL)
77        return; /* An error already occurred */
78    size = PyBytes_Size(p->str);
79    newsize = size + size + 1024;
80    if (newsize > 32*1024*1024) {
81        newsize = size + (size >> 3);           /* 12.5% overallocation */
82    }
83    if (_PyBytes_Resize(&p->str, newsize) != 0) {
84        p->ptr = p->end = NULL;
85    }
86    else {
87        p->ptr = PyBytes_AS_STRING((PyBytesObject *)p->str) + size;
88        p->end =
89            PyBytes_AS_STRING((PyBytesObject *)p->str) + newsize;
90        *p->ptr++ = Py_SAFE_DOWNCAST(c, int, char);
91    }
92}
93
94static void
95w_string(char *s, int n, WFILE *p)
96{
97    if (p->fp != NULL) {
98        fwrite(s, 1, n, p->fp);
99    }
100    else {
101        while (--n >= 0) {
102            w_byte(*s, p);
103            s++;
104        }
105    }
106}
107
108static void
109w_short(int x, WFILE *p)
110{
111    w_byte((char)( x      & 0xff), p);
112    w_byte((char)((x>> 8) & 0xff), p);
113}
114
115static void
116w_long(long x, WFILE *p)
117{
118    w_byte((char)( x      & 0xff), p);
119    w_byte((char)((x>> 8) & 0xff), p);
120    w_byte((char)((x>>16) & 0xff), p);
121    w_byte((char)((x>>24) & 0xff), p);
122}
123
#if SIZEOF_LONG > 4
/* Write x as two consecutive 32-bit little-endian words: the low word
   first, then bits 32 and up.  Only built where long is wider than 32
   bits. */
static void
w_long64(long x, WFILE *p)
{
    const long high_word = x >> 32;

    w_long(x, p);
    w_long(high_word, p);
}
#endif
132
/* Python ints are assumed to be stored internally with a digit width that
   is a multiple of 15 bits.  For portability the marshal format always
   reads and writes them as 15-bit digits, i.e. in base exactly 2**15. */

#define PyLong_MARSHAL_SHIFT 15
#if (PyLong_SHIFT % PyLong_MARSHAL_SHIFT) != 0
#error "PyLong_SHIFT must be a multiple of PyLong_MARSHAL_SHIFT"
#endif

/* Number of marshal digits packed into one internal digit. */
#define PyLong_MARSHAL_RATIO (PyLong_SHIFT / PyLong_MARSHAL_SHIFT)
#define PyLong_MARSHAL_BASE ((short)1 << PyLong_MARSHAL_SHIFT)
#define PyLong_MARSHAL_MASK (PyLong_MARSHAL_BASE - 1)
144
145static void
146w_PyLong(const PyLongObject *ob, WFILE *p)
147{
148    Py_ssize_t i, j, n, l;
149    digit d;
150
151    w_byte(TYPE_LONG, p);
152    if (Py_SIZE(ob) == 0) {
153        w_long((long)0, p);
154        return;
155    }
156
157    /* set l to number of base PyLong_MARSHAL_BASE digits */
158    n = ABS(Py_SIZE(ob));
159    l = (n-1) * PyLong_MARSHAL_RATIO;
160    d = ob->ob_digit[n-1];
161    assert(d != 0); /* a PyLong is always normalized */
162    do {
163        d >>= PyLong_MARSHAL_SHIFT;
164        l++;
165    } while (d != 0);
166    w_long((long)(Py_SIZE(ob) > 0 ? l : -l), p);
167
168    for (i=0; i < n-1; i++) {
169        d = ob->ob_digit[i];
170        for (j=0; j < PyLong_MARSHAL_RATIO; j++) {
171            w_short(d & PyLong_MARSHAL_MASK, p);
172            d >>= PyLong_MARSHAL_SHIFT;
173        }
174        assert (d == 0);
175    }
176    d = ob->ob_digit[n-1];
177    do {
178        w_short(d & PyLong_MARSHAL_MASK, p);
179        d >>= PyLong_MARSHAL_SHIFT;
180    } while (d != 0);
181}
182
183static void
184w_object(PyObject *v, WFILE *p)
185{
186    Py_ssize_t i, n;
187
188    p->depth++;
189
190    if (p->depth > MAX_MARSHAL_STACK_DEPTH) {
191        p->error = WFERR_NESTEDTOODEEP;
192    }
193    else if (v == NULL) {
194        w_byte(TYPE_NULL, p);
195    }
196    else if (v == Py_None) {
197        w_byte(TYPE_NONE, p);
198    }
199    else if (v == PyExc_StopIteration) {
200        w_byte(TYPE_STOPITER, p);
201    }
202    else if (v == Py_Ellipsis) {
203        w_byte(TYPE_ELLIPSIS, p);
204    }
205    else if (v == Py_False) {
206        w_byte(TYPE_FALSE, p);
207    }
208    else if (v == Py_True) {
209        w_byte(TYPE_TRUE, p);
210    }
211    else if (PyLong_CheckExact(v)) {
212        long x = PyLong_AsLong(v);
213        if ((x == -1)  && PyErr_Occurred()) {
214            PyLongObject *ob = (PyLongObject *)v;
215            PyErr_Clear();
216            w_PyLong(ob, p);
217        }
218        else {
219#if SIZEOF_LONG > 4
220            long y = Py_ARITHMETIC_RIGHT_SHIFT(long, x, 31);
221            if (y && y != -1) {
222                w_byte(TYPE_INT64, p);
223                w_long64(x, p);
224            }
225            else
226#endif
227            {
228                w_byte(TYPE_INT, p);
229                w_long(x, p);
230            }
231        }
232    }
233    else if (PyFloat_CheckExact(v)) {
234        if (p->version > 1) {
235            unsigned char buf[8];
236            if (_PyFloat_Pack8(PyFloat_AsDouble(v),
237                               buf, 1) < 0) {
238                p->error = WFERR_UNMARSHALLABLE;
239                return;
240            }
241            w_byte(TYPE_BINARY_FLOAT, p);
242            w_string((char*)buf, 8, p);
243        }
244        else {
245            char *buf = PyOS_double_to_string(PyFloat_AS_DOUBLE(v),
246                                              'g', 17, 0, NULL);
247            if (!buf) {
248                p->error = WFERR_NOMEMORY;
249                return;
250            }
251            n = strlen(buf);
252            w_byte(TYPE_FLOAT, p);
253            w_byte((int)n, p);
254            w_string(buf, (int)n, p);
255            PyMem_Free(buf);
256        }
257    }
258    else if (PyComplex_CheckExact(v)) {
259        if (p->version > 1) {
260            unsigned char buf[8];
261            if (_PyFloat_Pack8(PyComplex_RealAsDouble(v),
262                               buf, 1) < 0) {
263                p->error = WFERR_UNMARSHALLABLE;
264                return;
265            }
266            w_byte(TYPE_BINARY_COMPLEX, p);
267            w_string((char*)buf, 8, p);
268            if (_PyFloat_Pack8(PyComplex_ImagAsDouble(v),
269                               buf, 1) < 0) {
270                p->error = WFERR_UNMARSHALLABLE;
271                return;
272            }
273            w_string((char*)buf, 8, p);
274        }
275        else {
276            char *buf;
277            w_byte(TYPE_COMPLEX, p);
278            buf = PyOS_double_to_string(PyComplex_RealAsDouble(v),
279                                        'g', 17, 0, NULL);
280            if (!buf) {
281                p->error = WFERR_NOMEMORY;
282                return;
283            }
284            n = strlen(buf);
285            w_byte((int)n, p);
286            w_string(buf, (int)n, p);
287            PyMem_Free(buf);
288            buf = PyOS_double_to_string(PyComplex_ImagAsDouble(v),
289                                        'g', 17, 0, NULL);
290            if (!buf) {
291                p->error = WFERR_NOMEMORY;
292                return;
293            }
294            n = strlen(buf);
295            w_byte((int)n, p);
296            w_string(buf, (int)n, p);
297            PyMem_Free(buf);
298        }
299    }
300    else if (PyBytes_CheckExact(v)) {
301        w_byte(TYPE_STRING, p);
302        n = PyBytes_GET_SIZE(v);
303        if (n > INT_MAX) {
304            /* huge strings are not supported */
305            p->depth--;
306            p->error = WFERR_UNMARSHALLABLE;
307            return;
308        }
309        w_long((long)n, p);
310        w_string(PyBytes_AS_STRING(v), (int)n, p);
311    }
312    else if (PyUnicode_CheckExact(v)) {
313        PyObject *utf8;
314        utf8 = PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(v),
315                                    PyUnicode_GET_SIZE(v),
316                                    "surrogatepass");
317        if (utf8 == NULL) {
318            p->depth--;
319            p->error = WFERR_UNMARSHALLABLE;
320            return;
321        }
322        w_byte(TYPE_UNICODE, p);
323        n = PyBytes_GET_SIZE(utf8);
324        if (n > INT_MAX) {
325            p->depth--;
326            p->error = WFERR_UNMARSHALLABLE;
327            return;
328        }
329        w_long((long)n, p);
330        w_string(PyBytes_AS_STRING(utf8), (int)n, p);
331        Py_DECREF(utf8);
332    }
333    else if (PyTuple_CheckExact(v)) {
334        w_byte(TYPE_TUPLE, p);
335        n = PyTuple_Size(v);
336        w_long((long)n, p);
337        for (i = 0; i < n; i++) {
338            w_object(PyTuple_GET_ITEM(v, i), p);
339        }
340    }
341    else if (PyList_CheckExact(v)) {
342        w_byte(TYPE_LIST, p);
343        n = PyList_GET_SIZE(v);
344        w_long((long)n, p);
345        for (i = 0; i < n; i++) {
346            w_object(PyList_GET_ITEM(v, i), p);
347        }
348    }
349    else if (PyDict_CheckExact(v)) {
350        Py_ssize_t pos;
351        PyObject *key, *value;
352        w_byte(TYPE_DICT, p);
353        /* This one is NULL object terminated! */
354        pos = 0;
355        while (PyDict_Next(v, &pos, &key, &value)) {
356            w_object(key, p);
357            w_object(value, p);
358        }
359        w_object((PyObject *)NULL, p);
360    }
361    else if (PyAnySet_CheckExact(v)) {
362        PyObject *value, *it;
363
364        if (PyObject_TypeCheck(v, &PySet_Type))
365            w_byte(TYPE_SET, p);
366        else
367            w_byte(TYPE_FROZENSET, p);
368        n = PyObject_Size(v);
369        if (n == -1) {
370            p->depth--;
371            p->error = WFERR_UNMARSHALLABLE;
372            return;
373        }
374        w_long((long)n, p);
375        it = PyObject_GetIter(v);
376        if (it == NULL) {
377            p->depth--;
378            p->error = WFERR_UNMARSHALLABLE;
379            return;
380        }
381        while ((value = PyIter_Next(it)) != NULL) {
382            w_object(value, p);
383            Py_DECREF(value);
384        }
385        Py_DECREF(it);
386        if (PyErr_Occurred()) {
387            p->depth--;
388            p->error = WFERR_UNMARSHALLABLE;
389            return;
390        }
391    }
392    else if (PyCode_Check(v)) {
393        PyCodeObject *co = (PyCodeObject *)v;
394        w_byte(TYPE_CODE, p);
395        w_long(co->co_argcount, p);
396        w_long(co->co_kwonlyargcount, p);
397        w_long(co->co_nlocals, p);
398        w_long(co->co_stacksize, p);
399        w_long(co->co_flags, p);
400        w_object(co->co_code, p);
401        w_object(co->co_consts, p);
402        w_object(co->co_names, p);
403        w_object(co->co_varnames, p);
404        w_object(co->co_freevars, p);
405        w_object(co->co_cellvars, p);
406        w_object(co->co_filename, p);
407        w_object(co->co_name, p);
408        w_long(co->co_firstlineno, p);
409        w_object(co->co_lnotab, p);
410    }
411    else if (PyObject_CheckBuffer(v)) {
412        /* Write unknown buffer-style objects as a string */
413        char *s;
414        PyBufferProcs *pb = v->ob_type->tp_as_buffer;
415        Py_buffer view;
416        if ((*pb->bf_getbuffer)(v, &view, PyBUF_SIMPLE) != 0) {
417            w_byte(TYPE_UNKNOWN, p);
418            p->error = WFERR_UNMARSHALLABLE;
419        }
420        w_byte(TYPE_STRING, p);
421        n = view.len;
422        s = view.buf;
423        if (n > INT_MAX) {
424            p->depth--;
425            p->error = WFERR_UNMARSHALLABLE;
426            return;
427        }
428        w_long((long)n, p);
429        w_string(s, (int)n, p);
430        if (pb->bf_releasebuffer != NULL)
431            (*pb->bf_releasebuffer)(v, &view);
432    }
433    else {
434        w_byte(TYPE_UNKNOWN, p);
435        p->error = WFERR_UNMARSHALLABLE;
436    }
437    p->depth--;
438}
439
440/* version currently has no effect for writing longs. */
441void
442PyMarshal_WriteLongToFile(long x, FILE *fp, int version)
443{
444    WFILE wf;
445    wf.fp = fp;
446    wf.error = WFERR_OK;
447    wf.depth = 0;
448    wf.version = version;
449    w_long(x, &wf);
450}
451
452void
453PyMarshal_WriteObjectToFile(PyObject *x, FILE *fp, int version)
454{
455    WFILE wf;
456    wf.fp = fp;
457    wf.error = WFERR_OK;
458    wf.depth = 0;
459    wf.version = version;
460    w_object(x, &wf);
461}
462
463typedef WFILE RFILE; /* Same struct with different invariants */
464
/* Fetch the next byte (0..255) from the in-memory buffer of p and advance
   past it, or yield EOF once p->ptr has reached p->end. */
#define rs_byte(p) (((p)->ptr >= (p)->end) ? EOF : (unsigned char)*(p)->ptr++)
466
467static int
468r_string(char *s, int n, RFILE *p)
469{
470    char *ptr;
471    int read, left;
472
473    if (!p->readable) {
474        if (p->fp != NULL)
475            /* The result fits into int because it must be <=n. */
476            read = (int) fread(s, 1, n, p->fp);
477        else {
478            left = (int)(p->end - p->ptr);
479            read = (left < n) ? left : n;
480            memcpy(s, p->ptr, read);
481            p->ptr += read;
482        }
483    }
484    else {
485        PyObject *data = PyObject_CallMethod(p->readable, "read", "i", n);
486        read = 0;
487        if (data != NULL) {
488            if (!PyBytes_Check(data)) {
489                PyErr_Format(PyExc_TypeError,
490                             "f.read() returned not bytes but %.100s",
491                             data->ob_type->tp_name);
492            }
493            else {
494                read = PyBytes_GET_SIZE(data);
495                if (read > 0) {
496                    ptr = PyBytes_AS_STRING(data);
497                    memcpy(s, ptr, read);
498                }
499            }
500            Py_DECREF(data);
501        }
502    }
503    if (!PyErr_Occurred() && (read < n)) {
504        PyErr_SetString(PyExc_EOFError, "EOF read where not expected");
505    }
506    return read;
507}
508
509
510static int
511r_byte(RFILE *p)
512{
513    int c = EOF;
514    unsigned char ch;
515    int n;
516
517    if (!p->readable)
518        c = p->fp ? getc(p->fp) : rs_byte(p);
519    else {
520        n = r_string((char *) &ch, 1, p);
521        if (n > 0)
522            c = ch;
523    }
524    return c;
525}
526
527static int
528r_short(RFILE *p)
529{
530    register short x;
531    unsigned char buffer[2];
532
533    r_string((char *) buffer, 2, p);
534    x = buffer[0];
535    x |= buffer[1] << 8;
536    /* Sign-extension, in case short greater than 16 bits */
537    x |= -(x & 0x8000);
538    return x;
539}
540
541static long
542r_long(RFILE *p)
543{
544    register long x;
545    unsigned char buffer[4];
546
547    r_string((char *) buffer, 4, p);
548    x = buffer[0];
549    x |= (long)buffer[1] << 8;
550    x |= (long)buffer[2] << 16;
551    x |= (long)buffer[3] << 24;
552#if SIZEOF_LONG > 4
553    /* Sign extension for 64-bit machines */
554    x |= -(x & 0x80000000L);
555#endif
556    return x;
557}
558
559/* r_long64 deals with the TYPE_INT64 code.  On a machine with
560   sizeof(long) > 4, it returns a Python int object, else a Python long
561   object.  Note that w_long64 writes out TYPE_INT if 32 bits is enough,
562   so there's no inefficiency here in returning a PyLong on 32-bit boxes
563   for everything written via TYPE_INT64 (i.e., if an int is written via
564   TYPE_INT64, it *needs* more than 32 bits).
565*/
566static PyObject *
567r_long64(RFILE *p)
568{
569    PyObject *result = NULL;
570    long lo4 = r_long(p);
571    long hi4 = r_long(p);
572
573    if (!PyErr_Occurred()) {
574#if SIZEOF_LONG > 4
575        long x = (hi4 << 32) | (lo4 & 0xFFFFFFFFL);
576        result = PyLong_FromLong(x);
577#else
578        unsigned char buf[8];
579        int one = 1;
580        int is_little_endian = (int)*(char*)&one;
581        if (is_little_endian) {
582            memcpy(buf, &lo4, 4);
583            memcpy(buf+4, &hi4, 4);
584        }
585        else {
586            memcpy(buf, &hi4, 4);
587            memcpy(buf+4, &lo4, 4);
588        }
589        result = _PyLong_FromByteArray(buf, 8, is_little_endian, 1);
590#endif
591    }
592    return result;
593}
594
595static PyObject *
596r_PyLong(RFILE *p)
597{
598    PyLongObject *ob;
599    int size, i, j, md, shorts_in_top_digit;
600    long n;
601    digit d;
602
603    n = r_long(p);
604    if (PyErr_Occurred())
605        return NULL;
606    if (n == 0)
607        return (PyObject *)_PyLong_New(0);
608    if (n < -INT_MAX || n > INT_MAX) {
609        PyErr_SetString(PyExc_ValueError,
610                       "bad marshal data (long size out of range)");
611        return NULL;
612    }
613
614    size = 1 + (ABS(n) - 1) / PyLong_MARSHAL_RATIO;
615    shorts_in_top_digit = 1 + (ABS(n) - 1) % PyLong_MARSHAL_RATIO;
616    ob = _PyLong_New(size);
617    if (ob == NULL)
618        return NULL;
619    Py_SIZE(ob) = n > 0 ? size : -size;
620
621    for (i = 0; i < size-1; i++) {
622        d = 0;
623        for (j=0; j < PyLong_MARSHAL_RATIO; j++) {
624            md = r_short(p);
625            if (PyErr_Occurred())
626                break;
627            if (md < 0 || md > PyLong_MARSHAL_BASE)
628                goto bad_digit;
629            d += (digit)md << j*PyLong_MARSHAL_SHIFT;
630        }
631        ob->ob_digit[i] = d;
632    }
633    d = 0;
634    for (j=0; j < shorts_in_top_digit; j++) {
635        md = r_short(p);
636        if (PyErr_Occurred())
637            break;
638        if (md < 0 || md > PyLong_MARSHAL_BASE)
639            goto bad_digit;
640        /* topmost marshal digit should be nonzero */
641        if (md == 0 && j == shorts_in_top_digit - 1) {
642            Py_DECREF(ob);
643            PyErr_SetString(PyExc_ValueError,
644                "bad marshal data (unnormalized long data)");
645            return NULL;
646        }
647        d += (digit)md << j*PyLong_MARSHAL_SHIFT;
648    }
649    if (PyErr_Occurred()) {
650        Py_DECREF(ob);
651        return NULL;
652    }
653    /* top digit should be nonzero, else the resulting PyLong won't be
654       normalized */
655    ob->ob_digit[size-1] = d;
656    return (PyObject *)ob;
657  bad_digit:
658    Py_DECREF(ob);
659    PyErr_SetString(PyExc_ValueError,
660                    "bad marshal data (digit out of range in long)");
661    return NULL;
662}
663
664
665static PyObject *
666r_object(RFILE *p)
667{
668    /* NULL is a valid return value, it does not necessarily means that
669       an exception is set. */
670    PyObject *v, *v2;
671    long i, n;
672    int type = r_byte(p);
673    PyObject *retval;
674
675    p->depth++;
676
677    if (p->depth > MAX_MARSHAL_STACK_DEPTH) {
678        p->depth--;
679        PyErr_SetString(PyExc_ValueError, "recursion limit exceeded");
680        return NULL;
681    }
682
683    switch (type) {
684
685    case EOF:
686        PyErr_SetString(PyExc_EOFError,
687                        "EOF read where object expected");
688        retval = NULL;
689        break;
690
691    case TYPE_NULL:
692        retval = NULL;
693        break;
694
695    case TYPE_NONE:
696        Py_INCREF(Py_None);
697        retval = Py_None;
698        break;
699
700    case TYPE_STOPITER:
701        Py_INCREF(PyExc_StopIteration);
702        retval = PyExc_StopIteration;
703        break;
704
705    case TYPE_ELLIPSIS:
706        Py_INCREF(Py_Ellipsis);
707        retval = Py_Ellipsis;
708        break;
709
710    case TYPE_FALSE:
711        Py_INCREF(Py_False);
712        retval = Py_False;
713        break;
714
715    case TYPE_TRUE:
716        Py_INCREF(Py_True);
717        retval = Py_True;
718        break;
719
720    case TYPE_INT:
721        n = r_long(p);
722        retval = PyErr_Occurred() ? NULL : PyLong_FromLong(n);
723        break;
724
725    case TYPE_INT64:
726        retval = r_long64(p);
727        break;
728
729    case TYPE_LONG:
730        retval = r_PyLong(p);
731        break;
732
733    case TYPE_FLOAT:
734        {
735            char buf[256];
736            double dx;
737            retval = NULL;
738            n = r_byte(p);
739            if (n == EOF || r_string(buf, (int)n, p) != n) {
740                PyErr_SetString(PyExc_EOFError,
741                    "EOF read where object expected");
742                break;
743            }
744            buf[n] = '\0';
745            dx = PyOS_string_to_double(buf, NULL, NULL);
746            if (dx == -1.0 && PyErr_Occurred())
747                break;
748            retval = PyFloat_FromDouble(dx);
749            break;
750        }
751
752    case TYPE_BINARY_FLOAT:
753        {
754            unsigned char buf[8];
755            double x;
756            if (r_string((char*)buf, 8, p) != 8) {
757                PyErr_SetString(PyExc_EOFError,
758                    "EOF read where object expected");
759                retval = NULL;
760                break;
761            }
762            x = _PyFloat_Unpack8(buf, 1);
763            if (x == -1.0 && PyErr_Occurred()) {
764                retval = NULL;
765                break;
766            }
767            retval = PyFloat_FromDouble(x);
768            break;
769        }
770
771    case TYPE_COMPLEX:
772        {
773            char buf[256];
774            Py_complex c;
775            retval = NULL;
776            n = r_byte(p);
777            if (n == EOF || r_string(buf, (int)n, p) != n) {
778                PyErr_SetString(PyExc_EOFError,
779                    "EOF read where object expected");
780                break;
781            }
782            buf[n] = '\0';
783            c.real = PyOS_string_to_double(buf, NULL, NULL);
784            if (c.real == -1.0 && PyErr_Occurred())
785                break;
786            n = r_byte(p);
787            if (n == EOF || r_string(buf, (int)n, p) != n) {
788                PyErr_SetString(PyExc_EOFError,
789                    "EOF read where object expected");
790                break;
791            }
792            buf[n] = '\0';
793            c.imag = PyOS_string_to_double(buf, NULL, NULL);
794            if (c.imag == -1.0 && PyErr_Occurred())
795                break;
796            retval = PyComplex_FromCComplex(c);
797            break;
798        }
799
800    case TYPE_BINARY_COMPLEX:
801        {
802            unsigned char buf[8];
803            Py_complex c;
804            if (r_string((char*)buf, 8, p) != 8) {
805                PyErr_SetString(PyExc_EOFError,
806                    "EOF read where object expected");
807                retval = NULL;
808                break;
809            }
810            c.real = _PyFloat_Unpack8(buf, 1);
811            if (c.real == -1.0 && PyErr_Occurred()) {
812                retval = NULL;
813                break;
814            }
815            if (r_string((char*)buf, 8, p) != 8) {
816                PyErr_SetString(PyExc_EOFError,
817                    "EOF read where object expected");
818                retval = NULL;
819                break;
820            }
821            c.imag = _PyFloat_Unpack8(buf, 1);
822            if (c.imag == -1.0 && PyErr_Occurred()) {
823                retval = NULL;
824                break;
825            }
826            retval = PyComplex_FromCComplex(c);
827            break;
828        }
829
830    case TYPE_STRING:
831        n = r_long(p);
832        if (PyErr_Occurred()) {
833            retval = NULL;
834            break;
835        }
836        if (n < 0 || n > INT_MAX) {
837            PyErr_SetString(PyExc_ValueError, "bad marshal data (string size out of range)");
838            retval = NULL;
839            break;
840        }
841        v = PyBytes_FromStringAndSize((char *)NULL, n);
842        if (v == NULL) {
843            retval = NULL;
844            break;
845        }
846        if (r_string(PyBytes_AS_STRING(v), (int)n, p) != n) {
847            Py_DECREF(v);
848            PyErr_SetString(PyExc_EOFError,
849                            "EOF read where object expected");
850            retval = NULL;
851            break;
852        }
853        retval = v;
854        break;
855
856    case TYPE_UNICODE:
857        {
858        char *buffer;
859
860        n = r_long(p);
861        if (PyErr_Occurred()) {
862            retval = NULL;
863            break;
864        }
865        if (n < 0 || n > INT_MAX) {
866            PyErr_SetString(PyExc_ValueError, "bad marshal data (unicode size out of range)");
867            retval = NULL;
868            break;
869        }
870        buffer = PyMem_NEW(char, n);
871        if (buffer == NULL) {
872            retval = PyErr_NoMemory();
873            break;
874        }
875        if (r_string(buffer, (int)n, p) != n) {
876            PyMem_DEL(buffer);
877            PyErr_SetString(PyExc_EOFError,
878                "EOF read where object expected");
879            retval = NULL;
880            break;
881        }
882        v = PyUnicode_DecodeUTF8(buffer, n, "surrogatepass");
883        PyMem_DEL(buffer);
884        retval = v;
885        break;
886        }
887
888    case TYPE_TUPLE:
889        n = r_long(p);
890        if (PyErr_Occurred()) {
891            retval = NULL;
892            break;
893        }
894        if (n < 0 || n > INT_MAX) {
895            PyErr_SetString(PyExc_ValueError, "bad marshal data (tuple size out of range)");
896            retval = NULL;
897            break;
898        }
899        v = PyTuple_New((int)n);
900        if (v == NULL) {
901            retval = NULL;
902            break;
903        }
904        for (i = 0; i < n; i++) {
905            v2 = r_object(p);
906            if ( v2 == NULL ) {
907                if (!PyErr_Occurred())
908                    PyErr_SetString(PyExc_TypeError,
909                        "NULL object in marshal data for tuple");
910                Py_DECREF(v);
911                v = NULL;
912                break;
913            }
914            PyTuple_SET_ITEM(v, (int)i, v2);
915        }
916        retval = v;
917        break;
918
919    case TYPE_LIST:
920        n = r_long(p);
921        if (PyErr_Occurred()) {
922            retval = NULL;
923            break;
924        }
925        if (n < 0 || n > INT_MAX) {
926            PyErr_SetString(PyExc_ValueError, "bad marshal data (list size out of range)");
927            retval = NULL;
928            break;
929        }
930        v = PyList_New((int)n);
931        if (v == NULL) {
932            retval = NULL;
933            break;
934        }
935        for (i = 0; i < n; i++) {
936            v2 = r_object(p);
937            if ( v2 == NULL ) {
938                if (!PyErr_Occurred())
939                    PyErr_SetString(PyExc_TypeError,
940                        "NULL object in marshal data for list");
941                Py_DECREF(v);
942                v = NULL;
943                break;
944            }
945            PyList_SET_ITEM(v, (int)i, v2);
946        }
947        retval = v;
948        break;
949
950    case TYPE_DICT:
951        v = PyDict_New();
952        if (v == NULL) {
953            retval = NULL;
954            break;
955        }
956        for (;;) {
957            PyObject *key, *val;
958            key = r_object(p);
959            if (key == NULL)
960                break;
961            val = r_object(p);
962            if (val != NULL)
963                PyDict_SetItem(v, key, val);
964            Py_DECREF(key);
965            Py_XDECREF(val);
966        }
967        if (PyErr_Occurred()) {
968            Py_DECREF(v);
969            v = NULL;
970        }
971        retval = v;
972        break;
973
974    case TYPE_SET:
975    case TYPE_FROZENSET:
976        n = r_long(p);
977        if (PyErr_Occurred()) {
978            retval = NULL;
979            break;
980        }
981        if (n < 0 || n > INT_MAX) {
982            PyErr_SetString(PyExc_ValueError, "bad marshal data (set size out of range)");
983            retval = NULL;
984            break;
985        }
986        v = (type == TYPE_SET) ? PySet_New(NULL) : PyFrozenSet_New(NULL);
987        if (v == NULL) {
988            retval = NULL;
989            break;
990        }
991        for (i = 0; i < n; i++) {
992            v2 = r_object(p);
993            if ( v2 == NULL ) {
994                if (!PyErr_Occurred())
995                    PyErr_SetString(PyExc_TypeError,
996                        "NULL object in marshal data for set");
997                Py_DECREF(v);
998                v = NULL;
999                break;
1000            }
1001            if (PySet_Add(v, v2) == -1) {
1002                Py_DECREF(v);
1003                Py_DECREF(v2);
1004                v = NULL;
1005                break;
1006            }
1007            Py_DECREF(v2);
1008        }
1009        retval = v;
1010        break;
1011
1012    case TYPE_CODE:
1013        {
1014            int argcount;
1015            int kwonlyargcount;
1016            int nlocals;
1017            int stacksize;
1018            int flags;
1019            PyObject *code = NULL;
1020            PyObject *consts = NULL;
1021            PyObject *names = NULL;
1022            PyObject *varnames = NULL;
1023            PyObject *freevars = NULL;
1024            PyObject *cellvars = NULL;
1025            PyObject *filename = NULL;
1026            PyObject *name = NULL;
1027            int firstlineno;
1028            PyObject *lnotab = NULL;
1029
1030            v = NULL;
1031
1032            /* XXX ignore long->int overflows for now */
1033            argcount = (int)r_long(p);
1034            if (PyErr_Occurred())
1035                goto code_error;
1036            kwonlyargcount = (int)r_long(p);
1037            if (PyErr_Occurred())
1038                goto code_error;
1039            nlocals = (int)r_long(p);
1040            if (PyErr_Occurred())
1041                goto code_error;
1042            stacksize = (int)r_long(p);
1043            if (PyErr_Occurred())
1044                goto code_error;
1045            flags = (int)r_long(p);
1046            if (PyErr_Occurred())
1047                goto code_error;
1048            code = r_object(p);
1049            if (code == NULL)
1050                goto code_error;
1051            consts = r_object(p);
1052            if (consts == NULL)
1053                goto code_error;
1054            names = r_object(p);
1055            if (names == NULL)
1056                goto code_error;
1057            varnames = r_object(p);
1058            if (varnames == NULL)
1059                goto code_error;
1060            freevars = r_object(p);
1061            if (freevars == NULL)
1062                goto code_error;
1063            cellvars = r_object(p);
1064            if (cellvars == NULL)
1065                goto code_error;
1066            filename = r_object(p);
1067            if (filename == NULL)
1068                goto code_error;
1069            if (PyUnicode_CheckExact(filename)) {
1070                if (p->current_filename != NULL) {
1071                    if (!PyUnicode_Compare(filename, p->current_filename)) {
1072                        Py_DECREF(filename);
1073                        Py_INCREF(p->current_filename);
1074                        filename = p->current_filename;
1075                    }
1076                }
1077                else {
1078                    p->current_filename = filename;
1079                }
1080            }
1081            name = r_object(p);
1082            if (name == NULL)
1083                goto code_error;
1084            firstlineno = (int)r_long(p);
1085            lnotab = r_object(p);
1086            if (lnotab == NULL)
1087                goto code_error;
1088
1089            v = (PyObject *) PyCode_New(
1090                            argcount, kwonlyargcount,
1091                            nlocals, stacksize, flags,
1092                            code, consts, names, varnames,
1093                            freevars, cellvars, filename, name,
1094                            firstlineno, lnotab);
1095
1096          code_error:
1097            Py_XDECREF(code);
1098            Py_XDECREF(consts);
1099            Py_XDECREF(names);
1100            Py_XDECREF(varnames);
1101            Py_XDECREF(freevars);
1102            Py_XDECREF(cellvars);
1103            Py_XDECREF(filename);
1104            Py_XDECREF(name);
1105            Py_XDECREF(lnotab);
1106        }
1107        retval = v;
1108        break;
1109
1110    default:
1111        /* Bogus data got written, which isn't ideal.
1112           This will let you keep working and recover. */
1113        PyErr_SetString(PyExc_ValueError, "bad marshal data (unknown type code)");
1114        retval = NULL;
1115        break;
1116
1117    }
1118    p->depth--;
1119    return retval;
1120}
1121
1122static PyObject *
1123read_object(RFILE *p)
1124{
1125    PyObject *v;
1126    if (PyErr_Occurred()) {
1127        fprintf(stderr, "XXX readobject called with exception set\n");
1128        return NULL;
1129    }
1130    v = r_object(p);
1131    if (v == NULL && !PyErr_Occurred())
1132        PyErr_SetString(PyExc_TypeError, "NULL object in marshal data for object");
1133    return v;
1134}
1135
1136int
1137PyMarshal_ReadShortFromFile(FILE *fp)
1138{
1139    RFILE rf;
1140    assert(fp);
1141    rf.readable = NULL;
1142    rf.fp = fp;
1143    rf.current_filename = NULL;
1144    rf.end = rf.ptr = NULL;
1145    return r_short(&rf);
1146}
1147
1148long
1149PyMarshal_ReadLongFromFile(FILE *fp)
1150{
1151    RFILE rf;
1152    rf.fp = fp;
1153    rf.readable = NULL;
1154    rf.current_filename = NULL;
1155    rf.ptr = rf.end = NULL;
1156    return r_long(&rf);
1157}
1158
#ifdef HAVE_FSTAT
/* Size in bytes of the file behind fp as reported by fstat();
 * -1 when fstat() fails (size unknown).
 */
static off_t
getfilesize(FILE *fp)
{
    struct stat info;

    if (fstat(fileno(fp), &info) == 0)
        return info.st_size;
    return -1;
}
#endif
1171
1172/* If we can get the size of the file up-front, and it's reasonably small,
1173 * read it in one gulp and delegate to ...FromString() instead.  Much quicker
1174 * than reading a byte at a time from file; speeds .pyc imports.
1175 * CAUTION:  since this may read the entire remainder of the file, don't
1176 * call it unless you know you're done with the file.
1177 */
1178PyObject *
1179PyMarshal_ReadLastObjectFromFile(FILE *fp)
1180{
1181/* REASONABLE_FILE_LIMIT is by defn something big enough for Tkinter.pyc. */
1182#define REASONABLE_FILE_LIMIT (1L << 18)
1183#ifdef HAVE_FSTAT
1184    off_t filesize;
1185    filesize = getfilesize(fp);
1186    if (filesize > 0 && filesize <= REASONABLE_FILE_LIMIT) {
1187        char* pBuf = (char *)PyMem_MALLOC(filesize);
1188        if (pBuf != NULL) {
1189            PyObject* v;
1190            size_t n;
1191            /* filesize must fit into an int, because it
1192               is smaller than REASONABLE_FILE_LIMIT */
1193            n = fread(pBuf, 1, (int)filesize, fp);
1194            v = PyMarshal_ReadObjectFromString(pBuf, n);
1195            PyMem_FREE(pBuf);
1196            return v;
1197        }
1198
1199    }
1200#endif
1201    /* We don't have fstat, or we do but the file is larger than
1202     * REASONABLE_FILE_LIMIT or malloc failed -- read a byte at a time.
1203     */
1204    return PyMarshal_ReadObjectFromFile(fp);
1205
1206#undef REASONABLE_FILE_LIMIT
1207}
1208
1209PyObject *
1210PyMarshal_ReadObjectFromFile(FILE *fp)
1211{
1212    RFILE rf;
1213    PyObject *result;
1214    rf.fp = fp;
1215    rf.readable = NULL;
1216    rf.current_filename = NULL;
1217    rf.depth = 0;
1218    rf.ptr = rf.end = NULL;
1219    result = r_object(&rf);
1220    return result;
1221}
1222
1223PyObject *
1224PyMarshal_ReadObjectFromString(char *str, Py_ssize_t len)
1225{
1226    RFILE rf;
1227    PyObject *result;
1228    rf.fp = NULL;
1229    rf.readable = NULL;
1230    rf.current_filename = NULL;
1231    rf.ptr = str;
1232    rf.end = str + len;
1233    rf.depth = 0;
1234    result = r_object(&rf);
1235    return result;
1236}
1237
1238PyObject *
1239PyMarshal_WriteObjectToString(PyObject *x, int version)
1240{
1241    WFILE wf;
1242    PyObject *res = NULL;
1243
1244    wf.fp = NULL;
1245    wf.readable = NULL;
1246    wf.str = PyBytes_FromStringAndSize((char *)NULL, 50);
1247    if (wf.str == NULL)
1248        return NULL;
1249    wf.ptr = PyBytes_AS_STRING((PyBytesObject *)wf.str);
1250    wf.end = wf.ptr + PyBytes_Size(wf.str);
1251    wf.error = WFERR_OK;
1252    wf.depth = 0;
1253    wf.version = version;
1254    w_object(x, &wf);
1255    if (wf.str != NULL) {
1256        char *base = PyBytes_AS_STRING((PyBytesObject *)wf.str);
1257        if (wf.ptr - base > PY_SSIZE_T_MAX) {
1258            Py_DECREF(wf.str);
1259            PyErr_SetString(PyExc_OverflowError,
1260                            "too much marshal data for a string");
1261            return NULL;
1262        }
1263        if (_PyBytes_Resize(&wf.str, (Py_ssize_t)(wf.ptr - base)) < 0)
1264            return NULL;
1265    }
1266    if (wf.error != WFERR_OK) {
1267        Py_XDECREF(wf.str);
1268        if (wf.error == WFERR_NOMEMORY)
1269            PyErr_NoMemory();
1270        else
1271            PyErr_SetString(PyExc_ValueError,
1272              (wf.error==WFERR_UNMARSHALLABLE)?"unmarshallable object"
1273               :"object too deeply nested to marshal");
1274        return NULL;
1275    }
1276    if (wf.str != NULL) {
1277        /* XXX Quick hack -- need to do this differently */
1278        res = PyBytes_FromObject(wf.str);
1279        Py_DECREF(wf.str);
1280    }
1281    return res;
1282}
1283
1284/* And an interface for Python programs... */
1285
1286static PyObject *
1287marshal_dump(PyObject *self, PyObject *args)
1288{
1289    /* XXX Quick hack -- need to do this differently */
1290    PyObject *x;
1291    PyObject *f;
1292    int version = Py_MARSHAL_VERSION;
1293    PyObject *s;
1294    PyObject *res;
1295    if (!PyArg_ParseTuple(args, "OO|i:dump", &x, &f, &version))
1296        return NULL;
1297    s = PyMarshal_WriteObjectToString(x, version);
1298    if (s == NULL)
1299        return NULL;
1300    res = PyObject_CallMethod(f, "write", "O", s);
1301    Py_DECREF(s);
1302    return res;
1303}
1304
1305PyDoc_STRVAR(dump_doc,
1306"dump(value, file[, version])\n\
1307\n\
1308Write the value on the open file. The value must be a supported type.\n\
1309The file must be an open file object such as sys.stdout or returned by\n\
1310open() or os.popen(). It must be opened in binary mode ('wb' or 'w+b').\n\
1311\n\
1312If the value has (or contains an object that has) an unsupported type, a\n\
1313ValueError exception is raised — but garbage data will also be written\n\
1314to the file. The object will not be properly read back by load()\n\
1315\n\
1316The version argument indicates the data format that dump should use.");
1317
1318static PyObject *
1319marshal_load(PyObject *self, PyObject *f)
1320{
1321    PyObject *data, *result;
1322    RFILE rf;
1323
1324    /*
1325     * Make a call to the read method, but read zero bytes.
1326     * This is to ensure that the object passed in at least
1327     * has a read method which returns bytes.
1328     */
1329    data = PyObject_CallMethod(f, "read", "i", 0);
1330    if (data == NULL)
1331        return NULL;
1332    if (!PyBytes_Check(data)) {
1333        PyErr_Format(PyExc_TypeError,
1334                     "f.read() returned not bytes but %.100s",
1335                     data->ob_type->tp_name);
1336        result = NULL;
1337    }
1338    else {
1339        rf.depth = 0;
1340        rf.fp = NULL;
1341        rf.readable = f;
1342        rf.current_filename = NULL;
1343        result = read_object(&rf);
1344    }
1345    Py_DECREF(data);
1346    return result;
1347}
1348
1349PyDoc_STRVAR(load_doc,
1350"load(file)\n\
1351\n\
1352Read one value from the open file and return it. If no valid value is\n\
1353read (e.g. because the data has a different Python version’s\n\
1354incompatible marshal format), raise EOFError, ValueError or TypeError.\n\
1355The file must be an open file object opened in binary mode ('rb' or\n\
1356'r+b').\n\
1357\n\
1358Note: If an object containing an unsupported type was marshalled with\n\
1359dump(), load() will substitute None for the unmarshallable type.");
1360
1361
1362static PyObject *
1363marshal_dumps(PyObject *self, PyObject *args)
1364{
1365    PyObject *x;
1366    int version = Py_MARSHAL_VERSION;
1367    if (!PyArg_ParseTuple(args, "O|i:dumps", &x, &version))
1368        return NULL;
1369    return PyMarshal_WriteObjectToString(x, version);
1370}
1371
1372PyDoc_STRVAR(dumps_doc,
1373"dumps(value[, version])\n\
1374\n\
1375Return the string that would be written to a file by dump(value, file).\n\
1376The value must be a supported type. Raise a ValueError exception if\n\
1377value has (or contains an object that has) an unsupported type.\n\
1378\n\
1379The version argument indicates the data format that dumps should use.");
1380
1381
1382static PyObject *
1383marshal_loads(PyObject *self, PyObject *args)
1384{
1385    RFILE rf;
1386    Py_buffer p;
1387    char *s;
1388    Py_ssize_t n;
1389    PyObject* result;
1390    if (!PyArg_ParseTuple(args, "s*:loads", &p))
1391        return NULL;
1392    s = p.buf;
1393    n = p.len;
1394    rf.fp = NULL;
1395    rf.readable = NULL;
1396    rf.current_filename = NULL;
1397    rf.ptr = s;
1398    rf.end = s + n;
1399    rf.depth = 0;
1400    result = read_object(&rf);
1401    PyBuffer_Release(&p);
1402    return result;
1403}
1404
1405PyDoc_STRVAR(loads_doc,
1406"loads(string)\n\
1407\n\
1408Convert the string to a value. If no valid value is found, raise\n\
1409EOFError, ValueError or TypeError. Extra characters in the string are\n\
1410ignored.");
1411
1412static PyMethodDef marshal_methods[] = {
1413    {"dump",            marshal_dump,   METH_VARARGS,   dump_doc},
1414    {"load",            marshal_load,   METH_O,         load_doc},
1415    {"dumps",           marshal_dumps,  METH_VARARGS,   dumps_doc},
1416    {"loads",           marshal_loads,  METH_VARARGS,   loads_doc},
1417    {NULL,              NULL}           /* sentinel */
1418};
1419
1420
1421PyDoc_STRVAR(module_doc,
1422"This module contains functions that can read and write Python values in\n\
1423a binary format. The format is specific to Python, but independent of\n\
1424machine architecture issues.\n\
1425\n\
1426Not all Python object types are supported; in general, only objects\n\
1427whose value is independent from a particular invocation of Python can be\n\
1428written and read by this module. The following types are supported:\n\
1429None, integers, floating point numbers, strings, bytes, bytearrays,\n\
1430tuples, lists, sets, dictionaries, and code objects, where it\n\
1431should be understood that tuples, lists and dictionaries are only\n\
1432supported as long as the values contained therein are themselves\n\
1433supported; and recursive lists and dictionaries should not be written\n\
1434(they will cause infinite loops).\n\
1435\n\
1436Variables:\n\
1437\n\
1438version -- indicates the format that the module uses. Version 0 is the\n\
1439    historical format, version 1 shares interned strings and version 2\n\
1440    uses a binary format for floating point numbers.\n\
1441\n\
1442Functions:\n\
1443\n\
1444dump() -- write value to a file\n\
1445load() -- read value from a file\n\
1446dumps() -- write value to a string\n\
1447loads() -- read value from a string");
1448
1449
1450
1451static struct PyModuleDef marshalmodule = {
1452    PyModuleDef_HEAD_INIT,
1453    "marshal",
1454    module_doc,
1455    0,
1456    marshal_methods,
1457    NULL,
1458    NULL,
1459    NULL,
1460    NULL
1461};
1462
1463PyMODINIT_FUNC
1464PyMarshal_Init(void)
1465{
1466    PyObject *mod = PyModule_Create(&marshalmodule);
1467    if (mod == NULL)
1468        return NULL;
1469    PyModule_AddIntConstant(mod, "version", Py_MARSHAL_VERSION);
1470    return mod;
1471}
1472