1
2/* Write Python objects to files and read them back.
3   This is intended for writing and reading compiled Python code only;
4   a true persistent storage facility would be much harder, since
5   it would have to take circular links and sharing into account. */
6
7#define PY_SSIZE_T_CLEAN
8
9#include "Python.h"
10#include "longintrepr.h"
11#include "code.h"
12#include "marshal.h"
13
/* Absolute value of x.  The argument is expanded more than once, so it
   must not have side effects. */
#define ABS(x) ((x) < 0 ? -(x) : (x))
15
/* High water mark to determine when the marshalled object is dangerously deep
 * and risks coring the interpreter.  When the object stack gets this deep,
 * raise an exception instead of continuing.  Both w_object() and r_object()
 * compare the depth counter of their WFILE/RFILE against this limit.
 * On Windows debug builds, reduce this value.
 */
#if defined(MS_WINDOWS) && defined(_DEBUG)
#define MAX_MARSHAL_STACK_DEPTH 1000
#else
#define MAX_MARSHAL_STACK_DEPTH 2000
#endif
26
/* One-byte type codes.  w_object() writes one of these in front of every
   value it emits, and r_object() switches on the code it reads back. */
#define TYPE_NULL               '0'
#define TYPE_NONE               'N'
#define TYPE_FALSE              'F'
#define TYPE_TRUE               'T'
#define TYPE_STOPITER           'S'
#define TYPE_ELLIPSIS           '.'
#define TYPE_INT                'i'
#define TYPE_INT64              'I'
#define TYPE_FLOAT              'f'
#define TYPE_BINARY_FLOAT       'g'
#define TYPE_COMPLEX            'x'
#define TYPE_BINARY_COMPLEX     'y'
#define TYPE_LONG               'l'
#define TYPE_STRING             's'
#define TYPE_INTERNED           't'
#define TYPE_STRINGREF          'R'
#define TYPE_TUPLE              '('
#define TYPE_LIST               '['
#define TYPE_DICT               '{'
#define TYPE_CODE               'c'
#define TYPE_UNICODE            'u'
#define TYPE_UNKNOWN            '?'
#define TYPE_SET                '<'
#define TYPE_FROZENSET          '>'
51
/* Values stored in the `error` field of a WFILE by the writer functions. */
#define WFERR_OK 0
#define WFERR_UNMARSHALLABLE 1
#define WFERR_NESTEDTOODEEP 2
#define WFERR_NOMEMORY 3
56
/* State shared by the marshal writer (and, through the RFILE typedef, the
   reader).  Output/input goes either to a stdio stream (fp != NULL) or to an
   in-memory buffer delimited by ptr/end. */
typedef struct {
    FILE *fp;   /* stdio stream, or NULL when working on a memory buffer */
    int error;  /* see WFERR_* values */
    int depth;  /* current nesting depth; checked against
                   MAX_MARSHAL_STACK_DEPTH */
    /* If fp == NULL, the following are valid: */
    PyObject *str;  /* string object grown by w_more(); NULL after a failed
                       resize */
    char *ptr;      /* next byte to write or read */
    char *end;      /* one past the last usable byte */
    PyObject *strings; /* dict on marshal, list on unmarshal */
    int version;    /* format version; w_object() uses binary floats when
                       this is greater than 1 */
} WFILE;
68
/* Write the single byte c to p: to the stdio stream when p->fp is set,
   otherwise into the memory buffer, calling w_more() to grow it once
   ptr has reached end.
   This expands to a bare if/else-if/else chain (it is not wrapped in
   do { } while (0)) and evaluates p several times, so use it only as a
   complete statement and with a side-effect-free p. */
#define w_byte(c, p) if (((p)->fp)) putc((c), (p)->fp); \
                      else if ((p)->ptr != (p)->end) *(p)->ptr++ = (c); \
                           else w_more(c, p)
72
73static void
74w_more(int c, WFILE *p)
75{
76    Py_ssize_t size, newsize;
77    if (p->str == NULL)
78        return; /* An error already occurred */
79    size = PyString_Size(p->str);
80    newsize = size + size + 1024;
81    if (newsize > 32*1024*1024) {
82        newsize = size + (size >> 3);           /* 12.5% overallocation */
83    }
84    if (_PyString_Resize(&p->str, newsize) != 0) {
85        p->ptr = p->end = NULL;
86    }
87    else {
88        p->ptr = PyString_AS_STRING((PyStringObject *)p->str) + size;
89        p->end =
90            PyString_AS_STRING((PyStringObject *)p->str) + newsize;
91        *p->ptr++ = Py_SAFE_DOWNCAST(c, int, char);
92    }
93}
94
95static void
96w_string(const char *s, Py_ssize_t n, WFILE *p)
97{
98    if (p->fp != NULL) {
99        fwrite(s, 1, n, p->fp);
100    }
101    else {
102        while (--n >= 0) {
103            w_byte(*s, p);
104            s++;
105        }
106    }
107}
108
109static void
110w_short(int x, WFILE *p)
111{
112    w_byte((char)( x      & 0xff), p);
113    w_byte((char)((x>> 8) & 0xff), p);
114}
115
116static void
117w_long(long x, WFILE *p)
118{
119    w_byte((char)( x      & 0xff), p);
120    w_byte((char)((x>> 8) & 0xff), p);
121    w_byte((char)((x>>16) & 0xff), p);
122    w_byte((char)((x>>24) & 0xff), p);
123}
124
#if SIZEOF_LONG > 4
/* Write a 64-bit x to p as two consecutive w_long() words: the low 32 bits
   first, then the high 32 bits.  Only compiled when long is wider than
   4 bytes. */
static void
w_long64(long x, WFILE *p)
{
    const long high_word = x >> 32;

    w_long(x, p);
    w_long(high_word, p);
}
#endif
133
/* Largest size that W_SIZE will write with w_long(); the readers reject
   sizes above it as well. */
#define SIZE32_MAX  0x7FFFFFFF

/* W_SIZE(n, p): write the size n to p with w_long().
   When size_t is wider than 4 bytes, a size above SIZE32_MAX is not written:
   the macro sets p->error to WFERR_UNMARSHALLABLE, decrements p->depth and
   executes `return;` in the *enclosing* function.  It is therefore usable
   only inside functions returning void, and the depth decrement is correct
   only where the enclosing function is the one that incremented p->depth. */
#if SIZEOF_SIZE_T > 4
# define W_SIZE(n, p)  do {                     \
        if ((n) > SIZE32_MAX) {                 \
            (p)->depth--;                       \
            (p)->error = WFERR_UNMARSHALLABLE;  \
            return;                             \
        }                                       \
        w_long((long)(n), p);                   \
    } while(0)
#else
# define W_SIZE  w_long
#endif
148
149static void
150w_pstring(const char *s, Py_ssize_t n, WFILE *p)
151{
152        W_SIZE(n, p);
153        w_string(s, n, p);
154}
155
/* We assume that Python longs are stored internally in a base that is some
   power of 2**15; for the sake of portability we always read and write them
   in base exactly 2**15. */

/* Number of value bits carried by each 16-bit marshal digit. */
#define PyLong_MARSHAL_SHIFT 15
/* The marshal base, 2**15. */
#define PyLong_MARSHAL_BASE ((short)1 << PyLong_MARSHAL_SHIFT)
/* Mask selecting the low PyLong_MARSHAL_SHIFT bits of a digit. */
#define PyLong_MARSHAL_MASK (PyLong_MARSHAL_BASE - 1)
#if PyLong_SHIFT % PyLong_MARSHAL_SHIFT != 0
#error "PyLong_SHIFT must be a multiple of PyLong_MARSHAL_SHIFT"
#endif
/* How many marshal digits make up one internal PyLong digit. */
#define PyLong_MARSHAL_RATIO (PyLong_SHIFT / PyLong_MARSHAL_SHIFT)
167
168static void
169w_PyLong(const PyLongObject *ob, WFILE *p)
170{
171    Py_ssize_t i, j, n, l;
172    digit d;
173
174    w_byte(TYPE_LONG, p);
175    if (Py_SIZE(ob) == 0) {
176        w_long((long)0, p);
177        return;
178    }
179
180    /* set l to number of base PyLong_MARSHAL_BASE digits */
181    n = ABS(Py_SIZE(ob));
182    l = (n-1) * PyLong_MARSHAL_RATIO;
183    d = ob->ob_digit[n-1];
184    assert(d != 0); /* a PyLong is always normalized */
185    do {
186        d >>= PyLong_MARSHAL_SHIFT;
187        l++;
188    } while (d != 0);
189    if (l > SIZE32_MAX) {
190        p->depth--;
191        p->error = WFERR_UNMARSHALLABLE;
192        return;
193    }
194    w_long((long)(Py_SIZE(ob) > 0 ? l : -l), p);
195
196    for (i=0; i < n-1; i++) {
197        d = ob->ob_digit[i];
198        for (j=0; j < PyLong_MARSHAL_RATIO; j++) {
199            w_short(d & PyLong_MARSHAL_MASK, p);
200            d >>= PyLong_MARSHAL_SHIFT;
201        }
202        assert (d == 0);
203    }
204    d = ob->ob_digit[n-1];
205    do {
206        w_short(d & PyLong_MARSHAL_MASK, p);
207        d >>= PyLong_MARSHAL_SHIFT;
208    } while (d != 0);
209}
210
211static void
212w_object(PyObject *v, WFILE *p)
213{
214    Py_ssize_t i, n;
215
216    p->depth++;
217
218    if (p->depth > MAX_MARSHAL_STACK_DEPTH) {
219        p->error = WFERR_NESTEDTOODEEP;
220    }
221    else if (v == NULL) {
222        w_byte(TYPE_NULL, p);
223    }
224    else if (v == Py_None) {
225        w_byte(TYPE_NONE, p);
226    }
227    else if (v == PyExc_StopIteration) {
228        w_byte(TYPE_STOPITER, p);
229    }
230    else if (v == Py_Ellipsis) {
231        w_byte(TYPE_ELLIPSIS, p);
232    }
233    else if (v == Py_False) {
234        w_byte(TYPE_FALSE, p);
235    }
236    else if (v == Py_True) {
237        w_byte(TYPE_TRUE, p);
238    }
239    else if (PyInt_CheckExact(v)) {
240        long x = PyInt_AS_LONG((PyIntObject *)v);
241#if SIZEOF_LONG > 4
242        long y = Py_ARITHMETIC_RIGHT_SHIFT(long, x, 31);
243        if (y && y != -1) {
244            w_byte(TYPE_INT64, p);
245            w_long64(x, p);
246        }
247        else
248#endif
249            {
250            w_byte(TYPE_INT, p);
251            w_long(x, p);
252        }
253    }
254    else if (PyLong_CheckExact(v)) {
255        PyLongObject *ob = (PyLongObject *)v;
256        w_PyLong(ob, p);
257    }
258    else if (PyFloat_CheckExact(v)) {
259        if (p->version > 1) {
260            unsigned char buf[8];
261            if (_PyFloat_Pack8(PyFloat_AsDouble(v),
262                               buf, 1) < 0) {
263                p->error = WFERR_UNMARSHALLABLE;
264                return;
265            }
266            w_byte(TYPE_BINARY_FLOAT, p);
267            w_string((char*)buf, 8, p);
268        }
269        else {
270            char *buf = PyOS_double_to_string(PyFloat_AS_DOUBLE(v),
271                                              'g', 17, 0, NULL);
272            if (!buf) {
273                p->error = WFERR_NOMEMORY;
274                return;
275            }
276            n = strlen(buf);
277            w_byte(TYPE_FLOAT, p);
278            w_byte((int)n, p);
279            w_string(buf, n, p);
280            PyMem_Free(buf);
281        }
282    }
283#ifndef WITHOUT_COMPLEX
284    else if (PyComplex_CheckExact(v)) {
285        if (p->version > 1) {
286            unsigned char buf[8];
287            if (_PyFloat_Pack8(PyComplex_RealAsDouble(v),
288                               buf, 1) < 0) {
289                p->error = WFERR_UNMARSHALLABLE;
290                return;
291            }
292            w_byte(TYPE_BINARY_COMPLEX, p);
293            w_string((char*)buf, 8, p);
294            if (_PyFloat_Pack8(PyComplex_ImagAsDouble(v),
295                               buf, 1) < 0) {
296                p->error = WFERR_UNMARSHALLABLE;
297                return;
298            }
299            w_string((char*)buf, 8, p);
300        }
301        else {
302            char *buf;
303            w_byte(TYPE_COMPLEX, p);
304            buf = PyOS_double_to_string(PyComplex_RealAsDouble(v),
305                                        'g', 17, 0, NULL);
306            if (!buf) {
307                p->error = WFERR_NOMEMORY;
308                return;
309            }
310            n = strlen(buf);
311            w_byte((int)n, p);
312            w_string(buf, n, p);
313            PyMem_Free(buf);
314            buf = PyOS_double_to_string(PyComplex_ImagAsDouble(v),
315                                        'g', 17, 0, NULL);
316            if (!buf) {
317                p->error = WFERR_NOMEMORY;
318                return;
319            }
320            n = strlen(buf);
321            w_byte((int)n, p);
322            w_string(buf, n, p);
323            PyMem_Free(buf);
324        }
325    }
326#endif
327    else if (PyString_CheckExact(v)) {
328        if (p->strings && PyString_CHECK_INTERNED(v)) {
329            PyObject *o = PyDict_GetItem(p->strings, v);
330            if (o) {
331                long w = PyInt_AsLong(o);
332                w_byte(TYPE_STRINGREF, p);
333                w_long(w, p);
334                goto exit;
335            }
336            else {
337                int ok;
338                o = PyInt_FromSsize_t(PyDict_Size(p->strings));
339                ok = o &&
340                     PyDict_SetItem(p->strings, v, o) >= 0;
341                Py_XDECREF(o);
342                if (!ok) {
343                    p->depth--;
344                    p->error = WFERR_UNMARSHALLABLE;
345                    return;
346                }
347                w_byte(TYPE_INTERNED, p);
348            }
349        }
350        else {
351            w_byte(TYPE_STRING, p);
352        }
353        w_pstring(PyBytes_AS_STRING(v), PyString_GET_SIZE(v), p);
354    }
355#ifdef Py_USING_UNICODE
356    else if (PyUnicode_CheckExact(v)) {
357        PyObject *utf8;
358        utf8 = PyUnicode_AsUTF8String(v);
359        if (utf8 == NULL) {
360            p->depth--;
361            p->error = WFERR_UNMARSHALLABLE;
362            return;
363        }
364        w_byte(TYPE_UNICODE, p);
365        w_pstring(PyString_AS_STRING(utf8), PyString_GET_SIZE(utf8), p);
366        Py_DECREF(utf8);
367    }
368#endif
369    else if (PyTuple_CheckExact(v)) {
370        w_byte(TYPE_TUPLE, p);
371        n = PyTuple_Size(v);
372        W_SIZE(n, p);
373        for (i = 0; i < n; i++) {
374            w_object(PyTuple_GET_ITEM(v, i), p);
375        }
376    }
377    else if (PyList_CheckExact(v)) {
378        w_byte(TYPE_LIST, p);
379        n = PyList_GET_SIZE(v);
380        W_SIZE(n, p);
381        for (i = 0; i < n; i++) {
382            w_object(PyList_GET_ITEM(v, i), p);
383        }
384    }
385    else if (PyDict_CheckExact(v)) {
386        Py_ssize_t pos;
387        PyObject *key, *value;
388        w_byte(TYPE_DICT, p);
389        /* This one is NULL object terminated! */
390        pos = 0;
391        while (PyDict_Next(v, &pos, &key, &value)) {
392            w_object(key, p);
393            w_object(value, p);
394        }
395        w_object((PyObject *)NULL, p);
396    }
397    else if (PyAnySet_CheckExact(v)) {
398        PyObject *value, *it;
399
400        if (PyObject_TypeCheck(v, &PySet_Type))
401            w_byte(TYPE_SET, p);
402        else
403            w_byte(TYPE_FROZENSET, p);
404        n = PyObject_Size(v);
405        if (n == -1) {
406            p->depth--;
407            p->error = WFERR_UNMARSHALLABLE;
408            return;
409        }
410        W_SIZE(n, p);
411        it = PyObject_GetIter(v);
412        if (it == NULL) {
413            p->depth--;
414            p->error = WFERR_UNMARSHALLABLE;
415            return;
416        }
417        while ((value = PyIter_Next(it)) != NULL) {
418            w_object(value, p);
419            Py_DECREF(value);
420        }
421        Py_DECREF(it);
422        if (PyErr_Occurred()) {
423            p->depth--;
424            p->error = WFERR_UNMARSHALLABLE;
425            return;
426        }
427    }
428    else if (PyCode_Check(v)) {
429        PyCodeObject *co = (PyCodeObject *)v;
430        w_byte(TYPE_CODE, p);
431        w_long(co->co_argcount, p);
432        w_long(co->co_nlocals, p);
433        w_long(co->co_stacksize, p);
434        w_long(co->co_flags, p);
435        w_object(co->co_code, p);
436        w_object(co->co_consts, p);
437        w_object(co->co_names, p);
438        w_object(co->co_varnames, p);
439        w_object(co->co_freevars, p);
440        w_object(co->co_cellvars, p);
441        w_object(co->co_filename, p);
442        w_object(co->co_name, p);
443        w_long(co->co_firstlineno, p);
444        w_object(co->co_lnotab, p);
445    }
446    else if (PyObject_CheckReadBuffer(v)) {
447        /* Write unknown buffer-style objects as a string */
448        char *s;
449        PyBufferProcs *pb = v->ob_type->tp_as_buffer;
450        w_byte(TYPE_STRING, p);
451        n = (*pb->bf_getreadbuffer)(v, 0, (void **)&s);
452        w_pstring(s, n, p);
453    }
454    else {
455        w_byte(TYPE_UNKNOWN, p);
456        p->error = WFERR_UNMARSHALLABLE;
457    }
458   exit:
459    p->depth--;
460}
461
462/* version currently has no effect for writing longs. */
463void
464PyMarshal_WriteLongToFile(long x, FILE *fp, int version)
465{
466    WFILE wf;
467    wf.fp = fp;
468    wf.error = WFERR_OK;
469    wf.depth = 0;
470    wf.strings = NULL;
471    wf.version = version;
472    w_long(x, &wf);
473}
474
475void
476PyMarshal_WriteObjectToFile(PyObject *x, FILE *fp, int version)
477{
478    WFILE wf;
479    wf.fp = fp;
480    wf.error = WFERR_OK;
481    wf.depth = 0;
482    wf.strings = (version > 0) ? PyDict_New() : NULL;
483    wf.version = version;
484    w_object(x, &wf);
485    Py_XDECREF(wf.strings);
486}
487
typedef WFILE RFILE; /* Same struct with different invariants */

/* Read one byte from the memory buffer of p, returning it as an unsigned
   char value, or EOF once ptr has reached end. */
#define rs_byte(p) (((p)->ptr < (p)->end) ? (unsigned char)*(p)->ptr++ : EOF)

/* Read one byte from p: from the stdio stream when p->fp is set, otherwise
   from the memory buffer.  Yields EOF when no more data is available.
   p is evaluated more than once. */
#define r_byte(p) ((p)->fp ? getc((p)->fp) : rs_byte(p))
493
494static Py_ssize_t
495r_string(char *s, Py_ssize_t n, RFILE *p)
496{
497    if (p->fp != NULL)
498        /* The result fits into int because it must be <=n. */
499        return fread(s, 1, n, p->fp);
500    if (p->end - p->ptr < n)
501        n = p->end - p->ptr;
502    memcpy(s, p->ptr, n);
503    p->ptr += n;
504    return n;
505}
506
507static int
508r_short(RFILE *p)
509{
510    register short x;
511    x = r_byte(p);
512    x |= r_byte(p) << 8;
513    /* Sign-extension, in case short greater than 16 bits */
514    x |= -(x & 0x8000);
515    return x;
516}
517
518static long
519r_long(RFILE *p)
520{
521    register long x;
522    register FILE *fp = p->fp;
523    if (fp) {
524        x = getc(fp);
525        x |= (long)getc(fp) << 8;
526        x |= (long)getc(fp) << 16;
527        x |= (long)getc(fp) << 24;
528    }
529    else {
530        x = rs_byte(p);
531        x |= (long)rs_byte(p) << 8;
532        x |= (long)rs_byte(p) << 16;
533        x |= (long)rs_byte(p) << 24;
534    }
535#if SIZEOF_LONG > 4
536    /* Sign extension for 64-bit machines */
537    x |= -(x & 0x80000000L);
538#endif
539    return x;
540}
541
542/* r_long64 deals with the TYPE_INT64 code.  On a machine with
543   sizeof(long) > 4, it returns a Python int object, else a Python long
544   object.  Note that w_long64 writes out TYPE_INT if 32 bits is enough,
545   so there's no inefficiency here in returning a PyLong on 32-bit boxes
546   for everything written via TYPE_INT64 (i.e., if an int is written via
547   TYPE_INT64, it *needs* more than 32 bits).
548*/
549static PyObject *
550r_long64(RFILE *p)
551{
552    long lo4 = r_long(p);
553    long hi4 = r_long(p);
554#if SIZEOF_LONG > 4
555    long x = (hi4 << 32) | (lo4 & 0xFFFFFFFFL);
556    return PyInt_FromLong(x);
557#else
558    unsigned char buf[8];
559    int one = 1;
560    int is_little_endian = (int)*(char*)&one;
561    if (is_little_endian) {
562        memcpy(buf, &lo4, 4);
563        memcpy(buf+4, &hi4, 4);
564    }
565    else {
566        memcpy(buf, &hi4, 4);
567        memcpy(buf+4, &lo4, 4);
568    }
569    return _PyLong_FromByteArray(buf, 8, is_little_endian, 1);
570#endif
571}
572
573static PyObject *
574r_PyLong(RFILE *p)
575{
576    PyLongObject *ob;
577    long n, size, i;
578    int j, md, shorts_in_top_digit;
579    digit d;
580
581    n = r_long(p);
582    if (n == 0)
583        return (PyObject *)_PyLong_New(0);
584    if (n < -SIZE32_MAX || n > SIZE32_MAX) {
585        PyErr_SetString(PyExc_ValueError,
586                       "bad marshal data (long size out of range)");
587        return NULL;
588    }
589
590    size = 1 + (ABS(n) - 1) / PyLong_MARSHAL_RATIO;
591    shorts_in_top_digit = 1 + (ABS(n) - 1) % PyLong_MARSHAL_RATIO;
592    ob = _PyLong_New(size);
593    if (ob == NULL)
594        return NULL;
595    Py_SIZE(ob) = n > 0 ? size : -size;
596
597    for (i = 0; i < size-1; i++) {
598        d = 0;
599        for (j=0; j < PyLong_MARSHAL_RATIO; j++) {
600            md = r_short(p);
601            if (md < 0 || md > PyLong_MARSHAL_BASE)
602                goto bad_digit;
603            d += (digit)md << j*PyLong_MARSHAL_SHIFT;
604        }
605        ob->ob_digit[i] = d;
606    }
607    d = 0;
608    for (j=0; j < shorts_in_top_digit; j++) {
609        md = r_short(p);
610        if (md < 0 || md > PyLong_MARSHAL_BASE)
611            goto bad_digit;
612        /* topmost marshal digit should be nonzero */
613        if (md == 0 && j == shorts_in_top_digit - 1) {
614            Py_DECREF(ob);
615            PyErr_SetString(PyExc_ValueError,
616                "bad marshal data (unnormalized long data)");
617            return NULL;
618        }
619        d += (digit)md << j*PyLong_MARSHAL_SHIFT;
620    }
621    /* top digit should be nonzero, else the resulting PyLong won't be
622       normalized */
623    ob->ob_digit[size-1] = d;
624    return (PyObject *)ob;
625  bad_digit:
626    Py_DECREF(ob);
627    PyErr_SetString(PyExc_ValueError,
628                    "bad marshal data (digit out of range in long)");
629    return NULL;
630}
631
632
633static PyObject *
634r_object(RFILE *p)
635{
636    /* NULL is a valid return value, it does not necessarily means that
637       an exception is set. */
638    PyObject *v, *v2;
639    long i, n;
640    int type = r_byte(p);
641    PyObject *retval;
642
643    p->depth++;
644
645    if (p->depth > MAX_MARSHAL_STACK_DEPTH) {
646        p->depth--;
647        PyErr_SetString(PyExc_ValueError, "recursion limit exceeded");
648        return NULL;
649    }
650
651    switch (type) {
652
653    case EOF:
654        PyErr_SetString(PyExc_EOFError,
655                        "EOF read where object expected");
656        retval = NULL;
657        break;
658
659    case TYPE_NULL:
660        retval = NULL;
661        break;
662
663    case TYPE_NONE:
664        Py_INCREF(Py_None);
665        retval = Py_None;
666        break;
667
668    case TYPE_STOPITER:
669        Py_INCREF(PyExc_StopIteration);
670        retval = PyExc_StopIteration;
671        break;
672
673    case TYPE_ELLIPSIS:
674        Py_INCREF(Py_Ellipsis);
675        retval = Py_Ellipsis;
676        break;
677
678    case TYPE_FALSE:
679        Py_INCREF(Py_False);
680        retval = Py_False;
681        break;
682
683    case TYPE_TRUE:
684        Py_INCREF(Py_True);
685        retval = Py_True;
686        break;
687
688    case TYPE_INT:
689        retval = PyInt_FromLong(r_long(p));
690        break;
691
692    case TYPE_INT64:
693        retval = r_long64(p);
694        break;
695
696    case TYPE_LONG:
697        retval = r_PyLong(p);
698        break;
699
700    case TYPE_FLOAT:
701        {
702            char buf[256];
703            double dx;
704            n = r_byte(p);
705            if (n == EOF || r_string(buf, n, p) != n) {
706                PyErr_SetString(PyExc_EOFError,
707                    "EOF read where object expected");
708                retval = NULL;
709                break;
710            }
711            buf[n] = '\0';
712            dx = PyOS_string_to_double(buf, NULL, NULL);
713            if (dx == -1.0 && PyErr_Occurred()) {
714                retval = NULL;
715                break;
716            }
717            retval = PyFloat_FromDouble(dx);
718            break;
719        }
720
721    case TYPE_BINARY_FLOAT:
722        {
723            unsigned char buf[8];
724            double x;
725            if (r_string((char*)buf, 8, p) != 8) {
726                PyErr_SetString(PyExc_EOFError,
727                    "EOF read where object expected");
728                retval = NULL;
729                break;
730            }
731            x = _PyFloat_Unpack8(buf, 1);
732            if (x == -1.0 && PyErr_Occurred()) {
733                retval = NULL;
734                break;
735            }
736            retval = PyFloat_FromDouble(x);
737            break;
738        }
739
740#ifndef WITHOUT_COMPLEX
741    case TYPE_COMPLEX:
742        {
743            char buf[256];
744            Py_complex c;
745            n = r_byte(p);
746            if (n == EOF || r_string(buf, n, p) != n) {
747                PyErr_SetString(PyExc_EOFError,
748                    "EOF read where object expected");
749                retval = NULL;
750                break;
751            }
752            buf[n] = '\0';
753            c.real = PyOS_string_to_double(buf, NULL, NULL);
754            if (c.real == -1.0 && PyErr_Occurred()) {
755                retval = NULL;
756                break;
757            }
758            n = r_byte(p);
759            if (n == EOF || r_string(buf, n, p) != n) {
760                PyErr_SetString(PyExc_EOFError,
761                    "EOF read where object expected");
762                retval = NULL;
763                break;
764            }
765            buf[n] = '\0';
766            c.imag = PyOS_string_to_double(buf, NULL, NULL);
767            if (c.imag == -1.0 && PyErr_Occurred()) {
768                retval = NULL;
769                break;
770            }
771            retval = PyComplex_FromCComplex(c);
772            break;
773        }
774
775    case TYPE_BINARY_COMPLEX:
776        {
777            unsigned char buf[8];
778            Py_complex c;
779            if (r_string((char*)buf, 8, p) != 8) {
780                PyErr_SetString(PyExc_EOFError,
781                    "EOF read where object expected");
782                retval = NULL;
783                break;
784            }
785            c.real = _PyFloat_Unpack8(buf, 1);
786            if (c.real == -1.0 && PyErr_Occurred()) {
787                retval = NULL;
788                break;
789            }
790            if (r_string((char*)buf, 8, p) != 8) {
791                PyErr_SetString(PyExc_EOFError,
792                    "EOF read where object expected");
793                retval = NULL;
794                break;
795            }
796            c.imag = _PyFloat_Unpack8(buf, 1);
797            if (c.imag == -1.0 && PyErr_Occurred()) {
798                retval = NULL;
799                break;
800            }
801            retval = PyComplex_FromCComplex(c);
802            break;
803        }
804#endif
805
806    case TYPE_INTERNED:
807    case TYPE_STRING:
808        n = r_long(p);
809        if (n < 0 || n > SIZE32_MAX) {
810            PyErr_SetString(PyExc_ValueError, "bad marshal data (string size out of range)");
811            retval = NULL;
812            break;
813        }
814        v = PyString_FromStringAndSize((char *)NULL, n);
815        if (v == NULL) {
816            retval = NULL;
817            break;
818        }
819        if (r_string(PyString_AS_STRING(v), n, p) != n) {
820            Py_DECREF(v);
821            PyErr_SetString(PyExc_EOFError,
822                            "EOF read where object expected");
823            retval = NULL;
824            break;
825        }
826        if (type == TYPE_INTERNED) {
827            PyString_InternInPlace(&v);
828            if (PyList_Append(p->strings, v) < 0) {
829                retval = NULL;
830                break;
831            }
832        }
833        retval = v;
834        break;
835
836    case TYPE_STRINGREF:
837        n = r_long(p);
838        if (n < 0 || n >= PyList_GET_SIZE(p->strings)) {
839            PyErr_SetString(PyExc_ValueError, "bad marshal data (string ref out of range)");
840            retval = NULL;
841            break;
842        }
843        v = PyList_GET_ITEM(p->strings, n);
844        Py_INCREF(v);
845        retval = v;
846        break;
847
848#ifdef Py_USING_UNICODE
849    case TYPE_UNICODE:
850        {
851        char *buffer;
852
853        n = r_long(p);
854        if (n < 0 || n > SIZE32_MAX) {
855            PyErr_SetString(PyExc_ValueError, "bad marshal data (unicode size out of range)");
856            retval = NULL;
857            break;
858        }
859        buffer = PyMem_NEW(char, n);
860        if (buffer == NULL) {
861            retval = PyErr_NoMemory();
862            break;
863        }
864        if (r_string(buffer, n, p) != n) {
865            PyMem_DEL(buffer);
866            PyErr_SetString(PyExc_EOFError,
867                "EOF read where object expected");
868            retval = NULL;
869            break;
870        }
871        v = PyUnicode_DecodeUTF8(buffer, n, NULL);
872        PyMem_DEL(buffer);
873        retval = v;
874        break;
875        }
876#endif
877
878    case TYPE_TUPLE:
879        n = r_long(p);
880        if (n < 0 || n > SIZE32_MAX) {
881            PyErr_SetString(PyExc_ValueError, "bad marshal data (tuple size out of range)");
882            retval = NULL;
883            break;
884        }
885        v = PyTuple_New(n);
886        if (v == NULL) {
887            retval = NULL;
888            break;
889        }
890        for (i = 0; i < n; i++) {
891            v2 = r_object(p);
892            if ( v2 == NULL ) {
893                if (!PyErr_Occurred())
894                    PyErr_SetString(PyExc_TypeError,
895                        "NULL object in marshal data for tuple");
896                Py_DECREF(v);
897                v = NULL;
898                break;
899            }
900            PyTuple_SET_ITEM(v, i, v2);
901        }
902        retval = v;
903        break;
904
905    case TYPE_LIST:
906        n = r_long(p);
907        if (n < 0 || n > SIZE32_MAX) {
908            PyErr_SetString(PyExc_ValueError, "bad marshal data (list size out of range)");
909            retval = NULL;
910            break;
911        }
912        v = PyList_New(n);
913        if (v == NULL) {
914            retval = NULL;
915            break;
916        }
917        for (i = 0; i < n; i++) {
918            v2 = r_object(p);
919            if ( v2 == NULL ) {
920                if (!PyErr_Occurred())
921                    PyErr_SetString(PyExc_TypeError,
922                        "NULL object in marshal data for list");
923                Py_DECREF(v);
924                v = NULL;
925                break;
926            }
927            PyList_SET_ITEM(v, i, v2);
928        }
929        retval = v;
930        break;
931
932    case TYPE_DICT:
933        v = PyDict_New();
934        if (v == NULL) {
935            retval = NULL;
936            break;
937        }
938        for (;;) {
939            PyObject *key, *val;
940            key = r_object(p);
941            if (key == NULL)
942                break;
943            val = r_object(p);
944            if (val != NULL)
945                PyDict_SetItem(v, key, val);
946            Py_DECREF(key);
947            Py_XDECREF(val);
948        }
949        if (PyErr_Occurred()) {
950            Py_DECREF(v);
951            v = NULL;
952        }
953        retval = v;
954        break;
955
956    case TYPE_SET:
957    case TYPE_FROZENSET:
958        n = r_long(p);
959        if (n < 0 || n > SIZE32_MAX) {
960            PyErr_SetString(PyExc_ValueError, "bad marshal data (set size out of range)");
961            retval = NULL;
962            break;
963        }
964        v = (type == TYPE_SET) ? PySet_New(NULL) : PyFrozenSet_New(NULL);
965        if (v == NULL) {
966            retval = NULL;
967            break;
968        }
969        for (i = 0; i < n; i++) {
970            v2 = r_object(p);
971            if ( v2 == NULL ) {
972                if (!PyErr_Occurred())
973                    PyErr_SetString(PyExc_TypeError,
974                        "NULL object in marshal data for set");
975                Py_DECREF(v);
976                v = NULL;
977                break;
978            }
979            if (PySet_Add(v, v2) == -1) {
980                Py_DECREF(v);
981                Py_DECREF(v2);
982                v = NULL;
983                break;
984            }
985            Py_DECREF(v2);
986        }
987        retval = v;
988        break;
989
990    case TYPE_CODE:
991        if (PyEval_GetRestricted()) {
992            PyErr_SetString(PyExc_RuntimeError,
993                "cannot unmarshal code objects in "
994                "restricted execution mode");
995            retval = NULL;
996            break;
997        }
998        else {
999            int argcount;
1000            int nlocals;
1001            int stacksize;
1002            int flags;
1003            PyObject *code = NULL;
1004            PyObject *consts = NULL;
1005            PyObject *names = NULL;
1006            PyObject *varnames = NULL;
1007            PyObject *freevars = NULL;
1008            PyObject *cellvars = NULL;
1009            PyObject *filename = NULL;
1010            PyObject *name = NULL;
1011            int firstlineno;
1012            PyObject *lnotab = NULL;
1013
1014            v = NULL;
1015
1016            /* XXX ignore long->int overflows for now */
1017            argcount = (int)r_long(p);
1018            nlocals = (int)r_long(p);
1019            stacksize = (int)r_long(p);
1020            flags = (int)r_long(p);
1021            code = r_object(p);
1022            if (code == NULL)
1023                goto code_error;
1024            consts = r_object(p);
1025            if (consts == NULL)
1026                goto code_error;
1027            names = r_object(p);
1028            if (names == NULL)
1029                goto code_error;
1030            varnames = r_object(p);
1031            if (varnames == NULL)
1032                goto code_error;
1033            freevars = r_object(p);
1034            if (freevars == NULL)
1035                goto code_error;
1036            cellvars = r_object(p);
1037            if (cellvars == NULL)
1038                goto code_error;
1039            filename = r_object(p);
1040            if (filename == NULL)
1041                goto code_error;
1042            name = r_object(p);
1043            if (name == NULL)
1044                goto code_error;
1045            firstlineno = (int)r_long(p);
1046            lnotab = r_object(p);
1047            if (lnotab == NULL)
1048                goto code_error;
1049
1050            v = (PyObject *) PyCode_New(
1051                            argcount, nlocals, stacksize, flags,
1052                            code, consts, names, varnames,
1053                            freevars, cellvars, filename, name,
1054                            firstlineno, lnotab);
1055
1056          code_error:
1057            Py_XDECREF(code);
1058            Py_XDECREF(consts);
1059            Py_XDECREF(names);
1060            Py_XDECREF(varnames);
1061            Py_XDECREF(freevars);
1062            Py_XDECREF(cellvars);
1063            Py_XDECREF(filename);
1064            Py_XDECREF(name);
1065            Py_XDECREF(lnotab);
1066
1067        }
1068        retval = v;
1069        break;
1070
1071    default:
1072        /* Bogus data got written, which isn't ideal.
1073           This will let you keep working and recover. */
1074        PyErr_SetString(PyExc_ValueError, "bad marshal data (unknown type code)");
1075        retval = NULL;
1076        break;
1077
1078    }
1079    p->depth--;
1080    return retval;
1081}
1082
1083static PyObject *
1084read_object(RFILE *p)
1085{
1086    PyObject *v;
1087    if (PyErr_Occurred()) {
1088        fprintf(stderr, "XXX readobject called with exception set\n");
1089        return NULL;
1090    }
1091    v = r_object(p);
1092    if (v == NULL && !PyErr_Occurred())
1093        PyErr_SetString(PyExc_TypeError, "NULL object in marshal data for object");
1094    return v;
1095}
1096
1097int
1098PyMarshal_ReadShortFromFile(FILE *fp)
1099{
1100    RFILE rf;
1101    assert(fp);
1102    rf.fp = fp;
1103    rf.strings = NULL;
1104    rf.end = rf.ptr = NULL;
1105    return r_short(&rf);
1106}
1107
1108long
1109PyMarshal_ReadLongFromFile(FILE *fp)
1110{
1111    RFILE rf;
1112    rf.fp = fp;
1113    rf.strings = NULL;
1114    rf.ptr = rf.end = NULL;
1115    return r_long(&rf);
1116}
1117
#ifdef HAVE_FSTAT
/* Size in bytes of the file behind fp, as reported by fstat();
   -1 when fstat() fails. */
static off_t
getfilesize(FILE *fp)
{
    struct stat info;

    if (fstat(fileno(fp), &info) == 0)
        return info.st_size;
    return -1;
}
#endif
1130
1131/* If we can get the size of the file up-front, and it's reasonably small,
1132 * read it in one gulp and delegate to ...FromString() instead.  Much quicker
1133 * than reading a byte at a time from file; speeds .pyc imports.
1134 * CAUTION:  since this may read the entire remainder of the file, don't
1135 * call it unless you know you're done with the file.
1136 */
1137PyObject *
1138PyMarshal_ReadLastObjectFromFile(FILE *fp)
1139{
1140/* REASONABLE_FILE_LIMIT is by defn something big enough for Tkinter.pyc. */
1141#define REASONABLE_FILE_LIMIT (1L << 18)
1142#ifdef HAVE_FSTAT
1143    off_t filesize;
1144    filesize = getfilesize(fp);
1145    if (filesize > 0 && filesize <= REASONABLE_FILE_LIMIT) {
1146        char* pBuf = (char *)PyMem_MALLOC(filesize);
1147        if (pBuf != NULL) {
1148            size_t n = fread(pBuf, 1, (size_t)filesize, fp);
1149            PyObject* v = PyMarshal_ReadObjectFromString(pBuf, n);
1150            PyMem_FREE(pBuf);
1151            return v;
1152        }
1153
1154    }
1155#endif
1156    /* We don't have fstat, or we do but the file is larger than
1157     * REASONABLE_FILE_LIMIT or malloc failed -- read a byte at a time.
1158     */
1159    return PyMarshal_ReadObjectFromFile(fp);
1160
1161#undef REASONABLE_FILE_LIMIT
1162}
1163
1164PyObject *
1165PyMarshal_ReadObjectFromFile(FILE *fp)
1166{
1167    RFILE rf;
1168    PyObject *result;
1169    rf.fp = fp;
1170    rf.strings = PyList_New(0);
1171    rf.depth = 0;
1172    rf.ptr = rf.end = NULL;
1173    result = r_object(&rf);
1174    Py_DECREF(rf.strings);
1175    return result;
1176}
1177
1178PyObject *
1179PyMarshal_ReadObjectFromString(char *str, Py_ssize_t len)
1180{
1181    RFILE rf;
1182    PyObject *result;
1183    rf.fp = NULL;
1184    rf.ptr = str;
1185    rf.end = str + len;
1186    rf.strings = PyList_New(0);
1187    rf.depth = 0;
1188    result = r_object(&rf);
1189    Py_DECREF(rf.strings);
1190    return result;
1191}
1192
1193static void
1194set_error(int error)
1195{
1196    switch (error) {
1197    case WFERR_NOMEMORY:
1198        PyErr_NoMemory();
1199        break;
1200    case WFERR_UNMARSHALLABLE:
1201        PyErr_SetString(PyExc_ValueError, "unmarshallable object");
1202        break;
1203    case WFERR_NESTEDTOODEEP:
1204    default:
1205        PyErr_SetString(PyExc_ValueError,
1206            "object too deeply nested to marshal");
1207        break;
1208    }
1209}
1210
1211PyObject *
1212PyMarshal_WriteObjectToString(PyObject *x, int version)
1213{
1214    WFILE wf;
1215    wf.fp = NULL;
1216    wf.str = PyString_FromStringAndSize((char *)NULL, 50);
1217    if (wf.str == NULL)
1218        return NULL;
1219    wf.ptr = PyString_AS_STRING((PyStringObject *)wf.str);
1220    wf.end = wf.ptr + PyString_Size(wf.str);
1221    wf.error = WFERR_OK;
1222    wf.depth = 0;
1223    wf.version = version;
1224    wf.strings = (version > 0) ? PyDict_New() : NULL;
1225    w_object(x, &wf);
1226    Py_XDECREF(wf.strings);
1227    if (wf.str != NULL) {
1228        char *base = PyString_AS_STRING((PyStringObject *)wf.str);
1229        if (wf.ptr - base > PY_SSIZE_T_MAX) {
1230            Py_DECREF(wf.str);
1231            PyErr_SetString(PyExc_OverflowError,
1232                            "too much marshall data for a string");
1233            return NULL;
1234        }
1235        if (_PyString_Resize(&wf.str, (Py_ssize_t)(wf.ptr - base)))
1236            return NULL;
1237    }
1238    if (wf.error != WFERR_OK) {
1239        Py_XDECREF(wf.str);
1240        set_error(wf.error);
1241        return NULL;
1242    }
1243    return wf.str;
1244}
1245
1246/* And an interface for Python programs... */
1247
1248static PyObject *
1249marshal_dump(PyObject *self, PyObject *args)
1250{
1251    WFILE wf;
1252    PyObject *x;
1253    PyObject *f;
1254    int version = Py_MARSHAL_VERSION;
1255    if (!PyArg_ParseTuple(args, "OO|i:dump", &x, &f, &version))
1256        return NULL;
1257    if (!PyFile_Check(f)) {
1258        PyErr_SetString(PyExc_TypeError,
1259                        "marshal.dump() 2nd arg must be file");
1260        return NULL;
1261    }
1262    wf.fp = PyFile_AsFile(f);
1263    wf.str = NULL;
1264    wf.ptr = wf.end = NULL;
1265    wf.error = WFERR_OK;
1266    wf.depth = 0;
1267    wf.strings = (version > 0) ? PyDict_New() : 0;
1268    wf.version = version;
1269    w_object(x, &wf);
1270    Py_XDECREF(wf.strings);
1271    if (wf.error != WFERR_OK) {
1272        set_error(wf.error);
1273        return NULL;
1274    }
1275    Py_INCREF(Py_None);
1276    return Py_None;
1277}
1278
1279PyDoc_STRVAR(dump_doc,
1280"dump(value, file[, version])\n\
1281\n\
1282Write the value on the open file. The value must be a supported type.\n\
1283The file must be an open file object such as sys.stdout or returned by\n\
1284open() or os.popen(). It must be opened in binary mode ('wb' or 'w+b').\n\
1285\n\
1286If the value has (or contains an object that has) an unsupported type, a\n\
1287ValueError exception is raised — but garbage data will also be written\n\
1288to the file. The object will not be properly read back by load()\n\
1289\n\
1290New in version 2.4: The version argument indicates the data format that\n\
1291dump should use.");
1292
1293static PyObject *
1294marshal_load(PyObject *self, PyObject *f)
1295{
1296    RFILE rf;
1297    PyObject *result;
1298    if (!PyFile_Check(f)) {
1299        PyErr_SetString(PyExc_TypeError,
1300                        "marshal.load() arg must be file");
1301        return NULL;
1302    }
1303    rf.fp = PyFile_AsFile(f);
1304    rf.strings = PyList_New(0);
1305    rf.depth = 0;
1306    result = read_object(&rf);
1307    Py_DECREF(rf.strings);
1308    return result;
1309}
1310
1311PyDoc_STRVAR(load_doc,
1312"load(file)\n\
1313\n\
1314Read one value from the open file and return it. If no valid value is\n\
1315read (e.g. because the data has a different Python version’s\n\
1316incompatible marshal format), raise EOFError, ValueError or TypeError.\n\
1317The file must be an open file object opened in binary mode ('rb' or\n\
1318'r+b').\n\
1319\n\
1320Note: If an object containing an unsupported type was marshalled with\n\
1321dump(), load() will substitute None for the unmarshallable type.");
1322
1323
1324static PyObject *
1325marshal_dumps(PyObject *self, PyObject *args)
1326{
1327    PyObject *x;
1328    int version = Py_MARSHAL_VERSION;
1329    if (!PyArg_ParseTuple(args, "O|i:dumps", &x, &version))
1330        return NULL;
1331    return PyMarshal_WriteObjectToString(x, version);
1332}
1333
1334PyDoc_STRVAR(dumps_doc,
1335"dumps(value[, version])\n\
1336\n\
1337Return the string that would be written to a file by dump(value, file).\n\
1338The value must be a supported type. Raise a ValueError exception if\n\
1339value has (or contains an object that has) an unsupported type.\n\
1340\n\
1341New in version 2.4: The version argument indicates the data format that\n\
1342dumps should use.");
1343
1344
1345static PyObject *
1346marshal_loads(PyObject *self, PyObject *args)
1347{
1348    RFILE rf;
1349    char *s;
1350    Py_ssize_t n;
1351    PyObject* result;
1352    if (!PyArg_ParseTuple(args, "s#:loads", &s, &n))
1353        return NULL;
1354    rf.fp = NULL;
1355    rf.ptr = s;
1356    rf.end = s + n;
1357    rf.strings = PyList_New(0);
1358    rf.depth = 0;
1359    result = read_object(&rf);
1360    Py_DECREF(rf.strings);
1361    return result;
1362}
1363
1364PyDoc_STRVAR(loads_doc,
1365"loads(string)\n\
1366\n\
1367Convert the string to a value. If no valid value is found, raise\n\
1368EOFError, ValueError or TypeError. Extra characters in the string are\n\
1369ignored.");
1370
1371static PyMethodDef marshal_methods[] = {
1372    {"dump",            marshal_dump,   METH_VARARGS,   dump_doc},
1373    {"load",            marshal_load,   METH_O,         load_doc},
1374    {"dumps",           marshal_dumps,  METH_VARARGS,   dumps_doc},
1375    {"loads",           marshal_loads,  METH_VARARGS,   loads_doc},
1376    {NULL,              NULL}           /* sentinel */
1377};
1378
1379PyDoc_STRVAR(marshal_doc,
1380"This module contains functions that can read and write Python values in\n\
1381a binary format. The format is specific to Python, but independent of\n\
1382machine architecture issues.\n\
1383\n\
1384Not all Python object types are supported; in general, only objects\n\
1385whose value is independent from a particular invocation of Python can be\n\
1386written and read by this module. The following types are supported:\n\
1387None, integers, long integers, floating point numbers, strings, Unicode\n\
1388objects, tuples, lists, sets, dictionaries, and code objects, where it\n\
1389should be understood that tuples, lists and dictionaries are only\n\
1390supported as long as the values contained therein are themselves\n\
1391supported; and recursive lists and dictionaries should not be written\n\
1392(they will cause infinite loops).\n\
1393\n\
1394Variables:\n\
1395\n\
1396version -- indicates the format that the module uses. Version 0 is the\n\
1397    historical format, version 1 (added in Python 2.4) shares interned\n\
1398    strings and version 2 (added in Python 2.5) uses a binary format for\n\
1399    floating point numbers. (New in version 2.4)\n\
1400\n\
1401Functions:\n\
1402\n\
1403dump() -- write value to a file\n\
1404load() -- read value from a file\n\
1405dumps() -- write value to a string\n\
1406loads() -- read value from a string");
1407
1408
1409PyMODINIT_FUNC
1410PyMarshal_Init(void)
1411{
1412    PyObject *mod = Py_InitModule3("marshal", marshal_methods,
1413        marshal_doc);
1414    if (mod == NULL)
1415        return;
1416    PyModule_AddIntConstant(mod, "version", Py_MARSHAL_VERSION);
1417}
1418