/* marshal.c revision b2677c739729ace08c4185d1e459210373477497 */
1 2/* Write Python objects to files and read them back. 3 This is intended for writing and reading compiled Python code only; 4 a true persistent storage facility would be much harder, since 5 it would have to take circular links and sharing into account. */ 6 7#define PY_SSIZE_T_CLEAN 8 9#include "Python.h" 10#include "longintrepr.h" 11#include "code.h" 12#include "marshal.h" 13 14#define ABS(x) ((x) < 0 ? -(x) : (x)) 15 16/* High water mark to determine when the marshalled object is dangerously deep 17 * and risks coring the interpreter. When the object stack gets this deep, 18 * raise an exception instead of continuing. 19 * On Windows debug builds, reduce this value. 20 */ 21#if defined(MS_WINDOWS) && defined(_DEBUG) 22#define MAX_MARSHAL_STACK_DEPTH 1500 23#else 24#define MAX_MARSHAL_STACK_DEPTH 2000 25#endif 26 27#define TYPE_NULL '0' 28#define TYPE_NONE 'N' 29#define TYPE_FALSE 'F' 30#define TYPE_TRUE 'T' 31#define TYPE_STOPITER 'S' 32#define TYPE_ELLIPSIS '.' 33#define TYPE_INT 'i' 34#define TYPE_INT64 'I' 35#define TYPE_FLOAT 'f' 36#define TYPE_BINARY_FLOAT 'g' 37#define TYPE_COMPLEX 'x' 38#define TYPE_BINARY_COMPLEX 'y' 39#define TYPE_LONG 'l' 40#define TYPE_STRING 's' 41#define TYPE_TUPLE '(' 42#define TYPE_LIST '[' 43#define TYPE_DICT '{' 44#define TYPE_CODE 'c' 45#define TYPE_UNICODE 'u' 46#define TYPE_UNKNOWN '?' 
47#define TYPE_SET '<' 48#define TYPE_FROZENSET '>' 49 50#define WFERR_OK 0 51#define WFERR_UNMARSHALLABLE 1 52#define WFERR_NESTEDTOODEEP 2 53#define WFERR_NOMEMORY 3 54 55typedef struct { 56 FILE *fp; 57 int error; /* see WFERR_* values */ 58 int depth; 59 /* If fp == NULL, the following are valid: */ 60 PyObject *readable; /* Stream-like object being read from */ 61 PyObject *str; 62 PyObject *current_filename; 63 char *ptr; 64 char *end; 65 int version; 66} WFILE; 67 68#define w_byte(c, p) if (((p)->fp)) putc((c), (p)->fp); \ 69 else if ((p)->ptr != (p)->end) *(p)->ptr++ = (c); \ 70 else w_more(c, p) 71 72static void 73w_more(int c, WFILE *p) 74{ 75 Py_ssize_t size, newsize; 76 if (p->str == NULL) 77 return; /* An error already occurred */ 78 size = PyBytes_Size(p->str); 79 newsize = size + size + 1024; 80 if (newsize > 32*1024*1024) { 81 newsize = size + (size >> 3); /* 12.5% overallocation */ 82 } 83 if (_PyBytes_Resize(&p->str, newsize) != 0) { 84 p->ptr = p->end = NULL; 85 } 86 else { 87 p->ptr = PyBytes_AS_STRING((PyBytesObject *)p->str) + size; 88 p->end = 89 PyBytes_AS_STRING((PyBytesObject *)p->str) + newsize; 90 *p->ptr++ = Py_SAFE_DOWNCAST(c, int, char); 91 } 92} 93 94static void 95w_string(char *s, int n, WFILE *p) 96{ 97 if (p->fp != NULL) { 98 fwrite(s, 1, n, p->fp); 99 } 100 else { 101 while (--n >= 0) { 102 w_byte(*s, p); 103 s++; 104 } 105 } 106} 107 108static void 109w_short(int x, WFILE *p) 110{ 111 w_byte((char)( x & 0xff), p); 112 w_byte((char)((x>> 8) & 0xff), p); 113} 114 115static void 116w_long(long x, WFILE *p) 117{ 118 w_byte((char)( x & 0xff), p); 119 w_byte((char)((x>> 8) & 0xff), p); 120 w_byte((char)((x>>16) & 0xff), p); 121 w_byte((char)((x>>24) & 0xff), p); 122} 123 124#if SIZEOF_LONG > 4 125static void 126w_long64(long x, WFILE *p) 127{ 128 w_long(x, p); 129 w_long(x>>32, p); 130} 131#endif 132 133/* We assume that Python longs are stored internally in base some power of 134 2**15; for the sake of portability we'll always read 
and write them in base 135 exactly 2**15. */ 136 137#define PyLong_MARSHAL_SHIFT 15 138#define PyLong_MARSHAL_BASE ((short)1 << PyLong_MARSHAL_SHIFT) 139#define PyLong_MARSHAL_MASK (PyLong_MARSHAL_BASE - 1) 140#if PyLong_SHIFT % PyLong_MARSHAL_SHIFT != 0 141#error "PyLong_SHIFT must be a multiple of PyLong_MARSHAL_SHIFT" 142#endif 143#define PyLong_MARSHAL_RATIO (PyLong_SHIFT / PyLong_MARSHAL_SHIFT) 144 145static void 146w_PyLong(const PyLongObject *ob, WFILE *p) 147{ 148 Py_ssize_t i, j, n, l; 149 digit d; 150 151 w_byte(TYPE_LONG, p); 152 if (Py_SIZE(ob) == 0) { 153 w_long((long)0, p); 154 return; 155 } 156 157 /* set l to number of base PyLong_MARSHAL_BASE digits */ 158 n = ABS(Py_SIZE(ob)); 159 l = (n-1) * PyLong_MARSHAL_RATIO; 160 d = ob->ob_digit[n-1]; 161 assert(d != 0); /* a PyLong is always normalized */ 162 do { 163 d >>= PyLong_MARSHAL_SHIFT; 164 l++; 165 } while (d != 0); 166 w_long((long)(Py_SIZE(ob) > 0 ? l : -l), p); 167 168 for (i=0; i < n-1; i++) { 169 d = ob->ob_digit[i]; 170 for (j=0; j < PyLong_MARSHAL_RATIO; j++) { 171 w_short(d & PyLong_MARSHAL_MASK, p); 172 d >>= PyLong_MARSHAL_SHIFT; 173 } 174 assert (d == 0); 175 } 176 d = ob->ob_digit[n-1]; 177 do { 178 w_short(d & PyLong_MARSHAL_MASK, p); 179 d >>= PyLong_MARSHAL_SHIFT; 180 } while (d != 0); 181} 182 183static void 184w_object(PyObject *v, WFILE *p) 185{ 186 Py_ssize_t i, n; 187 188 p->depth++; 189 190 if (p->depth > MAX_MARSHAL_STACK_DEPTH) { 191 p->error = WFERR_NESTEDTOODEEP; 192 } 193 else if (v == NULL) { 194 w_byte(TYPE_NULL, p); 195 } 196 else if (v == Py_None) { 197 w_byte(TYPE_NONE, p); 198 } 199 else if (v == PyExc_StopIteration) { 200 w_byte(TYPE_STOPITER, p); 201 } 202 else if (v == Py_Ellipsis) { 203 w_byte(TYPE_ELLIPSIS, p); 204 } 205 else if (v == Py_False) { 206 w_byte(TYPE_FALSE, p); 207 } 208 else if (v == Py_True) { 209 w_byte(TYPE_TRUE, p); 210 } 211 else if (PyLong_CheckExact(v)) { 212 long x = PyLong_AsLong(v); 213 if ((x == -1) && PyErr_Occurred()) { 214 
PyLongObject *ob = (PyLongObject *)v; 215 PyErr_Clear(); 216 w_PyLong(ob, p); 217 } 218 else { 219#if SIZEOF_LONG > 4 220 long y = Py_ARITHMETIC_RIGHT_SHIFT(long, x, 31); 221 if (y && y != -1) { 222 w_byte(TYPE_INT64, p); 223 w_long64(x, p); 224 } 225 else 226#endif 227 { 228 w_byte(TYPE_INT, p); 229 w_long(x, p); 230 } 231 } 232 } 233 else if (PyFloat_CheckExact(v)) { 234 if (p->version > 1) { 235 unsigned char buf[8]; 236 if (_PyFloat_Pack8(PyFloat_AsDouble(v), 237 buf, 1) < 0) { 238 p->error = WFERR_UNMARSHALLABLE; 239 return; 240 } 241 w_byte(TYPE_BINARY_FLOAT, p); 242 w_string((char*)buf, 8, p); 243 } 244 else { 245 char *buf = PyOS_double_to_string(PyFloat_AS_DOUBLE(v), 246 'g', 17, 0, NULL); 247 if (!buf) { 248 p->error = WFERR_NOMEMORY; 249 return; 250 } 251 n = strlen(buf); 252 w_byte(TYPE_FLOAT, p); 253 w_byte((int)n, p); 254 w_string(buf, (int)n, p); 255 PyMem_Free(buf); 256 } 257 } 258 else if (PyComplex_CheckExact(v)) { 259 if (p->version > 1) { 260 unsigned char buf[8]; 261 if (_PyFloat_Pack8(PyComplex_RealAsDouble(v), 262 buf, 1) < 0) { 263 p->error = WFERR_UNMARSHALLABLE; 264 return; 265 } 266 w_byte(TYPE_BINARY_COMPLEX, p); 267 w_string((char*)buf, 8, p); 268 if (_PyFloat_Pack8(PyComplex_ImagAsDouble(v), 269 buf, 1) < 0) { 270 p->error = WFERR_UNMARSHALLABLE; 271 return; 272 } 273 w_string((char*)buf, 8, p); 274 } 275 else { 276 char *buf; 277 w_byte(TYPE_COMPLEX, p); 278 buf = PyOS_double_to_string(PyComplex_RealAsDouble(v), 279 'g', 17, 0, NULL); 280 if (!buf) { 281 p->error = WFERR_NOMEMORY; 282 return; 283 } 284 n = strlen(buf); 285 w_byte((int)n, p); 286 w_string(buf, (int)n, p); 287 PyMem_Free(buf); 288 buf = PyOS_double_to_string(PyComplex_ImagAsDouble(v), 289 'g', 17, 0, NULL); 290 if (!buf) { 291 p->error = WFERR_NOMEMORY; 292 return; 293 } 294 n = strlen(buf); 295 w_byte((int)n, p); 296 w_string(buf, (int)n, p); 297 PyMem_Free(buf); 298 } 299 } 300 else if (PyBytes_CheckExact(v)) { 301 w_byte(TYPE_STRING, p); 302 n = PyBytes_GET_SIZE(v); 
303 if (n > INT_MAX) { 304 /* huge strings are not supported */ 305 p->depth--; 306 p->error = WFERR_UNMARSHALLABLE; 307 return; 308 } 309 w_long((long)n, p); 310 w_string(PyBytes_AS_STRING(v), (int)n, p); 311 } 312 else if (PyUnicode_CheckExact(v)) { 313 PyObject *utf8; 314 utf8 = PyUnicode_AsEncodedString(v, "utf8", "surrogatepass"); 315 if (utf8 == NULL) { 316 p->depth--; 317 p->error = WFERR_UNMARSHALLABLE; 318 return; 319 } 320 w_byte(TYPE_UNICODE, p); 321 n = PyBytes_GET_SIZE(utf8); 322 if (n > INT_MAX) { 323 p->depth--; 324 p->error = WFERR_UNMARSHALLABLE; 325 return; 326 } 327 w_long((long)n, p); 328 w_string(PyBytes_AS_STRING(utf8), (int)n, p); 329 Py_DECREF(utf8); 330 } 331 else if (PyTuple_CheckExact(v)) { 332 w_byte(TYPE_TUPLE, p); 333 n = PyTuple_Size(v); 334 w_long((long)n, p); 335 for (i = 0; i < n; i++) { 336 w_object(PyTuple_GET_ITEM(v, i), p); 337 } 338 } 339 else if (PyList_CheckExact(v)) { 340 w_byte(TYPE_LIST, p); 341 n = PyList_GET_SIZE(v); 342 w_long((long)n, p); 343 for (i = 0; i < n; i++) { 344 w_object(PyList_GET_ITEM(v, i), p); 345 } 346 } 347 else if (PyDict_CheckExact(v)) { 348 Py_ssize_t pos; 349 PyObject *key, *value; 350 w_byte(TYPE_DICT, p); 351 /* This one is NULL object terminated! 
*/ 352 pos = 0; 353 while (PyDict_Next(v, &pos, &key, &value)) { 354 w_object(key, p); 355 w_object(value, p); 356 } 357 w_object((PyObject *)NULL, p); 358 } 359 else if (PyAnySet_CheckExact(v)) { 360 PyObject *value, *it; 361 362 if (PyObject_TypeCheck(v, &PySet_Type)) 363 w_byte(TYPE_SET, p); 364 else 365 w_byte(TYPE_FROZENSET, p); 366 n = PyObject_Size(v); 367 if (n == -1) { 368 p->depth--; 369 p->error = WFERR_UNMARSHALLABLE; 370 return; 371 } 372 w_long((long)n, p); 373 it = PyObject_GetIter(v); 374 if (it == NULL) { 375 p->depth--; 376 p->error = WFERR_UNMARSHALLABLE; 377 return; 378 } 379 while ((value = PyIter_Next(it)) != NULL) { 380 w_object(value, p); 381 Py_DECREF(value); 382 } 383 Py_DECREF(it); 384 if (PyErr_Occurred()) { 385 p->depth--; 386 p->error = WFERR_UNMARSHALLABLE; 387 return; 388 } 389 } 390 else if (PyCode_Check(v)) { 391 PyCodeObject *co = (PyCodeObject *)v; 392 w_byte(TYPE_CODE, p); 393 w_long(co->co_argcount, p); 394 w_long(co->co_kwonlyargcount, p); 395 w_long(co->co_nlocals, p); 396 w_long(co->co_stacksize, p); 397 w_long(co->co_flags, p); 398 w_object(co->co_code, p); 399 w_object(co->co_consts, p); 400 w_object(co->co_names, p); 401 w_object(co->co_varnames, p); 402 w_object(co->co_freevars, p); 403 w_object(co->co_cellvars, p); 404 w_object(co->co_filename, p); 405 w_object(co->co_name, p); 406 w_long(co->co_firstlineno, p); 407 w_object(co->co_lnotab, p); 408 } 409 else if (PyObject_CheckBuffer(v)) { 410 /* Write unknown buffer-style objects as a string */ 411 char *s; 412 Py_buffer view; 413 if (PyObject_GetBuffer(v, &view, PyBUF_SIMPLE) != 0) { 414 w_byte(TYPE_UNKNOWN, p); 415 p->depth--; 416 p->error = WFERR_UNMARSHALLABLE; 417 return; 418 } 419 w_byte(TYPE_STRING, p); 420 n = view.len; 421 s = view.buf; 422 if (n > INT_MAX) { 423 p->depth--; 424 p->error = WFERR_UNMARSHALLABLE; 425 return; 426 } 427 w_long((long)n, p); 428 w_string(s, (int)n, p); 429 PyBuffer_Release(&view); 430 } 431 else { 432 w_byte(TYPE_UNKNOWN, p); 433 
p->error = WFERR_UNMARSHALLABLE; 434 } 435 p->depth--; 436} 437 438/* version currently has no effect for writing longs. */ 439void 440PyMarshal_WriteLongToFile(long x, FILE *fp, int version) 441{ 442 WFILE wf; 443 wf.fp = fp; 444 wf.error = WFERR_OK; 445 wf.depth = 0; 446 wf.version = version; 447 w_long(x, &wf); 448} 449 450void 451PyMarshal_WriteObjectToFile(PyObject *x, FILE *fp, int version) 452{ 453 WFILE wf; 454 wf.fp = fp; 455 wf.error = WFERR_OK; 456 wf.depth = 0; 457 wf.version = version; 458 w_object(x, &wf); 459} 460 461typedef WFILE RFILE; /* Same struct with different invariants */ 462 463#define rs_byte(p) (((p)->ptr < (p)->end) ? (unsigned char)*(p)->ptr++ : EOF) 464 465static int 466r_string(char *s, int n, RFILE *p) 467{ 468 char *ptr; 469 int read, left; 470 471 if (!p->readable) { 472 if (p->fp != NULL) 473 /* The result fits into int because it must be <=n. */ 474 read = (int) fread(s, 1, n, p->fp); 475 else { 476 left = (int)(p->end - p->ptr); 477 read = (left < n) ? left : n; 478 memcpy(s, p->ptr, read); 479 p->ptr += read; 480 } 481 } 482 else { 483 _Py_IDENTIFIER(read); 484 485 PyObject *data = _PyObject_CallMethodId(p->readable, &PyId_read, "i", n); 486 read = 0; 487 if (data != NULL) { 488 if (!PyBytes_Check(data)) { 489 PyErr_Format(PyExc_TypeError, 490 "f.read() returned not bytes but %.100s", 491 data->ob_type->tp_name); 492 } 493 else { 494 read = PyBytes_GET_SIZE(data); 495 if (read > 0) { 496 ptr = PyBytes_AS_STRING(data); 497 memcpy(s, ptr, read); 498 } 499 } 500 Py_DECREF(data); 501 } 502 } 503 if (!PyErr_Occurred() && (read < n)) { 504 PyErr_SetString(PyExc_EOFError, "EOF read where not expected"); 505 } 506 return read; 507} 508 509 510static int 511r_byte(RFILE *p) 512{ 513 int c = EOF; 514 unsigned char ch; 515 int n; 516 517 if (!p->readable) 518 c = p->fp ? 
getc(p->fp) : rs_byte(p); 519 else { 520 n = r_string((char *) &ch, 1, p); 521 if (n > 0) 522 c = ch; 523 } 524 return c; 525} 526 527static int 528r_short(RFILE *p) 529{ 530 register short x; 531 unsigned char buffer[2]; 532 533 r_string((char *) buffer, 2, p); 534 x = buffer[0]; 535 x |= buffer[1] << 8; 536 /* Sign-extension, in case short greater than 16 bits */ 537 x |= -(x & 0x8000); 538 return x; 539} 540 541static long 542r_long(RFILE *p) 543{ 544 register long x; 545 unsigned char buffer[4]; 546 547 r_string((char *) buffer, 4, p); 548 x = buffer[0]; 549 x |= (long)buffer[1] << 8; 550 x |= (long)buffer[2] << 16; 551 x |= (long)buffer[3] << 24; 552#if SIZEOF_LONG > 4 553 /* Sign extension for 64-bit machines */ 554 x |= -(x & 0x80000000L); 555#endif 556 return x; 557} 558 559/* r_long64 deals with the TYPE_INT64 code. On a machine with 560 sizeof(long) > 4, it returns a Python int object, else a Python long 561 object. Note that w_long64 writes out TYPE_INT if 32 bits is enough, 562 so there's no inefficiency here in returning a PyLong on 32-bit boxes 563 for everything written via TYPE_INT64 (i.e., if an int is written via 564 TYPE_INT64, it *needs* more than 32 bits). 
565*/ 566static PyObject * 567r_long64(RFILE *p) 568{ 569 PyObject *result = NULL; 570 long lo4 = r_long(p); 571 long hi4 = r_long(p); 572 573 if (!PyErr_Occurred()) { 574#if SIZEOF_LONG > 4 575 long x = (hi4 << 32) | (lo4 & 0xFFFFFFFFL); 576 result = PyLong_FromLong(x); 577#else 578 unsigned char buf[8]; 579 int one = 1; 580 int is_little_endian = (int)*(char*)&one; 581 if (is_little_endian) { 582 memcpy(buf, &lo4, 4); 583 memcpy(buf+4, &hi4, 4); 584 } 585 else { 586 memcpy(buf, &hi4, 4); 587 memcpy(buf+4, &lo4, 4); 588 } 589 result = _PyLong_FromByteArray(buf, 8, is_little_endian, 1); 590#endif 591 } 592 return result; 593} 594 595static PyObject * 596r_PyLong(RFILE *p) 597{ 598 PyLongObject *ob; 599 int size, i, j, md, shorts_in_top_digit; 600 long n; 601 digit d; 602 603 n = r_long(p); 604 if (PyErr_Occurred()) 605 return NULL; 606 if (n == 0) 607 return (PyObject *)_PyLong_New(0); 608 if (n < -INT_MAX || n > INT_MAX) { 609 PyErr_SetString(PyExc_ValueError, 610 "bad marshal data (long size out of range)"); 611 return NULL; 612 } 613 614 size = 1 + (ABS(n) - 1) / PyLong_MARSHAL_RATIO; 615 shorts_in_top_digit = 1 + (ABS(n) - 1) % PyLong_MARSHAL_RATIO; 616 ob = _PyLong_New(size); 617 if (ob == NULL) 618 return NULL; 619 Py_SIZE(ob) = n > 0 ? 
size : -size; 620 621 for (i = 0; i < size-1; i++) { 622 d = 0; 623 for (j=0; j < PyLong_MARSHAL_RATIO; j++) { 624 md = r_short(p); 625 if (PyErr_Occurred()) 626 break; 627 if (md < 0 || md > PyLong_MARSHAL_BASE) 628 goto bad_digit; 629 d += (digit)md << j*PyLong_MARSHAL_SHIFT; 630 } 631 ob->ob_digit[i] = d; 632 } 633 d = 0; 634 for (j=0; j < shorts_in_top_digit; j++) { 635 md = r_short(p); 636 if (PyErr_Occurred()) 637 break; 638 if (md < 0 || md > PyLong_MARSHAL_BASE) 639 goto bad_digit; 640 /* topmost marshal digit should be nonzero */ 641 if (md == 0 && j == shorts_in_top_digit - 1) { 642 Py_DECREF(ob); 643 PyErr_SetString(PyExc_ValueError, 644 "bad marshal data (unnormalized long data)"); 645 return NULL; 646 } 647 d += (digit)md << j*PyLong_MARSHAL_SHIFT; 648 } 649 if (PyErr_Occurred()) { 650 Py_DECREF(ob); 651 return NULL; 652 } 653 /* top digit should be nonzero, else the resulting PyLong won't be 654 normalized */ 655 ob->ob_digit[size-1] = d; 656 return (PyObject *)ob; 657 bad_digit: 658 Py_DECREF(ob); 659 PyErr_SetString(PyExc_ValueError, 660 "bad marshal data (digit out of range in long)"); 661 return NULL; 662} 663 664 665static PyObject * 666r_object(RFILE *p) 667{ 668 /* NULL is a valid return value, it does not necessarily means that 669 an exception is set. 
*/ 670 PyObject *v, *v2; 671 long i, n; 672 int type = r_byte(p); 673 PyObject *retval; 674 675 p->depth++; 676 677 if (p->depth > MAX_MARSHAL_STACK_DEPTH) { 678 p->depth--; 679 PyErr_SetString(PyExc_ValueError, "recursion limit exceeded"); 680 return NULL; 681 } 682 683 switch (type) { 684 685 case EOF: 686 PyErr_SetString(PyExc_EOFError, 687 "EOF read where object expected"); 688 retval = NULL; 689 break; 690 691 case TYPE_NULL: 692 retval = NULL; 693 break; 694 695 case TYPE_NONE: 696 Py_INCREF(Py_None); 697 retval = Py_None; 698 break; 699 700 case TYPE_STOPITER: 701 Py_INCREF(PyExc_StopIteration); 702 retval = PyExc_StopIteration; 703 break; 704 705 case TYPE_ELLIPSIS: 706 Py_INCREF(Py_Ellipsis); 707 retval = Py_Ellipsis; 708 break; 709 710 case TYPE_FALSE: 711 Py_INCREF(Py_False); 712 retval = Py_False; 713 break; 714 715 case TYPE_TRUE: 716 Py_INCREF(Py_True); 717 retval = Py_True; 718 break; 719 720 case TYPE_INT: 721 n = r_long(p); 722 retval = PyErr_Occurred() ? NULL : PyLong_FromLong(n); 723 break; 724 725 case TYPE_INT64: 726 retval = r_long64(p); 727 break; 728 729 case TYPE_LONG: 730 retval = r_PyLong(p); 731 break; 732 733 case TYPE_FLOAT: 734 { 735 char buf[256]; 736 double dx; 737 retval = NULL; 738 n = r_byte(p); 739 if (n == EOF || r_string(buf, (int)n, p) != n) { 740 PyErr_SetString(PyExc_EOFError, 741 "EOF read where object expected"); 742 break; 743 } 744 buf[n] = '\0'; 745 dx = PyOS_string_to_double(buf, NULL, NULL); 746 if (dx == -1.0 && PyErr_Occurred()) 747 break; 748 retval = PyFloat_FromDouble(dx); 749 break; 750 } 751 752 case TYPE_BINARY_FLOAT: 753 { 754 unsigned char buf[8]; 755 double x; 756 if (r_string((char*)buf, 8, p) != 8) { 757 PyErr_SetString(PyExc_EOFError, 758 "EOF read where object expected"); 759 retval = NULL; 760 break; 761 } 762 x = _PyFloat_Unpack8(buf, 1); 763 if (x == -1.0 && PyErr_Occurred()) { 764 retval = NULL; 765 break; 766 } 767 retval = PyFloat_FromDouble(x); 768 break; 769 } 770 771 case TYPE_COMPLEX: 772 { 
773 char buf[256]; 774 Py_complex c; 775 retval = NULL; 776 n = r_byte(p); 777 if (n == EOF || r_string(buf, (int)n, p) != n) { 778 PyErr_SetString(PyExc_EOFError, 779 "EOF read where object expected"); 780 break; 781 } 782 buf[n] = '\0'; 783 c.real = PyOS_string_to_double(buf, NULL, NULL); 784 if (c.real == -1.0 && PyErr_Occurred()) 785 break; 786 n = r_byte(p); 787 if (n == EOF || r_string(buf, (int)n, p) != n) { 788 PyErr_SetString(PyExc_EOFError, 789 "EOF read where object expected"); 790 break; 791 } 792 buf[n] = '\0'; 793 c.imag = PyOS_string_to_double(buf, NULL, NULL); 794 if (c.imag == -1.0 && PyErr_Occurred()) 795 break; 796 retval = PyComplex_FromCComplex(c); 797 break; 798 } 799 800 case TYPE_BINARY_COMPLEX: 801 { 802 unsigned char buf[8]; 803 Py_complex c; 804 if (r_string((char*)buf, 8, p) != 8) { 805 PyErr_SetString(PyExc_EOFError, 806 "EOF read where object expected"); 807 retval = NULL; 808 break; 809 } 810 c.real = _PyFloat_Unpack8(buf, 1); 811 if (c.real == -1.0 && PyErr_Occurred()) { 812 retval = NULL; 813 break; 814 } 815 if (r_string((char*)buf, 8, p) != 8) { 816 PyErr_SetString(PyExc_EOFError, 817 "EOF read where object expected"); 818 retval = NULL; 819 break; 820 } 821 c.imag = _PyFloat_Unpack8(buf, 1); 822 if (c.imag == -1.0 && PyErr_Occurred()) { 823 retval = NULL; 824 break; 825 } 826 retval = PyComplex_FromCComplex(c); 827 break; 828 } 829 830 case TYPE_STRING: 831 n = r_long(p); 832 if (PyErr_Occurred()) { 833 retval = NULL; 834 break; 835 } 836 if (n < 0 || n > INT_MAX) { 837 PyErr_SetString(PyExc_ValueError, "bad marshal data (string size out of range)"); 838 retval = NULL; 839 break; 840 } 841 v = PyBytes_FromStringAndSize((char *)NULL, n); 842 if (v == NULL) { 843 retval = NULL; 844 break; 845 } 846 if (r_string(PyBytes_AS_STRING(v), (int)n, p) != n) { 847 Py_DECREF(v); 848 PyErr_SetString(PyExc_EOFError, 849 "EOF read where object expected"); 850 retval = NULL; 851 break; 852 } 853 retval = v; 854 break; 855 856 case TYPE_UNICODE: 
857 { 858 char *buffer; 859 860 n = r_long(p); 861 if (PyErr_Occurred()) { 862 retval = NULL; 863 break; 864 } 865 if (n < 0 || n > INT_MAX) { 866 PyErr_SetString(PyExc_ValueError, "bad marshal data (unicode size out of range)"); 867 retval = NULL; 868 break; 869 } 870 buffer = PyMem_NEW(char, n); 871 if (buffer == NULL) { 872 retval = PyErr_NoMemory(); 873 break; 874 } 875 if (r_string(buffer, (int)n, p) != n) { 876 PyMem_DEL(buffer); 877 PyErr_SetString(PyExc_EOFError, 878 "EOF read where object expected"); 879 retval = NULL; 880 break; 881 } 882 v = PyUnicode_DecodeUTF8(buffer, n, "surrogatepass"); 883 PyMem_DEL(buffer); 884 retval = v; 885 break; 886 } 887 888 case TYPE_TUPLE: 889 n = r_long(p); 890 if (PyErr_Occurred()) { 891 retval = NULL; 892 break; 893 } 894 if (n < 0 || n > INT_MAX) { 895 PyErr_SetString(PyExc_ValueError, "bad marshal data (tuple size out of range)"); 896 retval = NULL; 897 break; 898 } 899 v = PyTuple_New((int)n); 900 if (v == NULL) { 901 retval = NULL; 902 break; 903 } 904 for (i = 0; i < n; i++) { 905 v2 = r_object(p); 906 if ( v2 == NULL ) { 907 if (!PyErr_Occurred()) 908 PyErr_SetString(PyExc_TypeError, 909 "NULL object in marshal data for tuple"); 910 Py_DECREF(v); 911 v = NULL; 912 break; 913 } 914 PyTuple_SET_ITEM(v, (int)i, v2); 915 } 916 retval = v; 917 break; 918 919 case TYPE_LIST: 920 n = r_long(p); 921 if (PyErr_Occurred()) { 922 retval = NULL; 923 break; 924 } 925 if (n < 0 || n > INT_MAX) { 926 PyErr_SetString(PyExc_ValueError, "bad marshal data (list size out of range)"); 927 retval = NULL; 928 break; 929 } 930 v = PyList_New((int)n); 931 if (v == NULL) { 932 retval = NULL; 933 break; 934 } 935 for (i = 0; i < n; i++) { 936 v2 = r_object(p); 937 if ( v2 == NULL ) { 938 if (!PyErr_Occurred()) 939 PyErr_SetString(PyExc_TypeError, 940 "NULL object in marshal data for list"); 941 Py_DECREF(v); 942 v = NULL; 943 break; 944 } 945 PyList_SET_ITEM(v, (int)i, v2); 946 } 947 retval = v; 948 break; 949 950 case TYPE_DICT: 951 v = 
PyDict_New(); 952 if (v == NULL) { 953 retval = NULL; 954 break; 955 } 956 for (;;) { 957 PyObject *key, *val; 958 key = r_object(p); 959 if (key == NULL) 960 break; 961 val = r_object(p); 962 if (val != NULL) 963 PyDict_SetItem(v, key, val); 964 Py_DECREF(key); 965 Py_XDECREF(val); 966 } 967 if (PyErr_Occurred()) { 968 Py_DECREF(v); 969 v = NULL; 970 } 971 retval = v; 972 break; 973 974 case TYPE_SET: 975 case TYPE_FROZENSET: 976 n = r_long(p); 977 if (PyErr_Occurred()) { 978 retval = NULL; 979 break; 980 } 981 if (n < 0 || n > INT_MAX) { 982 PyErr_SetString(PyExc_ValueError, "bad marshal data (set size out of range)"); 983 retval = NULL; 984 break; 985 } 986 v = (type == TYPE_SET) ? PySet_New(NULL) : PyFrozenSet_New(NULL); 987 if (v == NULL) { 988 retval = NULL; 989 break; 990 } 991 for (i = 0; i < n; i++) { 992 v2 = r_object(p); 993 if ( v2 == NULL ) { 994 if (!PyErr_Occurred()) 995 PyErr_SetString(PyExc_TypeError, 996 "NULL object in marshal data for set"); 997 Py_DECREF(v); 998 v = NULL; 999 break; 1000 } 1001 if (PySet_Add(v, v2) == -1) { 1002 Py_DECREF(v); 1003 Py_DECREF(v2); 1004 v = NULL; 1005 break; 1006 } 1007 Py_DECREF(v2); 1008 } 1009 retval = v; 1010 break; 1011 1012 case TYPE_CODE: 1013 { 1014 int argcount; 1015 int kwonlyargcount; 1016 int nlocals; 1017 int stacksize; 1018 int flags; 1019 PyObject *code = NULL; 1020 PyObject *consts = NULL; 1021 PyObject *names = NULL; 1022 PyObject *varnames = NULL; 1023 PyObject *freevars = NULL; 1024 PyObject *cellvars = NULL; 1025 PyObject *filename = NULL; 1026 PyObject *name = NULL; 1027 int firstlineno; 1028 PyObject *lnotab = NULL; 1029 1030 v = NULL; 1031 1032 /* XXX ignore long->int overflows for now */ 1033 argcount = (int)r_long(p); 1034 if (PyErr_Occurred()) 1035 goto code_error; 1036 kwonlyargcount = (int)r_long(p); 1037 if (PyErr_Occurred()) 1038 goto code_error; 1039 nlocals = (int)r_long(p); 1040 if (PyErr_Occurred()) 1041 goto code_error; 1042 stacksize = (int)r_long(p); 1043 if (PyErr_Occurred()) 
1044 goto code_error; 1045 flags = (int)r_long(p); 1046 if (PyErr_Occurred()) 1047 goto code_error; 1048 code = r_object(p); 1049 if (code == NULL) 1050 goto code_error; 1051 consts = r_object(p); 1052 if (consts == NULL) 1053 goto code_error; 1054 names = r_object(p); 1055 if (names == NULL) 1056 goto code_error; 1057 varnames = r_object(p); 1058 if (varnames == NULL) 1059 goto code_error; 1060 freevars = r_object(p); 1061 if (freevars == NULL) 1062 goto code_error; 1063 cellvars = r_object(p); 1064 if (cellvars == NULL) 1065 goto code_error; 1066 filename = r_object(p); 1067 if (filename == NULL) 1068 goto code_error; 1069 if (PyUnicode_CheckExact(filename)) { 1070 if (p->current_filename != NULL) { 1071 if (!PyUnicode_Compare(filename, p->current_filename)) { 1072 Py_DECREF(filename); 1073 Py_INCREF(p->current_filename); 1074 filename = p->current_filename; 1075 } 1076 } 1077 else { 1078 p->current_filename = filename; 1079 } 1080 } 1081 name = r_object(p); 1082 if (name == NULL) 1083 goto code_error; 1084 firstlineno = (int)r_long(p); 1085 lnotab = r_object(p); 1086 if (lnotab == NULL) 1087 goto code_error; 1088 1089 v = (PyObject *) PyCode_New( 1090 argcount, kwonlyargcount, 1091 nlocals, stacksize, flags, 1092 code, consts, names, varnames, 1093 freevars, cellvars, filename, name, 1094 firstlineno, lnotab); 1095 1096 code_error: 1097 Py_XDECREF(code); 1098 Py_XDECREF(consts); 1099 Py_XDECREF(names); 1100 Py_XDECREF(varnames); 1101 Py_XDECREF(freevars); 1102 Py_XDECREF(cellvars); 1103 Py_XDECREF(filename); 1104 Py_XDECREF(name); 1105 Py_XDECREF(lnotab); 1106 } 1107 retval = v; 1108 break; 1109 1110 default: 1111 /* Bogus data got written, which isn't ideal. 1112 This will let you keep working and recover. 
*/ 1113 PyErr_SetString(PyExc_ValueError, "bad marshal data (unknown type code)"); 1114 retval = NULL; 1115 break; 1116 1117 } 1118 p->depth--; 1119 return retval; 1120} 1121 1122static PyObject * 1123read_object(RFILE *p) 1124{ 1125 PyObject *v; 1126 if (PyErr_Occurred()) { 1127 fprintf(stderr, "XXX readobject called with exception set\n"); 1128 return NULL; 1129 } 1130 v = r_object(p); 1131 if (v == NULL && !PyErr_Occurred()) 1132 PyErr_SetString(PyExc_TypeError, "NULL object in marshal data for object"); 1133 return v; 1134} 1135 1136int 1137PyMarshal_ReadShortFromFile(FILE *fp) 1138{ 1139 RFILE rf; 1140 assert(fp); 1141 rf.readable = NULL; 1142 rf.fp = fp; 1143 rf.current_filename = NULL; 1144 rf.end = rf.ptr = NULL; 1145 return r_short(&rf); 1146} 1147 1148long 1149PyMarshal_ReadLongFromFile(FILE *fp) 1150{ 1151 RFILE rf; 1152 rf.fp = fp; 1153 rf.readable = NULL; 1154 rf.current_filename = NULL; 1155 rf.ptr = rf.end = NULL; 1156 return r_long(&rf); 1157} 1158 1159#ifdef HAVE_FSTAT 1160/* Return size of file in bytes; < 0 if unknown. */ 1161static off_t 1162getfilesize(FILE *fp) 1163{ 1164 struct stat st; 1165 if (fstat(fileno(fp), &st) != 0) 1166 return -1; 1167 else 1168 return st.st_size; 1169} 1170#endif 1171 1172/* If we can get the size of the file up-front, and it's reasonably small, 1173 * read it in one gulp and delegate to ...FromString() instead. Much quicker 1174 * than reading a byte at a time from file; speeds .pyc imports. 1175 * CAUTION: since this may read the entire remainder of the file, don't 1176 * call it unless you know you're done with the file. 1177 */ 1178PyObject * 1179PyMarshal_ReadLastObjectFromFile(FILE *fp) 1180{ 1181/* REASONABLE_FILE_LIMIT is by defn something big enough for Tkinter.pyc. 
*/ 1182#define REASONABLE_FILE_LIMIT (1L << 18) 1183#ifdef HAVE_FSTAT 1184 off_t filesize; 1185 filesize = getfilesize(fp); 1186 if (filesize > 0 && filesize <= REASONABLE_FILE_LIMIT) { 1187 char* pBuf = (char *)PyMem_MALLOC(filesize); 1188 if (pBuf != NULL) { 1189 PyObject* v; 1190 size_t n; 1191 /* filesize must fit into an int, because it 1192 is smaller than REASONABLE_FILE_LIMIT */ 1193 n = fread(pBuf, 1, (int)filesize, fp); 1194 v = PyMarshal_ReadObjectFromString(pBuf, n); 1195 PyMem_FREE(pBuf); 1196 return v; 1197 } 1198 1199 } 1200#endif 1201 /* We don't have fstat, or we do but the file is larger than 1202 * REASONABLE_FILE_LIMIT or malloc failed -- read a byte at a time. 1203 */ 1204 return PyMarshal_ReadObjectFromFile(fp); 1205 1206#undef REASONABLE_FILE_LIMIT 1207} 1208 1209PyObject * 1210PyMarshal_ReadObjectFromFile(FILE *fp) 1211{ 1212 RFILE rf; 1213 PyObject *result; 1214 rf.fp = fp; 1215 rf.readable = NULL; 1216 rf.current_filename = NULL; 1217 rf.depth = 0; 1218 rf.ptr = rf.end = NULL; 1219 result = r_object(&rf); 1220 return result; 1221} 1222 1223PyObject * 1224PyMarshal_ReadObjectFromString(char *str, Py_ssize_t len) 1225{ 1226 RFILE rf; 1227 PyObject *result; 1228 rf.fp = NULL; 1229 rf.readable = NULL; 1230 rf.current_filename = NULL; 1231 rf.ptr = str; 1232 rf.end = str + len; 1233 rf.depth = 0; 1234 result = r_object(&rf); 1235 return result; 1236} 1237 1238PyObject * 1239PyMarshal_WriteObjectToString(PyObject *x, int version) 1240{ 1241 WFILE wf; 1242 PyObject *res = NULL; 1243 1244 wf.fp = NULL; 1245 wf.readable = NULL; 1246 wf.str = PyBytes_FromStringAndSize((char *)NULL, 50); 1247 if (wf.str == NULL) 1248 return NULL; 1249 wf.ptr = PyBytes_AS_STRING((PyBytesObject *)wf.str); 1250 wf.end = wf.ptr + PyBytes_Size(wf.str); 1251 wf.error = WFERR_OK; 1252 wf.depth = 0; 1253 wf.version = version; 1254 w_object(x, &wf); 1255 if (wf.str != NULL) { 1256 char *base = PyBytes_AS_STRING((PyBytesObject *)wf.str); 1257 if (wf.ptr - base > 
PY_SSIZE_T_MAX) { 1258 Py_DECREF(wf.str); 1259 PyErr_SetString(PyExc_OverflowError, 1260 "too much marshal data for a string"); 1261 return NULL; 1262 } 1263 if (_PyBytes_Resize(&wf.str, (Py_ssize_t)(wf.ptr - base)) < 0) 1264 return NULL; 1265 } 1266 if (wf.error != WFERR_OK) { 1267 Py_XDECREF(wf.str); 1268 if (wf.error == WFERR_NOMEMORY) 1269 PyErr_NoMemory(); 1270 else 1271 PyErr_SetString(PyExc_ValueError, 1272 (wf.error==WFERR_UNMARSHALLABLE)?"unmarshallable object" 1273 :"object too deeply nested to marshal"); 1274 return NULL; 1275 } 1276 if (wf.str != NULL) { 1277 /* XXX Quick hack -- need to do this differently */ 1278 res = PyBytes_FromObject(wf.str); 1279 Py_DECREF(wf.str); 1280 } 1281 return res; 1282} 1283 1284/* And an interface for Python programs... */ 1285 /* marshal.dump(value, file[, version]): parses x, f and an optional int version (default Py_MARSHAL_VERSION), serializes x with PyMarshal_WriteObjectToString(), then calls f.write(s) and returns the result of that call unchanged. Returns NULL if argument parsing or serialization fails. */1286static PyObject * 1287marshal_dump(PyObject *self, PyObject *args) 1288{ 1289 /* XXX Quick hack -- need to do this differently */ 1290 PyObject *x; 1291 PyObject *f; 1292 int version = Py_MARSHAL_VERSION; 1293 PyObject *s; 1294 PyObject *res; 1295 _Py_IDENTIFIER(write); 1296 1297 if (!PyArg_ParseTuple(args, "OO|i:dump", &x, &f, &version)) 1298 return NULL; 1299 s = PyMarshal_WriteObjectToString(x, version); 1300 if (s == NULL) 1301 return NULL; 1302 res = _PyObject_CallMethodId(f, &PyId_write, "O", s); 1303 Py_DECREF(s); /* the serialized bytes object is released whether or not the write() call succeeded */1304 return res; 1305} 1306 /* NOTE(review): in the docstring below, the sentence ending in load() has no closing period, and the dash after the word raised is a non-ASCII character; confirm both are intended. */1307PyDoc_STRVAR(dump_doc, 1308"dump(value, file[, version])\n\ 1309\n\ 1310Write the value on the open file. The value must be a supported type.\n\ 1311The file must be an open file object such as sys.stdout or returned by\n\ 1312open() or os.popen(). It must be opened in binary mode ('wb' or 'w+b').\n\ 1313\n\ 1314If the value has (or contains an object that has) an unsupported type, a\n\ 1315ValueError exception is raised — but garbage data will also be written\n\ 1316to the file.
The object will not be properly read back by load()\n\ 1317\n\ 1318The version argument indicates the data format that dump should use."); 1319 /* marshal.load(file): calls f.read(0) as a probe and raises TypeError if the result is not a bytes object; otherwise fills in an RFILE with fp = NULL and readable = f and returns read_object(&rf). The probe result is released before returning on both paths. */1320static PyObject * 1321marshal_load(PyObject *self, PyObject *f) 1322{ 1323 PyObject *data, *result; 1324 _Py_IDENTIFIER(read); 1325 RFILE rf; 1326 1327 /* 1328 * Make a call to the read method, but read zero bytes. 1329 * This is to ensure that the object passed in at least 1330 * has a read method which returns bytes. 1331 */ 1332 data = _PyObject_CallMethodId(f, &PyId_read, "i", 0); 1333 if (data == NULL) 1334 return NULL; 1335 if (!PyBytes_Check(data)) { 1336 PyErr_Format(PyExc_TypeError, 1337 "f.read() returned not bytes but %.100s", 1338 data->ob_type->tp_name); 1339 result = NULL; 1340 } 1341 else { /* NOTE(review): rf.ptr and rf.end are left unset on this path; presumably read_object() ignores them when rf.readable is non-NULL -- confirm. */1342 rf.depth = 0; 1343 rf.fp = NULL; 1344 rf.readable = f; 1345 rf.current_filename = NULL; 1346 result = read_object(&rf); 1347 } 1348 Py_DECREF(data); 1349 return result; 1350} 1351 /* NOTE(review): the possessive in the docstring below uses a non-ASCII quote character; confirm the source encoding is intended. */1352PyDoc_STRVAR(load_doc, 1353"load(file)\n\ 1354\n\ 1355Read one value from the open file and return it. If no valid value is\n\ 1356read (e.g. because the data has a different Python version’s\n\ 1357incompatible marshal format), raise EOFError, ValueError or TypeError.\n\ 1358The file must be an open file object opened in binary mode ('rb' or\n\ 1359'r+b').\n\ 1360\n\ 1361Note: If an object containing an unsupported type was marshalled with\n\ 1362dump(), load() will substitute None for the unmarshallable type."); 1363 1364 /* marshal.dumps(value[, version]): parses x and an optional int version (default Py_MARSHAL_VERSION) and returns PyMarshal_WriteObjectToString(x, version) directly. */1365static PyObject * 1366marshal_dumps(PyObject *self, PyObject *args) 1367{ 1368 PyObject *x; 1369 int version = Py_MARSHAL_VERSION; 1370 if (!PyArg_ParseTuple(args, "O|i:dumps", &x, &version)) 1371 return NULL; 1372 return PyMarshal_WriteObjectToString(x, version); 1373} 1374 1375PyDoc_STRVAR(dumps_doc, 1376"dumps(value[, version])\n\ 1377\n\ 1378Return the string that would be written to a file by dump(value, file).\n\ 1379The value must be a supported type.
Raise a ValueError exception if\n\ 1380value has (or contains an object that has) an unsupported type.\n\ 1381\n\ 1382The version argument indicates the data format that dumps should use."); 1383 1384 /* marshal.loads(data): obtains a Py_buffer through the s* argument format, points an RFILE at the byte range [p.buf, p.buf + p.len) with fp and readable both NULL, reads one object with read_object(), and releases the buffer before returning the result. */1385static PyObject * 1386marshal_loads(PyObject *self, PyObject *args) 1387{ 1388 RFILE rf; 1389 Py_buffer p; 1390 char *s; 1391 Py_ssize_t n; 1392 PyObject* result; 1393 if (!PyArg_ParseTuple(args, "s*:loads", &p)) 1394 return NULL; 1395 s = p.buf; 1396 n = p.len; 1397 rf.fp = NULL; 1398 rf.readable = NULL; 1399 rf.current_filename = NULL; 1400 rf.ptr = s; 1401 rf.end = s + n; 1402 rf.depth = 0; 1403 result = read_object(&rf); 1404 PyBuffer_Release(&p); 1405 return result; 1406} 1407 1408PyDoc_STRVAR(loads_doc, 1409"loads(string)\n\ 1410\n\ 1411Convert the string to a value. If no valid value is found, raise\n\ 1412EOFError, ValueError or TypeError. Extra characters in the string are\n\ 1413ignored."); 1414 /* Method table for the module: load is registered as METH_O, the other three as METH_VARARGS; the all-NULL entry terminates the table. */1415static PyMethodDef marshal_methods[] = { 1416 {"dump", marshal_dump, METH_VARARGS, dump_doc}, 1417 {"load", marshal_load, METH_O, load_doc}, 1418 {"dumps", marshal_dumps, METH_VARARGS, dumps_doc}, 1419 {"loads", marshal_loads, METH_VARARGS, loads_doc}, 1420 {NULL, NULL} /* sentinel */ 1421}; 1422 1423 1424PyDoc_STRVAR(module_doc, 1425"This module contains functions that can read and write Python values in\n\ 1426a binary format. The format is specific to Python, but independent of\n\ 1427machine architecture issues.\n\ 1428\n\ 1429Not all Python object types are supported; in general, only objects\n\ 1430whose value is independent from a particular invocation of Python can be\n\ 1431written and read by this module.
The following types are supported:\n\ 1432None, integers, floating point numbers, strings, bytes, bytearrays,\n\ 1433tuples, lists, sets, dictionaries, and code objects, where it\n\ 1434should be understood that tuples, lists and dictionaries are only\n\ 1435supported as long as the values contained therein are themselves\n\ 1436supported; and recursive lists and dictionaries should not be written\n\ 1437(they will cause infinite loops).\n\ 1438\n\ 1439Variables:\n\ 1440\n\ 1441version -- indicates the format that the module uses. Version 0 is the\n\ 1442 historical format, version 1 shares interned strings and version 2\n\ 1443 uses a binary format for floating point numbers.\n\ 1444\n\ 1445Functions:\n\ 1446\n\ 1447dump() -- write value to a file\n\ 1448load() -- read value from a file\n\ 1449dumps() -- write value to a string\n\ 1450loads() -- read value from a string"); 1451 1452 1453 /* Module definition: name marshal, docstring module_doc, method table marshal_methods; the remaining slots are 0 or NULL. */1454static struct PyModuleDef marshalmodule = { 1455 PyModuleDef_HEAD_INIT, 1456 "marshal", 1457 module_doc, 1458 0, 1459 marshal_methods, 1460 NULL, 1461 NULL, 1462 NULL, 1463 NULL 1464}; 1465 /* Module init: creates the module from marshalmodule, returns NULL if creation fails, otherwise adds the int constant named version with value Py_MARSHAL_VERSION and returns the module. NOTE(review): the return value of PyModule_AddIntConstant() is ignored; consider checking it. */1466PyMODINIT_FUNC 1467PyMarshal_Init(void) 1468{ 1469 PyObject *mod = PyModule_Create(&marshalmodule); 1470 if (mod == NULL) 1471 return NULL; 1472 PyModule_AddIntConstant(mod, "version", Py_MARSHAL_VERSION); 1473 return mod; 1474} 1475