zipimport.c revision 3f528f0c1b79f32b1c00348f53f4b5b2007f16c5
1#include "Python.h" 2#include "structmember.h" 3#include "osdefs.h" 4#include "marshal.h" 5#include <time.h> 6 7 8#define IS_SOURCE 0x0 9#define IS_BYTECODE 0x1 10#define IS_PACKAGE 0x2 11 12struct st_zip_searchorder { 13 char suffix[14]; 14 int type; 15}; 16 17/* zip_searchorder defines how we search for a module in the Zip 18 archive: we first search for a package __init__, then for 19 non-package .pyc, .pyo and .py entries. The .pyc and .pyo entries 20 are swapped by initzipimport() if we run in optimized mode. Also, 21 '/' is replaced by SEP there. */ 22static struct st_zip_searchorder zip_searchorder[] = { 23 {"/__init__.pyc", IS_PACKAGE | IS_BYTECODE}, 24 {"/__init__.pyo", IS_PACKAGE | IS_BYTECODE}, 25 {"/__init__.py", IS_PACKAGE | IS_SOURCE}, 26 {".pyc", IS_BYTECODE}, 27 {".pyo", IS_BYTECODE}, 28 {".py", IS_SOURCE}, 29 {"", 0} 30}; 31 32/* zipimporter object definition and support */ 33 34typedef struct _zipimporter ZipImporter; 35 36struct _zipimporter { 37 PyObject_HEAD 38 PyObject *archive; /* pathname of the Zip archive, 39 decoded from the filesystem encoding */ 40 PyObject *prefix; /* file prefix: "a/sub/directory/", 41 encoded to the filesystem encoding */ 42 PyObject *files; /* dict with file info {path: toc_entry} */ 43}; 44 45static PyObject *ZipImportError; 46/* read_directory() cache */ 47static PyObject *zip_directory_cache = NULL; 48 49/* forward decls */ 50static PyObject *read_directory(PyObject *archive); 51static PyObject *get_data(PyObject *archive, PyObject *toc_entry); 52static PyObject *get_module_code(ZipImporter *self, PyObject *fullname, 53 int *p_ispackage, PyObject **p_modpath); 54 55 56#define ZipImporter_Check(op) PyObject_TypeCheck(op, &ZipImporter_Type) 57 58 59/* zipimporter.__init__ 60 Split the "subdirectory" from the Zip archive path, lookup a matching 61 entry in sys.path_importer_cache, fetch the file directory from there 62 if found, or else read it from the archive. 
*/ 63static int 64zipimporter_init(ZipImporter *self, PyObject *args, PyObject *kwds) 65{ 66 PyObject *pathobj, *files; 67 Py_UCS4 *path, *p, *prefix, buf[MAXPATHLEN+2]; 68 Py_ssize_t len; 69 70 if (!_PyArg_NoKeywords("zipimporter()", kwds)) 71 return -1; 72 73 if (!PyArg_ParseTuple(args, "O&:zipimporter", 74 PyUnicode_FSDecoder, &pathobj)) 75 return -1; 76 77 if (PyUnicode_READY(pathobj) == -1) 78 return -1; 79 80 /* copy path to buf */ 81 len = PyUnicode_GET_LENGTH(pathobj); 82 if (len == 0) { 83 PyErr_SetString(ZipImportError, "archive path is empty"); 84 goto error; 85 } 86 if (len >= MAXPATHLEN) { 87 PyErr_SetString(ZipImportError, 88 "archive path too long"); 89 goto error; 90 } 91 if (!PyUnicode_AsUCS4(pathobj, buf, Py_ARRAY_LENGTH(buf), 1)) 92 goto error; 93 94#ifdef ALTSEP 95 for (p = buf; *p; p++) { 96 if (*p == ALTSEP) 97 *p = SEP; 98 } 99#endif 100 101 path = NULL; 102 prefix = NULL; 103 for (;;) { 104 struct stat statbuf; 105 int rv; 106 107 if (pathobj == NULL) { 108 pathobj = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, 109 buf, len); 110 if (pathobj == NULL) 111 goto error; 112 } 113 rv = _Py_stat(pathobj, &statbuf); 114 if (rv == 0) { 115 /* it exists */ 116 if (S_ISREG(statbuf.st_mode)) 117 /* it's a file */ 118 path = buf; 119 break; 120 } 121 else if (PyErr_Occurred()) 122 goto error; 123 /* back up one path element */ 124 p = Py_UCS4_strrchr(buf, SEP); 125 if (prefix != NULL) 126 *prefix = SEP; 127 if (p == NULL) 128 break; 129 *p = '\0'; 130 len = p - buf; 131 prefix = p; 132 Py_CLEAR(pathobj); 133 } 134 if (path == NULL) { 135 PyErr_SetString(ZipImportError, "not a Zip file"); 136 goto error; 137 } 138 139 files = PyDict_GetItem(zip_directory_cache, pathobj); 140 if (files == NULL) { 141 files = read_directory(pathobj); 142 if (files == NULL) 143 goto error; 144 if (PyDict_SetItem(zip_directory_cache, pathobj, files) != 0) 145 goto error; 146 } 147 else 148 Py_INCREF(files); 149 self->files = files; 150 151 self->archive = pathobj; 152 
pathobj = NULL; 153 154 if (prefix != NULL) { 155 prefix++; 156 len = Py_UCS4_strlen(prefix); 157 if (prefix[len-1] != SEP) { 158 /* add trailing SEP */ 159 prefix[len] = SEP; 160 prefix[len + 1] = '\0'; 161 len++; 162 } 163 } 164 else 165 len = 0; 166 self->prefix = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, 167 prefix, len); 168 if (self->prefix == NULL) 169 goto error; 170 171 return 0; 172 173error: 174 Py_XDECREF(pathobj); 175 return -1; 176} 177 178/* GC support. */ 179static int 180zipimporter_traverse(PyObject *obj, visitproc visit, void *arg) 181{ 182 ZipImporter *self = (ZipImporter *)obj; 183 Py_VISIT(self->files); 184 return 0; 185} 186 187static void 188zipimporter_dealloc(ZipImporter *self) 189{ 190 PyObject_GC_UnTrack(self); 191 Py_XDECREF(self->archive); 192 Py_XDECREF(self->prefix); 193 Py_XDECREF(self->files); 194 Py_TYPE(self)->tp_free((PyObject *)self); 195} 196 197static PyObject * 198zipimporter_repr(ZipImporter *self) 199{ 200 if (self->archive == NULL) 201 return PyUnicode_FromString("<zipimporter object \"???\">"); 202 else if (self->prefix != NULL && PyUnicode_GET_LENGTH(self->prefix) != 0) 203 return PyUnicode_FromFormat("<zipimporter object \"%U%c%U\">", 204 self->archive, SEP, self->prefix); 205 else 206 return PyUnicode_FromFormat("<zipimporter object \"%U\">", 207 self->archive); 208} 209 210/* return fullname.split(".")[-1] */ 211static PyObject * 212get_subname(PyObject *fullname) 213{ 214 Py_ssize_t len; 215 Py_UCS4 *subname, *fullname_ucs4; 216 fullname_ucs4 = PyUnicode_AsUCS4Copy(fullname); 217 if (!fullname_ucs4) 218 return NULL; 219 subname = Py_UCS4_strrchr(fullname_ucs4, '.'); 220 if (subname == NULL) { 221 PyMem_Free(fullname_ucs4); 222 Py_INCREF(fullname); 223 return fullname; 224 } else { 225 PyObject *result; 226 subname++; 227 len = PyUnicode_GET_LENGTH(fullname); 228 len -= subname - fullname_ucs4; 229 result = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, 230 subname, len); 231 PyMem_Free(fullname_ucs4); 232 
return result; 233 } 234} 235 236/* Given a (sub)modulename, write the potential file path in the 237 archive (without extension) to the path buffer. Return the 238 length of the resulting string. 239 240 return self.prefix + name.replace('.', os.sep) */ 241static PyObject* 242make_filename(PyObject *prefix, PyObject *name) 243{ 244 PyObject *pathobj; 245 Py_UCS4 *p, *buf; 246 Py_ssize_t len; 247 248 len = PyUnicode_GET_LENGTH(prefix) + PyUnicode_GET_LENGTH(name) + 1; 249 p = buf = PyMem_Malloc(sizeof(Py_UCS4) * len); 250 if (buf == NULL) { 251 PyErr_NoMemory(); 252 return NULL; 253 } 254 255 if (!PyUnicode_AsUCS4(prefix, p, len, 0)) 256 return NULL; 257 p += PyUnicode_GET_LENGTH(prefix); 258 len -= PyUnicode_GET_LENGTH(prefix); 259 if (!PyUnicode_AsUCS4(name, p, len, 1)) 260 return NULL; 261 for (; *p; p++) { 262 if (*p == '.') 263 *p = SEP; 264 } 265 pathobj = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, 266 buf, p-buf); 267 PyMem_Free(buf); 268 return pathobj; 269} 270 271enum zi_module_info { 272 MI_ERROR, 273 MI_NOT_FOUND, 274 MI_MODULE, 275 MI_PACKAGE 276}; 277 278/* Return some information about a module. 
*/ 279static enum zi_module_info 280get_module_info(ZipImporter *self, PyObject *fullname) 281{ 282 PyObject *subname; 283 PyObject *path, *fullpath, *item; 284 struct st_zip_searchorder *zso; 285 286 subname = get_subname(fullname); 287 if (subname == NULL) 288 return MI_ERROR; 289 290 path = make_filename(self->prefix, subname); 291 Py_DECREF(subname); 292 if (path == NULL) 293 return MI_ERROR; 294 295 for (zso = zip_searchorder; *zso->suffix; zso++) { 296 fullpath = PyUnicode_FromFormat("%U%s", path, zso->suffix); 297 if (fullpath == NULL) { 298 Py_DECREF(path); 299 return MI_ERROR; 300 } 301 item = PyDict_GetItem(self->files, fullpath); 302 Py_DECREF(fullpath); 303 if (item != NULL) { 304 Py_DECREF(path); 305 if (zso->type & IS_PACKAGE) 306 return MI_PACKAGE; 307 else 308 return MI_MODULE; 309 } 310 } 311 Py_DECREF(path); 312 return MI_NOT_FOUND; 313} 314 315/* Check whether we can satisfy the import of the module named by 316 'fullname'. Return self if we can, None if we can't. */ 317static PyObject * 318zipimporter_find_module(PyObject *obj, PyObject *args) 319{ 320 ZipImporter *self = (ZipImporter *)obj; 321 PyObject *path = NULL; 322 PyObject *fullname; 323 enum zi_module_info mi; 324 325 if (!PyArg_ParseTuple(args, "U|O:zipimporter.find_module", 326 &fullname, &path)) 327 return NULL; 328 329 mi = get_module_info(self, fullname); 330 if (mi == MI_ERROR) 331 return NULL; 332 if (mi == MI_NOT_FOUND) { 333 Py_INCREF(Py_None); 334 return Py_None; 335 } 336 Py_INCREF(self); 337 return (PyObject *)self; 338} 339 340/* Load and return the module named by 'fullname'. 
*/ 341static PyObject * 342zipimporter_load_module(PyObject *obj, PyObject *args) 343{ 344 ZipImporter *self = (ZipImporter *)obj; 345 PyObject *code = NULL, *mod, *dict; 346 PyObject *fullname; 347 PyObject *modpath = NULL; 348 int ispackage; 349 350 if (!PyArg_ParseTuple(args, "U:zipimporter.load_module", 351 &fullname)) 352 return NULL; 353 if (PyUnicode_READY(fullname) == -1) 354 return NULL; 355 356 code = get_module_code(self, fullname, &ispackage, &modpath); 357 if (code == NULL) 358 goto error; 359 360 mod = PyImport_AddModuleObject(fullname); 361 if (mod == NULL) 362 goto error; 363 dict = PyModule_GetDict(mod); 364 365 /* mod.__loader__ = self */ 366 if (PyDict_SetItemString(dict, "__loader__", (PyObject *)self) != 0) 367 goto error; 368 369 if (ispackage) { 370 /* add __path__ to the module *before* the code gets 371 executed */ 372 PyObject *pkgpath, *fullpath; 373 PyObject *subname = get_subname(fullname); 374 int err; 375 376 fullpath = PyUnicode_FromFormat("%U%c%U%U", 377 self->archive, SEP, 378 self->prefix, subname); 379 Py_DECREF(subname); 380 if (fullpath == NULL) 381 goto error; 382 383 pkgpath = Py_BuildValue("[N]", fullpath); 384 if (pkgpath == NULL) 385 goto error; 386 err = PyDict_SetItemString(dict, "__path__", pkgpath); 387 Py_DECREF(pkgpath); 388 if (err != 0) 389 goto error; 390 } 391 mod = PyImport_ExecCodeModuleObject(fullname, code, modpath, NULL); 392 Py_CLEAR(code); 393 if (mod == NULL) 394 goto error; 395 396 if (Py_VerboseFlag) 397 PySys_FormatStderr("import %U # loaded from Zip %U\n", 398 fullname, modpath); 399 Py_DECREF(modpath); 400 return mod; 401error: 402 Py_XDECREF(code); 403 Py_XDECREF(modpath); 404 return NULL; 405} 406 407/* Return a string matching __file__ for the named module */ 408static PyObject * 409zipimporter_get_filename(PyObject *obj, PyObject *args) 410{ 411 ZipImporter *self = (ZipImporter *)obj; 412 PyObject *fullname, *code, *modpath; 413 int ispackage; 414 415 if (!PyArg_ParseTuple(args, 
"U:zipimporter.get_filename", 416 &fullname)) 417 return NULL; 418 419 /* Deciding the filename requires working out where the code 420 would come from if the module was actually loaded */ 421 code = get_module_code(self, fullname, &ispackage, &modpath); 422 if (code == NULL) 423 return NULL; 424 Py_DECREF(code); /* Only need the path info */ 425 426 return modpath; 427} 428 429/* Return a bool signifying whether the module is a package or not. */ 430static PyObject * 431zipimporter_is_package(PyObject *obj, PyObject *args) 432{ 433 ZipImporter *self = (ZipImporter *)obj; 434 PyObject *fullname; 435 enum zi_module_info mi; 436 437 if (!PyArg_ParseTuple(args, "U:zipimporter.is_package", 438 &fullname)) 439 return NULL; 440 441 mi = get_module_info(self, fullname); 442 if (mi == MI_ERROR) 443 return NULL; 444 if (mi == MI_NOT_FOUND) { 445 PyErr_Format(ZipImportError, "can't find module %R", fullname); 446 return NULL; 447 } 448 return PyBool_FromLong(mi == MI_PACKAGE); 449} 450 451 452static PyObject * 453zipimporter_get_data(PyObject *obj, PyObject *args) 454{ 455 ZipImporter *self = (ZipImporter *)obj; 456 PyObject *pathobj, *key; 457 const Py_UCS4 *path; 458#ifdef ALTSEP 459 Py_UCS4 *p; 460#endif 461 PyObject *toc_entry; 462 Py_ssize_t path_len, len; 463 Py_UCS4 buf[MAXPATHLEN + 1], archive[MAXPATHLEN + 1]; 464 465 if (!PyArg_ParseTuple(args, "U:zipimporter.get_data", &pathobj)) 466 return NULL; 467 468 if (PyUnicode_READY(pathobj) == -1) 469 return NULL; 470 471 path_len = PyUnicode_GET_LENGTH(pathobj); 472 if (path_len >= MAXPATHLEN) { 473 PyErr_SetString(ZipImportError, "path too long"); 474 return NULL; 475 } 476 if (!PyUnicode_AsUCS4(pathobj, buf, Py_ARRAY_LENGTH(buf), 1)) 477 return NULL; 478 path = buf; 479#ifdef ALTSEP 480 for (p = buf; *p; p++) { 481 if (*p == ALTSEP) 482 *p = SEP; 483 } 484#endif 485 len = PyUnicode_GET_LENGTH(self->archive); 486 if ((size_t)len < Py_UCS4_strlen(path)) { 487 if (!PyUnicode_AsUCS4(self->archive, archive, 
Py_ARRAY_LENGTH(archive), 1)) 488 return NULL; 489 if (Py_UCS4_strncmp(path, archive, len) == 0 && 490 path[len] == SEP) { 491 path += len + 1; 492 path_len -= len + 1; 493 } 494 } 495 496 key = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, 497 path, path_len); 498 if (key == NULL) 499 return NULL; 500 toc_entry = PyDict_GetItem(self->files, key); 501 if (toc_entry == NULL) { 502 PyErr_SetFromErrnoWithFilenameObject(PyExc_IOError, key); 503 Py_DECREF(key); 504 return NULL; 505 } 506 Py_DECREF(key); 507 return get_data(self->archive, toc_entry); 508} 509 510static PyObject * 511zipimporter_get_code(PyObject *obj, PyObject *args) 512{ 513 ZipImporter *self = (ZipImporter *)obj; 514 PyObject *fullname; 515 516 if (!PyArg_ParseTuple(args, "U:zipimporter.get_code", &fullname)) 517 return NULL; 518 519 return get_module_code(self, fullname, NULL, NULL); 520} 521 522static PyObject * 523zipimporter_get_source(PyObject *obj, PyObject *args) 524{ 525 ZipImporter *self = (ZipImporter *)obj; 526 PyObject *toc_entry; 527 PyObject *fullname, *subname, *path, *fullpath; 528 enum zi_module_info mi; 529 530 if (!PyArg_ParseTuple(args, "U:zipimporter.get_source", &fullname)) 531 return NULL; 532 533 mi = get_module_info(self, fullname); 534 if (mi == MI_ERROR) 535 return NULL; 536 if (mi == MI_NOT_FOUND) { 537 PyErr_Format(ZipImportError, "can't find module %R", fullname); 538 return NULL; 539 } 540 541 subname = get_subname(fullname); 542 if (subname == NULL) 543 return NULL; 544 545 path = make_filename(self->prefix, subname); 546 Py_DECREF(subname); 547 if (path == NULL) 548 return NULL; 549 550 if (mi == MI_PACKAGE) 551 fullpath = PyUnicode_FromFormat("%U%c__init__.py", path, SEP); 552 else 553 fullpath = PyUnicode_FromFormat("%U.py", path); 554 Py_DECREF(path); 555 if (fullpath == NULL) 556 return NULL; 557 558 toc_entry = PyDict_GetItem(self->files, fullpath); 559 Py_DECREF(fullpath); 560 if (toc_entry != NULL) { 561 PyObject *res, *bytes; 562 bytes = 
get_data(self->archive, toc_entry); 563 if (bytes == NULL) 564 return NULL; 565 res = PyUnicode_FromStringAndSize(PyBytes_AS_STRING(bytes), 566 PyBytes_GET_SIZE(bytes)); 567 Py_DECREF(bytes); 568 return res; 569 } 570 571 /* we have the module, but no source */ 572 Py_INCREF(Py_None); 573 return Py_None; 574} 575 576PyDoc_STRVAR(doc_find_module, 577"find_module(fullname, path=None) -> self or None.\n\ 578\n\ 579Search for a module specified by 'fullname'. 'fullname' must be the\n\ 580fully qualified (dotted) module name. It returns the zipimporter\n\ 581instance itself if the module was found, or None if it wasn't.\n\ 582The optional 'path' argument is ignored -- it's there for compatibility\n\ 583with the importer protocol."); 584 585PyDoc_STRVAR(doc_load_module, 586"load_module(fullname) -> module.\n\ 587\n\ 588Load the module specified by 'fullname'. 'fullname' must be the\n\ 589fully qualified (dotted) module name. It returns the imported\n\ 590module, or raises ZipImportError if it wasn't found."); 591 592PyDoc_STRVAR(doc_get_data, 593"get_data(pathname) -> string with file data.\n\ 594\n\ 595Return the data associated with 'pathname'. Raise IOError if\n\ 596the file wasn't found."); 597 598PyDoc_STRVAR(doc_is_package, 599"is_package(fullname) -> bool.\n\ 600\n\ 601Return True if the module specified by fullname is a package.\n\ 602Raise ZipImportError if the module couldn't be found."); 603 604PyDoc_STRVAR(doc_get_code, 605"get_code(fullname) -> code object.\n\ 606\n\ 607Return the code object for the specified module. Raise ZipImportError\n\ 608if the module couldn't be found."); 609 610PyDoc_STRVAR(doc_get_source, 611"get_source(fullname) -> source string.\n\ 612\n\ 613Return the source code for the specified module. 
Raise ZipImportError\n\ 614if the module couldn't be found, return None if the archive does\n\ 615contain the module, but has no source for it."); 616 617 618PyDoc_STRVAR(doc_get_filename, 619"get_filename(fullname) -> filename string.\n\ 620\n\ 621Return the filename for the specified module."); 622 623static PyMethodDef zipimporter_methods[] = { 624 {"find_module", zipimporter_find_module, METH_VARARGS, 625 doc_find_module}, 626 {"load_module", zipimporter_load_module, METH_VARARGS, 627 doc_load_module}, 628 {"get_data", zipimporter_get_data, METH_VARARGS, 629 doc_get_data}, 630 {"get_code", zipimporter_get_code, METH_VARARGS, 631 doc_get_code}, 632 {"get_source", zipimporter_get_source, METH_VARARGS, 633 doc_get_source}, 634 {"get_filename", zipimporter_get_filename, METH_VARARGS, 635 doc_get_filename}, 636 {"is_package", zipimporter_is_package, METH_VARARGS, 637 doc_is_package}, 638 {NULL, NULL} /* sentinel */ 639}; 640 641static PyMemberDef zipimporter_members[] = { 642 {"archive", T_OBJECT, offsetof(ZipImporter, archive), READONLY}, 643 {"prefix", T_OBJECT, offsetof(ZipImporter, prefix), READONLY}, 644 {"_files", T_OBJECT, offsetof(ZipImporter, files), READONLY}, 645 {NULL} 646}; 647 648PyDoc_STRVAR(zipimporter_doc, 649"zipimporter(archivepath) -> zipimporter object\n\ 650\n\ 651Create a new zipimporter instance. 'archivepath' must be a path to\n\ 652a zipfile, or to a specific path inside a zipfile. 
For example, it can be\n\ 653'/tmp/myimport.zip', or '/tmp/myimport.zip/mydirectory', if mydirectory is a\n\ 654valid directory inside the archive.\n\ 655\n\ 656'ZipImportError is raised if 'archivepath' doesn't point to a valid Zip\n\ 657archive.\n\ 658\n\ 659The 'archive' attribute of zipimporter objects contains the name of the\n\ 660zipfile targeted."); 661 662#define DEFERRED_ADDRESS(ADDR) 0 663 664static PyTypeObject ZipImporter_Type = { 665 PyVarObject_HEAD_INIT(DEFERRED_ADDRESS(&PyType_Type), 0) 666 "zipimport.zipimporter", 667 sizeof(ZipImporter), 668 0, /* tp_itemsize */ 669 (destructor)zipimporter_dealloc, /* tp_dealloc */ 670 0, /* tp_print */ 671 0, /* tp_getattr */ 672 0, /* tp_setattr */ 673 0, /* tp_reserved */ 674 (reprfunc)zipimporter_repr, /* tp_repr */ 675 0, /* tp_as_number */ 676 0, /* tp_as_sequence */ 677 0, /* tp_as_mapping */ 678 0, /* tp_hash */ 679 0, /* tp_call */ 680 0, /* tp_str */ 681 PyObject_GenericGetAttr, /* tp_getattro */ 682 0, /* tp_setattro */ 683 0, /* tp_as_buffer */ 684 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | 685 Py_TPFLAGS_HAVE_GC, /* tp_flags */ 686 zipimporter_doc, /* tp_doc */ 687 zipimporter_traverse, /* tp_traverse */ 688 0, /* tp_clear */ 689 0, /* tp_richcompare */ 690 0, /* tp_weaklistoffset */ 691 0, /* tp_iter */ 692 0, /* tp_iternext */ 693 zipimporter_methods, /* tp_methods */ 694 zipimporter_members, /* tp_members */ 695 0, /* tp_getset */ 696 0, /* tp_base */ 697 0, /* tp_dict */ 698 0, /* tp_descr_get */ 699 0, /* tp_descr_set */ 700 0, /* tp_dictoffset */ 701 (initproc)zipimporter_init, /* tp_init */ 702 PyType_GenericAlloc, /* tp_alloc */ 703 PyType_GenericNew, /* tp_new */ 704 PyObject_GC_Del, /* tp_free */ 705}; 706 707 708/* implementation */ 709 710/* Given a buffer, return the long that is represented by the first 711 4 bytes, encoded as little endian. 
/* Given a buffer, return the long that is represented by the first
   4 bytes, decoded as a little-endian signed 32-bit integer.  This
   partially reimplements marshal.c:r_long().

   The value is assembled in unsigned arithmetic, so no signed shift can
   reach the sign bit, and bit 31 is sign-extended explicitly, so the
   result does not depend on the width of 'long'. */
static long
get_long(const unsigned char *buf)
{
    unsigned long bits;

    bits = (unsigned long)buf[0]
         | ((unsigned long)buf[1] << 8)
         | ((unsigned long)buf[2] << 16)
         | ((unsigned long)buf[3] << 24);

    if (bits & 0x80000000UL) {
        /* two's complement negative: -(~bits + 1), computed without
           overflowing even for the most negative 32-bit value */
        return -(long)(~bits & 0x7FFFFFFFUL) - 1;
    }
    return (long)bits;
}
748*/ 749static PyObject * 750read_directory(PyObject *archive) 751{ 752 PyObject *files = NULL; 753 FILE *fp; 754 unsigned short flags; 755 long compress, crc, data_size, file_size, file_offset, date, time; 756 long header_offset, name_size, header_size, header_position; 757 long l, count; 758 Py_ssize_t i; 759 size_t length; 760 Py_UCS4 path[MAXPATHLEN + 5]; 761 char name[MAXPATHLEN + 5]; 762 PyObject *nameobj = NULL; 763 char *p, endof_central_dir[22]; 764 long arc_offset; /* offset from beginning of file to start of zip-archive */ 765 PyObject *pathobj; 766 const char *charset; 767 int bootstrap; 768 769 if (PyUnicode_GET_LENGTH(archive) > MAXPATHLEN) { 770 PyErr_SetString(PyExc_OverflowError, 771 "Zip path name is too long"); 772 return NULL; 773 } 774 if (!PyUnicode_AsUCS4(archive, path, Py_ARRAY_LENGTH(path), 1)) 775 return NULL; 776 777 fp = _Py_fopen(archive, "rb"); 778 if (fp == NULL) { 779 PyErr_Format(ZipImportError, "can't open Zip file: %R", archive); 780 return NULL; 781 } 782 fseek(fp, -22, SEEK_END); 783 header_position = ftell(fp); 784 if (fread(endof_central_dir, 1, 22, fp) != 22) { 785 fclose(fp); 786 PyErr_Format(ZipImportError, "can't read Zip file: %R", archive); 787 return NULL; 788 } 789 if (get_long((unsigned char *)endof_central_dir) != 0x06054B50) { 790 /* Bad: End of Central Dir signature */ 791 fclose(fp); 792 PyErr_Format(ZipImportError, "not a Zip file: %R", archive); 793 return NULL; 794 } 795 796 header_size = get_long((unsigned char *)endof_central_dir + 12); 797 header_offset = get_long((unsigned char *)endof_central_dir + 16); 798 arc_offset = header_position - header_offset - header_size; 799 header_offset += arc_offset; 800 801 files = PyDict_New(); 802 if (files == NULL) 803 goto error; 804 805 length = Py_UCS4_strlen(path); 806 path[length] = SEP; 807 808 /* Start of Central Directory */ 809 count = 0; 810 for (;;) { 811 PyObject *t; 812 int err; 813 814 fseek(fp, header_offset, 0); /* Start of file header */ 815 l = 
PyMarshal_ReadLongFromFile(fp); 816 if (l != 0x02014B50) 817 break; /* Bad: Central Dir File Header */ 818 fseek(fp, header_offset + 8, 0); 819 flags = (unsigned short)PyMarshal_ReadShortFromFile(fp); 820 compress = PyMarshal_ReadShortFromFile(fp); 821 time = PyMarshal_ReadShortFromFile(fp); 822 date = PyMarshal_ReadShortFromFile(fp); 823 crc = PyMarshal_ReadLongFromFile(fp); 824 data_size = PyMarshal_ReadLongFromFile(fp); 825 file_size = PyMarshal_ReadLongFromFile(fp); 826 name_size = PyMarshal_ReadShortFromFile(fp); 827 header_size = 46 + name_size + 828 PyMarshal_ReadShortFromFile(fp) + 829 PyMarshal_ReadShortFromFile(fp); 830 fseek(fp, header_offset + 42, 0); 831 file_offset = PyMarshal_ReadLongFromFile(fp) + arc_offset; 832 if (name_size > MAXPATHLEN) 833 name_size = MAXPATHLEN; 834 835 p = name; 836 for (i = 0; i < (Py_ssize_t)name_size; i++) { 837 *p = (char)getc(fp); 838 if (*p == '/') 839 *p = SEP; 840 p++; 841 } 842 *p = 0; /* Add terminating null byte */ 843 header_offset += header_size; 844 845 bootstrap = 0; 846 if (flags & 0x0800) 847 charset = "utf-8"; 848 else if (!PyThreadState_GET()->interp->codecs_initialized) { 849 /* During bootstrap, we may need to load the encodings 850 package from a ZIP file. But the cp437 encoding is implemented 851 in Python in the encodings package. 852 853 Break out of this dependency by assuming that the path to 854 the encodings module is ASCII-only. 
*/ 855 charset = "ascii"; 856 bootstrap = 1; 857 } 858 else 859 charset = "cp437"; 860 nameobj = PyUnicode_Decode(name, name_size, charset, NULL); 861 if (PyUnicode_READY(nameobj) == -1) 862 goto error; 863 if (nameobj == NULL) { 864 if (bootstrap) 865 PyErr_Format(PyExc_NotImplementedError, 866 "bootstrap issue: python%i%i.zip contains non-ASCII " 867 "filenames without the unicode flag", 868 PY_MAJOR_VERSION, PY_MINOR_VERSION); 869 goto error; 870 } 871 for (i = 0; (i < (MAXPATHLEN - (Py_ssize_t)length - 1)) && 872 (i < PyUnicode_GET_LENGTH(nameobj)); i++) 873 path[length + 1 + i] = PyUnicode_READ_CHAR(nameobj, i); 874 path[length + 1 + i] = 0; 875 pathobj = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, 876 path, Py_UCS4_strlen(path)); 877 if (pathobj == NULL) 878 goto error; 879 t = Py_BuildValue("Niiiiiii", pathobj, compress, data_size, 880 file_size, file_offset, time, date, crc); 881 if (t == NULL) 882 goto error; 883 err = PyDict_SetItem(files, nameobj, t); 884 Py_CLEAR(nameobj); 885 Py_DECREF(t); 886 if (err != 0) 887 goto error; 888 count++; 889 } 890 fclose(fp); 891 if (Py_VerboseFlag) 892 PySys_FormatStderr("# zipimport: found %ld names in %R\n", 893 count, archive); 894 return files; 895error: 896 fclose(fp); 897 Py_XDECREF(files); 898 Py_XDECREF(nameobj); 899 return NULL; 900} 901 902/* Return the zlib.decompress function object, or NULL if zlib couldn't 903 be imported. The function is cached when found, so subsequent calls 904 don't import zlib again. */ 905static PyObject * 906get_decompress_func(void) 907{ 908 static int importing_zlib = 0; 909 PyObject *zlib; 910 PyObject *decompress; 911 _Py_identifier(decompress); 912 913 if (importing_zlib != 0) 914 /* Someone has a zlib.py[co] in their Zip file; 915 let's avoid a stack overflow. 
*/ 916 return NULL; 917 importing_zlib = 1; 918 zlib = PyImport_ImportModuleNoBlock("zlib"); 919 importing_zlib = 0; 920 if (zlib != NULL) { 921 decompress = _PyObject_GetAttrId(zlib, 922 &PyId_decompress); 923 Py_DECREF(zlib); 924 } 925 else { 926 PyErr_Clear(); 927 decompress = NULL; 928 } 929 if (Py_VerboseFlag) 930 PySys_WriteStderr("# zipimport: zlib %s\n", 931 zlib != NULL ? "available": "UNAVAILABLE"); 932 return decompress; 933} 934 935/* Given a path to a Zip file and a toc_entry, return the (uncompressed) 936 data as a new reference. */ 937static PyObject * 938get_data(PyObject *archive, PyObject *toc_entry) 939{ 940 PyObject *raw_data, *data = NULL, *decompress; 941 char *buf; 942 FILE *fp; 943 int err; 944 Py_ssize_t bytes_read = 0; 945 long l; 946 PyObject *datapath; 947 long compress, data_size, file_size, file_offset, bytes_size; 948 long time, date, crc; 949 950 if (!PyArg_ParseTuple(toc_entry, "Olllllll", &datapath, &compress, 951 &data_size, &file_size, &file_offset, &time, 952 &date, &crc)) { 953 return NULL; 954 } 955 956 fp = _Py_fopen(archive, "rb"); 957 if (!fp) { 958 PyErr_Format(PyExc_IOError, 959 "zipimport: can not open file %U", archive); 960 return NULL; 961 } 962 963 /* Check to make sure the local file header is correct */ 964 fseek(fp, file_offset, 0); 965 l = PyMarshal_ReadLongFromFile(fp); 966 if (l != 0x04034B50) { 967 /* Bad: Local File Header */ 968 PyErr_Format(ZipImportError, 969 "bad local file header in %U", 970 archive); 971 fclose(fp); 972 return NULL; 973 } 974 fseek(fp, file_offset + 26, 0); 975 l = 30 + PyMarshal_ReadShortFromFile(fp) + 976 PyMarshal_ReadShortFromFile(fp); /* local header size */ 977 file_offset += l; /* Start of file data */ 978 979 bytes_size = compress == 0 ? 
data_size : data_size + 1; 980 if (bytes_size == 0) 981 bytes_size++; 982 raw_data = PyBytes_FromStringAndSize((char *)NULL, bytes_size); 983 984 if (raw_data == NULL) { 985 fclose(fp); 986 return NULL; 987 } 988 buf = PyBytes_AsString(raw_data); 989 990 err = fseek(fp, file_offset, 0); 991 if (err == 0) 992 bytes_read = fread(buf, 1, data_size, fp); 993 fclose(fp); 994 if (err || bytes_read != data_size) { 995 PyErr_SetString(PyExc_IOError, 996 "zipimport: can't read data"); 997 Py_DECREF(raw_data); 998 return NULL; 999 } 1000 1001 if (compress != 0) { 1002 buf[data_size] = 'Z'; /* saw this in zipfile.py */ 1003 data_size++; 1004 } 1005 buf[data_size] = '\0'; 1006 1007 if (compress == 0) { /* data is not compressed */ 1008 data = PyBytes_FromStringAndSize(buf, data_size); 1009 Py_DECREF(raw_data); 1010 return data; 1011 } 1012 1013 /* Decompress with zlib */ 1014 decompress = get_decompress_func(); 1015 if (decompress == NULL) { 1016 PyErr_SetString(ZipImportError, 1017 "can't decompress data; " 1018 "zlib not available"); 1019 goto error; 1020 } 1021 data = PyObject_CallFunction(decompress, "Oi", raw_data, -15); 1022 Py_DECREF(decompress); 1023error: 1024 Py_DECREF(raw_data); 1025 return data; 1026} 1027 1028/* Lenient date/time comparison function. The precision of the mtime 1029 in the archive is lower than the mtime stored in a .pyc: we 1030 must allow a difference of at most one second. */ 1031static int 1032eq_mtime(time_t t1, time_t t2) 1033{ 1034 time_t d = t1 - t2; 1035 if (d < 0) 1036 d = -d; 1037 /* dostime only stores even seconds, so be lenient */ 1038 return d <= 1; 1039} 1040 1041/* Given the contents of a .py[co] file in a buffer, unmarshal the data 1042 and return the code object. Return None if it the magic word doesn't 1043 match (we do this instead of raising an exception as we fall back 1044 to .py if available and we don't want to mask other errors). 1045 Returns a new reference. 
*/ 1046static PyObject * 1047unmarshal_code(PyObject *pathname, PyObject *data, time_t mtime) 1048{ 1049 PyObject *code; 1050 char *buf = PyBytes_AsString(data); 1051 Py_ssize_t size = PyBytes_Size(data); 1052 1053 if (size <= 9) { 1054 PyErr_SetString(ZipImportError, 1055 "bad pyc data"); 1056 return NULL; 1057 } 1058 1059 if (get_long((unsigned char *)buf) != PyImport_GetMagicNumber()) { 1060 if (Py_VerboseFlag) 1061 PySys_FormatStderr("# %R has bad magic\n", 1062 pathname); 1063 Py_INCREF(Py_None); 1064 return Py_None; /* signal caller to try alternative */ 1065 } 1066 1067 if (mtime != 0 && !eq_mtime(get_long((unsigned char *)buf + 4), 1068 mtime)) { 1069 if (Py_VerboseFlag) 1070 PySys_FormatStderr("# %R has bad mtime\n", 1071 pathname); 1072 Py_INCREF(Py_None); 1073 return Py_None; /* signal caller to try alternative */ 1074 } 1075 1076 code = PyMarshal_ReadObjectFromString(buf + 8, size - 8); 1077 if (code == NULL) 1078 return NULL; 1079 if (!PyCode_Check(code)) { 1080 Py_DECREF(code); 1081 PyErr_Format(PyExc_TypeError, 1082 "compiled module %R is not a code object", 1083 pathname); 1084 return NULL; 1085 } 1086 return code; 1087} 1088 1089/* Replace any occurances of "\r\n?" in the input string with "\n". 1090 This converts DOS and Mac line endings to Unix line endings. 1091 Also append a trailing "\n" to be compatible with 1092 PyParser_SimpleParseFile(). Returns a new reference. */ 1093static PyObject * 1094normalize_line_endings(PyObject *source) 1095{ 1096 char *buf, *q, *p; 1097 PyObject *fixed_source; 1098 int len = 0; 1099 1100 p = PyBytes_AsString(source); 1101 if (p == NULL) { 1102 return PyBytes_FromStringAndSize("\n\0", 2); 1103 } 1104 1105 /* one char extra for trailing \n and one for terminating \0 */ 1106 buf = (char *)PyMem_Malloc(PyBytes_Size(source) + 2); 1107 if (buf == NULL) { 1108 PyErr_SetString(PyExc_MemoryError, 1109 "zipimport: no memory to allocate " 1110 "source buffer"); 1111 return NULL; 1112 } 1113 /* replace "\r\n?" 
/* Convert a DOS time/date pair, as stored in a Zip archive, to a time_t
   that can be compared with the time stamp stored in .pyc files.

   dostime: bits 0-4 seconds/2, bits 5-10 minutes, bits 11-15 hours.
   dosdate: bits 0-4 day, bits 5-8 month (1-based), bits 9-15 years
            since 1980.

   The fields are interpreted as local time by mktime(), which also
   decides whether DST applies (tm_isdst = -1). */
static time_t
parse_dostime(int dostime, int dosdate)
{
    struct tm when;
    const int half_seconds = dostime & 0x1f;
    const int minutes = (dostime >> 5) & 0x3f;
    const int hours = (dostime >> 11) & 0x1f;
    const int day = dosdate & 0x1f;
    const int month = (dosdate >> 5) & 0x0f;
    const int years_since_1980 = (dosdate >> 9) & 0x7f;

    /* clear every field first; tm_wday and tm_yday are ignored by mktime */
    memset(&when, 0, sizeof(when));

    when.tm_year = years_since_1980 + 80;   /* struct tm counts from 1900 */
    when.tm_mon = month - 1;                /* struct tm months are 0-based */
    when.tm_mday = day;
    when.tm_hour = hours;
    when.tm_min = minutes;
    when.tm_sec = half_seconds * 2;
    when.tm_isdst = -1;

    return mktime(&when);
}
*/ 1179static time_t 1180get_mtime_of_source(ZipImporter *self, PyObject *path) 1181{ 1182 PyObject *toc_entry, *stripped; 1183 time_t mtime; 1184 1185 /* strip 'c' or 'o' from *.py[co] */ 1186 if (PyUnicode_READY(path) == -1) 1187 return (time_t)-1; 1188 stripped = PyUnicode_FromKindAndData(PyUnicode_KIND(path), 1189 PyUnicode_DATA(path), 1190 PyUnicode_GET_LENGTH(path) - 1); 1191 if (stripped == NULL) 1192 return (time_t)-1; 1193 1194 toc_entry = PyDict_GetItem(self->files, stripped); 1195 Py_DECREF(stripped); 1196 if (toc_entry != NULL && PyTuple_Check(toc_entry) && 1197 PyTuple_Size(toc_entry) == 8) { 1198 /* fetch the time stamp of the .py file for comparison 1199 with an embedded pyc time stamp */ 1200 int time, date; 1201 time = PyLong_AsLong(PyTuple_GetItem(toc_entry, 5)); 1202 date = PyLong_AsLong(PyTuple_GetItem(toc_entry, 6)); 1203 mtime = parse_dostime(time, date); 1204 } else 1205 mtime = 0; 1206 return mtime; 1207} 1208 1209/* Return the code object for the module named by 'fullname' from the 1210 Zip archive as a new reference. */ 1211static PyObject * 1212get_code_from_data(ZipImporter *self, int ispackage, int isbytecode, 1213 time_t mtime, PyObject *toc_entry) 1214{ 1215 PyObject *data, *modpath, *code; 1216 1217 data = get_data(self->archive, toc_entry); 1218 if (data == NULL) 1219 return NULL; 1220 1221 modpath = PyTuple_GetItem(toc_entry, 0); 1222 if (isbytecode) 1223 code = unmarshal_code(modpath, data, mtime); 1224 else 1225 code = compile_source(modpath, data); 1226 Py_DECREF(data); 1227 return code; 1228} 1229 1230/* Get the code object associated with the module specified by 1231 'fullname'. 
*/ 1232static PyObject * 1233get_module_code(ZipImporter *self, PyObject *fullname, 1234 int *p_ispackage, PyObject **p_modpath) 1235{ 1236 PyObject *code = NULL, *toc_entry, *subname; 1237 PyObject *path, *fullpath = NULL; 1238 struct st_zip_searchorder *zso; 1239 1240 subname = get_subname(fullname); 1241 if (subname == NULL) 1242 return NULL; 1243 1244 path = make_filename(self->prefix, subname); 1245 Py_DECREF(subname); 1246 if (path == NULL) 1247 return NULL; 1248 1249 for (zso = zip_searchorder; *zso->suffix; zso++) { 1250 code = NULL; 1251 1252 fullpath = PyUnicode_FromFormat("%U%s", path, zso->suffix); 1253 if (fullpath == NULL) 1254 goto exit; 1255 1256 if (Py_VerboseFlag > 1) 1257 PySys_FormatStderr("# trying %U%c%U\n", 1258 self->archive, (int)SEP, fullpath); 1259 toc_entry = PyDict_GetItem(self->files, fullpath); 1260 if (toc_entry != NULL) { 1261 time_t mtime = 0; 1262 int ispackage = zso->type & IS_PACKAGE; 1263 int isbytecode = zso->type & IS_BYTECODE; 1264 1265 if (isbytecode) { 1266 mtime = get_mtime_of_source(self, fullpath); 1267 if (mtime == (time_t)-1 && PyErr_Occurred()) { 1268 goto exit; 1269 } 1270 } 1271 Py_CLEAR(fullpath); 1272 if (p_ispackage != NULL) 1273 *p_ispackage = ispackage; 1274 code = get_code_from_data(self, ispackage, 1275 isbytecode, mtime, 1276 toc_entry); 1277 if (code == Py_None) { 1278 /* bad magic number or non-matching mtime 1279 in byte code, try next */ 1280 Py_DECREF(code); 1281 continue; 1282 } 1283 if (code != NULL && p_modpath != NULL) { 1284 *p_modpath = PyTuple_GetItem(toc_entry, 0); 1285 Py_INCREF(*p_modpath); 1286 } 1287 goto exit; 1288 } 1289 else 1290 Py_CLEAR(fullpath); 1291 } 1292 PyErr_Format(ZipImportError, "can't find module %R", fullname); 1293exit: 1294 Py_DECREF(path); 1295 Py_XDECREF(fullpath); 1296 return code; 1297} 1298 1299 1300/* Module init */ 1301 1302PyDoc_STRVAR(zipimport_doc, 1303"zipimport provides support for importing Python modules from Zip archives.\n\ 1304\n\ 1305This module exports 
three objects:\n\ 1306- zipimporter: a class; its constructor takes a path to a Zip archive.\n\ 1307- ZipImportError: exception raised by zipimporter objects. It's a\n\ 1308 subclass of ImportError, so it can be caught as ImportError, too.\n\ 1309- _zip_directory_cache: a dict, mapping archive paths to zip directory\n\ 1310 info dicts, as used in zipimporter._files.\n\ 1311\n\ 1312It is usually not needed to use the zipimport module explicitly; it is\n\ 1313used by the builtin import mechanism for sys.path items that are paths\n\ 1314to Zip archives."); 1315 1316static struct PyModuleDef zipimportmodule = { 1317 PyModuleDef_HEAD_INIT, 1318 "zipimport", 1319 zipimport_doc, 1320 -1, 1321 NULL, 1322 NULL, 1323 NULL, 1324 NULL, 1325 NULL 1326}; 1327 1328PyMODINIT_FUNC 1329PyInit_zipimport(void) 1330{ 1331 PyObject *mod; 1332 1333 if (PyType_Ready(&ZipImporter_Type) < 0) 1334 return NULL; 1335 1336 /* Correct directory separator */ 1337 zip_searchorder[0].suffix[0] = SEP; 1338 zip_searchorder[1].suffix[0] = SEP; 1339 zip_searchorder[2].suffix[0] = SEP; 1340 if (Py_OptimizeFlag) { 1341 /* Reverse *.pyc and *.pyo */ 1342 struct st_zip_searchorder tmp; 1343 tmp = zip_searchorder[0]; 1344 zip_searchorder[0] = zip_searchorder[1]; 1345 zip_searchorder[1] = tmp; 1346 tmp = zip_searchorder[3]; 1347 zip_searchorder[3] = zip_searchorder[4]; 1348 zip_searchorder[4] = tmp; 1349 } 1350 1351 mod = PyModule_Create(&zipimportmodule); 1352 if (mod == NULL) 1353 return NULL; 1354 1355 ZipImportError = PyErr_NewException("zipimport.ZipImportError", 1356 PyExc_ImportError, NULL); 1357 if (ZipImportError == NULL) 1358 return NULL; 1359 1360 Py_INCREF(ZipImportError); 1361 if (PyModule_AddObject(mod, "ZipImportError", 1362 ZipImportError) < 0) 1363 return NULL; 1364 1365 Py_INCREF(&ZipImporter_Type); 1366 if (PyModule_AddObject(mod, "zipimporter", 1367 (PyObject *)&ZipImporter_Type) < 0) 1368 return NULL; 1369 1370 zip_directory_cache = PyDict_New(); 1371 if (zip_directory_cache == NULL) 
1372 return NULL; 1373 Py_INCREF(zip_directory_cache); 1374 if (PyModule_AddObject(mod, "_zip_directory_cache", 1375 zip_directory_cache) < 0) 1376 return NULL; 1377 return mod; 1378} 1379