zipimport.c revision 09bf7a799da7c8b7cdd0f9c5cd789769b8cab2d5
1#include "Python.h" 2#include "structmember.h" 3#include "osdefs.h" 4#include "marshal.h" 5#include <time.h> 6 7 8#define IS_SOURCE 0x0 9#define IS_BYTECODE 0x1 10#define IS_PACKAGE 0x2 11 12struct st_zip_searchorder { 13 char suffix[14]; 14 int type; 15}; 16 17/* zip_searchorder defines how we search for a module in the Zip 18 archive: we first search for a package __init__, then for 19 non-package .pyc, .pyo and .py entries. The .pyc and .pyo entries 20 are swapped by initzipimport() if we run in optimized mode. Also, 21 '/' is replaced by SEP there. */ 22static struct st_zip_searchorder zip_searchorder[] = { 23 {"/__init__.pyc", IS_PACKAGE | IS_BYTECODE}, 24 {"/__init__.pyo", IS_PACKAGE | IS_BYTECODE}, 25 {"/__init__.py", IS_PACKAGE | IS_SOURCE}, 26 {".pyc", IS_BYTECODE}, 27 {".pyo", IS_BYTECODE}, 28 {".py", IS_SOURCE}, 29 {"", 0} 30}; 31 32/* zipimporter object definition and support */ 33 34typedef struct _zipimporter ZipImporter; 35 36struct _zipimporter { 37 PyObject_HEAD 38 PyObject *archive; /* pathname of the Zip archive, 39 decoded from the filesystem encoding */ 40 PyObject *prefix; /* file prefix: "a/sub/directory/", 41 encoded to the filesystem encoding */ 42 PyObject *files; /* dict with file info {path: toc_entry} */ 43}; 44 45static PyObject *ZipImportError; 46/* read_directory() cache */ 47static PyObject *zip_directory_cache = NULL; 48 49/* forward decls */ 50static PyObject *read_directory(PyObject *archive); 51static PyObject *get_data(PyObject *archive, PyObject *toc_entry); 52static PyObject *get_module_code(ZipImporter *self, char *fullname, 53 int *p_ispackage, PyObject **p_modpath); 54 55 56#define ZipImporter_Check(op) PyObject_TypeCheck(op, &ZipImporter_Type) 57 58 59/* zipimporter.__init__ 60 Split the "subdirectory" from the Zip archive path, lookup a matching 61 entry in sys.path_importer_cache, fetch the file directory from there 62 if found, or else read it from the archive. */ 63static int 64zipimporter_init(ZipImporter *self, PyObject *args, PyObject *kwds) 65{ 66 PyObject *pathobj, *files; 67 Py_UNICODE *path, *p, *prefix, buf[MAXPATHLEN+2]; 68 Py_ssize_t len; 69 70 if (!_PyArg_NoKeywords("zipimporter()", kwds)) 71 return -1; 72 73 if (!PyArg_ParseTuple(args, "O&:zipimporter", 74 PyUnicode_FSDecoder, &pathobj)) 75 return -1; 76 77 /* copy path to buf */ 78 len = PyUnicode_GET_SIZE(pathobj); 79 if (len == 0) { 80 PyErr_SetString(ZipImportError, "archive path is empty"); 81 goto error; 82 } 83 if (len >= MAXPATHLEN) { 84 PyErr_SetString(ZipImportError, 85 "archive path too long"); 86 goto error; 87 } 88 Py_UNICODE_strcpy(buf, PyUnicode_AS_UNICODE(pathobj)); 89 90#ifdef ALTSEP 91 for (p = buf; *p; p++) { 92 if (*p == ALTSEP) 93 *p = SEP; 94 } 95#endif 96 97 path = NULL; 98 prefix = NULL; 99 for (;;) { 100 struct stat statbuf; 101 int rv; 102 103 if (pathobj == NULL) { 104 pathobj = PyUnicode_FromUnicode(buf, len); 105 if (pathobj == NULL) 106 goto error; 107 } 108 rv = _Py_stat(pathobj, &statbuf); 109 if (rv == 0) { 110 /* it exists */ 111 if (S_ISREG(statbuf.st_mode)) 112 /* it's a file */ 113 path = buf; 114 break; 115 } 116 else if (PyErr_Occurred()) 117 goto error; 118 /* back up one path element */ 119 p = Py_UNICODE_strrchr(buf, SEP); 120 if (prefix != NULL) 121 *prefix = SEP; 122 if (p == NULL) 123 break; 124 *p = '\0'; 125 len = p - buf; 126 prefix = p; 127 Py_CLEAR(pathobj); 128 } 129 if (path == NULL) { 130 PyErr_SetString(ZipImportError, "not a Zip file"); 131 goto error; 132 } 133 134 files = PyDict_GetItem(zip_directory_cache, pathobj); 135 if (files == NULL) { 136 files = read_directory(pathobj); 137 if (files == NULL) 138 goto error; 139 if (PyDict_SetItem(zip_directory_cache, pathobj, files) != 0) 140 goto error; 141 } 142 else 143 Py_INCREF(files); 144 self->files = files; 145 146 self->archive = pathobj; 147 pathobj = NULL; 148 149 if (prefix != NULL) { 150 prefix++; 151 len = Py_UNICODE_strlen(prefix); 152 if (prefix[len-1] != SEP) { 153 /* add trailing SEP */ 154 prefix[len] = SEP; 155 prefix[len + 1] = '\0'; 156 len++; 157 } 158 } 159 else 160 len = 0; 161 self->prefix = PyUnicode_FromUnicode(prefix, len); 162 if (self->prefix == NULL) 163 goto error; 164 165 return 0; 166 167error: 168 Py_XDECREF(pathobj); 169 return -1; 170} 171 172/* GC support. */ 173static int 174zipimporter_traverse(PyObject *obj, visitproc visit, void *arg) 175{ 176 ZipImporter *self = (ZipImporter *)obj; 177 Py_VISIT(self->files); 178 return 0; 179} 180 181static void 182zipimporter_dealloc(ZipImporter *self) 183{ 184 PyObject_GC_UnTrack(self); 185 Py_XDECREF(self->archive); 186 Py_XDECREF(self->prefix); 187 Py_XDECREF(self->files); 188 Py_TYPE(self)->tp_free((PyObject *)self); 189} 190 191static PyObject * 192zipimporter_repr(ZipImporter *self) 193{ 194 if (self->archive == NULL) 195 return PyUnicode_FromString("<zipimporter object \"???\">"); 196 else if (self->prefix != NULL && PyUnicode_GET_SIZE(self->prefix) != 0) 197 return PyUnicode_FromFormat("<zipimporter object \"%U%c%U\">", 198 self->archive, SEP, self->prefix); 199 else 200 return PyUnicode_FromFormat("<zipimporter object \"%U\">", 201 self->archive); 202} 203 204/* return fullname.split(".")[-1] */ 205static char * 206get_subname(char *fullname) 207{ 208 char *subname = strrchr(fullname, '.'); 209 if (subname == NULL) 210 subname = fullname; 211 else 212 subname++; 213 return subname; 214} 215 216/* Given a (sub)modulename, write the potential file path in the 217 archive (without extension) to the path buffer. Return the 218 length of the resulting string. */ 219static int 220make_filename(PyObject *prefix_obj, char *name, char *path, size_t pathsize) 221{ 222 size_t len; 223 char *p; 224 PyObject *prefix; 225 226 prefix = PyUnicode_EncodeFSDefault(prefix_obj); 227 if (prefix == NULL) 228 return -1; 229 len = PyBytes_GET_SIZE(prefix); 230 231 /* self.prefix + name [+ SEP + "__init__"] + ".py[co]" */ 232 if (len + strlen(name) + 13 >= pathsize - 1) { 233 PyErr_SetString(ZipImportError, "path too long"); 234 Py_DECREF(prefix); 235 return -1; 236 } 237 238 strcpy(path, PyBytes_AS_STRING(prefix)); 239 Py_DECREF(prefix); 240 strcpy(path + len, name); 241 for (p = path + len; *p; p++) { 242 if (*p == '.') 243 *p = SEP; 244 } 245 len += strlen(name); 246 assert(len < INT_MAX); 247 return (int)len; 248} 249 250enum zi_module_info { 251 MI_ERROR, 252 MI_NOT_FOUND, 253 MI_MODULE, 254 MI_PACKAGE 255}; 256 257/* Return some information about a module. */ 258static enum zi_module_info 259get_module_info(ZipImporter *self, char *fullname) 260{ 261 char *subname, path[MAXPATHLEN + 1]; 262 int len; 263 struct st_zip_searchorder *zso; 264 265 subname = get_subname(fullname); 266 267 len = make_filename(self->prefix, subname, path, sizeof(path)); 268 if (len < 0) 269 return MI_ERROR; 270 271 for (zso = zip_searchorder; *zso->suffix; zso++) { 272 strcpy(path + len, zso->suffix); 273 if (PyDict_GetItemString(self->files, path) != NULL) { 274 if (zso->type & IS_PACKAGE) 275 return MI_PACKAGE; 276 else 277 return MI_MODULE; 278 } 279 } 280 return MI_NOT_FOUND; 281} 282 283/* Check whether we can satisfy the import of the module named by 284 'fullname'. Return self if we can, None if we can't. */ 285static PyObject * 286zipimporter_find_module(PyObject *obj, PyObject *args) 287{ 288 ZipImporter *self = (ZipImporter *)obj; 289 PyObject *path = NULL; 290 char *fullname; 291 enum zi_module_info mi; 292 293 if (!PyArg_ParseTuple(args, "s|O:zipimporter.find_module", 294 &fullname, &path)) 295 return NULL; 296 297 mi = get_module_info(self, fullname); 298 if (mi == MI_ERROR) 299 return NULL; 300 if (mi == MI_NOT_FOUND) { 301 Py_INCREF(Py_None); 302 return Py_None; 303 } 304 Py_INCREF(self); 305 return (PyObject *)self; 306} 307 308/* Load and return the module named by 'fullname'. */ 309static PyObject * 310zipimporter_load_module(PyObject *obj, PyObject *args) 311{ 312 ZipImporter *self = (ZipImporter *)obj; 313 PyObject *code = NULL, *mod, *dict; 314 char *fullname; 315 PyObject *modpath = NULL, *modpath_bytes; 316 int ispackage; 317 318 if (!PyArg_ParseTuple(args, "s:zipimporter.load_module", 319 &fullname)) 320 return NULL; 321 322 code = get_module_code(self, fullname, &ispackage, &modpath); 323 if (code == NULL) 324 goto error; 325 326 mod = PyImport_AddModule(fullname); 327 if (mod == NULL) 328 goto error; 329 dict = PyModule_GetDict(mod); 330 331 /* mod.__loader__ = self */ 332 if (PyDict_SetItemString(dict, "__loader__", (PyObject *)self) != 0) 333 goto error; 334 335 if (ispackage) { 336 /* add __path__ to the module *before* the code gets 337 executed */ 338 PyObject *pkgpath, *fullpath; 339 char *subname = get_subname(fullname); 340 int err; 341 342 fullpath = PyUnicode_FromFormat("%U%c%U%s", 343 self->archive, SEP, 344 self->prefix, subname); 345 if (fullpath == NULL) 346 goto error; 347 348 pkgpath = Py_BuildValue("[O]", fullpath); 349 Py_DECREF(fullpath); 350 if (pkgpath == NULL) 351 goto error; 352 err = PyDict_SetItemString(dict, "__path__", pkgpath); 353 Py_DECREF(pkgpath); 354 if (err != 0) 355 goto error; 356 } 357 modpath_bytes = PyUnicode_EncodeFSDefault(modpath); 358 if (modpath_bytes == NULL) 359 goto error; 360 mod = PyImport_ExecCodeModuleEx(fullname, code, 361 PyBytes_AS_STRING(modpath_bytes)); 362 Py_DECREF(modpath_bytes); 363 Py_CLEAR(code); 364 if (mod == NULL) 365 goto error; 366 367 if (Py_VerboseFlag) 368 PySys_FormatStderr("import %s # loaded from Zip %U\n", 369 fullname, modpath); 370 Py_DECREF(modpath); 371 return mod; 372error: 373 Py_XDECREF(code); 374 Py_XDECREF(modpath); 375 return NULL; 376} 377 378/* Return a string matching __file__ for the named module */ 379static PyObject * 380zipimporter_get_filename(PyObject *obj, PyObject *args) 381{ 382 ZipImporter *self = (ZipImporter *)obj; 383 PyObject *code; 384 char *fullname; 385 PyObject *modpath; 386 int ispackage; 387 388 if (!PyArg_ParseTuple(args, "s:zipimporter.get_filename", 389 &fullname)) 390 return NULL; 391 392 /* Deciding the filename requires working out where the code 393 would come from if the module was actually loaded */ 394 code = get_module_code(self, fullname, &ispackage, &modpath); 395 if (code == NULL) 396 return NULL; 397 Py_DECREF(code); /* Only need the path info */ 398 399 return modpath; 400} 401 402/* Return a bool signifying whether the module is a package or not. */ 403static PyObject * 404zipimporter_is_package(PyObject *obj, PyObject *args) 405{ 406 ZipImporter *self = (ZipImporter *)obj; 407 char *fullname; 408 enum zi_module_info mi; 409 410 if (!PyArg_ParseTuple(args, "s:zipimporter.is_package", 411 &fullname)) 412 return NULL; 413 414 mi = get_module_info(self, fullname); 415 if (mi == MI_ERROR) 416 return NULL; 417 if (mi == MI_NOT_FOUND) { 418 PyErr_Format(ZipImportError, "can't find module '%s'", fullname); 419 return NULL; 420 } 421 return PyBool_FromLong(mi == MI_PACKAGE); 422} 423 424static PyObject * 425zipimporter_get_data(PyObject *obj, PyObject *args) 426{ 427 ZipImporter *self = (ZipImporter *)obj; 428 PyObject *pathobj, *key; 429 const Py_UNICODE *path; 430#ifdef ALTSEP 431 Py_UNICODE *p, buf[MAXPATHLEN + 1]; 432#endif 433 Py_UNICODE *archive; 434 PyObject *toc_entry; 435 Py_ssize_t path_len, len; 436 437 if (!PyArg_ParseTuple(args, "U:zipimporter.get_data", &pathobj)) 438 return NULL; 439 440 path_len = PyUnicode_GET_SIZE(pathobj); 441 path = PyUnicode_AS_UNICODE(pathobj); 442#ifdef ALTSEP 443 if (path_len >= MAXPATHLEN) { 444 PyErr_SetString(ZipImportError, "path too long"); 445 return NULL; 446 } 447 Py_UNICODE_strcpy(buf, path); 448 for (p = buf; *p; p++) { 449 if (*p == ALTSEP) 450 *p = SEP; 451 } 452 path = buf; 453#endif 454 archive = PyUnicode_AS_UNICODE(self->archive); 455 len = PyUnicode_GET_SIZE(self->archive); 456 if ((size_t)len < Py_UNICODE_strlen(path) && 457 Py_UNICODE_strncmp(path, archive, len) == 0 && 458 path[len] == SEP) { 459 path += len + 1; 460 path_len -= len + 1; 461 } 462 463 key = PyUnicode_FromUnicode(path, path_len); 464 if (key == NULL) 465 return NULL; 466 toc_entry = PyDict_GetItem(self->files, key); 467 if (toc_entry == NULL) { 468 PyErr_SetFromErrnoWithFilenameObject(PyExc_IOError, key); 469 Py_DECREF(key); 470 return NULL; 471 } 472 Py_DECREF(key); 473 return get_data(self->archive, toc_entry); 474} 475 476static PyObject * 477zipimporter_get_code(PyObject *obj, PyObject *args) 478{ 479 ZipImporter *self = (ZipImporter *)obj; 480 char *fullname; 481 482 if (!PyArg_ParseTuple(args, "s:zipimporter.get_code", &fullname)) 483 return NULL; 484 485 return get_module_code(self, fullname, NULL, NULL); 486} 487 488static PyObject * 489zipimporter_get_source(PyObject *obj, PyObject *args) 490{ 491 ZipImporter *self = (ZipImporter *)obj; 492 PyObject *toc_entry; 493 char *fullname, *subname, path[MAXPATHLEN+1]; 494 int len; 495 enum zi_module_info mi; 496 497 if (!PyArg_ParseTuple(args, "s:zipimporter.get_source", &fullname)) 498 return NULL; 499 500 mi = get_module_info(self, fullname); 501 if (mi == MI_ERROR) 502 return NULL; 503 if (mi == MI_NOT_FOUND) { 504 PyErr_Format(ZipImportError, "can't find module '%s'", fullname); 505 return NULL; 506 } 507 subname = get_subname(fullname); 508 509 len = make_filename(self->prefix, subname, path, sizeof(path)); 510 if (len < 0) 511 return NULL; 512 513 if (mi == MI_PACKAGE) { 514 path[len] = SEP; 515 strcpy(path + len + 1, "__init__.py"); 516 } 517 else 518 strcpy(path + len, ".py"); 519 520 toc_entry = PyDict_GetItemString(self->files, path); 521 if (toc_entry != NULL) { 522 PyObject *res, *bytes; 523 bytes = get_data(self->archive, toc_entry); 524 if (bytes == NULL) 525 return NULL; 526 res = PyUnicode_FromStringAndSize(PyBytes_AS_STRING(bytes), 527 PyBytes_GET_SIZE(bytes)); 528 Py_DECREF(bytes); 529 return res; 530 } 531 532 /* we have the module, but no source */ 533 Py_INCREF(Py_None); 534 return Py_None; 535} 536 537PyDoc_STRVAR(doc_find_module, 538"find_module(fullname, path=None) -> self or None.\n\ 539\n\ 540Search for a module specified by 'fullname'. 'fullname' must be the\n\ 541fully qualified (dotted) module name. It returns the zipimporter\n\ 542instance itself if the module was found, or None if it wasn't.\n\ 543The optional 'path' argument is ignored -- it's there for compatibility\n\ 544with the importer protocol."); 545 546PyDoc_STRVAR(doc_load_module, 547"load_module(fullname) -> module.\n\ 548\n\ 549Load the module specified by 'fullname'. 'fullname' must be the\n\ 550fully qualified (dotted) module name. It returns the imported\n\ 551module, or raises ZipImportError if it wasn't found."); 552 553PyDoc_STRVAR(doc_get_data, 554"get_data(pathname) -> string with file data.\n\ 555\n\ 556Return the data associated with 'pathname'. Raise IOError if\n\ 557the file wasn't found."); 558 559PyDoc_STRVAR(doc_is_package, 560"is_package(fullname) -> bool.\n\ 561\n\ 562Return True if the module specified by fullname is a package.\n\ 563Raise ZipImportError if the module couldn't be found."); 564 565PyDoc_STRVAR(doc_get_code, 566"get_code(fullname) -> code object.\n\ 567\n\ 568Return the code object for the specified module. Raise ZipImportError\n\ 569if the module couldn't be found."); 570 571PyDoc_STRVAR(doc_get_source, 572"get_source(fullname) -> source string.\n\ 573\n\ 574Return the source code for the specified module. Raise ZipImportError\n\ 575if the module couldn't be found, return None if the archive does\n\ 576contain the module, but has no source for it."); 577 578 579PyDoc_STRVAR(doc_get_filename, 580"get_filename(fullname) -> filename string.\n\ 581\n\ 582Return the filename for the specified module."); 583 584static PyMethodDef zipimporter_methods[] = { 585 {"find_module", zipimporter_find_module, METH_VARARGS, 586 doc_find_module}, 587 {"load_module", zipimporter_load_module, METH_VARARGS, 588 doc_load_module}, 589 {"get_data", zipimporter_get_data, METH_VARARGS, 590 doc_get_data}, 591 {"get_code", zipimporter_get_code, METH_VARARGS, 592 doc_get_code}, 593 {"get_source", zipimporter_get_source, METH_VARARGS, 594 doc_get_source}, 595 {"get_filename", zipimporter_get_filename, METH_VARARGS, 596 doc_get_filename}, 597 {"is_package", zipimporter_is_package, METH_VARARGS, 598 doc_is_package}, 599 {NULL, NULL} /* sentinel */ 600}; 601 602static PyMemberDef zipimporter_members[] = { 603 {"archive", T_OBJECT, offsetof(ZipImporter, archive), READONLY}, 604 {"prefix", T_OBJECT, offsetof(ZipImporter, prefix), READONLY}, 605 {"_files", T_OBJECT, offsetof(ZipImporter, files), READONLY}, 606 {NULL} 607}; 608 609PyDoc_STRVAR(zipimporter_doc, 610"zipimporter(archivepath) -> zipimporter object\n\ 611\n\ 612Create a new zipimporter instance. 'archivepath' must be a path to\n\ 613a zipfile, or to a specific path inside a zipfile. For example, it can be\n\ 614'/tmp/myimport.zip', or '/tmp/myimport.zip/mydirectory', if mydirectory is a\n\ 615valid directory inside the archive.\n\ 616\n\ 617'ZipImportError is raised if 'archivepath' doesn't point to a valid Zip\n\ 618archive.\n\ 619\n\ 620The 'archive' attribute of zipimporter objects contains the name of the\n\ 621zipfile targeted."); 622 623#define DEFERRED_ADDRESS(ADDR) 0 624 625static PyTypeObject ZipImporter_Type = { 626 PyVarObject_HEAD_INIT(DEFERRED_ADDRESS(&PyType_Type), 0) 627 "zipimport.zipimporter", 628 sizeof(ZipImporter), 629 0, /* tp_itemsize */ 630 (destructor)zipimporter_dealloc, /* tp_dealloc */ 631 0, /* tp_print */ 632 0, /* tp_getattr */ 633 0, /* tp_setattr */ 634 0, /* tp_reserved */ 635 (reprfunc)zipimporter_repr, /* tp_repr */ 636 0, /* tp_as_number */ 637 0, /* tp_as_sequence */ 638 0, /* tp_as_mapping */ 639 0, /* tp_hash */ 640 0, /* tp_call */ 641 0, /* tp_str */ 642 PyObject_GenericGetAttr, /* tp_getattro */ 643 0, /* tp_setattro */ 644 0, /* tp_as_buffer */ 645 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | 646 Py_TPFLAGS_HAVE_GC, /* tp_flags */ 647 zipimporter_doc, /* tp_doc */ 648 zipimporter_traverse, /* tp_traverse */ 649 0, /* tp_clear */ 650 0, /* tp_richcompare */ 651 0, /* tp_weaklistoffset */ 652 0, /* tp_iter */ 653 0, /* tp_iternext */ 654 zipimporter_methods, /* tp_methods */ 655 zipimporter_members, /* tp_members */ 656 0, /* tp_getset */ 657 0, /* tp_base */ 658 0, /* tp_dict */ 659 0, /* tp_descr_get */ 660 0, /* tp_descr_set */ 661 0, /* tp_dictoffset */ 662 (initproc)zipimporter_init, /* tp_init */ 663 PyType_GenericAlloc, /* tp_alloc */ 664 PyType_GenericNew, /* tp_new */ 665 PyObject_GC_Del, /* tp_free */ 666}; 667 668 669/* implementation */ 670 671/* Given a buffer, return the long that is represented by the first 672 4 bytes, encoded as little endian. This partially reimplements 673 marshal.c:r_long() */ 674static long 675get_long(unsigned char *buf) { 676 long x; 677 x = buf[0]; 678 x |= (long)buf[1] << 8; 679 x |= (long)buf[2] << 16; 680 x |= (long)buf[3] << 24; 681#if SIZEOF_LONG > 4 682 /* Sign extension for 64-bit machines */ 683 x |= -(x & 0x80000000L); 684#endif 685 return x; 686} 687 688/* 689 read_directory(archive) -> files dict (new reference) 690 691 Given a path to a Zip archive, build a dict, mapping file names 692 (local to the archive, using SEP as a separator) to toc entries. 693 694 A toc_entry is a tuple: 695 696 (__file__, # value to use for __file__, available for all files, 697 # encoded to the filesystem encoding 698 compress, # compression kind; 0 for uncompressed 699 data_size, # size of compressed data on disk 700 file_size, # size of decompressed data 701 file_offset, # offset of file header from start of archive 702 time, # mod time of file (in dos format) 703 date, # mod data of file (in dos format) 704 crc, # crc checksum of the data 705 ) 706 707 Directories can be recognized by the trailing SEP in the name, 708 data_size and file_offset are 0. 709*/ 710static PyObject * 711read_directory(PyObject *archive_obj) 712{ 713 /* FIXME: work on Py_UNICODE* instead of char* */ 714 PyObject *files = NULL; 715 FILE *fp; 716 unsigned short flags; 717 short compress, time, date, name_size; 718 long crc, data_size, file_size, header_size; 719 Py_ssize_t file_offset, header_position, header_offset; 720 long i, l, count; 721 size_t length; 722 Py_UNICODE path[MAXPATHLEN + 5]; 723 char name[MAXPATHLEN + 5]; 724 PyObject *nameobj = NULL; 725 char *p, endof_central_dir[22]; 726 Py_ssize_t arc_offset; /* Absolute offset to start of the zip-archive. */ 727 PyObject *pathobj; 728 const char *charset; 729 int bootstrap; 730 731 if (PyUnicode_GET_SIZE(archive_obj) > MAXPATHLEN) { 732 PyErr_SetString(PyExc_OverflowError, 733 "Zip path name is too long"); 734 return NULL; 735 } 736 Py_UNICODE_strcpy(path, PyUnicode_AS_UNICODE(archive_obj)); 737 738 fp = _Py_fopen(archive_obj, "rb"); 739 if (fp == NULL) { 740 if (!PyErr_Occurred()) 741 PyErr_Format(ZipImportError, "can't open Zip file: '%U'", archive_obj); 742 return NULL; 743 } 744 745 if (fseek(fp, -22, SEEK_END) == -1) { 746 fclose(fp); 747 PyErr_Format(ZipImportError, "can't read Zip file: %R", archive); 748 return NULL; 749 } 750 header_position = ftell(fp); 751 if (fread(endof_central_dir, 1, 22, fp) != 22) { 752 fclose(fp); 753 PyErr_Format(ZipImportError, "can't read Zip file: '%U'", archive_obj); 754 return NULL; 755 } 756 if (get_long((unsigned char *)endof_central_dir) != 0x06054B50) { 757 /* Bad: End of Central Dir signature */ 758 fclose(fp); 759 PyErr_Format(ZipImportError, "not a Zip file: '%U'", archive_obj); 760 return NULL; 761 } 762 763 header_size = get_long((unsigned char *)endof_central_dir + 12); 764 header_offset = get_long((unsigned char *)endof_central_dir + 16); 765 arc_offset = header_position - header_offset - header_size; 766 header_offset += arc_offset; 767 768 files = PyDict_New(); 769 if (files == NULL) 770 goto error; 771 772 length = Py_UNICODE_strlen(path); 773 path[length] = SEP; 774 775 /* Start of Central Directory */ 776 count = 0; 777 for (;;) { 778 PyObject *t; 779 int err; 780 781 if (fseek(fp, header_offset, 0) == -1) /* Start of file header */ 782 goto fseek_error; 783 l = PyMarshal_ReadLongFromFile(fp); 784 if (l != 0x02014B50) 785 break; /* Bad: Central Dir File Header */ 786 if (fseek(fp, header_offset + 8, 0) == -1) 787 goto fseek_error; 788 flags = (unsigned short)PyMarshal_ReadShortFromFile(fp); 789 compress = PyMarshal_ReadShortFromFile(fp); 790 time = PyMarshal_ReadShortFromFile(fp); 791 date = PyMarshal_ReadShortFromFile(fp); 792 crc = PyMarshal_ReadLongFromFile(fp); 793 data_size = PyMarshal_ReadLongFromFile(fp); 794 file_size = PyMarshal_ReadLongFromFile(fp); 795 name_size = PyMarshal_ReadShortFromFile(fp); 796 header_size = 46 + name_size + 797 PyMarshal_ReadShortFromFile(fp) + 798 PyMarshal_ReadShortFromFile(fp); 799 if (fseek(fp, header_offset + 42, 0) == -1) 800 goto fseek_error; 801 file_offset = PyMarshal_ReadLongFromFile(fp) + arc_offset; 802 if (name_size > MAXPATHLEN) 803 name_size = MAXPATHLEN; 804 805 p = name; 806 for (i = 0; i < name_size; i++) { 807 *p = (char)getc(fp); 808 if (*p == '/') 809 *p = SEP; 810 p++; 811 } 812 *p = 0; /* Add terminating null byte */ 813 header_offset += header_size; 814 815 bootstrap = 0; 816 if (flags & 0x0800) 817 charset = "utf-8"; 818 else if (!PyThreadState_GET()->interp->codecs_initialized) { 819 /* During bootstrap, we may need to load the encodings 820 package from a ZIP file. But the cp437 encoding is implemented 821 in Python in the encodings package. 822 823 Break out of this dependency by assuming that the path to 824 the encodings module is ASCII-only. */ 825 charset = "ascii"; 826 bootstrap = 1; 827 } 828 else 829 charset = "cp437"; 830 nameobj = PyUnicode_Decode(name, name_size, charset, NULL); 831 if (nameobj == NULL) { 832 if (bootstrap) 833 PyErr_Format(PyExc_NotImplementedError, 834 "bootstrap issue: python%i%i.zip contains non-ASCII " 835 "filenames without the unicode flag", 836 PY_MAJOR_VERSION, PY_MINOR_VERSION); 837 goto error; 838 } 839 Py_UNICODE_strncpy(path + length + 1, PyUnicode_AS_UNICODE(nameobj), MAXPATHLEN - length - 1); 840 841 pathobj = PyUnicode_FromUnicode(path, Py_UNICODE_strlen(path)); 842 if (pathobj == NULL) 843 goto error; 844 t = Py_BuildValue("Nhllnhhl", pathobj, compress, data_size, 845 file_size, file_offset, time, date, crc); 846 if (t == NULL) 847 goto error; 848 err = PyDict_SetItem(files, nameobj, t); 849 Py_CLEAR(nameobj); 850 Py_DECREF(t); 851 if (err != 0) 852 goto error; 853 count++; 854 } 855 fclose(fp); 856 if (Py_VerboseFlag) 857 PySys_FormatStderr("# zipimport: found %ld names in %U\n", 858 count, archive_obj); 859 return files; 860fseek_error: 861 fclose(fp); 862 Py_XDECREF(files); 863 Py_XDECREF(nameobj); 864 PyErr_Format(ZipImportError, "can't read Zip file: %R", archive); 865 return NULL; 866error: 867 fclose(fp); 868 Py_XDECREF(files); 869 Py_XDECREF(nameobj); 870 return NULL; 871} 872 873/* Return the zlib.decompress function object, or NULL if zlib couldn't 874 be imported. The function is cached when found, so subsequent calls 875 don't import zlib again. */ 876static PyObject * 877get_decompress_func(void) 878{ 879 static int importing_zlib = 0; 880 PyObject *zlib; 881 PyObject *decompress; 882 883 if (importing_zlib != 0) 884 /* Someone has a zlib.py[co] in their Zip file; 885 let's avoid a stack overflow. */ 886 return NULL; 887 importing_zlib = 1; 888 zlib = PyImport_ImportModuleNoBlock("zlib"); 889 importing_zlib = 0; 890 if (zlib != NULL) { 891 decompress = PyObject_GetAttrString(zlib, 892 "decompress"); 893 Py_DECREF(zlib); 894 } 895 else { 896 PyErr_Clear(); 897 decompress = NULL; 898 } 899 if (Py_VerboseFlag) 900 PySys_WriteStderr("# zipimport: zlib %s\n", 901 zlib != NULL ? "available": "UNAVAILABLE"); 902 return decompress; 903} 904 905/* Given a path to a Zip file and a toc_entry, return the (uncompressed) 906 data as a new reference. */ 907static PyObject * 908get_data(PyObject *archive, PyObject *toc_entry) 909{ 910 PyObject *raw_data, *data = NULL, *decompress; 911 char *buf; 912 FILE *fp; 913 int err; 914 Py_ssize_t bytes_read = 0; 915 long l; 916 PyObject *datapath; 917 long compress, data_size, file_size, file_offset, bytes_size; 918 long time, date, crc; 919 920 if (!PyArg_ParseTuple(toc_entry, "Olllllll", &datapath, &compress, 921 &data_size, &file_size, &file_offset, &time, 922 &date, &crc)) { 923 return NULL; 924 } 925 926 fp = _Py_fopen(archive, "rb"); 927 if (!fp) { 928 if (!PyErr_Occurred()) 929 PyErr_Format(PyExc_IOError, 930 "zipimport: can not open file %U", archive); 931 return NULL; 932 } 933 934 /* Check to make sure the local file header is correct */ 935 if (fseek(fp, file_offset, 0) == -1) { 936 fclose(fp); 937 PyErr_Format(ZipImportError, "can't read Zip file: %R", archive); 938 return NULL; 939 } 940 941 l = PyMarshal_ReadLongFromFile(fp); 942 if (l != 0x04034B50) { 943 /* Bad: Local File Header */ 944 PyErr_Format(ZipImportError, 945 "bad local file header in %U", 946 archive); 947 fclose(fp); 948 return NULL; 949 } 950 if (fseek(fp, file_offset + 26, 0) == -1) { 951 fclose(fp); 952 PyErr_Format(ZipImportError, "can't read Zip file: %R", archive); 953 return NULL; 954 } 955 956 l = 30 + PyMarshal_ReadShortFromFile(fp) + 957 PyMarshal_ReadShortFromFile(fp); /* local header size */ 958 file_offset += l; /* Start of file data */ 959 960 bytes_size = compress == 0 ? data_size : data_size + 1; 961 if (bytes_size == 0) 962 bytes_size++; 963 raw_data = PyBytes_FromStringAndSize((char *)NULL, bytes_size); 964 965 if (raw_data == NULL) { 966 fclose(fp); 967 return NULL; 968 } 969 buf = PyBytes_AsString(raw_data); 970 971 err = fseek(fp, file_offset, 0); 972 if (err == 0) { 973 bytes_read = fread(buf, 1, data_size, fp); 974 } else { 975 fclose(fp); 976 PyErr_Format(ZipImportError, "can't read Zip file: %R", archive); 977 return NULL; 978 } 979 fclose(fp); 980 if (err || bytes_read != data_size) { 981 PyErr_SetString(PyExc_IOError, 982 "zipimport: can't read data"); 983 Py_DECREF(raw_data); 984 return NULL; 985 } 986 987 if (compress != 0) { 988 buf[data_size] = 'Z'; /* saw this in zipfile.py */ 989 data_size++; 990 } 991 buf[data_size] = '\0'; 992 993 if (compress == 0) { /* data is not compressed */ 994 data = PyBytes_FromStringAndSize(buf, data_size); 995 Py_DECREF(raw_data); 996 return data; 997 } 998 999 /* Decompress with zlib */ 1000 decompress = get_decompress_func(); 1001 if (decompress == NULL) { 1002 PyErr_SetString(ZipImportError, 1003 "can't decompress data; " 1004 "zlib not available"); 1005 goto error; 1006 } 1007 data = PyObject_CallFunction(decompress, "Oi", raw_data, -15); 1008 Py_DECREF(decompress); 1009error: 1010 Py_DECREF(raw_data); 1011 return data; 1012} 1013 1014/* Lenient date/time comparison function. The precision of the mtime 1015 in the archive is lower than the mtime stored in a .pyc: we 1016 must allow a difference of at most one second. */ 1017static int 1018eq_mtime(time_t t1, time_t t2) 1019{ 1020 time_t d = t1 - t2; 1021 if (d < 0) 1022 d = -d; 1023 /* dostime only stores even seconds, so be lenient */ 1024 return d <= 1; 1025} 1026 1027/* Given the contents of a .py[co] file in a buffer, unmarshal the data 1028 and return the code object. Return None if it the magic word doesn't 1029 match (we do this instead of raising an exception as we fall back 1030 to .py if available and we don't want to mask other errors). 1031 Returns a new reference. */ 1032static PyObject * 1033unmarshal_code(char *pathname, PyObject *data, time_t mtime) 1034{ 1035 PyObject *code; 1036 char *buf = PyBytes_AsString(data); 1037 Py_ssize_t size = PyBytes_Size(data); 1038 1039 if (size <= 9) { 1040 PyErr_SetString(ZipImportError, 1041 "bad pyc data"); 1042 return NULL; 1043 } 1044 1045 if (get_long((unsigned char *)buf) != PyImport_GetMagicNumber()) { 1046 if (Py_VerboseFlag) 1047 PySys_WriteStderr("# %s has bad magic\n", 1048 pathname); 1049 Py_INCREF(Py_None); 1050 return Py_None; /* signal caller to try alternative */ 1051 } 1052 1053 if (mtime != 0 && !eq_mtime(get_long((unsigned char *)buf + 4), 1054 mtime)) { 1055 if (Py_VerboseFlag) 1056 PySys_WriteStderr("# %s has bad mtime\n", 1057 pathname); 1058 Py_INCREF(Py_None); 1059 return Py_None; /* signal caller to try alternative */ 1060 } 1061 1062 code = PyMarshal_ReadObjectFromString(buf + 8, size - 8); 1063 if (code == NULL) 1064 return NULL; 1065 if (!PyCode_Check(code)) { 1066 Py_DECREF(code); 1067 PyErr_Format(PyExc_TypeError, 1068 "compiled module %s is not a code object", 1069 pathname); 1070 return NULL; 1071 } 1072 return code; 1073} 1074 1075/* Replace any occurances of "\r\n?" in the input string with "\n". 1076 This converts DOS and Mac line endings to Unix line endings. 1077 Also append a trailing "\n" to be compatible with 1078 PyParser_SimpleParseFile(). Returns a new reference. */ 1079static PyObject * 1080normalize_line_endings(PyObject *source) 1081{ 1082 char *buf, *q, *p = PyBytes_AsString(source); 1083 PyObject *fixed_source; 1084 int len = 0; 1085 1086 if (!p) { 1087 return PyBytes_FromStringAndSize("\n\0", 2); 1088 } 1089 1090 /* one char extra for trailing \n and one for terminating \0 */ 1091 buf = (char *)PyMem_Malloc(PyBytes_Size(source) + 2); 1092 if (buf == NULL) { 1093 PyErr_SetString(PyExc_MemoryError, 1094 "zipimport: no memory to allocate " 1095 "source buffer"); 1096 return NULL; 1097 } 1098 /* replace "\r\n?" by "\n" */ 1099 for (q = buf; *p != '\0'; p++) { 1100 if (*p == '\r') { 1101 *q++ = '\n'; 1102 if (*(p + 1) == '\n') 1103 p++; 1104 } 1105 else 1106 *q++ = *p; 1107 len++; 1108 } 1109 *q++ = '\n'; /* add trailing \n */ 1110 *q = '\0'; 1111 fixed_source = PyBytes_FromStringAndSize(buf, len + 2); 1112 PyMem_Free(buf); 1113 return fixed_source; 1114} 1115 1116/* Given a string buffer containing Python source code, compile it 1117 return and return a code object as a new reference. */ 1118static PyObject * 1119compile_source(char *pathname, PyObject *source) 1120{ 1121 PyObject *code, *fixed_source; 1122 1123 fixed_source = normalize_line_endings(source); 1124 if (fixed_source == NULL) 1125 return NULL; 1126 1127 code = Py_CompileString(PyBytes_AsString(fixed_source), pathname, 1128 Py_file_input); 1129 Py_DECREF(fixed_source); 1130 return code; 1131} 1132 1133/* Convert the date/time values found in the Zip archive to a value 1134 that's compatible with the time stamp stored in .pyc files. */ 1135static time_t 1136parse_dostime(int dostime, int dosdate) 1137{ 1138 struct tm stm; 1139 1140 memset((void *) &stm, '\0', sizeof(stm)); 1141 1142 stm.tm_sec = (dostime & 0x1f) * 2; 1143 stm.tm_min = (dostime >> 5) & 0x3f; 1144 stm.tm_hour = (dostime >> 11) & 0x1f; 1145 stm.tm_mday = dosdate & 0x1f; 1146 stm.tm_mon = ((dosdate >> 5) & 0x0f) - 1; 1147 stm.tm_year = ((dosdate >> 9) & 0x7f) + 80; 1148 stm.tm_isdst = -1; /* wday/yday is ignored */ 1149 1150 return mktime(&stm); 1151} 1152 1153/* Given a path to a .pyc or .pyo file in the archive, return the 1154 modification time of the matching .py file, or 0 if no source 1155 is available. */ 1156static time_t 1157get_mtime_of_source(ZipImporter *self, char *path) 1158{ 1159 PyObject *toc_entry; 1160 time_t mtime = 0; 1161 Py_ssize_t lastchar = strlen(path) - 1; 1162 char savechar = path[lastchar]; 1163 path[lastchar] = '\0'; /* strip 'c' or 'o' from *.py[co] */ 1164 toc_entry = PyDict_GetItemString(self->files, path); 1165 if (toc_entry != NULL && PyTuple_Check(toc_entry) && 1166 PyTuple_Size(toc_entry) == 8) { 1167 /* fetch the time stamp of the .py file for comparison 1168 with an embedded pyc time stamp */ 1169 int time, date; 1170 time = PyLong_AsLong(PyTuple_GetItem(toc_entry, 5)); 1171 date = PyLong_AsLong(PyTuple_GetItem(toc_entry, 6)); 1172 mtime = parse_dostime(time, date); 1173 } 1174 path[lastchar] = savechar; 1175 return mtime; 1176} 1177 1178/* Return the code object for the module named by 'fullname' from the 1179 Zip archive as a new reference. */ 1180static PyObject * 1181get_code_from_data(ZipImporter *self, int ispackage, int isbytecode, 1182 time_t mtime, PyObject *toc_entry) 1183{ 1184 PyObject *data, *code; 1185 PyObject *modpath; 1186 1187 data = get_data(self->archive, toc_entry); 1188 if (data == NULL) 1189 return NULL; 1190 1191 modpath = PyUnicode_EncodeFSDefault(PyTuple_GetItem(toc_entry, 0)); 1192 if (modpath == NULL) { 1193 Py_DECREF(data); 1194 return NULL; 1195 } 1196 1197 if (isbytecode) 1198 code = unmarshal_code(PyBytes_AS_STRING(modpath), data, mtime); 1199 else 1200 code = compile_source(PyBytes_AS_STRING(modpath), data); 1201 Py_DECREF(modpath); 1202 Py_DECREF(data); 1203 return code; 1204} 1205 1206/* Get the code object associated with the module specified by 1207 'fullname'. */ 1208static PyObject * 1209get_module_code(ZipImporter *self, char *fullname, 1210 int *p_ispackage, PyObject **p_modpath) 1211{ 1212 PyObject *toc_entry; 1213 char *subname, path[MAXPATHLEN + 1]; 1214 int len; 1215 struct st_zip_searchorder *zso; 1216 1217 subname = get_subname(fullname); 1218 1219 len = make_filename(self->prefix, subname, path, sizeof(path)); 1220 if (len < 0) 1221 return NULL; 1222 1223 for (zso = zip_searchorder; *zso->suffix; zso++) { 1224 PyObject *code = NULL; 1225 1226 strcpy(path + len, zso->suffix); 1227 if (Py_VerboseFlag > 1) 1228 PySys_FormatStderr("# trying %U%c%s\n", 1229 self->archive, (int)SEP, path); 1230 toc_entry = PyDict_GetItemString(self->files, path); 1231 if (toc_entry != NULL) { 1232 time_t mtime = 0; 1233 int ispackage = zso->type & IS_PACKAGE; 1234 int isbytecode = zso->type & IS_BYTECODE; 1235 1236 if (isbytecode) 1237 mtime = get_mtime_of_source(self, path); 1238 if (p_ispackage != NULL) 1239 *p_ispackage = ispackage; 1240 code = get_code_from_data(self, ispackage, 1241 isbytecode, mtime, 1242 toc_entry); 1243 if (code == Py_None) { 1244 /* bad magic number or non-matching mtime 1245 in byte code, try next */ 1246 Py_DECREF(code); 1247 continue; 1248 } 1249 if (code != NULL && p_modpath != NULL) { 1250 *p_modpath = PyTuple_GetItem(toc_entry, 0); 1251 Py_INCREF(*p_modpath); 1252 } 1253 return code; 1254 } 1255 } 1256 PyErr_Format(ZipImportError, "can't find module '%s'", fullname); 1257 return NULL; 1258} 1259 1260 1261/* Module init */ 1262 1263PyDoc_STRVAR(zipimport_doc, 1264"zipimport provides support for importing Python modules from Zip archives.\n\ 1265\n\ 1266This module exports three objects:\n\ 1267- zipimporter: a class; its constructor takes a path to a Zip archive.\n\ 1268- ZipImportError: exception raised by zipimporter objects. It's a\n\ 1269 subclass of ImportError, so it can be caught as ImportError, too.\n\ 1270- _zip_directory_cache: a dict, mapping archive paths to zip directory\n\ 1271 info dicts, as used in zipimporter._files.\n\ 1272\n\ 1273It is usually not needed to use the zipimport module explicitly; it is\n\ 1274used by the builtin import mechanism for sys.path items that are paths\n\ 1275to Zip archives."); 1276 1277static struct PyModuleDef zipimportmodule = { 1278 PyModuleDef_HEAD_INIT, 1279 "zipimport", 1280 zipimport_doc, 1281 -1, 1282 NULL, 1283 NULL, 1284 NULL, 1285 NULL, 1286 NULL 1287}; 1288 1289PyMODINIT_FUNC 1290PyInit_zipimport(void) 1291{ 1292 PyObject *mod; 1293 1294 if (PyType_Ready(&ZipImporter_Type) < 0) 1295 return NULL; 1296 1297 /* Correct directory separator */ 1298 zip_searchorder[0].suffix[0] = SEP; 1299 zip_searchorder[1].suffix[0] = SEP; 1300 zip_searchorder[2].suffix[0] = SEP; 1301 if (Py_OptimizeFlag) { 1302 /* Reverse *.pyc and *.pyo */ 1303 struct st_zip_searchorder tmp; 1304 tmp = zip_searchorder[0]; 1305 zip_searchorder[0] = zip_searchorder[1]; 1306 zip_searchorder[1] = tmp; 1307 tmp = zip_searchorder[3]; 1308 zip_searchorder[3] = zip_searchorder[4]; 1309 zip_searchorder[4] = tmp; 1310 } 1311 1312 mod = PyModule_Create(&zipimportmodule); 1313 if (mod == NULL) 1314 return NULL; 1315 1316 ZipImportError = PyErr_NewException("zipimport.ZipImportError", 1317 PyExc_ImportError, NULL); 1318 if (ZipImportError == NULL) 1319 return NULL; 1320 1321 Py_INCREF(ZipImportError); 1322 if (PyModule_AddObject(mod, "ZipImportError", 1323 ZipImportError) < 0) 1324 return NULL; 1325 1326 Py_INCREF(&ZipImporter_Type); 1327 if (PyModule_AddObject(mod, "zipimporter", 1328 (PyObject *)&ZipImporter_Type) < 0) 1329 return NULL; 1330 1331 zip_directory_cache = PyDict_New(); 1332 if (zip_directory_cache == NULL) 1333 return NULL; 1334 Py_INCREF(zip_directory_cache); 1335 if (PyModule_AddObject(mod, "_zip_directory_cache", 1336 zip_directory_cache) < 0) 1337 return NULL; 1338 return mod; 1339} 1340