zipimport.c revision bd206e27a49dd4cc94ee264c706614190ce0eb3c
1#include "Python.h" 2#include "structmember.h" 3#include "osdefs.h" 4#include "marshal.h" 5#include <time.h> 6 7 8#define IS_SOURCE 0x0 9#define IS_BYTECODE 0x1 10#define IS_PACKAGE 0x2 11 12struct st_zip_searchorder { 13 char suffix[14]; 14 int type; 15}; 16 17/* zip_searchorder defines how we search for a module in the Zip 18 archive: we first search for a package __init__, then for 19 non-package .pyc, .pyo and .py entries. The .pyc and .pyo entries 20 are swapped by initzipimport() if we run in optimized mode. Also, 21 '/' is replaced by SEP there. */ 22static struct st_zip_searchorder zip_searchorder[] = { 23 {"/__init__.pyc", IS_PACKAGE | IS_BYTECODE}, 24 {"/__init__.pyo", IS_PACKAGE | IS_BYTECODE}, 25 {"/__init__.py", IS_PACKAGE | IS_SOURCE}, 26 {".pyc", IS_BYTECODE}, 27 {".pyo", IS_BYTECODE}, 28 {".py", IS_SOURCE}, 29 {"", 0} 30}; 31 32/* zipimporter object definition and support */ 33 34typedef struct _zipimporter ZipImporter; 35 36struct _zipimporter { 37 PyObject_HEAD 38 PyObject *archive; /* pathname of the Zip archive, 39 decoded from the filesystem encoding */ 40 PyObject *prefix; /* file prefix: "a/sub/directory/", 41 encoded to the filesystem encoding */ 42 PyObject *files; /* dict with file info {path: toc_entry} */ 43}; 44 45static PyObject *ZipImportError; 46/* read_directory() cache */ 47static PyObject *zip_directory_cache = NULL; 48 49/* forward decls */ 50static PyObject *read_directory(PyObject *archive); 51static PyObject *get_data(PyObject *archive, PyObject *toc_entry); 52static PyObject *get_module_code(ZipImporter *self, char *fullname, 53 int *p_ispackage, PyObject **p_modpath); 54 55 56#define ZipImporter_Check(op) PyObject_TypeCheck(op, &ZipImporter_Type) 57 58 59/* zipimporter.__init__ 60 Split the "subdirectory" from the Zip archive path, lookup a matching 61 entry in sys.path_importer_cache, fetch the file directory from there 62 if found, or else read it from the archive. */ 63static int 64zipimporter_init(ZipImporter *self, PyObject *args, PyObject *kwds) 65{ 66 PyObject *pathobj, *files; 67 Py_UNICODE *path, *p, *prefix, buf[MAXPATHLEN+2]; 68 Py_ssize_t len; 69 70 if (!_PyArg_NoKeywords("zipimporter()", kwds)) 71 return -1; 72 73 if (!PyArg_ParseTuple(args, "O&:zipimporter", 74 PyUnicode_FSDecoder, &pathobj)) 75 return -1; 76 77 /* copy path to buf */ 78 len = PyUnicode_GET_SIZE(pathobj); 79 if (len == 0) { 80 PyErr_SetString(ZipImportError, "archive path is empty"); 81 goto error; 82 } 83 if (len >= MAXPATHLEN) { 84 PyErr_SetString(ZipImportError, 85 "archive path too long"); 86 goto error; 87 } 88 Py_UNICODE_strcpy(buf, PyUnicode_AS_UNICODE(pathobj)); 89 90#ifdef ALTSEP 91 for (p = buf; *p; p++) { 92 if (*p == ALTSEP) 93 *p = SEP; 94 } 95#endif 96 97 path = NULL; 98 prefix = NULL; 99 for (;;) { 100 struct stat statbuf; 101 int rv; 102 103 if (pathobj == NULL) { 104 pathobj = PyUnicode_FromUnicode(buf, len); 105 if (pathobj == NULL) 106 goto error; 107 } 108 rv = _Py_stat(pathobj, &statbuf); 109 if (rv == 0) { 110 /* it exists */ 111 if (S_ISREG(statbuf.st_mode)) 112 /* it's a file */ 113 path = buf; 114 break; 115 } 116 else if (PyErr_Occurred()) 117 goto error; 118 /* back up one path element */ 119 p = Py_UNICODE_strrchr(buf, SEP); 120 if (prefix != NULL) 121 *prefix = SEP; 122 if (p == NULL) 123 break; 124 *p = '\0'; 125 len = p - buf; 126 prefix = p; 127 Py_CLEAR(pathobj); 128 } 129 if (path == NULL) { 130 PyErr_SetString(ZipImportError, "not a Zip file"); 131 goto error; 132 } 133 134 files = PyDict_GetItem(zip_directory_cache, pathobj); 135 if (files == NULL) { 136 files = read_directory(pathobj); 137 if (files == NULL) 138 goto error; 139 if (PyDict_SetItem(zip_directory_cache, pathobj, files) != 0) 140 goto error; 141 } 142 else 143 Py_INCREF(files); 144 self->files = files; 145 146 self->archive = pathobj; 147 pathobj = NULL; 148 149 if (prefix != NULL) { 150 prefix++; 151 len = Py_UNICODE_strlen(prefix); 152 if (prefix[len-1] != SEP) { 153 /* add trailing SEP */ 154 prefix[len] = SEP; 155 prefix[len + 1] = '\0'; 156 len++; 157 } 158 } 159 else 160 len = 0; 161 self->prefix = PyUnicode_FromUnicode(prefix, len); 162 if (self->prefix == NULL) 163 goto error; 164 165 return 0; 166 167error: 168 Py_XDECREF(pathobj); 169 return -1; 170} 171 172/* GC support. */ 173static int 174zipimporter_traverse(PyObject *obj, visitproc visit, void *arg) 175{ 176 ZipImporter *self = (ZipImporter *)obj; 177 Py_VISIT(self->files); 178 return 0; 179} 180 181static void 182zipimporter_dealloc(ZipImporter *self) 183{ 184 PyObject_GC_UnTrack(self); 185 Py_XDECREF(self->archive); 186 Py_XDECREF(self->prefix); 187 Py_XDECREF(self->files); 188 Py_TYPE(self)->tp_free((PyObject *)self); 189} 190 191static PyObject * 192zipimporter_repr(ZipImporter *self) 193{ 194 if (self->archive == NULL) 195 return PyUnicode_FromString("<zipimporter object \"???\">"); 196 else if (self->prefix != NULL && PyUnicode_GET_SIZE(self->prefix) != 0) 197 return PyUnicode_FromFormat("<zipimporter object \"%U%c%U\">", 198 self->archive, SEP, self->prefix); 199 else 200 return PyUnicode_FromFormat("<zipimporter object \"%U\">", 201 self->archive); 202} 203 204/* return fullname.split(".")[-1] */ 205static char * 206get_subname(char *fullname) 207{ 208 char *subname = strrchr(fullname, '.'); 209 if (subname == NULL) 210 subname = fullname; 211 else 212 subname++; 213 return subname; 214} 215 216/* Given a (sub)modulename, write the potential file path in the 217 archive (without extension) to the path buffer. Return the 218 length of the resulting string. */ 219static int 220make_filename(PyObject *prefix_obj, char *name, char *path, size_t pathsize) 221{ 222 size_t len; 223 char *p; 224 PyObject *prefix; 225 226 prefix = PyUnicode_EncodeFSDefault(prefix_obj); 227 if (prefix == NULL) 228 return -1; 229 len = PyBytes_GET_SIZE(prefix); 230 231 /* self.prefix + name [+ SEP + "__init__"] + ".py[co]" */ 232 if (len + strlen(name) + 13 >= pathsize - 1) { 233 PyErr_SetString(ZipImportError, "path too long"); 234 Py_DECREF(prefix); 235 return -1; 236 } 237 238 strcpy(path, PyBytes_AS_STRING(prefix)); 239 Py_DECREF(prefix); 240 strcpy(path + len, name); 241 for (p = path + len; *p; p++) { 242 if (*p == '.') 243 *p = SEP; 244 } 245 len += strlen(name); 246 assert(len < INT_MAX); 247 return (int)len; 248} 249 250enum zi_module_info { 251 MI_ERROR, 252 MI_NOT_FOUND, 253 MI_MODULE, 254 MI_PACKAGE 255}; 256 257/* Return some information about a module. */ 258static enum zi_module_info 259get_module_info(ZipImporter *self, char *fullname) 260{ 261 char *subname, path[MAXPATHLEN + 1]; 262 int len; 263 struct st_zip_searchorder *zso; 264 265 subname = get_subname(fullname); 266 267 len = make_filename(self->prefix, subname, path, sizeof(path)); 268 if (len < 0) 269 return MI_ERROR; 270 271 for (zso = zip_searchorder; *zso->suffix; zso++) { 272 strcpy(path + len, zso->suffix); 273 if (PyDict_GetItemString(self->files, path) != NULL) { 274 if (zso->type & IS_PACKAGE) 275 return MI_PACKAGE; 276 else 277 return MI_MODULE; 278 } 279 } 280 return MI_NOT_FOUND; 281} 282 283/* Check whether we can satisfy the import of the module named by 284 'fullname'. Return self if we can, None if we can't. */ 285static PyObject * 286zipimporter_find_module(PyObject *obj, PyObject *args) 287{ 288 ZipImporter *self = (ZipImporter *)obj; 289 PyObject *path = NULL; 290 char *fullname; 291 enum zi_module_info mi; 292 293 if (!PyArg_ParseTuple(args, "s|O:zipimporter.find_module", 294 &fullname, &path)) 295 return NULL; 296 297 mi = get_module_info(self, fullname); 298 if (mi == MI_ERROR) 299 return NULL; 300 if (mi == MI_NOT_FOUND) { 301 Py_INCREF(Py_None); 302 return Py_None; 303 } 304 Py_INCREF(self); 305 return (PyObject *)self; 306} 307 308/* Load and return the module named by 'fullname'. */ 309static PyObject * 310zipimporter_load_module(PyObject *obj, PyObject *args) 311{ 312 ZipImporter *self = (ZipImporter *)obj; 313 PyObject *code = NULL, *mod, *dict; 314 char *fullname; 315 PyObject *modpath = NULL, *modpath_bytes; 316 int ispackage; 317 318 if (!PyArg_ParseTuple(args, "s:zipimporter.load_module", 319 &fullname)) 320 return NULL; 321 322 code = get_module_code(self, fullname, &ispackage, &modpath); 323 if (code == NULL) 324 goto error; 325 326 mod = PyImport_AddModule(fullname); 327 if (mod == NULL) 328 goto error; 329 dict = PyModule_GetDict(mod); 330 331 /* mod.__loader__ = self */ 332 if (PyDict_SetItemString(dict, "__loader__", (PyObject *)self) != 0) 333 goto error; 334 335 if (ispackage) { 336 /* add __path__ to the module *before* the code gets 337 executed */ 338 PyObject *pkgpath, *fullpath; 339 char *subname = get_subname(fullname); 340 int err; 341 342 fullpath = PyUnicode_FromFormat("%U%c%U%s", 343 self->archive, SEP, 344 self->prefix, subname); 345 if (fullpath == NULL) 346 goto error; 347 348 pkgpath = Py_BuildValue("[O]", fullpath); 349 Py_DECREF(fullpath); 350 if (pkgpath == NULL) 351 goto error; 352 err = PyDict_SetItemString(dict, "__path__", pkgpath); 353 Py_DECREF(pkgpath); 354 if (err != 0) 355 goto error; 356 } 357 modpath_bytes = PyUnicode_EncodeFSDefault(modpath); 358 if (modpath_bytes == NULL) 359 goto error; 360 mod = PyImport_ExecCodeModuleEx(fullname, code, 361 PyBytes_AS_STRING(modpath_bytes)); 362 Py_DECREF(modpath_bytes); 363 Py_CLEAR(code); 364 if (mod == NULL) 365 goto error; 366 367 if (Py_VerboseFlag) 368 PySys_FormatStderr("import %s # loaded from Zip %U\n", 369 fullname, modpath); 370 Py_DECREF(modpath); 371 return mod; 372error: 373 Py_XDECREF(code); 374 Py_XDECREF(modpath); 375 return NULL; 376} 377 378/* Return a string matching __file__ for the named module */ 379static PyObject * 380zipimporter_get_filename(PyObject *obj, PyObject *args) 381{ 382 ZipImporter *self = (ZipImporter *)obj; 383 PyObject *code; 384 char *fullname; 385 PyObject *modpath; 386 int ispackage; 387 388 if (!PyArg_ParseTuple(args, "s:zipimporter.get_filename", 389 &fullname)) 390 return NULL; 391 392 /* Deciding the filename requires working out where the code 393 would come from if the module was actually loaded */ 394 code = get_module_code(self, fullname, &ispackage, &modpath); 395 if (code == NULL) 396 return NULL; 397 Py_DECREF(code); /* Only need the path info */ 398 399 return modpath; 400} 401 402/* Return a bool signifying whether the module is a package or not. */ 403static PyObject * 404zipimporter_is_package(PyObject *obj, PyObject *args) 405{ 406 ZipImporter *self = (ZipImporter *)obj; 407 char *fullname; 408 enum zi_module_info mi; 409 410 if (!PyArg_ParseTuple(args, "s:zipimporter.is_package", 411 &fullname)) 412 return NULL; 413 414 mi = get_module_info(self, fullname); 415 if (mi == MI_ERROR) 416 return NULL; 417 if (mi == MI_NOT_FOUND) { 418 PyErr_Format(ZipImportError, "can't find module '%s'", fullname); 419 return NULL; 420 } 421 return PyBool_FromLong(mi == MI_PACKAGE); 422} 423 424static PyObject * 425zipimporter_get_data(PyObject *obj, PyObject *args) 426{ 427 ZipImporter *self = (ZipImporter *)obj; 428 PyObject *pathobj, *key; 429 const Py_UNICODE *path; 430#ifdef ALTSEP 431 Py_UNICODE *p, buf[MAXPATHLEN + 1]; 432#endif 433 Py_UNICODE *archive; 434 PyObject *toc_entry; 435 Py_ssize_t path_len, len; 436 437 if (!PyArg_ParseTuple(args, "U:zipimporter.get_data", &pathobj)) 438 return NULL; 439 440 path_len = PyUnicode_GET_SIZE(pathobj); 441 path = PyUnicode_AS_UNICODE(pathobj); 442#ifdef ALTSEP 443 if (path_len >= MAXPATHLEN) { 444 PyErr_SetString(ZipImportError, "path too long"); 445 return NULL; 446 } 447 Py_UNICODE_strcpy(buf, path); 448 for (p = buf; *p; p++) { 449 if (*p == ALTSEP) 450 *p = SEP; 451 } 452 path = buf; 453#endif 454 archive = PyUnicode_AS_UNICODE(self->archive); 455 len = PyUnicode_GET_SIZE(self->archive); 456 if ((size_t)len < Py_UNICODE_strlen(path) && 457 Py_UNICODE_strncmp(path, archive, len) == 0 && 458 path[len] == SEP) { 459 path += len + 1; 460 path_len -= len + 1; 461 } 462 463 key = PyUnicode_FromUnicode(path, path_len); 464 if (key == NULL) 465 return NULL; 466 toc_entry = PyDict_GetItem(self->files, key); 467 if (toc_entry == NULL) { 468 PyErr_SetFromErrnoWithFilenameObject(PyExc_IOError, key); 469 Py_DECREF(key); 470 return NULL; 471 } 472 Py_DECREF(key); 473 return get_data(self->archive, toc_entry); 474} 475 476static PyObject * 477zipimporter_get_code(PyObject *obj, PyObject *args) 478{ 479 ZipImporter *self = (ZipImporter *)obj; 480 char *fullname; 481 482 if (!PyArg_ParseTuple(args, "s:zipimporter.get_code", &fullname)) 483 return NULL; 484 485 return get_module_code(self, fullname, NULL, NULL); 486} 487 488static PyObject * 489zipimporter_get_source(PyObject *obj, PyObject *args) 490{ 491 ZipImporter *self = (ZipImporter *)obj; 492 PyObject *toc_entry; 493 char *fullname, *subname, path[MAXPATHLEN+1]; 494 int len; 495 enum zi_module_info mi; 496 497 if (!PyArg_ParseTuple(args, "s:zipimporter.get_source", &fullname)) 498 return NULL; 499 500 mi = get_module_info(self, fullname); 501 if (mi == MI_ERROR) 502 return NULL; 503 if (mi == MI_NOT_FOUND) { 504 PyErr_Format(ZipImportError, "can't find module '%s'", fullname); 505 return NULL; 506 } 507 subname = get_subname(fullname); 508 509 len = make_filename(self->prefix, subname, path, sizeof(path)); 510 if (len < 0) 511 return NULL; 512 513 if (mi == MI_PACKAGE) { 514 path[len] = SEP; 515 strcpy(path + len + 1, "__init__.py"); 516 } 517 else 518 strcpy(path + len, ".py"); 519 520 toc_entry = PyDict_GetItemString(self->files, path); 521 if (toc_entry != NULL) { 522 PyObject *res, *bytes; 523 bytes = get_data(self->archive, toc_entry); 524 if (bytes == NULL) 525 return NULL; 526 res = PyUnicode_FromStringAndSize(PyBytes_AS_STRING(bytes), 527 PyBytes_GET_SIZE(bytes)); 528 Py_DECREF(bytes); 529 return res; 530 } 531 532 /* we have the module, but no source */ 533 Py_INCREF(Py_None); 534 return Py_None; 535} 536 537PyDoc_STRVAR(doc_find_module, 538"find_module(fullname, path=None) -> self or None.\n\ 539\n\ 540Search for a module specified by 'fullname'. 'fullname' must be the\n\ 541fully qualified (dotted) module name. It returns the zipimporter\n\ 542instance itself if the module was found, or None if it wasn't.\n\ 543The optional 'path' argument is ignored -- it's there for compatibility\n\ 544with the importer protocol."); 545 546PyDoc_STRVAR(doc_load_module, 547"load_module(fullname) -> module.\n\ 548\n\ 549Load the module specified by 'fullname'. 'fullname' must be the\n\ 550fully qualified (dotted) module name. It returns the imported\n\ 551module, or raises ZipImportError if it wasn't found."); 552 553PyDoc_STRVAR(doc_get_data, 554"get_data(pathname) -> string with file data.\n\ 555\n\ 556Return the data associated with 'pathname'. Raise IOError if\n\ 557the file wasn't found."); 558 559PyDoc_STRVAR(doc_is_package, 560"is_package(fullname) -> bool.\n\ 561\n\ 562Return True if the module specified by fullname is a package.\n\ 563Raise ZipImportError if the module couldn't be found."); 564 565PyDoc_STRVAR(doc_get_code, 566"get_code(fullname) -> code object.\n\ 567\n\ 568Return the code object for the specified module. Raise ZipImportError\n\ 569if the module couldn't be found."); 570 571PyDoc_STRVAR(doc_get_source, 572"get_source(fullname) -> source string.\n\ 573\n\ 574Return the source code for the specified module. Raise ZipImportError\n\ 575if the module couldn't be found, return None if the archive does\n\ 576contain the module, but has no source for it."); 577 578 579PyDoc_STRVAR(doc_get_filename, 580"get_filename(fullname) -> filename string.\n\ 581\n\ 582Return the filename for the specified module."); 583 584static PyMethodDef zipimporter_methods[] = { 585 {"find_module", zipimporter_find_module, METH_VARARGS, 586 doc_find_module}, 587 {"load_module", zipimporter_load_module, METH_VARARGS, 588 doc_load_module}, 589 {"get_data", zipimporter_get_data, METH_VARARGS, 590 doc_get_data}, 591 {"get_code", zipimporter_get_code, METH_VARARGS, 592 doc_get_code}, 593 {"get_source", zipimporter_get_source, METH_VARARGS, 594 doc_get_source}, 595 {"get_filename", zipimporter_get_filename, METH_VARARGS, 596 doc_get_filename}, 597 {"is_package", zipimporter_is_package, METH_VARARGS, 598 doc_is_package}, 599 {NULL, NULL} /* sentinel */ 600}; 601 602static PyMemberDef zipimporter_members[] = { 603 {"archive", T_OBJECT, offsetof(ZipImporter, archive), READONLY}, 604 {"prefix", T_OBJECT, offsetof(ZipImporter, prefix), READONLY}, 605 {"_files", T_OBJECT, offsetof(ZipImporter, files), READONLY}, 606 {NULL} 607}; 608 609PyDoc_STRVAR(zipimporter_doc, 610"zipimporter(archivepath) -> zipimporter object\n\ 611\n\ 612Create a new zipimporter instance. 'archivepath' must be a path to\n\ 613a zipfile, or to a specific path inside a zipfile. For example, it can be\n\ 614'/tmp/myimport.zip', or '/tmp/myimport.zip/mydirectory', if mydirectory is a\n\ 615valid directory inside the archive.\n\ 616\n\ 617'ZipImportError is raised if 'archivepath' doesn't point to a valid Zip\n\ 618archive.\n\ 619\n\ 620The 'archive' attribute of zipimporter objects contains the name of the\n\ 621zipfile targeted."); 622 623#define DEFERRED_ADDRESS(ADDR) 0 624 625static PyTypeObject ZipImporter_Type = { 626 PyVarObject_HEAD_INIT(DEFERRED_ADDRESS(&PyType_Type), 0) 627 "zipimport.zipimporter", 628 sizeof(ZipImporter), 629 0, /* tp_itemsize */ 630 (destructor)zipimporter_dealloc, /* tp_dealloc */ 631 0, /* tp_print */ 632 0, /* tp_getattr */ 633 0, /* tp_setattr */ 634 0, /* tp_reserved */ 635 (reprfunc)zipimporter_repr, /* tp_repr */ 636 0, /* tp_as_number */ 637 0, /* tp_as_sequence */ 638 0, /* tp_as_mapping */ 639 0, /* tp_hash */ 640 0, /* tp_call */ 641 0, /* tp_str */ 642 PyObject_GenericGetAttr, /* tp_getattro */ 643 0, /* tp_setattro */ 644 0, /* tp_as_buffer */ 645 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | 646 Py_TPFLAGS_HAVE_GC, /* tp_flags */ 647 zipimporter_doc, /* tp_doc */ 648 zipimporter_traverse, /* tp_traverse */ 649 0, /* tp_clear */ 650 0, /* tp_richcompare */ 651 0, /* tp_weaklistoffset */ 652 0, /* tp_iter */ 653 0, /* tp_iternext */ 654 zipimporter_methods, /* tp_methods */ 655 zipimporter_members, /* tp_members */ 656 0, /* tp_getset */ 657 0, /* tp_base */ 658 0, /* tp_dict */ 659 0, /* tp_descr_get */ 660 0, /* tp_descr_set */ 661 0, /* tp_dictoffset */ 662 (initproc)zipimporter_init, /* tp_init */ 663 PyType_GenericAlloc, /* tp_alloc */ 664 PyType_GenericNew, /* tp_new */ 665 PyObject_GC_Del, /* tp_free */ 666}; 667 668 669/* implementation */ 670 671/* Given a buffer, return the long that is represented by the first 672 4 bytes, encoded as little endian. This partially reimplements 673 marshal.c:r_long() */ 674static long 675get_long(unsigned char *buf) { 676 long x; 677 x = buf[0]; 678 x |= (long)buf[1] << 8; 679 x |= (long)buf[2] << 16; 680 x |= (long)buf[3] << 24; 681#if SIZEOF_LONG > 4 682 /* Sign extension for 64-bit machines */ 683 x |= -(x & 0x80000000L); 684#endif 685 return x; 686} 687 688/* 689 read_directory(archive) -> files dict (new reference) 690 691 Given a path to a Zip archive, build a dict, mapping file names 692 (local to the archive, using SEP as a separator) to toc entries. 693 694 A toc_entry is a tuple: 695 696 (__file__, # value to use for __file__, available for all files, 697 # encoded to the filesystem encoding 698 compress, # compression kind; 0 for uncompressed 699 data_size, # size of compressed data on disk 700 file_size, # size of decompressed data 701 file_offset, # offset of file header from start of archive 702 time, # mod time of file (in dos format) 703 date, # mod data of file (in dos format) 704 crc, # crc checksum of the data 705 ) 706 707 Directories can be recognized by the trailing SEP in the name, 708 data_size and file_offset are 0. 709*/ 710static PyObject * 711read_directory(PyObject *archive_obj) 712{ 713 /* FIXME: work on Py_UNICODE* instead of char* */ 714 PyObject *files = NULL; 715 FILE *fp; 716 unsigned short flags; 717 long compress, crc, data_size, file_size, file_offset, date, time; 718 long header_offset, name_size, header_size, header_position; 719 long i, l, count; 720 size_t length; 721 Py_UNICODE path[MAXPATHLEN + 5]; 722 char name[MAXPATHLEN + 5]; 723 PyObject *nameobj = NULL; 724 char *p, endof_central_dir[22]; 725 long arc_offset; /* offset from beginning of file to start of zip-archive */ 726 PyObject *pathobj; 727 const char *charset; 728 int bootstrap; 729 730 if (PyUnicode_GET_SIZE(archive_obj) > MAXPATHLEN) { 731 PyErr_SetString(PyExc_OverflowError, 732 "Zip path name is too long"); 733 return NULL; 734 } 735 Py_UNICODE_strcpy(path, PyUnicode_AS_UNICODE(archive_obj)); 736 737 fp = _Py_fopen(archive_obj, "rb"); 738 if (fp == NULL) { 739 if (!PyErr_Occurred()) 740 PyErr_Format(ZipImportError, "can't open Zip file: '%U'", archive_obj); 741 return NULL; 742 } 743 fseek(fp, -22, SEEK_END); 744 header_position = ftell(fp); 745 if (fread(endof_central_dir, 1, 22, fp) != 22) { 746 fclose(fp); 747 PyErr_Format(ZipImportError, "can't read Zip file: '%U'", archive_obj); 748 return NULL; 749 } 750 if (get_long((unsigned char *)endof_central_dir) != 0x06054B50) { 751 /* Bad: End of Central Dir signature */ 752 fclose(fp); 753 PyErr_Format(ZipImportError, "not a Zip file: '%U'", archive_obj); 754 return NULL; 755 } 756 757 header_size = get_long((unsigned char *)endof_central_dir + 12); 758 header_offset = get_long((unsigned char *)endof_central_dir + 16); 759 arc_offset = header_position - header_offset - header_size; 760 header_offset += arc_offset; 761 762 files = PyDict_New(); 763 if (files == NULL) 764 goto error; 765 766 length = Py_UNICODE_strlen(path); 767 path[length] = SEP; 768 769 /* Start of Central Directory */ 770 count = 0; 771 for (;;) { 772 PyObject *t; 773 int err; 774 775 fseek(fp, header_offset, 0); /* Start of file header */ 776 l = PyMarshal_ReadLongFromFile(fp); 777 if (l != 0x02014B50) 778 break; /* Bad: Central Dir File Header */ 779 fseek(fp, header_offset + 8, 0); 780 flags = (unsigned short)PyMarshal_ReadShortFromFile(fp); 781 compress = PyMarshal_ReadShortFromFile(fp); 782 time = PyMarshal_ReadShortFromFile(fp); 783 date = PyMarshal_ReadShortFromFile(fp); 784 crc = PyMarshal_ReadLongFromFile(fp); 785 data_size = PyMarshal_ReadLongFromFile(fp); 786 file_size = PyMarshal_ReadLongFromFile(fp); 787 name_size = PyMarshal_ReadShortFromFile(fp); 788 header_size = 46 + name_size + 789 PyMarshal_ReadShortFromFile(fp) + 790 PyMarshal_ReadShortFromFile(fp); 791 fseek(fp, header_offset + 42, 0); 792 file_offset = PyMarshal_ReadLongFromFile(fp) + arc_offset; 793 if (name_size > MAXPATHLEN) 794 name_size = MAXPATHLEN; 795 796 p = name; 797 for (i = 0; i < name_size; i++) { 798 *p = (char)getc(fp); 799 if (*p == '/') 800 *p = SEP; 801 p++; 802 } 803 *p = 0; /* Add terminating null byte */ 804 header_offset += header_size; 805 806 bootstrap = 0; 807 if (flags & 0x0800) 808 charset = "utf-8"; 809 else if (!PyThreadState_GET()->interp->codecs_initialized) { 810 /* During bootstrap, we may need to load the encodings 811 package from a ZIP file. But the cp437 encoding is implemented 812 in Python in the encodings package. 813 814 Break out of this dependency by assuming that the path to 815 the encodings module is ASCII-only. */ 816 charset = "ascii"; 817 bootstrap = 1; 818 } 819 else 820 charset = "cp437"; 821 nameobj = PyUnicode_Decode(name, name_size, charset, NULL); 822 if (nameobj == NULL) { 823 if (bootstrap) 824 PyErr_Format(PyExc_NotImplementedError, 825 "bootstrap issue: python%i%i.zip contains non-ASCII " 826 "filenames without the unicode flag", 827 PY_MAJOR_VERSION, PY_MINOR_VERSION); 828 goto error; 829 } 830 Py_UNICODE_strncpy(path + length + 1, PyUnicode_AS_UNICODE(nameobj), MAXPATHLEN - length - 1); 831 832 pathobj = PyUnicode_FromUnicode(path, Py_UNICODE_strlen(path)); 833 if (pathobj == NULL) 834 goto error; 835 t = Py_BuildValue("Niiiiiii", pathobj, compress, data_size, 836 file_size, file_offset, time, date, crc); 837 if (t == NULL) 838 goto error; 839 err = PyDict_SetItem(files, nameobj, t); 840 Py_CLEAR(nameobj); 841 Py_DECREF(t); 842 if (err != 0) 843 goto error; 844 count++; 845 } 846 fclose(fp); 847 if (Py_VerboseFlag) 848 PySys_FormatStderr("# zipimport: found %ld names in %U\n", 849 count, archive_obj); 850 return files; 851error: 852 fclose(fp); 853 Py_XDECREF(files); 854 Py_XDECREF(nameobj); 855 return NULL; 856} 857 858/* Return the zlib.decompress function object, or NULL if zlib couldn't 859 be imported. The function is cached when found, so subsequent calls 860 don't import zlib again. */ 861static PyObject * 862get_decompress_func(void) 863{ 864 static int importing_zlib = 0; 865 PyObject *zlib; 866 PyObject *decompress; 867 868 if (importing_zlib != 0) 869 /* Someone has a zlib.py[co] in their Zip file; 870 let's avoid a stack overflow. */ 871 return NULL; 872 importing_zlib = 1; 873 zlib = PyImport_ImportModuleNoBlock("zlib"); 874 importing_zlib = 0; 875 if (zlib != NULL) { 876 decompress = PyObject_GetAttrString(zlib, 877 "decompress"); 878 Py_DECREF(zlib); 879 } 880 else { 881 PyErr_Clear(); 882 decompress = NULL; 883 } 884 if (Py_VerboseFlag) 885 PySys_WriteStderr("# zipimport: zlib %s\n", 886 zlib != NULL ? "available": "UNAVAILABLE"); 887 return decompress; 888} 889 890/* Given a path to a Zip file and a toc_entry, return the (uncompressed) 891 data as a new reference. */ 892static PyObject * 893get_data(PyObject *archive, PyObject *toc_entry) 894{ 895 PyObject *raw_data, *data = NULL, *decompress; 896 char *buf; 897 FILE *fp; 898 int err; 899 Py_ssize_t bytes_read = 0; 900 long l; 901 PyObject *datapath; 902 long compress, data_size, file_size, file_offset, bytes_size; 903 long time, date, crc; 904 905 if (!PyArg_ParseTuple(toc_entry, "Olllllll", &datapath, &compress, 906 &data_size, &file_size, &file_offset, &time, 907 &date, &crc)) { 908 return NULL; 909 } 910 911 fp = _Py_fopen(archive, "rb"); 912 if (!fp) { 913 if (!PyErr_Occurred()) 914 PyErr_Format(PyExc_IOError, 915 "zipimport: can not open file %U", archive); 916 return NULL; 917 } 918 919 /* Check to make sure the local file header is correct */ 920 fseek(fp, file_offset, 0); 921 l = PyMarshal_ReadLongFromFile(fp); 922 if (l != 0x04034B50) { 923 /* Bad: Local File Header */ 924 PyErr_Format(ZipImportError, 925 "bad local file header in %U", 926 archive); 927 fclose(fp); 928 return NULL; 929 } 930 fseek(fp, file_offset + 26, 0); 931 l = 30 + PyMarshal_ReadShortFromFile(fp) + 932 PyMarshal_ReadShortFromFile(fp); /* local header size */ 933 file_offset += l; /* Start of file data */ 934 935 bytes_size = compress == 0 ? data_size : data_size + 1; 936 if (bytes_size == 0) 937 bytes_size++; 938 raw_data = PyBytes_FromStringAndSize((char *)NULL, bytes_size); 939 940 if (raw_data == NULL) { 941 fclose(fp); 942 return NULL; 943 } 944 buf = PyBytes_AsString(raw_data); 945 946 err = fseek(fp, file_offset, 0); 947 if (err == 0) 948 bytes_read = fread(buf, 1, data_size, fp); 949 fclose(fp); 950 if (err || bytes_read != data_size) { 951 PyErr_SetString(PyExc_IOError, 952 "zipimport: can't read data"); 953 Py_DECREF(raw_data); 954 return NULL; 955 } 956 957 if (compress != 0) { 958 buf[data_size] = 'Z'; /* saw this in zipfile.py */ 959 data_size++; 960 } 961 buf[data_size] = '\0'; 962 963 if (compress == 0) { /* data is not compressed */ 964 data = PyBytes_FromStringAndSize(buf, data_size); 965 Py_DECREF(raw_data); 966 return data; 967 } 968 969 /* Decompress with zlib */ 970 decompress = get_decompress_func(); 971 if (decompress == NULL) { 972 PyErr_SetString(ZipImportError, 973 "can't decompress data; " 974 "zlib not available"); 975 goto error; 976 } 977 data = PyObject_CallFunction(decompress, "Oi", raw_data, -15); 978 Py_DECREF(decompress); 979error: 980 Py_DECREF(raw_data); 981 return data; 982} 983 984/* Lenient date/time comparison function. The precision of the mtime 985 in the archive is lower than the mtime stored in a .pyc: we 986 must allow a difference of at most one second. */ 987static int 988eq_mtime(time_t t1, time_t t2) 989{ 990 time_t d = t1 - t2; 991 if (d < 0) 992 d = -d; 993 /* dostime only stores even seconds, so be lenient */ 994 return d <= 1; 995} 996 997/* Given the contents of a .py[co] file in a buffer, unmarshal the data 998 and return the code object. Return None if it the magic word doesn't 999 match (we do this instead of raising an exception as we fall back 1000 to .py if available and we don't want to mask other errors). 1001 Returns a new reference. */ 1002static PyObject * 1003unmarshal_code(char *pathname, PyObject *data, time_t mtime) 1004{ 1005 PyObject *code; 1006 char *buf = PyBytes_AsString(data); 1007 Py_ssize_t size = PyBytes_Size(data); 1008 1009 if (size <= 9) { 1010 PyErr_SetString(ZipImportError, 1011 "bad pyc data"); 1012 return NULL; 1013 } 1014 1015 if (get_long((unsigned char *)buf) != PyImport_GetMagicNumber()) { 1016 if (Py_VerboseFlag) 1017 PySys_WriteStderr("# %s has bad magic\n", 1018 pathname); 1019 Py_INCREF(Py_None); 1020 return Py_None; /* signal caller to try alternative */ 1021 } 1022 1023 if (mtime != 0 && !eq_mtime(get_long((unsigned char *)buf + 4), 1024 mtime)) { 1025 if (Py_VerboseFlag) 1026 PySys_WriteStderr("# %s has bad mtime\n", 1027 pathname); 1028 Py_INCREF(Py_None); 1029 return Py_None; /* signal caller to try alternative */ 1030 } 1031 1032 code = PyMarshal_ReadObjectFromString(buf + 8, size - 8); 1033 if (code == NULL) 1034 return NULL; 1035 if (!PyCode_Check(code)) { 1036 Py_DECREF(code); 1037 PyErr_Format(PyExc_TypeError, 1038 "compiled module %s is not a code object", 1039 pathname); 1040 return NULL; 1041 } 1042 return code; 1043} 1044 1045/* Replace any occurances of "\r\n?" in the input string with "\n". 1046 This converts DOS and Mac line endings to Unix line endings. 1047 Also append a trailing "\n" to be compatible with 1048 PyParser_SimpleParseFile(). Returns a new reference. */ 1049static PyObject * 1050normalize_line_endings(PyObject *source) 1051{ 1052 char *buf, *q, *p = PyBytes_AsString(source); 1053 PyObject *fixed_source; 1054 int len = 0; 1055 1056 if (!p) { 1057 return PyBytes_FromStringAndSize("\n\0", 2); 1058 } 1059 1060 /* one char extra for trailing \n and one for terminating \0 */ 1061 buf = (char *)PyMem_Malloc(PyBytes_Size(source) + 2); 1062 if (buf == NULL) { 1063 PyErr_SetString(PyExc_MemoryError, 1064 "zipimport: no memory to allocate " 1065 "source buffer"); 1066 return NULL; 1067 } 1068 /* replace "\r\n?" by "\n" */ 1069 for (q = buf; *p != '\0'; p++) { 1070 if (*p == '\r') { 1071 *q++ = '\n'; 1072 if (*(p + 1) == '\n') 1073 p++; 1074 } 1075 else 1076 *q++ = *p; 1077 len++; 1078 } 1079 *q++ = '\n'; /* add trailing \n */ 1080 *q = '\0'; 1081 fixed_source = PyBytes_FromStringAndSize(buf, len + 2); 1082 PyMem_Free(buf); 1083 return fixed_source; 1084} 1085 1086/* Given a string buffer containing Python source code, compile it 1087 return and return a code object as a new reference. */ 1088static PyObject * 1089compile_source(char *pathname, PyObject *source) 1090{ 1091 PyObject *code, *fixed_source; 1092 1093 fixed_source = normalize_line_endings(source); 1094 if (fixed_source == NULL) 1095 return NULL; 1096 1097 code = Py_CompileString(PyBytes_AsString(fixed_source), pathname, 1098 Py_file_input); 1099 Py_DECREF(fixed_source); 1100 return code; 1101} 1102 1103/* Convert the date/time values found in the Zip archive to a value 1104 that's compatible with the time stamp stored in .pyc files. */ 1105static time_t 1106parse_dostime(int dostime, int dosdate) 1107{ 1108 struct tm stm; 1109 1110 memset((void *) &stm, '\0', sizeof(stm)); 1111 1112 stm.tm_sec = (dostime & 0x1f) * 2; 1113 stm.tm_min = (dostime >> 5) & 0x3f; 1114 stm.tm_hour = (dostime >> 11) & 0x1f; 1115 stm.tm_mday = dosdate & 0x1f; 1116 stm.tm_mon = ((dosdate >> 5) & 0x0f) - 1; 1117 stm.tm_year = ((dosdate >> 9) & 0x7f) + 80; 1118 stm.tm_isdst = -1; /* wday/yday is ignored */ 1119 1120 return mktime(&stm); 1121} 1122 1123/* Given a path to a .pyc or .pyo file in the archive, return the 1124 modification time of the matching .py file, or 0 if no source 1125 is available. */ 1126static time_t 1127get_mtime_of_source(ZipImporter *self, char *path) 1128{ 1129 PyObject *toc_entry; 1130 time_t mtime = 0; 1131 Py_ssize_t lastchar = strlen(path) - 1; 1132 char savechar = path[lastchar]; 1133 path[lastchar] = '\0'; /* strip 'c' or 'o' from *.py[co] */ 1134 toc_entry = PyDict_GetItemString(self->files, path); 1135 if (toc_entry != NULL && PyTuple_Check(toc_entry) && 1136 PyTuple_Size(toc_entry) == 8) { 1137 /* fetch the time stamp of the .py file for comparison 1138 with an embedded pyc time stamp */ 1139 int time, date; 1140 time = PyLong_AsLong(PyTuple_GetItem(toc_entry, 5)); 1141 date = PyLong_AsLong(PyTuple_GetItem(toc_entry, 6)); 1142 mtime = parse_dostime(time, date); 1143 } 1144 path[lastchar] = savechar; 1145 return mtime; 1146} 1147 1148/* Return the code object for the module named by 'fullname' from the 1149 Zip archive as a new reference. */ 1150static PyObject * 1151get_code_from_data(ZipImporter *self, int ispackage, int isbytecode, 1152 time_t mtime, PyObject *toc_entry) 1153{ 1154 PyObject *data, *code; 1155 PyObject *modpath; 1156 1157 data = get_data(self->archive, toc_entry); 1158 if (data == NULL) 1159 return NULL; 1160 1161 modpath = PyUnicode_EncodeFSDefault(PyTuple_GetItem(toc_entry, 0)); 1162 if (modpath == NULL) { 1163 Py_DECREF(data); 1164 return NULL; 1165 } 1166 1167 if (isbytecode) 1168 code = unmarshal_code(PyBytes_AS_STRING(modpath), data, mtime); 1169 else 1170 code = compile_source(PyBytes_AS_STRING(modpath), data); 1171 Py_DECREF(modpath); 1172 Py_DECREF(data); 1173 return code; 1174} 1175 1176/* Get the code object associated with the module specified by 1177 'fullname'. */ 1178static PyObject * 1179get_module_code(ZipImporter *self, char *fullname, 1180 int *p_ispackage, PyObject **p_modpath) 1181{ 1182 PyObject *toc_entry; 1183 char *subname, path[MAXPATHLEN + 1]; 1184 int len; 1185 struct st_zip_searchorder *zso; 1186 1187 subname = get_subname(fullname); 1188 1189 len = make_filename(self->prefix, subname, path, sizeof(path)); 1190 if (len < 0) 1191 return NULL; 1192 1193 for (zso = zip_searchorder; *zso->suffix; zso++) { 1194 PyObject *code = NULL; 1195 1196 strcpy(path + len, zso->suffix); 1197 if (Py_VerboseFlag > 1) 1198 PySys_FormatStderr("# trying %U%c%s\n", 1199 self->archive, (int)SEP, path); 1200 toc_entry = PyDict_GetItemString(self->files, path); 1201 if (toc_entry != NULL) { 1202 time_t mtime = 0; 1203 int ispackage = zso->type & IS_PACKAGE; 1204 int isbytecode = zso->type & IS_BYTECODE; 1205 1206 if (isbytecode) 1207 mtime = get_mtime_of_source(self, path); 1208 if (p_ispackage != NULL) 1209 *p_ispackage = ispackage; 1210 code = get_code_from_data(self, ispackage, 1211 isbytecode, mtime, 1212 toc_entry); 1213 if (code == Py_None) { 1214 /* bad magic number or non-matching mtime 1215 in byte code, try next */ 1216 Py_DECREF(code); 1217 continue; 1218 } 1219 if (code != NULL && p_modpath != NULL) { 1220 *p_modpath = PyTuple_GetItem(toc_entry, 0); 1221 Py_INCREF(*p_modpath); 1222 } 1223 return code; 1224 } 1225 } 1226 PyErr_Format(ZipImportError, "can't find module '%s'", fullname); 1227 return NULL; 1228} 1229 1230 1231/* Module init */ 1232 1233PyDoc_STRVAR(zipimport_doc, 1234"zipimport provides support for importing Python modules from Zip archives.\n\ 1235\n\ 1236This module exports three objects:\n\ 1237- zipimporter: a class; its constructor takes a path to a Zip archive.\n\ 1238- ZipImportError: exception raised by zipimporter objects. It's a\n\ 1239 subclass of ImportError, so it can be caught as ImportError, too.\n\ 1240- _zip_directory_cache: a dict, mapping archive paths to zip directory\n\ 1241 info dicts, as used in zipimporter._files.\n\ 1242\n\ 1243It is usually not needed to use the zipimport module explicitly; it is\n\ 1244used by the builtin import mechanism for sys.path items that are paths\n\ 1245to Zip archives."); 1246 1247static struct PyModuleDef zipimportmodule = { 1248 PyModuleDef_HEAD_INIT, 1249 "zipimport", 1250 zipimport_doc, 1251 -1, 1252 NULL, 1253 NULL, 1254 NULL, 1255 NULL, 1256 NULL 1257}; 1258 1259PyMODINIT_FUNC 1260PyInit_zipimport(void) 1261{ 1262 PyObject *mod; 1263 1264 if (PyType_Ready(&ZipImporter_Type) < 0) 1265 return NULL; 1266 1267 /* Correct directory separator */ 1268 zip_searchorder[0].suffix[0] = SEP; 1269 zip_searchorder[1].suffix[0] = SEP; 1270 zip_searchorder[2].suffix[0] = SEP; 1271 if (Py_OptimizeFlag) { 1272 /* Reverse *.pyc and *.pyo */ 1273 struct st_zip_searchorder tmp; 1274 tmp = zip_searchorder[0]; 1275 zip_searchorder[0] = zip_searchorder[1]; 1276 zip_searchorder[1] = tmp; 1277 tmp = zip_searchorder[3]; 1278 zip_searchorder[3] = zip_searchorder[4]; 1279 zip_searchorder[4] = tmp; 1280 } 1281 1282 mod = PyModule_Create(&zipimportmodule); 1283 if (mod == NULL) 1284 return NULL; 1285 1286 ZipImportError = PyErr_NewException("zipimport.ZipImportError", 1287 PyExc_ImportError, NULL); 1288 if (ZipImportError == NULL) 1289 return NULL; 1290 1291 Py_INCREF(ZipImportError); 1292 if (PyModule_AddObject(mod, "ZipImportError", 1293 ZipImportError) < 0) 1294 return NULL; 1295 1296 Py_INCREF(&ZipImporter_Type); 1297 if (PyModule_AddObject(mod, "zipimporter", 1298 (PyObject *)&ZipImporter_Type) < 0) 1299 return NULL; 1300 1301 zip_directory_cache = PyDict_New(); 1302 if (zip_directory_cache == NULL) 1303 return NULL; 1304 Py_INCREF(zip_directory_cache); 1305 if (PyModule_AddObject(mod, "_zip_directory_cache", 1306 zip_directory_cache) < 0) 1307 return NULL; 1308 return mod; 1309} 1310