zipimport.c revision ced27516f5ff24d91382b5e9fb513f0682617a2e
#include "Python.h"
#include "structmember.h"
#include "osdefs.h"
#include "marshal.h"
#include <time.h>


/* Flags stored in st_zip_searchorder.type.  IS_PACKAGE and IS_BYTECODE
   are OR-ed together where both apply; IS_SOURCE is simply the absence
   of IS_BYTECODE. */
#define IS_SOURCE 0x0
#define IS_BYTECODE 0x1
#define IS_PACKAGE 0x2

/* One row of the suffix search table: the suffix appended to a module
   path and the IS_* flags describing what a match on it means.
   suffix[14] is exactly large enough for "/__init__.pyc" plus the NUL. */
struct st_zip_searchorder {
    char suffix[14];
    int type;
};

/* zip_searchorder defines how we search for a module in the Zip
   archive: we first search for a package __init__, then for
   non-package .pyc, .pyo and .py entries. The .pyc and .pyo entries
   are swapped by initzipimport() if we run in optimized mode. Also,
   '/' is replaced by SEP there. The table ends with an entry whose
   suffix is the empty string. */
static struct st_zip_searchorder zip_searchorder[] = {
    {"/__init__.pyc", IS_PACKAGE | IS_BYTECODE},
    {"/__init__.pyo", IS_PACKAGE | IS_BYTECODE},
    {"/__init__.py", IS_PACKAGE | IS_SOURCE},
    {".pyc", IS_BYTECODE},
    {".pyo", IS_BYTECODE},
    {".py", IS_SOURCE},
    {"", 0}
};

/* zipimporter object definition and support */

typedef struct _zipimporter ZipImporter;

struct _zipimporter {
    PyObject_HEAD
    PyObject *archive;  /* pathname of the Zip archive */
    PyObject *prefix;   /* file prefix: "a/sub/directory/" */
    PyObject *files;    /* dict with file info {path: toc_entry} */
};

/* Exception type raised by this module. */
static PyObject *ZipImportError;
/* Dict mapping an archive path to its files dict; consulted by
   zipimporter_init before an archive directory is read again. */
static PyObject *zip_directory_cache = NULL;

// GOOGLE(nanzhang): Changed two functions below to be visible to launcher so
// that launcher can access the zip metadata section.
/* forward decls */
PyObject *read_directory(const char *archive);
PyObject *get_data(const char *archive, PyObject *toc_entry);
static PyObject *get_module_code(ZipImporter *self, char *fullname,
                                 int *p_ispackage, char **p_modpath);


#define ZipImporter_Check(op) PyObject_TypeCheck(op, &ZipImporter_Type)


/* zipimporter.__init__
   Split the "subdirectory" from the Zip archive path, look up a matching
   entry in the zip directory cache, fetch the file directory from there
   if found, or else read it from the archive.
*/ 62static int 63zipimporter_init(ZipImporter *self, PyObject *args, PyObject *kwds) 64{ 65 char *path, *p, *prefix, buf[MAXPATHLEN+2]; 66 size_t len; 67 68 if (!_PyArg_NoKeywords("zipimporter()", kwds)) 69 return -1; 70 71 if (!PyArg_ParseTuple(args, "s:zipimporter", 72 &path)) 73 return -1; 74 75 len = strlen(path); 76 if (len == 0) { 77 PyErr_SetString(ZipImportError, "archive path is empty"); 78 return -1; 79 } 80 if (len >= MAXPATHLEN) { 81 PyErr_SetString(ZipImportError, 82 "archive path too long"); 83 return -1; 84 } 85 strcpy(buf, path); 86 87#ifdef ALTSEP 88 for (p = buf; *p; p++) { 89 if (*p == ALTSEP) 90 *p = SEP; 91 } 92#endif 93 94 path = NULL; 95 prefix = NULL; 96 for (;;) { 97#ifndef RISCOS 98 struct stat statbuf; 99 int rv; 100 101 rv = stat(buf, &statbuf); 102 if (rv == 0) { 103 /* it exists */ 104 if (S_ISREG(statbuf.st_mode)) 105 /* it's a file */ 106 path = buf; 107 break; 108 } 109#else 110 if (object_exists(buf)) { 111 /* it exists */ 112 if (isfile(buf)) 113 /* it's a file */ 114 path = buf; 115 break; 116 } 117#endif 118 /* back up one path element */ 119 p = strrchr(buf, SEP); 120 if (prefix != NULL) 121 *prefix = SEP; 122 if (p == NULL) 123 break; 124 *p = '\0'; 125 prefix = p; 126 } 127 if (path != NULL) { 128 PyObject *files; 129 files = PyDict_GetItemString(zip_directory_cache, path); 130 if (files == NULL) { 131 files = read_directory(buf); 132 if (files == NULL) 133 return -1; 134 if (PyDict_SetItemString(zip_directory_cache, path, 135 files) != 0) 136 return -1; 137 } 138 else 139 Py_INCREF(files); 140 self->files = files; 141 } 142 else { 143 PyErr_SetString(ZipImportError, "not a Zip file"); 144 return -1; 145 } 146 147 if (prefix == NULL) 148 prefix = ""; 149 else { 150 prefix++; 151 len = strlen(prefix); 152 if (prefix[len-1] != SEP) { 153 /* add trailing SEP */ 154 prefix[len] = SEP; 155 prefix[len + 1] = '\0'; 156 } 157 } 158 159 self->archive = PyString_FromString(buf); 160 if (self->archive == NULL) 161 return -1; 162 163 
self->prefix = PyString_FromString(prefix); 164 if (self->prefix == NULL) 165 return -1; 166 167 return 0; 168} 169 170/* GC support. */ 171static int 172zipimporter_traverse(PyObject *obj, visitproc visit, void *arg) 173{ 174 ZipImporter *self = (ZipImporter *)obj; 175 Py_VISIT(self->files); 176 return 0; 177} 178 179static void 180zipimporter_dealloc(ZipImporter *self) 181{ 182 PyObject_GC_UnTrack(self); 183 Py_XDECREF(self->archive); 184 Py_XDECREF(self->prefix); 185 Py_XDECREF(self->files); 186 Py_TYPE(self)->tp_free((PyObject *)self); 187} 188 189static PyObject * 190zipimporter_repr(ZipImporter *self) 191{ 192 char buf[500]; 193 char *archive = "???"; 194 char *prefix = ""; 195 196 if (self->archive != NULL && PyString_Check(self->archive)) 197 archive = PyString_AsString(self->archive); 198 if (self->prefix != NULL && PyString_Check(self->prefix)) 199 prefix = PyString_AsString(self->prefix); 200 if (prefix != NULL && *prefix) 201 PyOS_snprintf(buf, sizeof(buf), 202 "<zipimporter object \"%.300s%c%.150s\">", 203 archive, SEP, prefix); 204 else 205 PyOS_snprintf(buf, sizeof(buf), 206 "<zipimporter object \"%.300s\">", 207 archive); 208 return PyString_FromString(buf); 209} 210 211/* return fullname.split(".")[-1] */ 212static char * 213get_subname(char *fullname) 214{ 215 char *subname = strrchr(fullname, '.'); 216 if (subname == NULL) 217 subname = fullname; 218 else 219 subname++; 220 return subname; 221} 222 223/* Given a (sub)modulename, write the potential file path in the 224 archive (without extension) to the path buffer. Return the 225 length of the resulting string. 
*/ 226static int 227make_filename(char *prefix, char *name, char *path) 228{ 229 size_t len; 230 char *p; 231 232 len = strlen(prefix); 233 234 /* self.prefix + name [+ SEP + "__init__"] + ".py[co]" */ 235 if (len + strlen(name) + 13 >= MAXPATHLEN) { 236 PyErr_SetString(ZipImportError, "path too long"); 237 return -1; 238 } 239 240 strcpy(path, prefix); 241 strcpy(path + len, name); 242 for (p = path + len; *p; p++) { 243 if (*p == '.') 244 *p = SEP; 245 } 246 len += strlen(name); 247 assert(len < INT_MAX); 248 return (int)len; 249} 250 251enum zi_module_info { 252 MI_ERROR, 253 MI_NOT_FOUND, 254 MI_MODULE, 255 MI_PACKAGE 256}; 257 258/* Return some information about a module. */ 259static enum zi_module_info 260get_module_info(ZipImporter *self, char *fullname) 261{ 262 char *subname, path[MAXPATHLEN + 1]; 263 int len; 264 struct st_zip_searchorder *zso; 265 266 subname = get_subname(fullname); 267 268 len = make_filename(PyString_AsString(self->prefix), subname, path); 269 if (len < 0) 270 return MI_ERROR; 271 272 for (zso = zip_searchorder; *zso->suffix; zso++) { 273 strcpy(path + len, zso->suffix); 274 if (PyDict_GetItemString(self->files, path) != NULL) { 275 if (zso->type & IS_PACKAGE) 276 return MI_PACKAGE; 277 else 278 return MI_MODULE; 279 } 280 } 281 return MI_NOT_FOUND; 282} 283 284/* Check whether we can satisfy the import of the module named by 285 'fullname'. Return self if we can, None if we can't. 
*/ 286static PyObject * 287zipimporter_find_module(PyObject *obj, PyObject *args) 288{ 289 ZipImporter *self = (ZipImporter *)obj; 290 PyObject *path = NULL; 291 char *fullname; 292 enum zi_module_info mi; 293 294 if (!PyArg_ParseTuple(args, "s|O:zipimporter.find_module", 295 &fullname, &path)) 296 return NULL; 297 298 mi = get_module_info(self, fullname); 299 if (mi == MI_ERROR) 300 return NULL; 301 if (mi == MI_NOT_FOUND) { 302 Py_INCREF(Py_None); 303 return Py_None; 304 } 305 Py_INCREF(self); 306 return (PyObject *)self; 307} 308 309/* Load and return the module named by 'fullname'. */ 310static PyObject * 311zipimporter_load_module(PyObject *obj, PyObject *args) 312{ 313 ZipImporter *self = (ZipImporter *)obj; 314 PyObject *code, *mod, *dict; 315 char *fullname, *modpath; 316 int ispackage; 317 318 if (!PyArg_ParseTuple(args, "s:zipimporter.load_module", 319 &fullname)) 320 return NULL; 321 322 code = get_module_code(self, fullname, &ispackage, &modpath); 323 if (code == NULL) 324 return NULL; 325 326 mod = PyImport_AddModule(fullname); 327 if (mod == NULL) { 328 Py_DECREF(code); 329 return NULL; 330 } 331 dict = PyModule_GetDict(mod); 332 333 /* mod.__loader__ = self */ 334 if (PyDict_SetItemString(dict, "__loader__", (PyObject *)self) != 0) 335 goto error; 336 337 if (ispackage) { 338 /* add __path__ to the module *before* the code gets 339 executed */ 340 PyObject *pkgpath, *fullpath; 341 char *prefix = PyString_AsString(self->prefix); 342 char *subname = get_subname(fullname); 343 int err; 344 345 fullpath = PyString_FromFormat("%s%c%s%s", 346 PyString_AsString(self->archive), 347 SEP, 348 *prefix ? 
prefix : "", 349 subname); 350 if (fullpath == NULL) 351 goto error; 352 353 pkgpath = Py_BuildValue("[O]", fullpath); 354 Py_DECREF(fullpath); 355 if (pkgpath == NULL) 356 goto error; 357 err = PyDict_SetItemString(dict, "__path__", pkgpath); 358 Py_DECREF(pkgpath); 359 if (err != 0) 360 goto error; 361 } 362 mod = PyImport_ExecCodeModuleEx(fullname, code, modpath); 363 Py_DECREF(code); 364 if (Py_VerboseFlag) 365 PySys_WriteStderr("import %s # loaded from Zip %s\n", 366 fullname, modpath); 367 return mod; 368error: 369 Py_DECREF(code); 370 Py_DECREF(mod); 371 return NULL; 372} 373 374/* Return a string matching __file__ for the named module */ 375static PyObject * 376zipimporter_get_filename(PyObject *obj, PyObject *args) 377{ 378 ZipImporter *self = (ZipImporter *)obj; 379 PyObject *code; 380 char *fullname, *modpath; 381 int ispackage; 382 383 if (!PyArg_ParseTuple(args, "s:zipimporter.get_filename", 384 &fullname)) 385 return NULL; 386 387 /* Deciding the filename requires working out where the code 388 would come from if the module was actually loaded */ 389 code = get_module_code(self, fullname, &ispackage, &modpath); 390 if (code == NULL) 391 return NULL; 392 Py_DECREF(code); /* Only need the path info */ 393 394 return PyString_FromString(modpath); 395} 396 397/* Return a bool signifying whether the module is a package or not. 
*/ 398static PyObject * 399zipimporter_is_package(PyObject *obj, PyObject *args) 400{ 401 ZipImporter *self = (ZipImporter *)obj; 402 char *fullname; 403 enum zi_module_info mi; 404 405 if (!PyArg_ParseTuple(args, "s:zipimporter.is_package", 406 &fullname)) 407 return NULL; 408 409 mi = get_module_info(self, fullname); 410 if (mi == MI_ERROR) 411 return NULL; 412 if (mi == MI_NOT_FOUND) { 413 PyErr_Format(ZipImportError, "can't find module '%.200s'", 414 fullname); 415 return NULL; 416 } 417 return PyBool_FromLong(mi == MI_PACKAGE); 418} 419 420static PyObject * 421zipimporter_get_data(PyObject *obj, PyObject *args) 422{ 423 ZipImporter *self = (ZipImporter *)obj; 424 char *path; 425#ifdef ALTSEP 426 char *p, buf[MAXPATHLEN + 1]; 427#endif 428 PyObject *toc_entry; 429 Py_ssize_t len; 430 431 if (!PyArg_ParseTuple(args, "s:zipimporter.get_data", &path)) 432 return NULL; 433 434#ifdef ALTSEP 435 if (strlen(path) >= MAXPATHLEN) { 436 PyErr_SetString(ZipImportError, "path too long"); 437 return NULL; 438 } 439 strcpy(buf, path); 440 for (p = buf; *p; p++) { 441 if (*p == ALTSEP) 442 *p = SEP; 443 } 444 path = buf; 445#endif 446 len = PyString_Size(self->archive); 447 if ((size_t)len < strlen(path) && 448 strncmp(path, PyString_AsString(self->archive), len) == 0 && 449 path[len] == SEP) { 450 path = path + len + 1; 451 } 452 453 toc_entry = PyDict_GetItemString(self->files, path); 454 if (toc_entry == NULL) { 455 PyErr_SetFromErrnoWithFilename(PyExc_IOError, path); 456 return NULL; 457 } 458 return get_data(PyString_AsString(self->archive), toc_entry); 459} 460 461static PyObject * 462zipimporter_get_code(PyObject *obj, PyObject *args) 463{ 464 ZipImporter *self = (ZipImporter *)obj; 465 char *fullname; 466 467 if (!PyArg_ParseTuple(args, "s:zipimporter.get_code", &fullname)) 468 return NULL; 469 470 return get_module_code(self, fullname, NULL, NULL); 471} 472 473static PyObject * 474zipimporter_get_source(PyObject *obj, PyObject *args) 475{ 476 ZipImporter *self = 
(ZipImporter *)obj; 477 PyObject *toc_entry; 478 char *fullname, *subname, path[MAXPATHLEN+1]; 479 int len; 480 enum zi_module_info mi; 481 482 if (!PyArg_ParseTuple(args, "s:zipimporter.get_source", &fullname)) 483 return NULL; 484 485 mi = get_module_info(self, fullname); 486 if (mi == MI_ERROR) 487 return NULL; 488 if (mi == MI_NOT_FOUND) { 489 PyErr_Format(ZipImportError, "can't find module '%.200s'", 490 fullname); 491 return NULL; 492 } 493 subname = get_subname(fullname); 494 495 len = make_filename(PyString_AsString(self->prefix), subname, path); 496 if (len < 0) 497 return NULL; 498 499 if (mi == MI_PACKAGE) { 500 path[len] = SEP; 501 strcpy(path + len + 1, "__init__.py"); 502 } 503 else 504 strcpy(path + len, ".py"); 505 506 toc_entry = PyDict_GetItemString(self->files, path); 507 if (toc_entry != NULL) 508 return get_data(PyString_AsString(self->archive), toc_entry); 509 510 /* we have the module, but no source */ 511 Py_INCREF(Py_None); 512 return Py_None; 513} 514 515PyDoc_STRVAR(doc_find_module, 516"find_module(fullname, path=None) -> self or None.\n\ 517\n\ 518Search for a module specified by 'fullname'. 'fullname' must be the\n\ 519fully qualified (dotted) module name. It returns the zipimporter\n\ 520instance itself if the module was found, or None if it wasn't.\n\ 521The optional 'path' argument is ignored -- it's there for compatibility\n\ 522with the importer protocol."); 523 524PyDoc_STRVAR(doc_load_module, 525"load_module(fullname) -> module.\n\ 526\n\ 527Load the module specified by 'fullname'. 'fullname' must be the\n\ 528fully qualified (dotted) module name. It returns the imported\n\ 529module, or raises ZipImportError if it wasn't found."); 530 531PyDoc_STRVAR(doc_get_data, 532"get_data(pathname) -> string with file data.\n\ 533\n\ 534Return the data associated with 'pathname'. 
Raise IOError if\n\ 535the file wasn't found."); 536 537PyDoc_STRVAR(doc_is_package, 538"is_package(fullname) -> bool.\n\ 539\n\ 540Return True if the module specified by fullname is a package.\n\ 541Raise ZipImportError if the module couldn't be found."); 542 543PyDoc_STRVAR(doc_get_code, 544"get_code(fullname) -> code object.\n\ 545\n\ 546Return the code object for the specified module. Raise ZipImportError\n\ 547if the module couldn't be found."); 548 549PyDoc_STRVAR(doc_get_source, 550"get_source(fullname) -> source string.\n\ 551\n\ 552Return the source code for the specified module. Raise ZipImportError\n\ 553if the module couldn't be found, return None if the archive does\n\ 554contain the module, but has no source for it."); 555 556 557PyDoc_STRVAR(doc_get_filename, 558"get_filename(fullname) -> filename string.\n\ 559\n\ 560Return the filename for the specified module."); 561 562static PyMethodDef zipimporter_methods[] = { 563 {"find_module", zipimporter_find_module, METH_VARARGS, 564 doc_find_module}, 565 {"load_module", zipimporter_load_module, METH_VARARGS, 566 doc_load_module}, 567 {"get_data", zipimporter_get_data, METH_VARARGS, 568 doc_get_data}, 569 {"get_code", zipimporter_get_code, METH_VARARGS, 570 doc_get_code}, 571 {"get_source", zipimporter_get_source, METH_VARARGS, 572 doc_get_source}, 573 {"get_filename", zipimporter_get_filename, METH_VARARGS, 574 doc_get_filename}, 575 {"is_package", zipimporter_is_package, METH_VARARGS, 576 doc_is_package}, 577 {NULL, NULL} /* sentinel */ 578}; 579 580static PyMemberDef zipimporter_members[] = { 581 {"archive", T_OBJECT, offsetof(ZipImporter, archive), READONLY}, 582 {"prefix", T_OBJECT, offsetof(ZipImporter, prefix), READONLY}, 583 {"_files", T_OBJECT, offsetof(ZipImporter, files), READONLY}, 584 {NULL} 585}; 586 587PyDoc_STRVAR(zipimporter_doc, 588"zipimporter(archivepath) -> zipimporter object\n\ 589\n\ 590Create a new zipimporter instance. 
'archivepath' must be a path to\n\ 591a zipfile, or to a specific path inside a zipfile. For example, it can be\n\ 592'/tmp/myimport.zip', or '/tmp/myimport.zip/mydirectory', if mydirectory is a\n\ 593valid directory inside the archive.\n\ 594\n\ 595'ZipImportError is raised if 'archivepath' doesn't point to a valid Zip\n\ 596archive.\n\ 597\n\ 598The 'archive' attribute of zipimporter objects contains the name of the\n\ 599zipfile targeted."); 600 601#define DEFERRED_ADDRESS(ADDR) 0 602 603static PyTypeObject ZipImporter_Type = { 604 PyVarObject_HEAD_INIT(DEFERRED_ADDRESS(&PyType_Type), 0) 605 "zipimport.zipimporter", 606 sizeof(ZipImporter), 607 0, /* tp_itemsize */ 608 (destructor)zipimporter_dealloc, /* tp_dealloc */ 609 0, /* tp_print */ 610 0, /* tp_getattr */ 611 0, /* tp_setattr */ 612 0, /* tp_compare */ 613 (reprfunc)zipimporter_repr, /* tp_repr */ 614 0, /* tp_as_number */ 615 0, /* tp_as_sequence */ 616 0, /* tp_as_mapping */ 617 0, /* tp_hash */ 618 0, /* tp_call */ 619 0, /* tp_str */ 620 PyObject_GenericGetAttr, /* tp_getattro */ 621 0, /* tp_setattro */ 622 0, /* tp_as_buffer */ 623 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | 624 Py_TPFLAGS_HAVE_GC, /* tp_flags */ 625 zipimporter_doc, /* tp_doc */ 626 zipimporter_traverse, /* tp_traverse */ 627 0, /* tp_clear */ 628 0, /* tp_richcompare */ 629 0, /* tp_weaklistoffset */ 630 0, /* tp_iter */ 631 0, /* tp_iternext */ 632 zipimporter_methods, /* tp_methods */ 633 zipimporter_members, /* tp_members */ 634 0, /* tp_getset */ 635 0, /* tp_base */ 636 0, /* tp_dict */ 637 0, /* tp_descr_get */ 638 0, /* tp_descr_set */ 639 0, /* tp_dictoffset */ 640 (initproc)zipimporter_init, /* tp_init */ 641 PyType_GenericAlloc, /* tp_alloc */ 642 PyType_GenericNew, /* tp_new */ 643 PyObject_GC_Del, /* tp_free */ 644}; 645 646 647/* implementation */ 648 649/* Given a buffer, return the unsigned int that is represented by the first 650 4 bytes, encoded as little endian. 
This partially reimplements 651 marshal.c:r_long() */ 652static unsigned int 653get_uint32(const unsigned char *buf) 654{ 655 unsigned int x; 656 x = buf[0]; 657 x |= (unsigned int)buf[1] << 8; 658 x |= (unsigned int)buf[2] << 16; 659 x |= (unsigned int)buf[3] << 24; 660 return x; 661} 662 663/* Given a buffer, return the unsigned int that is represented by the first 664 2 bytes, encoded as little endian. This partially reimplements 665 marshal.c:r_short() */ 666static unsigned short 667get_uint16(const unsigned char *buf) 668{ 669 unsigned short x; 670 x = buf[0]; 671 x |= (unsigned short)buf[1] << 8; 672 return x; 673} 674 675static void 676set_file_error(const char *archive, int eof) 677{ 678 if (eof) { 679 PyErr_SetString(PyExc_EOFError, "EOF read where not expected"); 680 } 681 else { 682 PyErr_SetFromErrnoWithFilename(PyExc_IOError, archive); 683 } 684} 685 686/* 687 read_directory(archive) -> files dict (new reference) 688 689 Given a path to a Zip archive, build a dict, mapping file names 690 (local to the archive, using SEP as a separator) to toc entries. 691 692 A toc_entry is a tuple: 693 694 (__file__, # value to use for __file__, available for all files 695 compress, # compression kind; 0 for uncompressed 696 data_size, # size of compressed data on disk 697 file_size, # size of decompressed data 698 file_offset, # offset of file header from start of archive 699 time, # mod time of file (in dos format) 700 date, # mod data of file (in dos format) 701 crc, # crc checksum of the data 702 ) 703 704 Directories can be recognized by the trailing SEP in the name, 705 data_size and file_offset are 0. 706*/ 707PyObject * 708read_directory(const char *archive) 709{ 710 PyObject *files = NULL; 711 FILE *fp; 712 unsigned short compress, time, date, name_size; 713 unsigned int crc, data_size, file_size, header_size, header_offset; 714 unsigned long file_offset, header_position; 715 unsigned long arc_offset; /* Absolute offset to start of the zip-archive. 
*/ 716 unsigned int count, i; 717 unsigned char buffer[46]; 718 size_t length; 719 char path[MAXPATHLEN + 5]; 720 char name[MAXPATHLEN + 5]; 721 const char *errmsg = NULL; 722 723 if (strlen(archive) > MAXPATHLEN) { 724 PyErr_SetString(PyExc_OverflowError, 725 "Zip path name is too long"); 726 return NULL; 727 } 728 strcpy(path, archive); 729 730 fp = fopen(archive, "rb"); 731 if (fp == NULL) { 732 PyErr_Format(ZipImportError, "can't open Zip file: " 733 "'%.200s'", archive); 734 return NULL; 735 } 736 737 if (fseek(fp, -22, SEEK_END) == -1) { 738 goto file_error; 739 } 740 header_position = (unsigned long)ftell(fp); 741 if (header_position == (unsigned long)-1) { 742 goto file_error; 743 } 744 assert(header_position <= (unsigned long)LONG_MAX); 745 if (fread(buffer, 1, 22, fp) != 22) { 746 goto file_error; 747 } 748 if (get_uint32(buffer) != 0x06054B50u) { 749 /* Bad: End of Central Dir signature */ 750 errmsg = "not a Zip file"; 751 goto invalid_header; 752 } 753 754 header_size = get_uint32(buffer + 12); 755 header_offset = get_uint32(buffer + 16); 756 if (header_position < header_size) { 757 errmsg = "bad central directory size"; 758 goto invalid_header; 759 } 760 if (header_position < header_offset) { 761 errmsg = "bad central directory offset"; 762 goto invalid_header; 763 } 764 if (header_position - header_size < header_offset) { 765 errmsg = "bad central directory size or offset"; 766 goto invalid_header; 767 } 768 header_position -= header_size; 769 arc_offset = header_position - header_offset; 770 771 files = PyDict_New(); 772 if (files == NULL) { 773 goto error; 774 } 775 776 length = (long)strlen(path); 777 path[length] = SEP; 778 779 /* Start of Central Directory */ 780 count = 0; 781 if (fseek(fp, (long)header_position, 0) == -1) { 782 goto file_error; 783 } 784 for (;;) { 785 PyObject *t; 786 size_t n; 787 int err; 788 789 n = fread(buffer, 1, 46, fp); 790 if (n < 4) { 791 goto eof_error; 792 } 793 /* Start of file header */ 794 if 
(get_uint32(buffer) != 0x02014B50u) { 795 break; /* Bad: Central Dir File Header */ 796 } 797 if (n != 46) { 798 goto eof_error; 799 } 800 compress = get_uint16(buffer + 10); 801 time = get_uint16(buffer + 12); 802 date = get_uint16(buffer + 14); 803 crc = get_uint32(buffer + 16); 804 data_size = get_uint32(buffer + 20); 805 file_size = get_uint32(buffer + 24); 806 name_size = get_uint16(buffer + 28); 807 header_size = (unsigned int)name_size + 808 get_uint16(buffer + 30) /* extra field */ + 809 get_uint16(buffer + 32) /* comment */; 810 811 file_offset = get_uint32(buffer + 42); 812 if (file_offset > header_offset) { 813 errmsg = "bad local header offset"; 814 goto invalid_header; 815 } 816 file_offset += arc_offset; 817 818 if (name_size > MAXPATHLEN) { 819 name_size = MAXPATHLEN; 820 } 821 if (fread(name, 1, name_size, fp) != name_size) { 822 goto file_error; 823 } 824 name[name_size] = '\0'; /* Add terminating null byte */ 825 if (SEP != '/') { 826 for (i = 0; i < name_size; i++) { 827 if (name[i] == '/') { 828 name[i] = SEP; 829 } 830 } 831 } 832 /* Skip the rest of the header. 833 * On Windows, calling fseek to skip over the fields we don't use is 834 * slower than reading the data because fseek flushes stdio's 835 * internal buffers. See issue #8745. 
*/ 836 assert(header_size <= 3*0xFFFFu); 837 for (i = name_size; i < header_size; i++) { 838 if (getc(fp) == EOF) { 839 goto file_error; 840 } 841 } 842 843 strncpy(path + length + 1, name, MAXPATHLEN - length - 1); 844 845 t = Py_BuildValue("sHIIkHHI", path, compress, data_size, 846 file_size, file_offset, time, date, crc); 847 if (t == NULL) { 848 goto error; 849 } 850 err = PyDict_SetItemString(files, name, t); 851 Py_DECREF(t); 852 if (err != 0) { 853 goto error; 854 } 855 count++; 856 } 857 fclose(fp); 858 if (Py_VerboseFlag) { 859 PySys_WriteStderr("# zipimport: found %u names in %.200s\n", 860 count, archive); 861 } 862 return files; 863 864eof_error: 865 set_file_error(archive, !ferror(fp)); 866 goto error; 867 868file_error: 869 PyErr_Format(ZipImportError, "can't read Zip file: %.200s", archive); 870 goto error; 871 872invalid_header: 873 assert(errmsg != NULL); 874 PyErr_Format(ZipImportError, "%s: %.200s", errmsg, archive); 875 goto error; 876 877error: 878 fclose(fp); 879 Py_XDECREF(files); 880 return NULL; 881} 882 883/* Return the zlib.decompress function object, or NULL if zlib couldn't 884 be imported. The function is cached when found, so subsequent calls 885 don't import zlib again. */ 886static PyObject * 887get_decompress_func(void) 888{ 889 static int importing_zlib = 0; 890 PyObject *zlib; 891 PyObject *decompress; 892 893 if (importing_zlib != 0) 894 /* Someone has a zlib.py[co] in their Zip file; 895 let's avoid a stack overflow. */ 896 return NULL; 897 importing_zlib = 1; 898 zlib = PyImport_ImportModuleNoBlock("zlib"); 899 importing_zlib = 0; 900 if (zlib != NULL) { 901 decompress = PyObject_GetAttrString(zlib, 902 "decompress"); 903 Py_DECREF(zlib); 904 } 905 else { 906 PyErr_Clear(); 907 decompress = NULL; 908 } 909 if (Py_VerboseFlag) 910 PySys_WriteStderr("# zipimport: zlib %s\n", 911 zlib != NULL ? 
"available": "UNAVAILABLE"); 912 return decompress; 913} 914 915/* Given a path to a Zip file and a toc_entry, return the (uncompressed) 916 data as a new reference. */ 917PyObject * 918get_data(const char *archive, PyObject *toc_entry) 919{ 920 PyObject *raw_data = NULL, *data, *decompress; 921 char *buf; 922 FILE *fp; 923 const char *datapath; 924 unsigned short compress, time, date; 925 unsigned int crc; 926 Py_ssize_t data_size, file_size; 927 long file_offset, header_size; 928 unsigned char buffer[30]; 929 const char *errmsg = NULL; 930 931 if (!PyArg_ParseTuple(toc_entry, "sHnnlHHI", &datapath, &compress, 932 &data_size, &file_size, &file_offset, &time, 933 &date, &crc)) { 934 return NULL; 935 } 936 if (data_size < 0) { 937 PyErr_Format(ZipImportError, "negative data size"); 938 return NULL; 939 } 940 941 fp = fopen(archive, "rb"); 942 if (!fp) { 943 PyErr_Format(PyExc_IOError, 944 "zipimport: can not open file %s", archive); 945 return NULL; 946 } 947 948 /* Check to make sure the local file header is correct */ 949 if (fseek(fp, file_offset, 0) == -1) { 950 goto file_error; 951 } 952 if (fread(buffer, 1, 30, fp) != 30) { 953 goto eof_error; 954 } 955 if (get_uint32(buffer) != 0x04034B50u) { 956 /* Bad: Local File Header */ 957 errmsg = "bad local file header"; 958 goto invalid_header; 959 } 960 961 header_size = (unsigned int)30 + 962 get_uint16(buffer + 26) /* file name */ + 963 get_uint16(buffer + 28) /* extra field */; 964 if (file_offset > LONG_MAX - header_size) { 965 errmsg = "bad local file header size"; 966 goto invalid_header; 967 } 968 file_offset += header_size; /* Start of file data */ 969 970 if (data_size > LONG_MAX - 1) { 971 fclose(fp); 972 PyErr_NoMemory(); 973 return NULL; 974 } 975 raw_data = PyString_FromStringAndSize((char *)NULL, compress == 0 ? 
976 data_size : data_size + 1); 977 978 if (raw_data == NULL) { 979 goto error; 980 } 981 buf = PyString_AsString(raw_data); 982 983 if (fseek(fp, file_offset, 0) == -1) { 984 goto file_error; 985 } 986 if (fread(buf, 1, data_size, fp) != (size_t)data_size) { 987 PyErr_SetString(PyExc_IOError, 988 "zipimport: can't read data"); 989 goto error; 990 } 991 992 fclose(fp); 993 fp = NULL; 994 995 if (compress != 0) { 996 buf[data_size] = 'Z'; /* saw this in zipfile.py */ 997 data_size++; 998 } 999 buf[data_size] = '\0'; 1000 1001 if (compress == 0) /* data is not compressed */ 1002 return raw_data; 1003 1004 /* Decompress with zlib */ 1005 decompress = get_decompress_func(); 1006 if (decompress == NULL) { 1007 PyErr_SetString(ZipImportError, 1008 "can't decompress data; " 1009 "zlib not available"); 1010 goto error; 1011 } 1012 data = PyObject_CallFunction(decompress, "Oi", raw_data, -15); 1013 Py_DECREF(decompress); 1014 Py_DECREF(raw_data); 1015 return data; 1016 1017eof_error: 1018 set_file_error(archive, !ferror(fp)); 1019 goto error; 1020 1021file_error: 1022 PyErr_Format(ZipImportError, "can't read Zip file: %.200s", archive); 1023 goto error; 1024 1025invalid_header: 1026 assert(errmsg != NULL); 1027 PyErr_Format(ZipImportError, "%s: %.200s", errmsg, archive); 1028 goto error; 1029 1030error: 1031 if (fp != NULL) { 1032 fclose(fp); 1033 } 1034 Py_XDECREF(raw_data); 1035 return NULL; 1036} 1037 1038/* Lenient date/time comparison function. The precision of the mtime 1039 in the archive is lower than the mtime stored in a .pyc: we 1040 must allow a difference of at most one second. */ 1041static int 1042eq_mtime(time_t t1, time_t t2) 1043{ 1044 time_t d = t1 - t2; 1045 if (d < 0) 1046 d = -d; 1047 /* dostime only stores even seconds, so be lenient */ 1048 return d <= 1; 1049} 1050 1051/* Given the contents of a .py[co] file in a buffer, unmarshal the data 1052 and return the code object. 
Return None if it the magic word doesn't 1053 match (we do this instead of raising an exception as we fall back 1054 to .py if available and we don't want to mask other errors). 1055 Returns a new reference. */ 1056static PyObject * 1057unmarshal_code(const char *pathname, PyObject *data, time_t mtime) 1058{ 1059 PyObject *code; 1060 unsigned char *buf = (unsigned char *)PyString_AsString(data); 1061 Py_ssize_t size = PyString_Size(data); 1062 1063 if (size < 8) { 1064 PyErr_SetString(ZipImportError, 1065 "bad pyc data"); 1066 return NULL; 1067 } 1068 1069 if (get_uint32(buf) != (unsigned int)PyImport_GetMagicNumber()) { 1070 if (Py_VerboseFlag) { 1071 PySys_WriteStderr("# %s has bad magic\n", 1072 pathname); 1073 } 1074 Py_INCREF(Py_None); 1075 return Py_None; /* signal caller to try alternative */ 1076 } 1077 1078 if (mtime != 0 && !eq_mtime(get_uint32(buf + 4), mtime)) { 1079 if (Py_VerboseFlag) { 1080 PySys_WriteStderr("# %s has bad mtime\n", 1081 pathname); 1082 } 1083 Py_INCREF(Py_None); 1084 return Py_None; /* signal caller to try alternative */ 1085 } 1086 1087 code = PyMarshal_ReadObjectFromString((char *)buf + 8, size - 8); 1088 if (code == NULL) { 1089 return NULL; 1090 } 1091 if (!PyCode_Check(code)) { 1092 Py_DECREF(code); 1093 PyErr_Format(PyExc_TypeError, 1094 "compiled module %.200s is not a code object", 1095 pathname); 1096 return NULL; 1097 } 1098 return code; 1099} 1100 1101/* Replace any occurrences of "\r\n?" in the input string with "\n". 1102 This converts DOS and Mac line endings to Unix line endings. 1103 Also append a trailing "\n" to be compatible with 1104 PyParser_SimpleParseFile(). Returns a new reference. 
*/ 1105static PyObject * 1106normalize_line_endings(PyObject *source) 1107{ 1108 char *buf, *q, *p = PyString_AsString(source); 1109 PyObject *fixed_source; 1110 1111 if (!p) 1112 return NULL; 1113 1114 /* one char extra for trailing \n and one for terminating \0 */ 1115 buf = (char *)PyMem_Malloc(PyString_Size(source) + 2); 1116 if (buf == NULL) { 1117 PyErr_SetString(PyExc_MemoryError, 1118 "zipimport: no memory to allocate " 1119 "source buffer"); 1120 return NULL; 1121 } 1122 /* replace "\r\n?" by "\n" */ 1123 for (q = buf; *p != '\0'; p++) { 1124 if (*p == '\r') { 1125 *q++ = '\n'; 1126 if (*(p + 1) == '\n') 1127 p++; 1128 } 1129 else 1130 *q++ = *p; 1131 } 1132 *q++ = '\n'; /* add trailing \n */ 1133 *q = '\0'; 1134 fixed_source = PyString_FromString(buf); 1135 PyMem_Free(buf); 1136 return fixed_source; 1137} 1138 1139/* Given a string buffer containing Python source code, compile it 1140 return and return a code object as a new reference. */ 1141static PyObject * 1142compile_source(char *pathname, PyObject *source) 1143{ 1144 PyObject *code, *fixed_source; 1145 1146 fixed_source = normalize_line_endings(source); 1147 if (fixed_source == NULL) 1148 return NULL; 1149 1150 code = Py_CompileString(PyString_AsString(fixed_source), pathname, 1151 Py_file_input); 1152 Py_DECREF(fixed_source); 1153 return code; 1154} 1155 1156/* Convert the date/time values found in the Zip archive to a value 1157 that's compatible with the time stamp stored in .pyc files. 
*/ 1158static time_t 1159parse_dostime(int dostime, int dosdate) 1160{ 1161 struct tm stm; 1162 1163 memset((void *) &stm, '\0', sizeof(stm)); 1164 1165 stm.tm_sec = (dostime & 0x1f) * 2; 1166 stm.tm_min = (dostime >> 5) & 0x3f; 1167 stm.tm_hour = (dostime >> 11) & 0x1f; 1168 stm.tm_mday = dosdate & 0x1f; 1169 stm.tm_mon = ((dosdate >> 5) & 0x0f) - 1; 1170 stm.tm_year = ((dosdate >> 9) & 0x7f) + 80; 1171 stm.tm_isdst = -1; /* wday/yday is ignored */ 1172 1173 return mktime(&stm); 1174} 1175 1176/* Given a path to a .pyc or .pyo file in the archive, return the 1177 modification time of the matching .py file, or 0 if no source 1178 is available. */ 1179static time_t 1180get_mtime_of_source(ZipImporter *self, char *path) 1181{ 1182 PyObject *toc_entry; 1183 time_t mtime = 0; 1184 Py_ssize_t lastchar = strlen(path) - 1; 1185 char savechar = path[lastchar]; 1186 path[lastchar] = '\0'; /* strip 'c' or 'o' from *.py[co] */ 1187 toc_entry = PyDict_GetItemString(self->files, path); 1188 if (toc_entry != NULL && PyTuple_Check(toc_entry) && 1189 PyTuple_Size(toc_entry) == 8) { 1190 /* fetch the time stamp of the .py file for comparison 1191 with an embedded pyc time stamp */ 1192 int time, date; 1193 time = PyInt_AsLong(PyTuple_GetItem(toc_entry, 5)); 1194 date = PyInt_AsLong(PyTuple_GetItem(toc_entry, 6)); 1195 mtime = parse_dostime(time, date); 1196 } 1197 path[lastchar] = savechar; 1198 return mtime; 1199} 1200 1201/* Return the code object for the module named by 'fullname' from the 1202 Zip archive as a new reference. 
*/ 1203static PyObject * 1204get_code_from_data(ZipImporter *self, int ispackage, int isbytecode, 1205 time_t mtime, PyObject *toc_entry) 1206{ 1207 PyObject *data, *code; 1208 char *modpath; 1209 char *archive = PyString_AsString(self->archive); 1210 1211 if (archive == NULL) 1212 return NULL; 1213 1214 data = get_data(archive, toc_entry); 1215 if (data == NULL) 1216 return NULL; 1217 1218 modpath = PyString_AsString(PyTuple_GetItem(toc_entry, 0)); 1219 1220 if (isbytecode) { 1221 code = unmarshal_code(modpath, data, mtime); 1222 } 1223 else { 1224 code = compile_source(modpath, data); 1225 } 1226 Py_DECREF(data); 1227 return code; 1228} 1229 1230/* Get the code object associated with the module specified by 1231 'fullname'. */ 1232static PyObject * 1233get_module_code(ZipImporter *self, char *fullname, 1234 int *p_ispackage, char **p_modpath) 1235{ 1236 PyObject *toc_entry; 1237 char *subname, path[MAXPATHLEN + 1]; 1238 int len; 1239 struct st_zip_searchorder *zso; 1240 1241 subname = get_subname(fullname); 1242 1243 len = make_filename(PyString_AsString(self->prefix), subname, path); 1244 if (len < 0) 1245 return NULL; 1246 1247 for (zso = zip_searchorder; *zso->suffix; zso++) { 1248 PyObject *code = NULL; 1249 1250 strcpy(path + len, zso->suffix); 1251 if (Py_VerboseFlag > 1) 1252 PySys_WriteStderr("# trying %s%c%s\n", 1253 PyString_AsString(self->archive), 1254 SEP, path); 1255 toc_entry = PyDict_GetItemString(self->files, path); 1256 if (toc_entry != NULL) { 1257 time_t mtime = 0; 1258 int ispackage = zso->type & IS_PACKAGE; 1259 int isbytecode = zso->type & IS_BYTECODE; 1260 1261 if (isbytecode) 1262 mtime = get_mtime_of_source(self, path); 1263 if (p_ispackage != NULL) 1264 *p_ispackage = ispackage; 1265 code = get_code_from_data(self, ispackage, 1266 isbytecode, mtime, 1267 toc_entry); 1268 if (code == Py_None) { 1269 /* bad magic number or non-matching mtime 1270 in byte code, try next */ 1271 Py_DECREF(code); 1272 continue; 1273 } 1274 if (code != NULL 
&& p_modpath != NULL) 1275 *p_modpath = PyString_AsString( 1276 PyTuple_GetItem(toc_entry, 0)); 1277 return code; 1278 } 1279 } 1280 PyErr_Format(ZipImportError, "can't find module '%.200s'", fullname); 1281 return NULL; 1282} 1283 1284 1285/* Module init */ 1286 1287PyDoc_STRVAR(zipimport_doc, 1288"zipimport provides support for importing Python modules from Zip archives.\n\ 1289\n\ 1290This module exports three objects:\n\ 1291- zipimporter: a class; its constructor takes a path to a Zip archive.\n\ 1292- ZipImportError: exception raised by zipimporter objects. It's a\n\ 1293 subclass of ImportError, so it can be caught as ImportError, too.\n\ 1294- _zip_directory_cache: a dict, mapping archive paths to zip directory\n\ 1295 info dicts, as used in zipimporter._files.\n\ 1296\n\ 1297It is usually not needed to use the zipimport module explicitly; it is\n\ 1298used by the builtin import mechanism for sys.path items that are paths\n\ 1299to Zip archives."); 1300 1301PyMODINIT_FUNC 1302initzipimport(void) 1303{ 1304 PyObject *mod; 1305 1306 if (PyType_Ready(&ZipImporter_Type) < 0) 1307 return; 1308 1309 /* Correct directory separator */ 1310 zip_searchorder[0].suffix[0] = SEP; 1311 zip_searchorder[1].suffix[0] = SEP; 1312 zip_searchorder[2].suffix[0] = SEP; 1313 if (Py_OptimizeFlag) { 1314 /* Reverse *.pyc and *.pyo */ 1315 struct st_zip_searchorder tmp; 1316 tmp = zip_searchorder[0]; 1317 zip_searchorder[0] = zip_searchorder[1]; 1318 zip_searchorder[1] = tmp; 1319 tmp = zip_searchorder[3]; 1320 zip_searchorder[3] = zip_searchorder[4]; 1321 zip_searchorder[4] = tmp; 1322 } 1323 1324 mod = Py_InitModule4("zipimport", NULL, zipimport_doc, 1325 NULL, PYTHON_API_VERSION); 1326 if (mod == NULL) 1327 return; 1328 1329 ZipImportError = PyErr_NewException("zipimport.ZipImportError", 1330 PyExc_ImportError, NULL); 1331 if (ZipImportError == NULL) 1332 return; 1333 1334 Py_INCREF(ZipImportError); 1335 if (PyModule_AddObject(mod, "ZipImportError", 1336 ZipImportError) < 0) 1337 
return; 1338 1339 Py_INCREF(&ZipImporter_Type); 1340 if (PyModule_AddObject(mod, "zipimporter", 1341 (PyObject *)&ZipImporter_Type) < 0) 1342 return; 1343 1344 zip_directory_cache = PyDict_New(); 1345 if (zip_directory_cache == NULL) 1346 return; 1347 Py_INCREF(zip_directory_cache); 1348 if (PyModule_AddObject(mod, "_zip_directory_cache", 1349 zip_directory_cache) < 0) 1350 return; 1351} 1352