/* zipimport.c revision 2460a43a6569fbf240c5a72d0b052565617213eb */
1#include "Python.h" 2#include "structmember.h" 3#include "osdefs.h" 4#include "marshal.h" 5#include <time.h> 6 7 8#define IS_SOURCE 0x0 9#define IS_BYTECODE 0x1 10#define IS_PACKAGE 0x2 11 12struct st_zip_searchorder { 13 char suffix[14]; 14 int type; 15}; 16 17/* zip_searchorder defines how we search for a module in the Zip 18 archive: we first search for a package __init__, then for 19 non-package .pyc, .pyo and .py entries. The .pyc and .pyo entries 20 are swapped by initzipimport() if we run in optimized mode. Also, 21 '/' is replaced by SEP there. */ 22static struct st_zip_searchorder zip_searchorder[] = { 23 {"/__init__.pyc", IS_PACKAGE | IS_BYTECODE}, 24 {"/__init__.pyo", IS_PACKAGE | IS_BYTECODE}, 25 {"/__init__.py", IS_PACKAGE | IS_SOURCE}, 26 {".pyc", IS_BYTECODE}, 27 {".pyo", IS_BYTECODE}, 28 {".py", IS_SOURCE}, 29 {"", 0} 30}; 31 32/* zipimporter object definition and support */ 33 34typedef struct _zipimporter ZipImporter; 35 36struct _zipimporter { 37 PyObject_HEAD 38 PyObject *archive; /* pathname of the Zip archive */ 39 PyObject *prefix; /* file prefix: "a/sub/directory/" */ 40 PyObject *files; /* dict with file info {path: toc_entry} */ 41}; 42 43static PyObject *ZipImportError; 44static PyObject *zip_directory_cache = NULL; 45 46/* forward decls */ 47static PyObject *read_directory(PyObject *archive); 48static PyObject *get_data(char *archive, PyObject *toc_entry); 49static PyObject *get_module_code(ZipImporter *self, char *fullname, 50 int *p_ispackage, char **p_modpath); 51 52 53#define ZipImporter_Check(op) PyObject_TypeCheck(op, &ZipImporter_Type) 54 55 56/* zipimporter.__init__ 57 Split the "subdirectory" from the Zip archive path, lookup a matching 58 entry in sys.path_importer_cache, fetch the file directory from there 59 if found, or else read it from the archive. 
*/ 60static int 61zipimporter_init(ZipImporter *self, PyObject *args, PyObject *kwds) 62{ 63 PyObject *pathobj, *files; 64 Py_UNICODE *path, *p, *prefix, buf[MAXPATHLEN+2]; 65 Py_ssize_t len; 66 67 if (!_PyArg_NoKeywords("zipimporter()", kwds)) 68 return -1; 69 70 if (!PyArg_ParseTuple(args, "O&:zipimporter", 71 PyUnicode_FSDecoder, &pathobj)) 72 return -1; 73 74 /* copy path to buf */ 75 len = PyUnicode_GET_SIZE(pathobj); 76 if (len == 0) { 77 PyErr_SetString(ZipImportError, "archive path is empty"); 78 goto error; 79 } 80 if (len >= MAXPATHLEN) { 81 PyErr_SetString(ZipImportError, 82 "archive path too long"); 83 goto error; 84 } 85 Py_UNICODE_strcpy(buf, PyUnicode_AS_UNICODE(pathobj)); 86 87#ifdef ALTSEP 88 for (p = buf; *p; p++) { 89 if (*p == ALTSEP) 90 *p = SEP; 91 } 92#endif 93 94 path = NULL; 95 prefix = NULL; 96 for (;;) { 97 struct stat statbuf; 98 int rv; 99 100 if (pathobj == NULL) { 101 pathobj = PyUnicode_FromUnicode(buf, len); 102 if (pathobj == NULL) 103 goto error; 104 } 105 rv = _Py_stat(pathobj, &statbuf); 106 if (rv == 0) { 107 /* it exists */ 108 if (S_ISREG(statbuf.st_mode)) 109 /* it's a file */ 110 path = buf; 111 break; 112 } 113 else if (PyErr_Occurred()) 114 goto error; 115 /* back up one path element */ 116 p = Py_UNICODE_strrchr(buf, SEP); 117 if (prefix != NULL) 118 *prefix = SEP; 119 if (p == NULL) 120 break; 121 *p = '\0'; 122 len = p - buf; 123 prefix = p; 124 Py_CLEAR(pathobj); 125 } 126 if (path == NULL) { 127 PyErr_SetString(ZipImportError, "not a Zip file"); 128 goto error; 129 } 130 131 files = PyDict_GetItem(zip_directory_cache, pathobj); 132 if (files == NULL) { 133 files = read_directory(pathobj); 134 if (files == NULL) 135 goto error; 136 if (PyDict_SetItem(zip_directory_cache, pathobj, files) != 0) 137 goto error; 138 } 139 else 140 Py_INCREF(files); 141 self->files = files; 142 143 self->archive = pathobj; 144 pathobj = NULL; 145 146 if (prefix != NULL) { 147 prefix++; 148 len = Py_UNICODE_strlen(prefix); 149 if 
(prefix[len-1] != SEP) { 150 /* add trailing SEP */ 151 prefix[len] = SEP; 152 prefix[len + 1] = '\0'; 153 len++; 154 } 155 } 156 else 157 len = 0; 158 self->prefix = PyUnicode_FromUnicode(prefix, len); 159 if (self->prefix == NULL) 160 goto error; 161 162 return 0; 163 164error: 165 Py_XDECREF(pathobj); 166 return -1; 167} 168 169/* GC support. */ 170static int 171zipimporter_traverse(PyObject *obj, visitproc visit, void *arg) 172{ 173 ZipImporter *self = (ZipImporter *)obj; 174 Py_VISIT(self->files); 175 return 0; 176} 177 178static void 179zipimporter_dealloc(ZipImporter *self) 180{ 181 PyObject_GC_UnTrack(self); 182 Py_XDECREF(self->archive); 183 Py_XDECREF(self->prefix); 184 Py_XDECREF(self->files); 185 Py_TYPE(self)->tp_free((PyObject *)self); 186} 187 188static PyObject * 189zipimporter_repr(ZipImporter *self) 190{ 191 char *archive = "???"; 192 char *prefix = ""; 193 194 if (self->archive != NULL && PyUnicode_Check(self->archive)) 195 archive = _PyUnicode_AsString(self->archive); 196 if (self->prefix != NULL && PyUnicode_Check(self->prefix)) 197 prefix = _PyUnicode_AsString(self->prefix); 198 if (prefix != NULL && *prefix) 199 return PyUnicode_FromFormat("<zipimporter object \"%.300s%c%.150s\">", 200 archive, SEP, prefix); 201 else 202 return PyUnicode_FromFormat("<zipimporter object \"%.300s\">", 203 archive); 204} 205 206/* return fullname.split(".")[-1] */ 207static char * 208get_subname(char *fullname) 209{ 210 char *subname = strrchr(fullname, '.'); 211 if (subname == NULL) 212 subname = fullname; 213 else 214 subname++; 215 return subname; 216} 217 218/* Given a (sub)modulename, write the potential file path in the 219 archive (without extension) to the path buffer. Return the 220 length of the resulting string. 
*/ 221static int 222make_filename(char *prefix, char *name, char *path) 223{ 224 size_t len; 225 char *p; 226 227 len = strlen(prefix); 228 229 /* self.prefix + name [+ SEP + "__init__"] + ".py[co]" */ 230 if (len + strlen(name) + 13 >= MAXPATHLEN) { 231 PyErr_SetString(ZipImportError, "path too long"); 232 return -1; 233 } 234 235 strcpy(path, prefix); 236 strcpy(path + len, name); 237 for (p = path + len; *p; p++) { 238 if (*p == '.') 239 *p = SEP; 240 } 241 len += strlen(name); 242 assert(len < INT_MAX); 243 return (int)len; 244} 245 246enum zi_module_info { 247 MI_ERROR, 248 MI_NOT_FOUND, 249 MI_MODULE, 250 MI_PACKAGE 251}; 252 253/* Return some information about a module. */ 254static enum zi_module_info 255get_module_info(ZipImporter *self, char *fullname) 256{ 257 char *subname, path[MAXPATHLEN + 1]; 258 int len; 259 struct st_zip_searchorder *zso; 260 261 subname = get_subname(fullname); 262 263 len = make_filename(_PyUnicode_AsString(self->prefix), subname, path); 264 if (len < 0) 265 return MI_ERROR; 266 267 for (zso = zip_searchorder; *zso->suffix; zso++) { 268 strcpy(path + len, zso->suffix); 269 if (PyDict_GetItemString(self->files, path) != NULL) { 270 if (zso->type & IS_PACKAGE) 271 return MI_PACKAGE; 272 else 273 return MI_MODULE; 274 } 275 } 276 return MI_NOT_FOUND; 277} 278 279/* Check whether we can satisfy the import of the module named by 280 'fullname'. Return self if we can, None if we can't. 
*/ 281static PyObject * 282zipimporter_find_module(PyObject *obj, PyObject *args) 283{ 284 ZipImporter *self = (ZipImporter *)obj; 285 PyObject *path = NULL; 286 char *fullname; 287 enum zi_module_info mi; 288 289 if (!PyArg_ParseTuple(args, "s|O:zipimporter.find_module", 290 &fullname, &path)) 291 return NULL; 292 293 mi = get_module_info(self, fullname); 294 if (mi == MI_ERROR) 295 return NULL; 296 if (mi == MI_NOT_FOUND) { 297 Py_INCREF(Py_None); 298 return Py_None; 299 } 300 Py_INCREF(self); 301 return (PyObject *)self; 302} 303 304/* Load and return the module named by 'fullname'. */ 305static PyObject * 306zipimporter_load_module(PyObject *obj, PyObject *args) 307{ 308 ZipImporter *self = (ZipImporter *)obj; 309 PyObject *code, *mod, *dict; 310 char *fullname, *modpath; 311 int ispackage; 312 313 if (!PyArg_ParseTuple(args, "s:zipimporter.load_module", 314 &fullname)) 315 return NULL; 316 317 code = get_module_code(self, fullname, &ispackage, &modpath); 318 if (code == NULL) 319 return NULL; 320 321 mod = PyImport_AddModule(fullname); 322 if (mod == NULL) { 323 Py_DECREF(code); 324 return NULL; 325 } 326 dict = PyModule_GetDict(mod); 327 328 /* mod.__loader__ = self */ 329 if (PyDict_SetItemString(dict, "__loader__", (PyObject *)self) != 0) 330 goto error; 331 332 if (ispackage) { 333 /* add __path__ to the module *before* the code gets 334 executed */ 335 PyObject *pkgpath, *fullpath; 336 char *subname = get_subname(fullname); 337 int err; 338 339 fullpath = PyUnicode_FromFormat("%U%c%U%s", 340 self->archive, SEP, 341 self->prefix, subname); 342 if (fullpath == NULL) 343 goto error; 344 345 pkgpath = Py_BuildValue("[O]", fullpath); 346 Py_DECREF(fullpath); 347 if (pkgpath == NULL) 348 goto error; 349 err = PyDict_SetItemString(dict, "__path__", pkgpath); 350 Py_DECREF(pkgpath); 351 if (err != 0) 352 goto error; 353 } 354 mod = PyImport_ExecCodeModuleEx(fullname, code, modpath); 355 Py_DECREF(code); 356 if (Py_VerboseFlag) 357 PySys_WriteStderr("import %s # 
loaded from Zip %s\n", 358 fullname, modpath); 359 return mod; 360error: 361 Py_DECREF(code); 362 Py_DECREF(mod); 363 return NULL; 364} 365 366/* Return a string matching __file__ for the named module */ 367static PyObject * 368zipimporter_get_filename(PyObject *obj, PyObject *args) 369{ 370 ZipImporter *self = (ZipImporter *)obj; 371 PyObject *code; 372 char *fullname, *modpath; 373 int ispackage; 374 375 if (!PyArg_ParseTuple(args, "s:zipimporter.get_filename", 376 &fullname)) 377 return NULL; 378 379 /* Deciding the filename requires working out where the code 380 would come from if the module was actually loaded */ 381 code = get_module_code(self, fullname, &ispackage, &modpath); 382 if (code == NULL) 383 return NULL; 384 Py_DECREF(code); /* Only need the path info */ 385 386 return PyUnicode_FromString(modpath); 387} 388 389/* Return a bool signifying whether the module is a package or not. */ 390static PyObject * 391zipimporter_is_package(PyObject *obj, PyObject *args) 392{ 393 ZipImporter *self = (ZipImporter *)obj; 394 char *fullname; 395 enum zi_module_info mi; 396 397 if (!PyArg_ParseTuple(args, "s:zipimporter.is_package", 398 &fullname)) 399 return NULL; 400 401 mi = get_module_info(self, fullname); 402 if (mi == MI_ERROR) 403 return NULL; 404 if (mi == MI_NOT_FOUND) { 405 PyErr_Format(ZipImportError, "can't find module '%.200s'", 406 fullname); 407 return NULL; 408 } 409 return PyBool_FromLong(mi == MI_PACKAGE); 410} 411 412static PyObject * 413zipimporter_get_data(PyObject *obj, PyObject *args) 414{ 415 ZipImporter *self = (ZipImporter *)obj; 416 char *path; 417#ifdef ALTSEP 418 char *p, buf[MAXPATHLEN + 1]; 419#endif 420 PyObject *toc_entry; 421 Py_ssize_t len; 422 char *archive_str; 423 424 if (!PyArg_ParseTuple(args, "s:zipimporter.get_data", &path)) 425 return NULL; 426 427#ifdef ALTSEP 428 if (strlen(path) >= MAXPATHLEN) { 429 PyErr_SetString(ZipImportError, "path too long"); 430 return NULL; 431 } 432 strcpy(buf, path); 433 for (p = buf; *p; p++) 
{ 434 if (*p == ALTSEP) 435 *p = SEP; 436 } 437 path = buf; 438#endif 439 archive_str = _PyUnicode_AsStringAndSize(self->archive, &len); 440 if ((size_t)len < strlen(path) && 441 strncmp(path, archive_str, len) == 0 && 442 path[len] == SEP) { 443 path = path + len + 1; 444 } 445 446 toc_entry = PyDict_GetItemString(self->files, path); 447 if (toc_entry == NULL) { 448 PyErr_SetFromErrnoWithFilename(PyExc_IOError, path); 449 return NULL; 450 } 451 return get_data(archive_str, toc_entry); 452} 453 454static PyObject * 455zipimporter_get_code(PyObject *obj, PyObject *args) 456{ 457 ZipImporter *self = (ZipImporter *)obj; 458 char *fullname; 459 460 if (!PyArg_ParseTuple(args, "s:zipimporter.get_code", &fullname)) 461 return NULL; 462 463 return get_module_code(self, fullname, NULL, NULL); 464} 465 466static PyObject * 467zipimporter_get_source(PyObject *obj, PyObject *args) 468{ 469 ZipImporter *self = (ZipImporter *)obj; 470 PyObject *toc_entry; 471 char *fullname, *subname, path[MAXPATHLEN+1]; 472 int len; 473 enum zi_module_info mi; 474 475 if (!PyArg_ParseTuple(args, "s:zipimporter.get_source", &fullname)) 476 return NULL; 477 478 mi = get_module_info(self, fullname); 479 if (mi == MI_ERROR) 480 return NULL; 481 if (mi == MI_NOT_FOUND) { 482 PyErr_Format(ZipImportError, "can't find module '%.200s'", 483 fullname); 484 return NULL; 485 } 486 subname = get_subname(fullname); 487 488 len = make_filename(_PyUnicode_AsString(self->prefix), subname, path); 489 if (len < 0) 490 return NULL; 491 492 if (mi == MI_PACKAGE) { 493 path[len] = SEP; 494 strcpy(path + len + 1, "__init__.py"); 495 } 496 else 497 strcpy(path + len, ".py"); 498 499 toc_entry = PyDict_GetItemString(self->files, path); 500 if (toc_entry != NULL) { 501 PyObject *bytes = get_data(_PyUnicode_AsString(self->archive), toc_entry); 502 PyObject *res = PyUnicode_FromString(PyBytes_AsString(bytes)); 503 Py_XDECREF(bytes); 504 return res; 505 } 506 507 /* we have the module, but no source */ 508 
Py_INCREF(Py_None); 509 return Py_None; 510} 511 512PyDoc_STRVAR(doc_find_module, 513"find_module(fullname, path=None) -> self or None.\n\ 514\n\ 515Search for a module specified by 'fullname'. 'fullname' must be the\n\ 516fully qualified (dotted) module name. It returns the zipimporter\n\ 517instance itself if the module was found, or None if it wasn't.\n\ 518The optional 'path' argument is ignored -- it's there for compatibility\n\ 519with the importer protocol."); 520 521PyDoc_STRVAR(doc_load_module, 522"load_module(fullname) -> module.\n\ 523\n\ 524Load the module specified by 'fullname'. 'fullname' must be the\n\ 525fully qualified (dotted) module name. It returns the imported\n\ 526module, or raises ZipImportError if it wasn't found."); 527 528PyDoc_STRVAR(doc_get_data, 529"get_data(pathname) -> string with file data.\n\ 530\n\ 531Return the data associated with 'pathname'. Raise IOError if\n\ 532the file wasn't found."); 533 534PyDoc_STRVAR(doc_is_package, 535"is_package(fullname) -> bool.\n\ 536\n\ 537Return True if the module specified by fullname is a package.\n\ 538Raise ZipImportError if the module couldn't be found."); 539 540PyDoc_STRVAR(doc_get_code, 541"get_code(fullname) -> code object.\n\ 542\n\ 543Return the code object for the specified module. Raise ZipImportError\n\ 544if the module couldn't be found."); 545 546PyDoc_STRVAR(doc_get_source, 547"get_source(fullname) -> source string.\n\ 548\n\ 549Return the source code for the specified module. 
Raise ZipImportError\n\ 550if the module couldn't be found, return None if the archive does\n\ 551contain the module, but has no source for it."); 552 553 554PyDoc_STRVAR(doc_get_filename, 555"get_filename(fullname) -> filename string.\n\ 556\n\ 557Return the filename for the specified module."); 558 559static PyMethodDef zipimporter_methods[] = { 560 {"find_module", zipimporter_find_module, METH_VARARGS, 561 doc_find_module}, 562 {"load_module", zipimporter_load_module, METH_VARARGS, 563 doc_load_module}, 564 {"get_data", zipimporter_get_data, METH_VARARGS, 565 doc_get_data}, 566 {"get_code", zipimporter_get_code, METH_VARARGS, 567 doc_get_code}, 568 {"get_source", zipimporter_get_source, METH_VARARGS, 569 doc_get_source}, 570 {"get_filename", zipimporter_get_filename, METH_VARARGS, 571 doc_get_filename}, 572 {"is_package", zipimporter_is_package, METH_VARARGS, 573 doc_is_package}, 574 {NULL, NULL} /* sentinel */ 575}; 576 577static PyMemberDef zipimporter_members[] = { 578 {"archive", T_OBJECT, offsetof(ZipImporter, archive), READONLY}, 579 {"prefix", T_OBJECT, offsetof(ZipImporter, prefix), READONLY}, 580 {"_files", T_OBJECT, offsetof(ZipImporter, files), READONLY}, 581 {NULL} 582}; 583 584PyDoc_STRVAR(zipimporter_doc, 585"zipimporter(archivepath) -> zipimporter object\n\ 586\n\ 587Create a new zipimporter instance. 'archivepath' must be a path to\n\ 588a zipfile, or to a specific path inside a zipfile. 
For example, it can be\n\ 589'/tmp/myimport.zip', or '/tmp/myimport.zip/mydirectory', if mydirectory is a\n\ 590valid directory inside the archive.\n\ 591\n\ 592'ZipImportError is raised if 'archivepath' doesn't point to a valid Zip\n\ 593archive.\n\ 594\n\ 595The 'archive' attribute of zipimporter objects contains the name of the\n\ 596zipfile targeted."); 597 598#define DEFERRED_ADDRESS(ADDR) 0 599 600static PyTypeObject ZipImporter_Type = { 601 PyVarObject_HEAD_INIT(DEFERRED_ADDRESS(&PyType_Type), 0) 602 "zipimport.zipimporter", 603 sizeof(ZipImporter), 604 0, /* tp_itemsize */ 605 (destructor)zipimporter_dealloc, /* tp_dealloc */ 606 0, /* tp_print */ 607 0, /* tp_getattr */ 608 0, /* tp_setattr */ 609 0, /* tp_reserved */ 610 (reprfunc)zipimporter_repr, /* tp_repr */ 611 0, /* tp_as_number */ 612 0, /* tp_as_sequence */ 613 0, /* tp_as_mapping */ 614 0, /* tp_hash */ 615 0, /* tp_call */ 616 0, /* tp_str */ 617 PyObject_GenericGetAttr, /* tp_getattro */ 618 0, /* tp_setattro */ 619 0, /* tp_as_buffer */ 620 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | 621 Py_TPFLAGS_HAVE_GC, /* tp_flags */ 622 zipimporter_doc, /* tp_doc */ 623 zipimporter_traverse, /* tp_traverse */ 624 0, /* tp_clear */ 625 0, /* tp_richcompare */ 626 0, /* tp_weaklistoffset */ 627 0, /* tp_iter */ 628 0, /* tp_iternext */ 629 zipimporter_methods, /* tp_methods */ 630 zipimporter_members, /* tp_members */ 631 0, /* tp_getset */ 632 0, /* tp_base */ 633 0, /* tp_dict */ 634 0, /* tp_descr_get */ 635 0, /* tp_descr_set */ 636 0, /* tp_dictoffset */ 637 (initproc)zipimporter_init, /* tp_init */ 638 PyType_GenericAlloc, /* tp_alloc */ 639 PyType_GenericNew, /* tp_new */ 640 PyObject_GC_Del, /* tp_free */ 641}; 642 643 644/* implementation */ 645 646/* Given a buffer, return the long that is represented by the first 647 4 bytes, encoded as little endian. 
This partially reimplements 648 marshal.c:r_long() */ 649static long 650get_long(unsigned char *buf) { 651 long x; 652 x = buf[0]; 653 x |= (long)buf[1] << 8; 654 x |= (long)buf[2] << 16; 655 x |= (long)buf[3] << 24; 656#if SIZEOF_LONG > 4 657 /* Sign extension for 64-bit machines */ 658 x |= -(x & 0x80000000L); 659#endif 660 return x; 661} 662 663/* 664 read_directory(archive) -> files dict (new reference) 665 666 Given a path to a Zip archive, build a dict, mapping file names 667 (local to the archive, using SEP as a separator) to toc entries. 668 669 A toc_entry is a tuple: 670 671 (__file__, # value to use for __file__, available for all files 672 compress, # compression kind; 0 for uncompressed 673 data_size, # size of compressed data on disk 674 file_size, # size of decompressed data 675 file_offset, # offset of file header from start of archive 676 time, # mod time of file (in dos format) 677 date, # mod data of file (in dos format) 678 crc, # crc checksum of the data 679 ) 680 681 Directories can be recognized by the trailing SEP in the name, 682 data_size and file_offset are 0. 
683*/ 684static PyObject * 685read_directory(PyObject *archive_obj) 686{ 687 /* FIXME: work on Py_UNICODE* instead of char* */ 688 PyObject *files = NULL; 689 FILE *fp; 690 long compress, crc, data_size, file_size, file_offset, date, time; 691 long header_offset, name_size, header_size, header_position; 692 long i, l, count; 693 size_t length; 694 Py_UNICODE path[MAXPATHLEN + 5]; 695 char name[MAXPATHLEN + 5]; 696 PyObject *nameobj = NULL; 697 char *p, endof_central_dir[22]; 698 long arc_offset; /* offset from beginning of file to start of zip-archive */ 699 PyObject *pathobj; 700 701 if (PyUnicode_GET_SIZE(archive_obj) > MAXPATHLEN) { 702 PyErr_SetString(PyExc_OverflowError, 703 "Zip path name is too long"); 704 return NULL; 705 } 706 Py_UNICODE_strcpy(path, PyUnicode_AS_UNICODE(archive_obj)); 707 708 fp = _Py_fopen(archive_obj, "rb"); 709 if (fp == NULL) { 710 PyErr_Format(ZipImportError, "can't open Zip file: " 711 "'%.200U'", archive_obj); 712 return NULL; 713 } 714 fseek(fp, -22, SEEK_END); 715 header_position = ftell(fp); 716 if (fread(endof_central_dir, 1, 22, fp) != 22) { 717 fclose(fp); 718 PyErr_Format(ZipImportError, "can't read Zip file: " 719 "'%.200U'", archive_obj); 720 return NULL; 721 } 722 if (get_long((unsigned char *)endof_central_dir) != 0x06054B50) { 723 /* Bad: End of Central Dir signature */ 724 fclose(fp); 725 PyErr_Format(ZipImportError, "not a Zip file: " 726 "'%.200U'", archive_obj); 727 return NULL; 728 } 729 730 header_size = get_long((unsigned char *)endof_central_dir + 12); 731 header_offset = get_long((unsigned char *)endof_central_dir + 16); 732 arc_offset = header_position - header_offset - header_size; 733 header_offset += arc_offset; 734 735 files = PyDict_New(); 736 if (files == NULL) 737 goto error; 738 739 length = Py_UNICODE_strlen(path); 740 path[length] = SEP; 741 742 /* Start of Central Directory */ 743 count = 0; 744 for (;;) { 745 PyObject *t; 746 int err; 747 748 fseek(fp, header_offset, 0); /* Start of file header */ 
749 l = PyMarshal_ReadLongFromFile(fp); 750 if (l != 0x02014B50) 751 break; /* Bad: Central Dir File Header */ 752 fseek(fp, header_offset + 10, 0); 753 compress = PyMarshal_ReadShortFromFile(fp); 754 time = PyMarshal_ReadShortFromFile(fp); 755 date = PyMarshal_ReadShortFromFile(fp); 756 crc = PyMarshal_ReadLongFromFile(fp); 757 data_size = PyMarshal_ReadLongFromFile(fp); 758 file_size = PyMarshal_ReadLongFromFile(fp); 759 name_size = PyMarshal_ReadShortFromFile(fp); 760 header_size = 46 + name_size + 761 PyMarshal_ReadShortFromFile(fp) + 762 PyMarshal_ReadShortFromFile(fp); 763 fseek(fp, header_offset + 42, 0); 764 file_offset = PyMarshal_ReadLongFromFile(fp) + arc_offset; 765 if (name_size > MAXPATHLEN) 766 name_size = MAXPATHLEN; 767 768 p = name; 769 for (i = 0; i < name_size; i++) { 770 *p = (char)getc(fp); 771 if (*p == '/') 772 *p = SEP; 773 p++; 774 } 775 *p = 0; /* Add terminating null byte */ 776 header_offset += header_size; 777 778 nameobj = PyUnicode_DecodeFSDefaultAndSize(name, name_size); 779 if (nameobj == NULL) 780 goto error; 781 Py_UNICODE_strncpy(path + length + 1, PyUnicode_AS_UNICODE(nameobj), MAXPATHLEN - length - 1); 782 783 pathobj = PyUnicode_FromUnicode(path, Py_UNICODE_strlen(path)); 784 if (pathobj == NULL) 785 goto error; 786 t = Py_BuildValue("Niiiiiii", pathobj, compress, data_size, 787 file_size, file_offset, time, date, crc); 788 if (t == NULL) 789 goto error; 790 err = PyDict_SetItem(files, nameobj, t); 791 Py_CLEAR(nameobj); 792 Py_DECREF(t); 793 if (err != 0) 794 goto error; 795 count++; 796 } 797 fclose(fp); 798 if (Py_VerboseFlag) 799 PySys_FormatStderr("# zipimport: found %ld names in %U\n", 800 count, archive_obj); 801 return files; 802error: 803 fclose(fp); 804 Py_XDECREF(files); 805 Py_XDECREF(nameobj); 806 return NULL; 807} 808 809/* Return the zlib.decompress function object, or NULL if zlib couldn't 810 be imported. The function is cached when found, so subsequent calls 811 don't import zlib again. 
Returns a *borrowed* reference. 812 XXX This makes zlib.decompress immortal. */ 813static PyObject * 814get_decompress_func(void) 815{ 816 static PyObject *decompress = NULL; 817 818 if (decompress == NULL) { 819 PyObject *zlib; 820 static int importing_zlib = 0; 821 822 if (importing_zlib != 0) 823 /* Someone has a zlib.py[co] in their Zip file; 824 let's avoid a stack overflow. */ 825 return NULL; 826 importing_zlib = 1; 827 zlib = PyImport_ImportModuleNoBlock("zlib"); 828 importing_zlib = 0; 829 if (zlib != NULL) { 830 decompress = PyObject_GetAttrString(zlib, 831 "decompress"); 832 Py_DECREF(zlib); 833 } 834 else 835 PyErr_Clear(); 836 if (Py_VerboseFlag) 837 PySys_WriteStderr("# zipimport: zlib %s\n", 838 zlib != NULL ? "available": "UNAVAILABLE"); 839 } 840 return decompress; 841} 842 843/* Given a path to a Zip file and a toc_entry, return the (uncompressed) 844 data as a new reference. */ 845static PyObject * 846get_data(char *archive, PyObject *toc_entry) 847{ 848 PyObject *raw_data, *data = NULL, *decompress; 849 char *buf; 850 FILE *fp; 851 int err; 852 Py_ssize_t bytes_read = 0; 853 long l; 854 char *datapath; 855 long compress, data_size, file_size, file_offset, bytes_size; 856 long time, date, crc; 857 858 if (!PyArg_ParseTuple(toc_entry, "slllllll", &datapath, &compress, 859 &data_size, &file_size, &file_offset, &time, 860 &date, &crc)) { 861 return NULL; 862 } 863 864 fp = fopen(archive, "rb"); 865 if (!fp) { 866 PyErr_Format(PyExc_IOError, 867 "zipimport: can not open file %s", archive); 868 return NULL; 869 } 870 871 /* Check to make sure the local file header is correct */ 872 fseek(fp, file_offset, 0); 873 l = PyMarshal_ReadLongFromFile(fp); 874 if (l != 0x04034B50) { 875 /* Bad: Local File Header */ 876 PyErr_Format(ZipImportError, 877 "bad local file header in %s", 878 archive); 879 fclose(fp); 880 return NULL; 881 } 882 fseek(fp, file_offset + 26, 0); 883 l = 30 + PyMarshal_ReadShortFromFile(fp) + 884 PyMarshal_ReadShortFromFile(fp); /* local 
header size */ 885 file_offset += l; /* Start of file data */ 886 887 bytes_size = compress == 0 ? data_size : data_size + 1; 888 if (bytes_size == 0) 889 bytes_size++; 890 raw_data = PyBytes_FromStringAndSize((char *)NULL, bytes_size); 891 892 if (raw_data == NULL) { 893 fclose(fp); 894 return NULL; 895 } 896 buf = PyBytes_AsString(raw_data); 897 898 err = fseek(fp, file_offset, 0); 899 if (err == 0) 900 bytes_read = fread(buf, 1, data_size, fp); 901 fclose(fp); 902 if (err || bytes_read != data_size) { 903 PyErr_SetString(PyExc_IOError, 904 "zipimport: can't read data"); 905 Py_DECREF(raw_data); 906 return NULL; 907 } 908 909 if (compress != 0) { 910 buf[data_size] = 'Z'; /* saw this in zipfile.py */ 911 data_size++; 912 } 913 buf[data_size] = '\0'; 914 915 if (compress == 0) { /* data is not compressed */ 916 data = PyBytes_FromStringAndSize(buf, data_size); 917 Py_DECREF(raw_data); 918 return data; 919 } 920 921 /* Decompress with zlib */ 922 decompress = get_decompress_func(); 923 if (decompress == NULL) { 924 PyErr_SetString(ZipImportError, 925 "can't decompress data; " 926 "zlib not available"); 927 goto error; 928 } 929 data = PyObject_CallFunction(decompress, "Oi", raw_data, -15); 930error: 931 Py_DECREF(raw_data); 932 return data; 933} 934 935/* Lenient date/time comparison function. The precision of the mtime 936 in the archive is lower than the mtime stored in a .pyc: we 937 must allow a difference of at most one second. */ 938static int 939eq_mtime(time_t t1, time_t t2) 940{ 941 time_t d = t1 - t2; 942 if (d < 0) 943 d = -d; 944 /* dostime only stores even seconds, so be lenient */ 945 return d <= 1; 946} 947 948/* Given the contents of a .py[co] file in a buffer, unmarshal the data 949 and return the code object. Return None if it the magic word doesn't 950 match (we do this instead of raising an exception as we fall back 951 to .py if available and we don't want to mask other errors). 952 Returns a new reference. 
*/ 953static PyObject * 954unmarshal_code(char *pathname, PyObject *data, time_t mtime) 955{ 956 PyObject *code; 957 char *buf = PyBytes_AsString(data); 958 Py_ssize_t size = PyBytes_Size(data); 959 960 if (size <= 9) { 961 PyErr_SetString(ZipImportError, 962 "bad pyc data"); 963 return NULL; 964 } 965 966 if (get_long((unsigned char *)buf) != PyImport_GetMagicNumber()) { 967 if (Py_VerboseFlag) 968 PySys_WriteStderr("# %s has bad magic\n", 969 pathname); 970 Py_INCREF(Py_None); 971 return Py_None; /* signal caller to try alternative */ 972 } 973 974 if (mtime != 0 && !eq_mtime(get_long((unsigned char *)buf + 4), 975 mtime)) { 976 if (Py_VerboseFlag) 977 PySys_WriteStderr("# %s has bad mtime\n", 978 pathname); 979 Py_INCREF(Py_None); 980 return Py_None; /* signal caller to try alternative */ 981 } 982 983 code = PyMarshal_ReadObjectFromString(buf + 8, size - 8); 984 if (code == NULL) 985 return NULL; 986 if (!PyCode_Check(code)) { 987 Py_DECREF(code); 988 PyErr_Format(PyExc_TypeError, 989 "compiled module %.200s is not a code object", 990 pathname); 991 return NULL; 992 } 993 return code; 994} 995 996/* Replace any occurances of "\r\n?" in the input string with "\n". 997 This converts DOS and Mac line endings to Unix line endings. 998 Also append a trailing "\n" to be compatible with 999 PyParser_SimpleParseFile(). Returns a new reference. */ 1000static PyObject * 1001normalize_line_endings(PyObject *source) 1002{ 1003 char *buf, *q, *p = PyBytes_AsString(source); 1004 PyObject *fixed_source; 1005 int len = 0; 1006 1007 if (!p) { 1008 return PyBytes_FromStringAndSize("\n\0", 2); 1009 } 1010 1011 /* one char extra for trailing \n and one for terminating \0 */ 1012 buf = (char *)PyMem_Malloc(PyBytes_Size(source) + 2); 1013 if (buf == NULL) { 1014 PyErr_SetString(PyExc_MemoryError, 1015 "zipimport: no memory to allocate " 1016 "source buffer"); 1017 return NULL; 1018 } 1019 /* replace "\r\n?" 
by "\n" */ 1020 for (q = buf; *p != '\0'; p++) { 1021 if (*p == '\r') { 1022 *q++ = '\n'; 1023 if (*(p + 1) == '\n') 1024 p++; 1025 } 1026 else 1027 *q++ = *p; 1028 len++; 1029 } 1030 *q++ = '\n'; /* add trailing \n */ 1031 *q = '\0'; 1032 fixed_source = PyBytes_FromStringAndSize(buf, len + 2); 1033 PyMem_Free(buf); 1034 return fixed_source; 1035} 1036 1037/* Given a string buffer containing Python source code, compile it 1038 return and return a code object as a new reference. */ 1039static PyObject * 1040compile_source(char *pathname, PyObject *source) 1041{ 1042 PyObject *code, *fixed_source; 1043 1044 fixed_source = normalize_line_endings(source); 1045 if (fixed_source == NULL) 1046 return NULL; 1047 1048 code = Py_CompileString(PyBytes_AsString(fixed_source), pathname, 1049 Py_file_input); 1050 Py_DECREF(fixed_source); 1051 return code; 1052} 1053 1054/* Convert the date/time values found in the Zip archive to a value 1055 that's compatible with the time stamp stored in .pyc files. */ 1056static time_t 1057parse_dostime(int dostime, int dosdate) 1058{ 1059 struct tm stm; 1060 1061 memset((void *) &stm, '\0', sizeof(stm)); 1062 1063 stm.tm_sec = (dostime & 0x1f) * 2; 1064 stm.tm_min = (dostime >> 5) & 0x3f; 1065 stm.tm_hour = (dostime >> 11) & 0x1f; 1066 stm.tm_mday = dosdate & 0x1f; 1067 stm.tm_mon = ((dosdate >> 5) & 0x0f) - 1; 1068 stm.tm_year = ((dosdate >> 9) & 0x7f) + 80; 1069 stm.tm_isdst = -1; /* wday/yday is ignored */ 1070 1071 return mktime(&stm); 1072} 1073 1074/* Given a path to a .pyc or .pyo file in the archive, return the 1075 modifictaion time of the matching .py file, or 0 if no source 1076 is available. 
*/ 1077static time_t 1078get_mtime_of_source(ZipImporter *self, char *path) 1079{ 1080 PyObject *toc_entry; 1081 time_t mtime = 0; 1082 Py_ssize_t lastchar = strlen(path) - 1; 1083 char savechar = path[lastchar]; 1084 path[lastchar] = '\0'; /* strip 'c' or 'o' from *.py[co] */ 1085 toc_entry = PyDict_GetItemString(self->files, path); 1086 if (toc_entry != NULL && PyTuple_Check(toc_entry) && 1087 PyTuple_Size(toc_entry) == 8) { 1088 /* fetch the time stamp of the .py file for comparison 1089 with an embedded pyc time stamp */ 1090 int time, date; 1091 time = PyLong_AsLong(PyTuple_GetItem(toc_entry, 5)); 1092 date = PyLong_AsLong(PyTuple_GetItem(toc_entry, 6)); 1093 mtime = parse_dostime(time, date); 1094 } 1095 path[lastchar] = savechar; 1096 return mtime; 1097} 1098 1099/* Return the code object for the module named by 'fullname' from the 1100 Zip archive as a new reference. */ 1101static PyObject * 1102get_code_from_data(ZipImporter *self, int ispackage, int isbytecode, 1103 time_t mtime, PyObject *toc_entry) 1104{ 1105 PyObject *data, *code; 1106 char *modpath; 1107 char *archive = _PyUnicode_AsString(self->archive); 1108 1109 if (archive == NULL) 1110 return NULL; 1111 1112 data = get_data(archive, toc_entry); 1113 if (data == NULL) 1114 return NULL; 1115 1116 modpath = _PyUnicode_AsString(PyTuple_GetItem(toc_entry, 0)); 1117 1118 if (isbytecode) { 1119 code = unmarshal_code(modpath, data, mtime); 1120 } 1121 else { 1122 code = compile_source(modpath, data); 1123 } 1124 Py_DECREF(data); 1125 return code; 1126} 1127 1128/* Get the code object assoiciated with the module specified by 1129 'fullname'. 
*/ 1130static PyObject * 1131get_module_code(ZipImporter *self, char *fullname, 1132 int *p_ispackage, char **p_modpath) 1133{ 1134 PyObject *toc_entry; 1135 char *subname, path[MAXPATHLEN + 1]; 1136 int len; 1137 struct st_zip_searchorder *zso; 1138 1139 subname = get_subname(fullname); 1140 1141 len = make_filename(_PyUnicode_AsString(self->prefix), subname, path); 1142 if (len < 0) 1143 return NULL; 1144 1145 for (zso = zip_searchorder; *zso->suffix; zso++) { 1146 PyObject *code = NULL; 1147 1148 strcpy(path + len, zso->suffix); 1149 if (Py_VerboseFlag > 1) 1150 PySys_WriteStderr("# trying %s%c%s\n", 1151 _PyUnicode_AsString(self->archive), 1152 (int)SEP, path); 1153 toc_entry = PyDict_GetItemString(self->files, path); 1154 if (toc_entry != NULL) { 1155 time_t mtime = 0; 1156 int ispackage = zso->type & IS_PACKAGE; 1157 int isbytecode = zso->type & IS_BYTECODE; 1158 1159 if (isbytecode) 1160 mtime = get_mtime_of_source(self, path); 1161 if (p_ispackage != NULL) 1162 *p_ispackage = ispackage; 1163 code = get_code_from_data(self, ispackage, 1164 isbytecode, mtime, 1165 toc_entry); 1166 if (code == Py_None) { 1167 /* bad magic number or non-matching mtime 1168 in byte code, try next */ 1169 Py_DECREF(code); 1170 continue; 1171 } 1172 if (code != NULL && p_modpath != NULL) 1173 *p_modpath = _PyUnicode_AsString( 1174 PyTuple_GetItem(toc_entry, 0)); 1175 return code; 1176 } 1177 } 1178 PyErr_Format(ZipImportError, "can't find module '%.200s'", fullname); 1179 return NULL; 1180} 1181 1182 1183/* Module init */ 1184 1185PyDoc_STRVAR(zipimport_doc, 1186"zipimport provides support for importing Python modules from Zip archives.\n\ 1187\n\ 1188This module exports three objects:\n\ 1189- zipimporter: a class; its constructor takes a path to a Zip archive.\n\ 1190- ZipImportError: exception raised by zipimporter objects. 
It's a\n\ 1191 subclass of ImportError, so it can be caught as ImportError, too.\n\ 1192- _zip_directory_cache: a dict, mapping archive paths to zip directory\n\ 1193 info dicts, as used in zipimporter._files.\n\ 1194\n\ 1195It is usually not needed to use the zipimport module explicitly; it is\n\ 1196used by the builtin import mechanism for sys.path items that are paths\n\ 1197to Zip archives."); 1198 1199static struct PyModuleDef zipimportmodule = { 1200 PyModuleDef_HEAD_INIT, 1201 "zipimport", 1202 zipimport_doc, 1203 -1, 1204 NULL, 1205 NULL, 1206 NULL, 1207 NULL, 1208 NULL 1209}; 1210 1211PyMODINIT_FUNC 1212PyInit_zipimport(void) 1213{ 1214 PyObject *mod; 1215 1216 if (PyType_Ready(&ZipImporter_Type) < 0) 1217 return NULL; 1218 1219 /* Correct directory separator */ 1220 zip_searchorder[0].suffix[0] = SEP; 1221 zip_searchorder[1].suffix[0] = SEP; 1222 zip_searchorder[2].suffix[0] = SEP; 1223 if (Py_OptimizeFlag) { 1224 /* Reverse *.pyc and *.pyo */ 1225 struct st_zip_searchorder tmp; 1226 tmp = zip_searchorder[0]; 1227 zip_searchorder[0] = zip_searchorder[1]; 1228 zip_searchorder[1] = tmp; 1229 tmp = zip_searchorder[3]; 1230 zip_searchorder[3] = zip_searchorder[4]; 1231 zip_searchorder[4] = tmp; 1232 } 1233 1234 mod = PyModule_Create(&zipimportmodule); 1235 if (mod == NULL) 1236 return NULL; 1237 1238 ZipImportError = PyErr_NewException("zipimport.ZipImportError", 1239 PyExc_ImportError, NULL); 1240 if (ZipImportError == NULL) 1241 return NULL; 1242 1243 Py_INCREF(ZipImportError); 1244 if (PyModule_AddObject(mod, "ZipImportError", 1245 ZipImportError) < 0) 1246 return NULL; 1247 1248 Py_INCREF(&ZipImporter_Type); 1249 if (PyModule_AddObject(mod, "zipimporter", 1250 (PyObject *)&ZipImporter_Type) < 0) 1251 return NULL; 1252 1253 zip_directory_cache = PyDict_New(); 1254 if (zip_directory_cache == NULL) 1255 return NULL; 1256 Py_INCREF(zip_directory_cache); 1257 if (PyModule_AddObject(mod, "_zip_directory_cache", 1258 zip_directory_cache) < 0) 1259 return NULL; 
1260 return mod; 1261} 1262