zipimport.c revision 09bf7a799da7c8b7cdd0f9c5cd789769b8cab2d5
1#include "Python.h"
2#include "structmember.h"
3#include "osdefs.h"
4#include "marshal.h"
5#include <time.h>
6
7
8#define IS_SOURCE   0x0
9#define IS_BYTECODE 0x1
10#define IS_PACKAGE  0x2
11
/* One row of the module search table: a file-name suffix to append to
   a candidate path, plus IS_* flags describing what a match means. */
struct st_zip_searchorder {
    char suffix[14];    /* longest entry, "/__init__.pyc", is 13 chars + NUL */
    int type;           /* bitwise OR of IS_SOURCE / IS_BYTECODE / IS_PACKAGE */
};
16
/* zip_searchorder defines how we search for a module in the Zip
   archive: we first search for a package __init__, then for
   non-package .pyc, .pyo and .py entries. The .pyc and .pyo entries
   are swapped by initzipimport() if we run in optimized mode. Also,
   '/' is replaced by SEP there.

   The table is scanned front to back, so row order is the lookup
   priority. The row with an empty suffix is the end-of-table sentinel. */
static struct st_zip_searchorder zip_searchorder[] = {
    {"/__init__.pyc", IS_PACKAGE | IS_BYTECODE},
    {"/__init__.pyo", IS_PACKAGE | IS_BYTECODE},
    {"/__init__.py", IS_PACKAGE | IS_SOURCE},
    {".pyc", IS_BYTECODE},
    {".pyo", IS_BYTECODE},
    {".py", IS_SOURCE},
    {"", 0}     /* sentinel */
};
31
32/* zipimporter object definition and support */
33
34typedef struct _zipimporter ZipImporter;
35
36struct _zipimporter {
37    PyObject_HEAD
38    PyObject *archive;  /* pathname of the Zip archive,
39                           decoded from the filesystem encoding */
40    PyObject *prefix;   /* file prefix: "a/sub/directory/",
41                           encoded to the filesystem encoding */
42    PyObject *files;    /* dict with file info {path: toc_entry} */
43};
44
45static PyObject *ZipImportError;
46/* read_directory() cache */
47static PyObject *zip_directory_cache = NULL;
48
49/* forward decls */
50static PyObject *read_directory(PyObject *archive);
51static PyObject *get_data(PyObject *archive, PyObject *toc_entry);
52static PyObject *get_module_code(ZipImporter *self, char *fullname,
53                                 int *p_ispackage, PyObject **p_modpath);
54
55
56#define ZipImporter_Check(op) PyObject_TypeCheck(op, &ZipImporter_Type)
57
58
59/* zipimporter.__init__
60   Split the "subdirectory" from the Zip archive path, lookup a matching
61   entry in sys.path_importer_cache, fetch the file directory from there
62   if found, or else read it from the archive. */
63static int
64zipimporter_init(ZipImporter *self, PyObject *args, PyObject *kwds)
65{
66    PyObject *pathobj, *files;
67    Py_UNICODE *path, *p, *prefix, buf[MAXPATHLEN+2];
68    Py_ssize_t len;
69
70    if (!_PyArg_NoKeywords("zipimporter()", kwds))
71        return -1;
72
73    if (!PyArg_ParseTuple(args, "O&:zipimporter",
74                          PyUnicode_FSDecoder, &pathobj))
75        return -1;
76
77    /* copy path to buf */
78    len = PyUnicode_GET_SIZE(pathobj);
79    if (len == 0) {
80        PyErr_SetString(ZipImportError, "archive path is empty");
81        goto error;
82    }
83    if (len >= MAXPATHLEN) {
84        PyErr_SetString(ZipImportError,
85                        "archive path too long");
86        goto error;
87    }
88    Py_UNICODE_strcpy(buf, PyUnicode_AS_UNICODE(pathobj));
89
90#ifdef ALTSEP
91    for (p = buf; *p; p++) {
92        if (*p == ALTSEP)
93            *p = SEP;
94    }
95#endif
96
97    path = NULL;
98    prefix = NULL;
99    for (;;) {
100        struct stat statbuf;
101        int rv;
102
103        if (pathobj == NULL) {
104            pathobj = PyUnicode_FromUnicode(buf, len);
105            if (pathobj == NULL)
106                goto error;
107        }
108        rv = _Py_stat(pathobj, &statbuf);
109        if (rv == 0) {
110            /* it exists */
111            if (S_ISREG(statbuf.st_mode))
112                /* it's a file */
113                path = buf;
114            break;
115        }
116        else if (PyErr_Occurred())
117            goto error;
118        /* back up one path element */
119        p = Py_UNICODE_strrchr(buf, SEP);
120        if (prefix != NULL)
121            *prefix = SEP;
122        if (p == NULL)
123            break;
124        *p = '\0';
125        len = p - buf;
126        prefix = p;
127        Py_CLEAR(pathobj);
128    }
129    if (path == NULL) {
130        PyErr_SetString(ZipImportError, "not a Zip file");
131        goto error;
132    }
133
134    files = PyDict_GetItem(zip_directory_cache, pathobj);
135    if (files == NULL) {
136        files = read_directory(pathobj);
137        if (files == NULL)
138            goto error;
139        if (PyDict_SetItem(zip_directory_cache, pathobj, files) != 0)
140            goto error;
141    }
142    else
143        Py_INCREF(files);
144    self->files = files;
145
146    self->archive = pathobj;
147    pathobj = NULL;
148
149    if (prefix != NULL) {
150        prefix++;
151        len = Py_UNICODE_strlen(prefix);
152        if (prefix[len-1] != SEP) {
153            /* add trailing SEP */
154            prefix[len] = SEP;
155            prefix[len + 1] = '\0';
156            len++;
157        }
158    }
159    else
160        len = 0;
161    self->prefix = PyUnicode_FromUnicode(prefix, len);
162    if (self->prefix == NULL)
163        goto error;
164
165    return 0;
166
167error:
168    Py_XDECREF(pathobj);
169    return -1;
170}
171
172/* GC support. */
173static int
174zipimporter_traverse(PyObject *obj, visitproc visit, void *arg)
175{
176    ZipImporter *self = (ZipImporter *)obj;
177    Py_VISIT(self->files);
178    return 0;
179}
180
181static void
182zipimporter_dealloc(ZipImporter *self)
183{
184    PyObject_GC_UnTrack(self);
185    Py_XDECREF(self->archive);
186    Py_XDECREF(self->prefix);
187    Py_XDECREF(self->files);
188    Py_TYPE(self)->tp_free((PyObject *)self);
189}
190
191static PyObject *
192zipimporter_repr(ZipImporter *self)
193{
194    if (self->archive == NULL)
195        return PyUnicode_FromString("<zipimporter object \"???\">");
196    else if (self->prefix != NULL && PyUnicode_GET_SIZE(self->prefix) != 0)
197        return PyUnicode_FromFormat("<zipimporter object \"%U%c%U\">",
198                                    self->archive, SEP, self->prefix);
199    else
200        return PyUnicode_FromFormat("<zipimporter object \"%U\">",
201                                    self->archive);
202}
203
/* Return fullname.split(".")[-1]: a pointer into 'fullname' just past
   its last '.', or 'fullname' itself when it contains no dot. */
static char *
get_subname(char *fullname)
{
    char *last_dot = strrchr(fullname, '.');

    return (last_dot != NULL) ? last_dot + 1 : fullname;
}
215
216/* Given a (sub)modulename, write the potential file path in the
217   archive (without extension) to the path buffer. Return the
218   length of the resulting string. */
219static int
220make_filename(PyObject *prefix_obj, char *name, char *path, size_t pathsize)
221{
222    size_t len;
223    char *p;
224    PyObject *prefix;
225
226    prefix = PyUnicode_EncodeFSDefault(prefix_obj);
227    if (prefix == NULL)
228        return -1;
229    len = PyBytes_GET_SIZE(prefix);
230
231    /* self.prefix + name [+ SEP + "__init__"] + ".py[co]" */
232    if (len + strlen(name) + 13 >= pathsize - 1) {
233        PyErr_SetString(ZipImportError, "path too long");
234        Py_DECREF(prefix);
235        return -1;
236    }
237
238    strcpy(path, PyBytes_AS_STRING(prefix));
239    Py_DECREF(prefix);
240    strcpy(path + len, name);
241    for (p = path + len; *p; p++) {
242        if (*p == '.')
243            *p = SEP;
244    }
245    len += strlen(name);
246    assert(len < INT_MAX);
247    return (int)len;
248}
249
/* Result of get_module_info(). */
enum zi_module_info {
    MI_ERROR,       /* lookup failed; an exception has been set */
    MI_NOT_FOUND,   /* no search-table suffix matched an archive entry */
    MI_MODULE,      /* matched a non-package entry */
    MI_PACKAGE      /* matched an entry flagged IS_PACKAGE */
};
256
257/* Return some information about a module. */
258static enum zi_module_info
259get_module_info(ZipImporter *self, char *fullname)
260{
261    char *subname, path[MAXPATHLEN + 1];
262    int len;
263    struct st_zip_searchorder *zso;
264
265    subname = get_subname(fullname);
266
267    len = make_filename(self->prefix, subname, path, sizeof(path));
268    if (len < 0)
269        return MI_ERROR;
270
271    for (zso = zip_searchorder; *zso->suffix; zso++) {
272        strcpy(path + len, zso->suffix);
273        if (PyDict_GetItemString(self->files, path) != NULL) {
274            if (zso->type & IS_PACKAGE)
275                return MI_PACKAGE;
276            else
277                return MI_MODULE;
278        }
279    }
280    return MI_NOT_FOUND;
281}
282
283/* Check whether we can satisfy the import of the module named by
284   'fullname'. Return self if we can, None if we can't. */
285static PyObject *
286zipimporter_find_module(PyObject *obj, PyObject *args)
287{
288    ZipImporter *self = (ZipImporter *)obj;
289    PyObject *path = NULL;
290    char *fullname;
291    enum zi_module_info mi;
292
293    if (!PyArg_ParseTuple(args, "s|O:zipimporter.find_module",
294                          &fullname, &path))
295        return NULL;
296
297    mi = get_module_info(self, fullname);
298    if (mi == MI_ERROR)
299        return NULL;
300    if (mi == MI_NOT_FOUND) {
301        Py_INCREF(Py_None);
302        return Py_None;
303    }
304    Py_INCREF(self);
305    return (PyObject *)self;
306}
307
308/* Load and return the module named by 'fullname'. */
309static PyObject *
310zipimporter_load_module(PyObject *obj, PyObject *args)
311{
312    ZipImporter *self = (ZipImporter *)obj;
313    PyObject *code = NULL, *mod, *dict;
314    char *fullname;
315    PyObject *modpath = NULL, *modpath_bytes;
316    int ispackage;
317
318    if (!PyArg_ParseTuple(args, "s:zipimporter.load_module",
319                          &fullname))
320        return NULL;
321
322    code = get_module_code(self, fullname, &ispackage, &modpath);
323    if (code == NULL)
324        goto error;
325
326    mod = PyImport_AddModule(fullname);
327    if (mod == NULL)
328        goto error;
329    dict = PyModule_GetDict(mod);
330
331    /* mod.__loader__ = self */
332    if (PyDict_SetItemString(dict, "__loader__", (PyObject *)self) != 0)
333        goto error;
334
335    if (ispackage) {
336        /* add __path__ to the module *before* the code gets
337           executed */
338        PyObject *pkgpath, *fullpath;
339        char *subname = get_subname(fullname);
340        int err;
341
342        fullpath = PyUnicode_FromFormat("%U%c%U%s",
343                                self->archive, SEP,
344                                self->prefix, subname);
345        if (fullpath == NULL)
346            goto error;
347
348        pkgpath = Py_BuildValue("[O]", fullpath);
349        Py_DECREF(fullpath);
350        if (pkgpath == NULL)
351            goto error;
352        err = PyDict_SetItemString(dict, "__path__", pkgpath);
353        Py_DECREF(pkgpath);
354        if (err != 0)
355            goto error;
356    }
357    modpath_bytes = PyUnicode_EncodeFSDefault(modpath);
358    if (modpath_bytes == NULL)
359        goto error;
360    mod = PyImport_ExecCodeModuleEx(fullname, code,
361                                    PyBytes_AS_STRING(modpath_bytes));
362    Py_DECREF(modpath_bytes);
363    Py_CLEAR(code);
364    if (mod == NULL)
365        goto error;
366
367    if (Py_VerboseFlag)
368        PySys_FormatStderr("import %s # loaded from Zip %U\n",
369                           fullname, modpath);
370    Py_DECREF(modpath);
371    return mod;
372error:
373    Py_XDECREF(code);
374    Py_XDECREF(modpath);
375    return NULL;
376}
377
378/* Return a string matching __file__ for the named module */
379static PyObject *
380zipimporter_get_filename(PyObject *obj, PyObject *args)
381{
382    ZipImporter *self = (ZipImporter *)obj;
383    PyObject *code;
384    char *fullname;
385    PyObject *modpath;
386    int ispackage;
387
388    if (!PyArg_ParseTuple(args, "s:zipimporter.get_filename",
389                          &fullname))
390        return NULL;
391
392    /* Deciding the filename requires working out where the code
393       would come from if the module was actually loaded */
394    code = get_module_code(self, fullname, &ispackage, &modpath);
395    if (code == NULL)
396        return NULL;
397    Py_DECREF(code); /* Only need the path info */
398
399    return modpath;
400}
401
402/* Return a bool signifying whether the module is a package or not. */
403static PyObject *
404zipimporter_is_package(PyObject *obj, PyObject *args)
405{
406    ZipImporter *self = (ZipImporter *)obj;
407    char *fullname;
408    enum zi_module_info mi;
409
410    if (!PyArg_ParseTuple(args, "s:zipimporter.is_package",
411                          &fullname))
412        return NULL;
413
414    mi = get_module_info(self, fullname);
415    if (mi == MI_ERROR)
416        return NULL;
417    if (mi == MI_NOT_FOUND) {
418        PyErr_Format(ZipImportError, "can't find module '%s'", fullname);
419        return NULL;
420    }
421    return PyBool_FromLong(mi == MI_PACKAGE);
422}
423
424static PyObject *
425zipimporter_get_data(PyObject *obj, PyObject *args)
426{
427    ZipImporter *self = (ZipImporter *)obj;
428    PyObject *pathobj, *key;
429    const Py_UNICODE *path;
430#ifdef ALTSEP
431    Py_UNICODE *p, buf[MAXPATHLEN + 1];
432#endif
433    Py_UNICODE *archive;
434    PyObject *toc_entry;
435    Py_ssize_t path_len, len;
436
437    if (!PyArg_ParseTuple(args, "U:zipimporter.get_data", &pathobj))
438        return NULL;
439
440    path_len = PyUnicode_GET_SIZE(pathobj);
441    path = PyUnicode_AS_UNICODE(pathobj);
442#ifdef ALTSEP
443    if (path_len >= MAXPATHLEN) {
444        PyErr_SetString(ZipImportError, "path too long");
445        return NULL;
446    }
447    Py_UNICODE_strcpy(buf, path);
448    for (p = buf; *p; p++) {
449        if (*p == ALTSEP)
450            *p = SEP;
451    }
452    path = buf;
453#endif
454    archive = PyUnicode_AS_UNICODE(self->archive);
455    len = PyUnicode_GET_SIZE(self->archive);
456    if ((size_t)len < Py_UNICODE_strlen(path) &&
457        Py_UNICODE_strncmp(path, archive, len) == 0 &&
458        path[len] == SEP) {
459        path += len + 1;
460        path_len -= len + 1;
461    }
462
463    key = PyUnicode_FromUnicode(path, path_len);
464    if (key == NULL)
465        return NULL;
466    toc_entry = PyDict_GetItem(self->files, key);
467    if (toc_entry == NULL) {
468        PyErr_SetFromErrnoWithFilenameObject(PyExc_IOError, key);
469        Py_DECREF(key);
470        return NULL;
471    }
472    Py_DECREF(key);
473    return get_data(self->archive, toc_entry);
474}
475
476static PyObject *
477zipimporter_get_code(PyObject *obj, PyObject *args)
478{
479    ZipImporter *self = (ZipImporter *)obj;
480    char *fullname;
481
482    if (!PyArg_ParseTuple(args, "s:zipimporter.get_code", &fullname))
483        return NULL;
484
485    return get_module_code(self, fullname, NULL, NULL);
486}
487
488static PyObject *
489zipimporter_get_source(PyObject *obj, PyObject *args)
490{
491    ZipImporter *self = (ZipImporter *)obj;
492    PyObject *toc_entry;
493    char *fullname, *subname, path[MAXPATHLEN+1];
494    int len;
495    enum zi_module_info mi;
496
497    if (!PyArg_ParseTuple(args, "s:zipimporter.get_source", &fullname))
498        return NULL;
499
500    mi = get_module_info(self, fullname);
501    if (mi == MI_ERROR)
502        return NULL;
503    if (mi == MI_NOT_FOUND) {
504        PyErr_Format(ZipImportError, "can't find module '%s'", fullname);
505        return NULL;
506    }
507    subname = get_subname(fullname);
508
509    len = make_filename(self->prefix, subname, path, sizeof(path));
510    if (len < 0)
511        return NULL;
512
513    if (mi == MI_PACKAGE) {
514        path[len] = SEP;
515        strcpy(path + len + 1, "__init__.py");
516    }
517    else
518        strcpy(path + len, ".py");
519
520    toc_entry = PyDict_GetItemString(self->files, path);
521    if (toc_entry != NULL) {
522        PyObject *res, *bytes;
523        bytes = get_data(self->archive, toc_entry);
524        if (bytes == NULL)
525            return NULL;
526        res = PyUnicode_FromStringAndSize(PyBytes_AS_STRING(bytes),
527                                          PyBytes_GET_SIZE(bytes));
528        Py_DECREF(bytes);
529        return res;
530    }
531
532    /* we have the module, but no source */
533    Py_INCREF(Py_None);
534    return Py_None;
535}
536
537PyDoc_STRVAR(doc_find_module,
538"find_module(fullname, path=None) -> self or None.\n\
539\n\
540Search for a module specified by 'fullname'. 'fullname' must be the\n\
541fully qualified (dotted) module name. It returns the zipimporter\n\
542instance itself if the module was found, or None if it wasn't.\n\
543The optional 'path' argument is ignored -- it's there for compatibility\n\
544with the importer protocol.");
545
546PyDoc_STRVAR(doc_load_module,
547"load_module(fullname) -> module.\n\
548\n\
549Load the module specified by 'fullname'. 'fullname' must be the\n\
550fully qualified (dotted) module name. It returns the imported\n\
551module, or raises ZipImportError if it wasn't found.");
552
553PyDoc_STRVAR(doc_get_data,
554"get_data(pathname) -> string with file data.\n\
555\n\
556Return the data associated with 'pathname'. Raise IOError if\n\
557the file wasn't found.");
558
559PyDoc_STRVAR(doc_is_package,
560"is_package(fullname) -> bool.\n\
561\n\
562Return True if the module specified by fullname is a package.\n\
563Raise ZipImportError if the module couldn't be found.");
564
565PyDoc_STRVAR(doc_get_code,
566"get_code(fullname) -> code object.\n\
567\n\
568Return the code object for the specified module. Raise ZipImportError\n\
569if the module couldn't be found.");
570
571PyDoc_STRVAR(doc_get_source,
572"get_source(fullname) -> source string.\n\
573\n\
574Return the source code for the specified module. Raise ZipImportError\n\
575if the module couldn't be found, return None if the archive does\n\
576contain the module, but has no source for it.");
577
578
579PyDoc_STRVAR(doc_get_filename,
580"get_filename(fullname) -> filename string.\n\
581\n\
582Return the filename for the specified module.");
583
584static PyMethodDef zipimporter_methods[] = {
585    {"find_module", zipimporter_find_module, METH_VARARGS,
586     doc_find_module},
587    {"load_module", zipimporter_load_module, METH_VARARGS,
588     doc_load_module},
589    {"get_data", zipimporter_get_data, METH_VARARGS,
590     doc_get_data},
591    {"get_code", zipimporter_get_code, METH_VARARGS,
592     doc_get_code},
593    {"get_source", zipimporter_get_source, METH_VARARGS,
594     doc_get_source},
595    {"get_filename", zipimporter_get_filename, METH_VARARGS,
596     doc_get_filename},
597    {"is_package", zipimporter_is_package, METH_VARARGS,
598     doc_is_package},
599    {NULL,              NULL}   /* sentinel */
600};
601
602static PyMemberDef zipimporter_members[] = {
603    {"archive",  T_OBJECT, offsetof(ZipImporter, archive),  READONLY},
604    {"prefix",   T_OBJECT, offsetof(ZipImporter, prefix),   READONLY},
605    {"_files",   T_OBJECT, offsetof(ZipImporter, files),    READONLY},
606    {NULL}
607};
608
609PyDoc_STRVAR(zipimporter_doc,
610"zipimporter(archivepath) -> zipimporter object\n\
611\n\
612Create a new zipimporter instance. 'archivepath' must be a path to\n\
613a zipfile, or to a specific path inside a zipfile. For example, it can be\n\
614'/tmp/myimport.zip', or '/tmp/myimport.zip/mydirectory', if mydirectory is a\n\
615valid directory inside the archive.\n\
616\n\
617'ZipImportError is raised if 'archivepath' doesn't point to a valid Zip\n\
618archive.\n\
619\n\
620The 'archive' attribute of zipimporter objects contains the name of the\n\
621zipfile targeted.");
622
/* Discards its argument and expands to 0, so the type object's ob_type
   field is statically initialised to NULL.
   NOTE(review): presumably the real address is filled in during module
   initialisation, which is outside this part of the file -- confirm. */
#define DEFERRED_ADDRESS(ADDR) 0

/* Type object for zipimport.zipimporter. The initialiser is
   positional: each value must stay in the slot named by its trailing
   comment. The type is subclassable and takes part in cyclic GC
   (traverse only; no tp_clear). */
static PyTypeObject ZipImporter_Type = {
    PyVarObject_HEAD_INIT(DEFERRED_ADDRESS(&PyType_Type), 0)
    "zipimport.zipimporter",
    sizeof(ZipImporter),
    0,                                          /* tp_itemsize */
    (destructor)zipimporter_dealloc,            /* tp_dealloc */
    0,                                          /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_reserved */
    (reprfunc)zipimporter_repr,                 /* tp_repr */
    0,                                          /* tp_as_number */
    0,                                          /* tp_as_sequence */
    0,                                          /* tp_as_mapping */
    0,                                          /* tp_hash */
    0,                                          /* tp_call */
    0,                                          /* tp_str */
    PyObject_GenericGetAttr,                    /* tp_getattro */
    0,                                          /* tp_setattro */
    0,                                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
        Py_TPFLAGS_HAVE_GC,                     /* tp_flags */
    zipimporter_doc,                            /* tp_doc */
    zipimporter_traverse,                       /* tp_traverse */
    0,                                          /* tp_clear */
    0,                                          /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    0,                                          /* tp_iter */
    0,                                          /* tp_iternext */
    zipimporter_methods,                        /* tp_methods */
    zipimporter_members,                        /* tp_members */
    0,                                          /* tp_getset */
    0,                                          /* tp_base */
    0,                                          /* tp_dict */
    0,                                          /* tp_descr_get */
    0,                                          /* tp_descr_set */
    0,                                          /* tp_dictoffset */
    (initproc)zipimporter_init,                 /* tp_init */
    PyType_GenericAlloc,                        /* tp_alloc */
    PyType_GenericNew,                          /* tp_new */
    PyObject_GC_Del,                            /* tp_free */
};
667
668
669/* implementation */
670
/* Given a buffer, return the long that is represented by the first
   4 bytes, encoded as little endian and interpreted as a signed
   32-bit value (so ff ff ff ff yields -1 whatever the width of long).
   This partially reimplements marshal.c:r_long().

   The bytes are assembled in unsigned arithmetic so that no shift
   overflows a signed type, and the sign is applied arithmetically
   rather than through a SIZEOF_LONG-dependent bit trick. */
static long
get_long(const unsigned char *buf) {
    unsigned long bits;

    bits = (unsigned long)buf[0]
         | ((unsigned long)buf[1] <<  8)
         | ((unsigned long)buf[2] << 16)
         | ((unsigned long)buf[3] << 24);

    if (bits & 0x80000000UL) {
        /* Negative value: strip the sign bit, then subtract 2**31 in
           two steps so the arithmetic also fits a 32-bit long. */
        return (long)(bits - 0x80000000UL) - 0x7FFFFFFFL - 1L;
    }
    return (long)bits;
}
687
688/*
689   read_directory(archive) -> files dict (new reference)
690
691   Given a path to a Zip archive, build a dict, mapping file names
692   (local to the archive, using SEP as a separator) to toc entries.
693
694   A toc_entry is a tuple:
695
696   (__file__,      # value to use for __file__, available for all files,
697                   # encoded to the filesystem encoding
698    compress,      # compression kind; 0 for uncompressed
699    data_size,     # size of compressed data on disk
700    file_size,     # size of decompressed data
701    file_offset,   # offset of file header from start of archive
702    time,          # mod time of file (in dos format)
703    date,          # mod data of file (in dos format)
704    crc,           # crc checksum of the data
705   )
706
707   Directories can be recognized by the trailing SEP in the name,
708   data_size and file_offset are 0.
709*/
710static PyObject *
711read_directory(PyObject *archive_obj)
712{
713    /* FIXME: work on Py_UNICODE* instead of char* */
714    PyObject *files = NULL;
715    FILE *fp;
716    unsigned short flags;
717    short compress, time, date, name_size;
718    long crc, data_size, file_size, header_size;
719    Py_ssize_t file_offset, header_position, header_offset;
720    long i, l, count;
721    size_t length;
722    Py_UNICODE path[MAXPATHLEN + 5];
723    char name[MAXPATHLEN + 5];
724    PyObject *nameobj = NULL;
725    char *p, endof_central_dir[22];
726    Py_ssize_t arc_offset;  /* Absolute offset to start of the zip-archive. */
727    PyObject *pathobj;
728    const char *charset;
729    int bootstrap;
730
731    if (PyUnicode_GET_SIZE(archive_obj) > MAXPATHLEN) {
732        PyErr_SetString(PyExc_OverflowError,
733                        "Zip path name is too long");
734        return NULL;
735    }
736    Py_UNICODE_strcpy(path, PyUnicode_AS_UNICODE(archive_obj));
737
738    fp = _Py_fopen(archive_obj, "rb");
739    if (fp == NULL) {
740        if (!PyErr_Occurred())
741            PyErr_Format(ZipImportError, "can't open Zip file: '%U'", archive_obj);
742        return NULL;
743    }
744
745    if (fseek(fp, -22, SEEK_END) == -1) {
746        fclose(fp);
747        PyErr_Format(ZipImportError, "can't read Zip file: %R", archive);
748        return NULL;
749    }
750    header_position = ftell(fp);
751    if (fread(endof_central_dir, 1, 22, fp) != 22) {
752        fclose(fp);
753        PyErr_Format(ZipImportError, "can't read Zip file: '%U'", archive_obj);
754        return NULL;
755    }
756    if (get_long((unsigned char *)endof_central_dir) != 0x06054B50) {
757        /* Bad: End of Central Dir signature */
758        fclose(fp);
759        PyErr_Format(ZipImportError, "not a Zip file: '%U'", archive_obj);
760        return NULL;
761    }
762
763    header_size = get_long((unsigned char *)endof_central_dir + 12);
764    header_offset = get_long((unsigned char *)endof_central_dir + 16);
765    arc_offset = header_position - header_offset - header_size;
766    header_offset += arc_offset;
767
768    files = PyDict_New();
769    if (files == NULL)
770        goto error;
771
772    length = Py_UNICODE_strlen(path);
773    path[length] = SEP;
774
775    /* Start of Central Directory */
776    count = 0;
777    for (;;) {
778        PyObject *t;
779        int err;
780
781        if (fseek(fp, header_offset, 0) == -1)  /* Start of file header */
782            goto fseek_error;
783        l = PyMarshal_ReadLongFromFile(fp);
784        if (l != 0x02014B50)
785            break;              /* Bad: Central Dir File Header */
786        if (fseek(fp, header_offset + 8, 0) == -1)
787            goto fseek_error;
788        flags = (unsigned short)PyMarshal_ReadShortFromFile(fp);
789        compress = PyMarshal_ReadShortFromFile(fp);
790        time = PyMarshal_ReadShortFromFile(fp);
791        date = PyMarshal_ReadShortFromFile(fp);
792        crc = PyMarshal_ReadLongFromFile(fp);
793        data_size = PyMarshal_ReadLongFromFile(fp);
794        file_size = PyMarshal_ReadLongFromFile(fp);
795        name_size = PyMarshal_ReadShortFromFile(fp);
796        header_size = 46 + name_size +
797           PyMarshal_ReadShortFromFile(fp) +
798           PyMarshal_ReadShortFromFile(fp);
799        if (fseek(fp, header_offset + 42, 0) == -1)
800            goto fseek_error;
801        file_offset = PyMarshal_ReadLongFromFile(fp) + arc_offset;
802        if (name_size > MAXPATHLEN)
803            name_size = MAXPATHLEN;
804
805        p = name;
806        for (i = 0; i < name_size; i++) {
807            *p = (char)getc(fp);
808            if (*p == '/')
809                *p = SEP;
810            p++;
811        }
812        *p = 0;         /* Add terminating null byte */
813        header_offset += header_size;
814
815        bootstrap = 0;
816        if (flags & 0x0800)
817            charset = "utf-8";
818        else if (!PyThreadState_GET()->interp->codecs_initialized) {
819            /* During bootstrap, we may need to load the encodings
820               package from a ZIP file. But the cp437 encoding is implemented
821               in Python in the encodings package.
822
823               Break out of this dependency by assuming that the path to
824               the encodings module is ASCII-only. */
825            charset = "ascii";
826            bootstrap = 1;
827        }
828        else
829            charset = "cp437";
830        nameobj = PyUnicode_Decode(name, name_size, charset, NULL);
831        if (nameobj == NULL) {
832            if (bootstrap)
833                PyErr_Format(PyExc_NotImplementedError,
834                    "bootstrap issue: python%i%i.zip contains non-ASCII "
835                    "filenames without the unicode flag",
836                    PY_MAJOR_VERSION, PY_MINOR_VERSION);
837            goto error;
838        }
839        Py_UNICODE_strncpy(path + length + 1, PyUnicode_AS_UNICODE(nameobj), MAXPATHLEN - length - 1);
840
841        pathobj = PyUnicode_FromUnicode(path, Py_UNICODE_strlen(path));
842        if (pathobj == NULL)
843            goto error;
844        t = Py_BuildValue("Nhllnhhl", pathobj, compress, data_size,
845                          file_size, file_offset, time, date, crc);
846        if (t == NULL)
847            goto error;
848        err = PyDict_SetItem(files, nameobj, t);
849        Py_CLEAR(nameobj);
850        Py_DECREF(t);
851        if (err != 0)
852            goto error;
853        count++;
854    }
855    fclose(fp);
856    if (Py_VerboseFlag)
857        PySys_FormatStderr("# zipimport: found %ld names in %U\n",
858            count, archive_obj);
859    return files;
860fseek_error:
861    fclose(fp);
862    Py_XDECREF(files);
863    Py_XDECREF(nameobj);
864    PyErr_Format(ZipImportError, "can't read Zip file: %R", archive);
865    return NULL;
866error:
867    fclose(fp);
868    Py_XDECREF(files);
869    Py_XDECREF(nameobj);
870    return NULL;
871}
872
873/* Return the zlib.decompress function object, or NULL if zlib couldn't
874   be imported. The function is cached when found, so subsequent calls
875   don't import zlib again. */
876static PyObject *
877get_decompress_func(void)
878{
879    static int importing_zlib = 0;
880    PyObject *zlib;
881    PyObject *decompress;
882
883    if (importing_zlib != 0)
884        /* Someone has a zlib.py[co] in their Zip file;
885           let's avoid a stack overflow. */
886        return NULL;
887    importing_zlib = 1;
888    zlib = PyImport_ImportModuleNoBlock("zlib");
889    importing_zlib = 0;
890    if (zlib != NULL) {
891        decompress = PyObject_GetAttrString(zlib,
892                                            "decompress");
893        Py_DECREF(zlib);
894    }
895    else {
896        PyErr_Clear();
897        decompress = NULL;
898    }
899    if (Py_VerboseFlag)
900        PySys_WriteStderr("# zipimport: zlib %s\n",
901            zlib != NULL ? "available": "UNAVAILABLE");
902    return decompress;
903}
904
905/* Given a path to a Zip file and a toc_entry, return the (uncompressed)
906   data as a new reference. */
907static PyObject *
908get_data(PyObject *archive, PyObject *toc_entry)
909{
910    PyObject *raw_data, *data = NULL, *decompress;
911    char *buf;
912    FILE *fp;
913    int err;
914    Py_ssize_t bytes_read = 0;
915    long l;
916    PyObject *datapath;
917    long compress, data_size, file_size, file_offset, bytes_size;
918    long time, date, crc;
919
920    if (!PyArg_ParseTuple(toc_entry, "Olllllll", &datapath, &compress,
921                          &data_size, &file_size, &file_offset, &time,
922                          &date, &crc)) {
923        return NULL;
924    }
925
926    fp = _Py_fopen(archive, "rb");
927    if (!fp) {
928        if (!PyErr_Occurred())
929            PyErr_Format(PyExc_IOError,
930               "zipimport: can not open file %U", archive);
931        return NULL;
932    }
933
934    /* Check to make sure the local file header is correct */
935    if (fseek(fp, file_offset, 0) == -1) {
936        fclose(fp);
937        PyErr_Format(ZipImportError, "can't read Zip file: %R", archive);
938        return NULL;
939    }
940
941    l = PyMarshal_ReadLongFromFile(fp);
942    if (l != 0x04034B50) {
943        /* Bad: Local File Header */
944        PyErr_Format(ZipImportError,
945                     "bad local file header in %U",
946                     archive);
947        fclose(fp);
948        return NULL;
949    }
950    if (fseek(fp, file_offset + 26, 0) == -1) {
951        fclose(fp);
952        PyErr_Format(ZipImportError, "can't read Zip file: %R", archive);
953        return NULL;
954    }
955
956    l = 30 + PyMarshal_ReadShortFromFile(fp) +
957        PyMarshal_ReadShortFromFile(fp);        /* local header size */
958    file_offset += l;           /* Start of file data */
959
960    bytes_size = compress == 0 ? data_size : data_size + 1;
961    if (bytes_size == 0)
962        bytes_size++;
963    raw_data = PyBytes_FromStringAndSize((char *)NULL, bytes_size);
964
965    if (raw_data == NULL) {
966        fclose(fp);
967        return NULL;
968    }
969    buf = PyBytes_AsString(raw_data);
970
971    err = fseek(fp, file_offset, 0);
972    if (err == 0) {
973        bytes_read = fread(buf, 1, data_size, fp);
974    } else {
975        fclose(fp);
976        PyErr_Format(ZipImportError, "can't read Zip file: %R", archive);
977        return NULL;
978    }
979    fclose(fp);
980    if (err || bytes_read != data_size) {
981        PyErr_SetString(PyExc_IOError,
982                        "zipimport: can't read data");
983        Py_DECREF(raw_data);
984        return NULL;
985    }
986
987    if (compress != 0) {
988        buf[data_size] = 'Z';  /* saw this in zipfile.py */
989        data_size++;
990    }
991    buf[data_size] = '\0';
992
993    if (compress == 0) {  /* data is not compressed */
994        data = PyBytes_FromStringAndSize(buf, data_size);
995        Py_DECREF(raw_data);
996        return data;
997    }
998
999    /* Decompress with zlib */
1000    decompress = get_decompress_func();
1001    if (decompress == NULL) {
1002        PyErr_SetString(ZipImportError,
1003                        "can't decompress data; "
1004                        "zlib not available");
1005        goto error;
1006    }
1007    data = PyObject_CallFunction(decompress, "Oi", raw_data, -15);
1008    Py_DECREF(decompress);
1009error:
1010    Py_DECREF(raw_data);
1011    return data;
1012}
1013
/* Lenient date/time comparison function. The mtime kept in the archive
   is less precise than the one stored in a .pyc, so two time stamps
   are considered equal when they are at most one second apart.
   Returns 1 if they match under that rule, 0 otherwise. */
static int
eq_mtime(time_t t1, time_t t2)
{
    time_t delta = t1 - t2;

    /* dostime only stores even seconds, so be lenient */
    return (delta < 0 ? -delta : delta) <= 1;
}
1026
1027/* Given the contents of a .py[co] file in a buffer, unmarshal the data
1028   and return the code object. Return None if it the magic word doesn't
1029   match (we do this instead of raising an exception as we fall back
1030   to .py if available and we don't want to mask other errors).
1031   Returns a new reference. */
1032static PyObject *
1033unmarshal_code(char *pathname, PyObject *data, time_t mtime)
1034{
1035    PyObject *code;
1036    char *buf = PyBytes_AsString(data);
1037    Py_ssize_t size = PyBytes_Size(data);
1038
1039    if (size <= 9) {
1040        PyErr_SetString(ZipImportError,
1041                        "bad pyc data");
1042        return NULL;
1043    }
1044
1045    if (get_long((unsigned char *)buf) != PyImport_GetMagicNumber()) {
1046        if (Py_VerboseFlag)
1047            PySys_WriteStderr("# %s has bad magic\n",
1048                              pathname);
1049        Py_INCREF(Py_None);
1050        return Py_None;  /* signal caller to try alternative */
1051    }
1052
1053    if (mtime != 0 && !eq_mtime(get_long((unsigned char *)buf + 4),
1054                                mtime)) {
1055        if (Py_VerboseFlag)
1056            PySys_WriteStderr("# %s has bad mtime\n",
1057                              pathname);
1058        Py_INCREF(Py_None);
1059        return Py_None;  /* signal caller to try alternative */
1060    }
1061
1062    code = PyMarshal_ReadObjectFromString(buf + 8, size - 8);
1063    if (code == NULL)
1064        return NULL;
1065    if (!PyCode_Check(code)) {
1066        Py_DECREF(code);
1067        PyErr_Format(PyExc_TypeError,
1068             "compiled module %s is not a code object",
1069             pathname);
1070        return NULL;
1071    }
1072    return code;
1073}
1074
1075/* Replace any occurances of "\r\n?" in the input string with "\n".
1076   This converts DOS and Mac line endings to Unix line endings.
1077   Also append a trailing "\n" to be compatible with
1078   PyParser_SimpleParseFile(). Returns a new reference. */
1079static PyObject *
1080normalize_line_endings(PyObject *source)
1081{
1082    char *buf, *q, *p = PyBytes_AsString(source);
1083    PyObject *fixed_source;
1084    int len = 0;
1085
1086    if (!p) {
1087        return PyBytes_FromStringAndSize("\n\0", 2);
1088    }
1089
1090    /* one char extra for trailing \n and one for terminating \0 */
1091    buf = (char *)PyMem_Malloc(PyBytes_Size(source) + 2);
1092    if (buf == NULL) {
1093        PyErr_SetString(PyExc_MemoryError,
1094                        "zipimport: no memory to allocate "
1095                        "source buffer");
1096        return NULL;
1097    }
1098    /* replace "\r\n?" by "\n" */
1099    for (q = buf; *p != '\0'; p++) {
1100        if (*p == '\r') {
1101            *q++ = '\n';
1102            if (*(p + 1) == '\n')
1103                p++;
1104        }
1105        else
1106            *q++ = *p;
1107        len++;
1108    }
1109    *q++ = '\n';  /* add trailing \n */
1110    *q = '\0';
1111    fixed_source = PyBytes_FromStringAndSize(buf, len + 2);
1112    PyMem_Free(buf);
1113    return fixed_source;
1114}
1115
1116/* Given a string buffer containing Python source code, compile it
1117   return and return a code object as a new reference. */
1118static PyObject *
1119compile_source(char *pathname, PyObject *source)
1120{
1121    PyObject *code, *fixed_source;
1122
1123    fixed_source = normalize_line_endings(source);
1124    if (fixed_source == NULL)
1125        return NULL;
1126
1127    code = Py_CompileString(PyBytes_AsString(fixed_source), pathname,
1128                            Py_file_input);
1129    Py_DECREF(fixed_source);
1130    return code;
1131}
1132
/* Convert the date/time values found in the Zip archive to a value
   that's compatible with the time stamp stored in .pyc files.

   'dostime' packs hours (bits 11-15), minutes (bits 5-10) and halved
   seconds (bits 0-4); 'dosdate' packs years since 1980 (bits 9-15),
   month 1-12 (bits 5-8) and day of month (bits 0-4).
   Returns whatever mktime() yields for that broken-down local time. */
static time_t
parse_dostime(int dostime, int dosdate)
{
    struct tm stm;
    int half_seconds = dostime & 0x1f;
    int minutes = (dostime >> 5) & 0x3f;
    int hours = (dostime >> 11) & 0x1f;
    int day = dosdate & 0x1f;
    int month = (dosdate >> 5) & 0x0f;
    int years_since_1980 = (dosdate >> 9) & 0x7f;

    memset((void *) &stm, '\0', sizeof(stm));

    stm.tm_sec = half_seconds * 2;
    stm.tm_min = minutes;
    stm.tm_hour = hours;
    stm.tm_mday = day;
    stm.tm_mon = month - 1;                 /* struct tm months are 0-based */
    stm.tm_year = years_since_1980 + 80;    /* struct tm years count from 1900 */
    stm.tm_isdst = -1; /* wday/yday is ignored */

    return mktime(&stm);
}
1152
1153/* Given a path to a .pyc or .pyo file in the archive, return the
1154   modification time of the matching .py file, or 0 if no source
1155   is available. */
1156static time_t
1157get_mtime_of_source(ZipImporter *self, char *path)
1158{
1159    PyObject *toc_entry;
1160    time_t mtime = 0;
1161    Py_ssize_t lastchar = strlen(path) - 1;
1162    char savechar = path[lastchar];
1163    path[lastchar] = '\0';  /* strip 'c' or 'o' from *.py[co] */
1164    toc_entry = PyDict_GetItemString(self->files, path);
1165    if (toc_entry != NULL && PyTuple_Check(toc_entry) &&
1166        PyTuple_Size(toc_entry) == 8) {
1167        /* fetch the time stamp of the .py file for comparison
1168           with an embedded pyc time stamp */
1169        int time, date;
1170        time = PyLong_AsLong(PyTuple_GetItem(toc_entry, 5));
1171        date = PyLong_AsLong(PyTuple_GetItem(toc_entry, 6));
1172        mtime = parse_dostime(time, date);
1173    }
1174    path[lastchar] = savechar;
1175    return mtime;
1176}
1177
1178/* Return the code object for the module named by 'fullname' from the
1179   Zip archive as a new reference. */
1180static PyObject *
1181get_code_from_data(ZipImporter *self, int ispackage, int isbytecode,
1182                   time_t mtime, PyObject *toc_entry)
1183{
1184    PyObject *data, *code;
1185    PyObject *modpath;
1186
1187    data = get_data(self->archive, toc_entry);
1188    if (data == NULL)
1189        return NULL;
1190
1191    modpath = PyUnicode_EncodeFSDefault(PyTuple_GetItem(toc_entry, 0));
1192    if (modpath == NULL) {
1193        Py_DECREF(data);
1194        return NULL;
1195    }
1196
1197    if (isbytecode)
1198        code = unmarshal_code(PyBytes_AS_STRING(modpath), data, mtime);
1199    else
1200        code = compile_source(PyBytes_AS_STRING(modpath), data);
1201    Py_DECREF(modpath);
1202    Py_DECREF(data);
1203    return code;
1204}
1205
1206/* Get the code object associated with the module specified by
1207   'fullname'. */
1208static PyObject *
1209get_module_code(ZipImporter *self, char *fullname,
1210                int *p_ispackage, PyObject **p_modpath)
1211{
1212    PyObject *toc_entry;
1213    char *subname, path[MAXPATHLEN + 1];
1214    int len;
1215    struct st_zip_searchorder *zso;
1216
1217    subname = get_subname(fullname);
1218
1219    len = make_filename(self->prefix, subname, path, sizeof(path));
1220    if (len < 0)
1221        return NULL;
1222
1223    for (zso = zip_searchorder; *zso->suffix; zso++) {
1224        PyObject *code = NULL;
1225
1226        strcpy(path + len, zso->suffix);
1227        if (Py_VerboseFlag > 1)
1228            PySys_FormatStderr("# trying %U%c%s\n",
1229                               self->archive, (int)SEP, path);
1230        toc_entry = PyDict_GetItemString(self->files, path);
1231        if (toc_entry != NULL) {
1232            time_t mtime = 0;
1233            int ispackage = zso->type & IS_PACKAGE;
1234            int isbytecode = zso->type & IS_BYTECODE;
1235
1236            if (isbytecode)
1237                mtime = get_mtime_of_source(self, path);
1238            if (p_ispackage != NULL)
1239                *p_ispackage = ispackage;
1240            code = get_code_from_data(self, ispackage,
1241                                      isbytecode, mtime,
1242                                      toc_entry);
1243            if (code == Py_None) {
1244                /* bad magic number or non-matching mtime
1245                   in byte code, try next */
1246                Py_DECREF(code);
1247                continue;
1248            }
1249            if (code != NULL && p_modpath != NULL) {
1250                *p_modpath = PyTuple_GetItem(toc_entry, 0);
1251                Py_INCREF(*p_modpath);
1252            }
1253            return code;
1254        }
1255    }
1256    PyErr_Format(ZipImportError, "can't find module '%s'", fullname);
1257    return NULL;
1258}
1259
1260
1261/* Module init */
1262
1263PyDoc_STRVAR(zipimport_doc,
1264"zipimport provides support for importing Python modules from Zip archives.\n\
1265\n\
1266This module exports three objects:\n\
1267- zipimporter: a class; its constructor takes a path to a Zip archive.\n\
1268- ZipImportError: exception raised by zipimporter objects. It's a\n\
1269  subclass of ImportError, so it can be caught as ImportError, too.\n\
1270- _zip_directory_cache: a dict, mapping archive paths to zip directory\n\
1271  info dicts, as used in zipimporter._files.\n\
1272\n\
1273It is usually not needed to use the zipimport module explicitly; it is\n\
1274used by the builtin import mechanism for sys.path items that are paths\n\
1275to Zip archives.");
1276
1277static struct PyModuleDef zipimportmodule = {
1278    PyModuleDef_HEAD_INIT,
1279    "zipimport",
1280    zipimport_doc,
1281    -1,
1282    NULL,
1283    NULL,
1284    NULL,
1285    NULL,
1286    NULL
1287};
1288
1289PyMODINIT_FUNC
1290PyInit_zipimport(void)
1291{
1292    PyObject *mod;
1293
1294    if (PyType_Ready(&ZipImporter_Type) < 0)
1295        return NULL;
1296
1297    /* Correct directory separator */
1298    zip_searchorder[0].suffix[0] = SEP;
1299    zip_searchorder[1].suffix[0] = SEP;
1300    zip_searchorder[2].suffix[0] = SEP;
1301    if (Py_OptimizeFlag) {
1302        /* Reverse *.pyc and *.pyo */
1303        struct st_zip_searchorder tmp;
1304        tmp = zip_searchorder[0];
1305        zip_searchorder[0] = zip_searchorder[1];
1306        zip_searchorder[1] = tmp;
1307        tmp = zip_searchorder[3];
1308        zip_searchorder[3] = zip_searchorder[4];
1309        zip_searchorder[4] = tmp;
1310    }
1311
1312    mod = PyModule_Create(&zipimportmodule);
1313    if (mod == NULL)
1314        return NULL;
1315
1316    ZipImportError = PyErr_NewException("zipimport.ZipImportError",
1317                                        PyExc_ImportError, NULL);
1318    if (ZipImportError == NULL)
1319        return NULL;
1320
1321    Py_INCREF(ZipImportError);
1322    if (PyModule_AddObject(mod, "ZipImportError",
1323                           ZipImportError) < 0)
1324        return NULL;
1325
1326    Py_INCREF(&ZipImporter_Type);
1327    if (PyModule_AddObject(mod, "zipimporter",
1328                           (PyObject *)&ZipImporter_Type) < 0)
1329        return NULL;
1330
1331    zip_directory_cache = PyDict_New();
1332    if (zip_directory_cache == NULL)
1333        return NULL;
1334    Py_INCREF(zip_directory_cache);
1335    if (PyModule_AddObject(mod, "_zip_directory_cache",
1336                           zip_directory_cache) < 0)
1337        return NULL;
1338    return mod;
1339}
1340