zipimport.c revision bd206e27a49dd4cc94ee264c706614190ce0eb3c
1#include "Python.h"
2#include "structmember.h"
3#include "osdefs.h"
4#include "marshal.h"
5#include <time.h>
6
7
/* Bit flags stored in st_zip_searchorder.type. */
#define IS_SOURCE   0x0
#define IS_BYTECODE 0x1
#define IS_PACKAGE  0x2

/* One candidate file suffix plus the IS_* flags that describe it. */
struct st_zip_searchorder {
    char suffix[14];    /* "/__init__.pyc" is 13 characters + NUL */
    int type;           /* combination of the IS_* flags above */
};

/* zip_searchorder lists, in lookup order, the suffixes tried when
   searching for a module in the Zip archive: the package __init__
   entries come first, then the non-package .pyc, .pyo and .py
   entries. The .pyc and .pyo entries are swapped by initzipimport()
   if we run in optimized mode. Also, '/' is replaced by SEP there.
   The entry with an empty suffix terminates the table. */
static struct st_zip_searchorder zip_searchorder[] = {
    {"/__init__.pyc", IS_BYTECODE | IS_PACKAGE},
    {"/__init__.pyo", IS_BYTECODE | IS_PACKAGE},
    {"/__init__.py",  IS_SOURCE | IS_PACKAGE},
    {".pyc",          IS_BYTECODE},
    {".pyo",          IS_BYTECODE},
    {".py",           IS_SOURCE},
    {"",              0}
};
31
32/* zipimporter object definition and support */
33
34typedef struct _zipimporter ZipImporter;
35
36struct _zipimporter {
37    PyObject_HEAD
38    PyObject *archive;  /* pathname of the Zip archive,
39                           decoded from the filesystem encoding */
40    PyObject *prefix;   /* file prefix: "a/sub/directory/",
41                           encoded to the filesystem encoding */
42    PyObject *files;    /* dict with file info {path: toc_entry} */
43};
44
45static PyObject *ZipImportError;
46/* read_directory() cache */
47static PyObject *zip_directory_cache = NULL;
48
49/* forward decls */
50static PyObject *read_directory(PyObject *archive);
51static PyObject *get_data(PyObject *archive, PyObject *toc_entry);
52static PyObject *get_module_code(ZipImporter *self, char *fullname,
53                                 int *p_ispackage, PyObject **p_modpath);
54
55
56#define ZipImporter_Check(op) PyObject_TypeCheck(op, &ZipImporter_Type)
57
58
59/* zipimporter.__init__
60   Split the "subdirectory" from the Zip archive path, lookup a matching
61   entry in sys.path_importer_cache, fetch the file directory from there
62   if found, or else read it from the archive. */
63static int
64zipimporter_init(ZipImporter *self, PyObject *args, PyObject *kwds)
65{
66    PyObject *pathobj, *files;
67    Py_UNICODE *path, *p, *prefix, buf[MAXPATHLEN+2];
68    Py_ssize_t len;
69
70    if (!_PyArg_NoKeywords("zipimporter()", kwds))
71        return -1;
72
73    if (!PyArg_ParseTuple(args, "O&:zipimporter",
74                          PyUnicode_FSDecoder, &pathobj))
75        return -1;
76
77    /* copy path to buf */
78    len = PyUnicode_GET_SIZE(pathobj);
79    if (len == 0) {
80        PyErr_SetString(ZipImportError, "archive path is empty");
81        goto error;
82    }
83    if (len >= MAXPATHLEN) {
84        PyErr_SetString(ZipImportError,
85                        "archive path too long");
86        goto error;
87    }
88    Py_UNICODE_strcpy(buf, PyUnicode_AS_UNICODE(pathobj));
89
90#ifdef ALTSEP
91    for (p = buf; *p; p++) {
92        if (*p == ALTSEP)
93            *p = SEP;
94    }
95#endif
96
97    path = NULL;
98    prefix = NULL;
99    for (;;) {
100        struct stat statbuf;
101        int rv;
102
103        if (pathobj == NULL) {
104            pathobj = PyUnicode_FromUnicode(buf, len);
105            if (pathobj == NULL)
106                goto error;
107        }
108        rv = _Py_stat(pathobj, &statbuf);
109        if (rv == 0) {
110            /* it exists */
111            if (S_ISREG(statbuf.st_mode))
112                /* it's a file */
113                path = buf;
114            break;
115        }
116        else if (PyErr_Occurred())
117            goto error;
118        /* back up one path element */
119        p = Py_UNICODE_strrchr(buf, SEP);
120        if (prefix != NULL)
121            *prefix = SEP;
122        if (p == NULL)
123            break;
124        *p = '\0';
125        len = p - buf;
126        prefix = p;
127        Py_CLEAR(pathobj);
128    }
129    if (path == NULL) {
130        PyErr_SetString(ZipImportError, "not a Zip file");
131        goto error;
132    }
133
134    files = PyDict_GetItem(zip_directory_cache, pathobj);
135    if (files == NULL) {
136        files = read_directory(pathobj);
137        if (files == NULL)
138            goto error;
139        if (PyDict_SetItem(zip_directory_cache, pathobj, files) != 0)
140            goto error;
141    }
142    else
143        Py_INCREF(files);
144    self->files = files;
145
146    self->archive = pathobj;
147    pathobj = NULL;
148
149    if (prefix != NULL) {
150        prefix++;
151        len = Py_UNICODE_strlen(prefix);
152        if (prefix[len-1] != SEP) {
153            /* add trailing SEP */
154            prefix[len] = SEP;
155            prefix[len + 1] = '\0';
156            len++;
157        }
158    }
159    else
160        len = 0;
161    self->prefix = PyUnicode_FromUnicode(prefix, len);
162    if (self->prefix == NULL)
163        goto error;
164
165    return 0;
166
167error:
168    Py_XDECREF(pathobj);
169    return -1;
170}
171
172/* GC support. */
173static int
174zipimporter_traverse(PyObject *obj, visitproc visit, void *arg)
175{
176    ZipImporter *self = (ZipImporter *)obj;
177    Py_VISIT(self->files);
178    return 0;
179}
180
181static void
182zipimporter_dealloc(ZipImporter *self)
183{
184    PyObject_GC_UnTrack(self);
185    Py_XDECREF(self->archive);
186    Py_XDECREF(self->prefix);
187    Py_XDECREF(self->files);
188    Py_TYPE(self)->tp_free((PyObject *)self);
189}
190
191static PyObject *
192zipimporter_repr(ZipImporter *self)
193{
194    if (self->archive == NULL)
195        return PyUnicode_FromString("<zipimporter object \"???\">");
196    else if (self->prefix != NULL && PyUnicode_GET_SIZE(self->prefix) != 0)
197        return PyUnicode_FromFormat("<zipimporter object \"%U%c%U\">",
198                                    self->archive, SEP, self->prefix);
199    else
200        return PyUnicode_FromFormat("<zipimporter object \"%U\">",
201                                    self->archive);
202}
203
/* return fullname.split(".")[-1]
   The result points into 'fullname' itself: just past its last '.',
   or at its start when it contains no '.'. */
static char *
get_subname(char *fullname)
{
    char *last_dot = strrchr(fullname, '.');

    return (last_dot != NULL) ? last_dot + 1 : fullname;
}
215
216/* Given a (sub)modulename, write the potential file path in the
217   archive (without extension) to the path buffer. Return the
218   length of the resulting string. */
219static int
220make_filename(PyObject *prefix_obj, char *name, char *path, size_t pathsize)
221{
222    size_t len;
223    char *p;
224    PyObject *prefix;
225
226    prefix = PyUnicode_EncodeFSDefault(prefix_obj);
227    if (prefix == NULL)
228        return -1;
229    len = PyBytes_GET_SIZE(prefix);
230
231    /* self.prefix + name [+ SEP + "__init__"] + ".py[co]" */
232    if (len + strlen(name) + 13 >= pathsize - 1) {
233        PyErr_SetString(ZipImportError, "path too long");
234        Py_DECREF(prefix);
235        return -1;
236    }
237
238    strcpy(path, PyBytes_AS_STRING(prefix));
239    Py_DECREF(prefix);
240    strcpy(path + len, name);
241    for (p = path + len; *p; p++) {
242        if (*p == '.')
243            *p = SEP;
244    }
245    len += strlen(name);
246    assert(len < INT_MAX);
247    return (int)len;
248}
249
/* Result of get_module_info(). */
enum zi_module_info {
    MI_ERROR     = 0,   /* building the file name failed */
    MI_NOT_FOUND = 1,   /* no matching entry in the archive */
    MI_MODULE    = 2,   /* found as a plain module */
    MI_PACKAGE   = 3    /* found as a package (__init__ entry) */
};
256
257/* Return some information about a module. */
258static enum zi_module_info
259get_module_info(ZipImporter *self, char *fullname)
260{
261    char *subname, path[MAXPATHLEN + 1];
262    int len;
263    struct st_zip_searchorder *zso;
264
265    subname = get_subname(fullname);
266
267    len = make_filename(self->prefix, subname, path, sizeof(path));
268    if (len < 0)
269        return MI_ERROR;
270
271    for (zso = zip_searchorder; *zso->suffix; zso++) {
272        strcpy(path + len, zso->suffix);
273        if (PyDict_GetItemString(self->files, path) != NULL) {
274            if (zso->type & IS_PACKAGE)
275                return MI_PACKAGE;
276            else
277                return MI_MODULE;
278        }
279    }
280    return MI_NOT_FOUND;
281}
282
283/* Check whether we can satisfy the import of the module named by
284   'fullname'. Return self if we can, None if we can't. */
285static PyObject *
286zipimporter_find_module(PyObject *obj, PyObject *args)
287{
288    ZipImporter *self = (ZipImporter *)obj;
289    PyObject *path = NULL;
290    char *fullname;
291    enum zi_module_info mi;
292
293    if (!PyArg_ParseTuple(args, "s|O:zipimporter.find_module",
294                          &fullname, &path))
295        return NULL;
296
297    mi = get_module_info(self, fullname);
298    if (mi == MI_ERROR)
299        return NULL;
300    if (mi == MI_NOT_FOUND) {
301        Py_INCREF(Py_None);
302        return Py_None;
303    }
304    Py_INCREF(self);
305    return (PyObject *)self;
306}
307
308/* Load and return the module named by 'fullname'. */
309static PyObject *
310zipimporter_load_module(PyObject *obj, PyObject *args)
311{
312    ZipImporter *self = (ZipImporter *)obj;
313    PyObject *code = NULL, *mod, *dict;
314    char *fullname;
315    PyObject *modpath = NULL, *modpath_bytes;
316    int ispackage;
317
318    if (!PyArg_ParseTuple(args, "s:zipimporter.load_module",
319                          &fullname))
320        return NULL;
321
322    code = get_module_code(self, fullname, &ispackage, &modpath);
323    if (code == NULL)
324        goto error;
325
326    mod = PyImport_AddModule(fullname);
327    if (mod == NULL)
328        goto error;
329    dict = PyModule_GetDict(mod);
330
331    /* mod.__loader__ = self */
332    if (PyDict_SetItemString(dict, "__loader__", (PyObject *)self) != 0)
333        goto error;
334
335    if (ispackage) {
336        /* add __path__ to the module *before* the code gets
337           executed */
338        PyObject *pkgpath, *fullpath;
339        char *subname = get_subname(fullname);
340        int err;
341
342        fullpath = PyUnicode_FromFormat("%U%c%U%s",
343                                self->archive, SEP,
344                                self->prefix, subname);
345        if (fullpath == NULL)
346            goto error;
347
348        pkgpath = Py_BuildValue("[O]", fullpath);
349        Py_DECREF(fullpath);
350        if (pkgpath == NULL)
351            goto error;
352        err = PyDict_SetItemString(dict, "__path__", pkgpath);
353        Py_DECREF(pkgpath);
354        if (err != 0)
355            goto error;
356    }
357    modpath_bytes = PyUnicode_EncodeFSDefault(modpath);
358    if (modpath_bytes == NULL)
359        goto error;
360    mod = PyImport_ExecCodeModuleEx(fullname, code,
361                                    PyBytes_AS_STRING(modpath_bytes));
362    Py_DECREF(modpath_bytes);
363    Py_CLEAR(code);
364    if (mod == NULL)
365        goto error;
366
367    if (Py_VerboseFlag)
368        PySys_FormatStderr("import %s # loaded from Zip %U\n",
369                           fullname, modpath);
370    Py_DECREF(modpath);
371    return mod;
372error:
373    Py_XDECREF(code);
374    Py_XDECREF(modpath);
375    return NULL;
376}
377
378/* Return a string matching __file__ for the named module */
379static PyObject *
380zipimporter_get_filename(PyObject *obj, PyObject *args)
381{
382    ZipImporter *self = (ZipImporter *)obj;
383    PyObject *code;
384    char *fullname;
385    PyObject *modpath;
386    int ispackage;
387
388    if (!PyArg_ParseTuple(args, "s:zipimporter.get_filename",
389                          &fullname))
390        return NULL;
391
392    /* Deciding the filename requires working out where the code
393       would come from if the module was actually loaded */
394    code = get_module_code(self, fullname, &ispackage, &modpath);
395    if (code == NULL)
396        return NULL;
397    Py_DECREF(code); /* Only need the path info */
398
399    return modpath;
400}
401
402/* Return a bool signifying whether the module is a package or not. */
403static PyObject *
404zipimporter_is_package(PyObject *obj, PyObject *args)
405{
406    ZipImporter *self = (ZipImporter *)obj;
407    char *fullname;
408    enum zi_module_info mi;
409
410    if (!PyArg_ParseTuple(args, "s:zipimporter.is_package",
411                          &fullname))
412        return NULL;
413
414    mi = get_module_info(self, fullname);
415    if (mi == MI_ERROR)
416        return NULL;
417    if (mi == MI_NOT_FOUND) {
418        PyErr_Format(ZipImportError, "can't find module '%s'", fullname);
419        return NULL;
420    }
421    return PyBool_FromLong(mi == MI_PACKAGE);
422}
423
424static PyObject *
425zipimporter_get_data(PyObject *obj, PyObject *args)
426{
427    ZipImporter *self = (ZipImporter *)obj;
428    PyObject *pathobj, *key;
429    const Py_UNICODE *path;
430#ifdef ALTSEP
431    Py_UNICODE *p, buf[MAXPATHLEN + 1];
432#endif
433    Py_UNICODE *archive;
434    PyObject *toc_entry;
435    Py_ssize_t path_len, len;
436
437    if (!PyArg_ParseTuple(args, "U:zipimporter.get_data", &pathobj))
438        return NULL;
439
440    path_len = PyUnicode_GET_SIZE(pathobj);
441    path = PyUnicode_AS_UNICODE(pathobj);
442#ifdef ALTSEP
443    if (path_len >= MAXPATHLEN) {
444        PyErr_SetString(ZipImportError, "path too long");
445        return NULL;
446    }
447    Py_UNICODE_strcpy(buf, path);
448    for (p = buf; *p; p++) {
449        if (*p == ALTSEP)
450            *p = SEP;
451    }
452    path = buf;
453#endif
454    archive = PyUnicode_AS_UNICODE(self->archive);
455    len = PyUnicode_GET_SIZE(self->archive);
456    if ((size_t)len < Py_UNICODE_strlen(path) &&
457        Py_UNICODE_strncmp(path, archive, len) == 0 &&
458        path[len] == SEP) {
459        path += len + 1;
460        path_len -= len + 1;
461    }
462
463    key = PyUnicode_FromUnicode(path, path_len);
464    if (key == NULL)
465        return NULL;
466    toc_entry = PyDict_GetItem(self->files, key);
467    if (toc_entry == NULL) {
468        PyErr_SetFromErrnoWithFilenameObject(PyExc_IOError, key);
469        Py_DECREF(key);
470        return NULL;
471    }
472    Py_DECREF(key);
473    return get_data(self->archive, toc_entry);
474}
475
476static PyObject *
477zipimporter_get_code(PyObject *obj, PyObject *args)
478{
479    ZipImporter *self = (ZipImporter *)obj;
480    char *fullname;
481
482    if (!PyArg_ParseTuple(args, "s:zipimporter.get_code", &fullname))
483        return NULL;
484
485    return get_module_code(self, fullname, NULL, NULL);
486}
487
488static PyObject *
489zipimporter_get_source(PyObject *obj, PyObject *args)
490{
491    ZipImporter *self = (ZipImporter *)obj;
492    PyObject *toc_entry;
493    char *fullname, *subname, path[MAXPATHLEN+1];
494    int len;
495    enum zi_module_info mi;
496
497    if (!PyArg_ParseTuple(args, "s:zipimporter.get_source", &fullname))
498        return NULL;
499
500    mi = get_module_info(self, fullname);
501    if (mi == MI_ERROR)
502        return NULL;
503    if (mi == MI_NOT_FOUND) {
504        PyErr_Format(ZipImportError, "can't find module '%s'", fullname);
505        return NULL;
506    }
507    subname = get_subname(fullname);
508
509    len = make_filename(self->prefix, subname, path, sizeof(path));
510    if (len < 0)
511        return NULL;
512
513    if (mi == MI_PACKAGE) {
514        path[len] = SEP;
515        strcpy(path + len + 1, "__init__.py");
516    }
517    else
518        strcpy(path + len, ".py");
519
520    toc_entry = PyDict_GetItemString(self->files, path);
521    if (toc_entry != NULL) {
522        PyObject *res, *bytes;
523        bytes = get_data(self->archive, toc_entry);
524        if (bytes == NULL)
525            return NULL;
526        res = PyUnicode_FromStringAndSize(PyBytes_AS_STRING(bytes),
527                                          PyBytes_GET_SIZE(bytes));
528        Py_DECREF(bytes);
529        return res;
530    }
531
532    /* we have the module, but no source */
533    Py_INCREF(Py_None);
534    return Py_None;
535}
536
537PyDoc_STRVAR(doc_find_module,
538"find_module(fullname, path=None) -> self or None.\n\
539\n\
540Search for a module specified by 'fullname'. 'fullname' must be the\n\
541fully qualified (dotted) module name. It returns the zipimporter\n\
542instance itself if the module was found, or None if it wasn't.\n\
543The optional 'path' argument is ignored -- it's there for compatibility\n\
544with the importer protocol.");
545
546PyDoc_STRVAR(doc_load_module,
547"load_module(fullname) -> module.\n\
548\n\
549Load the module specified by 'fullname'. 'fullname' must be the\n\
550fully qualified (dotted) module name. It returns the imported\n\
551module, or raises ZipImportError if it wasn't found.");
552
553PyDoc_STRVAR(doc_get_data,
554"get_data(pathname) -> string with file data.\n\
555\n\
556Return the data associated with 'pathname'. Raise IOError if\n\
557the file wasn't found.");
558
559PyDoc_STRVAR(doc_is_package,
560"is_package(fullname) -> bool.\n\
561\n\
562Return True if the module specified by fullname is a package.\n\
563Raise ZipImportError if the module couldn't be found.");
564
565PyDoc_STRVAR(doc_get_code,
566"get_code(fullname) -> code object.\n\
567\n\
568Return the code object for the specified module. Raise ZipImportError\n\
569if the module couldn't be found.");
570
571PyDoc_STRVAR(doc_get_source,
572"get_source(fullname) -> source string.\n\
573\n\
574Return the source code for the specified module. Raise ZipImportError\n\
575if the module couldn't be found, return None if the archive does\n\
576contain the module, but has no source for it.");
577
578
579PyDoc_STRVAR(doc_get_filename,
580"get_filename(fullname) -> filename string.\n\
581\n\
582Return the filename for the specified module.");
583
584static PyMethodDef zipimporter_methods[] = {
585    {"find_module", zipimporter_find_module, METH_VARARGS,
586     doc_find_module},
587    {"load_module", zipimporter_load_module, METH_VARARGS,
588     doc_load_module},
589    {"get_data", zipimporter_get_data, METH_VARARGS,
590     doc_get_data},
591    {"get_code", zipimporter_get_code, METH_VARARGS,
592     doc_get_code},
593    {"get_source", zipimporter_get_source, METH_VARARGS,
594     doc_get_source},
595    {"get_filename", zipimporter_get_filename, METH_VARARGS,
596     doc_get_filename},
597    {"is_package", zipimporter_is_package, METH_VARARGS,
598     doc_is_package},
599    {NULL,              NULL}   /* sentinel */
600};
601
602static PyMemberDef zipimporter_members[] = {
603    {"archive",  T_OBJECT, offsetof(ZipImporter, archive),  READONLY},
604    {"prefix",   T_OBJECT, offsetof(ZipImporter, prefix),   READONLY},
605    {"_files",   T_OBJECT, offsetof(ZipImporter, files),    READONLY},
606    {NULL}
607};
608
609PyDoc_STRVAR(zipimporter_doc,
610"zipimporter(archivepath) -> zipimporter object\n\
611\n\
612Create a new zipimporter instance. 'archivepath' must be a path to\n\
613a zipfile, or to a specific path inside a zipfile. For example, it can be\n\
614'/tmp/myimport.zip', or '/tmp/myimport.zip/mydirectory', if mydirectory is a\n\
615valid directory inside the archive.\n\
616\n\
617'ZipImportError is raised if 'archivepath' doesn't point to a valid Zip\n\
618archive.\n\
619\n\
620The 'archive' attribute of zipimporter objects contains the name of the\n\
621zipfile targeted.");
622
623#define DEFERRED_ADDRESS(ADDR) 0
624
625static PyTypeObject ZipImporter_Type = {
626    PyVarObject_HEAD_INIT(DEFERRED_ADDRESS(&PyType_Type), 0)
627    "zipimport.zipimporter",
628    sizeof(ZipImporter),
629    0,                                          /* tp_itemsize */
630    (destructor)zipimporter_dealloc,            /* tp_dealloc */
631    0,                                          /* tp_print */
632    0,                                          /* tp_getattr */
633    0,                                          /* tp_setattr */
634    0,                                          /* tp_reserved */
635    (reprfunc)zipimporter_repr,                 /* tp_repr */
636    0,                                          /* tp_as_number */
637    0,                                          /* tp_as_sequence */
638    0,                                          /* tp_as_mapping */
639    0,                                          /* tp_hash */
640    0,                                          /* tp_call */
641    0,                                          /* tp_str */
642    PyObject_GenericGetAttr,                    /* tp_getattro */
643    0,                                          /* tp_setattro */
644    0,                                          /* tp_as_buffer */
645    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
646        Py_TPFLAGS_HAVE_GC,                     /* tp_flags */
647    zipimporter_doc,                            /* tp_doc */
648    zipimporter_traverse,                       /* tp_traverse */
649    0,                                          /* tp_clear */
650    0,                                          /* tp_richcompare */
651    0,                                          /* tp_weaklistoffset */
652    0,                                          /* tp_iter */
653    0,                                          /* tp_iternext */
654    zipimporter_methods,                        /* tp_methods */
655    zipimporter_members,                        /* tp_members */
656    0,                                          /* tp_getset */
657    0,                                          /* tp_base */
658    0,                                          /* tp_dict */
659    0,                                          /* tp_descr_get */
660    0,                                          /* tp_descr_set */
661    0,                                          /* tp_dictoffset */
662    (initproc)zipimporter_init,                 /* tp_init */
663    PyType_GenericAlloc,                        /* tp_alloc */
664    PyType_GenericNew,                          /* tp_new */
665    PyObject_GC_Del,                            /* tp_free */
666};
667
668
669/* implementation */
670
/* Given a buffer, return the long that is represented by the first
   4 bytes, encoded as little endian. This partially reimplements
   marshal.c:r_long().

   The four bytes are read as a signed 32-bit quantity: a value with
   bit 31 set comes back negative whatever the width of long. The
   bytes are combined in an unsigned type so that no signed shift can
   overflow, and the sign extension no longer depends on a
   configuration macro. */
static long
get_long(unsigned char *buf) {
    unsigned long bits;

    bits = (unsigned long)buf[0]
         | ((unsigned long)buf[1] << 8)
         | ((unsigned long)buf[2] << 16)
         | ((unsigned long)buf[3] << 24);

    if (bits & 0x80000000UL) {
        /* two's complement value of a negative 32-bit number, computed
           without leaving the range of a 32-bit long */
        return (long)(bits & 0x7FFFFFFFUL) - 0x7FFFFFFFL - 1;
    }
    return (long)bits;
}
687
688/*
689   read_directory(archive) -> files dict (new reference)
690
691   Given a path to a Zip archive, build a dict, mapping file names
692   (local to the archive, using SEP as a separator) to toc entries.
693
694   A toc_entry is a tuple:
695
696   (__file__,      # value to use for __file__, available for all files,
697                   # encoded to the filesystem encoding
698    compress,      # compression kind; 0 for uncompressed
699    data_size,     # size of compressed data on disk
700    file_size,     # size of decompressed data
701    file_offset,   # offset of file header from start of archive
702    time,          # mod time of file (in dos format)
703    date,          # mod data of file (in dos format)
704    crc,           # crc checksum of the data
705   )
706
707   Directories can be recognized by the trailing SEP in the name,
708   data_size and file_offset are 0.
709*/
710static PyObject *
711read_directory(PyObject *archive_obj)
712{
713    /* FIXME: work on Py_UNICODE* instead of char* */
714    PyObject *files = NULL;
715    FILE *fp;
716    unsigned short flags;
717    long compress, crc, data_size, file_size, file_offset, date, time;
718    long header_offset, name_size, header_size, header_position;
719    long i, l, count;
720    size_t length;
721    Py_UNICODE path[MAXPATHLEN + 5];
722    char name[MAXPATHLEN + 5];
723    PyObject *nameobj = NULL;
724    char *p, endof_central_dir[22];
725    long arc_offset; /* offset from beginning of file to start of zip-archive */
726    PyObject *pathobj;
727    const char *charset;
728    int bootstrap;
729
730    if (PyUnicode_GET_SIZE(archive_obj) > MAXPATHLEN) {
731        PyErr_SetString(PyExc_OverflowError,
732                        "Zip path name is too long");
733        return NULL;
734    }
735    Py_UNICODE_strcpy(path, PyUnicode_AS_UNICODE(archive_obj));
736
737    fp = _Py_fopen(archive_obj, "rb");
738    if (fp == NULL) {
739        if (!PyErr_Occurred())
740            PyErr_Format(ZipImportError, "can't open Zip file: '%U'", archive_obj);
741        return NULL;
742    }
743    fseek(fp, -22, SEEK_END);
744    header_position = ftell(fp);
745    if (fread(endof_central_dir, 1, 22, fp) != 22) {
746        fclose(fp);
747        PyErr_Format(ZipImportError, "can't read Zip file: '%U'", archive_obj);
748        return NULL;
749    }
750    if (get_long((unsigned char *)endof_central_dir) != 0x06054B50) {
751        /* Bad: End of Central Dir signature */
752        fclose(fp);
753        PyErr_Format(ZipImportError, "not a Zip file: '%U'", archive_obj);
754        return NULL;
755    }
756
757    header_size = get_long((unsigned char *)endof_central_dir + 12);
758    header_offset = get_long((unsigned char *)endof_central_dir + 16);
759    arc_offset = header_position - header_offset - header_size;
760    header_offset += arc_offset;
761
762    files = PyDict_New();
763    if (files == NULL)
764        goto error;
765
766    length = Py_UNICODE_strlen(path);
767    path[length] = SEP;
768
769    /* Start of Central Directory */
770    count = 0;
771    for (;;) {
772        PyObject *t;
773        int err;
774
775        fseek(fp, header_offset, 0);  /* Start of file header */
776        l = PyMarshal_ReadLongFromFile(fp);
777        if (l != 0x02014B50)
778            break;              /* Bad: Central Dir File Header */
779        fseek(fp, header_offset + 8, 0);
780        flags = (unsigned short)PyMarshal_ReadShortFromFile(fp);
781        compress = PyMarshal_ReadShortFromFile(fp);
782        time = PyMarshal_ReadShortFromFile(fp);
783        date = PyMarshal_ReadShortFromFile(fp);
784        crc = PyMarshal_ReadLongFromFile(fp);
785        data_size = PyMarshal_ReadLongFromFile(fp);
786        file_size = PyMarshal_ReadLongFromFile(fp);
787        name_size = PyMarshal_ReadShortFromFile(fp);
788        header_size = 46 + name_size +
789           PyMarshal_ReadShortFromFile(fp) +
790           PyMarshal_ReadShortFromFile(fp);
791        fseek(fp, header_offset + 42, 0);
792        file_offset = PyMarshal_ReadLongFromFile(fp) + arc_offset;
793        if (name_size > MAXPATHLEN)
794            name_size = MAXPATHLEN;
795
796        p = name;
797        for (i = 0; i < name_size; i++) {
798            *p = (char)getc(fp);
799            if (*p == '/')
800                *p = SEP;
801            p++;
802        }
803        *p = 0;         /* Add terminating null byte */
804        header_offset += header_size;
805
806        bootstrap = 0;
807        if (flags & 0x0800)
808            charset = "utf-8";
809        else if (!PyThreadState_GET()->interp->codecs_initialized) {
810            /* During bootstrap, we may need to load the encodings
811               package from a ZIP file. But the cp437 encoding is implemented
812               in Python in the encodings package.
813
814               Break out of this dependency by assuming that the path to
815               the encodings module is ASCII-only. */
816            charset = "ascii";
817            bootstrap = 1;
818        }
819        else
820            charset = "cp437";
821        nameobj = PyUnicode_Decode(name, name_size, charset, NULL);
822        if (nameobj == NULL) {
823            if (bootstrap)
824                PyErr_Format(PyExc_NotImplementedError,
825                    "bootstrap issue: python%i%i.zip contains non-ASCII "
826                    "filenames without the unicode flag",
827                    PY_MAJOR_VERSION, PY_MINOR_VERSION);
828            goto error;
829        }
830        Py_UNICODE_strncpy(path + length + 1, PyUnicode_AS_UNICODE(nameobj), MAXPATHLEN - length - 1);
831
832        pathobj = PyUnicode_FromUnicode(path, Py_UNICODE_strlen(path));
833        if (pathobj == NULL)
834            goto error;
835        t = Py_BuildValue("Niiiiiii", pathobj, compress, data_size,
836                          file_size, file_offset, time, date, crc);
837        if (t == NULL)
838            goto error;
839        err = PyDict_SetItem(files, nameobj, t);
840        Py_CLEAR(nameobj);
841        Py_DECREF(t);
842        if (err != 0)
843            goto error;
844        count++;
845    }
846    fclose(fp);
847    if (Py_VerboseFlag)
848        PySys_FormatStderr("# zipimport: found %ld names in %U\n",
849            count, archive_obj);
850    return files;
851error:
852    fclose(fp);
853    Py_XDECREF(files);
854    Py_XDECREF(nameobj);
855    return NULL;
856}
857
858/* Return the zlib.decompress function object, or NULL if zlib couldn't
859   be imported. The function is cached when found, so subsequent calls
860   don't import zlib again. */
861static PyObject *
862get_decompress_func(void)
863{
864    static int importing_zlib = 0;
865    PyObject *zlib;
866    PyObject *decompress;
867
868    if (importing_zlib != 0)
869        /* Someone has a zlib.py[co] in their Zip file;
870           let's avoid a stack overflow. */
871        return NULL;
872    importing_zlib = 1;
873    zlib = PyImport_ImportModuleNoBlock("zlib");
874    importing_zlib = 0;
875    if (zlib != NULL) {
876        decompress = PyObject_GetAttrString(zlib,
877                                            "decompress");
878        Py_DECREF(zlib);
879    }
880    else {
881        PyErr_Clear();
882        decompress = NULL;
883    }
884    if (Py_VerboseFlag)
885        PySys_WriteStderr("# zipimport: zlib %s\n",
886            zlib != NULL ? "available": "UNAVAILABLE");
887    return decompress;
888}
889
890/* Given a path to a Zip file and a toc_entry, return the (uncompressed)
891   data as a new reference. */
892static PyObject *
893get_data(PyObject *archive, PyObject *toc_entry)
894{
895    PyObject *raw_data, *data = NULL, *decompress;
896    char *buf;
897    FILE *fp;
898    int err;
899    Py_ssize_t bytes_read = 0;
900    long l;
901    PyObject *datapath;
902    long compress, data_size, file_size, file_offset, bytes_size;
903    long time, date, crc;
904
905    if (!PyArg_ParseTuple(toc_entry, "Olllllll", &datapath, &compress,
906                          &data_size, &file_size, &file_offset, &time,
907                          &date, &crc)) {
908        return NULL;
909    }
910
911    fp = _Py_fopen(archive, "rb");
912    if (!fp) {
913        if (!PyErr_Occurred())
914            PyErr_Format(PyExc_IOError,
915               "zipimport: can not open file %U", archive);
916        return NULL;
917    }
918
919    /* Check to make sure the local file header is correct */
920    fseek(fp, file_offset, 0);
921    l = PyMarshal_ReadLongFromFile(fp);
922    if (l != 0x04034B50) {
923        /* Bad: Local File Header */
924        PyErr_Format(ZipImportError,
925                     "bad local file header in %U",
926                     archive);
927        fclose(fp);
928        return NULL;
929    }
930    fseek(fp, file_offset + 26, 0);
931    l = 30 + PyMarshal_ReadShortFromFile(fp) +
932        PyMarshal_ReadShortFromFile(fp);        /* local header size */
933    file_offset += l;           /* Start of file data */
934
935    bytes_size = compress == 0 ? data_size : data_size + 1;
936    if (bytes_size == 0)
937        bytes_size++;
938    raw_data = PyBytes_FromStringAndSize((char *)NULL, bytes_size);
939
940    if (raw_data == NULL) {
941        fclose(fp);
942        return NULL;
943    }
944    buf = PyBytes_AsString(raw_data);
945
946    err = fseek(fp, file_offset, 0);
947    if (err == 0)
948        bytes_read = fread(buf, 1, data_size, fp);
949    fclose(fp);
950    if (err || bytes_read != data_size) {
951        PyErr_SetString(PyExc_IOError,
952                        "zipimport: can't read data");
953        Py_DECREF(raw_data);
954        return NULL;
955    }
956
957    if (compress != 0) {
958        buf[data_size] = 'Z';  /* saw this in zipfile.py */
959        data_size++;
960    }
961    buf[data_size] = '\0';
962
963    if (compress == 0) {  /* data is not compressed */
964        data = PyBytes_FromStringAndSize(buf, data_size);
965        Py_DECREF(raw_data);
966        return data;
967    }
968
969    /* Decompress with zlib */
970    decompress = get_decompress_func();
971    if (decompress == NULL) {
972        PyErr_SetString(ZipImportError,
973                        "can't decompress data; "
974                        "zlib not available");
975        goto error;
976    }
977    data = PyObject_CallFunction(decompress, "Oi", raw_data, -15);
978    Py_DECREF(decompress);
979error:
980    Py_DECREF(raw_data);
981    return data;
982}
983
/* Lenient comparison of two modification times.  Zip archives store
   DOS timestamps, which only record even seconds, while a .pyc keeps
   the full mtime; two stamps therefore count as equal when they are
   no more than one second apart.  Returns 1 if so, 0 otherwise. */
static int
eq_mtime(time_t t1, time_t t2)
{
    time_t delta;

    if (t1 < t2)
        delta = t2 - t1;
    else
        delta = t1 - t2;
    return delta <= 1;
}
996
997/* Given the contents of a .py[co] file in a buffer, unmarshal the data
998   and return the code object. Return None if it the magic word doesn't
999   match (we do this instead of raising an exception as we fall back
1000   to .py if available and we don't want to mask other errors).
1001   Returns a new reference. */
1002static PyObject *
1003unmarshal_code(char *pathname, PyObject *data, time_t mtime)
1004{
1005    PyObject *code;
1006    char *buf = PyBytes_AsString(data);
1007    Py_ssize_t size = PyBytes_Size(data);
1008
1009    if (size <= 9) {
1010        PyErr_SetString(ZipImportError,
1011                        "bad pyc data");
1012        return NULL;
1013    }
1014
1015    if (get_long((unsigned char *)buf) != PyImport_GetMagicNumber()) {
1016        if (Py_VerboseFlag)
1017            PySys_WriteStderr("# %s has bad magic\n",
1018                              pathname);
1019        Py_INCREF(Py_None);
1020        return Py_None;  /* signal caller to try alternative */
1021    }
1022
1023    if (mtime != 0 && !eq_mtime(get_long((unsigned char *)buf + 4),
1024                                mtime)) {
1025        if (Py_VerboseFlag)
1026            PySys_WriteStderr("# %s has bad mtime\n",
1027                              pathname);
1028        Py_INCREF(Py_None);
1029        return Py_None;  /* signal caller to try alternative */
1030    }
1031
1032    code = PyMarshal_ReadObjectFromString(buf + 8, size - 8);
1033    if (code == NULL)
1034        return NULL;
1035    if (!PyCode_Check(code)) {
1036        Py_DECREF(code);
1037        PyErr_Format(PyExc_TypeError,
1038             "compiled module %s is not a code object",
1039             pathname);
1040        return NULL;
1041    }
1042    return code;
1043}
1044
1045/* Replace any occurances of "\r\n?" in the input string with "\n".
1046   This converts DOS and Mac line endings to Unix line endings.
1047   Also append a trailing "\n" to be compatible with
1048   PyParser_SimpleParseFile(). Returns a new reference. */
1049static PyObject *
1050normalize_line_endings(PyObject *source)
1051{
1052    char *buf, *q, *p = PyBytes_AsString(source);
1053    PyObject *fixed_source;
1054    int len = 0;
1055
1056    if (!p) {
1057        return PyBytes_FromStringAndSize("\n\0", 2);
1058    }
1059
1060    /* one char extra for trailing \n and one for terminating \0 */
1061    buf = (char *)PyMem_Malloc(PyBytes_Size(source) + 2);
1062    if (buf == NULL) {
1063        PyErr_SetString(PyExc_MemoryError,
1064                        "zipimport: no memory to allocate "
1065                        "source buffer");
1066        return NULL;
1067    }
1068    /* replace "\r\n?" by "\n" */
1069    for (q = buf; *p != '\0'; p++) {
1070        if (*p == '\r') {
1071            *q++ = '\n';
1072            if (*(p + 1) == '\n')
1073                p++;
1074        }
1075        else
1076            *q++ = *p;
1077        len++;
1078    }
1079    *q++ = '\n';  /* add trailing \n */
1080    *q = '\0';
1081    fixed_source = PyBytes_FromStringAndSize(buf, len + 2);
1082    PyMem_Free(buf);
1083    return fixed_source;
1084}
1085
1086/* Given a string buffer containing Python source code, compile it
1087   return and return a code object as a new reference. */
1088static PyObject *
1089compile_source(char *pathname, PyObject *source)
1090{
1091    PyObject *code, *fixed_source;
1092
1093    fixed_source = normalize_line_endings(source);
1094    if (fixed_source == NULL)
1095        return NULL;
1096
1097    code = Py_CompileString(PyBytes_AsString(fixed_source), pathname,
1098                            Py_file_input);
1099    Py_DECREF(fixed_source);
1100    return code;
1101}
1102
/* Convert the packed DOS time and date words found in a Zip archive
   into a time_t comparable with the time stamp stored in .pyc files.
   The result is produced by mktime(), i.e. the fields are interpreted
   as local time. */
static time_t
parse_dostime(int dostime, int dosdate)
{
    /* time word: hhhhh mmmmmm sssss (seconds stored in 2s units) */
    const int half_seconds = dostime & 0x1f;
    const int minutes      = (dostime >> 5) & 0x3f;
    const int hours        = (dostime >> 11) & 0x1f;
    /* date word: yyyyyyy mmmm ddddd (year counted from 1980) */
    const int day          = dosdate & 0x1f;
    const int month        = (dosdate >> 5) & 0x0f;
    const int years_1980   = (dosdate >> 9) & 0x7f;
    struct tm broken_down;

    memset(&broken_down, 0, sizeof(broken_down));
    broken_down.tm_year  = years_1980 + 80;   /* tm_year counts from 1900 */
    broken_down.tm_mon   = month - 1;         /* tm_mon is zero-based */
    broken_down.tm_mday  = day;
    broken_down.tm_hour  = hours;
    broken_down.tm_min   = minutes;
    broken_down.tm_sec   = half_seconds * 2;
    broken_down.tm_isdst = -1;  /* let mktime() decide; wday/yday ignored */

    return mktime(&broken_down);
}
1122
1123/* Given a path to a .pyc or .pyo file in the archive, return the
1124   modification time of the matching .py file, or 0 if no source
1125   is available. */
1126static time_t
1127get_mtime_of_source(ZipImporter *self, char *path)
1128{
1129    PyObject *toc_entry;
1130    time_t mtime = 0;
1131    Py_ssize_t lastchar = strlen(path) - 1;
1132    char savechar = path[lastchar];
1133    path[lastchar] = '\0';  /* strip 'c' or 'o' from *.py[co] */
1134    toc_entry = PyDict_GetItemString(self->files, path);
1135    if (toc_entry != NULL && PyTuple_Check(toc_entry) &&
1136        PyTuple_Size(toc_entry) == 8) {
1137        /* fetch the time stamp of the .py file for comparison
1138           with an embedded pyc time stamp */
1139        int time, date;
1140        time = PyLong_AsLong(PyTuple_GetItem(toc_entry, 5));
1141        date = PyLong_AsLong(PyTuple_GetItem(toc_entry, 6));
1142        mtime = parse_dostime(time, date);
1143    }
1144    path[lastchar] = savechar;
1145    return mtime;
1146}
1147
1148/* Return the code object for the module named by 'fullname' from the
1149   Zip archive as a new reference. */
1150static PyObject *
1151get_code_from_data(ZipImporter *self, int ispackage, int isbytecode,
1152                   time_t mtime, PyObject *toc_entry)
1153{
1154    PyObject *data, *code;
1155    PyObject *modpath;
1156
1157    data = get_data(self->archive, toc_entry);
1158    if (data == NULL)
1159        return NULL;
1160
1161    modpath = PyUnicode_EncodeFSDefault(PyTuple_GetItem(toc_entry, 0));
1162    if (modpath == NULL) {
1163        Py_DECREF(data);
1164        return NULL;
1165    }
1166
1167    if (isbytecode)
1168        code = unmarshal_code(PyBytes_AS_STRING(modpath), data, mtime);
1169    else
1170        code = compile_source(PyBytes_AS_STRING(modpath), data);
1171    Py_DECREF(modpath);
1172    Py_DECREF(data);
1173    return code;
1174}
1175
1176/* Get the code object associated with the module specified by
1177   'fullname'. */
1178static PyObject *
1179get_module_code(ZipImporter *self, char *fullname,
1180                int *p_ispackage, PyObject **p_modpath)
1181{
1182    PyObject *toc_entry;
1183    char *subname, path[MAXPATHLEN + 1];
1184    int len;
1185    struct st_zip_searchorder *zso;
1186
1187    subname = get_subname(fullname);
1188
1189    len = make_filename(self->prefix, subname, path, sizeof(path));
1190    if (len < 0)
1191        return NULL;
1192
1193    for (zso = zip_searchorder; *zso->suffix; zso++) {
1194        PyObject *code = NULL;
1195
1196        strcpy(path + len, zso->suffix);
1197        if (Py_VerboseFlag > 1)
1198            PySys_FormatStderr("# trying %U%c%s\n",
1199                               self->archive, (int)SEP, path);
1200        toc_entry = PyDict_GetItemString(self->files, path);
1201        if (toc_entry != NULL) {
1202            time_t mtime = 0;
1203            int ispackage = zso->type & IS_PACKAGE;
1204            int isbytecode = zso->type & IS_BYTECODE;
1205
1206            if (isbytecode)
1207                mtime = get_mtime_of_source(self, path);
1208            if (p_ispackage != NULL)
1209                *p_ispackage = ispackage;
1210            code = get_code_from_data(self, ispackage,
1211                                      isbytecode, mtime,
1212                                      toc_entry);
1213            if (code == Py_None) {
1214                /* bad magic number or non-matching mtime
1215                   in byte code, try next */
1216                Py_DECREF(code);
1217                continue;
1218            }
1219            if (code != NULL && p_modpath != NULL) {
1220                *p_modpath = PyTuple_GetItem(toc_entry, 0);
1221                Py_INCREF(*p_modpath);
1222            }
1223            return code;
1224        }
1225    }
1226    PyErr_Format(ZipImportError, "can't find module '%s'", fullname);
1227    return NULL;
1228}
1229
1230
1231/* Module init */
1232
1233PyDoc_STRVAR(zipimport_doc,
1234"zipimport provides support for importing Python modules from Zip archives.\n\
1235\n\
1236This module exports three objects:\n\
1237- zipimporter: a class; its constructor takes a path to a Zip archive.\n\
1238- ZipImportError: exception raised by zipimporter objects. It's a\n\
1239  subclass of ImportError, so it can be caught as ImportError, too.\n\
1240- _zip_directory_cache: a dict, mapping archive paths to zip directory\n\
1241  info dicts, as used in zipimporter._files.\n\
1242\n\
1243It is usually not needed to use the zipimport module explicitly; it is\n\
1244used by the builtin import mechanism for sys.path items that are paths\n\
1245to Zip archives.");
1246
1247static struct PyModuleDef zipimportmodule = {
1248    PyModuleDef_HEAD_INIT,
1249    "zipimport",
1250    zipimport_doc,
1251    -1,
1252    NULL,
1253    NULL,
1254    NULL,
1255    NULL,
1256    NULL
1257};
1258
1259PyMODINIT_FUNC
1260PyInit_zipimport(void)
1261{
1262    PyObject *mod;
1263
1264    if (PyType_Ready(&ZipImporter_Type) < 0)
1265        return NULL;
1266
1267    /* Correct directory separator */
1268    zip_searchorder[0].suffix[0] = SEP;
1269    zip_searchorder[1].suffix[0] = SEP;
1270    zip_searchorder[2].suffix[0] = SEP;
1271    if (Py_OptimizeFlag) {
1272        /* Reverse *.pyc and *.pyo */
1273        struct st_zip_searchorder tmp;
1274        tmp = zip_searchorder[0];
1275        zip_searchorder[0] = zip_searchorder[1];
1276        zip_searchorder[1] = tmp;
1277        tmp = zip_searchorder[3];
1278        zip_searchorder[3] = zip_searchorder[4];
1279        zip_searchorder[4] = tmp;
1280    }
1281
1282    mod = PyModule_Create(&zipimportmodule);
1283    if (mod == NULL)
1284        return NULL;
1285
1286    ZipImportError = PyErr_NewException("zipimport.ZipImportError",
1287                                        PyExc_ImportError, NULL);
1288    if (ZipImportError == NULL)
1289        return NULL;
1290
1291    Py_INCREF(ZipImportError);
1292    if (PyModule_AddObject(mod, "ZipImportError",
1293                           ZipImportError) < 0)
1294        return NULL;
1295
1296    Py_INCREF(&ZipImporter_Type);
1297    if (PyModule_AddObject(mod, "zipimporter",
1298                           (PyObject *)&ZipImporter_Type) < 0)
1299        return NULL;
1300
1301    zip_directory_cache = PyDict_New();
1302    if (zip_directory_cache == NULL)
1303        return NULL;
1304    Py_INCREF(zip_directory_cache);
1305    if (PyModule_AddObject(mod, "_zip_directory_cache",
1306                           zip_directory_cache) < 0)
1307        return NULL;
1308    return mod;
1309}
1310