zipimport.c revision 3f528f0c1b79f32b1c00348f53f4b5b2007f16c5
1#include "Python.h"
2#include "structmember.h"
3#include "osdefs.h"
4#include "marshal.h"
5#include <time.h>
6
7
/* Bit flags describing a zip_searchorder entry; they are OR-ed together
   in st_zip_searchorder.type. */
#define IS_SOURCE   0x0  /* no bit set: the entry names a source file */
#define IS_BYTECODE 0x1  /* the entry names a bytecode file */
#define IS_PACKAGE  0x2  /* the entry names a package __init__ file */

/* One candidate file-name suffix to try when looking up a module. */
struct st_zip_searchorder {
    char suffix[14];    /* suffix appended to the module path; 14 bytes is
                           exactly enough for "/__init__.pyc" plus NUL */
    int type;           /* combination of the IS_* flags above */
};
16
/* zip_searchorder defines how we search for a module in the Zip
   archive: we first search for a package __init__, then for
   non-package .pyc, .pyo and .py entries. The .pyc and .pyo entries
   are swapped by initzipimport() if we run in optimized mode. Also,
   '/' is replaced by SEP there.

   The table is scanned front to back and the first match wins, so the
   order of the entries is significant. The entry with an empty suffix
   is the end-of-table sentinel. */
static struct st_zip_searchorder zip_searchorder[] = {
    {"/__init__.pyc", IS_PACKAGE | IS_BYTECODE},
    {"/__init__.pyo", IS_PACKAGE | IS_BYTECODE},
    {"/__init__.py", IS_PACKAGE | IS_SOURCE},
    {".pyc", IS_BYTECODE},
    {".pyo", IS_BYTECODE},
    {".py", IS_SOURCE},
    {"", 0}     /* sentinel: empty suffix terminates the search */
};
31
32/* zipimporter object definition and support */
33
typedef struct _zipimporter ZipImporter;

/* Instance layout of zipimport.zipimporter objects. All three fields are
   filled in by zipimporter_init() and released by zipimporter_dealloc(). */
struct _zipimporter {
    PyObject_HEAD
    PyObject *archive;  /* pathname of the Zip archive (str),
                           decoded from the filesystem encoding */
    PyObject *prefix;   /* file prefix inside the archive (str):
                           "a/sub/directory/"; either empty or
                           terminated by SEP */
    PyObject *files;    /* dict with file info {path: toc_entry} */
};
44
/* Exception type raised for zipimport-specific failures in this file. */
static PyObject *ZipImportError;
/* read_directory() cache: maps an archive path to the files dict that
   read_directory() built for it */
static PyObject *zip_directory_cache = NULL;

/* forward decls */
static PyObject *read_directory(PyObject *archive);
static PyObject *get_data(PyObject *archive, PyObject *toc_entry);
static PyObject *get_module_code(ZipImporter *self, PyObject *fullname,
                                 int *p_ispackage, PyObject **p_modpath);


/* True if op is an instance of ZipImporter_Type or of a subtype. */
#define ZipImporter_Check(op) PyObject_TypeCheck(op, &ZipImporter_Type)
57
58
59/* zipimporter.__init__
60   Split the "subdirectory" from the Zip archive path, lookup a matching
61   entry in sys.path_importer_cache, fetch the file directory from there
62   if found, or else read it from the archive. */
63static int
64zipimporter_init(ZipImporter *self, PyObject *args, PyObject *kwds)
65{
66    PyObject *pathobj, *files;
67    Py_UCS4 *path, *p, *prefix, buf[MAXPATHLEN+2];
68    Py_ssize_t len;
69
70    if (!_PyArg_NoKeywords("zipimporter()", kwds))
71        return -1;
72
73    if (!PyArg_ParseTuple(args, "O&:zipimporter",
74                          PyUnicode_FSDecoder, &pathobj))
75        return -1;
76
77    if (PyUnicode_READY(pathobj) == -1)
78        return -1;
79
80    /* copy path to buf */
81    len = PyUnicode_GET_LENGTH(pathobj);
82    if (len == 0) {
83        PyErr_SetString(ZipImportError, "archive path is empty");
84        goto error;
85    }
86    if (len >= MAXPATHLEN) {
87        PyErr_SetString(ZipImportError,
88                        "archive path too long");
89        goto error;
90    }
91    if (!PyUnicode_AsUCS4(pathobj, buf, Py_ARRAY_LENGTH(buf), 1))
92        goto error;
93
94#ifdef ALTSEP
95    for (p = buf; *p; p++) {
96        if (*p == ALTSEP)
97            *p = SEP;
98    }
99#endif
100
101    path = NULL;
102    prefix = NULL;
103    for (;;) {
104        struct stat statbuf;
105        int rv;
106
107        if (pathobj == NULL) {
108            pathobj = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
109                                                buf, len);
110            if (pathobj == NULL)
111                goto error;
112        }
113        rv = _Py_stat(pathobj, &statbuf);
114        if (rv == 0) {
115            /* it exists */
116            if (S_ISREG(statbuf.st_mode))
117                /* it's a file */
118                path = buf;
119            break;
120        }
121        else if (PyErr_Occurred())
122            goto error;
123        /* back up one path element */
124        p = Py_UCS4_strrchr(buf, SEP);
125        if (prefix != NULL)
126            *prefix = SEP;
127        if (p == NULL)
128            break;
129        *p = '\0';
130        len = p - buf;
131        prefix = p;
132        Py_CLEAR(pathobj);
133    }
134    if (path == NULL) {
135        PyErr_SetString(ZipImportError, "not a Zip file");
136        goto error;
137    }
138
139    files = PyDict_GetItem(zip_directory_cache, pathobj);
140    if (files == NULL) {
141        files = read_directory(pathobj);
142        if (files == NULL)
143            goto error;
144        if (PyDict_SetItem(zip_directory_cache, pathobj, files) != 0)
145            goto error;
146    }
147    else
148        Py_INCREF(files);
149    self->files = files;
150
151    self->archive = pathobj;
152    pathobj = NULL;
153
154    if (prefix != NULL) {
155        prefix++;
156        len = Py_UCS4_strlen(prefix);
157        if (prefix[len-1] != SEP) {
158            /* add trailing SEP */
159            prefix[len] = SEP;
160            prefix[len + 1] = '\0';
161            len++;
162        }
163    }
164    else
165        len = 0;
166    self->prefix = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
167                                               prefix, len);
168    if (self->prefix == NULL)
169        goto error;
170
171    return 0;
172
173error:
174    Py_XDECREF(pathobj);
175    return -1;
176}
177
178/* GC support. */
179static int
180zipimporter_traverse(PyObject *obj, visitproc visit, void *arg)
181{
182    ZipImporter *self = (ZipImporter *)obj;
183    Py_VISIT(self->files);
184    return 0;
185}
186
187static void
188zipimporter_dealloc(ZipImporter *self)
189{
190    PyObject_GC_UnTrack(self);
191    Py_XDECREF(self->archive);
192    Py_XDECREF(self->prefix);
193    Py_XDECREF(self->files);
194    Py_TYPE(self)->tp_free((PyObject *)self);
195}
196
197static PyObject *
198zipimporter_repr(ZipImporter *self)
199{
200    if (self->archive == NULL)
201        return PyUnicode_FromString("<zipimporter object \"???\">");
202    else if (self->prefix != NULL && PyUnicode_GET_LENGTH(self->prefix) != 0)
203        return PyUnicode_FromFormat("<zipimporter object \"%U%c%U\">",
204                                    self->archive, SEP, self->prefix);
205    else
206        return PyUnicode_FromFormat("<zipimporter object \"%U\">",
207                                    self->archive);
208}
209
210/* return fullname.split(".")[-1] */
211static PyObject *
212get_subname(PyObject *fullname)
213{
214    Py_ssize_t len;
215    Py_UCS4 *subname, *fullname_ucs4;
216    fullname_ucs4 = PyUnicode_AsUCS4Copy(fullname);
217    if (!fullname_ucs4)
218        return NULL;
219    subname = Py_UCS4_strrchr(fullname_ucs4, '.');
220    if (subname == NULL) {
221        PyMem_Free(fullname_ucs4);
222        Py_INCREF(fullname);
223        return fullname;
224    } else {
225        PyObject *result;
226        subname++;
227        len = PyUnicode_GET_LENGTH(fullname);
228        len -= subname - fullname_ucs4;
229        result = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
230                                           subname, len);
231        PyMem_Free(fullname_ucs4);
232        return result;
233    }
234}
235
236/* Given a (sub)modulename, write the potential file path in the
237   archive (without extension) to the path buffer. Return the
238   length of the resulting string.
239
240   return self.prefix + name.replace('.', os.sep) */
241static PyObject*
242make_filename(PyObject *prefix, PyObject *name)
243{
244    PyObject *pathobj;
245    Py_UCS4 *p, *buf;
246    Py_ssize_t len;
247
248    len = PyUnicode_GET_LENGTH(prefix) + PyUnicode_GET_LENGTH(name) + 1;
249    p = buf = PyMem_Malloc(sizeof(Py_UCS4) * len);
250    if (buf == NULL) {
251        PyErr_NoMemory();
252        return NULL;
253    }
254
255    if (!PyUnicode_AsUCS4(prefix, p, len, 0))
256        return NULL;
257    p += PyUnicode_GET_LENGTH(prefix);
258    len -= PyUnicode_GET_LENGTH(prefix);
259    if (!PyUnicode_AsUCS4(name, p, len, 1))
260        return NULL;
261    for (; *p; p++) {
262        if (*p == '.')
263            *p = SEP;
264    }
265    pathobj = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
266                                        buf, p-buf);
267    PyMem_Free(buf);
268    return pathobj;
269}
270
/* Result of get_module_info(). */
enum zi_module_info {
    MI_ERROR,       /* lookup failed; an exception is set */
    MI_NOT_FOUND,   /* no entry for the module in the archive */
    MI_MODULE,      /* found as a plain (non-package) module */
    MI_PACKAGE      /* found as a package (__init__ entry) */
};
277
278/* Return some information about a module. */
279static enum zi_module_info
280get_module_info(ZipImporter *self, PyObject *fullname)
281{
282    PyObject *subname;
283    PyObject *path, *fullpath, *item;
284    struct st_zip_searchorder *zso;
285
286    subname = get_subname(fullname);
287    if (subname == NULL)
288        return MI_ERROR;
289
290    path = make_filename(self->prefix, subname);
291    Py_DECREF(subname);
292    if (path == NULL)
293        return MI_ERROR;
294
295    for (zso = zip_searchorder; *zso->suffix; zso++) {
296        fullpath = PyUnicode_FromFormat("%U%s", path, zso->suffix);
297        if (fullpath == NULL) {
298            Py_DECREF(path);
299            return MI_ERROR;
300        }
301        item = PyDict_GetItem(self->files, fullpath);
302        Py_DECREF(fullpath);
303        if (item != NULL) {
304            Py_DECREF(path);
305            if (zso->type & IS_PACKAGE)
306                return MI_PACKAGE;
307            else
308                return MI_MODULE;
309        }
310    }
311    Py_DECREF(path);
312    return MI_NOT_FOUND;
313}
314
315/* Check whether we can satisfy the import of the module named by
316   'fullname'. Return self if we can, None if we can't. */
317static PyObject *
318zipimporter_find_module(PyObject *obj, PyObject *args)
319{
320    ZipImporter *self = (ZipImporter *)obj;
321    PyObject *path = NULL;
322    PyObject *fullname;
323    enum zi_module_info mi;
324
325    if (!PyArg_ParseTuple(args, "U|O:zipimporter.find_module",
326                          &fullname, &path))
327        return NULL;
328
329    mi = get_module_info(self, fullname);
330    if (mi == MI_ERROR)
331        return NULL;
332    if (mi == MI_NOT_FOUND) {
333        Py_INCREF(Py_None);
334        return Py_None;
335    }
336    Py_INCREF(self);
337    return (PyObject *)self;
338}
339
340/* Load and return the module named by 'fullname'. */
341static PyObject *
342zipimporter_load_module(PyObject *obj, PyObject *args)
343{
344    ZipImporter *self = (ZipImporter *)obj;
345    PyObject *code = NULL, *mod, *dict;
346    PyObject *fullname;
347    PyObject *modpath = NULL;
348    int ispackage;
349
350    if (!PyArg_ParseTuple(args, "U:zipimporter.load_module",
351                          &fullname))
352        return NULL;
353    if (PyUnicode_READY(fullname) == -1)
354        return NULL;
355
356    code = get_module_code(self, fullname, &ispackage, &modpath);
357    if (code == NULL)
358        goto error;
359
360    mod = PyImport_AddModuleObject(fullname);
361    if (mod == NULL)
362        goto error;
363    dict = PyModule_GetDict(mod);
364
365    /* mod.__loader__ = self */
366    if (PyDict_SetItemString(dict, "__loader__", (PyObject *)self) != 0)
367        goto error;
368
369    if (ispackage) {
370        /* add __path__ to the module *before* the code gets
371           executed */
372        PyObject *pkgpath, *fullpath;
373        PyObject *subname = get_subname(fullname);
374        int err;
375
376        fullpath = PyUnicode_FromFormat("%U%c%U%U",
377                                self->archive, SEP,
378                                self->prefix, subname);
379        Py_DECREF(subname);
380        if (fullpath == NULL)
381            goto error;
382
383        pkgpath = Py_BuildValue("[N]", fullpath);
384        if (pkgpath == NULL)
385            goto error;
386        err = PyDict_SetItemString(dict, "__path__", pkgpath);
387        Py_DECREF(pkgpath);
388        if (err != 0)
389            goto error;
390    }
391    mod = PyImport_ExecCodeModuleObject(fullname, code, modpath, NULL);
392    Py_CLEAR(code);
393    if (mod == NULL)
394        goto error;
395
396    if (Py_VerboseFlag)
397        PySys_FormatStderr("import %U # loaded from Zip %U\n",
398                           fullname, modpath);
399    Py_DECREF(modpath);
400    return mod;
401error:
402    Py_XDECREF(code);
403    Py_XDECREF(modpath);
404    return NULL;
405}
406
407/* Return a string matching __file__ for the named module */
408static PyObject *
409zipimporter_get_filename(PyObject *obj, PyObject *args)
410{
411    ZipImporter *self = (ZipImporter *)obj;
412    PyObject *fullname, *code, *modpath;
413    int ispackage;
414
415    if (!PyArg_ParseTuple(args, "U:zipimporter.get_filename",
416                          &fullname))
417        return NULL;
418
419    /* Deciding the filename requires working out where the code
420       would come from if the module was actually loaded */
421    code = get_module_code(self, fullname, &ispackage, &modpath);
422    if (code == NULL)
423        return NULL;
424    Py_DECREF(code); /* Only need the path info */
425
426    return modpath;
427}
428
429/* Return a bool signifying whether the module is a package or not. */
430static PyObject *
431zipimporter_is_package(PyObject *obj, PyObject *args)
432{
433    ZipImporter *self = (ZipImporter *)obj;
434    PyObject *fullname;
435    enum zi_module_info mi;
436
437    if (!PyArg_ParseTuple(args, "U:zipimporter.is_package",
438                          &fullname))
439        return NULL;
440
441    mi = get_module_info(self, fullname);
442    if (mi == MI_ERROR)
443        return NULL;
444    if (mi == MI_NOT_FOUND) {
445        PyErr_Format(ZipImportError, "can't find module %R", fullname);
446        return NULL;
447    }
448    return PyBool_FromLong(mi == MI_PACKAGE);
449}
450
451
452static PyObject *
453zipimporter_get_data(PyObject *obj, PyObject *args)
454{
455    ZipImporter *self = (ZipImporter *)obj;
456    PyObject *pathobj, *key;
457    const Py_UCS4 *path;
458#ifdef ALTSEP
459    Py_UCS4 *p;
460#endif
461    PyObject *toc_entry;
462    Py_ssize_t path_len, len;
463    Py_UCS4 buf[MAXPATHLEN + 1], archive[MAXPATHLEN + 1];
464
465    if (!PyArg_ParseTuple(args, "U:zipimporter.get_data", &pathobj))
466        return NULL;
467
468    if (PyUnicode_READY(pathobj) == -1)
469        return NULL;
470
471    path_len = PyUnicode_GET_LENGTH(pathobj);
472    if (path_len >= MAXPATHLEN) {
473        PyErr_SetString(ZipImportError, "path too long");
474        return NULL;
475    }
476    if (!PyUnicode_AsUCS4(pathobj, buf, Py_ARRAY_LENGTH(buf), 1))
477        return NULL;
478    path = buf;
479#ifdef ALTSEP
480    for (p = buf; *p; p++) {
481        if (*p == ALTSEP)
482            *p = SEP;
483    }
484#endif
485    len = PyUnicode_GET_LENGTH(self->archive);
486    if ((size_t)len < Py_UCS4_strlen(path)) {
487        if (!PyUnicode_AsUCS4(self->archive, archive, Py_ARRAY_LENGTH(archive), 1))
488            return NULL;
489        if (Py_UCS4_strncmp(path, archive, len) == 0 &&
490            path[len] == SEP) {
491            path += len + 1;
492            path_len -= len + 1;
493        }
494    }
495
496    key = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
497                                    path, path_len);
498    if (key == NULL)
499        return NULL;
500    toc_entry = PyDict_GetItem(self->files, key);
501    if (toc_entry == NULL) {
502        PyErr_SetFromErrnoWithFilenameObject(PyExc_IOError, key);
503        Py_DECREF(key);
504        return NULL;
505    }
506    Py_DECREF(key);
507    return get_data(self->archive, toc_entry);
508}
509
510static PyObject *
511zipimporter_get_code(PyObject *obj, PyObject *args)
512{
513    ZipImporter *self = (ZipImporter *)obj;
514    PyObject *fullname;
515
516    if (!PyArg_ParseTuple(args, "U:zipimporter.get_code", &fullname))
517        return NULL;
518
519    return get_module_code(self, fullname, NULL, NULL);
520}
521
522static PyObject *
523zipimporter_get_source(PyObject *obj, PyObject *args)
524{
525    ZipImporter *self = (ZipImporter *)obj;
526    PyObject *toc_entry;
527    PyObject *fullname, *subname, *path, *fullpath;
528    enum zi_module_info mi;
529
530    if (!PyArg_ParseTuple(args, "U:zipimporter.get_source", &fullname))
531        return NULL;
532
533    mi = get_module_info(self, fullname);
534    if (mi == MI_ERROR)
535        return NULL;
536    if (mi == MI_NOT_FOUND) {
537        PyErr_Format(ZipImportError, "can't find module %R", fullname);
538        return NULL;
539    }
540
541    subname = get_subname(fullname);
542    if (subname == NULL)
543        return NULL;
544
545    path = make_filename(self->prefix, subname);
546    Py_DECREF(subname);
547    if (path == NULL)
548        return NULL;
549
550    if (mi == MI_PACKAGE)
551        fullpath = PyUnicode_FromFormat("%U%c__init__.py", path, SEP);
552    else
553        fullpath = PyUnicode_FromFormat("%U.py", path);
554    Py_DECREF(path);
555    if (fullpath == NULL)
556        return NULL;
557
558    toc_entry = PyDict_GetItem(self->files, fullpath);
559    Py_DECREF(fullpath);
560    if (toc_entry != NULL) {
561        PyObject *res, *bytes;
562        bytes = get_data(self->archive, toc_entry);
563        if (bytes == NULL)
564            return NULL;
565        res = PyUnicode_FromStringAndSize(PyBytes_AS_STRING(bytes),
566                                          PyBytes_GET_SIZE(bytes));
567        Py_DECREF(bytes);
568        return res;
569    }
570
571    /* we have the module, but no source */
572    Py_INCREF(Py_None);
573    return Py_None;
574}
575
/* Docstrings for the zipimporter methods; each one is attached to its
   method in the zipimporter_methods table. */

PyDoc_STRVAR(doc_find_module,
"find_module(fullname, path=None) -> self or None.\n\
\n\
Search for a module specified by 'fullname'. 'fullname' must be the\n\
fully qualified (dotted) module name. It returns the zipimporter\n\
instance itself if the module was found, or None if it wasn't.\n\
The optional 'path' argument is ignored -- it's there for compatibility\n\
with the importer protocol.");

PyDoc_STRVAR(doc_load_module,
"load_module(fullname) -> module.\n\
\n\
Load the module specified by 'fullname'. 'fullname' must be the\n\
fully qualified (dotted) module name. It returns the imported\n\
module, or raises ZipImportError if it wasn't found.");

PyDoc_STRVAR(doc_get_data,
"get_data(pathname) -> string with file data.\n\
\n\
Return the data associated with 'pathname'. Raise IOError if\n\
the file wasn't found.");

PyDoc_STRVAR(doc_is_package,
"is_package(fullname) -> bool.\n\
\n\
Return True if the module specified by fullname is a package.\n\
Raise ZipImportError if the module couldn't be found.");

PyDoc_STRVAR(doc_get_code,
"get_code(fullname) -> code object.\n\
\n\
Return the code object for the specified module. Raise ZipImportError\n\
if the module couldn't be found.");

PyDoc_STRVAR(doc_get_source,
"get_source(fullname) -> source string.\n\
\n\
Return the source code for the specified module. Raise ZipImportError\n\
if the module couldn't be found, return None if the archive does\n\
contain the module, but has no source for it.");


PyDoc_STRVAR(doc_get_filename,
"get_filename(fullname) -> filename string.\n\
\n\
Return the filename for the specified module.");
622
/* Method table of zipimporter objects: Python-level name, C
   implementation, calling convention and docstring. Every method takes
   its arguments as a positional tuple (METH_VARARGS). */
static PyMethodDef zipimporter_methods[] = {
    {"find_module", zipimporter_find_module, METH_VARARGS,
     doc_find_module},
    {"load_module", zipimporter_load_module, METH_VARARGS,
     doc_load_module},
    {"get_data", zipimporter_get_data, METH_VARARGS,
     doc_get_data},
    {"get_code", zipimporter_get_code, METH_VARARGS,
     doc_get_code},
    {"get_source", zipimporter_get_source, METH_VARARGS,
     doc_get_source},
    {"get_filename", zipimporter_get_filename, METH_VARARGS,
     doc_get_filename},
    {"is_package", zipimporter_is_package, METH_VARARGS,
     doc_is_package},
    {NULL,              NULL}   /* sentinel */
};
640
/* Read-only attributes that expose the fields of the ZipImporter struct;
   the files dict is published under the private name "_files". */
static PyMemberDef zipimporter_members[] = {
    {"archive",  T_OBJECT, offsetof(ZipImporter, archive),  READONLY},
    {"prefix",   T_OBJECT, offsetof(ZipImporter, prefix),   READONLY},
    {"_files",   T_OBJECT, offsetof(ZipImporter, files),    READONLY},
    {NULL}      /* sentinel */
};
647
648PyDoc_STRVAR(zipimporter_doc,
649"zipimporter(archivepath) -> zipimporter object\n\
650\n\
651Create a new zipimporter instance. 'archivepath' must be a path to\n\
652a zipfile, or to a specific path inside a zipfile. For example, it can be\n\
653'/tmp/myimport.zip', or '/tmp/myimport.zip/mydirectory', if mydirectory is a\n\
654valid directory inside the archive.\n\
655\n\
656'ZipImportError is raised if 'archivepath' doesn't point to a valid Zip\n\
657archive.\n\
658\n\
659The 'archive' attribute of zipimporter objects contains the name of the\n\
660zipfile targeted.");
661
/* Documents which address the type header would name while expanding
   to 0, so the initializer below does not reference &PyType_Type. */
#define DEFERRED_ADDRESS(ADDR) 0

/* Type object for zipimport.zipimporter. The initializer is positional:
   every slot must stay in this exact order. The type takes part in
   cyclic GC (Py_TPFLAGS_HAVE_GC with tp_traverse and PyObject_GC_Del)
   and may be subclassed (Py_TPFLAGS_BASETYPE). */
static PyTypeObject ZipImporter_Type = {
    PyVarObject_HEAD_INIT(DEFERRED_ADDRESS(&PyType_Type), 0)
    "zipimport.zipimporter",
    sizeof(ZipImporter),
    0,                                          /* tp_itemsize */
    (destructor)zipimporter_dealloc,            /* tp_dealloc */
    0,                                          /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_reserved */
    (reprfunc)zipimporter_repr,                 /* tp_repr */
    0,                                          /* tp_as_number */
    0,                                          /* tp_as_sequence */
    0,                                          /* tp_as_mapping */
    0,                                          /* tp_hash */
    0,                                          /* tp_call */
    0,                                          /* tp_str */
    PyObject_GenericGetAttr,                    /* tp_getattro */
    0,                                          /* tp_setattro */
    0,                                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
        Py_TPFLAGS_HAVE_GC,                     /* tp_flags */
    zipimporter_doc,                            /* tp_doc */
    zipimporter_traverse,                       /* tp_traverse */
    0,                                          /* tp_clear */
    0,                                          /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    0,                                          /* tp_iter */
    0,                                          /* tp_iternext */
    zipimporter_methods,                        /* tp_methods */
    zipimporter_members,                        /* tp_members */
    0,                                          /* tp_getset */
    0,                                          /* tp_base */
    0,                                          /* tp_dict */
    0,                                          /* tp_descr_get */
    0,                                          /* tp_descr_set */
    0,                                          /* tp_dictoffset */
    (initproc)zipimporter_init,                 /* tp_init */
    PyType_GenericAlloc,                        /* tp_alloc */
    PyType_GenericNew,                          /* tp_new */
    PyObject_GC_Del,                            /* tp_free */
};
706
707
708/* implementation */
709
/* Given a buffer, return the long that is represented by the first
   4 bytes, encoded as little endian. This partially reimplements
   marshal.c:r_long()

   The four bytes are interpreted as a signed 32-bit value, so the result
   is always in the range [-2**31, 2**31 - 1] whatever the width of long.
   The bytes are assembled in an unsigned type because shifting a set bit
   into the sign bit of a signed long is undefined behaviour. */
static long
get_long(unsigned char *buf) {
    unsigned long bits;
    long x;

    bits  = (unsigned long)buf[0];
    bits |= (unsigned long)buf[1] <<  8;
    bits |= (unsigned long)buf[2] << 16;
    bits |= (unsigned long)buf[3] << 24;

    /* low 31 bits are the magnitude; bit 31 carries weight -2**31 */
    x = (long)(bits & 0x7FFFFFFFUL);
    if (bits & 0x80000000UL) {
        /* subtract 2**31 in two steps so that no intermediate value
           overflows a 32-bit long */
        x = x - 0x7FFFFFFFL - 1;
    }
    return x;
}
726
727/*
728   read_directory(archive) -> files dict (new reference)
729
730   Given a path to a Zip archive, build a dict, mapping file names
731   (local to the archive, using SEP as a separator) to toc entries.
732
733   A toc_entry is a tuple:
734
735   (__file__,      # value to use for __file__, available for all files,
736                   # encoded to the filesystem encoding
737    compress,      # compression kind; 0 for uncompressed
738    data_size,     # size of compressed data on disk
739    file_size,     # size of decompressed data
740    file_offset,   # offset of file header from start of archive
741    time,          # mod time of file (in dos format)
742    date,          # mod data of file (in dos format)
743    crc,           # crc checksum of the data
744   )
745
746   Directories can be recognized by the trailing SEP in the name,
747   data_size and file_offset are 0.
748*/
749static PyObject *
750read_directory(PyObject *archive)
751{
752    PyObject *files = NULL;
753    FILE *fp;
754    unsigned short flags;
755    long compress, crc, data_size, file_size, file_offset, date, time;
756    long header_offset, name_size, header_size, header_position;
757    long l, count;
758    Py_ssize_t i;
759    size_t length;
760    Py_UCS4 path[MAXPATHLEN + 5];
761    char name[MAXPATHLEN + 5];
762    PyObject *nameobj = NULL;
763    char *p, endof_central_dir[22];
764    long arc_offset; /* offset from beginning of file to start of zip-archive */
765    PyObject *pathobj;
766    const char *charset;
767    int bootstrap;
768
769    if (PyUnicode_GET_LENGTH(archive) > MAXPATHLEN) {
770        PyErr_SetString(PyExc_OverflowError,
771                        "Zip path name is too long");
772        return NULL;
773    }
774    if (!PyUnicode_AsUCS4(archive, path, Py_ARRAY_LENGTH(path), 1))
775        return NULL;
776
777    fp = _Py_fopen(archive, "rb");
778    if (fp == NULL) {
779        PyErr_Format(ZipImportError, "can't open Zip file: %R", archive);
780        return NULL;
781    }
782    fseek(fp, -22, SEEK_END);
783    header_position = ftell(fp);
784    if (fread(endof_central_dir, 1, 22, fp) != 22) {
785        fclose(fp);
786        PyErr_Format(ZipImportError, "can't read Zip file: %R", archive);
787        return NULL;
788    }
789    if (get_long((unsigned char *)endof_central_dir) != 0x06054B50) {
790        /* Bad: End of Central Dir signature */
791        fclose(fp);
792        PyErr_Format(ZipImportError, "not a Zip file: %R", archive);
793        return NULL;
794    }
795
796    header_size = get_long((unsigned char *)endof_central_dir + 12);
797    header_offset = get_long((unsigned char *)endof_central_dir + 16);
798    arc_offset = header_position - header_offset - header_size;
799    header_offset += arc_offset;
800
801    files = PyDict_New();
802    if (files == NULL)
803        goto error;
804
805    length = Py_UCS4_strlen(path);
806    path[length] = SEP;
807
808    /* Start of Central Directory */
809    count = 0;
810    for (;;) {
811        PyObject *t;
812        int err;
813
814        fseek(fp, header_offset, 0);  /* Start of file header */
815        l = PyMarshal_ReadLongFromFile(fp);
816        if (l != 0x02014B50)
817            break;              /* Bad: Central Dir File Header */
818        fseek(fp, header_offset + 8, 0);
819        flags = (unsigned short)PyMarshal_ReadShortFromFile(fp);
820        compress = PyMarshal_ReadShortFromFile(fp);
821        time = PyMarshal_ReadShortFromFile(fp);
822        date = PyMarshal_ReadShortFromFile(fp);
823        crc = PyMarshal_ReadLongFromFile(fp);
824        data_size = PyMarshal_ReadLongFromFile(fp);
825        file_size = PyMarshal_ReadLongFromFile(fp);
826        name_size = PyMarshal_ReadShortFromFile(fp);
827        header_size = 46 + name_size +
828           PyMarshal_ReadShortFromFile(fp) +
829           PyMarshal_ReadShortFromFile(fp);
830        fseek(fp, header_offset + 42, 0);
831        file_offset = PyMarshal_ReadLongFromFile(fp) + arc_offset;
832        if (name_size > MAXPATHLEN)
833            name_size = MAXPATHLEN;
834
835        p = name;
836        for (i = 0; i < (Py_ssize_t)name_size; i++) {
837            *p = (char)getc(fp);
838            if (*p == '/')
839                *p = SEP;
840            p++;
841        }
842        *p = 0;         /* Add terminating null byte */
843        header_offset += header_size;
844
845        bootstrap = 0;
846        if (flags & 0x0800)
847            charset = "utf-8";
848        else if (!PyThreadState_GET()->interp->codecs_initialized) {
849            /* During bootstrap, we may need to load the encodings
850               package from a ZIP file. But the cp437 encoding is implemented
851               in Python in the encodings package.
852
853               Break out of this dependency by assuming that the path to
854               the encodings module is ASCII-only. */
855            charset = "ascii";
856            bootstrap = 1;
857        }
858        else
859            charset = "cp437";
860        nameobj = PyUnicode_Decode(name, name_size, charset, NULL);
861        if (PyUnicode_READY(nameobj) == -1)
862            goto error;
863        if (nameobj == NULL) {
864            if (bootstrap)
865                PyErr_Format(PyExc_NotImplementedError,
866                    "bootstrap issue: python%i%i.zip contains non-ASCII "
867                    "filenames without the unicode flag",
868                    PY_MAJOR_VERSION, PY_MINOR_VERSION);
869            goto error;
870        }
871        for (i = 0; (i < (MAXPATHLEN - (Py_ssize_t)length - 1)) &&
872                 (i < PyUnicode_GET_LENGTH(nameobj)); i++)
873            path[length + 1 + i] = PyUnicode_READ_CHAR(nameobj, i);
874        path[length + 1 + i] = 0;
875        pathobj = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
876                                            path, Py_UCS4_strlen(path));
877        if (pathobj == NULL)
878            goto error;
879        t = Py_BuildValue("Niiiiiii", pathobj, compress, data_size,
880                          file_size, file_offset, time, date, crc);
881        if (t == NULL)
882            goto error;
883        err = PyDict_SetItem(files, nameobj, t);
884        Py_CLEAR(nameobj);
885        Py_DECREF(t);
886        if (err != 0)
887            goto error;
888        count++;
889    }
890    fclose(fp);
891    if (Py_VerboseFlag)
892        PySys_FormatStderr("# zipimport: found %ld names in %R\n",
893                           count, archive);
894    return files;
895error:
896    fclose(fp);
897    Py_XDECREF(files);
898    Py_XDECREF(nameobj);
899    return NULL;
900}
901
902/* Return the zlib.decompress function object, or NULL if zlib couldn't
903   be imported. The function is cached when found, so subsequent calls
904   don't import zlib again. */
905static PyObject *
906get_decompress_func(void)
907{
908    static int importing_zlib = 0;
909    PyObject *zlib;
910    PyObject *decompress;
911    _Py_identifier(decompress);
912
913    if (importing_zlib != 0)
914        /* Someone has a zlib.py[co] in their Zip file;
915           let's avoid a stack overflow. */
916        return NULL;
917    importing_zlib = 1;
918    zlib = PyImport_ImportModuleNoBlock("zlib");
919    importing_zlib = 0;
920    if (zlib != NULL) {
921        decompress = _PyObject_GetAttrId(zlib,
922                                         &PyId_decompress);
923        Py_DECREF(zlib);
924    }
925    else {
926        PyErr_Clear();
927        decompress = NULL;
928    }
929    if (Py_VerboseFlag)
930        PySys_WriteStderr("# zipimport: zlib %s\n",
931            zlib != NULL ? "available": "UNAVAILABLE");
932    return decompress;
933}
934
935/* Given a path to a Zip file and a toc_entry, return the (uncompressed)
936   data as a new reference. */
937static PyObject *
938get_data(PyObject *archive, PyObject *toc_entry)
939{
940    PyObject *raw_data, *data = NULL, *decompress;
941    char *buf;
942    FILE *fp;
943    int err;
944    Py_ssize_t bytes_read = 0;
945    long l;
946    PyObject *datapath;
947    long compress, data_size, file_size, file_offset, bytes_size;
948    long time, date, crc;
949
950    if (!PyArg_ParseTuple(toc_entry, "Olllllll", &datapath, &compress,
951                          &data_size, &file_size, &file_offset, &time,
952                          &date, &crc)) {
953        return NULL;
954    }
955
956    fp = _Py_fopen(archive, "rb");
957    if (!fp) {
958        PyErr_Format(PyExc_IOError,
959           "zipimport: can not open file %U", archive);
960        return NULL;
961    }
962
963    /* Check to make sure the local file header is correct */
964    fseek(fp, file_offset, 0);
965    l = PyMarshal_ReadLongFromFile(fp);
966    if (l != 0x04034B50) {
967        /* Bad: Local File Header */
968        PyErr_Format(ZipImportError,
969                     "bad local file header in %U",
970                     archive);
971        fclose(fp);
972        return NULL;
973    }
974    fseek(fp, file_offset + 26, 0);
975    l = 30 + PyMarshal_ReadShortFromFile(fp) +
976        PyMarshal_ReadShortFromFile(fp);        /* local header size */
977    file_offset += l;           /* Start of file data */
978
979    bytes_size = compress == 0 ? data_size : data_size + 1;
980    if (bytes_size == 0)
981        bytes_size++;
982    raw_data = PyBytes_FromStringAndSize((char *)NULL, bytes_size);
983
984    if (raw_data == NULL) {
985        fclose(fp);
986        return NULL;
987    }
988    buf = PyBytes_AsString(raw_data);
989
990    err = fseek(fp, file_offset, 0);
991    if (err == 0)
992        bytes_read = fread(buf, 1, data_size, fp);
993    fclose(fp);
994    if (err || bytes_read != data_size) {
995        PyErr_SetString(PyExc_IOError,
996                        "zipimport: can't read data");
997        Py_DECREF(raw_data);
998        return NULL;
999    }
1000
1001    if (compress != 0) {
1002        buf[data_size] = 'Z';  /* saw this in zipfile.py */
1003        data_size++;
1004    }
1005    buf[data_size] = '\0';
1006
1007    if (compress == 0) {  /* data is not compressed */
1008        data = PyBytes_FromStringAndSize(buf, data_size);
1009        Py_DECREF(raw_data);
1010        return data;
1011    }
1012
1013    /* Decompress with zlib */
1014    decompress = get_decompress_func();
1015    if (decompress == NULL) {
1016        PyErr_SetString(ZipImportError,
1017                        "can't decompress data; "
1018                        "zlib not available");
1019        goto error;
1020    }
1021    data = PyObject_CallFunction(decompress, "Oi", raw_data, -15);
1022    Py_DECREF(decompress);
1023error:
1024    Py_DECREF(raw_data);
1025    return data;
1026}
1027
/* Lenient time stamp comparison: return 1 when t1 and t2 are at most
   one second apart, 0 otherwise. The mtime kept in the archive is less
   precise than the one stored in a .pyc (dostime only stores even
   seconds), so an exact match cannot be required. */
static int
eq_mtime(time_t t1, time_t t2)
{
    time_t delta = t1 - t2;

    return (delta < 0 ? -delta : delta) <= 1;
}
1040
1041/* Given the contents of a .py[co] file in a buffer, unmarshal the data
1042   and return the code object. Return None if it the magic word doesn't
1043   match (we do this instead of raising an exception as we fall back
1044   to .py if available and we don't want to mask other errors).
1045   Returns a new reference. */
1046static PyObject *
1047unmarshal_code(PyObject *pathname, PyObject *data, time_t mtime)
1048{
1049    PyObject *code;
1050    char *buf = PyBytes_AsString(data);
1051    Py_ssize_t size = PyBytes_Size(data);
1052
1053    if (size <= 9) {
1054        PyErr_SetString(ZipImportError,
1055                        "bad pyc data");
1056        return NULL;
1057    }
1058
1059    if (get_long((unsigned char *)buf) != PyImport_GetMagicNumber()) {
1060        if (Py_VerboseFlag)
1061            PySys_FormatStderr("# %R has bad magic\n",
1062                               pathname);
1063        Py_INCREF(Py_None);
1064        return Py_None;  /* signal caller to try alternative */
1065    }
1066
1067    if (mtime != 0 && !eq_mtime(get_long((unsigned char *)buf + 4),
1068                                mtime)) {
1069        if (Py_VerboseFlag)
1070            PySys_FormatStderr("# %R has bad mtime\n",
1071                               pathname);
1072        Py_INCREF(Py_None);
1073        return Py_None;  /* signal caller to try alternative */
1074    }
1075
1076    code = PyMarshal_ReadObjectFromString(buf + 8, size - 8);
1077    if (code == NULL)
1078        return NULL;
1079    if (!PyCode_Check(code)) {
1080        Py_DECREF(code);
1081        PyErr_Format(PyExc_TypeError,
1082             "compiled module %R is not a code object",
1083             pathname);
1084        return NULL;
1085    }
1086    return code;
1087}
1088
1089/* Replace any occurances of "\r\n?" in the input string with "\n".
1090   This converts DOS and Mac line endings to Unix line endings.
1091   Also append a trailing "\n" to be compatible with
1092   PyParser_SimpleParseFile(). Returns a new reference. */
1093static PyObject *
1094normalize_line_endings(PyObject *source)
1095{
1096    char *buf, *q, *p;
1097    PyObject *fixed_source;
1098    int len = 0;
1099
1100    p = PyBytes_AsString(source);
1101    if (p == NULL) {
1102        return PyBytes_FromStringAndSize("\n\0", 2);
1103    }
1104
1105    /* one char extra for trailing \n and one for terminating \0 */
1106    buf = (char *)PyMem_Malloc(PyBytes_Size(source) + 2);
1107    if (buf == NULL) {
1108        PyErr_SetString(PyExc_MemoryError,
1109                        "zipimport: no memory to allocate "
1110                        "source buffer");
1111        return NULL;
1112    }
1113    /* replace "\r\n?" by "\n" */
1114    for (q = buf; *p != '\0'; p++) {
1115        if (*p == '\r') {
1116            *q++ = '\n';
1117            if (*(p + 1) == '\n')
1118                p++;
1119        }
1120        else
1121            *q++ = *p;
1122        len++;
1123    }
1124    *q++ = '\n';  /* add trailing \n */
1125    *q = '\0';
1126    fixed_source = PyBytes_FromStringAndSize(buf, len + 2);
1127    PyMem_Free(buf);
1128    return fixed_source;
1129}
1130
1131/* Given a string buffer containing Python source code, compile it
1132   return and return a code object as a new reference. */
1133static PyObject *
1134compile_source(PyObject *pathname, PyObject *source)
1135{
1136    PyObject *code, *fixed_source, *pathbytes;
1137
1138    pathbytes = PyUnicode_EncodeFSDefault(pathname);
1139    if (pathbytes == NULL)
1140        return NULL;
1141
1142    fixed_source = normalize_line_endings(source);
1143    if (fixed_source == NULL) {
1144        Py_DECREF(pathbytes);
1145        return NULL;
1146    }
1147
1148    code = Py_CompileString(PyBytes_AsString(fixed_source),
1149                            PyBytes_AsString(pathbytes),
1150                            Py_file_input);
1151    Py_DECREF(pathbytes);
1152    Py_DECREF(fixed_source);
1153    return code;
1154}
1155
/* Convert the packed date/time values found in the Zip archive to a
   time_t that's compatible with the time stamp stored in .pyc files.

   'dostime' holds hours in bits 11-15, minutes in bits 5-10 and
   seconds/2 in bits 0-4; 'dosdate' holds the year in bits 9-15, the
   1-based month in bits 5-8 and the day in bits 0-4. The result is
   whatever mktime() returns for these fields, i.e. they are
   interpreted as local time. */
static time_t
parse_dostime(int dostime, int dosdate)
{
    struct tm when;

    /* every field not assigned below starts out as zero */
    memset((void *) &when, '\0', sizeof(when));

    /* date word; the stored year is offset by 80 to get tm_year
       (which counts from 1900), the stored month is 1-based */
    when.tm_year = ((dosdate >> 9) & 0x7f) + 80;
    when.tm_mon  = ((dosdate >> 5) & 0x0f) - 1;
    when.tm_mday = dosdate & 0x1f;

    /* time word; seconds are stored halved */
    when.tm_hour = (dostime >> 11) & 0x1f;
    when.tm_min  = (dostime >> 5) & 0x3f;
    when.tm_sec  = (dostime & 0x1f) * 2;

    /* let mktime() decide about DST; wday/yday is ignored */
    when.tm_isdst = -1;

    return mktime(&when);
}
1175
1176/* Given a path to a .pyc or .pyo file in the archive, return the
1177   modification time of the matching .py file, or 0 if no source
1178   is available. */
1179static time_t
1180get_mtime_of_source(ZipImporter *self, PyObject *path)
1181{
1182    PyObject *toc_entry, *stripped;
1183    time_t mtime;
1184
1185    /* strip 'c' or 'o' from *.py[co] */
1186    if (PyUnicode_READY(path) == -1)
1187        return (time_t)-1;
1188    stripped = PyUnicode_FromKindAndData(PyUnicode_KIND(path),
1189                                         PyUnicode_DATA(path),
1190                                         PyUnicode_GET_LENGTH(path) - 1);
1191    if (stripped == NULL)
1192        return (time_t)-1;
1193
1194    toc_entry = PyDict_GetItem(self->files, stripped);
1195    Py_DECREF(stripped);
1196    if (toc_entry != NULL && PyTuple_Check(toc_entry) &&
1197        PyTuple_Size(toc_entry) == 8) {
1198        /* fetch the time stamp of the .py file for comparison
1199           with an embedded pyc time stamp */
1200        int time, date;
1201        time = PyLong_AsLong(PyTuple_GetItem(toc_entry, 5));
1202        date = PyLong_AsLong(PyTuple_GetItem(toc_entry, 6));
1203        mtime = parse_dostime(time, date);
1204    } else
1205        mtime = 0;
1206    return mtime;
1207}
1208
1209/* Return the code object for the module named by 'fullname' from the
1210   Zip archive as a new reference. */
1211static PyObject *
1212get_code_from_data(ZipImporter *self, int ispackage, int isbytecode,
1213                   time_t mtime, PyObject *toc_entry)
1214{
1215    PyObject *data, *modpath, *code;
1216
1217    data = get_data(self->archive, toc_entry);
1218    if (data == NULL)
1219        return NULL;
1220
1221    modpath = PyTuple_GetItem(toc_entry, 0);
1222    if (isbytecode)
1223        code = unmarshal_code(modpath, data, mtime);
1224    else
1225        code = compile_source(modpath, data);
1226    Py_DECREF(data);
1227    return code;
1228}
1229
1230/* Get the code object associated with the module specified by
1231   'fullname'. */
1232static PyObject *
1233get_module_code(ZipImporter *self, PyObject *fullname,
1234                int *p_ispackage, PyObject **p_modpath)
1235{
1236    PyObject *code = NULL, *toc_entry, *subname;
1237    PyObject *path, *fullpath = NULL;
1238    struct st_zip_searchorder *zso;
1239
1240    subname = get_subname(fullname);
1241    if (subname == NULL)
1242        return NULL;
1243
1244    path = make_filename(self->prefix, subname);
1245    Py_DECREF(subname);
1246    if (path == NULL)
1247        return NULL;
1248
1249    for (zso = zip_searchorder; *zso->suffix; zso++) {
1250        code = NULL;
1251
1252        fullpath = PyUnicode_FromFormat("%U%s", path, zso->suffix);
1253        if (fullpath == NULL)
1254            goto exit;
1255
1256        if (Py_VerboseFlag > 1)
1257            PySys_FormatStderr("# trying %U%c%U\n",
1258                               self->archive, (int)SEP, fullpath);
1259        toc_entry = PyDict_GetItem(self->files, fullpath);
1260        if (toc_entry != NULL) {
1261            time_t mtime = 0;
1262            int ispackage = zso->type & IS_PACKAGE;
1263            int isbytecode = zso->type & IS_BYTECODE;
1264
1265            if (isbytecode) {
1266                mtime = get_mtime_of_source(self, fullpath);
1267                if (mtime == (time_t)-1 && PyErr_Occurred()) {
1268                    goto exit;
1269                }
1270            }
1271            Py_CLEAR(fullpath);
1272            if (p_ispackage != NULL)
1273                *p_ispackage = ispackage;
1274            code = get_code_from_data(self, ispackage,
1275                                      isbytecode, mtime,
1276                                      toc_entry);
1277            if (code == Py_None) {
1278                /* bad magic number or non-matching mtime
1279                   in byte code, try next */
1280                Py_DECREF(code);
1281                continue;
1282            }
1283            if (code != NULL && p_modpath != NULL) {
1284                *p_modpath = PyTuple_GetItem(toc_entry, 0);
1285                Py_INCREF(*p_modpath);
1286            }
1287            goto exit;
1288        }
1289        else
1290            Py_CLEAR(fullpath);
1291    }
1292    PyErr_Format(ZipImportError, "can't find module %R", fullname);
1293exit:
1294    Py_DECREF(path);
1295    Py_XDECREF(fullpath);
1296    return code;
1297}
1298
1299
1300/* Module init */
1301
1302PyDoc_STRVAR(zipimport_doc,
1303"zipimport provides support for importing Python modules from Zip archives.\n\
1304\n\
1305This module exports three objects:\n\
1306- zipimporter: a class; its constructor takes a path to a Zip archive.\n\
1307- ZipImportError: exception raised by zipimporter objects. It's a\n\
1308  subclass of ImportError, so it can be caught as ImportError, too.\n\
1309- _zip_directory_cache: a dict, mapping archive paths to zip directory\n\
1310  info dicts, as used in zipimporter._files.\n\
1311\n\
1312It is usually not needed to use the zipimport module explicitly; it is\n\
1313used by the builtin import mechanism for sys.path items that are paths\n\
1314to Zip archives.");
1315
1316static struct PyModuleDef zipimportmodule = {
1317    PyModuleDef_HEAD_INIT,
1318    "zipimport",
1319    zipimport_doc,
1320    -1,
1321    NULL,
1322    NULL,
1323    NULL,
1324    NULL,
1325    NULL
1326};
1327
1328PyMODINIT_FUNC
1329PyInit_zipimport(void)
1330{
1331    PyObject *mod;
1332
1333    if (PyType_Ready(&ZipImporter_Type) < 0)
1334        return NULL;
1335
1336    /* Correct directory separator */
1337    zip_searchorder[0].suffix[0] = SEP;
1338    zip_searchorder[1].suffix[0] = SEP;
1339    zip_searchorder[2].suffix[0] = SEP;
1340    if (Py_OptimizeFlag) {
1341        /* Reverse *.pyc and *.pyo */
1342        struct st_zip_searchorder tmp;
1343        tmp = zip_searchorder[0];
1344        zip_searchorder[0] = zip_searchorder[1];
1345        zip_searchorder[1] = tmp;
1346        tmp = zip_searchorder[3];
1347        zip_searchorder[3] = zip_searchorder[4];
1348        zip_searchorder[4] = tmp;
1349    }
1350
1351    mod = PyModule_Create(&zipimportmodule);
1352    if (mod == NULL)
1353        return NULL;
1354
1355    ZipImportError = PyErr_NewException("zipimport.ZipImportError",
1356                                        PyExc_ImportError, NULL);
1357    if (ZipImportError == NULL)
1358        return NULL;
1359
1360    Py_INCREF(ZipImportError);
1361    if (PyModule_AddObject(mod, "ZipImportError",
1362                           ZipImportError) < 0)
1363        return NULL;
1364
1365    Py_INCREF(&ZipImporter_Type);
1366    if (PyModule_AddObject(mod, "zipimporter",
1367                           (PyObject *)&ZipImporter_Type) < 0)
1368        return NULL;
1369
1370    zip_directory_cache = PyDict_New();
1371    if (zip_directory_cache == NULL)
1372        return NULL;
1373    Py_INCREF(zip_directory_cache);
1374    if (PyModule_AddObject(mod, "_zip_directory_cache",
1375                           zip_directory_cache) < 0)
1376        return NULL;
1377    return mod;
1378}
1379