zipimport.c revision 21e7d4cd5eb5a1ee153baf4c7915db80e6ca59e1
1#include "Python.h"
2#include "structmember.h"
3#include "osdefs.h"
4#include "marshal.h"
5#include <time.h>
6
7
/* Bit flags describing a zip_searchorder entry; combined with '|'. */
#define IS_SOURCE   0x0     /* source file (no flag bits set) */
#define IS_BYTECODE 0x1     /* compiled bytecode file */
#define IS_PACKAGE  0x2     /* package __init__ file */
11
/* One entry of the module search table: a filename suffix together with
   the IS_* flags describing the kind of file that suffix denotes. */
struct st_zip_searchorder {
    char suffix[14];    /* suffix appended to the module path */
    int type;           /* bitwise OR of IS_* flags */
};
16
/* zip_searchorder defines how we search for a module in the Zip
   archive: we first search for a package __init__, then for
   non-package .pyc, .pyo and .py entries. The .pyc and .pyo entries
   are swapped by initzipimport() if we run in optimized mode. Also,
   '/' is replaced by SEP there.
   The table is terminated by an entry with an empty suffix; lookups
   walk it in order and stop at the first match. */
static struct st_zip_searchorder zip_searchorder[] = {
    {"/__init__.pyc", IS_PACKAGE | IS_BYTECODE},
    {"/__init__.pyo", IS_PACKAGE | IS_BYTECODE},
    {"/__init__.py", IS_PACKAGE | IS_SOURCE},
    {".pyc", IS_BYTECODE},
    {".pyo", IS_BYTECODE},
    {".py", IS_SOURCE},
    {"", 0}
};
31
32/* zipimporter object definition and support */
33
typedef struct _zipimporter ZipImporter;

/* Instance layout of zipimport.zipimporter objects. All three fields are
   NULL until zipimporter_init() has run successfully. */
struct _zipimporter {
    PyObject_HEAD
    PyObject *archive;  /* pathname of the Zip archive,
                           decoded from the filesystem encoding */
    PyObject *prefix;   /* file prefix: "a/sub/directory/" (str object,
                           empty when the importer targets the archive
                           root; otherwise ends with SEP) */
    PyObject *files;    /* dict with file info {path: toc_entry} */
};
44
/* Exception type raised for zipimport-specific failures. */
static PyObject *ZipImportError;
/* read_directory() cache: {archive path: files dict} */
static PyObject *zip_directory_cache = NULL;
/* {archive path: stat result recorded when the directory was read};
   safely_reopen_archive() compares against it to detect archives that
   changed on disk. */
static PyObject *zip_stat_cache = NULL;
/* posix.fstat or nt.fstat function.  Used due to posixmodule.c's
 * superior fstat implementation over libc's on Windows.
 * May be NULL, in which case no stat information is collected. */
static PyObject *fstat_function = NULL;  /* posix.fstat() or nt.fstat() */
52
53/* forward decls */
54static FILE *fopen_rb_and_stat(PyObject *path, PyObject **py_stat_p);
55static FILE *safely_reopen_archive(ZipImporter *self);
56static PyObject *read_directory(FILE *fp, PyObject *archive);
57static PyObject *get_data(FILE *fp, PyObject *archive, PyObject *toc_entry);
58static PyObject *get_module_code(ZipImporter *self, PyObject *fullname,
59                                 int *p_ispackage, PyObject **p_modpath);
60
61
/* Type test against ZipImporter_Type via PyObject_TypeCheck. */
#define ZipImporter_Check(op) PyObject_TypeCheck(op, &ZipImporter_Type)
63
64
65/* zipimporter.__init__
66   Split the "subdirectory" from the Zip archive path, lookup a matching
67   entry in sys.path_importer_cache, fetch the file directory from there
68   if found, or else read it from the archive. */
69static int
70zipimporter_init(ZipImporter *self, PyObject *args, PyObject *kwds)
71{
72    PyObject *path, *files, *tmp;
73    PyObject *filename = NULL;
74    Py_ssize_t len, flen;
75#ifdef ALTSEP
76    _Py_IDENTIFIER(replace);
77#endif
78
79    if (!_PyArg_NoKeywords("zipimporter()", kwds))
80        return -1;
81
82    if (!PyArg_ParseTuple(args, "O&:zipimporter",
83                          PyUnicode_FSDecoder, &path))
84        return -1;
85
86    if (PyUnicode_READY(path) == -1)
87        return -1;
88
89    len = PyUnicode_GET_LENGTH(path);
90    if (len == 0) {
91        PyErr_SetString(ZipImportError, "archive path is empty");
92        goto error;
93    }
94
95#ifdef ALTSEP
96    tmp = _PyObject_CallMethodId(path, &PyId_replace, "CC", ALTSEP, SEP);
97    if (!tmp)
98        goto error;
99    Py_DECREF(path);
100    path = tmp;
101#endif
102
103    filename = path;
104    Py_INCREF(filename);
105    flen = len;
106    for (;;) {
107        struct stat statbuf;
108        int rv;
109
110        rv = _Py_stat(filename, &statbuf);
111        if (rv == -2)
112            goto error;
113        if (rv == 0) {
114            /* it exists */
115            if (!S_ISREG(statbuf.st_mode))
116                /* it's a not file */
117                Py_CLEAR(filename);
118            break;
119        }
120        Py_CLEAR(filename);
121        /* back up one path element */
122        flen = PyUnicode_FindChar(path, SEP, 0, flen, -1);
123        if (flen == -1)
124            break;
125        filename = PyUnicode_Substring(path, 0, flen);
126    }
127    if (filename == NULL) {
128        PyErr_SetString(ZipImportError, "not a Zip file");
129        goto error;
130    }
131
132    if (PyUnicode_READY(filename) < 0)
133        goto error;
134
135    files = PyDict_GetItem(zip_directory_cache, filename);
136    if (files == NULL) {
137        PyObject *zip_stat = NULL;
138        FILE *fp = fopen_rb_and_stat(filename, &zip_stat);
139        if (fp == NULL) {
140            if (!PyErr_Occurred())
141                PyErr_Format(ZipImportError, "can't open Zip file: %R",
142                             filename);
143
144            Py_XDECREF(zip_stat);
145            goto error;
146        }
147
148        if (Py_VerboseFlag)
149            PySys_FormatStderr("# zipimport: %U not cached, "
150                               "reading TOC.\n", filename);
151
152        files = read_directory(fp, filename);
153        fclose(fp);
154        if (files == NULL) {
155            Py_XDECREF(zip_stat);
156            goto error;
157        }
158        if (PyDict_SetItem(zip_directory_cache, filename, files) != 0) {
159            Py_DECREF(files);
160            Py_XDECREF(zip_stat);
161            goto error;
162        }
163        if (zip_stat && PyDict_SetItem(zip_stat_cache, filename,
164                                       zip_stat) != 0) {
165            Py_DECREF(files);
166            Py_DECREF(zip_stat);
167            goto error;
168        }
169        Py_XDECREF(zip_stat);
170    }
171    else
172        Py_INCREF(files);
173    self->files = files;
174
175    /* Transfer reference */
176    self->archive = filename;
177    filename = NULL;
178
179    /* Check if there is a prefix directory following the filename. */
180    if (flen != len) {
181        tmp = PyUnicode_Substring(path, flen+1,
182                                  PyUnicode_GET_LENGTH(path));
183        if (tmp == NULL)
184            goto error;
185        self->prefix = tmp;
186        if (PyUnicode_READ_CHAR(path, len-1) != SEP) {
187            /* add trailing SEP */
188            tmp = PyUnicode_FromFormat("%U%c", self->prefix, SEP);
189            if (tmp == NULL)
190                goto error;
191            Py_DECREF(self->prefix);
192            self->prefix = tmp;
193        }
194    }
195    else
196        self->prefix = PyUnicode_New(0, 0);
197    Py_DECREF(path);
198    return 0;
199
200error:
201    Py_DECREF(path);
202    Py_XDECREF(filename);
203    return -1;
204}
205
206/* GC support. */
207static int
208zipimporter_traverse(PyObject *obj, visitproc visit, void *arg)
209{
210    ZipImporter *self = (ZipImporter *)obj;
211    Py_VISIT(self->files);
212    return 0;
213}
214
215static void
216zipimporter_dealloc(ZipImporter *self)
217{
218    PyObject_GC_UnTrack(self);
219    Py_XDECREF(self->archive);
220    Py_XDECREF(self->prefix);
221    Py_XDECREF(self->files);
222    Py_TYPE(self)->tp_free((PyObject *)self);
223}
224
225static PyObject *
226zipimporter_repr(ZipImporter *self)
227{
228    if (self->archive == NULL)
229        return PyUnicode_FromString("<zipimporter object \"???\">");
230    else if (self->prefix != NULL && PyUnicode_GET_LENGTH(self->prefix) != 0)
231        return PyUnicode_FromFormat("<zipimporter object \"%U%c%U\">",
232                                    self->archive, SEP, self->prefix);
233    else
234        return PyUnicode_FromFormat("<zipimporter object \"%U\">",
235                                    self->archive);
236}
237
238/* return fullname.split(".")[-1] */
239static PyObject *
240get_subname(PyObject *fullname)
241{
242    Py_ssize_t len, dot;
243    if (PyUnicode_READY(fullname) < 0)
244        return NULL;
245    len = PyUnicode_GET_LENGTH(fullname);
246    dot = PyUnicode_FindChar(fullname, '.', 0, len, -1);
247    if (dot == -1) {
248        Py_INCREF(fullname);
249        return fullname;
250    } else
251        return PyUnicode_Substring(fullname, dot+1, len);
252}
253
254/* Given a (sub)modulename, write the potential file path in the
255   archive (without extension) to the path buffer. Return the
256   length of the resulting string.
257
258   return self.prefix + name.replace('.', os.sep) */
259static PyObject*
260make_filename(PyObject *prefix, PyObject *name)
261{
262    PyObject *pathobj;
263    Py_UCS4 *p, *buf;
264    Py_ssize_t len;
265
266    len = PyUnicode_GET_LENGTH(prefix) + PyUnicode_GET_LENGTH(name) + 1;
267    p = buf = PyMem_Malloc(sizeof(Py_UCS4) * len);
268    if (buf == NULL) {
269        PyErr_NoMemory();
270        return NULL;
271    }
272
273    if (!PyUnicode_AsUCS4(prefix, p, len, 0)) {
274        PyMem_Free(buf);
275        return NULL;
276    }
277    p += PyUnicode_GET_LENGTH(prefix);
278    len -= PyUnicode_GET_LENGTH(prefix);
279    if (!PyUnicode_AsUCS4(name, p, len, 1)) {
280        PyMem_Free(buf);
281        return NULL;
282    }
283    for (; *p; p++) {
284        if (*p == '.')
285            *p = SEP;
286    }
287    pathobj = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
288                                        buf, p-buf);
289    PyMem_Free(buf);
290    return pathobj;
291}
292
/* Result of get_module_info(). */
enum zi_module_info {
    MI_ERROR,       /* lookup failed */
    MI_NOT_FOUND,   /* no matching entry in the archive directory */
    MI_MODULE,      /* matched a non-package entry */
    MI_PACKAGE      /* matched a package __init__ entry */
};
299
300/* Does this path represent a directory?
301   on error, return < 0
302   if not a dir, return 0
303   if a dir, return 1
304*/
305static int
306check_is_directory(ZipImporter *self, PyObject* prefix, PyObject *path)
307{
308    PyObject *dirpath;
309    int res;
310
311    /* See if this is a "directory". If so, it's eligible to be part
312       of a namespace package. We test by seeing if the name, with an
313       appended path separator, exists. */
314    dirpath = PyUnicode_FromFormat("%U%U%c", prefix, path, SEP);
315    if (dirpath == NULL)
316        return -1;
317    /* If dirpath is present in self->files, we have a directory. */
318    res = PyDict_Contains(self->files, dirpath);
319    Py_DECREF(dirpath);
320    return res;
321}
322
323/* Return some information about a module. */
324static enum zi_module_info
325get_module_info(ZipImporter *self, PyObject *fullname)
326{
327    PyObject *subname;
328    PyObject *path, *fullpath, *item;
329    struct st_zip_searchorder *zso;
330
331    subname = get_subname(fullname);
332    if (subname == NULL)
333        return MI_ERROR;
334
335    path = make_filename(self->prefix, subname);
336    Py_DECREF(subname);
337    if (path == NULL)
338        return MI_ERROR;
339
340    for (zso = zip_searchorder; *zso->suffix; zso++) {
341        fullpath = PyUnicode_FromFormat("%U%s", path, zso->suffix);
342        if (fullpath == NULL) {
343            Py_DECREF(path);
344            return MI_ERROR;
345        }
346        item = PyDict_GetItem(self->files, fullpath);
347        Py_DECREF(fullpath);
348        if (item != NULL) {
349            Py_DECREF(path);
350            if (zso->type & IS_PACKAGE)
351                return MI_PACKAGE;
352            else
353                return MI_MODULE;
354        }
355    }
356    Py_DECREF(path);
357    return MI_NOT_FOUND;
358}
359
/* Result of find_loader(). Note that FL_ERROR is the first enumerator
   and therefore has the value 0, not -1. */
typedef enum {
    FL_ERROR,           /* lookup failed */
    FL_NOT_FOUND,       /* no loader or namespace portion found */
    FL_MODULE_FOUND,    /* module or regular package found */
    FL_NS_FOUND         /* possible namespace package portion found */
} find_loader_result;
366
367/* The guts of "find_loader" and "find_module". Return values:
368   -1: error
369    0: no loader or namespace portions found
370    1: module/package found
371    2: namespace portion found: *namespace_portion will point to the name
372*/
373static find_loader_result
374find_loader(ZipImporter *self, PyObject *fullname, PyObject **namespace_portion)
375{
376    enum zi_module_info mi;
377
378    *namespace_portion = NULL;
379
380    mi = get_module_info(self, fullname);
381    if (mi == MI_ERROR)
382        return FL_ERROR;
383    if (mi == MI_NOT_FOUND) {
384        /* Not a module or regular package. See if this is a directory, and
385           therefore possibly a portion of a namespace package. */
386        int is_dir = check_is_directory(self, self->prefix, fullname);
387        if (is_dir < 0)
388            return -1;
389        if (is_dir) {
390            /* This is possibly a portion of a namespace
391               package. Return the string representing its path,
392               without a trailing separator. */
393            *namespace_portion = PyUnicode_FromFormat("%U%c%U%U",
394                                                      self->archive, SEP,
395                                                      self->prefix, fullname);
396            if (*namespace_portion == NULL)
397                return FL_ERROR;
398            return FL_NS_FOUND;
399        }
400        return FL_NOT_FOUND;
401    }
402    /* This is a module or package. */
403    return FL_MODULE_FOUND;
404}
405
406
407/* Check whether we can satisfy the import of the module named by
408   'fullname'. Return self if we can, None if we can't. */
409static PyObject *
410zipimporter_find_module(PyObject *obj, PyObject *args)
411{
412    ZipImporter *self = (ZipImporter *)obj;
413    PyObject *path = NULL;
414    PyObject *fullname;
415    PyObject *namespace_portion = NULL;
416    PyObject *result = NULL;
417
418    if (!PyArg_ParseTuple(args, "U|O:zipimporter.find_module", &fullname, &path))
419        return NULL;
420
421    switch (find_loader(self, fullname, &namespace_portion)) {
422    case FL_ERROR:
423        return NULL;
424    case FL_NS_FOUND:
425        /* A namespace portion is not allowed via find_module, so return None. */
426        Py_DECREF(namespace_portion);
427        /* FALL THROUGH */
428    case FL_NOT_FOUND:
429        result = Py_None;
430        break;
431    case FL_MODULE_FOUND:
432        result = (PyObject *)self;
433        break;
434    }
435    Py_INCREF(result);
436    return result;
437}
438
439
440/* Check whether we can satisfy the import of the module named by
441   'fullname', or whether it could be a portion of a namespace
442   package. Return self if we can load it, a string containing the
443   full path if it's a possible namespace portion, None if we
444   can't load it. */
445static PyObject *
446zipimporter_find_loader(PyObject *obj, PyObject *args)
447{
448    ZipImporter *self = (ZipImporter *)obj;
449    PyObject *path = NULL;
450    PyObject *fullname;
451    PyObject *result = NULL;
452    PyObject *namespace_portion = NULL;
453
454    if (!PyArg_ParseTuple(args, "U|O:zipimporter.find_module", &fullname, &path))
455        return NULL;
456
457    switch (find_loader(self, fullname, &namespace_portion)) {
458    case FL_ERROR:
459        return NULL;
460    case FL_NOT_FOUND:        /* Not found, return (None, []) */
461        result = Py_BuildValue("O[]", Py_None);
462        break;
463    case FL_MODULE_FOUND:     /* Return (self, []) */
464        result = Py_BuildValue("O[]", self);
465        break;
466    case FL_NS_FOUND:         /* Return (None, [namespace_portion]) */
467        result = Py_BuildValue("O[O]", Py_None, namespace_portion);
468        Py_DECREF(namespace_portion);
469        return result;
470    }
471    return result;
472}
473
474/* Load and return the module named by 'fullname'. */
475static PyObject *
476zipimporter_load_module(PyObject *obj, PyObject *args)
477{
478    ZipImporter *self = (ZipImporter *)obj;
479    PyObject *code = NULL, *mod, *dict;
480    PyObject *fullname;
481    PyObject *modpath = NULL;
482    int ispackage;
483
484    if (!PyArg_ParseTuple(args, "U:zipimporter.load_module",
485                          &fullname))
486        return NULL;
487    if (PyUnicode_READY(fullname) == -1)
488        return NULL;
489
490    code = get_module_code(self, fullname, &ispackage, &modpath);
491    if (code == NULL)
492        goto error;
493
494    mod = PyImport_AddModuleObject(fullname);
495    if (mod == NULL)
496        goto error;
497    dict = PyModule_GetDict(mod);
498
499    /* mod.__loader__ = self */
500    if (PyDict_SetItemString(dict, "__loader__", (PyObject *)self) != 0)
501        goto error;
502
503    if (ispackage) {
504        /* add __path__ to the module *before* the code gets
505           executed */
506        PyObject *pkgpath, *fullpath;
507        PyObject *subname = get_subname(fullname);
508        int err;
509
510        fullpath = PyUnicode_FromFormat("%U%c%U%U",
511                                self->archive, SEP,
512                                self->prefix, subname);
513        Py_DECREF(subname);
514        if (fullpath == NULL)
515            goto error;
516
517        pkgpath = Py_BuildValue("[N]", fullpath);
518        if (pkgpath == NULL)
519            goto error;
520        err = PyDict_SetItemString(dict, "__path__", pkgpath);
521        Py_DECREF(pkgpath);
522        if (err != 0)
523            goto error;
524    }
525    mod = PyImport_ExecCodeModuleObject(fullname, code, modpath, NULL);
526    Py_CLEAR(code);
527    if (mod == NULL)
528        goto error;
529
530    if (Py_VerboseFlag)
531        PySys_FormatStderr("import %U # loaded from Zip %U\n",
532                           fullname, modpath);
533    Py_DECREF(modpath);
534    return mod;
535error:
536    Py_XDECREF(code);
537    Py_XDECREF(modpath);
538    return NULL;
539}
540
541/* Return a string matching __file__ for the named module */
542static PyObject *
543zipimporter_get_filename(PyObject *obj, PyObject *args)
544{
545    ZipImporter *self = (ZipImporter *)obj;
546    PyObject *fullname, *code, *modpath;
547    int ispackage;
548
549    if (!PyArg_ParseTuple(args, "U:zipimporter.get_filename",
550                          &fullname))
551        return NULL;
552
553    /* Deciding the filename requires working out where the code
554       would come from if the module was actually loaded */
555    code = get_module_code(self, fullname, &ispackage, &modpath);
556    if (code == NULL)
557        return NULL;
558    Py_DECREF(code); /* Only need the path info */
559
560    return modpath;
561}
562
563/* Return a bool signifying whether the module is a package or not. */
564static PyObject *
565zipimporter_is_package(PyObject *obj, PyObject *args)
566{
567    ZipImporter *self = (ZipImporter *)obj;
568    PyObject *fullname;
569    enum zi_module_info mi;
570
571    if (!PyArg_ParseTuple(args, "U:zipimporter.is_package",
572                          &fullname))
573        return NULL;
574
575    mi = get_module_info(self, fullname);
576    if (mi == MI_ERROR)
577        return NULL;
578    if (mi == MI_NOT_FOUND) {
579        PyErr_Format(ZipImportError, "can't find module %R", fullname);
580        return NULL;
581    }
582    return PyBool_FromLong(mi == MI_PACKAGE);
583}
584
585
586static PyObject *
587zipimporter_get_data(PyObject *obj, PyObject *args)
588{
589    ZipImporter *self = (ZipImporter *)obj;
590    PyObject *path, *key;
591    FILE *fp;
592#ifdef ALTSEP
593    _Py_IDENTIFIER(replace);
594#endif
595    PyObject *toc_entry, *data;
596    Py_ssize_t path_start, path_len, len;
597
598    if (!PyArg_ParseTuple(args, "U:zipimporter.get_data", &path))
599        return NULL;
600
601#ifdef ALTSEP
602    path = _PyObject_CallMethodId(path, &PyId_replace, "CC", ALTSEP, SEP);
603    if (!path)
604        return NULL;
605#else
606    Py_INCREF(path);
607#endif
608    if (PyUnicode_READY(path) == -1)
609        goto error;
610
611    path_len = PyUnicode_GET_LENGTH(path);
612
613    len = PyUnicode_GET_LENGTH(self->archive);
614    path_start = 0;
615    if (PyUnicode_Tailmatch(path, self->archive, 0, len, -1)
616        && PyUnicode_READ_CHAR(path, len) == SEP) {
617        path_start = len + 1;
618    }
619
620    key = PyUnicode_Substring(path, path_start, path_len);
621    if (key == NULL)
622        goto error;
623
624    fp = safely_reopen_archive(self);
625    if (fp == NULL)
626        goto error;
627
628    toc_entry = PyDict_GetItem(self->files, key);
629    if (toc_entry == NULL) {
630        PyErr_SetFromErrnoWithFilenameObject(PyExc_IOError, key);
631        Py_DECREF(key);
632        fclose(fp);
633        goto error;
634    }
635    Py_DECREF(key);
636    Py_DECREF(path);
637    data = get_data(fp, self->archive, toc_entry);
638    fclose(fp);
639    return data;
640  error:
641    Py_DECREF(path);
642    return NULL;
643}
644
645static PyObject *
646zipimporter_get_code(PyObject *obj, PyObject *args)
647{
648    ZipImporter *self = (ZipImporter *)obj;
649    PyObject *fullname;
650
651    if (!PyArg_ParseTuple(args, "U:zipimporter.get_code", &fullname))
652        return NULL;
653
654    return get_module_code(self, fullname, NULL, NULL);
655}
656
657static PyObject *
658zipimporter_get_source(PyObject *obj, PyObject *args)
659{
660    ZipImporter *self = (ZipImporter *)obj;
661    PyObject *toc_entry;
662    PyObject *fullname, *subname, *path, *fullpath;
663    enum zi_module_info mi;
664    FILE *fp;
665
666    if (!PyArg_ParseTuple(args, "U:zipimporter.get_source", &fullname))
667        return NULL;
668
669    mi = get_module_info(self, fullname);
670    if (mi == MI_ERROR)
671        return NULL;
672    if (mi == MI_NOT_FOUND) {
673        PyErr_Format(ZipImportError, "can't find module %R", fullname);
674        return NULL;
675    }
676
677    subname = get_subname(fullname);
678    if (subname == NULL)
679        return NULL;
680
681    path = make_filename(self->prefix, subname);
682    Py_DECREF(subname);
683    if (path == NULL)
684        return NULL;
685
686    if (mi == MI_PACKAGE)
687        fullpath = PyUnicode_FromFormat("%U%c__init__.py", path, SEP);
688    else
689        fullpath = PyUnicode_FromFormat("%U.py", path);
690    Py_DECREF(path);
691    if (fullpath == NULL)
692        return NULL;
693
694    fp = safely_reopen_archive(self);
695    if (fp == NULL) {
696        Py_DECREF(fullpath);
697        return NULL;
698    }
699
700    toc_entry = PyDict_GetItem(self->files, fullpath);
701    Py_DECREF(fullpath);
702    if (toc_entry != NULL) {
703        PyObject *res, *bytes;
704        bytes = get_data(fp, self->archive, toc_entry);
705        fclose(fp);
706        if (bytes == NULL)
707            return NULL;
708        res = PyUnicode_FromStringAndSize(PyBytes_AS_STRING(bytes),
709                                          PyBytes_GET_SIZE(bytes));
710        Py_DECREF(bytes);
711        return res;
712    }
713    fclose(fp);
714
715    /* we have the module, but no source */
716    Py_RETURN_NONE;
717}
718
/* Docstring for zipimporter.find_module(). */
PyDoc_STRVAR(doc_find_module,
"find_module(fullname, path=None) -> self or None.\n\
\n\
Search for a module specified by 'fullname'. 'fullname' must be the\n\
fully qualified (dotted) module name. It returns the zipimporter\n\
instance itself if the module was found, or None if it wasn't.\n\
The optional 'path' argument is ignored -- it's there for compatibility\n\
with the importer protocol.");
727
728PyDoc_STRVAR(doc_find_loader,
729"find_loader(fullname, path=None) -> self, str or None.\n\
730\n\
731Search for a module specified by 'fullname'. 'fullname' must be the\n\
732fully qualified (dotted) module name. It returns the zipimporter\n\
733instance itself if the module was found, a string containing the\n\
734full path name if it's possibly a portion of a namespace package,\n\
735or None otherwise. The optional 'path' argument is ignored -- it's\n\
736 there for compatibility with the importer protocol.");
737
/* Docstrings for the remaining zipimporter methods; referenced from
   zipimporter_methods[]. */
PyDoc_STRVAR(doc_load_module,
"load_module(fullname) -> module.\n\
\n\
Load the module specified by 'fullname'. 'fullname' must be the\n\
fully qualified (dotted) module name. It returns the imported\n\
module, or raises ZipImportError if it wasn't found.");

PyDoc_STRVAR(doc_get_data,
"get_data(pathname) -> string with file data.\n\
\n\
Return the data associated with 'pathname'. Raise IOError if\n\
the file wasn't found.");

PyDoc_STRVAR(doc_is_package,
"is_package(fullname) -> bool.\n\
\n\
Return True if the module specified by fullname is a package.\n\
Raise ZipImportError if the module couldn't be found.");

PyDoc_STRVAR(doc_get_code,
"get_code(fullname) -> code object.\n\
\n\
Return the code object for the specified module. Raise ZipImportError\n\
if the module couldn't be found.");

PyDoc_STRVAR(doc_get_source,
"get_source(fullname) -> source string.\n\
\n\
Return the source code for the specified module. Raise ZipImportError\n\
if the module couldn't be found, return None if the archive does\n\
contain the module, but has no source for it.");


PyDoc_STRVAR(doc_get_filename,
"get_filename(fullname) -> filename string.\n\
\n\
Return the filename for the specified module.");
775
776static PyMethodDef zipimporter_methods[] = {
777    {"find_module", zipimporter_find_module, METH_VARARGS,
778     doc_find_module},
779    {"find_loader", zipimporter_find_loader, METH_VARARGS,
780     doc_find_loader},
781    {"load_module", zipimporter_load_module, METH_VARARGS,
782     doc_load_module},
783    {"get_data", zipimporter_get_data, METH_VARARGS,
784     doc_get_data},
785    {"get_code", zipimporter_get_code, METH_VARARGS,
786     doc_get_code},
787    {"get_source", zipimporter_get_source, METH_VARARGS,
788     doc_get_source},
789    {"get_filename", zipimporter_get_filename, METH_VARARGS,
790     doc_get_filename},
791    {"is_package", zipimporter_is_package, METH_VARARGS,
792     doc_is_package},
793    {NULL,              NULL}   /* sentinel */
794};
795
/* Read-only attributes; each one exposes the ZipImporter field of the
   same purpose directly ("_files" maps to the files dict). */
static PyMemberDef zipimporter_members[] = {
    {"archive",  T_OBJECT, offsetof(ZipImporter, archive),  READONLY},
    {"prefix",   T_OBJECT, offsetof(ZipImporter, prefix),   READONLY},
    {"_files",   T_OBJECT, offsetof(ZipImporter, files),    READONLY},
    {NULL}  /* sentinel */
};
802
803PyDoc_STRVAR(zipimporter_doc,
804"zipimporter(archivepath) -> zipimporter object\n\
805\n\
806Create a new zipimporter instance. 'archivepath' must be a path to\n\
807a zipfile, or to a specific path inside a zipfile. For example, it can be\n\
808'/tmp/myimport.zip', or '/tmp/myimport.zip/mydirectory', if mydirectory is a\n\
809valid directory inside the archive.\n\
810\n\
811'ZipImportError is raised if 'archivepath' doesn't point to a valid Zip\n\
812archive.\n\
813\n\
814The 'archive' attribute of zipimporter objects contains the name of the\n\
815zipfile targeted.");
816
/* Expands to 0 regardless of its argument, so the type's ob_type slot is
   statically initialised to NULL below.
   NOTE(review): presumably the real address is filled in during module
   initialisation, which is not visible here -- confirm. */
#define DEFERRED_ADDRESS(ADDR) 0

/* Type object for zipimport.zipimporter: a GC-tracked, subclassable type
   whose instances are set up by zipimporter_init(). The initializer is
   positional, so the order of the slots below must not change. */
static PyTypeObject ZipImporter_Type = {
    PyVarObject_HEAD_INIT(DEFERRED_ADDRESS(&PyType_Type), 0)
    "zipimport.zipimporter",
    sizeof(ZipImporter),
    0,                                          /* tp_itemsize */
    (destructor)zipimporter_dealloc,            /* tp_dealloc */
    0,                                          /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_reserved */
    (reprfunc)zipimporter_repr,                 /* tp_repr */
    0,                                          /* tp_as_number */
    0,                                          /* tp_as_sequence */
    0,                                          /* tp_as_mapping */
    0,                                          /* tp_hash */
    0,                                          /* tp_call */
    0,                                          /* tp_str */
    PyObject_GenericGetAttr,                    /* tp_getattro */
    0,                                          /* tp_setattro */
    0,                                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
        Py_TPFLAGS_HAVE_GC,                     /* tp_flags */
    zipimporter_doc,                            /* tp_doc */
    zipimporter_traverse,                       /* tp_traverse */
    0,                                          /* tp_clear */
    0,                                          /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    0,                                          /* tp_iter */
    0,                                          /* tp_iternext */
    zipimporter_methods,                        /* tp_methods */
    zipimporter_members,                        /* tp_members */
    0,                                          /* tp_getset */
    0,                                          /* tp_base */
    0,                                          /* tp_dict */
    0,                                          /* tp_descr_get */
    0,                                          /* tp_descr_set */
    0,                                          /* tp_dictoffset */
    (initproc)zipimporter_init,                 /* tp_init */
    PyType_GenericAlloc,                        /* tp_alloc */
    PyType_GenericNew,                          /* tp_new */
    PyObject_GC_Del,                            /* tp_free */
};
861
862
863/* implementation */
864
/* Given a buffer, return the long that is represented by the first
   4 bytes, encoded as little endian. This partially reimplements
   marshal.c:r_long().

   The four bytes are interpreted as a signed 32-bit two's complement
   value. The value is assembled in unsigned arithmetic so that no bit
   is ever shifted into the sign bit of a signed type (undefined
   behaviour when long is 32 bits wide), and the sign is applied
   arithmetically so the result does not depend on the width of long. */
static long
get_long(unsigned char *buf) {
    unsigned long x;

    x =  (unsigned long)buf[0];
    x |= (unsigned long)buf[1] <<  8;
    x |= (unsigned long)buf[2] << 16;
    x |= (unsigned long)buf[3] << 24;

    if (x & 0x80000000UL) {
        /* negative: value is x - 2**32, computed without overflow as
           -(2**32 - 1 - x) - 1 */
        return -(long)(0xFFFFFFFFUL - x) - 1;
    }
    return (long)x;
}
881
882/* Return 1 if objects a and b fail a Py_EQ test for an attr. */
883static int
884compare_obj_attr_strings(PyObject *obj_a, PyObject *obj_b, char *attr_name)
885{
886    int problem = 0;
887    PyObject *attr_a = PyObject_GetAttrString(obj_a, attr_name);
888    PyObject *attr_b = PyObject_GetAttrString(obj_b, attr_name);
889    if (attr_a == NULL || attr_b == NULL)
890        problem = 1;
891    else
892        problem = (PyObject_RichCompareBool(attr_a, attr_b, Py_EQ) != 1);
893    Py_XDECREF(attr_a);
894    Py_XDECREF(attr_b);
895    return problem;
896}
897
898/*
899 * Returns an open FILE * on success.
900 * Returns NULL on error with the Python error context set.
901 */
902static FILE *
903safely_reopen_archive(ZipImporter *self)
904{
905    FILE *fp;
906    PyObject *stat_now = NULL;
907
908    fp = fopen_rb_and_stat(self->archive, &stat_now);
909    if (!fp) {
910        PyErr_Format(ZipImportError,
911                     "zipimport: can not open file %U", self->archive);
912        Py_XDECREF(stat_now);
913        return NULL;
914    }
915
916    if (stat_now != NULL) {
917        int problem = 0;
918        PyObject *files;
919        PyObject *prev_stat = PyDict_GetItem(zip_stat_cache, self->archive);
920        /* Test stat_now vs the old cached stat on some key attributes. */
921        if (prev_stat != NULL) {
922            problem = compare_obj_attr_strings(prev_stat, stat_now,
923                                               "st_ino");
924            problem |= compare_obj_attr_strings(prev_stat, stat_now,
925                                                "st_size");
926            problem |= compare_obj_attr_strings(prev_stat, stat_now,
927                                                "st_mtime");
928        } else {
929            if (Py_VerboseFlag)
930                PySys_FormatStderr("# zipimport: no stat data for %U!\n",
931                                   self->archive);
932            problem = 1;
933        }
934
935        if (problem) {
936            if (Py_VerboseFlag)
937                PySys_FormatStderr("# zipimport: %U modified since last"
938                                   " import, rereading TOC.\n", self->archive);
939            files = read_directory(fp, self->archive);
940            if (files == NULL) {
941                Py_DECREF(stat_now);
942                fclose(fp);
943                return NULL;
944            }
945            if (PyDict_SetItem(zip_directory_cache, self->archive,
946                               files) != 0) {
947                Py_DECREF(files);
948                Py_DECREF(stat_now);
949                fclose(fp);
950                return NULL;
951            }
952            if (stat_now && PyDict_SetItem(zip_stat_cache, self->archive,
953                                           stat_now) != 0) {
954                Py_DECREF(files);
955                Py_DECREF(stat_now);
956                fclose(fp);
957                return NULL;
958            }
959            Py_XDECREF(self->files);  /* free the old value. */
960            self->files = files;
961        }
962        Py_DECREF(stat_now);
963    }  /* stat succeeded */
964
965    return fp;
966}
967
968/*
969   fopen_rb_and_stat(path, &py_stat) -> FILE *
970
971   Opens path in "rb" mode and populates the Python py_stat stat_result
972   with information about the opened file.  *py_stat may not be changed
973   if there is no fstat_function or if fstat_function fails.
974
975   Returns NULL and does nothing to *py_stat if the open failed.
976*/
977static FILE *
978fopen_rb_and_stat(PyObject *path, PyObject **py_stat_p)
979{
980    FILE *fp;
981    assert(py_stat_p != NULL);
982    assert(*py_stat_p == NULL);
983
984    fp = _Py_fopen(path, "rb");
985    if (fp == NULL) {
986        if (!PyErr_Occurred())
987            PyErr_Format(ZipImportError,
988                         "zipimport: can not open file %U", path);
989        return NULL;
990    }
991
992    if (fstat_function) {
993        PyObject *stat_result = PyObject_CallFunction(fstat_function,
994                                                      "i", fileno(fp));
995        if (stat_result == NULL) {
996            PyErr_Clear();  /* We can function without it. */
997        } else {
998            *py_stat_p = stat_result;
999        }
1000    }
1001
1002    return fp;
1003}
1004
1005/*
1006   read_directory(fp, archive) -> files dict (new reference)
1007
1008   Given an open Zip archive, build a dict, mapping file names
1009   (local to the archive, using SEP as a separator) to toc entries.
1010
1011   A toc_entry is a tuple:
1012
1013   (__file__,      # value to use for __file__, available for all files,
1014                   # encoded to the filesystem encoding
1015    compress,      # compression kind; 0 for uncompressed
1016    data_size,     # size of compressed data on disk
1017    file_size,     # size of decompressed data
1018    file_offset,   # offset of file header from start of archive
1019    time,          # mod time of file (in dos format)
1020    date,          # mod data of file (in dos format)
1021    crc,           # crc checksum of the data
1022   )
1023
1024   Directories can be recognized by the trailing SEP in the name,
1025   data_size and file_offset are 0.
1026*/
1027static PyObject *
1028read_directory(FILE *fp, PyObject *archive)
1029{
1030    PyObject *files = NULL;
1031    unsigned short flags;
1032    short compress, time, date, name_size;
1033    long crc, data_size, file_size, header_size;
1034    Py_ssize_t file_offset, header_position, header_offset;
1035    long l, count;
1036    Py_ssize_t i;
1037    char name[MAXPATHLEN + 5];
1038    PyObject *nameobj = NULL;
1039    char *p, endof_central_dir[22];
1040    Py_ssize_t arc_offset;  /* Absolute offset to start of the zip-archive. */
1041    PyObject *path;
1042    const char *charset;
1043    int bootstrap;
1044
1045    assert(fp != NULL);
1046    if (fseek(fp, -22, SEEK_END) == -1) {
1047        PyErr_Format(ZipImportError, "can't read Zip file: %R", archive);
1048        return NULL;
1049    }
1050    header_position = ftell(fp);
1051    if (fread(endof_central_dir, 1, 22, fp) != 22) {
1052        PyErr_Format(ZipImportError, "can't read Zip file: %R", archive);
1053        return NULL;
1054    }
1055    if (get_long((unsigned char *)endof_central_dir) != 0x06054B50) {
1056        /* Bad: End of Central Dir signature */
1057        PyErr_Format(ZipImportError, "not a Zip file: %R", archive);
1058        return NULL;
1059    }
1060
1061    header_size = get_long((unsigned char *)endof_central_dir + 12);
1062    header_offset = get_long((unsigned char *)endof_central_dir + 16);
1063    arc_offset = header_position - header_offset - header_size;
1064    header_offset += arc_offset;
1065
1066    files = PyDict_New();
1067    if (files == NULL)
1068        goto error;
1069
1070    /* Start of Central Directory */
1071    count = 0;
1072    for (;;) {
1073        PyObject *t;
1074        int err;
1075
1076        if (fseek(fp, header_offset, 0) == -1)  /* Start of file header */
1077            goto fseek_error;
1078        l = PyMarshal_ReadLongFromFile(fp);
1079        if (l != 0x02014B50)
1080            break;              /* Bad: Central Dir File Header */
1081        if (fseek(fp, header_offset + 8, 0) == -1)
1082            goto fseek_error;
1083        flags = (unsigned short)PyMarshal_ReadShortFromFile(fp);
1084        compress = PyMarshal_ReadShortFromFile(fp);
1085        time = PyMarshal_ReadShortFromFile(fp);
1086        date = PyMarshal_ReadShortFromFile(fp);
1087        crc = PyMarshal_ReadLongFromFile(fp);
1088        data_size = PyMarshal_ReadLongFromFile(fp);
1089        file_size = PyMarshal_ReadLongFromFile(fp);
1090        name_size = PyMarshal_ReadShortFromFile(fp);
1091        header_size = 46 + name_size +
1092           PyMarshal_ReadShortFromFile(fp) +
1093           PyMarshal_ReadShortFromFile(fp);
1094        if (fseek(fp, header_offset + 42, 0) == -1)
1095            goto fseek_error;
1096        file_offset = PyMarshal_ReadLongFromFile(fp) + arc_offset;
1097        if (name_size > MAXPATHLEN)
1098            name_size = MAXPATHLEN;
1099
1100        p = name;
1101        for (i = 0; i < (Py_ssize_t)name_size; i++) {
1102            *p = (char)getc(fp);
1103            if (*p == '/')
1104                *p = SEP;
1105            p++;
1106        }
1107        *p = 0;         /* Add terminating null byte */
1108        header_offset += header_size;
1109
1110        bootstrap = 0;
1111        if (flags & 0x0800)
1112            charset = "utf-8";
1113        else if (!PyThreadState_GET()->interp->codecs_initialized) {
1114            /* During bootstrap, we may need to load the encodings
1115               package from a ZIP file. But the cp437 encoding is implemented
1116               in Python in the encodings package.
1117
1118               Break out of this dependency by assuming that the path to
1119               the encodings module is ASCII-only. */
1120            charset = "ascii";
1121            bootstrap = 1;
1122        }
1123        else
1124            charset = "cp437";
1125        nameobj = PyUnicode_Decode(name, name_size, charset, NULL);
1126        if (nameobj == NULL) {
1127            if (bootstrap)
1128                PyErr_Format(PyExc_NotImplementedError,
1129                    "bootstrap issue: python%i%i.zip contains non-ASCII "
1130                    "filenames without the unicode flag",
1131                    PY_MAJOR_VERSION, PY_MINOR_VERSION);
1132            goto error;
1133        }
1134        if (PyUnicode_READY(nameobj) == -1)
1135            goto error;
1136        path = PyUnicode_FromFormat("%U%c%U", archive, SEP, nameobj);
1137        if (path == NULL)
1138            goto error;
1139        t = Py_BuildValue("Nhllnhhl", path, compress, data_size,
1140                          file_size, file_offset, time, date, crc);
1141        if (t == NULL)
1142            goto error;
1143        err = PyDict_SetItem(files, nameobj, t);
1144        Py_CLEAR(nameobj);
1145        Py_DECREF(t);
1146        if (err != 0)
1147            goto error;
1148        count++;
1149    }
1150    if (Py_VerboseFlag)
1151        PySys_FormatStderr("# zipimport: found %ld names in %R\n",
1152                           count, archive);
1153    return files;
1154fseek_error:
1155    Py_XDECREF(files);
1156    Py_XDECREF(nameobj);
1157    PyErr_Format(ZipImportError, "can't read Zip file: %R", archive);
1158    return NULL;
1159error:
1160    Py_XDECREF(files);
1161    Py_XDECREF(nameobj);
1162    return NULL;
1163}
1164
1165/* Return the zlib.decompress function object, or NULL if zlib couldn't
1166   be imported. The function is cached when found, so subsequent calls
1167   don't import zlib again. */
1168static PyObject *
1169get_decompress_func(void)
1170{
1171    static int importing_zlib = 0;
1172    PyObject *zlib;
1173    PyObject *decompress;
1174    _Py_IDENTIFIER(decompress);
1175
1176    if (importing_zlib != 0)
1177        /* Someone has a zlib.py[co] in their Zip file;
1178           let's avoid a stack overflow. */
1179        return NULL;
1180    importing_zlib = 1;
1181    zlib = PyImport_ImportModuleNoBlock("zlib");
1182    importing_zlib = 0;
1183    if (zlib != NULL) {
1184        decompress = _PyObject_GetAttrId(zlib,
1185                                         &PyId_decompress);
1186        Py_DECREF(zlib);
1187    }
1188    else {
1189        PyErr_Clear();
1190        decompress = NULL;
1191    }
1192    if (Py_VerboseFlag)
1193        PySys_WriteStderr("# zipimport: zlib %s\n",
1194            zlib != NULL ? "available": "UNAVAILABLE");
1195    return decompress;
1196}
1197
1198/* Given a FILE* to a Zip file and a toc_entry, return the (uncompressed)
1199   data as a new reference. */
1200static PyObject *
1201get_data(FILE *fp, PyObject *archive, PyObject *toc_entry)
1202{
1203    PyObject *raw_data, *data = NULL, *decompress;
1204    char *buf;
1205    int err;
1206    Py_ssize_t bytes_read = 0;
1207    long l;
1208    PyObject *datapath;
1209    long compress, data_size, file_size, file_offset, bytes_size;
1210    long time, date, crc;
1211
1212    if (!PyArg_ParseTuple(toc_entry, "Olllllll", &datapath, &compress,
1213                          &data_size, &file_size, &file_offset, &time,
1214                          &date, &crc)) {
1215        return NULL;
1216    }
1217
1218    /* Check to make sure the local file header is correct */
1219    if (fseek(fp, file_offset, 0) == -1) {
1220        PyErr_Format(ZipImportError, "can't read Zip file: %R", archive);
1221        return NULL;
1222    }
1223
1224    l = PyMarshal_ReadLongFromFile(fp);
1225    if (l != 0x04034B50) {
1226        /* Bad: Local File Header */
1227        PyErr_Format(ZipImportError,
1228                     "bad local file header in %U",
1229                     archive);
1230        return NULL;
1231    }
1232    if (fseek(fp, file_offset + 26, 0) == -1) {
1233        PyErr_Format(ZipImportError, "can't read Zip file: %R", archive);
1234        return NULL;
1235    }
1236
1237    l = 30 + PyMarshal_ReadShortFromFile(fp) +
1238        PyMarshal_ReadShortFromFile(fp);        /* local header size */
1239    file_offset += l;           /* Start of file data */
1240
1241    bytes_size = compress == 0 ? data_size : data_size + 1;
1242    if (bytes_size == 0)
1243        bytes_size++;
1244    raw_data = PyBytes_FromStringAndSize((char *)NULL, bytes_size);
1245
1246    if (raw_data == NULL) {
1247        return NULL;
1248    }
1249    buf = PyBytes_AsString(raw_data);
1250
1251    err = fseek(fp, file_offset, 0);
1252    if (err == 0) {
1253        bytes_read = fread(buf, 1, data_size, fp);
1254    } else {
1255        PyErr_Format(ZipImportError, "can't read Zip file: %R", archive);
1256        return NULL;
1257    }
1258    if (err || bytes_read != data_size) {
1259        PyErr_SetString(PyExc_IOError,
1260                        "zipimport: can't read data");
1261        Py_DECREF(raw_data);
1262        return NULL;
1263    }
1264
1265    if (compress != 0) {
1266        buf[data_size] = 'Z';  /* saw this in zipfile.py */
1267        data_size++;
1268    }
1269    buf[data_size] = '\0';
1270
1271    if (compress == 0) {  /* data is not compressed */
1272        data = PyBytes_FromStringAndSize(buf, data_size);
1273        Py_DECREF(raw_data);
1274        return data;
1275    }
1276
1277    /* Decompress with zlib */
1278    decompress = get_decompress_func();
1279    if (decompress == NULL) {
1280        PyErr_SetString(ZipImportError,
1281                        "can't decompress data; "
1282                        "zlib not available");
1283        goto error;
1284    }
1285    data = PyObject_CallFunction(decompress, "Oi", raw_data, -15);
1286    Py_DECREF(decompress);
1287error:
1288    Py_DECREF(raw_data);
1289    return data;
1290}
1291
/* Lenient date/time comparison function. The mtime recorded in the
   archive is less precise than the one stored in a .pyc (dostime only
   stores even seconds), so two time stamps are treated as equal when
   they differ by at most one second.  Returns 1 if so, 0 otherwise. */
static int
eq_mtime(time_t t1, time_t t2)
{
    time_t delta = t1 - t2;

    return (delta < 0 ? -delta : delta) <= 1;
}
1304
1305/* Given the contents of a .py[co] file in a buffer, unmarshal the data
1306   and return the code object. Return None if it the magic word doesn't
1307   match (we do this instead of raising an exception as we fall back
1308   to .py if available and we don't want to mask other errors).
1309   Returns a new reference. */
1310static PyObject *
1311unmarshal_code(PyObject *pathname, PyObject *data, time_t mtime)
1312{
1313    PyObject *code;
1314    char *buf = PyBytes_AsString(data);
1315    Py_ssize_t size = PyBytes_Size(data);
1316
1317    if (size <= 9) {
1318        PyErr_SetString(ZipImportError,
1319                        "bad pyc data");
1320        return NULL;
1321    }
1322
1323    if (get_long((unsigned char *)buf) != PyImport_GetMagicNumber()) {
1324        if (Py_VerboseFlag)
1325            PySys_FormatStderr("# %R has bad magic\n",
1326                               pathname);
1327        Py_INCREF(Py_None);
1328        return Py_None;  /* signal caller to try alternative */
1329    }
1330
1331    if (mtime != 0 && !eq_mtime(get_long((unsigned char *)buf + 4),
1332                                mtime)) {
1333        if (Py_VerboseFlag)
1334            PySys_FormatStderr("# %R has bad mtime\n",
1335                               pathname);
1336        Py_INCREF(Py_None);
1337        return Py_None;  /* signal caller to try alternative */
1338    }
1339
1340    /* XXX the pyc's size field is ignored; timestamp collisions are probably
1341       unimportant with zip files. */
1342    code = PyMarshal_ReadObjectFromString(buf + 12, size - 12);
1343    if (code == NULL)
1344        return NULL;
1345    if (!PyCode_Check(code)) {
1346        Py_DECREF(code);
1347        PyErr_Format(PyExc_TypeError,
1348             "compiled module %R is not a code object",
1349             pathname);
1350        return NULL;
1351    }
1352    return code;
1353}
1354
1355/* Replace any occurances of "\r\n?" in the input string with "\n".
1356   This converts DOS and Mac line endings to Unix line endings.
1357   Also append a trailing "\n" to be compatible with
1358   PyParser_SimpleParseFile(). Returns a new reference. */
1359static PyObject *
1360normalize_line_endings(PyObject *source)
1361{
1362    char *buf, *q, *p;
1363    PyObject *fixed_source;
1364    int len = 0;
1365
1366    p = PyBytes_AsString(source);
1367    if (p == NULL) {
1368        return PyBytes_FromStringAndSize("\n\0", 2);
1369    }
1370
1371    /* one char extra for trailing \n and one for terminating \0 */
1372    buf = (char *)PyMem_Malloc(PyBytes_Size(source) + 2);
1373    if (buf == NULL) {
1374        PyErr_SetString(PyExc_MemoryError,
1375                        "zipimport: no memory to allocate "
1376                        "source buffer");
1377        return NULL;
1378    }
1379    /* replace "\r\n?" by "\n" */
1380    for (q = buf; *p != '\0'; p++) {
1381        if (*p == '\r') {
1382            *q++ = '\n';
1383            if (*(p + 1) == '\n')
1384                p++;
1385        }
1386        else
1387            *q++ = *p;
1388        len++;
1389    }
1390    *q++ = '\n';  /* add trailing \n */
1391    *q = '\0';
1392    fixed_source = PyBytes_FromStringAndSize(buf, len + 2);
1393    PyMem_Free(buf);
1394    return fixed_source;
1395}
1396
1397/* Given a string buffer containing Python source code, compile it
1398   return and return a code object as a new reference. */
1399static PyObject *
1400compile_source(PyObject *pathname, PyObject *source)
1401{
1402    PyObject *code, *fixed_source, *pathbytes;
1403
1404    pathbytes = PyUnicode_EncodeFSDefault(pathname);
1405    if (pathbytes == NULL)
1406        return NULL;
1407
1408    fixed_source = normalize_line_endings(source);
1409    if (fixed_source == NULL) {
1410        Py_DECREF(pathbytes);
1411        return NULL;
1412    }
1413
1414    code = Py_CompileString(PyBytes_AsString(fixed_source),
1415                            PyBytes_AsString(pathbytes),
1416                            Py_file_input);
1417    Py_DECREF(pathbytes);
1418    Py_DECREF(fixed_source);
1419    return code;
1420}
1421
/* Convert the date/time values found in the Zip archive to a value
   that's compatible with the time stamp stored in .pyc files.

   dostime packs seconds/2 in bits 0-4, minutes in bits 5-10 and hours in
   bits 11-15; dosdate packs the day in bits 0-4, the month (1-based) in
   bits 5-8 and the years since 1980 in bits 9-15.  The fields are passed
   through mktime(), which decides whether DST applies. */
static time_t
parse_dostime(int dostime, int dosdate)
{
    struct tm broken_down;
    const int half_seconds = dostime & 0x1f;
    const int minutes = (dostime >> 5) & 0x3f;
    const int hours = (dostime >> 11) & 0x1f;
    const int day = dosdate & 0x1f;
    const int month = (dosdate >> 5) & 0x0f;          /* 1 = January */
    const int years_since_1980 = (dosdate >> 9) & 0x7f;

    memset((void *) &broken_down, '\0', sizeof(broken_down));

    broken_down.tm_year = years_since_1980 + 80;      /* tm_year counts from 1900 */
    broken_down.tm_mon = month - 1;                   /* tm_mon is 0-based */
    broken_down.tm_mday = day;
    broken_down.tm_hour = hours;
    broken_down.tm_min = minutes;
    broken_down.tm_sec = half_seconds * 2;
    broken_down.tm_isdst = -1; /* wday/yday is ignored */

    return mktime(&broken_down);
}
1441
1442/* Given a path to a .pyc or .pyo file in the archive, return the
1443   modification time of the matching .py file, or 0 if no source
1444   is available. */
1445static time_t
1446get_mtime_of_source(ZipImporter *self, PyObject *path)
1447{
1448    PyObject *toc_entry, *stripped;
1449    time_t mtime;
1450
1451    /* strip 'c' or 'o' from *.py[co] */
1452    if (PyUnicode_READY(path) == -1)
1453        return (time_t)-1;
1454    stripped = PyUnicode_FromKindAndData(PyUnicode_KIND(path),
1455                                         PyUnicode_DATA(path),
1456                                         PyUnicode_GET_LENGTH(path) - 1);
1457    if (stripped == NULL)
1458        return (time_t)-1;
1459
1460    toc_entry = PyDict_GetItem(self->files, stripped);
1461    Py_DECREF(stripped);
1462    if (toc_entry != NULL && PyTuple_Check(toc_entry) &&
1463        PyTuple_Size(toc_entry) == 8) {
1464        /* fetch the time stamp of the .py file for comparison
1465           with an embedded pyc time stamp */
1466        int time, date;
1467        time = PyLong_AsLong(PyTuple_GetItem(toc_entry, 5));
1468        date = PyLong_AsLong(PyTuple_GetItem(toc_entry, 6));
1469        mtime = parse_dostime(time, date);
1470    } else
1471        mtime = 0;
1472    return mtime;
1473}
1474
1475/* Return the code object for the module named by 'fullname' from the
1476   Zip archive as a new reference. */
1477static PyObject *
1478get_code_from_data(ZipImporter *self, FILE *fp, int ispackage, int isbytecode,
1479                   time_t mtime, PyObject *toc_entry)
1480{
1481    PyObject *data, *modpath, *code;
1482
1483    data = get_data(fp, self->archive, toc_entry);
1484    if (data == NULL)
1485        return NULL;
1486
1487    modpath = PyTuple_GetItem(toc_entry, 0);
1488    if (isbytecode)
1489        code = unmarshal_code(modpath, data, mtime);
1490    else
1491        code = compile_source(modpath, data);
1492    Py_DECREF(data);
1493    return code;
1494}
1495
1496/* Get the code object associated with the module specified by
1497   'fullname'. */
1498static PyObject *
1499get_module_code(ZipImporter *self, PyObject *fullname,
1500                int *p_ispackage, PyObject **p_modpath)
1501{
1502    PyObject *code = NULL, *toc_entry, *subname;
1503    PyObject *path, *fullpath = NULL;
1504    struct st_zip_searchorder *zso;
1505    FILE *fp;
1506
1507    subname = get_subname(fullname);
1508    if (subname == NULL)
1509        return NULL;
1510
1511    path = make_filename(self->prefix, subname);
1512    Py_DECREF(subname);
1513    if (path == NULL)
1514        return NULL;
1515
1516    fp = safely_reopen_archive(self);
1517    if (fp == NULL) {
1518        Py_DECREF(path);
1519        return NULL;
1520    }
1521
1522    for (zso = zip_searchorder; *zso->suffix; zso++) {
1523        code = NULL;
1524
1525        fullpath = PyUnicode_FromFormat("%U%s", path, zso->suffix);
1526        if (fullpath == NULL)
1527            goto exit;
1528
1529        if (Py_VerboseFlag > 1)
1530            PySys_FormatStderr("# trying %U%c%U\n",
1531                               self->archive, (int)SEP, fullpath);
1532
1533        toc_entry = PyDict_GetItem(self->files, fullpath);
1534        if (toc_entry != NULL) {
1535            time_t mtime = 0;
1536            int ispackage = zso->type & IS_PACKAGE;
1537            int isbytecode = zso->type & IS_BYTECODE;
1538
1539            if (isbytecode) {
1540                mtime = get_mtime_of_source(self, fullpath);
1541                if (mtime == (time_t)-1 && PyErr_Occurred()) {
1542                    goto exit;
1543                }
1544            }
1545            Py_CLEAR(fullpath);
1546            if (p_ispackage != NULL)
1547                *p_ispackage = ispackage;
1548            code = get_code_from_data(self, fp, ispackage,
1549                                      isbytecode, mtime,
1550                                      toc_entry);
1551            if (code == Py_None) {
1552                /* bad magic number or non-matching mtime
1553                   in byte code, try next */
1554                Py_DECREF(code);
1555                continue;
1556            }
1557            if (code != NULL && p_modpath != NULL) {
1558                *p_modpath = PyTuple_GetItem(toc_entry, 0);
1559                Py_INCREF(*p_modpath);
1560            }
1561            goto exit;
1562        }
1563        else
1564            Py_CLEAR(fullpath);
1565    }
1566    PyErr_Format(ZipImportError, "can't find module %R", fullname);
1567exit:
1568    fclose(fp);
1569    Py_DECREF(path);
1570    Py_XDECREF(fullpath);
1571    return code;
1572}
1573
1574
1575/* Module init */
1576
1577PyDoc_STRVAR(zipimport_doc,
1578"zipimport provides support for importing Python modules from Zip archives.\n\
1579\n\
1580This module exports three objects:\n\
1581- zipimporter: a class; its constructor takes a path to a Zip archive.\n\
1582- ZipImportError: exception raised by zipimporter objects. It's a\n\
1583  subclass of ImportError, so it can be caught as ImportError, too.\n\
1584- _zip_directory_cache: a dict, mapping archive paths to zip directory\n\
1585  info dicts, as used in zipimporter._files.\n\
1586- _zip_stat_cache: a dict, mapping archive paths to stat_result\n\
1587  info for the .zip the last time anything was imported from it.\n\
1588\n\
1589It is usually not needed to use the zipimport module explicitly; it is\n\
1590used by the builtin import mechanism for sys.path items that are paths\n\
1591to Zip archives.");
1592
1593static struct PyModuleDef zipimportmodule = {
1594    PyModuleDef_HEAD_INIT,
1595    "zipimport",
1596    zipimport_doc,
1597    -1,
1598    NULL,
1599    NULL,
1600    NULL,
1601    NULL,
1602    NULL
1603};
1604
1605PyMODINIT_FUNC
1606PyInit_zipimport(void)
1607{
1608    PyObject *mod;
1609
1610    if (PyType_Ready(&ZipImporter_Type) < 0)
1611        return NULL;
1612
1613    /* Correct directory separator */
1614    zip_searchorder[0].suffix[0] = SEP;
1615    zip_searchorder[1].suffix[0] = SEP;
1616    zip_searchorder[2].suffix[0] = SEP;
1617    if (Py_OptimizeFlag) {
1618        /* Reverse *.pyc and *.pyo */
1619        struct st_zip_searchorder tmp;
1620        tmp = zip_searchorder[0];
1621        zip_searchorder[0] = zip_searchorder[1];
1622        zip_searchorder[1] = tmp;
1623        tmp = zip_searchorder[3];
1624        zip_searchorder[3] = zip_searchorder[4];
1625        zip_searchorder[4] = tmp;
1626    }
1627
1628    mod = PyModule_Create(&zipimportmodule);
1629    if (mod == NULL)
1630        return NULL;
1631
1632    ZipImportError = PyErr_NewException("zipimport.ZipImportError",
1633                                        PyExc_ImportError, NULL);
1634    if (ZipImportError == NULL)
1635        return NULL;
1636
1637    Py_INCREF(ZipImportError);
1638    if (PyModule_AddObject(mod, "ZipImportError",
1639                           ZipImportError) < 0)
1640        return NULL;
1641
1642    Py_INCREF(&ZipImporter_Type);
1643    if (PyModule_AddObject(mod, "zipimporter",
1644                           (PyObject *)&ZipImporter_Type) < 0)
1645        return NULL;
1646
1647    Py_XDECREF(zip_directory_cache);  /* Avoid embedded interpreter leaks. */
1648    zip_directory_cache = PyDict_New();
1649    if (zip_directory_cache == NULL)
1650        return NULL;
1651    Py_INCREF(zip_directory_cache);
1652    if (PyModule_AddObject(mod, "_zip_directory_cache",
1653                           zip_directory_cache) < 0)
1654        return NULL;
1655
1656    Py_XDECREF(zip_stat_cache);  /* Avoid embedded interpreter leaks. */
1657    zip_stat_cache = PyDict_New();
1658    if (zip_stat_cache == NULL)
1659        return NULL;
1660    Py_INCREF(zip_stat_cache);
1661    if (PyModule_AddObject(mod, "_zip_stat_cache", zip_stat_cache) < 0)
1662        return NULL;
1663
1664    {
1665        /* We cannot import "os" here as that is a .py/.pyc file that could
1666         * live within a zipped up standard library.  Import the posix or nt
1667         * builtin that provides the fstat() function we want instead. */
1668        PyObject *os_like_module;
1669        Py_CLEAR(fstat_function);  /* Avoid embedded interpreter leaks. */
1670        os_like_module = PyImport_ImportModule("posix");
1671        if (os_like_module == NULL) {
1672            PyErr_Clear();
1673            os_like_module = PyImport_ImportModule("nt");
1674        }
1675        if (os_like_module != NULL) {
1676            fstat_function = PyObject_GetAttrString(os_like_module, "fstat");
1677            Py_DECREF(os_like_module);
1678        }
1679        if (fstat_function == NULL) {
1680            PyErr_Clear();  /* non-fatal, we'll go on without it. */
1681            if (Py_VerboseFlag)
1682                PySys_WriteStderr("# zipimport unable to use os.fstat().\n");
1683        }
1684    }
1685
1686    return mod;
1687}
1688