zipimport.c revision 2460a43a6569fbf240c5a72d0b052565617213eb
1#include "Python.h"
2#include "structmember.h"
3#include "osdefs.h"
4#include "marshal.h"
5#include <time.h>
6
7
8#define IS_SOURCE   0x0
9#define IS_BYTECODE 0x1
10#define IS_PACKAGE  0x2
11
/* One row of the module search table: a filename suffix to append to
   the module path, plus the IS_* flags describing what a match on that
   suffix means. */
struct st_zip_searchorder {
    char suffix[14];    /* longest entry, "/__init__.pyc", is 13 chars + NUL */
    int type;           /* bitwise OR of IS_SOURCE, IS_BYTECODE, IS_PACKAGE */
};
16
17/* zip_searchorder defines how we search for a module in the Zip
18   archive: we first search for a package __init__, then for
19   non-package .pyc, .pyo and .py entries. The .pyc and .pyo entries
20   are swapped by initzipimport() if we run in optimized mode. Also,
21   '/' is replaced by SEP there. */
22static struct st_zip_searchorder zip_searchorder[] = {
23    {"/__init__.pyc", IS_PACKAGE | IS_BYTECODE},
24    {"/__init__.pyo", IS_PACKAGE | IS_BYTECODE},
25    {"/__init__.py", IS_PACKAGE | IS_SOURCE},
26    {".pyc", IS_BYTECODE},
27    {".pyo", IS_BYTECODE},
28    {".py", IS_SOURCE},
29    {"", 0}
30};
31
32/* zipimporter object definition and support */
33
34typedef struct _zipimporter ZipImporter;
35
36struct _zipimporter {
37    PyObject_HEAD
38    PyObject *archive;  /* pathname of the Zip archive */
39    PyObject *prefix;   /* file prefix: "a/sub/directory/" */
40    PyObject *files;    /* dict with file info {path: toc_entry} */
41};
42
43static PyObject *ZipImportError;
44static PyObject *zip_directory_cache = NULL;
45
46/* forward decls */
47static PyObject *read_directory(PyObject *archive);
48static PyObject *get_data(char *archive, PyObject *toc_entry);
49static PyObject *get_module_code(ZipImporter *self, char *fullname,
50                                 int *p_ispackage, char **p_modpath);
51
52
53#define ZipImporter_Check(op) PyObject_TypeCheck(op, &ZipImporter_Type)
54
55
56/* zipimporter.__init__
57   Split the "subdirectory" from the Zip archive path, lookup a matching
58   entry in sys.path_importer_cache, fetch the file directory from there
59   if found, or else read it from the archive. */
60static int
61zipimporter_init(ZipImporter *self, PyObject *args, PyObject *kwds)
62{
63    PyObject *pathobj, *files;
64    Py_UNICODE *path, *p, *prefix, buf[MAXPATHLEN+2];
65    Py_ssize_t len;
66
67    if (!_PyArg_NoKeywords("zipimporter()", kwds))
68        return -1;
69
70    if (!PyArg_ParseTuple(args, "O&:zipimporter",
71        PyUnicode_FSDecoder, &pathobj))
72        return -1;
73
74    /* copy path to buf */
75    len = PyUnicode_GET_SIZE(pathobj);
76    if (len == 0) {
77        PyErr_SetString(ZipImportError, "archive path is empty");
78        goto error;
79    }
80    if (len >= MAXPATHLEN) {
81        PyErr_SetString(ZipImportError,
82                        "archive path too long");
83        goto error;
84    }
85    Py_UNICODE_strcpy(buf, PyUnicode_AS_UNICODE(pathobj));
86
87#ifdef ALTSEP
88    for (p = buf; *p; p++) {
89        if (*p == ALTSEP)
90            *p = SEP;
91    }
92#endif
93
94    path = NULL;
95    prefix = NULL;
96    for (;;) {
97        struct stat statbuf;
98        int rv;
99
100        if (pathobj == NULL) {
101            pathobj = PyUnicode_FromUnicode(buf, len);
102            if (pathobj == NULL)
103                goto error;
104        }
105        rv = _Py_stat(pathobj, &statbuf);
106        if (rv == 0) {
107            /* it exists */
108            if (S_ISREG(statbuf.st_mode))
109                /* it's a file */
110                path = buf;
111            break;
112        }
113        else if (PyErr_Occurred())
114            goto error;
115        /* back up one path element */
116        p = Py_UNICODE_strrchr(buf, SEP);
117        if (prefix != NULL)
118            *prefix = SEP;
119        if (p == NULL)
120            break;
121        *p = '\0';
122        len = p - buf;
123        prefix = p;
124        Py_CLEAR(pathobj);
125    }
126    if (path == NULL) {
127        PyErr_SetString(ZipImportError, "not a Zip file");
128        goto error;
129    }
130
131    files = PyDict_GetItem(zip_directory_cache, pathobj);
132    if (files == NULL) {
133        files = read_directory(pathobj);
134        if (files == NULL)
135            goto error;
136        if (PyDict_SetItem(zip_directory_cache, pathobj, files) != 0)
137            goto error;
138    }
139    else
140        Py_INCREF(files);
141    self->files = files;
142
143    self->archive = pathobj;
144    pathobj = NULL;
145
146    if (prefix != NULL) {
147        prefix++;
148        len = Py_UNICODE_strlen(prefix);
149        if (prefix[len-1] != SEP) {
150            /* add trailing SEP */
151            prefix[len] = SEP;
152            prefix[len + 1] = '\0';
153            len++;
154        }
155    }
156    else
157        len = 0;
158    self->prefix = PyUnicode_FromUnicode(prefix, len);
159    if (self->prefix == NULL)
160        goto error;
161
162    return 0;
163
164error:
165    Py_XDECREF(pathobj);
166    return -1;
167}
168
169/* GC support. */
170static int
171zipimporter_traverse(PyObject *obj, visitproc visit, void *arg)
172{
173    ZipImporter *self = (ZipImporter *)obj;
174    Py_VISIT(self->files);
175    return 0;
176}
177
178static void
179zipimporter_dealloc(ZipImporter *self)
180{
181    PyObject_GC_UnTrack(self);
182    Py_XDECREF(self->archive);
183    Py_XDECREF(self->prefix);
184    Py_XDECREF(self->files);
185    Py_TYPE(self)->tp_free((PyObject *)self);
186}
187
188static PyObject *
189zipimporter_repr(ZipImporter *self)
190{
191    char *archive = "???";
192    char *prefix = "";
193
194    if (self->archive != NULL && PyUnicode_Check(self->archive))
195        archive = _PyUnicode_AsString(self->archive);
196    if (self->prefix != NULL && PyUnicode_Check(self->prefix))
197        prefix = _PyUnicode_AsString(self->prefix);
198    if (prefix != NULL && *prefix)
199        return PyUnicode_FromFormat("<zipimporter object \"%.300s%c%.150s\">",
200                                    archive, SEP, prefix);
201    else
202        return PyUnicode_FromFormat("<zipimporter object \"%.300s\">",
203                                    archive);
204}
205
/* Equivalent of fullname.split(".")[-1]: return a pointer to the part of
   'fullname' after its last '.', or 'fullname' itself when it contains
   no dot.  The result points into the caller's buffer. */
static char *
get_subname(char *fullname)
{
    char *last_dot = strrchr(fullname, '.');

    return (last_dot != NULL) ? last_dot + 1 : fullname;
}
217
218/* Given a (sub)modulename, write the potential file path in the
219   archive (without extension) to the path buffer. Return the
220   length of the resulting string. */
221static int
222make_filename(char *prefix, char *name, char *path)
223{
224    size_t len;
225    char *p;
226
227    len = strlen(prefix);
228
229    /* self.prefix + name [+ SEP + "__init__"] + ".py[co]" */
230    if (len + strlen(name) + 13 >= MAXPATHLEN) {
231        PyErr_SetString(ZipImportError, "path too long");
232        return -1;
233    }
234
235    strcpy(path, prefix);
236    strcpy(path + len, name);
237    for (p = path + len; *p; p++) {
238        if (*p == '.')
239            *p = SEP;
240    }
241    len += strlen(name);
242    assert(len < INT_MAX);
243    return (int)len;
244}
245
/* Result of a module lookup in the archive (see get_module_info). */
enum zi_module_info {
    MI_ERROR     = 0,   /* lookup failed */
    MI_NOT_FOUND = 1,   /* no matching entry in the archive */
    MI_MODULE    = 2,   /* found as a plain module */
    MI_PACKAGE   = 3    /* found as a package (__init__ entry) */
};
252
253/* Return some information about a module. */
254static enum zi_module_info
255get_module_info(ZipImporter *self, char *fullname)
256{
257    char *subname, path[MAXPATHLEN + 1];
258    int len;
259    struct st_zip_searchorder *zso;
260
261    subname = get_subname(fullname);
262
263    len = make_filename(_PyUnicode_AsString(self->prefix), subname, path);
264    if (len < 0)
265        return MI_ERROR;
266
267    for (zso = zip_searchorder; *zso->suffix; zso++) {
268        strcpy(path + len, zso->suffix);
269        if (PyDict_GetItemString(self->files, path) != NULL) {
270            if (zso->type & IS_PACKAGE)
271                return MI_PACKAGE;
272            else
273                return MI_MODULE;
274        }
275    }
276    return MI_NOT_FOUND;
277}
278
279/* Check whether we can satisfy the import of the module named by
280   'fullname'. Return self if we can, None if we can't. */
281static PyObject *
282zipimporter_find_module(PyObject *obj, PyObject *args)
283{
284    ZipImporter *self = (ZipImporter *)obj;
285    PyObject *path = NULL;
286    char *fullname;
287    enum zi_module_info mi;
288
289    if (!PyArg_ParseTuple(args, "s|O:zipimporter.find_module",
290                          &fullname, &path))
291        return NULL;
292
293    mi = get_module_info(self, fullname);
294    if (mi == MI_ERROR)
295        return NULL;
296    if (mi == MI_NOT_FOUND) {
297        Py_INCREF(Py_None);
298        return Py_None;
299    }
300    Py_INCREF(self);
301    return (PyObject *)self;
302}
303
304/* Load and return the module named by 'fullname'. */
305static PyObject *
306zipimporter_load_module(PyObject *obj, PyObject *args)
307{
308    ZipImporter *self = (ZipImporter *)obj;
309    PyObject *code, *mod, *dict;
310    char *fullname, *modpath;
311    int ispackage;
312
313    if (!PyArg_ParseTuple(args, "s:zipimporter.load_module",
314                          &fullname))
315        return NULL;
316
317    code = get_module_code(self, fullname, &ispackage, &modpath);
318    if (code == NULL)
319        return NULL;
320
321    mod = PyImport_AddModule(fullname);
322    if (mod == NULL) {
323        Py_DECREF(code);
324        return NULL;
325    }
326    dict = PyModule_GetDict(mod);
327
328    /* mod.__loader__ = self */
329    if (PyDict_SetItemString(dict, "__loader__", (PyObject *)self) != 0)
330        goto error;
331
332    if (ispackage) {
333        /* add __path__ to the module *before* the code gets
334           executed */
335        PyObject *pkgpath, *fullpath;
336        char *subname = get_subname(fullname);
337        int err;
338
339        fullpath = PyUnicode_FromFormat("%U%c%U%s",
340                                self->archive, SEP,
341                                self->prefix, subname);
342        if (fullpath == NULL)
343            goto error;
344
345        pkgpath = Py_BuildValue("[O]", fullpath);
346        Py_DECREF(fullpath);
347        if (pkgpath == NULL)
348            goto error;
349        err = PyDict_SetItemString(dict, "__path__", pkgpath);
350        Py_DECREF(pkgpath);
351        if (err != 0)
352            goto error;
353    }
354    mod = PyImport_ExecCodeModuleEx(fullname, code, modpath);
355    Py_DECREF(code);
356    if (Py_VerboseFlag)
357        PySys_WriteStderr("import %s # loaded from Zip %s\n",
358                          fullname, modpath);
359    return mod;
360error:
361    Py_DECREF(code);
362    Py_DECREF(mod);
363    return NULL;
364}
365
366/* Return a string matching __file__ for the named module */
367static PyObject *
368zipimporter_get_filename(PyObject *obj, PyObject *args)
369{
370    ZipImporter *self = (ZipImporter *)obj;
371    PyObject *code;
372    char *fullname, *modpath;
373    int ispackage;
374
375    if (!PyArg_ParseTuple(args, "s:zipimporter.get_filename",
376                         &fullname))
377    return NULL;
378
379    /* Deciding the filename requires working out where the code
380       would come from if the module was actually loaded */
381    code = get_module_code(self, fullname, &ispackage, &modpath);
382    if (code == NULL)
383    return NULL;
384    Py_DECREF(code); /* Only need the path info */
385
386    return PyUnicode_FromString(modpath);
387}
388
389/* Return a bool signifying whether the module is a package or not. */
390static PyObject *
391zipimporter_is_package(PyObject *obj, PyObject *args)
392{
393    ZipImporter *self = (ZipImporter *)obj;
394    char *fullname;
395    enum zi_module_info mi;
396
397    if (!PyArg_ParseTuple(args, "s:zipimporter.is_package",
398                          &fullname))
399        return NULL;
400
401    mi = get_module_info(self, fullname);
402    if (mi == MI_ERROR)
403        return NULL;
404    if (mi == MI_NOT_FOUND) {
405        PyErr_Format(ZipImportError, "can't find module '%.200s'",
406                     fullname);
407        return NULL;
408    }
409    return PyBool_FromLong(mi == MI_PACKAGE);
410}
411
412static PyObject *
413zipimporter_get_data(PyObject *obj, PyObject *args)
414{
415    ZipImporter *self = (ZipImporter *)obj;
416    char *path;
417#ifdef ALTSEP
418    char *p, buf[MAXPATHLEN + 1];
419#endif
420    PyObject *toc_entry;
421    Py_ssize_t len;
422    char *archive_str;
423
424    if (!PyArg_ParseTuple(args, "s:zipimporter.get_data", &path))
425        return NULL;
426
427#ifdef ALTSEP
428    if (strlen(path) >= MAXPATHLEN) {
429        PyErr_SetString(ZipImportError, "path too long");
430        return NULL;
431    }
432    strcpy(buf, path);
433    for (p = buf; *p; p++) {
434        if (*p == ALTSEP)
435            *p = SEP;
436    }
437    path = buf;
438#endif
439    archive_str = _PyUnicode_AsStringAndSize(self->archive, &len);
440    if ((size_t)len < strlen(path) &&
441        strncmp(path, archive_str, len) == 0 &&
442        path[len] == SEP) {
443        path = path + len + 1;
444    }
445
446    toc_entry = PyDict_GetItemString(self->files, path);
447    if (toc_entry == NULL) {
448        PyErr_SetFromErrnoWithFilename(PyExc_IOError, path);
449        return NULL;
450    }
451    return get_data(archive_str, toc_entry);
452}
453
454static PyObject *
455zipimporter_get_code(PyObject *obj, PyObject *args)
456{
457    ZipImporter *self = (ZipImporter *)obj;
458    char *fullname;
459
460    if (!PyArg_ParseTuple(args, "s:zipimporter.get_code", &fullname))
461        return NULL;
462
463    return get_module_code(self, fullname, NULL, NULL);
464}
465
466static PyObject *
467zipimporter_get_source(PyObject *obj, PyObject *args)
468{
469    ZipImporter *self = (ZipImporter *)obj;
470    PyObject *toc_entry;
471    char *fullname, *subname, path[MAXPATHLEN+1];
472    int len;
473    enum zi_module_info mi;
474
475    if (!PyArg_ParseTuple(args, "s:zipimporter.get_source", &fullname))
476        return NULL;
477
478    mi = get_module_info(self, fullname);
479    if (mi == MI_ERROR)
480        return NULL;
481    if (mi == MI_NOT_FOUND) {
482        PyErr_Format(ZipImportError, "can't find module '%.200s'",
483                     fullname);
484        return NULL;
485    }
486    subname = get_subname(fullname);
487
488    len = make_filename(_PyUnicode_AsString(self->prefix), subname, path);
489    if (len < 0)
490        return NULL;
491
492    if (mi == MI_PACKAGE) {
493        path[len] = SEP;
494        strcpy(path + len + 1, "__init__.py");
495    }
496    else
497        strcpy(path + len, ".py");
498
499    toc_entry = PyDict_GetItemString(self->files, path);
500    if (toc_entry != NULL) {
501        PyObject *bytes = get_data(_PyUnicode_AsString(self->archive), toc_entry);
502        PyObject *res = PyUnicode_FromString(PyBytes_AsString(bytes));
503        Py_XDECREF(bytes);
504        return res;
505    }
506
507    /* we have the module, but no source */
508    Py_INCREF(Py_None);
509    return Py_None;
510}
511
512PyDoc_STRVAR(doc_find_module,
513"find_module(fullname, path=None) -> self or None.\n\
514\n\
515Search for a module specified by 'fullname'. 'fullname' must be the\n\
516fully qualified (dotted) module name. It returns the zipimporter\n\
517instance itself if the module was found, or None if it wasn't.\n\
518The optional 'path' argument is ignored -- it's there for compatibility\n\
519with the importer protocol.");
520
521PyDoc_STRVAR(doc_load_module,
522"load_module(fullname) -> module.\n\
523\n\
524Load the module specified by 'fullname'. 'fullname' must be the\n\
525fully qualified (dotted) module name. It returns the imported\n\
526module, or raises ZipImportError if it wasn't found.");
527
528PyDoc_STRVAR(doc_get_data,
529"get_data(pathname) -> string with file data.\n\
530\n\
531Return the data associated with 'pathname'. Raise IOError if\n\
532the file wasn't found.");
533
534PyDoc_STRVAR(doc_is_package,
535"is_package(fullname) -> bool.\n\
536\n\
537Return True if the module specified by fullname is a package.\n\
538Raise ZipImportError if the module couldn't be found.");
539
540PyDoc_STRVAR(doc_get_code,
541"get_code(fullname) -> code object.\n\
542\n\
543Return the code object for the specified module. Raise ZipImportError\n\
544if the module couldn't be found.");
545
546PyDoc_STRVAR(doc_get_source,
547"get_source(fullname) -> source string.\n\
548\n\
549Return the source code for the specified module. Raise ZipImportError\n\
550if the module couldn't be found, return None if the archive does\n\
551contain the module, but has no source for it.");
552
553
554PyDoc_STRVAR(doc_get_filename,
555"get_filename(fullname) -> filename string.\n\
556\n\
557Return the filename for the specified module.");
558
559static PyMethodDef zipimporter_methods[] = {
560    {"find_module", zipimporter_find_module, METH_VARARGS,
561     doc_find_module},
562    {"load_module", zipimporter_load_module, METH_VARARGS,
563     doc_load_module},
564    {"get_data", zipimporter_get_data, METH_VARARGS,
565     doc_get_data},
566    {"get_code", zipimporter_get_code, METH_VARARGS,
567     doc_get_code},
568    {"get_source", zipimporter_get_source, METH_VARARGS,
569     doc_get_source},
570    {"get_filename", zipimporter_get_filename, METH_VARARGS,
571     doc_get_filename},
572    {"is_package", zipimporter_is_package, METH_VARARGS,
573     doc_is_package},
574    {NULL,              NULL}   /* sentinel */
575};
576
577static PyMemberDef zipimporter_members[] = {
578    {"archive",  T_OBJECT, offsetof(ZipImporter, archive),  READONLY},
579    {"prefix",   T_OBJECT, offsetof(ZipImporter, prefix),   READONLY},
580    {"_files",   T_OBJECT, offsetof(ZipImporter, files),    READONLY},
581    {NULL}
582};
583
584PyDoc_STRVAR(zipimporter_doc,
585"zipimporter(archivepath) -> zipimporter object\n\
586\n\
587Create a new zipimporter instance. 'archivepath' must be a path to\n\
588a zipfile, or to a specific path inside a zipfile. For example, it can be\n\
589'/tmp/myimport.zip', or '/tmp/myimport.zip/mydirectory', if mydirectory is a\n\
590valid directory inside the archive.\n\
591\n\
592'ZipImportError is raised if 'archivepath' doesn't point to a valid Zip\n\
593archive.\n\
594\n\
595The 'archive' attribute of zipimporter objects contains the name of the\n\
596zipfile targeted.");
597
598#define DEFERRED_ADDRESS(ADDR) 0
599
600static PyTypeObject ZipImporter_Type = {
601    PyVarObject_HEAD_INIT(DEFERRED_ADDRESS(&PyType_Type), 0)
602    "zipimport.zipimporter",
603    sizeof(ZipImporter),
604    0,                                          /* tp_itemsize */
605    (destructor)zipimporter_dealloc,            /* tp_dealloc */
606    0,                                          /* tp_print */
607    0,                                          /* tp_getattr */
608    0,                                          /* tp_setattr */
609    0,                                          /* tp_reserved */
610    (reprfunc)zipimporter_repr,                 /* tp_repr */
611    0,                                          /* tp_as_number */
612    0,                                          /* tp_as_sequence */
613    0,                                          /* tp_as_mapping */
614    0,                                          /* tp_hash */
615    0,                                          /* tp_call */
616    0,                                          /* tp_str */
617    PyObject_GenericGetAttr,                    /* tp_getattro */
618    0,                                          /* tp_setattro */
619    0,                                          /* tp_as_buffer */
620    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
621        Py_TPFLAGS_HAVE_GC,                     /* tp_flags */
622    zipimporter_doc,                            /* tp_doc */
623    zipimporter_traverse,                       /* tp_traverse */
624    0,                                          /* tp_clear */
625    0,                                          /* tp_richcompare */
626    0,                                          /* tp_weaklistoffset */
627    0,                                          /* tp_iter */
628    0,                                          /* tp_iternext */
629    zipimporter_methods,                        /* tp_methods */
630    zipimporter_members,                        /* tp_members */
631    0,                                          /* tp_getset */
632    0,                                          /* tp_base */
633    0,                                          /* tp_dict */
634    0,                                          /* tp_descr_get */
635    0,                                          /* tp_descr_set */
636    0,                                          /* tp_dictoffset */
637    (initproc)zipimporter_init,                 /* tp_init */
638    PyType_GenericAlloc,                        /* tp_alloc */
639    PyType_GenericNew,                          /* tp_new */
640    PyObject_GC_Del,                            /* tp_free */
641};
642
643
644/* implementation */
645
/* Given a buffer, return the long that is represented by the first
   4 bytes, encoded as little endian and interpreted as a signed 32-bit
   value. This partially reimplements marshal.c:r_long().

   The bytes are assembled in an unsigned long so that no shift ever
   reaches the sign bit (undefined behaviour where long is 32 bits), and
   the sign is applied arithmetically so the result does not depend on
   the width of long. */
static long
get_long(const unsigned char *buf) {
    unsigned long bits;

    bits  = (unsigned long)buf[0];
    bits |= (unsigned long)buf[1] <<  8;
    bits |= (unsigned long)buf[2] << 16;
    bits |= (unsigned long)buf[3] << 24;

    if (bits & 0x80000000UL) {
        /* negative: value == -(~bits + 1) within 32 bits, computed so
           that the intermediate never exceeds 0x7FFFFFFF */
        return -(long)(~bits & 0x7FFFFFFFUL) - 1;
    }
    return (long)bits;
}
662
663/*
664   read_directory(archive) -> files dict (new reference)
665
666   Given a path to a Zip archive, build a dict, mapping file names
667   (local to the archive, using SEP as a separator) to toc entries.
668
669   A toc_entry is a tuple:
670
671       (__file__,      # value to use for __file__, available for all files
672    compress,      # compression kind; 0 for uncompressed
673    data_size,     # size of compressed data on disk
674    file_size,     # size of decompressed data
675    file_offset,   # offset of file header from start of archive
676    time,          # mod time of file (in dos format)
677    date,          # mod data of file (in dos format)
678    crc,           # crc checksum of the data
679       )
680
681   Directories can be recognized by the trailing SEP in the name,
682   data_size and file_offset are 0.
683*/
684static PyObject *
685read_directory(PyObject *archive_obj)
686{
687    /* FIXME: work on Py_UNICODE* instead of char* */
688    PyObject *files = NULL;
689    FILE *fp;
690    long compress, crc, data_size, file_size, file_offset, date, time;
691    long header_offset, name_size, header_size, header_position;
692    long i, l, count;
693    size_t length;
694    Py_UNICODE path[MAXPATHLEN + 5];
695    char name[MAXPATHLEN + 5];
696    PyObject *nameobj = NULL;
697    char *p, endof_central_dir[22];
698    long arc_offset; /* offset from beginning of file to start of zip-archive */
699    PyObject *pathobj;
700
701    if (PyUnicode_GET_SIZE(archive_obj) > MAXPATHLEN) {
702        PyErr_SetString(PyExc_OverflowError,
703                        "Zip path name is too long");
704        return NULL;
705    }
706    Py_UNICODE_strcpy(path, PyUnicode_AS_UNICODE(archive_obj));
707
708    fp = _Py_fopen(archive_obj, "rb");
709    if (fp == NULL) {
710        PyErr_Format(ZipImportError, "can't open Zip file: "
711                     "'%.200U'", archive_obj);
712        return NULL;
713    }
714    fseek(fp, -22, SEEK_END);
715    header_position = ftell(fp);
716    if (fread(endof_central_dir, 1, 22, fp) != 22) {
717        fclose(fp);
718        PyErr_Format(ZipImportError, "can't read Zip file: "
719                     "'%.200U'", archive_obj);
720        return NULL;
721    }
722    if (get_long((unsigned char *)endof_central_dir) != 0x06054B50) {
723        /* Bad: End of Central Dir signature */
724        fclose(fp);
725        PyErr_Format(ZipImportError, "not a Zip file: "
726                     "'%.200U'", archive_obj);
727        return NULL;
728    }
729
730    header_size = get_long((unsigned char *)endof_central_dir + 12);
731    header_offset = get_long((unsigned char *)endof_central_dir + 16);
732    arc_offset = header_position - header_offset - header_size;
733    header_offset += arc_offset;
734
735    files = PyDict_New();
736    if (files == NULL)
737        goto error;
738
739    length = Py_UNICODE_strlen(path);
740    path[length] = SEP;
741
742    /* Start of Central Directory */
743    count = 0;
744    for (;;) {
745        PyObject *t;
746        int err;
747
748        fseek(fp, header_offset, 0);  /* Start of file header */
749        l = PyMarshal_ReadLongFromFile(fp);
750        if (l != 0x02014B50)
751            break;              /* Bad: Central Dir File Header */
752        fseek(fp, header_offset + 10, 0);
753        compress = PyMarshal_ReadShortFromFile(fp);
754        time = PyMarshal_ReadShortFromFile(fp);
755        date = PyMarshal_ReadShortFromFile(fp);
756        crc = PyMarshal_ReadLongFromFile(fp);
757        data_size = PyMarshal_ReadLongFromFile(fp);
758        file_size = PyMarshal_ReadLongFromFile(fp);
759        name_size = PyMarshal_ReadShortFromFile(fp);
760        header_size = 46 + name_size +
761           PyMarshal_ReadShortFromFile(fp) +
762           PyMarshal_ReadShortFromFile(fp);
763        fseek(fp, header_offset + 42, 0);
764        file_offset = PyMarshal_ReadLongFromFile(fp) + arc_offset;
765        if (name_size > MAXPATHLEN)
766            name_size = MAXPATHLEN;
767
768        p = name;
769        for (i = 0; i < name_size; i++) {
770            *p = (char)getc(fp);
771            if (*p == '/')
772                *p = SEP;
773            p++;
774        }
775        *p = 0;         /* Add terminating null byte */
776        header_offset += header_size;
777
778        nameobj = PyUnicode_DecodeFSDefaultAndSize(name, name_size);
779        if (nameobj == NULL)
780            goto error;
781        Py_UNICODE_strncpy(path + length + 1, PyUnicode_AS_UNICODE(nameobj), MAXPATHLEN - length - 1);
782
783        pathobj = PyUnicode_FromUnicode(path, Py_UNICODE_strlen(path));
784        if (pathobj == NULL)
785            goto error;
786        t = Py_BuildValue("Niiiiiii", pathobj, compress, data_size,
787                          file_size, file_offset, time, date, crc);
788        if (t == NULL)
789            goto error;
790        err = PyDict_SetItem(files, nameobj, t);
791        Py_CLEAR(nameobj);
792        Py_DECREF(t);
793        if (err != 0)
794            goto error;
795        count++;
796    }
797    fclose(fp);
798    if (Py_VerboseFlag)
799        PySys_FormatStderr("# zipimport: found %ld names in %U\n",
800            count, archive_obj);
801    return files;
802error:
803    fclose(fp);
804    Py_XDECREF(files);
805    Py_XDECREF(nameobj);
806    return NULL;
807}
808
809/* Return the zlib.decompress function object, or NULL if zlib couldn't
810   be imported. The function is cached when found, so subsequent calls
811   don't import zlib again. Returns a *borrowed* reference.
812   XXX This makes zlib.decompress immortal. */
813static PyObject *
814get_decompress_func(void)
815{
816    static PyObject *decompress = NULL;
817
818    if (decompress == NULL) {
819        PyObject *zlib;
820        static int importing_zlib = 0;
821
822        if (importing_zlib != 0)
823            /* Someone has a zlib.py[co] in their Zip file;
824               let's avoid a stack overflow. */
825            return NULL;
826        importing_zlib = 1;
827        zlib = PyImport_ImportModuleNoBlock("zlib");
828        importing_zlib = 0;
829        if (zlib != NULL) {
830            decompress = PyObject_GetAttrString(zlib,
831                                                "decompress");
832            Py_DECREF(zlib);
833        }
834        else
835            PyErr_Clear();
836        if (Py_VerboseFlag)
837            PySys_WriteStderr("# zipimport: zlib %s\n",
838                zlib != NULL ? "available": "UNAVAILABLE");
839    }
840    return decompress;
841}
842
843/* Given a path to a Zip file and a toc_entry, return the (uncompressed)
844   data as a new reference. */
845static PyObject *
846get_data(char *archive, PyObject *toc_entry)
847{
848    PyObject *raw_data, *data = NULL, *decompress;
849    char *buf;
850    FILE *fp;
851    int err;
852    Py_ssize_t bytes_read = 0;
853    long l;
854    char *datapath;
855    long compress, data_size, file_size, file_offset, bytes_size;
856    long time, date, crc;
857
858    if (!PyArg_ParseTuple(toc_entry, "slllllll", &datapath, &compress,
859                          &data_size, &file_size, &file_offset, &time,
860                          &date, &crc)) {
861        return NULL;
862    }
863
864    fp = fopen(archive, "rb");
865    if (!fp) {
866        PyErr_Format(PyExc_IOError,
867           "zipimport: can not open file %s", archive);
868        return NULL;
869    }
870
871    /* Check to make sure the local file header is correct */
872    fseek(fp, file_offset, 0);
873    l = PyMarshal_ReadLongFromFile(fp);
874    if (l != 0x04034B50) {
875        /* Bad: Local File Header */
876        PyErr_Format(ZipImportError,
877                     "bad local file header in %s",
878                     archive);
879        fclose(fp);
880        return NULL;
881    }
882    fseek(fp, file_offset + 26, 0);
883    l = 30 + PyMarshal_ReadShortFromFile(fp) +
884        PyMarshal_ReadShortFromFile(fp);        /* local header size */
885    file_offset += l;           /* Start of file data */
886
887    bytes_size = compress == 0 ? data_size : data_size + 1;
888    if (bytes_size == 0)
889        bytes_size++;
890    raw_data = PyBytes_FromStringAndSize((char *)NULL, bytes_size);
891
892    if (raw_data == NULL) {
893        fclose(fp);
894        return NULL;
895    }
896    buf = PyBytes_AsString(raw_data);
897
898    err = fseek(fp, file_offset, 0);
899    if (err == 0)
900        bytes_read = fread(buf, 1, data_size, fp);
901    fclose(fp);
902    if (err || bytes_read != data_size) {
903        PyErr_SetString(PyExc_IOError,
904                        "zipimport: can't read data");
905        Py_DECREF(raw_data);
906        return NULL;
907    }
908
909    if (compress != 0) {
910        buf[data_size] = 'Z';  /* saw this in zipfile.py */
911        data_size++;
912    }
913    buf[data_size] = '\0';
914
915    if (compress == 0) {  /* data is not compressed */
916        data = PyBytes_FromStringAndSize(buf, data_size);
917        Py_DECREF(raw_data);
918        return data;
919    }
920
921    /* Decompress with zlib */
922    decompress = get_decompress_func();
923    if (decompress == NULL) {
924        PyErr_SetString(ZipImportError,
925                        "can't decompress data; "
926                        "zlib not available");
927        goto error;
928    }
929    data = PyObject_CallFunction(decompress, "Oi", raw_data, -15);
930error:
931    Py_DECREF(raw_data);
932    return data;
933}
934
/* Lenient modification-time comparison: two timestamps count as equal
   when they differ by at most one second.  The mtime kept in the archive
   is coarser than the one stored in a .pyc (dostime only stores even
   seconds). */
static int
eq_mtime(time_t t1, time_t t2)
{
    time_t delta = t1 - t2;

    return (delta < 0 ? -delta : delta) <= 1;
}
947
948/* Given the contents of a .py[co] file in a buffer, unmarshal the data
949   and return the code object. Return None if it the magic word doesn't
950   match (we do this instead of raising an exception as we fall back
951   to .py if available and we don't want to mask other errors).
952   Returns a new reference. */
953static PyObject *
954unmarshal_code(char *pathname, PyObject *data, time_t mtime)
955{
956    PyObject *code;
957    char *buf = PyBytes_AsString(data);
958    Py_ssize_t size = PyBytes_Size(data);
959
960    if (size <= 9) {
961        PyErr_SetString(ZipImportError,
962                        "bad pyc data");
963        return NULL;
964    }
965
966    if (get_long((unsigned char *)buf) != PyImport_GetMagicNumber()) {
967        if (Py_VerboseFlag)
968            PySys_WriteStderr("# %s has bad magic\n",
969                              pathname);
970        Py_INCREF(Py_None);
971        return Py_None;  /* signal caller to try alternative */
972    }
973
974    if (mtime != 0 && !eq_mtime(get_long((unsigned char *)buf + 4),
975                                mtime)) {
976        if (Py_VerboseFlag)
977            PySys_WriteStderr("# %s has bad mtime\n",
978                              pathname);
979        Py_INCREF(Py_None);
980        return Py_None;  /* signal caller to try alternative */
981    }
982
983    code = PyMarshal_ReadObjectFromString(buf + 8, size - 8);
984    if (code == NULL)
985        return NULL;
986    if (!PyCode_Check(code)) {
987        Py_DECREF(code);
988        PyErr_Format(PyExc_TypeError,
989             "compiled module %.200s is not a code object",
990             pathname);
991        return NULL;
992    }
993    return code;
994}
995
996/* Replace any occurances of "\r\n?" in the input string with "\n".
997   This converts DOS and Mac line endings to Unix line endings.
998   Also append a trailing "\n" to be compatible with
999   PyParser_SimpleParseFile(). Returns a new reference. */
1000static PyObject *
1001normalize_line_endings(PyObject *source)
1002{
1003    char *buf, *q, *p = PyBytes_AsString(source);
1004    PyObject *fixed_source;
1005    int len = 0;
1006
1007    if (!p) {
1008        return PyBytes_FromStringAndSize("\n\0", 2);
1009    }
1010
1011    /* one char extra for trailing \n and one for terminating \0 */
1012    buf = (char *)PyMem_Malloc(PyBytes_Size(source) + 2);
1013    if (buf == NULL) {
1014        PyErr_SetString(PyExc_MemoryError,
1015                        "zipimport: no memory to allocate "
1016                        "source buffer");
1017        return NULL;
1018    }
1019    /* replace "\r\n?" by "\n" */
1020    for (q = buf; *p != '\0'; p++) {
1021        if (*p == '\r') {
1022            *q++ = '\n';
1023            if (*(p + 1) == '\n')
1024                p++;
1025        }
1026        else
1027            *q++ = *p;
1028        len++;
1029    }
1030    *q++ = '\n';  /* add trailing \n */
1031    *q = '\0';
1032    fixed_source = PyBytes_FromStringAndSize(buf, len + 2);
1033    PyMem_Free(buf);
1034    return fixed_source;
1035}
1036
1037/* Given a string buffer containing Python source code, compile it
1038   return and return a code object as a new reference. */
1039static PyObject *
1040compile_source(char *pathname, PyObject *source)
1041{
1042    PyObject *code, *fixed_source;
1043
1044    fixed_source = normalize_line_endings(source);
1045    if (fixed_source == NULL)
1046        return NULL;
1047
1048    code = Py_CompileString(PyBytes_AsString(fixed_source), pathname,
1049                            Py_file_input);
1050    Py_DECREF(fixed_source);
1051    return code;
1052}
1053
/* Translate the packed DOS time and date words stored in a Zip archive
   into a time_t that can be compared with the time stamp kept in .pyc
   files. The fields are interpreted as local time by mktime(). */
static time_t
parse_dostime(int dostime, int dosdate)
{
    struct tm fields;
    /* time word: hhhhh mmmmmm sssss (seconds are stored halved) */
    int half_seconds = dostime & 0x1f;
    int minutes = (dostime >> 5) & 0x3f;
    int hours = (dostime >> 11) & 0x1f;
    /* date word: yyyyyyy mmmm ddddd (year counted from 1980) */
    int day = dosdate & 0x1f;
    int month = (dosdate >> 5) & 0x0f;
    int years_since_1980 = (dosdate >> 9) & 0x7f;

    memset(&fields, 0, sizeof(fields));

    fields.tm_year = years_since_1980 + 80;  /* struct tm counts from 1900 */
    fields.tm_mon = month - 1;               /* struct tm months are 0-based */
    fields.tm_mday = day;
    fields.tm_hour = hours;
    fields.tm_min = minutes;
    fields.tm_sec = half_seconds * 2;
    fields.tm_isdst = -1;  /* let mktime() decide; wday/yday is ignored */

    return mktime(&fields);
}
1073
1074/* Given a path to a .pyc or .pyo file in the archive, return the
1075   modifictaion time of the matching .py file, or 0 if no source
1076   is available. */
1077static time_t
1078get_mtime_of_source(ZipImporter *self, char *path)
1079{
1080    PyObject *toc_entry;
1081    time_t mtime = 0;
1082    Py_ssize_t lastchar = strlen(path) - 1;
1083    char savechar = path[lastchar];
1084    path[lastchar] = '\0';  /* strip 'c' or 'o' from *.py[co] */
1085    toc_entry = PyDict_GetItemString(self->files, path);
1086    if (toc_entry != NULL && PyTuple_Check(toc_entry) &&
1087        PyTuple_Size(toc_entry) == 8) {
1088        /* fetch the time stamp of the .py file for comparison
1089           with an embedded pyc time stamp */
1090        int time, date;
1091        time = PyLong_AsLong(PyTuple_GetItem(toc_entry, 5));
1092        date = PyLong_AsLong(PyTuple_GetItem(toc_entry, 6));
1093        mtime = parse_dostime(time, date);
1094    }
1095    path[lastchar] = savechar;
1096    return mtime;
1097}
1098
1099/* Return the code object for the module named by 'fullname' from the
1100   Zip archive as a new reference. */
1101static PyObject *
1102get_code_from_data(ZipImporter *self, int ispackage, int isbytecode,
1103                   time_t mtime, PyObject *toc_entry)
1104{
1105    PyObject *data, *code;
1106    char *modpath;
1107    char *archive = _PyUnicode_AsString(self->archive);
1108
1109    if (archive == NULL)
1110        return NULL;
1111
1112    data = get_data(archive, toc_entry);
1113    if (data == NULL)
1114        return NULL;
1115
1116    modpath = _PyUnicode_AsString(PyTuple_GetItem(toc_entry, 0));
1117
1118    if (isbytecode) {
1119        code = unmarshal_code(modpath, data, mtime);
1120    }
1121    else {
1122        code = compile_source(modpath, data);
1123    }
1124    Py_DECREF(data);
1125    return code;
1126}
1127
1128/* Get the code object assoiciated with the module specified by
1129   'fullname'. */
1130static PyObject *
1131get_module_code(ZipImporter *self, char *fullname,
1132                int *p_ispackage, char **p_modpath)
1133{
1134    PyObject *toc_entry;
1135    char *subname, path[MAXPATHLEN + 1];
1136    int len;
1137    struct st_zip_searchorder *zso;
1138
1139    subname = get_subname(fullname);
1140
1141    len = make_filename(_PyUnicode_AsString(self->prefix), subname, path);
1142    if (len < 0)
1143        return NULL;
1144
1145    for (zso = zip_searchorder; *zso->suffix; zso++) {
1146        PyObject *code = NULL;
1147
1148        strcpy(path + len, zso->suffix);
1149        if (Py_VerboseFlag > 1)
1150            PySys_WriteStderr("# trying %s%c%s\n",
1151                              _PyUnicode_AsString(self->archive),
1152                              (int)SEP, path);
1153        toc_entry = PyDict_GetItemString(self->files, path);
1154        if (toc_entry != NULL) {
1155            time_t mtime = 0;
1156            int ispackage = zso->type & IS_PACKAGE;
1157            int isbytecode = zso->type & IS_BYTECODE;
1158
1159            if (isbytecode)
1160                mtime = get_mtime_of_source(self, path);
1161            if (p_ispackage != NULL)
1162                *p_ispackage = ispackage;
1163            code = get_code_from_data(self, ispackage,
1164                                      isbytecode, mtime,
1165                                      toc_entry);
1166            if (code == Py_None) {
1167                /* bad magic number or non-matching mtime
1168                   in byte code, try next */
1169                Py_DECREF(code);
1170                continue;
1171            }
1172            if (code != NULL && p_modpath != NULL)
1173                *p_modpath = _PyUnicode_AsString(
1174                    PyTuple_GetItem(toc_entry, 0));
1175            return code;
1176        }
1177    }
1178    PyErr_Format(ZipImportError, "can't find module '%.200s'", fullname);
1179    return NULL;
1180}
1181
1182
1183/* Module init */
1184
/* Docstring of the zipimport module; it is installed through the
   zipimportmodule definition. The text lists the three names that
   PyInit_zipimport() adds to the module. */
PyDoc_STRVAR(zipimport_doc,
"zipimport provides support for importing Python modules from Zip archives.\n\
\n\
This module exports three objects:\n\
- zipimporter: a class; its constructor takes a path to a Zip archive.\n\
- ZipImportError: exception raised by zipimporter objects. It's a\n\
  subclass of ImportError, so it can be caught as ImportError, too.\n\
- _zip_directory_cache: a dict, mapping archive paths to zip directory\n\
  info dicts, as used in zipimporter._files.\n\
\n\
It is usually not needed to use the zipimport module explicitly; it is\n\
used by the builtin import mechanism for sys.path items that are paths\n\
to Zip archives.");
1198
/* Module definition handed to PyModule_Create() in PyInit_zipimport().
   The initializer is positional; the field names in the comments follow
   the PyModuleDef documentation. */
static struct PyModuleDef zipimportmodule = {
    PyModuleDef_HEAD_INIT,
    "zipimport",    /* m_name */
    zipimport_doc,  /* m_doc */
    -1,             /* m_size: the module keeps its state in C globals */
    NULL,           /* m_methods: no module-level functions */
    NULL,           /* the remaining optional slots are all unused */
    NULL,
    NULL,
    NULL
};
1210
1211PyMODINIT_FUNC
1212PyInit_zipimport(void)
1213{
1214    PyObject *mod;
1215
1216    if (PyType_Ready(&ZipImporter_Type) < 0)
1217        return NULL;
1218
1219    /* Correct directory separator */
1220    zip_searchorder[0].suffix[0] = SEP;
1221    zip_searchorder[1].suffix[0] = SEP;
1222    zip_searchorder[2].suffix[0] = SEP;
1223    if (Py_OptimizeFlag) {
1224        /* Reverse *.pyc and *.pyo */
1225        struct st_zip_searchorder tmp;
1226        tmp = zip_searchorder[0];
1227        zip_searchorder[0] = zip_searchorder[1];
1228        zip_searchorder[1] = tmp;
1229        tmp = zip_searchorder[3];
1230        zip_searchorder[3] = zip_searchorder[4];
1231        zip_searchorder[4] = tmp;
1232    }
1233
1234    mod = PyModule_Create(&zipimportmodule);
1235    if (mod == NULL)
1236        return NULL;
1237
1238    ZipImportError = PyErr_NewException("zipimport.ZipImportError",
1239                                        PyExc_ImportError, NULL);
1240    if (ZipImportError == NULL)
1241        return NULL;
1242
1243    Py_INCREF(ZipImportError);
1244    if (PyModule_AddObject(mod, "ZipImportError",
1245                           ZipImportError) < 0)
1246        return NULL;
1247
1248    Py_INCREF(&ZipImporter_Type);
1249    if (PyModule_AddObject(mod, "zipimporter",
1250                           (PyObject *)&ZipImporter_Type) < 0)
1251        return NULL;
1252
1253    zip_directory_cache = PyDict_New();
1254    if (zip_directory_cache == NULL)
1255        return NULL;
1256    Py_INCREF(zip_directory_cache);
1257    if (PyModule_AddObject(mod, "_zip_directory_cache",
1258                           zip_directory_cache) < 0)
1259        return NULL;
1260    return mod;
1261}
1262