zipimport.c revision 13925008dc11f2a235627dc8c0440c0ce99171d9
1#include "Python.h"
2#include "structmember.h"
3#include "osdefs.h"
4#include "marshal.h"
5#include <time.h>
6
7
/* Flag bits stored in st_zip_searchorder.type.  IS_SOURCE is simply the
   absence of IS_BYTECODE. */
#define IS_SOURCE   0x0
#define IS_BYTECODE 0x1
#define IS_PACKAGE  0x2

/* A candidate file-name suffix paired with the flags of the entry it
   names.  suffix[] is sized for the longest candidate, "/__init__.pyc",
   plus the terminating NUL. */
struct st_zip_searchorder {
    char suffix[14];
    int type;
};

/* zip_searchorder lists, in lookup order, the suffixes tried when
   searching the Zip archive for a module: a package __init__ comes
   first, followed by the non-package .pyc, .pyo and .py entries.
   initzipimport() swaps the .pyc and .pyo entries when running in
   optimized mode and replaces '/' with SEP.  The entry with the empty
   suffix terminates the table. */
static struct st_zip_searchorder zip_searchorder[] = {
    {"/__init__.pyc", IS_BYTECODE | IS_PACKAGE},
    {"/__init__.pyo", IS_BYTECODE | IS_PACKAGE},
    {"/__init__.py",  IS_SOURCE | IS_PACKAGE},
    {".pyc",          IS_BYTECODE},
    {".pyo",          IS_BYTECODE},
    {".py",           IS_SOURCE},
    {"",              0}
};
31
32/* zipimporter object definition and support */
33
34typedef struct _zipimporter ZipImporter;
35
36struct _zipimporter {
37    PyObject_HEAD
38    PyObject *archive;  /* pathname of the Zip archive */
39    PyObject *prefix;   /* file prefix: "a/sub/directory/" */
40    PyObject *files;    /* dict with file info {path: toc_entry} */
41};
42
43static PyObject *ZipImportError;
44static PyObject *zip_directory_cache = NULL;
45
46/* forward decls */
47static PyObject *read_directory(char *archive);
48static PyObject *get_data(char *archive, PyObject *toc_entry);
49static PyObject *get_module_code(ZipImporter *self, char *fullname,
50                                 int *p_ispackage, char **p_modpath);
51
52
53#define ZipImporter_Check(op) PyObject_TypeCheck(op, &ZipImporter_Type)
54
55
56/* zipimporter.__init__
57   Split the "subdirectory" from the Zip archive path, lookup a matching
58   entry in sys.path_importer_cache, fetch the file directory from there
59   if found, or else read it from the archive. */
60static int
61zipimporter_init(ZipImporter *self, PyObject *args, PyObject *kwds)
62{
63    char *path, *p, *prefix, buf[MAXPATHLEN+2];
64    size_t len;
65
66    if (!_PyArg_NoKeywords("zipimporter()", kwds))
67        return -1;
68
69    if (!PyArg_ParseTuple(args, "s:zipimporter", &path))
70        return -1;
71
72    len = strlen(path);
73    if (len == 0) {
74        PyErr_SetString(ZipImportError, "archive path is empty");
75        return -1;
76    }
77    if (len >= MAXPATHLEN) {
78        PyErr_SetString(ZipImportError,
79                        "archive path too long");
80        return -1;
81    }
82    strcpy(buf, path);
83
84#ifdef ALTSEP
85    for (p = buf; *p; p++) {
86        if (*p == ALTSEP)
87            *p = SEP;
88    }
89#endif
90
91    path = NULL;
92    prefix = NULL;
93    for (;;) {
94        struct stat statbuf;
95        int rv;
96
97        rv = stat(buf, &statbuf);
98        if (rv == 0) {
99            /* it exists */
100            if (S_ISREG(statbuf.st_mode))
101                /* it's a file */
102                path = buf;
103            break;
104        }
105        /* back up one path element */
106        p = strrchr(buf, SEP);
107        if (prefix != NULL)
108            *prefix = SEP;
109        if (p == NULL)
110            break;
111        *p = '\0';
112        prefix = p;
113    }
114    if (path != NULL) {
115        PyObject *files;
116        files = PyDict_GetItemString(zip_directory_cache, path);
117        if (files == NULL) {
118            files = read_directory(buf);
119            if (files == NULL)
120                return -1;
121            if (PyDict_SetItemString(zip_directory_cache, path,
122                                     files) != 0)
123                return -1;
124        }
125        else
126            Py_INCREF(files);
127        self->files = files;
128    }
129    else {
130        PyErr_SetString(ZipImportError, "not a Zip file");
131        return -1;
132    }
133
134    if (prefix == NULL)
135        prefix = "";
136    else {
137        prefix++;
138        len = strlen(prefix);
139        if (prefix[len-1] != SEP) {
140            /* add trailing SEP */
141            prefix[len] = SEP;
142            prefix[len + 1] = '\0';
143        }
144    }
145
146    self->archive = PyUnicode_FromString(buf);
147    if (self->archive == NULL)
148        return -1;
149
150    self->prefix = PyUnicode_FromString(prefix);
151    if (self->prefix == NULL)
152        return -1;
153
154    return 0;
155}
156
157/* GC support. */
158static int
159zipimporter_traverse(PyObject *obj, visitproc visit, void *arg)
160{
161    ZipImporter *self = (ZipImporter *)obj;
162    Py_VISIT(self->files);
163    return 0;
164}
165
166static void
167zipimporter_dealloc(ZipImporter *self)
168{
169    PyObject_GC_UnTrack(self);
170    Py_XDECREF(self->archive);
171    Py_XDECREF(self->prefix);
172    Py_XDECREF(self->files);
173    Py_TYPE(self)->tp_free((PyObject *)self);
174}
175
176static PyObject *
177zipimporter_repr(ZipImporter *self)
178{
179    char *archive = "???";
180    char *prefix = "";
181
182    if (self->archive != NULL && PyUnicode_Check(self->archive))
183        archive = _PyUnicode_AsString(self->archive);
184    if (self->prefix != NULL && PyUnicode_Check(self->prefix))
185        prefix = _PyUnicode_AsString(self->prefix);
186    if (prefix != NULL && *prefix)
187        return PyUnicode_FromFormat("<zipimporter object \"%.300s%c%.150s\">",
188                                    archive, SEP, prefix);
189    else
190        return PyUnicode_FromFormat("<zipimporter object \"%.300s\">",
191                                    archive);
192}
193
/* Return fullname.split(".")[-1]: a pointer to the character after the
   last '.' in fullname, or fullname itself when it contains no dot.
   The result points into the caller's buffer. */
static char *
get_subname(char *fullname)
{
    char *last_dot = strrchr(fullname, '.');

    return (last_dot != NULL) ? last_dot + 1 : fullname;
}
205
206/* Given a (sub)modulename, write the potential file path in the
207   archive (without extension) to the path buffer. Return the
208   length of the resulting string. */
209static int
210make_filename(char *prefix, char *name, char *path)
211{
212    size_t len;
213    char *p;
214
215    len = strlen(prefix);
216
217    /* self.prefix + name [+ SEP + "__init__"] + ".py[co]" */
218    if (len + strlen(name) + 13 >= MAXPATHLEN) {
219        PyErr_SetString(ZipImportError, "path too long");
220        return -1;
221    }
222
223    strcpy(path, prefix);
224    strcpy(path + len, name);
225    for (p = path + len; *p; p++) {
226        if (*p == '.')
227            *p = SEP;
228    }
229    len += strlen(name);
230    assert(len < INT_MAX);
231    return (int)len;
232}
233
234enum zi_module_info {
235    MI_ERROR,
236    MI_NOT_FOUND,
237    MI_MODULE,
238    MI_PACKAGE
239};
240
241/* Return some information about a module. */
242static enum zi_module_info
243get_module_info(ZipImporter *self, char *fullname)
244{
245    char *subname, path[MAXPATHLEN + 1];
246    int len;
247    struct st_zip_searchorder *zso;
248
249    subname = get_subname(fullname);
250
251    len = make_filename(_PyUnicode_AsString(self->prefix), subname, path);
252    if (len < 0)
253        return MI_ERROR;
254
255    for (zso = zip_searchorder; *zso->suffix; zso++) {
256        strcpy(path + len, zso->suffix);
257        if (PyDict_GetItemString(self->files, path) != NULL) {
258            if (zso->type & IS_PACKAGE)
259                return MI_PACKAGE;
260            else
261                return MI_MODULE;
262        }
263    }
264    return MI_NOT_FOUND;
265}
266
267/* Check whether we can satisfy the import of the module named by
268   'fullname'. Return self if we can, None if we can't. */
269static PyObject *
270zipimporter_find_module(PyObject *obj, PyObject *args)
271{
272    ZipImporter *self = (ZipImporter *)obj;
273    PyObject *path = NULL;
274    char *fullname;
275    enum zi_module_info mi;
276
277    if (!PyArg_ParseTuple(args, "s|O:zipimporter.find_module",
278                          &fullname, &path))
279        return NULL;
280
281    mi = get_module_info(self, fullname);
282    if (mi == MI_ERROR)
283        return NULL;
284    if (mi == MI_NOT_FOUND) {
285        Py_INCREF(Py_None);
286        return Py_None;
287    }
288    Py_INCREF(self);
289    return (PyObject *)self;
290}
291
292/* Load and return the module named by 'fullname'. */
293static PyObject *
294zipimporter_load_module(PyObject *obj, PyObject *args)
295{
296    ZipImporter *self = (ZipImporter *)obj;
297    PyObject *code, *mod, *dict;
298    char *fullname, *modpath;
299    int ispackage;
300
301    if (!PyArg_ParseTuple(args, "s:zipimporter.load_module",
302                          &fullname))
303        return NULL;
304
305    code = get_module_code(self, fullname, &ispackage, &modpath);
306    if (code == NULL)
307        return NULL;
308
309    mod = PyImport_AddModule(fullname);
310    if (mod == NULL) {
311        Py_DECREF(code);
312        return NULL;
313    }
314    dict = PyModule_GetDict(mod);
315
316    /* mod.__loader__ = self */
317    if (PyDict_SetItemString(dict, "__loader__", (PyObject *)self) != 0)
318        goto error;
319
320    if (ispackage) {
321        /* add __path__ to the module *before* the code gets
322           executed */
323        PyObject *pkgpath, *fullpath;
324        char *subname = get_subname(fullname);
325        int err;
326
327        fullpath = PyUnicode_FromFormat("%U%c%U%s",
328                                self->archive, SEP,
329                                self->prefix, subname);
330        if (fullpath == NULL)
331            goto error;
332
333        pkgpath = Py_BuildValue("[O]", fullpath);
334        Py_DECREF(fullpath);
335        if (pkgpath == NULL)
336            goto error;
337        err = PyDict_SetItemString(dict, "__path__", pkgpath);
338        Py_DECREF(pkgpath);
339        if (err != 0)
340            goto error;
341    }
342    mod = PyImport_ExecCodeModuleEx(fullname, code, modpath);
343    Py_DECREF(code);
344    if (Py_VerboseFlag)
345        PySys_WriteStderr("import %s # loaded from Zip %s\n",
346                          fullname, modpath);
347    return mod;
348error:
349    Py_DECREF(code);
350    Py_DECREF(mod);
351    return NULL;
352}
353
354/* Return a string matching __file__ for the named module */
355static PyObject *
356zipimporter_get_filename(PyObject *obj, PyObject *args)
357{
358    ZipImporter *self = (ZipImporter *)obj;
359    PyObject *code;
360    char *fullname, *modpath;
361    int ispackage;
362
363    if (!PyArg_ParseTuple(args, "s:zipimporter.get_filename",
364                         &fullname))
365    return NULL;
366
367    /* Deciding the filename requires working out where the code
368       would come from if the module was actually loaded */
369    code = get_module_code(self, fullname, &ispackage, &modpath);
370    if (code == NULL)
371    return NULL;
372    Py_DECREF(code); /* Only need the path info */
373
374    return PyUnicode_FromString(modpath);
375}
376
377/* Return a bool signifying whether the module is a package or not. */
378static PyObject *
379zipimporter_is_package(PyObject *obj, PyObject *args)
380{
381    ZipImporter *self = (ZipImporter *)obj;
382    char *fullname;
383    enum zi_module_info mi;
384
385    if (!PyArg_ParseTuple(args, "s:zipimporter.is_package",
386                          &fullname))
387        return NULL;
388
389    mi = get_module_info(self, fullname);
390    if (mi == MI_ERROR)
391        return NULL;
392    if (mi == MI_NOT_FOUND) {
393        PyErr_Format(ZipImportError, "can't find module '%.200s'",
394                     fullname);
395        return NULL;
396    }
397    return PyBool_FromLong(mi == MI_PACKAGE);
398}
399
400static PyObject *
401zipimporter_get_data(PyObject *obj, PyObject *args)
402{
403    ZipImporter *self = (ZipImporter *)obj;
404    char *path;
405#ifdef ALTSEP
406    char *p, buf[MAXPATHLEN + 1];
407#endif
408    PyObject *toc_entry;
409    Py_ssize_t len;
410    char *archive_str;
411
412    if (!PyArg_ParseTuple(args, "s:zipimporter.get_data", &path))
413        return NULL;
414
415#ifdef ALTSEP
416    if (strlen(path) >= MAXPATHLEN) {
417        PyErr_SetString(ZipImportError, "path too long");
418        return NULL;
419    }
420    strcpy(buf, path);
421    for (p = buf; *p; p++) {
422        if (*p == ALTSEP)
423            *p = SEP;
424    }
425    path = buf;
426#endif
427    archive_str = _PyUnicode_AsStringAndSize(self->archive, &len);
428    if ((size_t)len < strlen(path) &&
429        strncmp(path, archive_str, len) == 0 &&
430        path[len] == SEP) {
431        path = path + len + 1;
432    }
433
434    toc_entry = PyDict_GetItemString(self->files, path);
435    if (toc_entry == NULL) {
436        PyErr_SetFromErrnoWithFilename(PyExc_IOError, path);
437        return NULL;
438    }
439    return get_data(archive_str, toc_entry);
440}
441
442static PyObject *
443zipimporter_get_code(PyObject *obj, PyObject *args)
444{
445    ZipImporter *self = (ZipImporter *)obj;
446    char *fullname;
447
448    if (!PyArg_ParseTuple(args, "s:zipimporter.get_code", &fullname))
449        return NULL;
450
451    return get_module_code(self, fullname, NULL, NULL);
452}
453
454static PyObject *
455zipimporter_get_source(PyObject *obj, PyObject *args)
456{
457    ZipImporter *self = (ZipImporter *)obj;
458    PyObject *toc_entry;
459    char *fullname, *subname, path[MAXPATHLEN+1];
460    int len;
461    enum zi_module_info mi;
462
463    if (!PyArg_ParseTuple(args, "s:zipimporter.get_source", &fullname))
464        return NULL;
465
466    mi = get_module_info(self, fullname);
467    if (mi == MI_ERROR)
468        return NULL;
469    if (mi == MI_NOT_FOUND) {
470        PyErr_Format(ZipImportError, "can't find module '%.200s'",
471                     fullname);
472        return NULL;
473    }
474    subname = get_subname(fullname);
475
476    len = make_filename(_PyUnicode_AsString(self->prefix), subname, path);
477    if (len < 0)
478        return NULL;
479
480    if (mi == MI_PACKAGE) {
481        path[len] = SEP;
482        strcpy(path + len + 1, "__init__.py");
483    }
484    else
485        strcpy(path + len, ".py");
486
487    toc_entry = PyDict_GetItemString(self->files, path);
488    if (toc_entry != NULL) {
489        PyObject *bytes = get_data(_PyUnicode_AsString(self->archive), toc_entry);
490        PyObject *res = PyUnicode_FromString(PyBytes_AsString(bytes));
491        Py_XDECREF(bytes);
492        return res;
493    }
494
495    /* we have the module, but no source */
496    Py_INCREF(Py_None);
497    return Py_None;
498}
499
500PyDoc_STRVAR(doc_find_module,
501"find_module(fullname, path=None) -> self or None.\n\
502\n\
503Search for a module specified by 'fullname'. 'fullname' must be the\n\
504fully qualified (dotted) module name. It returns the zipimporter\n\
505instance itself if the module was found, or None if it wasn't.\n\
506The optional 'path' argument is ignored -- it's there for compatibility\n\
507with the importer protocol.");
508
509PyDoc_STRVAR(doc_load_module,
510"load_module(fullname) -> module.\n\
511\n\
512Load the module specified by 'fullname'. 'fullname' must be the\n\
513fully qualified (dotted) module name. It returns the imported\n\
514module, or raises ZipImportError if it wasn't found.");
515
516PyDoc_STRVAR(doc_get_data,
517"get_data(pathname) -> string with file data.\n\
518\n\
519Return the data associated with 'pathname'. Raise IOError if\n\
520the file wasn't found.");
521
522PyDoc_STRVAR(doc_is_package,
523"is_package(fullname) -> bool.\n\
524\n\
525Return True if the module specified by fullname is a package.\n\
526Raise ZipImportError if the module couldn't be found.");
527
528PyDoc_STRVAR(doc_get_code,
529"get_code(fullname) -> code object.\n\
530\n\
531Return the code object for the specified module. Raise ZipImportError\n\
532if the module couldn't be found.");
533
534PyDoc_STRVAR(doc_get_source,
535"get_source(fullname) -> source string.\n\
536\n\
537Return the source code for the specified module. Raise ZipImportError\n\
538if the module couldn't be found, return None if the archive does\n\
539contain the module, but has no source for it.");
540
541
542PyDoc_STRVAR(doc_get_filename,
543"get_filename(fullname) -> filename string.\n\
544\n\
545Return the filename for the specified module.");
546
547static PyMethodDef zipimporter_methods[] = {
548    {"find_module", zipimporter_find_module, METH_VARARGS,
549     doc_find_module},
550    {"load_module", zipimporter_load_module, METH_VARARGS,
551     doc_load_module},
552    {"get_data", zipimporter_get_data, METH_VARARGS,
553     doc_get_data},
554    {"get_code", zipimporter_get_code, METH_VARARGS,
555     doc_get_code},
556    {"get_source", zipimporter_get_source, METH_VARARGS,
557     doc_get_source},
558    {"get_filename", zipimporter_get_filename, METH_VARARGS,
559     doc_get_filename},
560    {"is_package", zipimporter_is_package, METH_VARARGS,
561     doc_is_package},
562    {NULL,              NULL}   /* sentinel */
563};
564
565static PyMemberDef zipimporter_members[] = {
566    {"archive",  T_OBJECT, offsetof(ZipImporter, archive),  READONLY},
567    {"prefix",   T_OBJECT, offsetof(ZipImporter, prefix),   READONLY},
568    {"_files",   T_OBJECT, offsetof(ZipImporter, files),    READONLY},
569    {NULL}
570};
571
572PyDoc_STRVAR(zipimporter_doc,
573"zipimporter(archivepath) -> zipimporter object\n\
574\n\
575Create a new zipimporter instance. 'archivepath' must be a path to\n\
576a zipfile, or to a specific path inside a zipfile. For example, it can be\n\
577'/tmp/myimport.zip', or '/tmp/myimport.zip/mydirectory', if mydirectory is a\n\
578valid directory inside the archive.\n\
579\n\
580'ZipImportError is raised if 'archivepath' doesn't point to a valid Zip\n\
581archive.\n\
582\n\
583The 'archive' attribute of zipimporter objects contains the name of the\n\
584zipfile targeted.");
585
586#define DEFERRED_ADDRESS(ADDR) 0
587
588static PyTypeObject ZipImporter_Type = {
589    PyVarObject_HEAD_INIT(DEFERRED_ADDRESS(&PyType_Type), 0)
590    "zipimport.zipimporter",
591    sizeof(ZipImporter),
592    0,                                          /* tp_itemsize */
593    (destructor)zipimporter_dealloc,            /* tp_dealloc */
594    0,                                          /* tp_print */
595    0,                                          /* tp_getattr */
596    0,                                          /* tp_setattr */
597    0,                                          /* tp_reserved */
598    (reprfunc)zipimporter_repr,                 /* tp_repr */
599    0,                                          /* tp_as_number */
600    0,                                          /* tp_as_sequence */
601    0,                                          /* tp_as_mapping */
602    0,                                          /* tp_hash */
603    0,                                          /* tp_call */
604    0,                                          /* tp_str */
605    PyObject_GenericGetAttr,                    /* tp_getattro */
606    0,                                          /* tp_setattro */
607    0,                                          /* tp_as_buffer */
608    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
609        Py_TPFLAGS_HAVE_GC,                     /* tp_flags */
610    zipimporter_doc,                            /* tp_doc */
611    zipimporter_traverse,                       /* tp_traverse */
612    0,                                          /* tp_clear */
613    0,                                          /* tp_richcompare */
614    0,                                          /* tp_weaklistoffset */
615    0,                                          /* tp_iter */
616    0,                                          /* tp_iternext */
617    zipimporter_methods,                        /* tp_methods */
618    zipimporter_members,                        /* tp_members */
619    0,                                          /* tp_getset */
620    0,                                          /* tp_base */
621    0,                                          /* tp_dict */
622    0,                                          /* tp_descr_get */
623    0,                                          /* tp_descr_set */
624    0,                                          /* tp_dictoffset */
625    (initproc)zipimporter_init,                 /* tp_init */
626    PyType_GenericAlloc,                        /* tp_alloc */
627    PyType_GenericNew,                          /* tp_new */
628    PyObject_GC_Del,                            /* tp_free */
629};
630
631
632/* implementation */
633
/* Given a buffer, return the long that is represented by the first
   4 bytes, encoded as little endian, interpreted as a signed 32-bit
   value. This partially reimplements marshal.c:r_long().

   The value is assembled in an unsigned long so that setting bit 31
   never shifts into the sign bit of a signed type (undefined behaviour
   where long is 32 bits wide). */
static long
get_long(unsigned char *buf) {
    unsigned long x;
    x =  (unsigned long)buf[0];
    x |= (unsigned long)buf[1] <<  8;
    x |= (unsigned long)buf[2] << 16;
    x |= (unsigned long)buf[3] << 24;
#if SIZEOF_LONG > 4
    /* Sign extension for 64-bit machines: replicate bit 31 upwards */
    x |= -(x & 0x80000000UL);
#endif
    return (long)x;
}
650
651/*
652   read_directory(archive) -> files dict (new reference)
653
654   Given a path to a Zip archive, build a dict, mapping file names
655   (local to the archive, using SEP as a separator) to toc entries.
656
657   A toc_entry is a tuple:
658
659       (__file__,      # value to use for __file__, available for all files
660    compress,      # compression kind; 0 for uncompressed
661    data_size,     # size of compressed data on disk
662    file_size,     # size of decompressed data
663    file_offset,   # offset of file header from start of archive
664    time,          # mod time of file (in dos format)
665    date,          # mod data of file (in dos format)
666    crc,           # crc checksum of the data
667       )
668
669   Directories can be recognized by the trailing SEP in the name,
670   data_size and file_offset are 0.
671*/
672static PyObject *
673read_directory(char *archive)
674{
675    PyObject *files = NULL;
676    FILE *fp;
677    long compress, crc, data_size, file_size, file_offset, date, time;
678    long header_offset, name_size, header_size, header_position;
679    long i, l, count;
680    size_t length;
681    char path[MAXPATHLEN + 5];
682    char name[MAXPATHLEN + 5];
683    char *p, endof_central_dir[22];
684    long arc_offset; /* offset from beginning of file to start of zip-archive */
685
686    if (strlen(archive) > MAXPATHLEN) {
687        PyErr_SetString(PyExc_OverflowError,
688                        "Zip path name is too long");
689        return NULL;
690    }
691    strcpy(path, archive);
692
693    fp = fopen(archive, "rb");
694    if (fp == NULL) {
695        PyErr_Format(ZipImportError, "can't open Zip file: "
696                     "'%.200s'", archive);
697        return NULL;
698    }
699    fseek(fp, -22, SEEK_END);
700    header_position = ftell(fp);
701    if (fread(endof_central_dir, 1, 22, fp) != 22) {
702        fclose(fp);
703        PyErr_Format(ZipImportError, "can't read Zip file: "
704                     "'%.200s'", archive);
705        return NULL;
706    }
707    if (get_long((unsigned char *)endof_central_dir) != 0x06054B50) {
708        /* Bad: End of Central Dir signature */
709        fclose(fp);
710        PyErr_Format(ZipImportError, "not a Zip file: "
711                     "'%.200s'", archive);
712        return NULL;
713    }
714
715    header_size = get_long((unsigned char *)endof_central_dir + 12);
716    header_offset = get_long((unsigned char *)endof_central_dir + 16);
717    arc_offset = header_position - header_offset - header_size;
718    header_offset += arc_offset;
719
720    files = PyDict_New();
721    if (files == NULL)
722        goto error;
723
724    length = (long)strlen(path);
725    path[length] = SEP;
726
727    /* Start of Central Directory */
728    count = 0;
729    for (;;) {
730        PyObject *t;
731        int err;
732
733        fseek(fp, header_offset, 0);  /* Start of file header */
734        l = PyMarshal_ReadLongFromFile(fp);
735        if (l != 0x02014B50)
736            break;              /* Bad: Central Dir File Header */
737        fseek(fp, header_offset + 10, 0);
738        compress = PyMarshal_ReadShortFromFile(fp);
739        time = PyMarshal_ReadShortFromFile(fp);
740        date = PyMarshal_ReadShortFromFile(fp);
741        crc = PyMarshal_ReadLongFromFile(fp);
742        data_size = PyMarshal_ReadLongFromFile(fp);
743        file_size = PyMarshal_ReadLongFromFile(fp);
744        name_size = PyMarshal_ReadShortFromFile(fp);
745        header_size = 46 + name_size +
746           PyMarshal_ReadShortFromFile(fp) +
747           PyMarshal_ReadShortFromFile(fp);
748        fseek(fp, header_offset + 42, 0);
749        file_offset = PyMarshal_ReadLongFromFile(fp) + arc_offset;
750        if (name_size > MAXPATHLEN)
751            name_size = MAXPATHLEN;
752
753        p = name;
754        for (i = 0; i < name_size; i++) {
755            *p = (char)getc(fp);
756            if (*p == '/')
757                *p = SEP;
758            p++;
759        }
760        *p = 0;         /* Add terminating null byte */
761        header_offset += header_size;
762
763        strncpy(path + length + 1, name, MAXPATHLEN - length - 1);
764
765        t = Py_BuildValue("siiiiiii", path, compress, data_size,
766                          file_size, file_offset, time, date, crc);
767        if (t == NULL)
768            goto error;
769        err = PyDict_SetItemString(files, name, t);
770        Py_DECREF(t);
771        if (err != 0)
772            goto error;
773        count++;
774    }
775    fclose(fp);
776    if (Py_VerboseFlag)
777        PySys_WriteStderr("# zipimport: found %ld names in %s\n",
778            count, archive);
779    return files;
780error:
781    fclose(fp);
782    Py_XDECREF(files);
783    return NULL;
784}
785
786/* Return the zlib.decompress function object, or NULL if zlib couldn't
787   be imported. The function is cached when found, so subsequent calls
788   don't import zlib again. Returns a *borrowed* reference.
789   XXX This makes zlib.decompress immortal. */
790static PyObject *
791get_decompress_func(void)
792{
793    static PyObject *decompress = NULL;
794
795    if (decompress == NULL) {
796        PyObject *zlib;
797        static int importing_zlib = 0;
798
799        if (importing_zlib != 0)
800            /* Someone has a zlib.py[co] in their Zip file;
801               let's avoid a stack overflow. */
802            return NULL;
803        importing_zlib = 1;
804        zlib = PyImport_ImportModuleNoBlock("zlib");
805        importing_zlib = 0;
806        if (zlib != NULL) {
807            decompress = PyObject_GetAttrString(zlib,
808                                                "decompress");
809            Py_DECREF(zlib);
810        }
811        else
812            PyErr_Clear();
813        if (Py_VerboseFlag)
814            PySys_WriteStderr("# zipimport: zlib %s\n",
815                zlib != NULL ? "available": "UNAVAILABLE");
816    }
817    return decompress;
818}
819
820/* Given a path to a Zip file and a toc_entry, return the (uncompressed)
821   data as a new reference. */
822static PyObject *
823get_data(char *archive, PyObject *toc_entry)
824{
825    PyObject *raw_data, *data = NULL, *decompress;
826    char *buf;
827    FILE *fp;
828    int err;
829    Py_ssize_t bytes_read = 0;
830    long l;
831    char *datapath;
832    long compress, data_size, file_size, file_offset, bytes_size;
833    long time, date, crc;
834
835    if (!PyArg_ParseTuple(toc_entry, "slllllll", &datapath, &compress,
836                          &data_size, &file_size, &file_offset, &time,
837                          &date, &crc)) {
838        return NULL;
839    }
840
841    fp = fopen(archive, "rb");
842    if (!fp) {
843        PyErr_Format(PyExc_IOError,
844           "zipimport: can not open file %s", archive);
845        return NULL;
846    }
847
848    /* Check to make sure the local file header is correct */
849    fseek(fp, file_offset, 0);
850    l = PyMarshal_ReadLongFromFile(fp);
851    if (l != 0x04034B50) {
852        /* Bad: Local File Header */
853        PyErr_Format(ZipImportError,
854                     "bad local file header in %s",
855                     archive);
856        fclose(fp);
857        return NULL;
858    }
859    fseek(fp, file_offset + 26, 0);
860    l = 30 + PyMarshal_ReadShortFromFile(fp) +
861        PyMarshal_ReadShortFromFile(fp);        /* local header size */
862    file_offset += l;           /* Start of file data */
863
864    bytes_size = compress == 0 ? data_size : data_size + 1;
865    if (bytes_size == 0)
866        bytes_size++;
867    raw_data = PyBytes_FromStringAndSize((char *)NULL, bytes_size);
868
869    if (raw_data == NULL) {
870        fclose(fp);
871        return NULL;
872    }
873    buf = PyBytes_AsString(raw_data);
874
875    err = fseek(fp, file_offset, 0);
876    if (err == 0)
877        bytes_read = fread(buf, 1, data_size, fp);
878    fclose(fp);
879    if (err || bytes_read != data_size) {
880        PyErr_SetString(PyExc_IOError,
881                        "zipimport: can't read data");
882        Py_DECREF(raw_data);
883        return NULL;
884    }
885
886    if (compress != 0) {
887        buf[data_size] = 'Z';  /* saw this in zipfile.py */
888        data_size++;
889    }
890    buf[data_size] = '\0';
891
892    if (compress == 0) {  /* data is not compressed */
893        data = PyBytes_FromStringAndSize(buf, data_size);
894        Py_DECREF(raw_data);
895        return data;
896    }
897
898    /* Decompress with zlib */
899    decompress = get_decompress_func();
900    if (decompress == NULL) {
901        PyErr_SetString(ZipImportError,
902                        "can't decompress data; "
903                        "zlib not available");
904        goto error;
905    }
906    data = PyObject_CallFunction(decompress, "Oi", raw_data, -15);
907error:
908    Py_DECREF(raw_data);
909    return data;
910}
911
/* Lenient date/time comparison function. The precision of the mtime
   in the archive is lower than the mtime stored in a .pyc: we
   must allow a difference of at most one second.
   Returns 1 if t1 and t2 differ by at most one second, else 0. */
static int
eq_mtime(time_t t1, time_t t2)
{
    /* Subtract the smaller from the larger so the difference is never
       negative; this also works where time_t is an unsigned type. */
    time_t d = (t1 > t2) ? t1 - t2 : t2 - t1;
    /* dostime only stores even seconds, so be lenient */
    return d <= 1;
}
924
925/* Given the contents of a .py[co] file in a buffer, unmarshal the data
926   and return the code object. Return None if it the magic word doesn't
927   match (we do this instead of raising an exception as we fall back
928   to .py if available and we don't want to mask other errors).
929   Returns a new reference. */
930static PyObject *
931unmarshal_code(char *pathname, PyObject *data, time_t mtime)
932{
933    PyObject *code;
934    char *buf = PyBytes_AsString(data);
935    Py_ssize_t size = PyBytes_Size(data);
936
937    if (size <= 9) {
938        PyErr_SetString(ZipImportError,
939                        "bad pyc data");
940        return NULL;
941    }
942
943    if (get_long((unsigned char *)buf) != PyImport_GetMagicNumber()) {
944        if (Py_VerboseFlag)
945            PySys_WriteStderr("# %s has bad magic\n",
946                              pathname);
947        Py_INCREF(Py_None);
948        return Py_None;  /* signal caller to try alternative */
949    }
950
951    if (mtime != 0 && !eq_mtime(get_long((unsigned char *)buf + 4),
952                                mtime)) {
953        if (Py_VerboseFlag)
954            PySys_WriteStderr("# %s has bad mtime\n",
955                              pathname);
956        Py_INCREF(Py_None);
957        return Py_None;  /* signal caller to try alternative */
958    }
959
960    code = PyMarshal_ReadObjectFromString(buf + 8, size - 8);
961    if (code == NULL)
962        return NULL;
963    if (!PyCode_Check(code)) {
964        Py_DECREF(code);
965        PyErr_Format(PyExc_TypeError,
966             "compiled module %.200s is not a code object",
967             pathname);
968        return NULL;
969    }
970    return code;
971}
972
973/* Replace any occurances of "\r\n?" in the input string with "\n".
974   This converts DOS and Mac line endings to Unix line endings.
975   Also append a trailing "\n" to be compatible with
976   PyParser_SimpleParseFile(). Returns a new reference. */
977static PyObject *
978normalize_line_endings(PyObject *source)
979{
980    char *buf, *q, *p = PyBytes_AsString(source);
981    PyObject *fixed_source;
982    int len = 0;
983
984    if (!p) {
985        return PyBytes_FromStringAndSize("\n\0", 2);
986    }
987
988    /* one char extra for trailing \n and one for terminating \0 */
989    buf = (char *)PyMem_Malloc(PyBytes_Size(source) + 2);
990    if (buf == NULL) {
991        PyErr_SetString(PyExc_MemoryError,
992                        "zipimport: no memory to allocate "
993                        "source buffer");
994        return NULL;
995    }
996    /* replace "\r\n?" by "\n" */
997    for (q = buf; *p != '\0'; p++) {
998        if (*p == '\r') {
999            *q++ = '\n';
1000            if (*(p + 1) == '\n')
1001                p++;
1002        }
1003        else
1004            *q++ = *p;
1005        len++;
1006    }
1007    *q++ = '\n';  /* add trailing \n */
1008    *q = '\0';
1009    fixed_source = PyBytes_FromStringAndSize(buf, len + 2);
1010    PyMem_Free(buf);
1011    return fixed_source;
1012}
1013
1014/* Given a string buffer containing Python source code, compile it
1015   return and return a code object as a new reference. */
1016static PyObject *
1017compile_source(char *pathname, PyObject *source)
1018{
1019    PyObject *code, *fixed_source;
1020
1021    fixed_source = normalize_line_endings(source);
1022    if (fixed_source == NULL)
1023        return NULL;
1024
1025    code = Py_CompileString(PyBytes_AsString(fixed_source), pathname,
1026                            Py_file_input);
1027    Py_DECREF(fixed_source);
1028    return code;
1029}
1030
/* Convert the packed date/time values found in the Zip archive to a
   time_t that's compatible with the time stamp stored in .pyc files.

   'dostime' packs seconds/2 (bits 0-4), minutes (bits 5-10) and hours
   (bits 11-15); 'dosdate' packs the day of the month (bits 0-4), the
   1-based month (bits 5-8) and the years since 1980 (bits 9-15).
   The fields are interpreted by mktime(), i.e. as local time. */
static time_t
parse_dostime(int dostime, int dosdate)
{
    struct tm fields;
    const int seconds    = (dostime & 0x1f) * 2;
    const int minutes    = (dostime >> 5) & 0x3f;
    const int hours      = (dostime >> 11) & 0x1f;
    const int day        = dosdate & 0x1f;
    const int month      = (dosdate >> 5) & 0x0f;
    const int since_1980 = (dosdate >> 9) & 0x7f;

    memset(&fields, 0, sizeof(fields));

    fields.tm_year  = since_1980 + 80;  /* struct tm counts from 1900 */
    fields.tm_mon   = month - 1;        /* struct tm months are 0-based */
    fields.tm_mday  = day;
    fields.tm_hour  = hours;
    fields.tm_min   = minutes;
    fields.tm_sec   = seconds;
    fields.tm_isdst = -1;               /* wday/yday is ignored */

    return mktime(&fields);
}
1050
1051/* Given a path to a .pyc or .pyo file in the archive, return the
1052   modification time of the matching .py file, or 0 if no source
1053   is available. */
1054static time_t
1055get_mtime_of_source(ZipImporter *self, char *path)
1056{
1057    PyObject *toc_entry;
1058    time_t mtime = 0;
1059    Py_ssize_t lastchar = strlen(path) - 1;
1060    char savechar = path[lastchar];
1061    path[lastchar] = '\0';  /* strip 'c' or 'o' from *.py[co] */
1062    toc_entry = PyDict_GetItemString(self->files, path);
1063    if (toc_entry != NULL && PyTuple_Check(toc_entry) &&
1064        PyTuple_Size(toc_entry) == 8) {
1065        /* fetch the time stamp of the .py file for comparison
1066           with an embedded pyc time stamp */
1067        int time, date;
1068        time = PyLong_AsLong(PyTuple_GetItem(toc_entry, 5));
1069        date = PyLong_AsLong(PyTuple_GetItem(toc_entry, 6));
1070        mtime = parse_dostime(time, date);
1071    }
1072    path[lastchar] = savechar;
1073    return mtime;
1074}
1075
1076/* Return the code object for the module named by 'fullname' from the
1077   Zip archive as a new reference. */
1078static PyObject *
1079get_code_from_data(ZipImporter *self, int ispackage, int isbytecode,
1080                   time_t mtime, PyObject *toc_entry)
1081{
1082    PyObject *data, *code;
1083    char *modpath;
1084    char *archive = _PyUnicode_AsString(self->archive);
1085
1086    if (archive == NULL)
1087        return NULL;
1088
1089    data = get_data(archive, toc_entry);
1090    if (data == NULL)
1091        return NULL;
1092
1093    modpath = _PyUnicode_AsString(PyTuple_GetItem(toc_entry, 0));
1094
1095    if (isbytecode) {
1096        code = unmarshal_code(modpath, data, mtime);
1097    }
1098    else {
1099        code = compile_source(modpath, data);
1100    }
1101    Py_DECREF(data);
1102    return code;
1103}
1104
1105/* Get the code object associated with the module specified by
1106   'fullname'. */
1107static PyObject *
1108get_module_code(ZipImporter *self, char *fullname,
1109                int *p_ispackage, char **p_modpath)
1110{
1111    PyObject *toc_entry;
1112    char *subname, path[MAXPATHLEN + 1];
1113    int len;
1114    struct st_zip_searchorder *zso;
1115
1116    subname = get_subname(fullname);
1117
1118    len = make_filename(_PyUnicode_AsString(self->prefix), subname, path);
1119    if (len < 0)
1120        return NULL;
1121
1122    for (zso = zip_searchorder; *zso->suffix; zso++) {
1123        PyObject *code = NULL;
1124
1125        strcpy(path + len, zso->suffix);
1126        if (Py_VerboseFlag > 1)
1127            PySys_WriteStderr("# trying %s%c%s\n",
1128                              _PyUnicode_AsString(self->archive),
1129                              (int)SEP, path);
1130        toc_entry = PyDict_GetItemString(self->files, path);
1131        if (toc_entry != NULL) {
1132            time_t mtime = 0;
1133            int ispackage = zso->type & IS_PACKAGE;
1134            int isbytecode = zso->type & IS_BYTECODE;
1135
1136            if (isbytecode)
1137                mtime = get_mtime_of_source(self, path);
1138            if (p_ispackage != NULL)
1139                *p_ispackage = ispackage;
1140            code = get_code_from_data(self, ispackage,
1141                                      isbytecode, mtime,
1142                                      toc_entry);
1143            if (code == Py_None) {
1144                /* bad magic number or non-matching mtime
1145                   in byte code, try next */
1146                Py_DECREF(code);
1147                continue;
1148            }
1149            if (code != NULL && p_modpath != NULL)
1150                *p_modpath = _PyUnicode_AsString(
1151                    PyTuple_GetItem(toc_entry, 0));
1152            return code;
1153        }
1154    }
1155    PyErr_Format(ZipImportError, "can't find module '%.200s'", fullname);
1156    return NULL;
1157}
1158
1159
1160/* Module init */
1161
1162PyDoc_STRVAR(zipimport_doc,
1163"zipimport provides support for importing Python modules from Zip archives.\n\
1164\n\
1165This module exports three objects:\n\
1166- zipimporter: a class; its constructor takes a path to a Zip archive.\n\
1167- ZipImportError: exception raised by zipimporter objects. It's a\n\
1168  subclass of ImportError, so it can be caught as ImportError, too.\n\
1169- _zip_directory_cache: a dict, mapping archive paths to zip directory\n\
1170  info dicts, as used in zipimporter._files.\n\
1171\n\
1172It is usually not needed to use the zipimport module explicitly; it is\n\
1173used by the builtin import mechanism for sys.path items that are paths\n\
1174to Zip archives.");
1175
1176static struct PyModuleDef zipimportmodule = {
1177    PyModuleDef_HEAD_INIT,
1178    "zipimport",
1179    zipimport_doc,
1180    -1,
1181    NULL,
1182    NULL,
1183    NULL,
1184    NULL,
1185    NULL
1186};
1187
1188PyMODINIT_FUNC
1189PyInit_zipimport(void)
1190{
1191    PyObject *mod;
1192
1193    if (PyType_Ready(&ZipImporter_Type) < 0)
1194        return NULL;
1195
1196    /* Correct directory separator */
1197    zip_searchorder[0].suffix[0] = SEP;
1198    zip_searchorder[1].suffix[0] = SEP;
1199    zip_searchorder[2].suffix[0] = SEP;
1200    if (Py_OptimizeFlag) {
1201        /* Reverse *.pyc and *.pyo */
1202        struct st_zip_searchorder tmp;
1203        tmp = zip_searchorder[0];
1204        zip_searchorder[0] = zip_searchorder[1];
1205        zip_searchorder[1] = tmp;
1206        tmp = zip_searchorder[3];
1207        zip_searchorder[3] = zip_searchorder[4];
1208        zip_searchorder[4] = tmp;
1209    }
1210
1211    mod = PyModule_Create(&zipimportmodule);
1212    if (mod == NULL)
1213        return NULL;
1214
1215    ZipImportError = PyErr_NewException("zipimport.ZipImportError",
1216                                        PyExc_ImportError, NULL);
1217    if (ZipImportError == NULL)
1218        return NULL;
1219
1220    Py_INCREF(ZipImportError);
1221    if (PyModule_AddObject(mod, "ZipImportError",
1222                           ZipImportError) < 0)
1223        return NULL;
1224
1225    Py_INCREF(&ZipImporter_Type);
1226    if (PyModule_AddObject(mod, "zipimporter",
1227                           (PyObject *)&ZipImporter_Type) < 0)
1228        return NULL;
1229
1230    zip_directory_cache = PyDict_New();
1231    if (zip_directory_cache == NULL)
1232        return NULL;
1233    Py_INCREF(zip_directory_cache);
1234    if (PyModule_AddObject(mod, "_zip_directory_cache",
1235                           zip_directory_cache) < 0)
1236        return NULL;
1237    return mod;
1238}
1239